From 750a82a4ff615190a2793c0cfae9f4c1f5c75433 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Thu, 15 Feb 2024 13:23:33 +0100
Subject: [PATCH 001/283] Update doc

---
 .../mergetree-family/mergetree.md             |   2 +
 docs/en/operations/storing-data.md            | 146 ++++++++++++++++--
 2 files changed, 134 insertions(+), 14 deletions(-)

diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index f185c11bab3..e1eef8db9ab 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -1106,6 +1106,8 @@ Configuration markup:
 ```
 
+Also see [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage).
+
 :::note cache configuration
 ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) if you are using one of those versions.
 :::
diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md
index 003277c8d4f..7a7edfb1a90 100644
--- a/docs/en/operations/storing-data.md
+++ b/docs/en/operations/storing-data.md
@@ -11,45 +11,163 @@ To work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-en
 
 To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver).
 
-## Configuring HDFS {#configuring-hdfs}
+## Configuring external storage {#configuring-external-storage}
 
-[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`.
+[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly.
 
-Configuration markup:
+Let's take a look at the different storage configuration options, using `S3` storage as an example.
+First, define the configuration in the server configuration file. To configure `S3` storage, the following configuration can be used:
 
 ``` xml
 <clickhouse>
     <storage_configuration>
         <disks>
-            <hdfs>
-                <type>hdfs</type>
-                <endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
-            </hdfs>
+            <s3>
+                <type>s3</type>
+                <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
+                <use_environment_credentials>1</use_environment_credentials>
+            </s3>
         </disks>
         <policies>
-            <hdfs>
+            <s3>
                 <volumes>
                     <main>
-                        <disk>hdfs</disk>
+                        <disk>s3</disk>
                     </main>
                 </volumes>
-            </hdfs>
+            </s3>
         </policies>
     </storage_configuration>
+</clickhouse>
+```
+
+Starting with ClickHouse version 24.1, a different type of configuration is supported in addition to the older one:
+
+``` xml
+<clickhouse>
+    <storage_configuration>
+        <disks>
+            <s3>
+                <type>object_storage</type>
+                <object_storage_type>s3</object_storage_type>
+                <metadata_type>local</metadata_type>
+                <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
+                <use_environment_credentials>1</use_environment_credentials>
+            </s3>
+        </disks>
+        <policies>
+            <s3>
+                <volumes>
+                    <main>
+                        <disk>s3</disk>
+                    </main>
+                </volumes>
+            </s3>
+        </policies>
+    </storage_configuration>
+</clickhouse>
+```
+
+To make a specific kind of storage the default option for all `MergeTree` tables, add the following section to the configuration file:
+
+``` xml
+<clickhouse>
     <merge_tree>
-        <min_bytes_for_wide_part>0</min_bytes_for_wide_part>
+        <storage_policy>s3</storage_policy>
     </merge_tree>
 </clickhouse>
 ```
 
-Required parameters:
+If you want to configure a specific storage policy for only a specific table, you can define it in settings while creating the table:
 
-- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data.
+``` sql
+CREATE TABLE test (a Int32, b String)
+ENGINE = MergeTree() ORDER BY a
+SETTINGS storage_policy = 's3';
+```
 
-Optional parameters:
+You can also use `disk` instead of `storage_policy`. In this case the `storage_policy` section is not required in the configuration file; a `disk` section is enough.
 
-- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`.
+``` sql
+CREATE TABLE test (a Int32, b String)
+ENGINE = MergeTree() ORDER BY a
+SETTINGS disk = 's3';
+```
+
+It is also possible to specify a storage configuration without a preconfigured disk in the configuration file:
+
+``` sql
+CREATE TABLE test (a Int32, b String)
+ENGINE = MergeTree() ORDER BY a
+SETTINGS disk = disk(name = 's3_disk', type = 's3', endpoint = 'https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/', use_environment_credentials = 1);
+```
+
+Adding a cache is also possible:
+
+``` sql
+CREATE TABLE test (a Int32, b String)
+ENGINE = MergeTree() ORDER BY a
+SETTINGS disk = disk(name = 'cached_s3_disk', type = 'cache', max_size = '10Gi', path = '/s3_cache', disk = disk(name = 's3_disk', type = 's3', endpoint = 'https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/', use_environment_credentials = 1));
+```
+
+A combination of config-file disk configuration and SQL-defined configuration is also possible:
+
+``` sql
+CREATE TABLE test (a Int32, b String)
+ENGINE = MergeTree() ORDER BY a
+SETTINGS disk = disk(name = 'cached_s3_disk', type = 'cache', max_size = '10Gi', path = '/s3_cache', disk = 's3');
+```
+
+Here `s3` is a disk name from the server configuration file, while the `cache` disk is defined via SQL.
+
+Let's take a closer look at the configuration parameters.
+
+All disk configurations require a `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local`, `cache`, `web`. Then comes the configuration of the specific storage type.
+Starting from ClickHouse version 24.1, you can use a new configuration option. It requires specifying `type` as `object_storage` and `object_storage_type` as one of `s3`, `azure_blob_storage`, `hdfs`, `local`, `cache`, `web`; optionally, `metadata_type` can be specified, which is `local` by default, but it can also be set to `plain` or `web`.
+
+E.g. the first configuration option:
+``` xml
+<s3>
+    <type>s3</type>
+    <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
+    <use_environment_credentials>1</use_environment_credentials>
+</s3>
+```
+
+and the second (from `24.1`):
+``` xml
+<s3>
+    <type>object_storage</type>
+    <object_storage_type>s3</object_storage_type>
+    <metadata_type>local</metadata_type>
+    <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
+    <use_environment_credentials>1</use_environment_credentials>
+</s3>
+```
+
+A configuration like
+``` xml
+<s3_plain>
+    <type>s3_plain</type>
+    <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
+    <use_environment_credentials>1</use_environment_credentials>
+</s3_plain>
+```
+
+is equivalent to
+``` xml
+<s3_plain>
+    <type>object_storage</type>
+    <object_storage_type>s3</object_storage_type>
+    <metadata_type>plain</metadata_type>
+    <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
+    <use_environment_credentials>1</use_environment_credentials>
+</s3_plain>
+```
+
+For detailed configuration options of each storage type, see [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md).
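As an editorial aside (not part of the patch): once a configuration like the one above is applied, the simplest way to confirm that the server picked up the disk and policy is to query the standard `system.disks` and `system.storage_policies` tables. A minimal sketch, assuming the disk and policy are both named `s3` as in this patch:

``` sql
-- Verify that the `s3` disk and policy from the configuration above were
-- loaded. `system.disks` and `system.storage_policies` are standard
-- ClickHouse system tables; the name `s3` comes from the example config.
SELECT name, path, type
FROM system.disks
WHERE name = 's3';

SELECT policy_name, volume_name, disks
FROM system.storage_policies
WHERE policy_name = 's3';
```

If these queries return no rows after a configuration reload, the `storage_configuration` section was most likely not picked up.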
## Using Virtual File System for Data Encryption {#encrypted-virtual-file-system} From 9bcd4daabe56e29132fc5098420afb4dcba9001d Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Feb 2024 16:19:31 +0100 Subject: [PATCH 002/283] Better --- .../mergetree-family/mergetree.md | 294 +------------ docs/en/operations/storing-data.md | 411 +++++++++++++++--- 2 files changed, 346 insertions(+), 359 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index e1eef8db9ab..0fff13c906f 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -987,49 +987,6 @@ ORDER BY (postcode1, postcode2, addr1, addr2) # highlight-end ``` -### Nested Dynamic Storage - -This example query builds on the above dynamic disk configuration and shows how to -use a local disk to cache data from a table stored at a URL. Neither the cache disk -nor the web storage is configured in the ClickHouse configuration files; both are -configured in the CREATE/ATTACH query settings. - -In the settings highlighted below notice that the disk of `type=web` is nested within -the disk of `type=cache`. - -```sql -ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' -( - price UInt32, - date Date, - postcode1 LowCardinality(String), - postcode2 LowCardinality(String), - type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), - is_new UInt8, - duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), - addr1 String, - addr2 String, - street LowCardinality(String), - locality LowCardinality(String), - town LowCardinality(String), - district LowCardinality(String), - county LowCardinality(String) -) -ENGINE = MergeTree -ORDER BY (postcode1, postcode2, addr1, addr2) - # highlight-start - SETTINGS disk = disk( - type=cache, - max_size='1Gi', - path='/var/lib/clickhouse/custom_disk_cache/', - disk=disk( - type=web, - endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' - ) - ); - # highlight-end -``` - ### Details {#details} In the case of `MergeTree` tables, data is getting to disk in different ways: @@ -1058,19 +1015,17 @@ During this time, they are not moved to other volumes or disks. Therefore, until User can assign new big parts to different disks of a [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures) volume in a balanced way using the [min_bytes_to_rebalance_partition_over_jbod](/docs/en/operations/settings/merge-tree-settings.md/#min-bytes-to-rebalance-partition-over-jbod) setting. -## Using S3 for Data Storage {#table_engine-mergetree-s3} +## Using External Storage for Data Storage {#table_engine-mergetree-s3} -:::note -Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs). -::: +[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly. See [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage) for more details. -`MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`. +Example for [S3](https://aws.amazon.com/s3/) as external storage using a disk with type `s3`. Configuration markup: ``` xml ... 
- +e s3 true @@ -1112,247 +1067,6 @@ Also see [configuring external storage options](/docs/en/operations/storing-data ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) if you are using one of those versions. ::: -### Configuring the S3 disk - -Required parameters: - -- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data. -- `access_key_id` — S3 access key id. -- `secret_access_key` — S3 secret access key. - -Optional parameters: - -- `region` — S3 region name. -- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs. -- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`. -- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`. -- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`. -- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL. -- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`. -- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`. -- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`. -- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`. -- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`. -- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks//`. -- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`. -- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times. -- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. -- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional. -- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional. -- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting). -- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). 
-- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. -- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). -- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. -- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). -- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). -- `key_template` — Define the format with which the object keys are generated. By default, Clickhouse takes `root path` from `endpoint` option and adds random generated suffix. That suffix is a dir with 3 random symbols and a file name with 29 random symbols. With that option you have a full control how to the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only some subset of the syntax is supported. Check if your preferred format is supported before using that option. Disk isn't initialized if clickhouse is unable to generate a key by the value of `key_template`. It requires enabled feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key). It forbids declaring the `root path` in `endpoint` option. It requires definition of the option `key_compatibility_prefix`. -- `key_compatibility_prefix` — That option is required when option `key_template` is in use. In order to be able to read the objects keys which were stored in the metadata files with the metadata version lower that `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here. - -### Configuring the cache - -This is the cache configuration from above: -```xml - - cache - s3 - /var/lib/clickhouse/disks/s3_cache/ - 10Gi - -``` - -These parameters define the cache layer: -- `type` — If a disk is of type `cache` it caches mark and index files in memory. -- `disk` — The name of the disk that will be cached. - -Cache parameters: -- `path` — The path where metadata for the cache is stored. -- `max_size` — The size (amount of disk space) that the cache can grow to. - -:::tip -There are several other cache parameters that you can use to tune your storage, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for the details. -::: - -S3 disk can be configured as `main` or `cold` storage: -``` xml - - ... - - - s3 - https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/ - your_access_key_id - your_secret_access_key - - - - - -
- s3 -
-
-
- - -
- default -
- - s3 - -
- 0.2 -
-
- ... -
-``` - -In case of `cold` option a data can be moved to S3 if local disk free size will be smaller than `move_factor * disk_size` or by TTL move rule. - -## Using Azure Blob Storage for Data Storage {#table_engine-mergetree-azure-blob-storage} - -`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`. - -As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented. - -Configuration markup: -``` xml - - ... - - - azure_blob_storage - http://account.blob.core.windows.net - container - account - pass123 - /var/lib/clickhouse/disks/blob_storage_disk/ - /var/lib/clickhouse/disks/blob_storage_disk/cache/ - false - - - ... - -``` - -Connection parameters: -* `storage_account_url` - **Required**, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`. -* `container_name` - Target container name, defaults to `default-container`. -* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet. - -Authentication parameters (the disk will try all available methods **and** Managed Identity Credential): -* `connection_string` - For authentication using a connection string. -* `account_name` and `account_key` - For authentication using Shared Key. - -Limit parameters (mainly for internal usage): -* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage. -* `min_bytes_for_seek` - Limits the size of a seekable region. -* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage. -* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage. -* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated. -* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. - -Other parameters: -* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks//`. -* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`. -* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). -* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). - -Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)). 
- -:::note Zero-copy replication is not ready for production -Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. -::: - -## HDFS storage {#hdfs-storage} - -In this sample configuration: -- the disk is of type `hdfs` -- the data is hosted at `hdfs://hdfs1:9000/clickhouse/` - -```xml - - - - - hdfs - hdfs://hdfs1:9000/clickhouse/ - true - - - local - / - - - - - -
- hdfs -
- - hdd - -
-
-
-
-
-``` - -## Web storage (read-only) {#web-storage} - -Web storage can be used for read-only purposes. An example use is for hosting sample -data, or for migrating data. - -:::tip -Storage can also be configured temporarily within a query, if a web dataset is not expected -to be used routinely, see [dynamic storage](#dynamic-storage) and skip editing the -configuration file. -::: - -In this sample configuration: -- the disk is of type `web` -- the data is hosted at `http://nginx:80/test1/` -- a cache on local storage is used - -```xml - - - - - web - http://nginx:80/test1/ - - - cache - web - cached_web_cache/ - 100000000 - - - - - -
- web -
-
-
- - -
- cached_web -
-
-
-
-
-
-``` - ## Virtual Columns {#virtual-columns} - `_part` — Name of a part. diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 7a7edfb1a90..baf4e1999a7 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -5,21 +5,68 @@ sidebar_label: "External Disks for Storing Data" title: "External Disks for Storing Data" --- -Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely — on [Amazon S3](https://aws.amazon.com/s3/) disks or in the Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)). +Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely. Various storages are supported: +1. [Amazon S3](https://aws.amazon.com/s3/) object storage. +2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)) +3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs). -To work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, and to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine. - -To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver). +Note: to work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine, and to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/AzureBlobStorage.md) table engine. They are different from external storage described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` famility or `Log` family tables. ## Configuring external storage {#configuring-external-storage} [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, `HDFS` using a disk with types `s3`, `azure_blob_storage`, `hdfs` accordingly. -Configuration markup: +Disk configuration requires: +1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`. +2. Configuration of a specific external storage type. -Let's take a loop at different storage configuration options on the example of `S3` storage. -Firstly, define configuration in server configuration file. In order to configure `S3` storage the following configuration can be used: +Starting from 24.1 clickhouse version, it is possible to use a new configuration option. +It requires to specify: +1. `type` equal to `object_storage` +2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`. 
Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web`.

E.g. the configuration option
``` xml
<s3>
    <type>s3</type>
    <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
    <use_environment_credentials>1</use_environment_credentials>
</s3>
```

is equivalent to the configuration (from `24.1`):
``` xml
<s3>
    <type>object_storage</type>
    <object_storage_type>s3</object_storage_type>
    <metadata_type>local</metadata_type>
    <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
    <use_environment_credentials>1</use_environment_credentials>
</s3>
```

A configuration like
``` xml
<s3_plain>
    <type>s3_plain</type>
    <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
    <use_environment_credentials>1</use_environment_credentials>
</s3_plain>
```

is equivalent to
``` xml
<s3_plain>
    <type>object_storage</type>
    <object_storage_type>s3</object_storage_type>
    <metadata_type>plain</metadata_type>
    <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
    <use_environment_credentials>1</use_environment_credentials>
</s3_plain>
```

A full storage configuration looks like this:

``` xml
<clickhouse>
    <storage_configuration>
        <disks>
            <s3>
                <type>s3</type>
                <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
                <use_environment_credentials>1</use_environment_credentials>
            </s3>
        </disks>
        <policies>
            <s3>
                <volumes>
                    <main>
                        <disk>s3</disk>
                    </main>
                </volumes>
            </s3>
        </policies>
    </storage_configuration>
</clickhouse>
```

Starting with ClickHouse version 24.1, it can also look like this:
``` xml
<clickhouse>
    <storage_configuration>
        <disks>
            <s3>
                <type>object_storage</type>
                <object_storage_type>s3</object_storage_type>
                <metadata_type>local</metadata_type>
                <endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
                <use_environment_credentials>1</use_environment_credentials>
            </s3>
        </disks>
        <policies>
            <s3>
                <volumes>
                    <main>
                        <disk>s3</disk>
                    </main>
                </volumes>
            </s3>
        </policies>
    </storage_configuration>
</clickhouse>
```

To make a specific kind of storage the default option for all `MergeTree` tables, add the following section to the configuration file:
``` xml
<clickhouse>
    <merge_tree>
        <storage_policy>s3</storage_policy>
    </merge_tree>
</clickhouse>
```

If you want to configure a specific storage policy for only a specific table, you can define it in settings while creating the table:

``` sql
CREATE TABLE test (a Int32, b String)
ENGINE = MergeTree() ORDER BY a
SETTINGS storage_policy = 's3';
```

You can also use `disk` instead of `storage_policy`. In this case the `storage_policy` section is not required in the configuration file; a `disk` section is enough.

``` sql
CREATE TABLE test (a Int32, b String)
ENGINE = MergeTree() ORDER BY a
SETTINGS disk = 's3';
```

## Dynamic Configuration {#dynamic-configuration}

It is also possible to specify a storage configuration without a predefined disk in the configuration file: the disk can instead be configured in the CREATE/ATTACH query settings.

The following example query shows how to attach a table whose data is stored at a URL, with the disk defined entirely within the query.

```sql
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
    price UInt32,
    date Date,
    postcode1 LowCardinality(String),
    postcode2 LowCardinality(String),
    type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
    is_new UInt8,
    duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
    addr1 String,
    addr2 String,
    street LowCardinality(String),
    locality LowCardinality(String),
    town LowCardinality(String),
    district LowCardinality(String),
    county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
 # highlight-start
 SETTINGS disk = disk(
    type=web,
    endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
  );
 # highlight-end
```

The example below adds a cache to the external storage.
-``` sql -CREATE TABLE test (a Int32, b String) -ENGINE = MergeTree() ORDER BY a -SETTINGS disk = disk(name = 'cached_s3_disk', type = 'cache', max_size = '10Gi', path = '/s3_cache', disk = disk(name = 's3_disk', type = 's3', endpoint = 'https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/', use_environment_credentials = 1)); +```sql +ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2) + # highlight-start + SETTINGS disk = disk( + type=cache, + max_size='1Gi', + path='/var/lib/clickhouse/custom_disk_cache/', + disk=disk( + type=web, + endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + ) + ); + # highlight-end ``` -A combination of config file disk configuration and sql-defined configuration is also possible: +In the settings highlighted below notice that the disk of `type=web` is nested within +the disk of `type=cache`. -``` sql -CREATE TABLE test (a Int32, b String) -ENGINE = MergeTree() ORDER BY a -SETTINGS disk = disk(name = 'cached_s3_disk', type = 'cache', max_size = '10Gi', path = '/s3_cache', disk = 's3'); +A combination of config-based configuration and sql-defined configuration is also possible: + +```sql +ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2) + # highlight-start + SETTINGS disk = disk( + type=cache, + max_size='1Gi', + path='/var/lib/clickhouse/custom_disk_cache/', + disk=disk( + type=web, + endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + ) + ); + # highlight-end ``` -Here `s3` is a disk name from server configuration file, while `cache` disk is defined via sql. +where `web` is a from a server configuration file: -Let's take a closer look at configuration parameters. - -All disk configuration require `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local`, `cache`, `web`. Then goes configuration of a specific storage type. -Starting from 24.1 clickhouse version, you can you a new configuration option. For it you are required to specify `type` as `object_storage`, `object_storage_type` as one of `s3`, `azure_blob_storage`, `hdfs`, `local`, `cache`, `web`, and optionally you can specify `metadata_type`, which is `local` by default, but it can also be set to `plain`, `web`. - -E.g. 
first configuration option: ``` xml - - s3 - https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 - + + + + web + 'https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + + + ``` -and second (from `24.1`): +### Using S3 Storage {#s3-storage} + +Required parameters: + +- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data. +- `access_key_id` — S3 access key id. +- `secret_access_key` — S3 secret access key. + +Optional parameters: + +- `region` — S3 region name. +- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs. +- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`. +- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`. +- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`. +- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL. +- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`. +- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`. +- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`. +- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`. +- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`. +- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks//`. +- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`. +- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times. +- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. +- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional. +- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional. +- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting). +- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. 
By default (`0` value) it equals `s3_max_put_rps`.
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting the requests-per-second limit. By default (`0` value) it equals `s3_max_get_rps`.
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is an empty string (IO scheduling is not enabled for this disk).
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is an empty string (IO scheduling is not enabled for this disk).
- `key_template` — Defines the format with which the object keys are generated. By default, ClickHouse takes the `root path` from the `endpoint` option and adds a randomly generated suffix. That suffix is a dir with 3 random symbols and a file name with 29 random symbols. With this option you have full control over how the object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of the object key. For example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only some subset of the syntax is supported. Check that your preferred format is supported before using this option. The disk isn't initialized if ClickHouse is unable to generate a key from the value of `key_template`. It requires the feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key) to be enabled. It forbids declaring the `root path` in the `endpoint` option. It requires the option `key_compatibility_prefix` to be defined.
- `key_compatibility_prefix` — This option is required when the option `key_template` is in use. In order to be able to read object keys which were stored in metadata files with a metadata version lower than `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here.

:::note
Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
:::

### Using Plain Storage {#plain-storage}

There is a disk type `s3_plain`, which provides write-once storage. Unlike the `s3` disk type, it stores data as is; instead of randomly generated blob names, it uses normal file names, the same way ClickHouse stores files on a local disk. So this disk type allows keeping a static version of the table, and it can also be used to create backups.
Configuration parameters are the same as for the `s3` disk type.

### Using Azure Blob Storage {#azure-blob-storage}

`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.

As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented.

Configuration markup:
``` xml
<storage_configuration>
    ...
    <disks>
        <blob_storage_disk>
            <type>azure_blob_storage</type>
            <storage_account_url>http://account.blob.core.windows.net</storage_account_url>
            <container_name>container</container_name>
            <account_name>account</account_name>
            <account_key>pass123</account_key>
            <metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
            <cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
            <skip_access_check>false</skip_access_check>
        </blob_storage_disk>
    </disks>
    ...
</storage_configuration>
```

Connection parameters:
* `storage_account_url` - **Required**, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
* `container_name` - Target container name, defaults to `default-container`.
+* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account, if set to `true`, disk connects to the container directly, and if left unset, disk connects to the account, checks if the container `container_name` exists, and creates it if it doesn't exist yet. + +Authentication parameters (the disk will try all available methods **and** Managed Identity Credential): +* `connection_string` - For authentication using a connection string. +* `account_name` and `account_key` - For authentication using Shared Key. + +Limit parameters (mainly for internal usage): +* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage. +* `min_bytes_for_seek` - Limits the size of a seekable region. +* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage. +* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage. +* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated. +* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. + +Other parameters: +* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks//`. +* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`. +* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). +* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). + +Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)). + +:::note Zero-copy replication is not ready for production +Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. +::: + +## Using HDFS storage {#hdfs-storage} + +In this sample configuration: +- the disk is of type `hdfs` +- the data is hosted at `hdfs://hdfs1:9000/clickhouse/` + +```xml + + + + + hdfs + hdfs://hdfs1:9000/clickhouse/ + true + + + local + / + + + + + +
+ hdfs +
+ + hdd + +
+
+
+
+
``` -is equal to -``` xml - - object_storage - s3 - plain - https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 - -``` - -For details configuration options of each storage see [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md). - -## Using Virtual File System for Data Encryption {#encrypted-virtual-file-system} +### Using Data Encryption {#encrypted-virtual-file-system} You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one. @@ -230,7 +454,7 @@ Example of disk configuration:
``` -## Using local cache {#using-local-cache} +### Using local cache {#using-local-cache} It is possible to configure local cache over disks in storage configuration starting from version 22.3. For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc. @@ -393,7 +617,56 @@ Cache profile events: - `CachedWriteBufferCacheWriteBytes`, `CachedWriteBufferCacheWriteMicroseconds` -## Storing Data on Web Server {#storing-data-on-webserver} +### Using static Web storage (read-only) {#web-storage} + +Web storage can be used for read-only purposes. An example use is for hosting sample +data, or for migrating data. + +:::tip +Storage can also be configured temporarily within a query, if a web dataset is not expected +to be used routinely, see [dynamic storage](#dynamic-storage) and skip editing the +configuration file. +::: + +In this sample configuration: +- the disk is of type `web` +- the data is hosted at `http://nginx:80/test1/` +- a cache on local storage is used + +```xml + + + + + web + http://nginx:80/test1/ + + + cache + web + cached_web_cache/ + 100000000 + + + + + +
+ web +
+
+
+ + +
+ cached_web +
+
+
+
+
+
+``` There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`. @@ -595,7 +868,7 @@ If URL is not reachable on disk load when the server is starting up tables, then Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#http-max-single-read-retries) setting to limit the maximum number of retries during a single HTTP read. -## Zero-copy Replication (not ready for production) {#zero-copy} +### Zero-copy Replication (not ready for production) {#zero-copy} Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. From 5ae410e6339fe52e33b41bbc9c6c115ac6293f57 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 15 Feb 2024 18:33:38 +0100 Subject: [PATCH 003/283] A bit more explanation --- .../mergetree-family/mergetree.md | 49 +------------------ docs/en/operations/storing-data.md | 44 ++++++++++++++++- 2 files changed, 44 insertions(+), 49 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 0fff13c906f..f23b251f3a1 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -940,53 +940,6 @@ You could change storage policy after table creation with [ALTER TABLE ... MODIF The number of threads performing background moves of data parts can be changed by [background_move_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_move_pool_size) setting. -### Dynamic Storage - -This example query shows how to attach a table stored at a URL and configure the -remote storage within the query. The web storage is not configured in the ClickHouse -configuration files; all the settings are in the CREATE/ATTACH query. - -:::note -The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk. -::: - -#### Example dynamic web storage - -:::tip -A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver) -::: - -In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content. 
- -```sql -# highlight-next-line -ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' -( - price UInt32, - date Date, - postcode1 LowCardinality(String), - postcode2 LowCardinality(String), - type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), - is_new UInt8, - duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), - addr1 String, - addr2 String, - street LowCardinality(String), - locality LowCardinality(String), - town LowCardinality(String), - district LowCardinality(String), - county LowCardinality(String) -) -ENGINE = MergeTree -ORDER BY (postcode1, postcode2, addr1, addr2) - # highlight-start - SETTINGS disk = disk( - type=web, - endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' - ); - # highlight-end -``` - ### Details {#details} In the case of `MergeTree` tables, data is getting to disk in different ways: @@ -1025,7 +978,7 @@ Configuration markup: ``` xml ... -e + s3 true diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index baf4e1999a7..0f818b813bf 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -213,6 +213,10 @@ ORDER BY (postcode1, postcode2, addr1, addr2) In the settings highlighted below notice that the disk of `type=web` is nested within the disk of `type=cache`. +:::note +The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk. +::: + A combination of config-based configuration and sql-defined configuration is also possible: ```sql @@ -302,6 +306,11 @@ Optional parameters: Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs). ::: +### Using Plain Storage {#s3-storage} + +There is a disk type `s3_plain`, which provides a write-once storage. Unlike `s3` disk type, it stores data as is, e.g. instead of randomly-generated blob names, it uses normal file names as clickhouse stores files on local disk. So this disk type allows to keeper a static version of the table and can also be used to create backups on it. +Configuration parameters are the same as for `s3` disk type. + ### Using Azure Blob Storage {#azure-blob-storage} `MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`. @@ -672,7 +681,40 @@ There is a tool `clickhouse-static-files-uploader`, which prepares a data direct This is a read-only disk. Its data is only read and never modified. A new table is loaded to this disk via `ATTACH TABLE` query (see example below). Local disk is not actually used, each `SELECT` query will result in a `http` request to fetch required data. All modification of the table data will result in an exception, i.e. the following types of queries are not allowed: [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md), [ALTER TABLE](/docs/en/sql-reference/statements/alter/index.md), [RENAME TABLE](/docs/en/sql-reference/statements/rename.md/#misc_operations-rename_table), [DETACH TABLE](/docs/en/sql-reference/statements/detach.md) and [TRUNCATE TABLE](/docs/en/sql-reference/statements/truncate.md). 
-Web server storage is supported only for the [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) engine families. To access the data stored on a `web` disk, use the [storage_policy](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#terms) setting when executing the query. For example, `ATTACH TABLE table_web UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'`. +:::tip +A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver) +::: + +In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content. + +```sql +# highlight-next-line +ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' +( + price UInt32, + date Date, + postcode1 LowCardinality(String), + postcode2 LowCardinality(String), + type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4), + is_new UInt8, + duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2), + addr1 String, + addr2 String, + street LowCardinality(String), + locality LowCardinality(String), + town LowCardinality(String), + district LowCardinality(String), + county LowCardinality(String) +) +ENGINE = MergeTree +ORDER BY (postcode1, postcode2, addr1, addr2) + # highlight-start + SETTINGS disk = disk( + type=web, + endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/' + ); + # highlight-end +``` A ready test case. You need to add this configuration to config: From 09e630e02be9ccd19681b34f33e24cea849ca9fd Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 15 Feb 2024 19:00:08 +0100 Subject: [PATCH 004/283] Update storing-data.md --- docs/en/operations/storing-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 0f818b813bf..60e33fe2849 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -10,7 +10,7 @@ Data, processed in ClickHouse, is usually stored in the local file system — on 2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)) 3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs). -Note: to work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine, and to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/AzureBlobStorage.md) table engine. They are different from external storage described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` famility or `Log` family tables. 
+Note: to work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine, and to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/AzureBlobStorage.md) table engine. They are different from external storage described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` family or `Log` family tables. ## Configuring external storage {#configuring-external-storage} From 8c11f59ba82bd9ae3a322f7a9729c4a5a8644512 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:01:37 +0100 Subject: [PATCH 005/283] Fix bad link, update disk web description --- docs/en/operations/storing-data.md | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 60e33fe2849..4b0345a3206 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -628,14 +628,9 @@ Cache profile events: ### Using static Web storage (read-only) {#web-storage} -Web storage can be used for read-only purposes. An example use is for hosting sample -data, or for migrating data. - -:::tip -Storage can also be configured temporarily within a query, if a web dataset is not expected -to be used routinely, see [dynamic storage](#dynamic-storage) and skip editing the -configuration file. -::: +This is a read-only disk. Its data is only read and never modified. A new table is loaded to this disk via `ATTACH TABLE` query (see example below). Local disk is not actually used, each `SELECT` query will result in a `http` request to fetch required data. All modification of the table data will result in an exception, i.e. the following types of queries are not allowed: [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md), [ALTER TABLE](/docs/en/sql-reference/statements/alter/index.md), [RENAME TABLE](/docs/en/sql-reference/statements/rename.md/#misc_operations-rename_table), [DETACH TABLE](/docs/en/sql-reference/statements/detach.md) and [TRUNCATE TABLE](/docs/en/sql-reference/statements/truncate.md). +Web storage can be used for read-only purposes. An example use is for hosting sample data, or for migrating data. +There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`. In this sample configuration: - the disk is of type `web` @@ -677,9 +672,11 @@ In this sample configuration:
``` -There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`. - -This is a read-only disk. Its data is only read and never modified. A new table is loaded to this disk via `ATTACH TABLE` query (see example below). Local disk is not actually used, each `SELECT` query will result in a `http` request to fetch required data. All modification of the table data will result in an exception, i.e. the following types of queries are not allowed: [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md), [ALTER TABLE](/docs/en/sql-reference/statements/alter/index.md), [RENAME TABLE](/docs/en/sql-reference/statements/rename.md/#misc_operations-rename_table), [DETACH TABLE](/docs/en/sql-reference/statements/detach.md) and [TRUNCATE TABLE](/docs/en/sql-reference/statements/truncate.md). +:::tip +Storage can also be configured temporarily within a query, if a web dataset is not expected +to be used routinely, see [dynamic configuration](#dynamic-configuration) and skip editing the +configuration file. +::: :::tip A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver) From 601b1dfaa14323db28f169b6b193d59ec75e8bfc Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 19 Feb 2024 12:21:52 +0100 Subject: [PATCH 006/283] Fix bad link --- docs/en/operations/storing-data.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 4b0345a3206..4f676904375 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -10,7 +10,7 @@ Data, processed in ClickHouse, is usually stored in the local file system — on 2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)) 3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs). -Note: to work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine, and to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/AzureBlobStorage.md) table engine. They are different from external storage described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` family or `Log` family tables. +Note: to work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine, and to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine. 
From a34f42ca22c8a4820e4cbcf67cdd48a3589e3879 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin
Date: Sat, 13 Jan 2024 18:48:47 +0300
Subject: [PATCH 007/283] Remove lock from the ReadProgressCallback

It looks redundant (added in 5ef51ed): though that commit has "fix tests"
in the log message, CI reports are not available for the commits from that
PR [1], so let's try.

[1]: https://github.com/ClickHouse/ClickHouse/pull/37543

Also, this can be a big problem, since the code under that lock
(throttling, or quotas with the previous implementation that used
boost::atomic_shared_ptr) may sleep.

Some numbers:

run                     | time
------------------------|------
max_threads=100 before  | 23.1
max_threads=100 after   | 15.1
max_threads=4500 before | 4.5
max_threads=4500 after  | 2.3

Query: select sum(number) from numbers_mt(2000000) settings max_threads=X, max_block_size = 1

Signed-off-by: Azat Khuzhin

---
 src/QueryPipeline/ReadProgressCallback.cpp   | 2 --
 src/QueryPipeline/ReadProgressCallback.h     | 1 -
 tests/performance/small_block_contention.xml | 3 +++
 3 files changed, 3 insertions(+), 3 deletions(-)
 create mode 100644 tests/performance/small_block_contention.xml

diff --git a/src/QueryPipeline/ReadProgressCallback.cpp b/src/QueryPipeline/ReadProgressCallback.cpp
index 59843d8791d..e90fc24d882 100644
--- a/src/QueryPipeline/ReadProgressCallback.cpp
+++ b/src/QueryPipeline/ReadProgressCallback.cpp
@@ -126,8 +126,6 @@ bool ReadProgressCallback::onProgress(uint64_t read_rows, uint64_t read_bytes, c

     CurrentThread::updatePerformanceCountersIfNeeded();

-    std::lock_guard lock(limits_and_quotas_mutex);
-
     /// TODO: Should be done in PipelineExecutor.
     for (const auto & limits : storage_limits)
         limits.local_limits.speed_limits.throttle(progress.read_rows, progress.read_bytes, total_rows, total_stopwatch.elapsedMicroseconds(), limits.local_limits.timeout_overflow_mode);

diff --git a/src/QueryPipeline/ReadProgressCallback.h b/src/QueryPipeline/ReadProgressCallback.h
index 5dbf3344bdf..7dfed9df5da 100644
--- a/src/QueryPipeline/ReadProgressCallback.h
+++ b/src/QueryPipeline/ReadProgressCallback.h
@@ -41,7 +41,6 @@ private:
     /// The total number of bytes to read. For progress bar.
     std::atomic_size_t total_bytes = 0;

-    std::mutex limits_and_quotas_mutex;
     Stopwatch total_stopwatch{CLOCK_MONOTONIC_COARSE}; /// Including waiting time
     bool update_profile_events = true;

diff --git a/tests/performance/small_block_contention.xml b/tests/performance/small_block_contention.xml
new file mode 100644
index 00000000000..ce1995a0a29
--- /dev/null
+++ b/tests/performance/small_block_contention.xml
@@ -0,0 +1,3 @@
+<test>
+    <query>select sum(number) from numbers_mt(200000) settings max_threads=100, max_block_size = 1 format Null</query>
+</test>
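An editorial aside, not part of the patch series: the test above stresses exactly the removed lock, because with `max_block_size = 1` every block carries a single row and each block triggers `ReadProgressCallback::onProgress`, so all worker threads previously funneled through `limits_and_quotas_mutex`. The benchmark from the commit message has the same shape:

``` sql
-- A sketch based on the commit message, not part of the patch:
-- one-row blocks maximize progress-callback frequency, and therefore
-- contention on the (now removed) limits_and_quotas_mutex.
SELECT sum(number)
FROM numbers_mt(2000000)
SETTINGS max_threads = 100, max_block_size = 1;
```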
From 277e8d965555b4fcd09a755282666bcae36adae6 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Mon, 26 Feb 2024 14:03:53 +0800
Subject: [PATCH 008/283] Fix usage of plain metadata type with new
 configuration option

---
 src/Disks/DiskType.cpp                        | 48 +++++++++++++++++++
 src/Disks/DiskType.h                          | 34 +------------
 src/Disks/ObjectStorages/IObjectStorage.h     |  1 +
 .../ObjectStorages/MetadataStorageFactory.cpp | 36 +++++++++++---
 .../ObjectStorages/MetadataStorageFactory.h   |  7 +++
 .../ObjectStorages/ObjectStorageFactory.cpp   | 43 +++++++++++++----
 src/Disks/ObjectStorages/PlainObjectStorage.h | 29 +++++++++++
 .../RegisterDiskObjectStorage.cpp             | 24 ++--------
 src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 21 --------
 .../configs/disk_s3.xml                       |  7 +++
 .../test_attach_backup_from_s3_plain/test.py  | 25 ++++++----
 11 files changed, 178 insertions(+), 97 deletions(-)
 create mode 100644 src/Disks/ObjectStorages/PlainObjectStorage.h

diff --git a/src/Disks/DiskType.cpp b/src/Disks/DiskType.cpp
index 218b6ee7f26..1778ae8025b 100644
--- a/src/Disks/DiskType.cpp
+++ b/src/Disks/DiskType.cpp
@@ -1,7 +1,27 @@
 #include "DiskType.h"
+#include <Poco/String.h>
+#include <Common/Exception.h>

 namespace DB
 {
+namespace ErrorCodes
+{
+    extern const int UNKNOWN_ELEMENT_IN_CONFIG;
+}
+
+MetadataStorageType metadataTypeFromString(const String & type)
+{
+    auto check_type = Poco::toLower(type);
+    if (check_type == "local")
+        return MetadataStorageType::Local;
+    if (check_type == "plain")
+        return MetadataStorageType::Plain;
+    if (check_type == "web")
+        return MetadataStorageType::StaticWeb;
+
+    throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG,
+                    "MetadataStorageFactory: unknown metadata storage type: {}", type);
+}

 bool DataSourceDescription::operator==(const DataSourceDescription & other) const
 {
@@ -14,4 +34,32 @@ bool DataSourceDescription::sameKind(const DataSourceDescription & other) const
         == std::tie(other.type, other.object_storage_type, other.description);
 }

+std::string DataSourceDescription::toString() const
+{
+    switch (type)
+    {
+        case DataSourceType::Local:
+            return "local";
+        case DataSourceType::RAM:
+            return "memory";
+        case DataSourceType::ObjectStorage:
+        {
+            switch (object_storage_type)
+            {
+                case ObjectStorageType::S3:
+                    return "s3";
+                case ObjectStorageType::HDFS:
+                    return "hdfs";
+                case ObjectStorageType::Azure:
+                    return "azure_blob_storage";
+                case ObjectStorageType::Local:
+                    return "local_blob_storage";
+                case ObjectStorageType::Web:
+                    return "web";
+                case ObjectStorageType::None:
+                    return "none";
+            }
+        }
+    }
+}
 }

diff --git a/src/Disks/DiskType.h b/src/Disks/DiskType.h
index 15940ea9155..36fe4d83004 100644
--- a/src/Disks/DiskType.h
+++ b/src/Disks/DiskType.h
@@ -17,7 +17,6 @@ enum class ObjectStorageType
 {
     None,
     S3,
-    S3_Plain,
     Azure,
     HDFS,
     Web,
@@ -30,9 +29,9 @@ enum class MetadataStorageType
 {
     Local,
     Plain,
     StaticWeb,
-    Memory,
 };

+MetadataStorageType metadataTypeFromString(const String & type);
 String toString(DataSourceType data_source_type);

 struct DataSourceDescription
@@ -49,36 +48,7 @@ struct DataSourceDescription
bool operator==(const DataSourceDescription & other) const; bool sameKind(const DataSourceDescription & other) const; - std::string toString() const - { - switch (type) - { - case DataSourceType::Local: - return "local"; - case DataSourceType::RAM: - return "memory"; - case DataSourceType::ObjectStorage: - { - switch (object_storage_type) - { - case ObjectStorageType::S3: - return "s3"; - case ObjectStorageType::S3_Plain: - return "s3_plain"; - case ObjectStorageType::HDFS: - return "hdfs"; - case ObjectStorageType::Azure: - return "azure_blob_storage"; - case ObjectStorageType::Local: - return "local_blob_storage"; - case ObjectStorageType::Web: - return "web"; - case ObjectStorageType::None: - return "none"; - } - } - } - } + std::string toString() const; }; } diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 56c269a3fc5..fde97d82ad1 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -218,6 +218,7 @@ public: virtual bool isReadOnly() const { return false; } virtual bool isWriteOnce() const { return false; } + virtual bool isPlain() const { return false; } virtual bool supportParallelWrite() const { return false; } diff --git a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp index 52a0b9ec268..adc1f84372c 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFactory.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFactory.cpp @@ -32,6 +32,35 @@ void MetadataStorageFactory::registerMetadataStorageType(const std::string & met } } +std::string MetadataStorageFactory::getCompatibilityMetadataTypeHint(const ObjectStorageType & type) +{ + switch (type) + { + case ObjectStorageType::S3: + case ObjectStorageType::HDFS: + case ObjectStorageType::Local: + case ObjectStorageType::Azure: + return "local"; + case ObjectStorageType::Web: + return "web"; + default: + return ""; + } +} + +std::string MetadataStorageFactory::getMetadataType( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const std::string & compatibility_type_hint) +{ + if (compatibility_type_hint.empty() && !config.has(config_prefix + ".metadata_type")) + { + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Expected `metadata_type` in config"); + } + + return config.getString(config_prefix + ".metadata_type", compatibility_type_hint); +} + MetadataStoragePtr MetadataStorageFactory::create( const std::string & name, const Poco::Util::AbstractConfiguration & config, @@ -39,12 +68,7 @@ MetadataStoragePtr MetadataStorageFactory::create( ObjectStoragePtr object_storage, const std::string & compatibility_type_hint) const { - if (compatibility_type_hint.empty() && !config.has(config_prefix + ".metadata_type")) - { - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Expected `metadata_type` in config"); - } - - const auto type = config.getString(config_prefix + ".metadata_type", compatibility_type_hint); + const auto type = getMetadataType(config, config_prefix, compatibility_type_hint); const auto it = registry.find(type); if (it == registry.end()) diff --git a/src/Disks/ObjectStorages/MetadataStorageFactory.h b/src/Disks/ObjectStorages/MetadataStorageFactory.h index 5f61125c599..467cd3cef98 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFactory.h +++ b/src/Disks/ObjectStorages/MetadataStorageFactory.h @@ -25,6 +25,13 @@ public: ObjectStoragePtr object_storage, const std::string & compatibility_type_hint) const; + static 
std::string getMetadataType( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + const std::string & compatibility_type_hint = ""); + + static std::string getCompatibilityMetadataTypeHint(const ObjectStorageType & type); + private: using Registry = std::unordered_map; Registry registry; diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index b3626135177..6f6ff199902 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -16,8 +16,10 @@ #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include #include +#include #include #endif +#include #include #include @@ -32,6 +34,28 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace +{ + template + ObjectStoragePtr createObjectStorage( + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix, + Args && ...args) + { + auto compatibility_hint = MetadataStorageFactory::getCompatibilityMetadataTypeHint(ObjectStorageType::S3); + auto metadata_type = MetadataStorageFactory::getMetadataType(config, config_prefix, compatibility_hint); + + if (metadataTypeFromString(metadata_type) == MetadataStorageType::Plain) + { + return std::make_shared>(std::forward(args)...); + } + else + { + return std::make_shared(std::forward(args)...); + } + } +} + ObjectStorageFactory & ObjectStorageFactory::instance() { static ObjectStorageFactory factory; @@ -129,12 +153,12 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory) auto client = getClient(config, config_prefix, context, *settings); auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix); - auto object_storage = std::make_shared( - std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name); + auto object_storage = createObjectStorage( + config, config_prefix, std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name); /// NOTE: should we still perform this check for clickhouse-disks? if (!skip_access_check) - checkS3Capabilities(*object_storage, s3_capabilities, name, uri.key); + checkS3Capabilities(*dynamic_cast(object_storage.get()), s3_capabilities, name, uri.key); return object_storage; }); @@ -165,12 +189,12 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory) auto client = getClient(config, config_prefix, context, *settings); auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix); - auto object_storage = std::make_shared( + auto object_storage = std::make_shared>( std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name); /// NOTE: should we still perform this check for clickhouse-disks? 
if (!skip_access_check) - checkS3Capabilities(*object_storage, s3_capabilities, name, uri.key); + checkS3Capabilities(*dynamic_cast(object_storage.get()), s3_capabilities, name, uri.key); return object_storage; }); @@ -198,7 +222,7 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory) context->getSettingsRef().hdfs_replication ); - return std::make_unique(uri, std::move(settings), config); + return createObjectStorage(config, config_prefix, uri, std::move(settings), config); }); } #endif @@ -214,7 +238,8 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) bool /* skip_access_check */) -> ObjectStoragePtr { String container_name = config.getString(config_prefix + ".container_name", "default-container"); - return std::make_unique( + return createObjectStorage( + config, config_prefix, name, getAzureBlobContainerClient(config, config_prefix), getAzureBlobStorageSettings(config, config_prefix, context), @@ -248,7 +273,7 @@ void registerWebObjectStorage(ObjectStorageFactory & factory) ErrorCodes::BAD_ARGUMENTS, "Bad URI: `{}`. Error: {}", uri, e.what()); } - return std::make_shared(uri, context); + return createObjectStorage(config, config_prefix, uri, context); }); } @@ -266,7 +291,7 @@ void registerLocalObjectStorage(ObjectStorageFactory & factory) loadDiskLocalConfig(name, config, config_prefix, context, object_key_prefix, keep_free_space_bytes); /// keys are mapped to the fs, object_key_prefix is a directory also fs::create_directories(object_key_prefix); - return std::make_shared(object_key_prefix); + return createObjectStorage(config, config_prefix, object_key_prefix); }); } #endif diff --git a/src/Disks/ObjectStorages/PlainObjectStorage.h b/src/Disks/ObjectStorages/PlainObjectStorage.h new file mode 100644 index 00000000000..3a81b85c44b --- /dev/null +++ b/src/Disks/ObjectStorages/PlainObjectStorage.h @@ -0,0 +1,29 @@ +#pragma once +#include + +namespace DB +{ + +/// Do not encode keys, store as-is, and do not require separate disk for metadata. +/// But because of this does not support renames/hardlinks/attrs/... +/// +/// NOTE: This disk has excessive API calls. +template +class PlainObjectStorage : public BaseObjectStorage +{ +public: + template + explicit PlainObjectStorage(Args && ...args) + : BaseObjectStorage(std::forward(args)...) 
{} + + std::string getName() const override { return "" + BaseObjectStorage::getName(); } + + /// Notes: + /// - supports BACKUP to this disk + /// - does not support INSERT into MergeTree table on this disk + bool isWriteOnce() const override { return true; } + + bool isPlain() const override { return true; } +}; + +} diff --git a/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp b/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp index 383a0b079b5..669a0102951 100644 --- a/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp +++ b/src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp @@ -10,25 +10,6 @@ namespace DB void registerObjectStorages(); void registerMetadataStorages(); -static std::string getCompatibilityMetadataTypeHint(const ObjectStorageType & type) -{ - switch (type) - { - case ObjectStorageType::S3: - case ObjectStorageType::HDFS: - case ObjectStorageType::Local: - case ObjectStorageType::Azure: - return "local"; - case ObjectStorageType::S3_Plain: - return "plain"; - case ObjectStorageType::Web: - return "web"; - case ObjectStorageType::None: - return ""; - } - UNREACHABLE(); -} - void registerDiskObjectStorage(DiskFactory & factory, bool global_skip_access_check) { registerObjectStorages(); @@ -47,7 +28,10 @@ void registerDiskObjectStorage(DiskFactory & factory, bool global_skip_access_ch std::string compatibility_metadata_type_hint; if (!config.has(config_prefix + ".metadata_type")) { - compatibility_metadata_type_hint = getCompatibilityMetadataTypeHint(object_storage->getType()); + if (object_storage->isPlain()) + compatibility_metadata_type_hint = "plain"; + else + compatibility_metadata_type_hint = MetadataStorageFactory::getCompatibilityMetadataTypeHint(object_storage->getType()); } auto metadata_storage = MetadataStorageFactory::instance().create( diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index ab0fa5bed68..4ece98c5ec4 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -182,27 +182,6 @@ private: LoggerPtr log; }; -/// Do not encode keys, store as-is, and do not require separate disk for metadata. -/// But because of this does not support renames/hardlinks/attrs/... -/// -/// NOTE: This disk has excessive API calls. -class S3PlainObjectStorage : public S3ObjectStorage -{ -public: - std::string getName() const override { return "S3PlainObjectStorage"; } - - template - explicit S3PlainObjectStorage(Args && ...args) - : S3ObjectStorage("S3PlainObjectStorage", std::forward(args)...) {} - - ObjectStorageType getType() const override { return ObjectStorageType::S3_Plain; } - - /// Notes: - /// - supports BACKUP to this disk - /// - does not support INSERT into MergeTree table on this disk - bool isWriteOnce() const override { return true; } -}; - } #endif diff --git a/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml b/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml index 779e4b6ae21..3166eea7ccb 100644 --- a/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml +++ b/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml @@ -8,9 +8,16 @@ minio minio123 + + object_storage + local + plain + local_plain/ +
backup_disk_s3_plain + backup_disk_local_plain diff --git a/tests/integration/test_attach_backup_from_s3_plain/test.py b/tests/integration/test_attach_backup_from_s3_plain/test.py index e575c487b7a..4a8da1e6d66 100644 --- a/tests/integration/test_attach_backup_from_s3_plain/test.py +++ b/tests/integration/test_attach_backup_from_s3_plain/test.py @@ -20,17 +20,27 @@ def start_cluster(): finally: cluster.shutdown() +s3_disk_def = """disk(type=s3_plain, + endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{backup_name}/', + access_key_id='minio', + secret_access_key='minio123');""" + +local_disk_def = "disk(type=object_storage, object_storage_type = 'local', metadata_type = 'plain'" @pytest.mark.parametrize( - "table_name,backup_name,storage_policy,min_bytes_for_wide_part", + "table_name,backup_name,storage_policy,disk_def,min_bytes_for_wide_part", [ pytest.param( - "compact", "backup_compact", "s3_backup_compact", int(1e9), id="compact" + "compact", "backup_compact_s3", "backup_disk_s3_plain", s3_disk_def, int(1e9), id="compact" ), - pytest.param("wide", "backup_wide", "s3_backup_wide", int(0), id="wide"), + pytest.param("wide", "backup_wide_s3", "backup_disk_s3_plain", s3_disk_def, int(0), id="wide"), + pytest.param( + "compact", "backup_compact_local", "backup_disk_local_plain", local_disk_def, int(1e9), id="compact" + ), + pytest.param("wide", "backup_wide_local", "backup_disk_local_plain", local_disk_def, int(0), id="wide"), ], ) -def test_attach_part(table_name, backup_name, storage_policy, min_bytes_for_wide_part): +def test_attach_part(table_name, backup_name, storage_policy, disk_def, min_bytes_for_wide_part): node.query( f""" -- Catch any errors (NOTE: warnings are ok) @@ -45,7 +55,7 @@ def test_attach_part(table_name, backup_name, storage_policy, min_bytes_for_wide settings min_bytes_for_wide_part={min_bytes_for_wide_part} as select number%5 part, number key from numbers(100); - backup table ordinary_db.{table_name} TO Disk('backup_disk_s3_plain', '{backup_name}') settings deduplicate_files=0; + backup table ordinary_db.{table_name} TO Disk('{storage_policy}', '{backup_name}') settings deduplicate_files=0; drop table ordinary_db.{table_name}; attach table ordinary_db.{table_name} (part UInt8, key UInt64) @@ -53,10 +63,7 @@ def test_attach_part(table_name, backup_name, storage_policy, min_bytes_for_wide order by key partition by part settings max_suspicious_broken_parts=0, - disk=disk(type=s3_plain, - endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{backup_name}/', - access_key_id='minio', - secret_access_key='minio123'); + disk={disk_def} """ ) From 69b5bd02a915ae044b4116de759d11ae80525dc5 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 26 Feb 2024 09:37:17 +0000 Subject: [PATCH 009/283] Automatic style fix --- .../test_attach_backup_from_s3_plain/test.py | 42 ++++++++++++++++--- 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_attach_backup_from_s3_plain/test.py b/tests/integration/test_attach_backup_from_s3_plain/test.py index 4a8da1e6d66..900366b2c9c 100644 --- a/tests/integration/test_attach_backup_from_s3_plain/test.py +++ b/tests/integration/test_attach_backup_from_s3_plain/test.py @@ -20,27 +20,57 @@ def start_cluster(): finally: cluster.shutdown() + s3_disk_def = """disk(type=s3_plain, endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{backup_name}/', access_key_id='minio', secret_access_key='minio123');""" -local_disk_def = "disk(type=object_storage, object_storage_type = 'local', metadata_type = 
'plain'" +local_disk_def = ( + "disk(type=object_storage, object_storage_type = 'local', metadata_type = 'plain'" +) + @pytest.mark.parametrize( "table_name,backup_name,storage_policy,disk_def,min_bytes_for_wide_part", [ pytest.param( - "compact", "backup_compact_s3", "backup_disk_s3_plain", s3_disk_def, int(1e9), id="compact" + "compact", + "backup_compact_s3", + "backup_disk_s3_plain", + s3_disk_def, + int(1e9), + id="compact", ), - pytest.param("wide", "backup_wide_s3", "backup_disk_s3_plain", s3_disk_def, int(0), id="wide"), pytest.param( - "compact", "backup_compact_local", "backup_disk_local_plain", local_disk_def, int(1e9), id="compact" + "wide", + "backup_wide_s3", + "backup_disk_s3_plain", + s3_disk_def, + int(0), + id="wide", + ), + pytest.param( + "compact", + "backup_compact_local", + "backup_disk_local_plain", + local_disk_def, + int(1e9), + id="compact", + ), + pytest.param( + "wide", + "backup_wide_local", + "backup_disk_local_plain", + local_disk_def, + int(0), + id="wide", ), - pytest.param("wide", "backup_wide_local", "backup_disk_local_plain", local_disk_def, int(0), id="wide"), ], ) -def test_attach_part(table_name, backup_name, storage_policy, disk_def, min_bytes_for_wide_part): +def test_attach_part( + table_name, backup_name, storage_policy, disk_def, min_bytes_for_wide_part +): node.query( f""" -- Catch any errors (NOTE: warnings are ok) From f53f43b78d3cf2da6219ea4bdea7018d9811ae54 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Feb 2024 17:33:47 +0800 Subject: [PATCH 010/283] Fixes for LocalObjectStorage and plain metadata --- .../Local/LocalObjectStorage.cpp | 37 +++++++++++++++++-- .../ObjectStorages/Local/LocalObjectStorage.h | 4 ++ .../MetadataStorageFromPlainObjectStorage.cpp | 5 +-- .../ObjectStorages/ObjectStorageFactory.cpp | 31 ++++++++++------ src/Disks/ObjectStorages/PlainObjectStorage.h | 6 +++ src/Disks/ObjectStorages/S3/DiskS3Utils.cpp | 6 --- src/Disks/ObjectStorages/S3/DiskS3Utils.h | 1 - .../ObjectStorages/S3/S3ObjectStorage.cpp | 2 + .../configs/disk_s3.xml | 4 +- .../test_attach_backup_from_s3_plain/test.py | 7 ++-- 10 files changed, 71 insertions(+), 32 deletions(-) diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index 02700b358e0..51c260cc270 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -31,6 +31,8 @@ LocalObjectStorage::LocalObjectStorage(String key_prefix_) description = *block_device_id; else description = "/"; + + fs::create_directories(getCommonKeyPrefix()); } bool LocalObjectStorage::exists(const StoredObject & object) const @@ -53,6 +55,7 @@ std::unique_ptr LocalObjectStorage::readObjects( /// NOL return createReadBufferFromFileBase(file_path, modified_settings, read_hint, file_size); }; + LOG_TEST(log, "Read object: {}", objects[0].remote_path); switch (read_settings.remote_fs_method) { case RemoteFSReadMethod::read: @@ -111,8 +114,8 @@ std::unique_ptr LocalObjectStorage::readObject( /// NOLI if (!file_size) file_size = tryGetSizeFromFilePath(path); - LOG_TEST(log, "Read object: {}", path); - return createReadBufferFromFileBase(path, patchSettings(read_settings), read_hint, file_size); + LOG_TEST(log, "Read object: {}", object.remote_path); + return createReadBufferFromFileBase(object.remote_path, patchSettings(read_settings), read_hint, file_size); } std::unique_ptr LocalObjectStorage::writeObject( /// NOLINT @@ -126,6 +129,7 @@ std::unique_ptr 
LocalObjectStorage::writeObject( /// NO throw Exception(ErrorCodes::BAD_ARGUMENTS, "LocalObjectStorage doesn't support append to files"); LOG_TEST(log, "Write object: {}", object.remote_path); + fs::create_directories(fs::path(object.remote_path).parent_path()); return std::make_unique(object.remote_path, buf_size); } @@ -157,9 +161,34 @@ void LocalObjectStorage::removeObjectsIfExist(const StoredObjects & objects) removeObjectIfExists(object); } -ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & /* path */) const +ObjectMetadata LocalObjectStorage::getObjectMetadata(const std::string & path) const { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Metadata is not supported for LocalObjectStorage"); + ObjectMetadata object_metadata; + LOG_TEST(log, "Getting metadata for path: {}", path); + object_metadata.size_bytes = fs::file_size(path); + object_metadata.last_modified = Poco::Timestamp::fromEpochTime( + std::chrono::duration_cast(fs::last_write_time(path).time_since_epoch()).count()); + return object_metadata; +} + +void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWithMetadata & children, int /* max_keys */) const +{ + for (const auto & entry : fs::directory_iterator(path)) + { + if (entry.is_directory()) + { + listObjects(entry.path(), children, 0); + continue; + } + + auto metadata = getObjectMetadata(entry.path()); + children.emplace_back(entry.path(), std::move(metadata)); + } +} + +bool LocalObjectStorage::existsOrHasAnyChild(const std::string & path) const +{ + return exists(StoredObject(path)); } void LocalObjectStorage::copyObject( // NOLINT diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h index ed5f8c1f537..22429a99c76 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h @@ -58,6 +58,10 @@ public: ObjectMetadata getObjectMetadata(const std::string & path) const override; + void listObjects(const std::string & path, RelativePathsWithMetadata & children, int max_keys) const override; + + bool existsOrHasAnyChild(const std::string & path) const override; + void copyObject( /// NOLINT const StoredObject & object_from, const StoredObject & object_to, diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index b03809f5b39..4b8fc74e956 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -48,10 +48,7 @@ bool MetadataStorageFromPlainObjectStorage::isDirectory(const std::string & path std::string directory = object_key.serialize(); if (!directory.ends_with('/')) directory += '/'; - - RelativePathsWithMetadata files; - object_storage->listObjects(directory, files, 1); - return !files.empty(); + return object_storage->existsOrHasAnyChild(directory); } uint64_t MetadataStorageFromPlainObjectStorage::getFileSize(const String & path) const diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 6f6ff199902..f64c42c1403 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -36,16 +36,24 @@ namespace ErrorCodes namespace { + bool isPlainStorage( + ObjectStorageType type, + const Poco::Util::AbstractConfiguration & config, + const std::string & config_prefix) + { + auto 
compatibility_hint = MetadataStorageFactory::getCompatibilityMetadataTypeHint(type); + auto metadata_type = MetadataStorageFactory::getMetadataType(config, config_prefix, compatibility_hint); + return metadataTypeFromString(metadata_type) == MetadataStorageType::Plain; + } + template ObjectStoragePtr createObjectStorage( + ObjectStorageType type, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, Args && ...args) { - auto compatibility_hint = MetadataStorageFactory::getCompatibilityMetadataTypeHint(ObjectStorageType::S3); - auto metadata_type = MetadataStorageFactory::getMetadataType(config, config_prefix, compatibility_hint); - - if (metadataTypeFromString(metadata_type) == MetadataStorageType::Plain) + if (isPlainStorage(type, config, config_prefix)) { return std::make_shared>(std::forward(args)...); } @@ -151,10 +159,10 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory) auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); auto settings = getSettings(config, config_prefix, context); auto client = getClient(config, config_prefix, context, *settings); - auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix); + auto key_generator = getKeyGenerator(uri, config, config_prefix); auto object_storage = createObjectStorage( - config, config_prefix, std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name); + ObjectStorageType::S3, config, config_prefix, std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name); /// NOTE: should we still perform this check for clickhouse-disks? if (!skip_access_check) @@ -187,7 +195,7 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory) auto s3_capabilities = getCapabilitiesFromConfig(config, config_prefix); auto settings = getSettings(config, config_prefix, context); auto client = getClient(config, config_prefix, context, *settings); - auto key_generator = getKeyGenerator(disk_type, uri, config, config_prefix); + auto key_generator = getKeyGenerator(uri, config, config_prefix); auto object_storage = std::make_shared>( std::move(client), std::move(settings), uri, s3_capabilities, key_generator, name); @@ -222,7 +230,7 @@ void registerHDFSObjectStorage(ObjectStorageFactory & factory) context->getSettingsRef().hdfs_replication ); - return createObjectStorage(config, config_prefix, uri, std::move(settings), config); + return createObjectStorage(ObjectStorageType::HDFS, config, config_prefix, uri, std::move(settings), config); }); } #endif @@ -239,8 +247,7 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) { String container_name = config.getString(config_prefix + ".container_name", "default-container"); return createObjectStorage( - config, config_prefix, - name, + ObjectStorageType::Azure, config, config_prefix, name, getAzureBlobContainerClient(config, config_prefix), getAzureBlobStorageSettings(config, config_prefix, context), container_name); @@ -273,7 +280,7 @@ void registerWebObjectStorage(ObjectStorageFactory & factory) ErrorCodes::BAD_ARGUMENTS, "Bad URI: `{}`. 
Error: {}", uri, e.what()); } - return createObjectStorage(config, config_prefix, uri, context); + return createObjectStorage(ObjectStorageType::Web, config, config_prefix, uri, context); }); } @@ -291,7 +298,7 @@ void registerLocalObjectStorage(ObjectStorageFactory & factory) loadDiskLocalConfig(name, config, config_prefix, context, object_key_prefix, keep_free_space_bytes); /// keys are mapped to the fs, object_key_prefix is a directory also fs::create_directories(object_key_prefix); - return createObjectStorage(config, config_prefix, object_key_prefix); + return createObjectStorage(ObjectStorageType::Local, config, config_prefix, object_key_prefix); }); } #endif diff --git a/src/Disks/ObjectStorages/PlainObjectStorage.h b/src/Disks/ObjectStorages/PlainObjectStorage.h index 3a81b85c44b..e0907d0b4d8 100644 --- a/src/Disks/ObjectStorages/PlainObjectStorage.h +++ b/src/Disks/ObjectStorages/PlainObjectStorage.h @@ -1,5 +1,6 @@ #pragma once #include +#include namespace DB { @@ -24,6 +25,11 @@ public: bool isWriteOnce() const override { return true; } bool isPlain() const override { return true; } + + ObjectStorageKey generateObjectKeyForPath(const std::string & path) const override + { + return ObjectStorageKey::createAsRelative(BaseObjectStorage::getCommonKeyPrefix(), path); + } }; } diff --git a/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp b/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp index bb7b53b2d22..4b889f89f90 100644 --- a/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp +++ b/src/Disks/ObjectStorages/S3/DiskS3Utils.cpp @@ -15,16 +15,10 @@ namespace ErrorCodes } ObjectStorageKeysGeneratorPtr getKeyGenerator( - String type, const S3::URI & uri, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { - if (type == "s3_plain") - return createObjectStorageKeysGeneratorAsIsWithPrefix(uri.key); - - chassert(type == "s3"); - bool storage_metadata_write_full_object_key = DiskObjectStorageMetadata::getWriteFullObjectKeySetting(); bool send_metadata = config.getBool(config_prefix + ".send_metadata", false); diff --git a/src/Disks/ObjectStorages/S3/DiskS3Utils.h b/src/Disks/ObjectStorages/S3/DiskS3Utils.h index 29e39d4bc1b..8524a9ccac3 100644 --- a/src/Disks/ObjectStorages/S3/DiskS3Utils.h +++ b/src/Disks/ObjectStorages/S3/DiskS3Utils.h @@ -12,7 +12,6 @@ namespace DB namespace S3 { struct URI; } ObjectStorageKeysGeneratorPtr getKeyGenerator( - String type, const S3::URI & uri, const Poco::Util::AbstractConfiguration & config, const String & config_prefix); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 5771eb1ebe0..b2a9ab8fdc3 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -561,6 +561,8 @@ std::unique_ptr S3ObjectStorage::cloneObjectStorage( ObjectStorageKey S3ObjectStorage::generateObjectKeyForPath(const std::string & path) const { + if (!key_generator) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Key generator is not set"); return key_generator->generate(path); } diff --git a/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml b/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml index 3166eea7ccb..2edabc76c8b 100644 --- a/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml +++ b/tests/integration/test_attach_backup_from_s3_plain/configs/disk_s3.xml @@ -10,9 +10,9 @@ object_storage - local + local_blob_storage plain - local_plain/ + /local_plain/ diff --git 
a/tests/integration/test_attach_backup_from_s3_plain/test.py b/tests/integration/test_attach_backup_from_s3_plain/test.py index 4a8da1e6d66..983275cc24f 100644 --- a/tests/integration/test_attach_backup_from_s3_plain/test.py +++ b/tests/integration/test_attach_backup_from_s3_plain/test.py @@ -21,11 +21,11 @@ def start_cluster(): cluster.shutdown() s3_disk_def = """disk(type=s3_plain, - endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{backup_name}/', + endpoint='http://minio1:9001/root/data/disks/disk_s3_plain/{}/', access_key_id='minio', secret_access_key='minio123');""" -local_disk_def = "disk(type=object_storage, object_storage_type = 'local', metadata_type = 'plain'" +local_disk_def = "disk(type=object_storage, object_storage_type = 'local_blob_storage', metadata_type = 'plain', path = '/local_plain/{}/')" @pytest.mark.parametrize( "table_name,backup_name,storage_policy,disk_def,min_bytes_for_wide_part", @@ -41,6 +41,7 @@ local_disk_def = "disk(type=object_storage, object_storage_type = 'local', metad ], ) def test_attach_part(table_name, backup_name, storage_policy, disk_def, min_bytes_for_wide_part): + disk_definition = disk_def.format(backup_name) node.query( f""" -- Catch any errors (NOTE: warnings are ok) @@ -63,7 +64,7 @@ def test_attach_part(table_name, backup_name, storage_policy, disk_def, min_byte order by key partition by part settings max_suspicious_broken_parts=0, - disk={disk_def} + disk={disk_definition} """ ) From fb38bd139c433ead685028f232e8c4fad5e566d2 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Feb 2024 17:38:02 +0800 Subject: [PATCH 011/283] Remove debug logging --- src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index 51c260cc270..4ec998a2bb0 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -55,7 +55,6 @@ std::unique_ptr LocalObjectStorage::readObjects( /// NOL return createReadBufferFromFileBase(file_path, modified_settings, read_hint, file_size); }; - LOG_TEST(log, "Read object: {}", objects[0].remote_path); switch (read_settings.remote_fs_method) { case RemoteFSReadMethod::read: @@ -109,10 +108,8 @@ std::unique_ptr LocalObjectStorage::readObject( /// NOLI std::optional read_hint, std::optional file_size) const { - const auto & path = object.remote_path; - if (!file_size) - file_size = tryGetSizeFromFilePath(path); + file_size = tryGetSizeFromFilePath(object.remote_path); LOG_TEST(log, "Read object: {}", object.remote_path); return createReadBufferFromFileBase(object.remote_path, patchSettings(read_settings), read_hint, file_size); From 978fe9fa1a069a231bb52c66b3898c6ce112a215 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Feb 2024 17:43:34 +0800 Subject: [PATCH 012/283] Add comments --- src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index 4ec998a2bb0..7f34ca48f7f 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -126,7 +126,11 @@ std::unique_ptr LocalObjectStorage::writeObject( /// NO throw Exception(ErrorCodes::BAD_ARGUMENTS, "LocalObjectStorage doesn't support append to files"); LOG_TEST(log, "Write object: {}", 
object.remote_path); + + /// Unlike real blob storage, in local fs we cannot create a file with non-existing prefix. + /// So let's create it. fs::create_directories(fs::path(object.remote_path).parent_path()); + return std::make_unique(object.remote_path, buf_size); } @@ -185,6 +189,8 @@ void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWith bool LocalObjectStorage::existsOrHasAnyChild(const std::string & path) const { + /// Unlike real object storage, existance of a prefix path can be checked by + /// just checking existence of this prefix directly, so simple exists is enough here. return exists(StoredObject(path)); } From 33788250b1f74384661cd241e2badef82c8fdbf6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Feb 2024 18:07:19 +0800 Subject: [PATCH 013/283] Update test.py --- tests/integration/test_attach_backup_from_s3_plain/test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_attach_backup_from_s3_plain/test.py b/tests/integration/test_attach_backup_from_s3_plain/test.py index 3a0fa70a715..c2f8936b82c 100644 --- a/tests/integration/test_attach_backup_from_s3_plain/test.py +++ b/tests/integration/test_attach_backup_from_s3_plain/test.py @@ -26,9 +26,8 @@ s3_disk_def = """disk(type=s3_plain, access_key_id='minio', secret_access_key='minio123');""" -local_disk_def = ( - "disk(type=object_storage, object_storage_type = 'local', metadata_type = 'plain', path = '/local_plain/{}/'" -) +local_disk_def = "disk(type=object_storage, object_storage_type = 'local_blob_storage', metadata_type = 'plain', path = '/local_plain/{}/');" + @pytest.mark.parametrize( "table_name,backup_name,storage_policy,disk_def,min_bytes_for_wide_part", @@ -67,7 +66,6 @@ local_disk_def = ( ), ], ) - def test_attach_part( table_name, backup_name, storage_policy, disk_def, min_bytes_for_wide_part ): From 98b27fd45fbe1109442c2313181ca4e8435e2024 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 27 Feb 2024 23:00:27 +0800 Subject: [PATCH 014/283] Fix style check --- src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp | 2 +- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index 7f34ca48f7f..eba57969580 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -189,7 +189,7 @@ void LocalObjectStorage::listObjects(const std::string & path, RelativePathsWith bool LocalObjectStorage::existsOrHasAnyChild(const std::string & path) const { - /// Unlike real object storage, existance of a prefix path can be checked by + /// Unlike real object storage, existence of a prefix path can be checked by /// just checking existence of this prefix directly, so simple exists is enough here. 
return exists(StoredObject(path)); } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index b2a9ab8fdc3..eec3a5914fc 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -48,6 +48,7 @@ namespace ErrorCodes { extern const int S3_ERROR; extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } namespace From 416638461fe832673252445d8fabb3fe554eed49 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 27 Feb 2024 15:02:13 +0000 Subject: [PATCH 015/283] Try to fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike --- src/Functions/array/FunctionArrayMapped.h | 4 ++-- .../03002_map_array_functions_with_low_cardinality.reference | 1 + .../03002_map_array_functions_with_low_cardinality.sql | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.reference create mode 100644 tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql diff --git a/src/Functions/array/FunctionArrayMapped.h b/src/Functions/array/FunctionArrayMapped.h index 49ed9d495e2..136d3481771 100644 --- a/src/Functions/array/FunctionArrayMapped.h +++ b/src/Functions/array/FunctionArrayMapped.h @@ -355,7 +355,7 @@ public: { arrays.emplace_back( column_tuple->getColumnPtr(j), - recursiveRemoveLowCardinality(type_tuple.getElement(j)), + type_tuple.getElement(j), array_with_type_and_name.name + "." + tuple_names[j]); } } @@ -363,7 +363,7 @@ public: { arrays.emplace_back( column_array->getDataPtr(), - recursiveRemoveLowCardinality(array_type->getNestedType()), + array_type->getNestedType(), array_with_type_and_name.name); } diff --git a/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.reference b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql new file mode 100644 index 00000000000..8240a8f93f5 --- /dev/null +++ b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql @@ -0,0 +1,2 @@ +SELECT mapContainsKeyLike(map('aa', toLowCardinality(1), 'bb', toLowCardinality(2)), toLowCardinality('a%')); + From cb8390e9c8672bcdead0108be75021d6c6f21331 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 28 Feb 2024 13:32:43 +0800 Subject: [PATCH 016/283] Fix build --- src/Disks/ObjectStorages/ObjectStorageFactory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index f64c42c1403..d0c2c9ac4f4 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -16,10 +16,10 @@ #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD #include #include -#include #include #endif #include +#include #include #include From d2ea882bd8105f5d2e173a6670bf23b2917b3190 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 28 Feb 2024 21:26:19 +0000 Subject: [PATCH 017/283] Fix deadlock in parallel parsing when lots of rows are skipped due to errors --- .../Formats/Impl/ParallelParsingInputFormat.cpp | 4 +++- 
.../03001_parallel_parsing_deadlock.reference | 0 .../0_stateless/03001_parallel_parsing_deadlock.sh | 12 ++++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03001_parallel_parsing_deadlock.reference create mode 100755 tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp index 8b6969bbfcc..447adb1ed48 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp @@ -224,7 +224,9 @@ Chunk ParallelParsingInputFormat::read() /// skipped all rows. For example, it can happen while using settings /// input_format_allow_errors_num/input_format_allow_errors_ratio /// and this segment contained only rows with errors. - /// Process the next unit. + /// Return this empty unit back to segmentator and process the next unit. + unit->status = READY_TO_INSERT; + segmentator_condvar.notify_all(); ++reader_ticket_number; unit = &processing_units[reader_ticket_number % processing_units.size()]; } diff --git a/tests/queries/0_stateless/03001_parallel_parsing_deadlock.reference b/tests/queries/0_stateless/03001_parallel_parsing_deadlock.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh b/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh new file mode 100755 index 00000000000..1bf21dfc53b --- /dev/null +++ b/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-cpu-aarch64 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.csv +$CLICKHOUSE_LOCAL -q "select number > 1000000 ? 
'error' : toString(number) from numbers(2000000) format CSV" > $DATA_FILE +$CLICKHOUSE_LOCAL -q "select * from file($DATA_FILE, CSV, 'x UInt64') format Null settings input_format_allow_errors_ratio=1" +rm $DATA_FILE + From 6fbd298b3d7cc06b1f11727263a25bc613f7c295 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 29 Feb 2024 05:03:09 +0300 Subject: [PATCH 018/283] Revert "Revert "Use `MergeTree` as a default table engine"" --- src/Core/Settings.h | 2 +- src/Core/SettingsChangesHistory.h | 1 + tests/queries/0_stateless/02184_default_table_engine.sql | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index ae6ea165cc9..5f52396d3bb 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -566,7 +566,7 @@ class IColumn; M(UInt64, min_free_disk_space_for_temporary_data, 0, "The minimum disk space to keep while writing temporary data used in external sorting and aggregation.", 0) \ \ M(DefaultTableEngine, default_temporary_table_engine, DefaultTableEngine::Memory, "Default table engine used when ENGINE is not set in CREATE TEMPORARY statement.",0) \ - M(DefaultTableEngine, default_table_engine, DefaultTableEngine::None, "Default table engine used when ENGINE is not set in CREATE statement.",0) \ + M(DefaultTableEngine, default_table_engine, DefaultTableEngine::MergeTree, "Default table engine used when ENGINE is not set in CREATE statement.",0) \ M(Bool, show_table_uuid_in_table_create_query_if_not_nil, false, "For tables in databases with Engine=Atomic show UUID of the table in its CREATE query.", 0) \ M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \ M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index e8d013d13ec..661e7cb80da 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -133,6 +133,7 @@ static std::map sett {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, + {"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"}, {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, diff --git a/tests/queries/0_stateless/02184_default_table_engine.sql b/tests/queries/0_stateless/02184_default_table_engine.sql index a984ec1b6c9..aff30eeea98 100644 --- a/tests/queries/0_stateless/02184_default_table_engine.sql +++ b/tests/queries/0_stateless/02184_default_table_engine.sql @@ -1,3 +1,5 @@ +SET default_table_engine = 'None'; + CREATE TABLE table_02184 (x UInt8); 
--{serverError 119}
 SET default_table_engine = 'Log';
 CREATE TABLE table_02184 (x UInt8);

From 0d4648b535a61561d122c87cf181434215753b35 Mon Sep 17 00:00:00 2001
From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com>
Date: Thu, 29 Feb 2024 10:30:17 +0800
Subject: [PATCH 019/283] Fix clang-tidy

---
 src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp
index eba57969580..c0b45e1d46a 100644
--- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp
@@ -32,7 +32,7 @@ LocalObjectStorage::LocalObjectStorage(String key_prefix_)
     else
         description = "/";

-    fs::create_directories(getCommonKeyPrefix());
+    fs::create_directories(key_prefix);
 }

 bool LocalObjectStorage::exists(const StoredObject & object) const

From f8561b2265b924c64c60bdbc5305785c0f0b6f2e Mon Sep 17 00:00:00 2001
From: Sergei Trifonov
Date: Thu, 29 Feb 2024 13:53:27 +0100
Subject: [PATCH 020/283] Revert "Revert "Support resource request canceling""

---
 docs/en/operations/system-tables/scheduler.md |  4 +
 src/Common/Scheduler/ISchedulerNode.h         |  2 +
 src/Common/Scheduler/ISchedulerQueue.h        |  6 ++
 src/Common/Scheduler/Nodes/FairPolicy.h       | 99 ++++++++++---------
 src/Common/Scheduler/Nodes/FifoQueue.h        | 31 ++++--
 src/Common/Scheduler/Nodes/PriorityPolicy.h   | 38 ++++---
 .../tests/gtest_dynamic_resource_manager.cpp  |  1 -
 .../Nodes/tests/gtest_resource_scheduler.cpp  | 63 ++++++++++++
 src/Common/Scheduler/ResourceGuard.h          |  9 +-
 src/Common/Scheduler/ResourceRequest.cpp      | 13 +++
 src/Common/Scheduler/ResourceRequest.h        | 30 +++---
 src/Common/Scheduler/SchedulerRoot.h          | 32 +++---
 .../System/StorageSystemScheduler.cpp         |  4 +
 13 files changed, 224 insertions(+), 108 deletions(-)
 create mode 100644 src/Common/Scheduler/ResourceRequest.cpp

diff --git a/docs/en/operations/system-tables/scheduler.md b/docs/en/operations/system-tables/scheduler.md
index 953db4c28f2..c4de7f76fdc 100644
--- a/docs/en/operations/system-tables/scheduler.md
+++ b/docs/en/operations/system-tables/scheduler.md
@@ -26,7 +26,9 @@ priority:          0
 is_active:         0
 active_children:   0
 dequeued_requests: 67
+canceled_requests: 0
 dequeued_cost:     4692272
+canceled_cost:     0
 busy_periods:      63
 vruntime:          938454.1999999989
 system_vruntime:   ᴺᵁᴸᴸ
@@ -54,7 +56,9 @@ Columns:
 - `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied.
 - `active_children` (`UInt64`) - The number of children in active state.
 - `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node.
+- `canceled_requests` (`UInt64`) - The total number of resource requests canceled from this node.
 - `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node.
+- `canceled_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests canceled from this node.
 - `busy_periods` (`UInt64`) - The total number of deactivations of this node.
 - `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner.
 - `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`.
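An editorial aside, not part of the patch series: once this patch is applied, the new counters can be read back through the same system table the documentation change describes; the resource name below is illustrative.

``` sql
-- A sketch, not part of the patch: inspect cancellation counters per scheduler node.
SELECT resource, path, type,
       dequeued_requests, canceled_requests,
       dequeued_cost, canceled_cost
FROM system.scheduler
WHERE resource = 'network_read' -- illustrative resource name
ORDER BY path;
```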
diff --git a/src/Common/Scheduler/ISchedulerNode.h b/src/Common/Scheduler/ISchedulerNode.h index 804026d7bf4..20c1f4332da 100644 --- a/src/Common/Scheduler/ISchedulerNode.h +++ b/src/Common/Scheduler/ISchedulerNode.h @@ -387,7 +387,9 @@ public: /// Introspection std::atomic dequeued_requests{0}; + std::atomic canceled_requests{0}; std::atomic dequeued_cost{0}; + std::atomic canceled_cost{0}; std::atomic busy_periods{0}; }; diff --git a/src/Common/Scheduler/ISchedulerQueue.h b/src/Common/Scheduler/ISchedulerQueue.h index cbe63bd304a..532f4bf6c63 100644 --- a/src/Common/Scheduler/ISchedulerQueue.h +++ b/src/Common/Scheduler/ISchedulerQueue.h @@ -50,6 +50,12 @@ public: /// Should be called outside of scheduling subsystem, implementation must be thread-safe. virtual void enqueueRequest(ResourceRequest * request) = 0; + /// Cancel previously enqueued request. + /// Returns `false` and does nothing given unknown or already executed request. + /// Returns `true` if requests has been found and canceled. + /// Should be called outside of scheduling subsystem, implementation must be thread-safe. + virtual bool cancelRequest(ResourceRequest * request) = 0; + /// For introspection ResourceCost getBudget() const { diff --git a/src/Common/Scheduler/Nodes/FairPolicy.h b/src/Common/Scheduler/Nodes/FairPolicy.h index c0e187e6fa9..ce2bf729a04 100644 --- a/src/Common/Scheduler/Nodes/FairPolicy.h +++ b/src/Common/Scheduler/Nodes/FairPolicy.h @@ -134,56 +134,65 @@ public: std::pair dequeueRequest() override { - if (heap_size == 0) - return {nullptr, false}; - - // Recursively pull request from child - auto [request, child_active] = items.front().child->dequeueRequest(); - assert(request != nullptr); - std::pop_heap(items.begin(), items.begin() + heap_size); - Item & current = items[heap_size - 1]; - - // SFQ fairness invariant: system vruntime equals last served request start-time - assert(current.vruntime >= system_vruntime); - system_vruntime = current.vruntime; - - // By definition vruntime is amount of consumed resource (cost) divided by weight - current.vruntime += double(request->cost) / current.child->info.weight; - max_vruntime = std::max(max_vruntime, current.vruntime); - - if (child_active) // Put active child back in heap after vruntime update + // Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr` + while (true) { - std::push_heap(items.begin(), items.begin() + heap_size); - } - else // Deactivate child if it is empty, but remember it's vruntime for latter activations - { - heap_size--; + if (heap_size == 0) + return {nullptr, false}; - // Store index of this inactive child in `parent.idx` - // This enables O(1) search of inactive children instead of O(n) - current.child->info.parent.idx = heap_size; - } + // Recursively pull request from child + auto [request, child_active] = items.front().child->dequeueRequest(); + std::pop_heap(items.begin(), items.begin() + heap_size); + Item & current = items[heap_size - 1]; - // Reset any difference between children on busy period end - if (heap_size == 0) - { - // Reset vtime to zero to avoid floating-point error accumulation, - // but do not reset too often, because it's O(N) - UInt64 ns = clock_gettime_ns(); - if (last_reset_ns + 1000000000 < ns) + if (request) { - last_reset_ns = ns; - for (Item & item : items) - item.vruntime = 0; - max_vruntime = 0; - } - system_vruntime = max_vruntime; - busy_periods++; - } + // SFQ fairness invariant: system vruntime equals last served request start-time + 
assert(current.vruntime >= system_vruntime); + system_vruntime = current.vruntime; - dequeued_requests++; - dequeued_cost += request->cost; - return {request, heap_size > 0}; + // By definition vruntime is amount of consumed resource (cost) divided by weight + current.vruntime += double(request->cost) / current.child->info.weight; + max_vruntime = std::max(max_vruntime, current.vruntime); + } + + if (child_active) // Put active child back in heap after vruntime update + { + std::push_heap(items.begin(), items.begin() + heap_size); + } + else // Deactivate child if it is empty, but remember it's vruntime for latter activations + { + heap_size--; + + // Store index of this inactive child in `parent.idx` + // This enables O(1) search of inactive children instead of O(n) + current.child->info.parent.idx = heap_size; + } + + // Reset any difference between children on busy period end + if (heap_size == 0) + { + // Reset vtime to zero to avoid floating-point error accumulation, + // but do not reset too often, because it's O(N) + UInt64 ns = clock_gettime_ns(); + if (last_reset_ns + 1000000000 < ns) + { + last_reset_ns = ns; + for (Item & item : items) + item.vruntime = 0; + max_vruntime = 0; + } + system_vruntime = max_vruntime; + busy_periods++; + } + + if (request) + { + dequeued_requests++; + dequeued_cost += request->cost; + return {request, heap_size > 0}; + } + } } bool isActive() override diff --git a/src/Common/Scheduler/Nodes/FifoQueue.h b/src/Common/Scheduler/Nodes/FifoQueue.h index 38ae902bc2f..45ed32343ff 100644 --- a/src/Common/Scheduler/Nodes/FifoQueue.h +++ b/src/Common/Scheduler/Nodes/FifoQueue.h @@ -39,8 +39,7 @@ public: void enqueueRequest(ResourceRequest * request) override { - std::unique_lock lock(mutex); - request->enqueue_ns = clock_gettime_ns(); + std::lock_guard lock(mutex); queue_cost += request->cost; bool was_empty = requests.empty(); requests.push_back(request); @@ -50,7 +49,7 @@ public: std::pair dequeueRequest() override { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); if (requests.empty()) return {nullptr, false}; ResourceRequest * result = requests.front(); @@ -63,9 +62,29 @@ public: return {result, !requests.empty()}; } + bool cancelRequest(ResourceRequest * request) override + { + std::lock_guard lock(mutex); + // TODO(serxa): reimplement queue as intrusive list of ResourceRequest to make this O(1) instead of O(N) + for (auto i = requests.begin(), e = requests.end(); i != e; ++i) + { + if (*i == request) + { + requests.erase(i); + if (requests.empty()) + busy_periods++; + queue_cost -= request->cost; + canceled_requests++; + canceled_cost += request->cost; + return true; + } + } + return false; + } + bool isActive() override { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); return !requests.empty(); } @@ -98,14 +117,14 @@ public: std::pair getQueueLengthAndCost() { - std::unique_lock lock(mutex); + std::lock_guard lock(mutex); return {requests.size(), queue_cost}; } private: std::mutex mutex; Int64 queue_cost = 0; - std::deque requests; + std::deque requests; // TODO(serxa): reimplement it using intrusive list to avoid allocations/deallocations and O(N) during cancel }; } diff --git a/src/Common/Scheduler/Nodes/PriorityPolicy.h b/src/Common/Scheduler/Nodes/PriorityPolicy.h index 6d6b15bd063..9b4cfc37f8c 100644 --- a/src/Common/Scheduler/Nodes/PriorityPolicy.h +++ b/src/Common/Scheduler/Nodes/PriorityPolicy.h @@ -102,25 +102,31 @@ public: std::pair dequeueRequest() override { - if (items.empty()) - return {nullptr, false}; - - // 
Recursively pull request from child - auto [request, child_active] = items.front().child->dequeueRequest(); - assert(request != nullptr); - - // Deactivate child if it is empty - if (!child_active) + // Cycle is required to do deactivations in the case of canceled requests, when dequeueRequest returns `nullptr` + while (true) { - std::pop_heap(items.begin(), items.end()); - items.pop_back(); if (items.empty()) - busy_periods++; - } + return {nullptr, false}; - dequeued_requests++; - dequeued_cost += request->cost; - return {request, !items.empty()}; + // Recursively pull request from child + auto [request, child_active] = items.front().child->dequeueRequest(); + + // Deactivate child if it is empty + if (!child_active) + { + std::pop_heap(items.begin(), items.end()); + items.pop_back(); + if (items.empty()) + busy_periods++; + } + + if (request) + { + dequeued_requests++; + dequeued_cost += request->cost; + return {request, !items.empty()}; + } + } } bool isActive() override diff --git a/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp b/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp index 961a3b6f713..cdf09776077 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_dynamic_resource_manager.cpp @@ -38,7 +38,6 @@ TEST(SchedulerDynamicResourceManager, Smoke) { ResourceGuard gA(cA->get("res1"), ResourceGuard::PostponeLocking); gA.lock(); - gA.setFailure(); gA.unlock(); ResourceGuard gB(cB->get("res1")); diff --git a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp index 9fefbc02cbd..e76639a4b01 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp @@ -4,6 +4,7 @@ #include +#include #include using namespace DB; @@ -73,6 +74,22 @@ struct ResourceHolder } }; +struct MyRequest : public ResourceRequest +{ + std::function on_execute; + + explicit MyRequest(ResourceCost cost_, std::function on_execute_) + : ResourceRequest(cost_) + , on_execute(on_execute_) + {} + + void execute() override + { + if (on_execute) + on_execute(); + } +}; + TEST(SchedulerRoot, Smoke) { ResourceTest t; @@ -111,3 +128,49 @@ TEST(SchedulerRoot, Smoke) EXPECT_TRUE(fc2->requests.contains(&rg.request)); } } + +TEST(SchedulerRoot, Cancel) +{ + ResourceTest t; + + ResourceHolder r1(t); + auto * fc1 = r1.add("/", "1"); + r1.add("/prio"); + auto a = r1.addQueue("/prio/A", "1"); + auto b = r1.addQueue("/prio/B", "2"); + r1.registerResource(); + + std::barrier sync(2); + std::thread consumer1([&] + { + std::barrier destruct_sync(2); + MyRequest request(1,[&] + { + sync.arrive_and_wait(); // (A) + EXPECT_TRUE(fc1->requests.contains(&request)); + sync.arrive_and_wait(); // (B) + request.finish(); + destruct_sync.arrive_and_wait(); // (C) + }); + a.queue->enqueueRequest(&request); + destruct_sync.arrive_and_wait(); // (C) + }); + + std::thread consumer2([&] + { + MyRequest request(1,[&] + { + FAIL() << "This request must be canceled, but instead executes"; + }); + sync.arrive_and_wait(); // (A) wait for request of consumer1 to be inside execute, so that constraint is in violated state and our request will not be executed immediately + b.queue->enqueueRequest(&request); + bool canceled = b.queue->cancelRequest(&request); + EXPECT_TRUE(canceled); + sync.arrive_and_wait(); // (B) release request of consumer1 to be finished + }); + + consumer1.join(); + 
consumer2.join(); + + EXPECT_TRUE(fc1->requests.empty()); +} diff --git a/src/Common/Scheduler/ResourceGuard.h b/src/Common/Scheduler/ResourceGuard.h index dca4041b176..50f665a384b 100644 --- a/src/Common/Scheduler/ResourceGuard.h +++ b/src/Common/Scheduler/ResourceGuard.h @@ -71,8 +71,7 @@ public: // lock(mutex) is not required because `Dequeued` request cannot be used by the scheduler thread chassert(state == Dequeued); state = Finished; - if (constraint) - constraint->finishRequest(this); + ResourceRequest::finish(); } static Request & local() @@ -126,12 +125,6 @@ public: } } - /// Mark request as unsuccessful; by default request is considered to be successful - void setFailure() - { - request.successful = false; - } - ResourceLink link; Request & request; }; diff --git a/src/Common/Scheduler/ResourceRequest.cpp b/src/Common/Scheduler/ResourceRequest.cpp new file mode 100644 index 00000000000..26e8084cdfa --- /dev/null +++ b/src/Common/Scheduler/ResourceRequest.cpp @@ -0,0 +1,13 @@ +#include +#include + +namespace DB +{ + +void ResourceRequest::finish() +{ + if (constraint) + constraint->finishRequest(this); +} + +} diff --git a/src/Common/Scheduler/ResourceRequest.h b/src/Common/Scheduler/ResourceRequest.h index 3d2230746f9..f3153ad382c 100644 --- a/src/Common/Scheduler/ResourceRequest.h +++ b/src/Common/Scheduler/ResourceRequest.h @@ -14,9 +14,6 @@ class ISchedulerConstraint; using ResourceCost = Int64; constexpr ResourceCost ResourceCostMax = std::numeric_limits::max(); -/// Timestamps (nanoseconds since epoch) -using ResourceNs = UInt64; - /* * Request for a resource consumption. The main moving part of the scheduling subsystem. * Resource requests processing workflow: @@ -31,7 +28,7 @@ using ResourceNs = UInt64; * 3) Scheduler calls ISchedulerNode::dequeueRequest() that returns the request. * 4) Callback ResourceRequest::execute() is called to provide access to the resource. * 5) The resource consumption is happening outside of the scheduling subsystem. - * 6) request->constraint->finishRequest() is called when consumption is finished. + * 6) ResourceRequest::finish() is called when consumption is finished. * * Steps (5) and (6) can be omitted if constraint is not used by the resource. * @@ -39,7 +36,10 @@ using ResourceNs = UInt64; * Request ownership is done outside of the scheduling subsystem. * After (6) request can be destructed safely. * - * Request cancelling is not supported yet. + * Request can also be canceled before (3) using ISchedulerQueue::cancelRequest(). + * Returning false means it is too late for request to be canceled. It should be processed in a regular way. + * Returning true means successful cancel and therefore steps (4) and (5) are not going to happen + * and step (6) MUST be omitted. 
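+ *
+ * A minimal consumer-side sketch of this lifecycle (the `queue` handle and the
+ * `MyRequest` type are assumptions mirroring the unit test in this patch):
+ *   MyRequest request(1, on_execute);    // (1) created by the consumer
+ *   queue->enqueueRequest(&request);     // (2) enqueued
+ *   if (queue->cancelRequest(&request))  // attempt to cancel before (3)
+ *       return;                          // canceled: steps (4)-(6) never happen
+ *   // otherwise the scheduler calls execute() (4), consumption happens (5),
+ *   // and the consumer calls request.finish() when done (6)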
*/ class ResourceRequest { @@ -48,32 +48,20 @@ public: /// NOTE: If cost is not known in advance, ResourceBudget should be used (note that every ISchedulerQueue has it) ResourceCost cost; - /// Request outcome - /// Should be filled during resource consumption - bool successful; - /// Scheduler node to be notified on consumption finish /// Auto-filled during request enqueue/dequeue ISchedulerConstraint * constraint; - /// Timestamps for introspection - ResourceNs enqueue_ns; - ResourceNs execute_ns; - ResourceNs finish_ns; - explicit ResourceRequest(ResourceCost cost_ = 1) { reset(cost_); } + /// ResourceRequest object may be reused again after reset() void reset(ResourceCost cost_) { cost = cost_; - successful = true; constraint = nullptr; - enqueue_ns = 0; - execute_ns = 0; - finish_ns = 0; } virtual ~ResourceRequest() = default; @@ -83,6 +71,12 @@ public: /// just triggering start of a consumption, not doing the consumption itself /// (e.g. setting an std::promise or creating a job in a thread pool) virtual void execute() = 0; + + /// Stop resource consumption and notify resource scheduler. + /// Should be called when resource consumption is finished by consumer. + /// ResourceRequest should not be destructed or reset before calling to `finish()`. + /// WARNING: this function MUST not be called if request was canceled. + void finish(); }; } diff --git a/src/Common/Scheduler/SchedulerRoot.h b/src/Common/Scheduler/SchedulerRoot.h index 3a23a8df834..ab3f702a422 100644 --- a/src/Common/Scheduler/SchedulerRoot.h +++ b/src/Common/Scheduler/SchedulerRoot.h @@ -145,22 +145,27 @@ public: std::pair dequeueRequest() override { - if (current == nullptr) // No active resources - return {nullptr, false}; + while (true) + { + if (current == nullptr) // No active resources + return {nullptr, false}; - // Dequeue request from current resource - auto [request, resource_active] = current->root->dequeueRequest(); - assert(request != nullptr); + // Dequeue request from current resource + auto [request, resource_active] = current->root->dequeueRequest(); - // Deactivate resource if required - if (!resource_active) - deactivate(current); - else - current = current->next; // Just move round-robin pointer + // Deactivate resource if required + if (!resource_active) + deactivate(current); + else + current = current->next; // Just move round-robin pointer - dequeued_requests++; - dequeued_cost += request->cost; - return {request, current != nullptr}; + if (request == nullptr) // Possible in case of request cancel, just retry + continue; + + dequeued_requests++; + dequeued_cost += request->cost; + return {request, current != nullptr}; + } } bool isActive() override @@ -245,7 +250,6 @@ private: void execute(ResourceRequest * request) { - request->execute_ns = clock_gettime_ns(); request->execute(); } diff --git a/src/Storages/System/StorageSystemScheduler.cpp b/src/Storages/System/StorageSystemScheduler.cpp index ba07d44dbf9..633bac5d285 100644 --- a/src/Storages/System/StorageSystemScheduler.cpp +++ b/src/Storages/System/StorageSystemScheduler.cpp @@ -30,7 +30,9 @@ ColumnsDescription StorageSystemScheduler::getColumnsDescription() {"is_active", std::make_shared(), "Whether this node is currently active - has resource requests to be dequeued and constraints satisfied."}, {"active_children", std::make_shared(), "The number of children in active state."}, {"dequeued_requests", std::make_shared(), "The total number of resource requests dequeued from this node."}, + {"canceled_requests", std::make_shared(), "The total 
number of resource requests canceled from this node."}, {"dequeued_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests dequeued from this node."}, + {"canceled_cost", std::make_shared(), "The sum of costs (e.g. size in bytes) of all requests canceled from this node."}, {"busy_periods", std::make_shared(), "The total number of deactivations of this node."}, {"vruntime", std::make_shared(std::make_shared()), "For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner."}, @@ -93,7 +95,9 @@ void StorageSystemScheduler::fillData(MutableColumns & res_columns, ContextPtr c res_columns[i++]->insert(node->isActive()); res_columns[i++]->insert(node->activeChildren()); res_columns[i++]->insert(node->dequeued_requests.load()); + res_columns[i++]->insert(node->canceled_requests.load()); res_columns[i++]->insert(node->dequeued_cost.load()); + res_columns[i++]->insert(node->canceled_cost.load()); res_columns[i++]->insert(node->busy_periods.load()); Field vruntime; From 7632c2c33f357c1c616f734c7bf2502ccbfbd496 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 29 Feb 2024 15:17:12 +0000 Subject: [PATCH 021/283] Remove non-deterministic functions in virtual columns filter --- src/Storages/MergeTree/MergeTreeData.cpp | 2 ++ src/Storages/VirtualColumnUtils.cpp | 21 +++++++++++++++++++ ...with_non_deterministic_functions.reference | 11 ++++++++++ ...lumns_with_non_deterministic_functions.sql | 6 ++++++ 4 files changed, 40 insertions(+) create mode 100644 tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.reference create mode 100644 tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8aa188cfe5c..6494ed5d844 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1082,6 +1082,8 @@ std::optional MergeTreeData::totalRowsByPartitionPredicateImpl( Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, true /* one_part */); auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr); + if (!filter_dag) + return {}; // Generate valid expressions for filtering bool valid = true; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 33ff6e7104f..3e0ef1d7990 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -467,6 +467,23 @@ static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block & allo return true; } +static bool isDeterministic(const ActionsDAG::Node * node) +{ + if (node->type != ActionsDAG::ActionType::FUNCTION) + return true; + + if (!node->function_base->isDeterministic()) + return false; + + for (const auto * child : node->children) + { + if (!isDeterministic(child)) + return false; + } + + return true; +} + static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( const ActionsDAG::Node * node, const Block * allowed_inputs, @@ -542,6 +559,10 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( } } } + else if (!isDeterministic(node)) + { + return nullptr; + } } if (allowed_inputs && !canEvaluateSubtree(node, *allowed_inputs)) diff --git a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.reference 
b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.reference new file mode 100644 index 00000000000..4c9646d6ffa --- /dev/null +++ b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.reference @@ -0,0 +1,11 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 diff --git a/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql new file mode 100644 index 00000000000..9f8bc6bd3d7 --- /dev/null +++ b/tests/queries/0_stateless/03002_filter_skip_virtual_columns_with_non_deterministic_functions.sql @@ -0,0 +1,6 @@ +create table test (number UInt64) engine=MergeTree order by number; +insert into test select * from numbers(100000000); +select ignore(number) from test where RAND() > 4292390314 limit 10; +select count() > 0 from test where RAND() > 4292390314; +drop table test; + From 09a392772d75b38e1b19ad6bd2a863168ea0de5c Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 29 Feb 2024 15:34:45 +0000 Subject: [PATCH 022/283] Use isDeterministicInScopeOfQuery --- src/Storages/VirtualColumnUtils.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 3e0ef1d7990..6d66453442e 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -467,17 +467,17 @@ static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block & allo return true; } -static bool isDeterministic(const ActionsDAG::Node * node) +static bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) { if (node->type != ActionsDAG::ActionType::FUNCTION) return true; - if (!node->function_base->isDeterministic()) + if (!node->function_base->isDeterministicInScopeOfQuery()) return false; for (const auto * child : node->children) { - if (!isDeterministic(child)) + if (!isDeterministicInScopeOfQuery(child)) return false; } @@ -559,7 +559,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs( } } } - else if (!isDeterministic(node)) + else if (!isDeterministicInScopeOfQuery(node)) { return nullptr; } From 3825cb3ad0d7f2296cf075648d022ef26f1e0cef Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Sat, 2 Mar 2024 15:28:45 +0000 Subject: [PATCH 023/283] expand CTE in alter modify query --- src/Interpreters/InterpreterAlterQuery.cpp | 11 +++++++++++ .../0_stateless/03002_modify_query_cte.reference | 2 ++ .../0_stateless/03002_modify_query_cte.sql | 15 +++++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 tests/queries/0_stateless/03002_modify_query_cte.reference create mode 100644 tests/queries/0_stateless/03002_modify_query_cte.sql diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index b768593da98..7acaf95becc 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -71,11 +72,15 @@ BlockIO InterpreterAlterQuery::execute() BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) { + ASTSelectWithUnionQuery * modify_query = nullptr; + for (auto & child : alter.command_list->children) { auto * command_ast = child->as(); if (command_ast->sql_security) InterpreterCreateQuery::processSQLSecurityOption(getContext(), command_ast->sql_security->as()); + else if (command_ast->type == 
ASTAlterCommand::MODIFY_QUERY) + modify_query = command_ast->select->as(); } BlockIO res; @@ -123,6 +128,12 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only"); auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); + if (modify_query) + { + // Expand CTE before filling default database + ApplyWithSubqueryVisitor().visit(*modify_query); + } + /// Add default database to table identifiers that we can encounter in e.g. default expressions, mutation expression, etc. AddDefaultDatabaseVisitor visitor(getContext(), table_id.getDatabaseName()); ASTPtr command_list_ptr = alter.command_list->ptr(); diff --git a/tests/queries/0_stateless/03002_modify_query_cte.reference b/tests/queries/0_stateless/03002_modify_query_cte.reference new file mode 100644 index 00000000000..a3d66f70f8f --- /dev/null +++ b/tests/queries/0_stateless/03002_modify_query_cte.reference @@ -0,0 +1,2 @@ +CREATE MATERIALIZED VIEW default.mv_03002 TO default.table_03002\n(\n `ts` DateTime\n)\nAS SELECT ts\nFROM default.table_03002 +CREATE MATERIALIZED VIEW default.mv_03002 TO default.table_03002\n(\n `ts` DateTime\n)\nAS WITH MY_CTE AS\n (\n SELECT ts\n FROM default.table_03002\n )\nSELECT *\nFROM\nMY_CTE diff --git a/tests/queries/0_stateless/03002_modify_query_cte.sql b/tests/queries/0_stateless/03002_modify_query_cte.sql new file mode 100644 index 00000000000..3a36ce7e7fd --- /dev/null +++ b/tests/queries/0_stateless/03002_modify_query_cte.sql @@ -0,0 +1,15 @@ + +CREATE TABLE table_03002 (ts DateTime, event_type String) ENGINE = MergeTree ORDER BY (event_type, ts); + +CREATE MATERIALIZED VIEW mv_03002 TO table_03002 AS SELECT ts FROM table_03002; + +SHOW CREATE TABLE mv_03002; + +ALTER TABLE mv_03002 MODIFY QUERY +WITH MY_CTE AS (SELECT ts FROM table_03002) +SELECT * FROM MY_CTE; + +SHOW CREATE TABLE mv_03002; + +DROP TABLE mv_03002; +DROP TABLE table_03002; From 17413ded759ebcef809e03a80284f6f805507560 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Sat, 2 Mar 2024 11:11:44 -0500 Subject: [PATCH 024/283] Update 03002_modify_query_cte.reference --- tests/queries/0_stateless/03002_modify_query_cte.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03002_modify_query_cte.reference b/tests/queries/0_stateless/03002_modify_query_cte.reference index a3d66f70f8f..50e4a7c6a07 100644 --- a/tests/queries/0_stateless/03002_modify_query_cte.reference +++ b/tests/queries/0_stateless/03002_modify_query_cte.reference @@ -1,2 +1,2 @@ CREATE MATERIALIZED VIEW default.mv_03002 TO default.table_03002\n(\n `ts` DateTime\n)\nAS SELECT ts\nFROM default.table_03002 -CREATE MATERIALIZED VIEW default.mv_03002 TO default.table_03002\n(\n `ts` DateTime\n)\nAS WITH MY_CTE AS\n (\n SELECT ts\n FROM default.table_03002\n )\nSELECT *\nFROM\nMY_CTE +CREATE MATERIALIZED VIEW default.mv_03002 TO default.table_03002\n(\n `ts` DateTime\n)\nAS WITH MY_CTE AS\n (\n SELECT ts\n FROM default.table_03002\n )\nSELECT *\nFROM MY_CTE From a6cb302ab54082db5650263d6417052f81f30710 Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 3 Mar 2024 15:48:49 +0000 Subject: [PATCH 025/283] fix 'AddressSanitizer: stack-use-after-return' --- src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp index e76639a4b01..f8196d15819 100644 --- a/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp +++ b/src/Common/Scheduler/Nodes/tests/gtest_resource_scheduler.cpp @@ -140,10 +140,10 @@ TEST(SchedulerRoot, Cancel) auto b = r1.addQueue("/prio/B", "2"); r1.registerResource(); + std::barrier destruct_sync(2); std::barrier sync(2); std::thread consumer1([&] { - std::barrier destruct_sync(2); MyRequest request(1,[&] { sync.arrive_and_wait(); // (A) From 671b0f678afcdcb354a85aa141920bff09e2bcb2 Mon Sep 17 00:00:00 2001 From: M1eyu2018 <857037797@qq.com> Date: Mon, 4 Mar 2024 10:12:27 +0800 Subject: [PATCH 026/283] Add positional read in libhdfs3 Signed-off-by: M1eyu2018 <857037797@qq.com> --- contrib/libhdfs3 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/libhdfs3 b/contrib/libhdfs3 index b9598e60167..0d04201c453 160000 --- a/contrib/libhdfs3 +++ b/contrib/libhdfs3 @@ -1 +1 @@ -Subproject commit b9598e6016720a7c088bfe85ce1fa0410f9d2103 +Subproject commit 0d04201c45359f0d0701fb1e8297d25eff7cfecf From c435d5894f48d37478454b1934d000fb967e2973 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 4 Mar 2024 14:23:59 +0800 Subject: [PATCH 027/283] remove wrong assertion n quantileGK --- .../AggregateFunctionGroupArray.cpp | 13 ++++++++----- .../AggregateFunctionQuantileGK.cpp | 12 ++++-------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index d72ddb42d9e..6af8b1018dd 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -182,11 +182,14 @@ public: if constexpr (Trait::sampler == Sampler::NONE) { - if (limit_num_elems && cur_elems.value.size() >= max_elems) + if constexpr (limit_num_elems) { - if constexpr (Trait::last) - cur_elems.value[(cur_elems.total_values - 1) % max_elems] = row_value; - return; + if (cur_elems.value.size() >= max_elems) + { + if constexpr (Trait::last) + cur_elems.value[(cur_elems.total_values - 1) % max_elems] = row_value; + return; + } } cur_elems.value.push_back(row_value, arena); @@ -236,7 +239,7 @@ public: void mergeNoSampler(Data & cur_elems, const Data & rhs_elems, Arena * arena) const { - if (!limit_num_elems) + if constexpr (!limit_num_elems) { if (rhs_elems.value.size()) cur_elems.value.insertByOffsets(rhs_elems.value, 0, rhs_elems.value.size(), arena); diff --git a/src/AggregateFunctions/AggregateFunctionQuantileGK.cpp b/src/AggregateFunctions/AggregateFunctionQuantileGK.cpp index 2e8ccb2e5e4..26737e43eef 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantileGK.cpp +++ b/src/AggregateFunctions/AggregateFunctionQuantileGK.cpp @@ -144,7 +144,7 @@ public: count = other.count; compressed = other.compressed; - sampled.resize(other.sampled.size()); + sampled.resize_exact(other.sampled.size()); memcpy(sampled.data(), other.sampled.data(), sizeof(Stats) * other.sampled.size()); return; } @@ -180,7 +180,7 @@ public: compress(); backup_sampled.clear(); - backup_sampled.reserve(sampled.size() + other.sampled.size()); + backup_sampled.reserve_exact(sampled.size() + other.sampled.size()); double merged_relative_error = std::max(relative_error, other.relative_error); size_t merged_count = count + other.count; Int64 additional_self_delta = static_cast(std::floor(2 * other.relative_error * 
other.count)); @@ -268,11 +268,7 @@ public: size_t sampled_len = 0; readBinaryLittleEndian(sampled_len, buf); - if (sampled_len > compress_threshold) - throw Exception( - ErrorCodes::INCORRECT_DATA, "The number of elements {} for quantileGK exceeds {}", sampled_len, compress_threshold); - - sampled.resize(sampled_len); + sampled.resize_exact(sampled_len); for (size_t i = 0; i < sampled_len; ++i) { @@ -317,7 +313,7 @@ private: ::sort(head_sampled.begin(), head_sampled.end()); backup_sampled.clear(); - backup_sampled.reserve(sampled.size() + head_sampled.size()); + backup_sampled.reserve_exact(sampled.size() + head_sampled.size()); size_t sample_idx = 0; size_t ops_idx = 0; From a7db6688edb50f894457c414b207c25548bb18d3 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 4 Mar 2024 18:24:24 +0800 Subject: [PATCH 028/283] Update ObjectStorageFactory.cpp --- src/Disks/ObjectStorages/ObjectStorageFactory.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 47c02f87b23..a0578ac4454 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -246,12 +246,11 @@ void registerAzureObjectStorage(ObjectStorageFactory & factory) bool /* skip_access_check */) -> ObjectStoragePtr { AzureBlobStorageEndpoint endpoint = processAzureBlobStorageEndpoint(config, config_prefix); - return std::make_unique( + return createObjectStorage( ObjectStorageType::Azure, config, config_prefix, name, getAzureBlobContainerClient(config, config_prefix), getAzureBlobStorageSettings(config, config_prefix, context), endpoint.prefix.empty() ? endpoint.container_name : endpoint.container_name + "/" + endpoint.prefix); - }); } #endif From 47ad21dd257ff1a5751d191dfd311a7950a93111 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 5 Mar 2024 12:17:04 +0100 Subject: [PATCH 029/283] Remove extra empty line --- .../03002_map_array_functions_with_low_cardinality.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql index 8240a8f93f5..8820a433da8 100644 --- a/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql +++ b/tests/queries/0_stateless/03002_map_array_functions_with_low_cardinality.sql @@ -1,2 +1 @@ SELECT mapContainsKeyLike(map('aa', toLowCardinality(1), 'bb', toLowCardinality(2)), toLowCardinality('a%')); - From aa6b70e5f2187be71b6bce835ecff0aa0c0bfca7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 5 Mar 2024 16:55:08 +0000 Subject: [PATCH 030/283] Add documentation to `simpleJSON` functions --- .../sql-reference/functions/json-functions.md | 392 +++++++++++++++--- 1 file changed, 342 insertions(+), 50 deletions(-) diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index 2c837ff4a42..246cb8972fb 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -5,80 +5,372 @@ sidebar_label: JSON --- There are two sets of functions to parse JSON. - - `visitParam*` (`simpleJSON*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast. 
+ - `simpleJSON*` (`visitParam*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast. - `JSONExtract*` is made to parse normal JSON. -# visitParam functions +# simpleJSON/visitParam functions ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be, but they try to do as little as possible to get the job done. The following assumptions are made: 1. The field name (function argument) must be a constant. -2. The field name is somehow canonically encoded in JSON. For example: `visitParamHas('{"abc":"def"}', 'abc') = 1`, but `visitParamHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0` +2. The field name is somehow canonically encoded in JSON. For example: `simpleJSONHas('{"abc":"def"}', 'abc') = 1`, but `simpleJSONHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0` 3. Fields are searched for on any nesting level, indiscriminately. If there are multiple matching fields, the first occurrence is used. 4. The JSON does not have space characters outside of string literals. -## visitParamHas(params, name) +## simpleJSONHas -Checks whether there is a field with the `name` name. +Checks whether there is a field named `field_name`. The result is `UInt8`. -Alias: `simpleJSONHas`. +**Syntax** -## visitParamExtractUInt(params, name) - -Parses UInt64 from the value of the field named `name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0. - -Alias: `simpleJSONExtractUInt`. - -## visitParamExtractInt(params, name) - -The same as for Int64. - -Alias: `simpleJSONExtractInt`. - -## visitParamExtractFloat(params, name) - -The same as for Float64. - -Alias: `simpleJSONExtractFloat`. - -## visitParamExtractBool(params, name) - -Parses a true/false value. The result is UInt8. - -Alias: `simpleJSONExtractBool`. - -## visitParamExtractRaw(params, name) - -Returns the value of a field, including separators. - -Alias: `simpleJSONExtractRaw`. - -Examples: - -``` sql -visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"'; -visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}'; +```sql +simpleJSONHas(json, field_name) ``` -## visitParamExtractString(params, name) +**Parameters** -Parses the string in double quotes. The value is unescaped. If unescaping failed, it returns an empty string. +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) -Alias: `simpleJSONExtractString`. +**Returned value** -Examples: +It returns `1` if the field exists, `0` otherwise. -``` sql -visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0'; -visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺'; -visitParamExtractString('{"abc":"\\u263"}', 'abc') = ''; -visitParamExtractString('{"abc":"hello}', 'abc') = ''; +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"true","qux":1}'); + +SELECT simpleJSONHas(json, 'foo') FROM jsons; +SELECT simpleJSONHas(json, 'bar') FROM jsons; ``` +```response +1 +0 +``` +## simpleJSONExtractUInt + +Parses `UInt64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. 
If the field does not exist, or it exists but does not contain a number, it returns `0`. + +**Syntax** + +```sql +simpleJSONExtractUInt(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns the number parsed from the field if the field exists and contains a number, `0` otherwise. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"4e3"}'); +INSERT INTO jsons VALUES ('{"foo":3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractUInt(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +0 +4 +0 +3 +5 +``` + +## simpleJSONExtractInt + +Parses `Int64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`. + +**Syntax** + +```sql +simpleJSONExtractInt(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns the number parsed from the field if the field exists and contains a number, `0` otherwise. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +0 +-4 +0 +-3 +5 +``` + +## simpleJSONExtractFloat + +Parses `Float64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`. + +**Syntax** + +```sql +simpleJSONExtractFloat(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string) +- `field_name`: The name of the field to search for. [String literal](../syntax#string) + +**Returned value** + +It returns the number parsed from the field if the field exists and contains a number, `0` otherwise. + +**Example** + +Query: + +```sql +CREATE TABLE jsons +( + `json` String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractFloat(json, 'foo') FROM jsons ORDER BY json; +``` + +```response +0 +-4000 +0 +-3.4 +5 +``` + +## simpleJSONExtractBool + +Parses a true/false value from the value of the field named `field_name`. The result is `UInt8`. + +**Syntax** + +```sql +simpleJSONExtractBool(json, field_name) +``` + +**Parameters** + +- `json`: The JSON in which the field is searched for. 
[String](../../sql-reference/data-types/string.md#string)
+- `field_name`: The name of the field to search for. [String literal](../syntax#string)
+
+**Returned value**
+
+It returns `1` if the value of the field is `true`, `0` otherwise. This means this function will return `0` in cases including (but not limited to) the following:
+ - If the field doesn't exist.
+ - If the field contains `true` as a string, e.g.: `{"field":"true"}`.
+ - If the field contains `1` as a numerical value.
+
+**Example**
+
+Query:
+
+```sql
+CREATE TABLE jsons
+(
+    `json` String
+)
+ENGINE = Memory;
+
+INSERT INTO jsons VALUES ('{"foo":false,"bar":true}');
+INSERT INTO jsons VALUES ('{"foo":"true","qux":1}');
+
+SELECT simpleJSONExtractBool(json, 'bar') FROM jsons ORDER BY json;
+SELECT simpleJSONExtractBool(json, 'foo') FROM jsons ORDER BY json;
+```
+
+```response
+0
+1
+0
+0
+```
+
+## simpleJSONExtractRaw
+
+Returns the value of the field named `field_name` as a `String`, including separators.
+
+**Syntax**
+
+```sql
+simpleJSONExtractRaw(json, field_name)
+```
+
+**Parameters**
+
+- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
+- `field_name`: The name of the field to search for. [String literal](../syntax#string)
+
+**Returned value**
+
+It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an emtpy `String` otherwise.
+
+**Example**
+
+Query:
+
+```sql
+CREATE TABLE jsons
+(
+    `json` String
+)
+ENGINE = Memory;
+
+INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
+INSERT INTO jsons VALUES ('{"foo":-3.4}');
+INSERT INTO jsons VALUES ('{"foo":5}');
+INSERT INTO jsons VALUES ('{"foo":{"def":[1,2,3]}}');
+INSERT INTO jsons VALUES ('{"baz":2}');
+
+SELECT simpleJSONExtractRaw(json, 'foo') FROM jsons ORDER BY json;
+```
+
+```response
+
+"-4e3"
+-3.4
+5
+{"def":[1,2,3]}
+```
+
+## simpleJSONExtractString
+
+Parses `String` in double quotes from the value of the field named `field_name`.
+
+**Syntax**
+
+```sql
+simpleJSONExtractString(json, field_name)
+```
+
+**Parameters**
+
+- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
+- `field_name`: The name of the field to search for. [String literal](../syntax#string)
+
+**Returned value**
+
+It returns the value of a field as a [`String`](../../sql-reference/data-types/string.md#string), including separators. The value is unescaped. It returns an empty `String`: if the field doesn't contain a double quoted string, if unescaping fails or if the field doesn't exist.
+
+**Implementation details**
+
 There is currently no support for code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8).
+**Example**
+
+Query:
+
+```sql
+CREATE TABLE jsons
+(
+    `json` String
+)
+ENGINE = Memory;
+
+INSERT INTO jsons VALUES ('{"foo":"\\n\\u0000"}');
+INSERT INTO jsons VALUES ('{"foo":"\\u263"}');
+INSERT INTO jsons VALUES ('{"foo":"\\u263a"}');
+INSERT INTO jsons VALUES ('{"foo":"hello}');
+
+SELECT simpleJSONExtractString(json, 'foo') FROM jsons ORDER BY json;
+```
+
+```response
+\n\0
+
+☺
+
+```
+
+## visitParamHas
+
+This function is [an alias of `simpleJSONHas`](./json-functions#simplejsonhas).
+
+## visitParamExtractUInt
+
+This function is [an alias of `simpleJSONExtractUInt`](./json-functions#simplejsonextractuint).
+ +## visitParamExtractInt + +This function is [an alias of `simpleJSONExtractInt`](./json-functions#simplejsonextractint). + +## visitParamExtractFloat + +This function is [an alias of `simpleJSONExtractFloat`](./json-functions#simplejsonextractfloat). + +## visitParamExtractBool + +This function is [an alias of `simpleJSONExtractBool`](./json-functions#simplejsonextractbool). + +## visitParamExtractRaw + +This function is [an alias of `simpleJSONExtractRaw`](./json-functions#simplejsonextractraw). + +## visitParamExtractString + +This function is [an alias of `simpleJSONExtractString`](./json-functions#simplejsonextractstring). + # JSONExtract functions The following functions are based on [simdjson](https://github.com/lemire/simdjson) designed for more complex JSON parsing requirements. From 981c507d8007a4f7761a83a2ecfa0956a364317d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 5 Mar 2024 17:01:54 +0000 Subject: [PATCH 031/283] Add example to `sin`. --- docs/en/sql-reference/functions/math-functions.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index b27668caf0c..fc659891b5c 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -299,6 +299,18 @@ sin(x) Type: [Float*](../../sql-reference/data-types/float.md). +**Example** + +Query: + +```sql +SELECT sin(1.23); +``` + +```response +0.9424888019316975 +``` + ## cos Returns the cosine of the argument. From 6d4514c045cc565919f9c8384710eee89354f0f3 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 6 Mar 2024 16:55:48 +0800 Subject: [PATCH 032/283] Fix test --- src/Storages/System/StorageSystemDisks.cpp | 10 +++++++++- tests/integration/test_backup_restore_s3/test.py | 12 ++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 30d64156b22..0f8a6640f2c 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -25,6 +25,8 @@ StorageSystemDisks::StorageSystemDisks(const StorageID & table_id_) {"unreserved_space", std::make_shared()}, {"keep_free_space", std::make_shared()}, {"type", std::make_shared()}, + {"object_storage_type", std::make_shared()}, + {"metadata_type", std::make_shared()}, {"is_encrypted", std::make_shared()}, {"is_read_only", std::make_shared()}, {"is_write_once", std::make_shared()}, @@ -53,6 +55,8 @@ Pipe StorageSystemDisks::read( MutableColumnPtr col_unreserved = ColumnUInt64::create(); MutableColumnPtr col_keep = ColumnUInt64::create(); MutableColumnPtr col_type = ColumnString::create(); + MutableColumnPtr col_object_storage_type = ColumnString::create(); + MutableColumnPtr col_metadata_type = ColumnString::create(); MutableColumnPtr col_is_encrypted = ColumnUInt8::create(); MutableColumnPtr col_is_read_only = ColumnUInt8::create(); MutableColumnPtr col_is_write_once = ColumnUInt8::create(); @@ -69,7 +73,9 @@ Pipe StorageSystemDisks::read( col_unreserved->insert(disk_ptr->getUnreservedSpace().value_or(std::numeric_limits::max())); col_keep->insert(disk_ptr->getKeepingFreeSpace()); auto data_source_description = disk_ptr->getDataSourceDescription(); - col_type->insert(data_source_description.toString()); + col_type->insert(data_source_description.type); + col_object_storage_type->insert(data_source_description.object_storage_type); + 
col_metadata_type->insert(data_source_description.metadata_type); col_is_encrypted->insert(data_source_description.is_encrypted); col_is_read_only->insert(disk_ptr->isReadOnly()); col_is_write_once->insert(disk_ptr->isWriteOnce()); @@ -91,6 +97,8 @@ Pipe StorageSystemDisks::read( res_columns.emplace_back(std::move(col_unreserved)); res_columns.emplace_back(std::move(col_keep)); res_columns.emplace_back(std::move(col_type)); + res_columns.emplace_back(std::move(col_object_storage_type)); + res_columns.emplace_back(std::move(col_metadata_type)); res_columns.emplace_back(std::move(col_is_encrypted)); res_columns.emplace_back(std::move(col_is_read_only)); res_columns.emplace_back(std::move(col_is_write_once)); diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 4d3ee8200a3..95e264107e4 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -124,15 +124,15 @@ def check_backup_and_restore( def check_system_tables(backup_query_id=None): disks = [ tuple(disk.split("\t")) - for disk in node.query("SELECT name, type FROM system.disks").split("\n") + for disk in node.query("SELECT name, type, object_storage_type, metadata_type FROM system.disks").split("\n") if disk ] expected_disks = ( - ("default", "local"), - ("disk_s3", "s3"), - ("disk_s3_cache", "s3"), - ("disk_s3_other_bucket", "s3"), - ("disk_s3_plain", "s3_plain"), + ("default", "local", "", ""), + ("disk_s3", "object_storage", "s3", "local"), + ("disk_s3_cache", "object_storage", "s3", "local"), + ("disk_s3_other_bucket", "object_storage", "s3", "local"), + ("disk_s3_plain", "object_storage", "s3", "plain"), ) assert len(expected_disks) == len(disks) for expected_disk in expected_disks: From be98c95f586762cdf20a6375917e30f296175593 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 6 Mar 2024 09:12:26 +0000 Subject: [PATCH 033/283] Automatic style fix --- tests/integration/test_backup_restore_s3/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 95e264107e4..452a9143067 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -124,7 +124,9 @@ def check_backup_and_restore( def check_system_tables(backup_query_id=None): disks = [ tuple(disk.split("\t")) - for disk in node.query("SELECT name, type, object_storage_type, metadata_type FROM system.disks").split("\n") + for disk in node.query( + "SELECT name, type, object_storage_type, metadata_type FROM system.disks" + ).split("\n") if disk ] expected_disks = ( From 56fb61e1866e81e9a00b9b98299ddc56a54f5394 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 6 Mar 2024 10:53:39 +0000 Subject: [PATCH 034/283] Do not duplicate the first category in case of multiple categories in `FunctionDocumentation` --- src/Common/FunctionDocumentation.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common/FunctionDocumentation.cpp b/src/Common/FunctionDocumentation.cpp index 2aad23b90b7..0dc5b48f9d1 100644 --- a/src/Common/FunctionDocumentation.cpp +++ b/src/Common/FunctionDocumentation.cpp @@ -36,6 +36,7 @@ std::string FunctionDocumentation::categoriesAsString() const auto it = categories.begin(); std::string res = *it; + ++it; for (; it != categories.end(); ++it) res += ", " + *it; return res; From 6f726865baf3fea606e7ff46e5d8cd98bda94f5c 
Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Wed, 6 Mar 2024 11:10:02 +0000
Subject: [PATCH 035/283] Add inline docs to functions

---
 src/Functions/sin.cpp                         | 10 +++++-
 src/Functions/visitParamExtractBool.cpp       | 30 +++++++++++++++++-
 src/Functions/visitParamExtractFloat.cpp      | 31 ++++++++++++++++++-
 src/Functions/visitParamExtractInt.cpp        | 31 ++++++++++++++++++-
 src/Functions/visitParamExtractRaw.cpp        | 30 +++++++++++++++++-
 src/Functions/visitParamExtractString.cpp     | 30 +++++++++++++++++-
 src/Functions/visitParamExtractUInt.cpp       | 31 ++++++++++++++++++-
 src/Functions/visitParamHas.cpp               | 23 +++++++++++++-
 ...new_functions_must_be_documented.reference |  8 -----
 9 files changed, 208 insertions(+), 16 deletions(-)

diff --git a/src/Functions/sin.cpp b/src/Functions/sin.cpp
index dc75f4800c0..914f431adb4 100644
--- a/src/Functions/sin.cpp
+++ b/src/Functions/sin.cpp
@@ -13,7 +13,15 @@ using FunctionSin = FunctionMathUnary>;

 REGISTER_FUNCTION(Sin)
 {
-    factory.registerFunction({}, FunctionFactory::CaseInsensitive);
+    factory.registerFunction(
+        FunctionDocumentation{
+            .description = "Returns the sine of the argument.",
+            .syntax = "sin(x)",
+            .arguments = {{"x", "The number whose sine will be returned. (U)Int*, Float* or Decimal*."}},
+            .returned_value = "The sine of x.",
+            .examples = {{.name = "simple", .query = "SELECT sin(1.23)", .result = "0.9424888019316975"}},
+            .categories{"Mathematical", "Trigonometric"}},
+        FunctionFactory::CaseInsensitive);
 }

 }
diff --git a/src/Functions/visitParamExtractBool.cpp b/src/Functions/visitParamExtractBool.cpp
index 31763fe54ce..2c413ec13bb 100644
--- a/src/Functions/visitParamExtractBool.cpp
+++ b/src/Functions/visitParamExtractBool.cpp
@@ -21,7 +21,35 @@ using FunctionSimpleJSONExtractBool = FunctionsStringSearch();
+    factory.registerFunction(FunctionDocumentation{
+        .description = "Parses a true/false value from the value of the field named field_name. The result is UInt8.",
+        .syntax = "simpleJSONExtractBool(json, field_name)",
+        .arguments
+        = {{"json", "The JSON in which the field is searched for. String."},
+           {"field_name", "The name of the field to search for. String literal."}},
+        .returned_value
+        = R"(It returns 1 if the value of the field is true, 0 otherwise. This means this function will return 0 in cases including (but not limited to) the following:
+ - If the field doesn't exist.
+ - If the field contains true as a string, e.g.: {"field":"true"}.
+ - If the field contains 1 as a numerical value.)",
+        .examples
+        = {{.name = "simple",
+            .query = R"(CREATE TABLE jsons
+(
+    json String
+)
+ENGINE = Memory;
+
+INSERT INTO jsons VALUES ('{"foo":false,"bar":true}');
+INSERT INTO jsons VALUES ('{"foo":"true","qux":1}');
+
+SELECT simpleJSONExtractBool(json, 'bar') FROM jsons ORDER BY json;
+SELECT simpleJSONExtractBool(json, 'foo') FROM jsons ORDER BY json;)",
+            .result = R"(0
+1
+0
+0)"}},
+        .categories{"JSON"}});
     factory.registerAlias("visitParamExtractBool", "simpleJSONExtractBool");
 }

diff --git a/src/Functions/visitParamExtractFloat.cpp b/src/Functions/visitParamExtractFloat.cpp
index 6f6d5274050..fc839142cc7 100644
--- a/src/Functions/visitParamExtractFloat.cpp
+++ b/src/Functions/visitParamExtractFloat.cpp
@@ -11,7 +11,36 @@ using FunctionSimpleJSONExtractFloat = FunctionsStringSearch();
+    factory.registerFunction(FunctionDocumentation{
+        .description
+        = "Parses Float64 from the value of the field named field_name. If this is a string field, it tries to parse a number from the "
+          "beginning of the string.
If the field does not exist, or it exists but does not contain a number, it returns 0.", + .syntax = "simpleJSONExtractFloat(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. String literal."}}, + .returned_value = "It returns the number parsed from the field if the field exists and contains a number, 0 otherwise.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractFloat(json, 'foo') FROM jsons ORDER BY json;)", + .result = R"(0 +-4000 +0 +-3.4 +5)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamExtractFloat", "simpleJSONExtractFloat"); } diff --git a/src/Functions/visitParamExtractInt.cpp b/src/Functions/visitParamExtractInt.cpp index e020c43e8b4..4588fc55c52 100644 --- a/src/Functions/visitParamExtractInt.cpp +++ b/src/Functions/visitParamExtractInt.cpp @@ -11,7 +11,36 @@ using FunctionSimpleJSONExtractInt = FunctionsStringSearch(); + factory.registerFunction(FunctionDocumentation{ + .description + = "Parses Int64 from the value of the field named field_name. If this is a string field, it tries to parse a number from the " + "beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.", + .syntax = "simpleJSONExtractInt(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. String literal."}}, + .returned_value = "It returns the number parsed from the field if the field exists and contains a number, 0 otherwise.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"-4e3"}'); +INSERT INTO jsons VALUES ('{"foo":-3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json;)", + .result = R"(0 +-4 +0 +-3 +5)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamExtractInt", "simpleJSONExtractInt"); } diff --git a/src/Functions/visitParamExtractRaw.cpp b/src/Functions/visitParamExtractRaw.cpp index 74a83170545..296429423fe 100644 --- a/src/Functions/visitParamExtractRaw.cpp +++ b/src/Functions/visitParamExtractRaw.cpp @@ -61,7 +61,35 @@ using FunctionSimpleJSONExtractRaw = FunctionsStringSearchToString(); + factory.registerFunction(FunctionDocumentation{ + .description = "Returns the value of the field named field_name as a String, including separators.", + .syntax = "simpleJSONExtractRaw(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. 
String literal."}},
+        .returned_value = "It returns the number parsed from the field if the field exists and contains a number, 0 otherwise.",
+        .examples
+        = {{.name = "simple",
+            .query = R"(CREATE TABLE jsons
+(
+    json String
+)
+ENGINE = Memory;
+
+INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
+INSERT INTO jsons VALUES ('{"foo":-3.4}');
+INSERT INTO jsons VALUES ('{"foo":5}');
+INSERT INTO jsons VALUES ('{"foo":"not1number"}');
+INSERT INTO jsons VALUES ('{"baz":2}');
+
+SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json;)",
+            .result = R"(0
+-4
+0
+-3
+5)"}},
+        .categories{"JSON"}});
     factory.registerAlias("visitParamExtractInt", "simpleJSONExtractInt");
 }

diff --git a/src/Functions/visitParamExtractRaw.cpp b/src/Functions/visitParamExtractRaw.cpp
index 74a83170545..296429423fe 100644
--- a/src/Functions/visitParamExtractRaw.cpp
+++ b/src/Functions/visitParamExtractRaw.cpp
@@ -61,7 +61,35 @@ using FunctionSimpleJSONExtractRaw = FunctionsStringSearchToString();
+    factory.registerFunction(FunctionDocumentation{
+        .description = "Returns the value of the field named field_name as a String, including separators.",
+        .syntax = "simpleJSONExtractRaw(json, field_name)",
+        .arguments
+        = {{"json", "The JSON in which the field is searched for. String."},
+           {"field_name", "The name of the field to search for. String literal."}},
+        .returned_value
+        = "It returns the value of the field as a String including separators if the field exists, or an empty String otherwise.",
+        .examples
+        = {{.name = "simple",
+            .query = R"(CREATE TABLE jsons
+(
+    json String
+)
+ENGINE = Memory;
+
+INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
+INSERT INTO jsons VALUES ('{"foo":-3.4}');
+INSERT INTO jsons VALUES ('{"foo":5}');
+INSERT INTO jsons VALUES ('{"foo":{"def":[1,2,3]}}');
+INSERT INTO jsons VALUES ('{"baz":2}');
+
+SELECT simpleJSONExtractRaw(json, 'foo') FROM jsons ORDER BY json;)",
+            .result = R"(
+"-4e3"
+-3.4
+5
+{"def":[1,2,3]})"}},
+        .categories{"JSON"}});
     factory.registerAlias("visitParamExtractRaw", "simpleJSONExtractRaw");
 }

diff --git a/src/Functions/visitParamExtractString.cpp b/src/Functions/visitParamExtractString.cpp
index 50d5f345189..8dae10638f8 100644
--- a/src/Functions/visitParamExtractString.cpp
+++ b/src/Functions/visitParamExtractString.cpp
@@ -22,7 +22,35 @@ using FunctionSimpleJSONExtractString = FunctionsStringSearchToString();
+    factory.registerFunction(FunctionDocumentation{
+        .description = R"(Parses String in double quotes from the value of the field named field_name.
+
+        There is currently no support for code points in the format \uXXXX\uYYYY that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8).)",
+        .syntax = "simpleJSONExtractString(json, field_name)",
+        .arguments
+        = {{"json", "The JSON in which the field is searched for. String."},
+           {"field_name", "The name of the field to search for. 
String literal."}}, + .returned_value = "It returns the number parsed from the field if the field exists and contains a number, 0 otherwise.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"4e3"}'); +INSERT INTO jsons VALUES ('{"foo":3.4}'); +INSERT INTO jsons VALUES ('{"foo":5}'); +INSERT INTO jsons VALUES ('{"foo":"not1number"}'); +INSERT INTO jsons VALUES ('{"baz":2}'); + +SELECT simpleJSONExtractUInt(json, 'foo') FROM jsons ORDER BY json;)", + .result = R"(0 +4 +0 +3 +5)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamExtractUInt", "simpleJSONExtractUInt"); } diff --git a/src/Functions/visitParamHas.cpp b/src/Functions/visitParamHas.cpp index 1ed1f1d16e7..09fec782980 100644 --- a/src/Functions/visitParamHas.cpp +++ b/src/Functions/visitParamHas.cpp @@ -21,7 +21,28 @@ using FunctionSimpleJSONHas = FunctionsStringSearch(); + factory.registerFunction(FunctionDocumentation{ + .description = "Checks whether there is a field named field_name. The result is UInt8.", + .syntax = "simpleJSONHas(json, field_name)", + .arguments + = {{"json", "The JSON in which the field is searched for. String."}, + {"field_name", "The name of the field to search for. String literal."}}, + .returned_value = "It returns 1 if the field exists, 0 otherwise.", + .examples + = {{.name = "simple", + .query = R"(CREATE TABLE jsons +( + json String +) +ENGINE = Memory; + +INSERT INTO jsons VALUES ('{"foo":"true","qux":1}'); + +SELECT simpleJSONHas(json, 'foo') FROM jsons; +SELECT simpleJSONHas(json, 'bar') FROM jsons;)", + .result = R"(1 +0)"}}, + .categories{"JSON"}}); factory.registerAlias("visitParamHas", "simpleJSONHas"); } diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 379eea4dbbb..0a11e8b5034 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -643,14 +643,6 @@ shardNum showCertificate sigmoid sign -simpleJSONExtractBool -simpleJSONExtractFloat -simpleJSONExtractInt -simpleJSONExtractRaw -simpleJSONExtractString -simpleJSONExtractUInt -simpleJSONHas -sin sinh sipHash128 sipHash128Keyed From 5b94f9b4115e3b7e03118b4a4f4999139e58511e Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 6 Mar 2024 15:31:19 +0100 Subject: [PATCH 036/283] Check children first --- src/Storages/VirtualColumnUtils.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 6d66453442e..e8441b96782 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -469,18 +469,18 @@ static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block & allo static bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) { - if (node->type != ActionsDAG::ActionType::FUNCTION) - return true; - - if (!node->function_base->isDeterministicInScopeOfQuery()) - return false; - for (const auto * child : node->children) { if (!isDeterministicInScopeOfQuery(child)) return false; } + if (node->type != ActionsDAG::ActionType::FUNCTION) + return true; + + if (!node->function_base->isDeterministicInScopeOfQuery()) + return false; + return true; } From 526f162082dfbb4ad2fb5d3d807dfd2ad9b54bdd Mon Sep 17 
00:00:00 2001 From: avogar Date: Thu, 29 Feb 2024 18:20:47 +0000 Subject: [PATCH 037/283] Fix logical error on bad compatibility setting value type --- src/Core/Settings.cpp | 4 ++++ .../03003_compatibility_setting_bad_value.reference | 0 .../0_stateless/03003_compatibility_setting_bad_value.sql | 2 ++ 3 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/03003_compatibility_setting_bad_value.reference create mode 100644 tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index a38197b9eeb..fb456b46d89 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -114,7 +114,11 @@ std::vector Settings::getAllRegisteredNames() const void Settings::set(std::string_view name, const Field & value) { if (name == "compatibility") + { + if (value.getType() != Field::Types::Which::String) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type of value for setting 'compatibility'. Expected String, got {}", value.getTypeName()); applyCompatibilitySetting(value.get()); + } /// If we change setting that was changed by compatibility setting before /// we should remove it from settings_changed_by_compatibility_setting, /// otherwise the next time we will change compatibility setting diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.reference b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql new file mode 100644 index 00000000000..9a6f4e7944a --- /dev/null +++ b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql @@ -0,0 +1,2 @@ +select 42 settings compatibility=NULL; -- {clientError BAD_GET} + From bdb76d9dd4b42ab4f40db0d371165665171afb4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 6 Mar 2024 16:30:22 +0000 Subject: [PATCH 038/283] Fix aspell errors --- docs/en/sql-reference/functions/json-functions.md | 2 +- utils/check-style/aspell-ignore/en/aspell-dict.txt | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index 246cb8972fb..e920ab82988 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -264,7 +264,7 @@ simpleJSONExtractRaw(json, field_name) **Returned value** -It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an emtpy `String` otherwise. +It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise. 
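As an aside on the behavior this sentence documents, here is a minimal sketch contrasting the raw and string extractors on the same input (illustrative query; the results follow the semantics described above):

```sql
-- simpleJSONExtractRaw keeps the field value verbatim, quotes and all;
-- simpleJSONExtractString returns the unescaped value without the quotes.
SELECT
    simpleJSONExtractRaw('{"foo":"-4e3"}', 'foo')    AS raw,  -- "-4e3"
    simpleJSONExtractString('{"foo":"-4e3"}', 'foo') AS str;  -- -4e3
```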
**Example** diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 3614bcb7452..917b2cdcc71 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,4 +1,4 @@ -personal_ws-1.1 en 2724 +personal_ws-1.1 en 2758 AArch ACLs ALTERs @@ -843,7 +843,6 @@ SendScalars ShareAlike SharedMergeTree Shortkeys -Shortkeys SimHash Simhash SimpleAggregateFunction @@ -1703,7 +1702,6 @@ hyperscan hypot hyvor iTerm -iTerm icosahedron icudata idempotency @@ -2327,6 +2325,14 @@ shortcircuit shortkeys shoutout simdjson +simpleJSON +simpleJSONExtractBool +simpleJSONExtractFloat +simpleJSONExtractInt +simpleJSONExtractRaw +simpleJSONExtractString +simpleJSONExtractUInt +simpleJSONHas simpleLinearRegression simpleaggregatefunction simplelinearregression From 77a980373a1dab7c49e5713ba7050d218c1250c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 6 Mar 2024 16:31:27 +0000 Subject: [PATCH 039/283] Fix typo in inline doc --- src/Functions/visitParamExtractRaw.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/visitParamExtractRaw.cpp b/src/Functions/visitParamExtractRaw.cpp index 296429423fe..3cdc5001e13 100644 --- a/src/Functions/visitParamExtractRaw.cpp +++ b/src/Functions/visitParamExtractRaw.cpp @@ -68,7 +68,7 @@ REGISTER_FUNCTION(VisitParamExtractRaw) = {{"json", "The JSON in which the field is searched for. String."}, {"field_name", "The name of the field to search for. String literal."}}, .returned_value - = "It returns the value of the field as a String including separators if the field exists, or an emtpy String otherwise.", + = "It returns the value of the field as a String including separators if the field exists, or an empty String otherwise.", .examples = {{.name = "simple", .query = R"(CREATE TABLE jsons From 50b84954e4810c94c1397504a64ca96e1a0fed55 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 7 Mar 2024 16:29:38 +0800 Subject: [PATCH 040/283] Update .reference --- .../0_stateless/02117_show_create_table_system.reference | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 7382b24afbc..5081527ceef 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -195,6 +195,8 @@ CREATE TABLE system.disks `unreserved_space` UInt64, `keep_free_space` UInt64, `type` String, + `object_storage_type` String, + `metadata_type` String, `is_encrypted` UInt8, `is_read_only` UInt8, `is_write_once` UInt8, From 31ed1966e3c5388e601edd6e97c0497153bb7196 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 7 Mar 2024 16:44:10 +0800 Subject: [PATCH 041/283] Fix build --- src/Disks/ObjectStorages/ObjectStorageFactory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp index 9d7e714445a..46136ad7b12 100644 --- a/src/Disks/ObjectStorages/ObjectStorageFactory.cpp +++ b/src/Disks/ObjectStorages/ObjectStorageFactory.cpp @@ -166,7 +166,7 @@ void registerS3ObjectStorage(ObjectStorageFactory & factory) /// NOTE: should we still perform this check for clickhouse-disks? 
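/// Context for the change below: the factory creator returns the storage as a pointer to the
/// object-storage base interface, while checkS3Capabilities() needs the concrete S3 type, hence
/// the added downcast. A minimal sketch of the pattern (type names assumed for illustration):
///
///     std::shared_ptr<IObjectStorage> storage = create(...);
///     if (auto * s3 = dynamic_cast<S3ObjectStorage *>(storage.get()))
///         checkS3Capabilities(*s3, s3_capabilities, name);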
if (!skip_access_check) - checkS3Capabilities(*object_storage, s3_capabilities, name); + checkS3Capabilities(*dynamic_cast(object_storage.get()), s3_capabilities, name); return object_storage; }); @@ -202,7 +202,7 @@ void registerS3PlainObjectStorage(ObjectStorageFactory & factory) /// NOTE: should we still perform this check for clickhouse-disks? if (!skip_access_check) - checkS3Capabilities(*object_storage, s3_capabilities, name); + checkS3Capabilities(*dynamic_cast(object_storage.get()), s3_capabilities, name); return object_storage; }); From 6c69e7d4dcfdfa21cfcaa103fc1cc7c53dfe0291 Mon Sep 17 00:00:00 2001 From: HowePa <2873679104@qq.com> Date: Thu, 7 Mar 2024 20:29:04 +0800 Subject: [PATCH 042/283] detect output format by file extension in clickhouse-local --- programs/local/LocalServer.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 68f0e52ce08..20974dd9751 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -327,6 +327,14 @@ static bool checkIfStdinIsRegularFile() return fstat(STDIN_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode); } + +static bool checkIfStdoutIsRegularFile() +{ + struct stat file_stat; + return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode); +} + + std::string LocalServer::getInitialCreateTableQuery() { if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!checkIfStdinIsRegularFile() || queries.empty())) @@ -638,7 +646,14 @@ void LocalServer::processConfig() if (config().has("macros")) global_context->setMacros(std::make_unique(config(), "macros", log)); - format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV")); + if (!config().has("output-format") && !config().has("format") && checkIfStdoutIsRegularFile()) + { + std::optional format_from_file_name; + format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO); + format = format_from_file_name ? *format_from_file_name : "TSV"; + } + else + format = config().getString("output-format", config().getString("format", is_interactive ? 
"PrettyCompact" : "TSV")); insert_format = "Values"; /// Setting value from cmd arg overrides one from config From 6d5fd2857ed50047d8acf48766165aa815ca30b9 Mon Sep 17 00:00:00 2001 From: HowePa <2873679104@qq.com> Date: Thu, 7 Mar 2024 20:29:42 +0800 Subject: [PATCH 043/283] detect output format by file extension in clickhouse-client --- programs/client/Client.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index a2bd6b6016a..fac34003553 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -50,6 +50,7 @@ #include #include #include +#include namespace fs = std::filesystem; using namespace std::literals; @@ -1137,6 +1138,13 @@ void Client::processOptions(const OptionsDescription & options_description, } +static bool checkIfStdoutIsRegularFile() +{ + struct stat file_stat; + return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode); +} + + void Client::processConfig() { if (!queries.empty() && config().has("queries-file")) @@ -1173,7 +1181,14 @@ void Client::processConfig() pager = config().getString("pager", ""); is_default_format = !config().has("vertical") && !config().has("format"); - if (config().has("vertical")) + if (is_default_format && checkIfStdoutIsRegularFile()) + { + is_default_format = false; + std::optional format_from_file_name; + format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO); + format = format_from_file_name ? *format_from_file_name : "TabSeparated"; + } + else if (config().has("vertical")) format = config().getString("format", "Vertical"); else format = config().getString("format", is_interactive ? "PrettyCompact" : "TabSeparated"); From 112c1efb7da2619cb67a48ff7fbe65ecea8e44a9 Mon Sep 17 00:00:00 2001 From: HowePa <2873679104@qq.com> Date: Thu, 7 Mar 2024 20:30:24 +0800 Subject: [PATCH 044/283] test detect output format by file extension --- ..._output_format_by_file_extension.reference | 20 +++++++++++++++++++ ..._detect_output_format_by_file_extension.sh | 13 ++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 tests/queries/0_stateless/02181_detect_output_format_by_file_extension.reference create mode 100755 tests/queries/0_stateless/02181_detect_output_format_by_file_extension.sh diff --git a/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.reference b/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.reference new file mode 100644 index 00000000000..7b36cc96f5e --- /dev/null +++ b/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.reference @@ -0,0 +1,20 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.sh b/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.sh new file mode 100755 index 00000000000..ec1edd710a1 --- /dev/null +++ b/tests/queries/0_stateless/02181_detect_output_format_by_file_extension.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +$CLICKHOUSE_LOCAL -q "select * from numbers(10)" > $CLICKHOUSE_TMP/data.parquet +$CLICKHOUSE_LOCAL -q "select * from table" < $CLICKHOUSE_TMP/data.parquet + +$CLICKHOUSE_CLIENT -q "select * from numbers(10)" > $CLICKHOUSE_TMP/data.parquet +$CLICKHOUSE_LOCAL -q "select * from table" < $CLICKHOUSE_TMP/data.parquet From 2196c75dd8ddaeb1d2f18ca7b05fb4ae37550a4b Mon Sep 17 00:00:00 2001 From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com> Date: Fri, 8 Mar 2024 11:07:04 -0400 Subject: [PATCH 045/283] Adds substring-UTF8 docs. --- .../functions/string-functions.md | 69 ++++++++++++++++++- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 3b49e4954ed..f9c3f91a12b 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -588,8 +588,41 @@ Result: ## substringUTF8 -Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. +Returns the substring of a string `s` which starts at the specified byte index `offset` for Unicode code points. Byte counting starts from `1`. If `offset` is `0`, an empty string is returned. If `offset` is negative, the substring starts `pos` characters from the end of the string, rather than from the beginning. An optional argument `length` specifies the maximum number of bytes the returned substring may have. +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +**Syntax** + +```sql +substringUTF8(s, offset[, length]) +``` + +**Arguments** + +- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md) +- `offset`: The starting position of the substring in `s` . [(U)Int*](../../sql-reference/data-types/int-uint.md). +- `length`: The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional. + +**Returned value** + +A substring of `s` with `length` many bytes, starting at index `offset`. + +**Implementation details** + +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +**Example** + +```sql +SELECT 'database' AS string, substringUTF8(string, 5), substringUTF8(string, 5, 1) +``` + +```response +┌─string───┬─substringUTF8('database', 5)─┬─substringUTF8('database', 5, 1)─┐ +│ database │ base │ b │ +└──────────┴──────────────────────────────┴─────────────────────────────────┘ +``` ## substringIndex @@ -624,7 +657,39 @@ Result: ## substringIndexUTF8 -Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. +Returns the substring of `s` before `count` occurrences of the delimiter `delim`, specifically for Unicode code points. + +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +**Syntax** + +```sql +substringIndexUTF8(s, delim, count) +``` + +**Arguments** + +- `s`: The string to extract substring from. 
[String](../../sql-reference/data-types/string.md). +- `delim`: The character to split. [String](../../sql-reference/data-types/string.md). +- `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md) + +**Returned value** + +A substring [String](../../sql-reference/data-types/string.md) of `s` before `count` occurrences of `delim`. + +**Implementation details** + +Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +**Example** + +```sql +SELECT substringIndexUTF8('www.clickhouse.com', '.', 2) +``` + +```response +www.clickhouse +``` ## appendTrailingCharIfAbsent From 0336ef3557b0c3c05ef974a4c6aa6771b3aa0757 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 8 Mar 2024 16:14:11 +0100 Subject: [PATCH 046/283] reload CI From 009c2ea9f6e3c9dcd30a2528f7737cd9059296dd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 05:27:29 +0100 Subject: [PATCH 047/283] Move a class into an anonymous namespace --- src/Disks/IO/ReadBufferFromRemoteFSGather.cpp | 3 --- src/Functions/CastOverloadResolver.cpp | 5 +++++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 417f7615dd7..38da4e96ff1 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -1,7 +1,5 @@ #include "ReadBufferFromRemoteFSGather.h" -#include - #include #include #include @@ -9,7 +7,6 @@ #include #include #include -#include #include using namespace DB; diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index 7fc46db50f1..a72563212ff 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -12,6 +12,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + /** CastInternal does not preserve nullability of the data type, * i.e. CastInternal(toNullable(toInt8(1)) as Int32) will be Int32(1). 
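 * (Illustrative check, using the SQL-level name of the internal cast:
 *  SELECT toTypeName(_CAST(toNullable(toInt8(1)), 'Int32')) is expected to return Int32,
 *  i.e. the Nullable wrapper is dropped.)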
* @@ -136,6 +139,8 @@ using CastOverloadResolver = CastOverloadResolverImpl using CastInternalOverloadResolver = CastOverloadResolverImpl; +} + FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional diagnostic) { From 2be09581ddc0ddfb57134f8a0ea6c33314f8071e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 05:48:52 +0100 Subject: [PATCH 048/283] Split CastOverloadResolver translation unit --- programs/library-bridge/CMakeLists.txt | 3 + programs/odbc-bridge/CMakeLists.txt | 3 + src/Functions/CastOverloadResolver.cpp | 139 +------------------------ 3 files changed, 7 insertions(+), 138 deletions(-) diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index dd0bf67cb64..f42b574b807 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -11,6 +11,9 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES LibraryBridgeHandlers.cpp SharedLibrary.cpp library-bridge.cpp + + ../../src/Functions/CastOverloadResolverImpl.cpp + ../../src/Functions/CastInternalOverloadResolverImpl.cpp ) clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES}) diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 56373601b95..4e5dbac486e 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -13,6 +13,9 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES getIdentifierQuote.cpp odbc-bridge.cpp validateODBCConnectionString.cpp + + ../../src/Functions/CastOverloadResolverImpl.cpp + ../../src/Functions/CastInternalOverloadResolverImpl.cpp ) clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index a72563212ff..1c57bcfa979 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -1,147 +1,11 @@ #include -#include #include -#include +#include namespace DB { -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_ARGUMENT; -} - -namespace -{ - -/** CastInternal does not preserve nullability of the data type, - * i.e. CastInternal(toNullable(toInt8(1)) as Int32) will be Int32(1). - * - * Cast preserves nullability according to setting `cast_keep_nullable`, - * i.e. Cast(toNullable(toInt8(1)) as Int32) will be Nullable(Int32(1)) if `cast_keep_nullable` == 1. - */ -template -class CastOverloadResolverImpl : public IFunctionOverloadResolver -{ -public: - using MonotonicityForRange = FunctionCastBase::MonotonicityForRange; - - static constexpr auto name = cast_type == CastType::accurate - ? CastName::accurate_cast_name - : (cast_type == CastType::accurateOrNull ? 
CastName::accurate_cast_or_null_name : CastName::cast_name); - - String getName() const override { return name; } - - size_t getNumberOfArguments() const override { return 2; } - - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - - explicit CastOverloadResolverImpl(ContextPtr context_, std::optional diagnostic_, bool keep_nullable_, const DataTypeValidationSettings & data_type_validation_settings_) - : context(context_) - , diagnostic(std::move(diagnostic_)) - , keep_nullable(keep_nullable_) - , data_type_validation_settings(data_type_validation_settings_) - { - } - - static FunctionOverloadResolverPtr create(ContextPtr context) - { - const auto & settings_ref = context->getSettingsRef(); - - if constexpr (internal) - return createImpl(context, {}, false /*keep_nullable*/); - - return createImpl(context, {}, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref)); - } - - static FunctionOverloadResolverPtr createImpl(ContextPtr context, std::optional diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {}) - { - assert(!internal || !keep_nullable); - return std::make_unique(context, std::move(diagnostic), keep_nullable, data_type_validation_settings); - } - - static FunctionOverloadResolverPtr createImpl(std::optional diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {}) - { - assert(!internal || !keep_nullable); - return std::make_unique(ContextPtr(), std::move(diagnostic), keep_nullable, data_type_validation_settings); - } - -protected: - - FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override - { - DataTypes data_types(arguments.size()); - - for (size_t i = 0; i < arguments.size(); ++i) - data_types[i] = arguments[i].type; - - auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get()); - return std::make_unique>(context, name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type); - } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - const auto & column = arguments.back().column; - if (!column) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. " - "Instead there is non-constant column of type {}", getName(), arguments.back().type->getName()); - - const auto * type_col = checkAndGetColumnConst(column.get()); - if (!type_col) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. 
" - "Instead there is a column with the following structure: {}", getName(), column->dumpStructure()); - - DataTypePtr type = DataTypeFactory::instance().get(type_col->getValue()); - validateDataType(type, data_type_validation_settings); - - if constexpr (cast_type == CastType::accurateOrNull) - return makeNullable(type); - - if constexpr (internal) - return type; - - if (keep_nullable && arguments.front().type->isNullable() && type->canBeInsideNullable()) - return makeNullable(type); - - return type; - } - - bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForNothing() const override { return false; } - bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } - -private: - ContextPtr context; - std::optional diagnostic; - bool keep_nullable; - DataTypeValidationSettings data_type_validation_settings; -}; - - -struct CastOverloadName -{ - static constexpr auto cast_name = "CAST"; - static constexpr auto accurate_cast_name = "accurateCast"; - static constexpr auto accurate_cast_or_null_name = "accurateCastOrNull"; -}; - -struct CastInternalOverloadName -{ - static constexpr auto cast_name = "_CAST"; - static constexpr auto accurate_cast_name = "accurate_Cast"; - static constexpr auto accurate_cast_or_null_name = "accurate_CastOrNull"; -}; - -template -using CastOverloadResolver = CastOverloadResolverImpl; - -template -using CastInternalOverloadResolver = CastOverloadResolverImpl; - -} - - FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional diagnostic) { switch (type) @@ -155,7 +19,6 @@ FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, st } } - REGISTER_FUNCTION(CastOverloadResolvers) { factory.registerFunction>({}, FunctionFactory::CaseInsensitive); From c67e8d5d73a6ac1647a023740a859e739d6ab2ee Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 05:50:19 +0100 Subject: [PATCH 049/283] Add files --- .../CastInternalOverloadResolverImpl.cpp | 10 ++ src/Functions/CastOverloadResolverImpl.cpp | 10 ++ src/Functions/CastOverloadResolverImpl.h | 149 ++++++++++++++++++ 3 files changed, 169 insertions(+) create mode 100644 src/Functions/CastInternalOverloadResolverImpl.cpp create mode 100644 src/Functions/CastOverloadResolverImpl.cpp create mode 100644 src/Functions/CastOverloadResolverImpl.h diff --git a/src/Functions/CastInternalOverloadResolverImpl.cpp b/src/Functions/CastInternalOverloadResolverImpl.cpp new file mode 100644 index 00000000000..d8ee0c76fd8 --- /dev/null +++ b/src/Functions/CastInternalOverloadResolverImpl.cpp @@ -0,0 +1,10 @@ +#include + +namespace DB +{ + +template class CastOverloadResolverImpl; +template class CastOverloadResolverImpl; +template class CastOverloadResolverImpl; + +} diff --git a/src/Functions/CastOverloadResolverImpl.cpp b/src/Functions/CastOverloadResolverImpl.cpp new file mode 100644 index 00000000000..d2325db5e0a --- /dev/null +++ b/src/Functions/CastOverloadResolverImpl.cpp @@ -0,0 +1,10 @@ +#include + +namespace DB +{ + +template class CastOverloadResolverImpl; +template class CastOverloadResolverImpl; +template class CastOverloadResolverImpl; + +} diff --git a/src/Functions/CastOverloadResolverImpl.h b/src/Functions/CastOverloadResolverImpl.h new file mode 100644 index 00000000000..61ccc66fb6c --- /dev/null +++ b/src/Functions/CastOverloadResolverImpl.h @@ -0,0 +1,149 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int 
ILLEGAL_TYPE_OF_ARGUMENT; +} + + +/** CastInternal does not preserve nullability of the data type, + * i.e. CastInternal(toNullable(toInt8(1)) as Int32) will be Int32(1). + * + * Cast preserves nullability according to setting `cast_keep_nullable`, + * i.e. Cast(toNullable(toInt8(1)) as Int32) will be Nullable(Int32(1)) if `cast_keep_nullable` == 1. + */ +template +class CastOverloadResolverImpl : public IFunctionOverloadResolver +{ +public: + using MonotonicityForRange = FunctionCastBase::MonotonicityForRange; + + static constexpr auto name = cast_type == CastType::accurate + ? CastName::accurate_cast_name + : (cast_type == CastType::accurateOrNull ? CastName::accurate_cast_or_null_name : CastName::cast_name); + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + explicit CastOverloadResolverImpl(ContextPtr context_, std::optional diagnostic_, bool keep_nullable_, const DataTypeValidationSettings & data_type_validation_settings_) + : context(context_) + , diagnostic(std::move(diagnostic_)) + , keep_nullable(keep_nullable_) + , data_type_validation_settings(data_type_validation_settings_) + { + } + + static FunctionOverloadResolverPtr create(ContextPtr context) + { + const auto & settings_ref = context->getSettingsRef(); + + if constexpr (internal) + return createImpl(context, {}, false /*keep_nullable*/); + + return createImpl(context, {}, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref)); + } + + static FunctionOverloadResolverPtr createImpl(ContextPtr context, std::optional diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {}) + { + assert(!internal || !keep_nullable); + return std::make_unique(context, std::move(diagnostic), keep_nullable, data_type_validation_settings); + } + + static FunctionOverloadResolverPtr createImpl(std::optional diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {}) + { + assert(!internal || !keep_nullable); + return std::make_unique(ContextPtr(), std::move(diagnostic), keep_nullable, data_type_validation_settings); + } + +protected: + + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override + { + DataTypes data_types(arguments.size()); + + for (size_t i = 0; i < arguments.size(); ++i) + data_types[i] = arguments[i].type; + + auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get()); + return std::make_unique>(context, name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type); + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + const auto & column = arguments.back().column; + if (!column) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. " + "Instead there is non-constant column of type {}", getName(), arguments.back().type->getName()); + + const auto * type_col = checkAndGetColumnConst(column.get()); + if (!type_col) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. 
" + "Instead there is a column with the following structure: {}", getName(), column->dumpStructure()); + + DataTypePtr type = DataTypeFactory::instance().get(type_col->getValue()); + validateDataType(type, data_type_validation_settings); + + if constexpr (cast_type == CastType::accurateOrNull) + return makeNullable(type); + + if constexpr (internal) + return type; + + if (keep_nullable && arguments.front().type->isNullable() && type->canBeInsideNullable()) + return makeNullable(type); + + return type; + } + + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + +private: + ContextPtr context; + std::optional diagnostic; + bool keep_nullable; + DataTypeValidationSettings data_type_validation_settings; +}; + + +struct CastOverloadName +{ + static constexpr auto cast_name = "CAST"; + static constexpr auto accurate_cast_name = "accurateCast"; + static constexpr auto accurate_cast_or_null_name = "accurateCastOrNull"; +}; + +struct CastInternalOverloadName +{ + static constexpr auto cast_name = "_CAST"; + static constexpr auto accurate_cast_name = "accurate_Cast"; + static constexpr auto accurate_cast_or_null_name = "accurate_CastOrNull"; +}; + +template +using CastOverloadResolver = CastOverloadResolverImpl; + +template +using CastInternalOverloadResolver = CastOverloadResolverImpl; + +extern template class CastOverloadResolverImpl; +extern template class CastOverloadResolverImpl; +extern template class CastOverloadResolverImpl; + +extern template class CastOverloadResolverImpl; +extern template class CastOverloadResolverImpl; +extern template class CastOverloadResolverImpl; + +} From 6d45eecdad4b737584b9f64e9a44c8fd845f17a4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 06:11:34 +0100 Subject: [PATCH 050/283] Remove garbage --- programs/odbc-bridge/CMakeLists.txt | 3 -- src/Functions/CMakeLists.txt | 2 + .../CastInternalOverloadResolverImpl.cpp | 6 +-- src/Functions/CastOverloadResolverImpl.cpp | 6 +-- src/Functions/CastOverloadResolverImpl.h | 51 +++++++++---------- src/Functions/FunctionsConversion.cpp | 15 ++++++ src/Functions/FunctionsConversion.h | 17 +------ 7 files changed, 49 insertions(+), 51 deletions(-) diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 4e5dbac486e..56373601b95 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -13,9 +13,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES getIdentifierQuote.cpp odbc-bridge.cpp validateODBCConnectionString.cpp - - ../../src/Functions/CastOverloadResolverImpl.cpp - ../../src/Functions/CastInternalOverloadResolverImpl.cpp ) clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index ac3e3671ae0..f27bcae1fe3 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -15,6 +15,8 @@ extract_into_parent_list(clickhouse_functions_sources dbms_sources checkHyperscanRegexp.cpp array/has.cpp CastOverloadResolver.cpp + CastOverloadResolverImpl.cpp + CastInternalOverloadResolverImpl.cpp ) extract_into_parent_list(clickhouse_functions_headers dbms_headers IFunction.h diff --git a/src/Functions/CastInternalOverloadResolverImpl.cpp b/src/Functions/CastInternalOverloadResolverImpl.cpp index d8ee0c76fd8..8b74f76ca39 100644 --- 
a/src/Functions/CastInternalOverloadResolverImpl.cpp +++ b/src/Functions/CastInternalOverloadResolverImpl.cpp @@ -3,8 +3,8 @@ namespace DB { -template class CastOverloadResolverImpl; -template class CastOverloadResolverImpl; -template class CastOverloadResolverImpl; +template class CastOverloadResolverImpl; +template class CastOverloadResolverImpl; +template class CastOverloadResolverImpl; } diff --git a/src/Functions/CastOverloadResolverImpl.cpp b/src/Functions/CastOverloadResolverImpl.cpp index d2325db5e0a..a7f7024892e 100644 --- a/src/Functions/CastOverloadResolverImpl.cpp +++ b/src/Functions/CastOverloadResolverImpl.cpp @@ -3,8 +3,8 @@ namespace DB { -template class CastOverloadResolverImpl; -template class CastOverloadResolverImpl; -template class CastOverloadResolverImpl; +template class CastOverloadResolverImpl; +template class CastOverloadResolverImpl; +template class CastOverloadResolverImpl; } diff --git a/src/Functions/CastOverloadResolverImpl.h b/src/Functions/CastOverloadResolverImpl.h index 61ccc66fb6c..36376c5f254 100644 --- a/src/Functions/CastOverloadResolverImpl.h +++ b/src/Functions/CastOverloadResolverImpl.h @@ -13,6 +13,15 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +struct CastName +{ + static constexpr auto name = "CAST"; +}; + +struct CastInternalName +{ + static constexpr auto name = "_CAST"; +}; /** CastInternal does not preserve nullability of the data type, * i.e. CastInternal(toNullable(toInt8(1)) as Int32) will be Int32(1). @@ -20,15 +29,16 @@ namespace ErrorCodes * Cast preserves nullability according to setting `cast_keep_nullable`, * i.e. Cast(toNullable(toInt8(1)) as Int32) will be Nullable(Int32(1)) if `cast_keep_nullable` == 1. */ -template +template class CastOverloadResolverImpl : public IFunctionOverloadResolver { public: using MonotonicityForRange = FunctionCastBase::MonotonicityForRange; static constexpr auto name = cast_type == CastType::accurate - ? CastName::accurate_cast_name - : (cast_type == CastType::accurateOrNull ? CastName::accurate_cast_or_null_name : CastName::cast_name); + ? "accurateCast" + : (cast_type == CastType::accurateOrNull ? "accurateCastOrNull" + : (internal ? 
"_CAST" : "CAST")); String getName() const override { return name; } @@ -76,7 +86,9 @@ protected: data_types[i] = arguments[i].type; auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get()); - return std::make_unique>(context, name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type); + + using Function = FunctionCast>; + return std::make_unique(context, name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type); } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override @@ -118,32 +130,19 @@ private: }; -struct CastOverloadName -{ - static constexpr auto cast_name = "CAST"; - static constexpr auto accurate_cast_name = "accurateCast"; - static constexpr auto accurate_cast_or_null_name = "accurateCastOrNull"; -}; - -struct CastInternalOverloadName -{ - static constexpr auto cast_name = "_CAST"; - static constexpr auto accurate_cast_name = "accurate_Cast"; - static constexpr auto accurate_cast_or_null_name = "accurate_CastOrNull"; -}; +template +using CastOverloadResolver = CastOverloadResolverImpl; template -using CastOverloadResolver = CastOverloadResolverImpl; +using CastInternalOverloadResolver = CastOverloadResolverImpl; -template -using CastInternalOverloadResolver = CastOverloadResolverImpl; -extern template class CastOverloadResolverImpl; -extern template class CastOverloadResolverImpl; -extern template class CastOverloadResolverImpl; +extern template class CastOverloadResolverImpl; +extern template class CastOverloadResolverImpl; +extern template class CastOverloadResolverImpl; -extern template class CastOverloadResolverImpl; -extern template class CastOverloadResolverImpl; -extern template class CastOverloadResolverImpl; +extern template class CastOverloadResolverImpl; +extern template class CastOverloadResolverImpl; +extern template class CastOverloadResolverImpl; } diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 01e057e19a1..59455ba51b7 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -5,6 +5,21 @@ namespace DB { +UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column) +{ + const auto * arg_type = named_column.type.get(); + bool ok = checkAndGetDataType(arg_type) + || checkAndGetDataType(arg_type) + || checkAndGetDataType(arg_type) + || checkAndGetDataType(arg_type); + if (!ok) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of toDecimal() scale {}", named_column.type->getName()); + + Field field; + named_column.column->get(0, field); + return static_cast(field.get()); +} + REGISTER_FUNCTION(Conversion) { factory.registerFunction(); diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 1522e76893e..fac74715fa0 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -98,20 +98,7 @@ namespace ErrorCodes * toType - conversion in "natural way"; */ -inline UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column) -{ - const auto * arg_type = named_column.type.get(); - bool ok = checkAndGetDataType(arg_type) - || checkAndGetDataType(arg_type) - || checkAndGetDataType(arg_type) - || checkAndGetDataType(arg_type); - if (!ok) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of toDecimal() scale {}", named_column.type->getName()); - - Field field; - named_column.column->get(0, field); - return static_cast(field.get()); -} 
+UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column); /// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type. struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; }; @@ -3182,8 +3169,6 @@ private: std::optional diagnostic; }; -struct CastName { static constexpr auto name = "CAST"; }; -struct CastInternalName { static constexpr auto name = "_CAST"; }; class FunctionCastBase : public IFunctionBase { From ea54ac3cb4842c332529bab14c0e619115911a2c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 06:12:22 +0100 Subject: [PATCH 051/283] Remove garbage --- programs/library-bridge/CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index f42b574b807..dd0bf67cb64 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -11,9 +11,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES LibraryBridgeHandlers.cpp SharedLibrary.cpp library-bridge.cpp - - ../../src/Functions/CastOverloadResolverImpl.cpp - ../../src/Functions/CastInternalOverloadResolverImpl.cpp ) clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES}) From 7983e2b6206c5b78158802430b639e9b060d7b0c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 06:14:32 +0100 Subject: [PATCH 052/283] Remove garbage --- src/Functions/CastOverloadResolver.cpp | 15 +++++++++++++++ src/Functions/FunctionsConversion.cpp | 15 --------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index 1c57bcfa979..a343dbb62fe 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -6,6 +6,21 @@ namespace DB { +UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column) +{ + const auto * arg_type = named_column.type.get(); + bool ok = checkAndGetDataType(arg_type) + || checkAndGetDataType(arg_type) + || checkAndGetDataType(arg_type) + || checkAndGetDataType(arg_type); + if (!ok) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of toDecimal() scale {}", named_column.type->getName()); + + Field field; + named_column.column->get(0, field); + return static_cast(field.get()); +} + FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional diagnostic) { switch (type) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 59455ba51b7..01e057e19a1 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -5,21 +5,6 @@ namespace DB { -UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column) -{ - const auto * arg_type = named_column.type.get(); - bool ok = checkAndGetDataType(arg_type) - || checkAndGetDataType(arg_type) - || checkAndGetDataType(arg_type) - || checkAndGetDataType(arg_type); - if (!ok) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of toDecimal() scale {}", named_column.type->getName()); - - Field field; - named_column.column->get(0, field); - return static_cast(field.get()); -} - REGISTER_FUNCTION(Conversion) { factory.registerFunction(); From 0d05b8ccc1f56d774c502f780cbbd98326dd8401 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 06:37:50 +0100 Subject: [PATCH 053/283] Fix style --- src/Functions/CastOverloadResolver.cpp | 5 +++++ 1 file 
changed, 5 insertions(+) diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index a343dbb62fe..6bacc7f4847 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -6,6 +6,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column) { const auto * arg_type = named_column.type.get(); From dc7f4b39eef4c2a9b47cee7b7197c58fd86c0520 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 07:00:28 +0100 Subject: [PATCH 054/283] Remove garbage --- src/Functions/CMakeLists.txt | 2 - .../CastInternalOverloadResolverImpl.cpp | 10 --- src/Functions/CastOverloadResolver.cpp | 21 +----- src/Functions/CastOverloadResolverImpl.cpp | 10 --- src/Functions/CastOverloadResolverImpl.h | 73 +++++++------------ 5 files changed, 31 insertions(+), 85 deletions(-) delete mode 100644 src/Functions/CastInternalOverloadResolverImpl.cpp delete mode 100644 src/Functions/CastOverloadResolverImpl.cpp diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index f27bcae1fe3..ac3e3671ae0 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -15,8 +15,6 @@ extract_into_parent_list(clickhouse_functions_sources dbms_sources checkHyperscanRegexp.cpp array/has.cpp CastOverloadResolver.cpp - CastOverloadResolverImpl.cpp - CastInternalOverloadResolverImpl.cpp ) extract_into_parent_list(clickhouse_functions_headers dbms_headers IFunction.h diff --git a/src/Functions/CastInternalOverloadResolverImpl.cpp b/src/Functions/CastInternalOverloadResolverImpl.cpp deleted file mode 100644 index 8b74f76ca39..00000000000 --- a/src/Functions/CastInternalOverloadResolverImpl.cpp +++ /dev/null @@ -1,10 +0,0 @@ -#include - -namespace DB -{ - -template class CastOverloadResolverImpl; -template class CastOverloadResolverImpl; -template class CastOverloadResolverImpl; - -} diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index 6bacc7f4847..79b17d3297c 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -26,27 +26,14 @@ UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column) return static_cast(field.get()); } -FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional diagnostic) -{ - switch (type) - { - case CastType::nonAccurate: - return CastInternalOverloadResolver::createImpl(diagnostic); - case CastType::accurate: - return CastInternalOverloadResolver::createImpl(diagnostic); - case CastType::accurateOrNull: - return CastInternalOverloadResolver::createImpl(diagnostic); - } -} - REGISTER_FUNCTION(CastOverloadResolvers) { - factory.registerFunction>({}, FunctionFactory::CaseInsensitive); + factory.registerFunction("_CAST", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::nonAccurate, true); }, {}, FunctionFactory::CaseInsensitive); /// Note: "internal" (not affected by null preserving setting) versions of accurate cast functions are unneeded. 
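/// Sketch of the shape registration takes after this refactoring: the cast variant is selected
/// by runtime arguments to create() rather than by template parameters, e.g. (names as
/// introduced in this patch):
///     factory.registerFunction("accurateCast",
///         [](ContextPtr ctx) { return CastOverloadResolverImpl::create(ctx, CastType::accurate, false); });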
- factory.registerFunction>({}, FunctionFactory::CaseInsensitive); - factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction("CAST", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::nonAccurate, false); }, {}, FunctionFactory::CaseInsensitive); + factory.registerFunction("accurateCast", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::accurate, false); }, {}); + factory.registerFunction("accurateCastOrNull", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::accurateOrNull, false); }, {}); } } diff --git a/src/Functions/CastOverloadResolverImpl.cpp b/src/Functions/CastOverloadResolverImpl.cpp deleted file mode 100644 index a7f7024892e..00000000000 --- a/src/Functions/CastOverloadResolverImpl.cpp +++ /dev/null @@ -1,10 +0,0 @@ -#include - -namespace DB -{ - -template class CastOverloadResolverImpl; -template class CastOverloadResolverImpl; -template class CastOverloadResolverImpl; - -} diff --git a/src/Functions/CastOverloadResolverImpl.h b/src/Functions/CastOverloadResolverImpl.h index 36376c5f254..b3f3e50ebf8 100644 --- a/src/Functions/CastOverloadResolverImpl.h +++ b/src/Functions/CastOverloadResolverImpl.h @@ -29,55 +29,48 @@ struct CastInternalName * Cast preserves nullability according to setting `cast_keep_nullable`, * i.e. Cast(toNullable(toInt8(1)) as Int32) will be Nullable(Int32(1)) if `cast_keep_nullable` == 1. */ -template class CastOverloadResolverImpl : public IFunctionOverloadResolver { public: using MonotonicityForRange = FunctionCastBase::MonotonicityForRange; - static constexpr auto name = cast_type == CastType::accurate - ? "accurateCast" - : (cast_type == CastType::accurateOrNull ? "accurateCastOrNull" - : (internal ? 
"_CAST" : "CAST")); - - String getName() const override { return name; } + String getName() const override + { + if (cast_type == CastType::accurate) + return "accurateCast"; + if (cast_type == CastType::accurateOrNull) + return "accurateCastOrNull"; + if (internal) + return "_CAST"; + else + return "CAST"; + } size_t getNumberOfArguments() const override { return 2; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - explicit CastOverloadResolverImpl(ContextPtr context_, std::optional diagnostic_, bool keep_nullable_, const DataTypeValidationSettings & data_type_validation_settings_) + explicit CastOverloadResolverImpl(ContextPtr context_, CastType cast_type_, bool internal_, std::optional diagnostic_, bool keep_nullable_, const DataTypeValidationSettings & data_type_validation_settings_) : context(context_) + , cast_type(cast_type_) + , internal(internal_) , diagnostic(std::move(diagnostic_)) , keep_nullable(keep_nullable_) , data_type_validation_settings(data_type_validation_settings_) { } - static FunctionOverloadResolverPtr create(ContextPtr context) + static FunctionOverloadResolverPtr create(ContextPtr context, CastType cast_type, bool internal) { const auto & settings_ref = context->getSettingsRef(); - if constexpr (internal) - return createImpl(context, {}, false /*keep_nullable*/); - - return createImpl(context, {}, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref)); - } - - static FunctionOverloadResolverPtr createImpl(ContextPtr context, std::optional diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {}) - { - assert(!internal || !keep_nullable); - return std::make_unique(context, std::move(diagnostic), keep_nullable, data_type_validation_settings); - } - - static FunctionOverloadResolverPtr createImpl(std::optional diagnostic = {}, bool keep_nullable = false, const DataTypeValidationSettings & data_type_validation_settings = {}) - { - assert(!internal || !keep_nullable); - return std::make_unique(ContextPtr(), std::move(diagnostic), keep_nullable, data_type_validation_settings); + if (internal) + return std::make_unique(context, cast_type, internal, std::nullopt, false /*keep_nullable*/, DataTypeValidationSettings{}); + else + return std::make_unique(context, cast_type, internal, std::nullopt, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref)); } protected: - FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override { DataTypes data_types(arguments.size()); @@ -87,8 +80,10 @@ protected: auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get()); - using Function = FunctionCast>; - return std::make_unique(context, name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type); + if (internal) + return std::make_unique>(context, CastInternalName::name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type); + else + return std::make_unique>(context, CastName::name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type); } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override @@ -106,10 +101,10 @@ protected: DataTypePtr type = DataTypeFactory::instance().get(type_col->getValue()); validateDataType(type, data_type_validation_settings); - if constexpr (cast_type == CastType::accurateOrNull) + if (cast_type == CastType::accurateOrNull) return 
makeNullable(type); - if constexpr (internal) + if (internal) return type; if (keep_nullable && arguments.front().type->isNullable() && type->canBeInsideNullable()) @@ -124,25 +119,11 @@ protected: private: ContextPtr context; + CastType cast_type; + bool internal; std::optional diagnostic; bool keep_nullable; DataTypeValidationSettings data_type_validation_settings; }; - -template -using CastOverloadResolver = CastOverloadResolverImpl; - -template -using CastInternalOverloadResolver = CastOverloadResolverImpl; - - -extern template class CastOverloadResolverImpl; -extern template class CastOverloadResolverImpl; -extern template class CastOverloadResolverImpl; - -extern template class CastOverloadResolverImpl; -extern template class CastOverloadResolverImpl; -extern template class CastOverloadResolverImpl; - } From 83a5611355fdb7bf6ad95a8ae6b3e60321e00c69 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 07:04:05 +0100 Subject: [PATCH 055/283] Remove garbage --- src/Functions/CastOverloadResolver.cpp | 13 +++++++++---- src/Functions/CastOverloadResolverImpl.h | 6 +++--- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index 79b17d3297c..898842c8505 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -26,14 +26,19 @@ UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column) return static_cast(field.get()); } +FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional diagnostic) +{ + return CastOverloadResolverImpl::create(ContextPtr{}, type, true, diagnostic); +} + REGISTER_FUNCTION(CastOverloadResolvers) { - factory.registerFunction("_CAST", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::nonAccurate, true); }, {}, FunctionFactory::CaseInsensitive); + factory.registerFunction("_CAST", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::nonAccurate, true, {}); }, {}, FunctionFactory::CaseInsensitive); /// Note: "internal" (not affected by null preserving setting) versions of accurate cast functions are unneeded. 
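/// In this revision create() additionally threads an explicit diagnostic argument, so the
/// registrations below simply pass an empty one; the internal variant is likewise funneled
/// through createInternalCastOverloadResolver() defined above (call shape from this patch):
///     CastOverloadResolverImpl::create(context, CastType::accurate, /*internal=*/ false, /*diagnostic=*/ {});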
- factory.registerFunction("CAST", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::nonAccurate, false); }, {}, FunctionFactory::CaseInsensitive); - factory.registerFunction("accurateCast", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::accurate, false); }, {}); - factory.registerFunction("accurateCastOrNull", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::accurateOrNull, false); }, {}); + factory.registerFunction("CAST", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::nonAccurate, false, {}); }, {}, FunctionFactory::CaseInsensitive); + factory.registerFunction("accurateCast", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::accurate, false, {}); }, {}); + factory.registerFunction("accurateCastOrNull", [](ContextPtr context){ return CastOverloadResolverImpl::create(context, CastType::accurateOrNull, false, {}); }, {}); } } diff --git a/src/Functions/CastOverloadResolverImpl.h b/src/Functions/CastOverloadResolverImpl.h index b3f3e50ebf8..74b3fd3df3f 100644 --- a/src/Functions/CastOverloadResolverImpl.h +++ b/src/Functions/CastOverloadResolverImpl.h @@ -60,14 +60,14 @@ public: { } - static FunctionOverloadResolverPtr create(ContextPtr context, CastType cast_type, bool internal) + static FunctionOverloadResolverPtr create(ContextPtr context, CastType cast_type, bool internal, std::optional diagnostic) { const auto & settings_ref = context->getSettingsRef(); if (internal) - return std::make_unique(context, cast_type, internal, std::nullopt, false /*keep_nullable*/, DataTypeValidationSettings{}); + return std::make_unique(context, cast_type, internal, diagnostic, false /*keep_nullable*/, DataTypeValidationSettings{}); else - return std::make_unique(context, cast_type, internal, std::nullopt, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref)); + return std::make_unique(context, cast_type, internal, diagnostic, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref)); } protected: From 574d4863224ed7343db64f976f53442f1d166e4a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 07:55:59 +0100 Subject: [PATCH 056/283] Something --- programs/library-bridge/CMakeLists.txt | 1 + .../library-bridge/createFunctionBaseCast.cpp | 19 +++++++++++++ programs/odbc-bridge/CMakeLists.txt | 1 + .../odbc-bridge/createFunctionBaseCast.cpp | 19 +++++++++++++ src/Functions/CastOverloadResolver.cpp | 1 + src/Functions/CastOverloadResolverImpl.h | 28 ++++--------------- src/Functions/FunctionsConversion.cpp | 23 +++++++++++++++ src/Functions/FunctionsConversion.h | 7 +++++ 8 files changed, 77 insertions(+), 22 deletions(-) create mode 100644 programs/library-bridge/createFunctionBaseCast.cpp create mode 100644 programs/odbc-bridge/createFunctionBaseCast.cpp diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index dd0bf67cb64..98d8848502d 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -11,6 +11,7 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES LibraryBridgeHandlers.cpp SharedLibrary.cpp library-bridge.cpp + createFunctionBaseCast.cpp ) clickhouse_add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES}) diff --git a/programs/library-bridge/createFunctionBaseCast.cpp b/programs/library-bridge/createFunctionBaseCast.cpp new file mode 100644 index 00000000000..473aa1ca81d --- 
/dev/null +++ b/programs/library-bridge/createFunctionBaseCast.cpp @@ -0,0 +1,19 @@ +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +FunctionBasePtr createFunctionBaseCast( + ContextPtr, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for ODBC Bridge"); +} + +} diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 56373601b95..18cda4d7a04 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -13,6 +13,7 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES getIdentifierQuote.cpp odbc-bridge.cpp validateODBCConnectionString.cpp + createFunctionBaseCast.cpp ) clickhouse_add_executable(clickhouse-odbc-bridge ${CLICKHOUSE_ODBC_BRIDGE_SOURCES}) diff --git a/programs/odbc-bridge/createFunctionBaseCast.cpp b/programs/odbc-bridge/createFunctionBaseCast.cpp new file mode 100644 index 00000000000..473aa1ca81d --- /dev/null +++ b/programs/odbc-bridge/createFunctionBaseCast.cpp @@ -0,0 +1,19 @@ +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +FunctionBasePtr createFunctionBaseCast( + ContextPtr, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for ODBC Bridge"); +} + +} diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index 898842c8505..6ab6f95410f 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/CastOverloadResolverImpl.h b/src/Functions/CastOverloadResolverImpl.h index 74b3fd3df3f..2bb83040163 100644 --- a/src/Functions/CastOverloadResolverImpl.h +++ b/src/Functions/CastOverloadResolverImpl.h @@ -1,7 +1,12 @@ #pragma once #include +#include #include +#include +#include +#include +#include #include @@ -13,15 +18,6 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } -struct CastName -{ - static constexpr auto name = "CAST"; -}; - -struct CastInternalName -{ - static constexpr auto name = "_CAST"; -}; /** CastInternal does not preserve nullability of the data type, * i.e. CastInternal(toNullable(toInt8(1)) as Int32) will be Int32(1). 
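The two stub files above form a link-time seam: every executable links some definition of `createFunctionBaseCast`, but only binaries that actually need type conversions pay for the heavy implementation in FunctionsConversion.cpp. A minimal self-contained sketch of the pattern (all names here are illustrative, not from the patch):

``` cpp
// cast_seam.h -- the declaration is all that call sites need to see.
#include <memory>
#include <stdexcept>

struct FunctionBaseSketch { virtual ~FunctionBaseSketch() = default; };
std::unique_ptr<FunctionBaseSketch> createCastSeam();

// server.cpp -- the full implementation lives in one (large) translation unit:
//     std::unique_ptr<FunctionBaseSketch> createCastSeam() { return std::make_unique<FunctionBaseSketch>(); }

// bridge.cpp -- small executables link a stub instead, keeping the conversion
// code out of the binary; reaching it there is a logic error anyway:
//     std::unique_ptr<FunctionBaseSketch> createCastSeam() { throw std::runtime_error("not implemented"); }
```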
@@ -32,8 +28,6 @@ struct CastInternalName class CastOverloadResolverImpl : public IFunctionOverloadResolver { public: - using MonotonicityForRange = FunctionCastBase::MonotonicityForRange; - String getName() const override { if (cast_type == CastType::accurate) @@ -73,17 +67,7 @@ public: protected: FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override { - DataTypes data_types(arguments.size()); - - for (size_t i = 0; i < arguments.size(); ++i) - data_types[i] = arguments[i].type; - - auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get()); - - if (internal) - return std::make_unique>(context, CastInternalName::name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type); - else - return std::make_unique>(context, CastName::name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type); + return createFunctionBaseCast(context, arguments, return_type, diagnostic, cast_type); } DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 01e057e19a1..ce9e4fd66e1 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -5,6 +5,29 @@ namespace DB { +namespace +{ + +struct CastInternalName { static constexpr auto name = "_CAST"; }; + +} + +FunctionBasePtr createFunctionBaseCast( + ContextPtr context + , const ColumnsWithTypeAndName & arguments + , const DataTypePtr & return_type + , std::optional diagnostic + , CastType cast_type) +{ + DataTypes data_types(arguments.size()); + + for (size_t i = 0; i < arguments.size(); ++i) + data_types[i] = arguments[i].type; + + auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get()); + return std::make_unique>(context, CastInternalName::name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type); +} + REGISTER_FUNCTION(Conversion) { factory.registerFunction(); diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index fac74715fa0..689b72dc917 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -4972,4 +4972,11 @@ public: } }; +FunctionBasePtr createFunctionBaseCast( + ContextPtr context + , const ColumnsWithTypeAndName & arguments + , const DataTypePtr & return_type + , std::optional diagnostic + , CastType cast_type); + } From feed74e598503eeaa0e6aac44684f19e1fd42361 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 07:59:09 +0100 Subject: [PATCH 057/283] Better --- src/Functions/CastOverloadResolver.cpp | 100 +++++++++++++++++++- src/Functions/CastOverloadResolverImpl.h | 113 ----------------------- 2 files changed, 99 insertions(+), 114 deletions(-) delete mode 100644 src/Functions/CastOverloadResolverImpl.h diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index 6ab6f95410f..98debc3e2a4 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -1,7 +1,12 @@ #include #include -#include +#include +#include +#include #include +#include +#include +#include namespace DB @@ -12,6 +17,99 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } + +/** CastInternal does not preserve nullability of the data type, + * i.e. CastInternal(toNullable(toInt8(1)) as Int32) will be Int32(1). 
+ * + * Cast preserves nullability according to setting `cast_keep_nullable`, + * i.e. Cast(toNullable(toInt8(1)) as Int32) will be Nullable(Int32(1)) if `cast_keep_nullable` == 1. + */ +class CastOverloadResolverImpl : public IFunctionOverloadResolver +{ +public: + String getName() const override + { + if (cast_type == CastType::accurate) + return "accurateCast"; + if (cast_type == CastType::accurateOrNull) + return "accurateCastOrNull"; + if (internal) + return "_CAST"; + else + return "CAST"; + } + + size_t getNumberOfArguments() const override { return 2; } + + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + explicit CastOverloadResolverImpl(ContextPtr context_, CastType cast_type_, bool internal_, std::optional diagnostic_, bool keep_nullable_, const DataTypeValidationSettings & data_type_validation_settings_) + : context(context_) + , cast_type(cast_type_) + , internal(internal_) + , diagnostic(std::move(diagnostic_)) + , keep_nullable(keep_nullable_) + , data_type_validation_settings(data_type_validation_settings_) + { + } + + static FunctionOverloadResolverPtr create(ContextPtr context, CastType cast_type, bool internal, std::optional diagnostic) + { + const auto & settings_ref = context->getSettingsRef(); + + if (internal) + return std::make_unique(context, cast_type, internal, diagnostic, false /*keep_nullable*/, DataTypeValidationSettings{}); + else + return std::make_unique(context, cast_type, internal, diagnostic, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref)); + } + +protected: + FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override + { + return createFunctionBaseCast(context, arguments, return_type, diagnostic, cast_type); + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + const auto & column = arguments.back().column; + if (!column) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. " + "Instead there is non-constant column of type {}", getName(), arguments.back().type->getName()); + + const auto * type_col = checkAndGetColumnConst(column.get()); + if (!type_col) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. 
" + "Instead there is a column with the following structure: {}", getName(), column->dumpStructure()); + + DataTypePtr type = DataTypeFactory::instance().get(type_col->getValue()); + validateDataType(type, data_type_validation_settings); + + if (cast_type == CastType::accurateOrNull) + return makeNullable(type); + + if (internal) + return type; + + if (keep_nullable && arguments.front().type->isNullable() && type->canBeInsideNullable()) + return makeNullable(type); + + return type; + } + + bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + +private: + ContextPtr context; + CastType cast_type; + bool internal; + std::optional diagnostic; + bool keep_nullable; + DataTypeValidationSettings data_type_validation_settings; +}; + + UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column) { const auto * arg_type = named_column.type.get(); diff --git a/src/Functions/CastOverloadResolverImpl.h b/src/Functions/CastOverloadResolverImpl.h deleted file mode 100644 index 2bb83040163..00000000000 --- a/src/Functions/CastOverloadResolverImpl.h +++ /dev/null @@ -1,113 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_ARGUMENT; -} - - -/** CastInternal does not preserve nullability of the data type, - * i.e. CastInternal(toNullable(toInt8(1)) as Int32) will be Int32(1). - * - * Cast preserves nullability according to setting `cast_keep_nullable`, - * i.e. Cast(toNullable(toInt8(1)) as Int32) will be Nullable(Int32(1)) if `cast_keep_nullable` == 1. 
- */ -class CastOverloadResolverImpl : public IFunctionOverloadResolver -{ -public: - String getName() const override - { - if (cast_type == CastType::accurate) - return "accurateCast"; - if (cast_type == CastType::accurateOrNull) - return "accurateCastOrNull"; - if (internal) - return "_CAST"; - else - return "CAST"; - } - - size_t getNumberOfArguments() const override { return 2; } - - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - - explicit CastOverloadResolverImpl(ContextPtr context_, CastType cast_type_, bool internal_, std::optional diagnostic_, bool keep_nullable_, const DataTypeValidationSettings & data_type_validation_settings_) - : context(context_) - , cast_type(cast_type_) - , internal(internal_) - , diagnostic(std::move(diagnostic_)) - , keep_nullable(keep_nullable_) - , data_type_validation_settings(data_type_validation_settings_) - { - } - - static FunctionOverloadResolverPtr create(ContextPtr context, CastType cast_type, bool internal, std::optional diagnostic) - { - const auto & settings_ref = context->getSettingsRef(); - - if (internal) - return std::make_unique(context, cast_type, internal, diagnostic, false /*keep_nullable*/, DataTypeValidationSettings{}); - else - return std::make_unique(context, cast_type, internal, diagnostic, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref)); - } - -protected: - FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override - { - return createFunctionBaseCast(context, arguments, return_type, diagnostic, cast_type); - } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - const auto & column = arguments.back().column; - if (!column) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. " - "Instead there is non-constant column of type {}", getName(), arguments.back().type->getName()); - - const auto * type_col = checkAndGetColumnConst(column.get()); - if (!type_col) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument to {} must be a constant string describing type. 
" - "Instead there is a column with the following structure: {}", getName(), column->dumpStructure()); - - DataTypePtr type = DataTypeFactory::instance().get(type_col->getValue()); - validateDataType(type, data_type_validation_settings); - - if (cast_type == CastType::accurateOrNull) - return makeNullable(type); - - if (internal) - return type; - - if (keep_nullable && arguments.front().type->isNullable() && type->canBeInsideNullable()) - return makeNullable(type); - - return type; - } - - bool useDefaultImplementationForNulls() const override { return false; } - bool useDefaultImplementationForNothing() const override { return false; } - bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } - -private: - ContextPtr context; - CastType cast_type; - bool internal; - std::optional diagnostic; - bool keep_nullable; - DataTypeValidationSettings data_type_validation_settings; -}; - -} From 061cee257c489eccdcbcda7e694937fa93444d18 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 08:10:59 +0100 Subject: [PATCH 058/283] Remove crap --- src/Functions/CastOverloadResolver.cpp | 9 +++- src/Functions/FunctionsConversion.cpp | 59 +++++++++++++++++--- src/Functions/FunctionsConversion.h | 75 ++++++-------------------- src/Functions/concat.cpp | 2 +- 4 files changed, 76 insertions(+), 69 deletions(-) diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index 98debc3e2a4..4a081d684f6 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -1,12 +1,12 @@ #include #include -#include #include #include #include #include #include #include +#include namespace DB @@ -17,6 +17,13 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +FunctionBasePtr createFunctionBaseCast( + ContextPtr context + , const ColumnsWithTypeAndName & arguments + , const DataTypePtr & return_type + , std::optional diagnostic + , CastType cast_type); + /** CastInternal does not preserve nullability of the data type, * i.e. CastInternal(toNullable(toInt8(1)) as Int32) will be Int32(1). 
diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index ce9e4fd66e1..ebb63f1b25d 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -5,13 +5,6 @@ namespace DB { -namespace -{ - -struct CastInternalName { static constexpr auto name = "_CAST"; }; - -} - FunctionBasePtr createFunctionBaseCast( ContextPtr context , const ColumnsWithTypeAndName & arguments @@ -25,7 +18,7 @@ FunctionBasePtr createFunctionBaseCast( data_types[i] = arguments[i].type; auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get()); - return std::make_unique>(context, CastInternalName::name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type); + return std::make_unique(context, "CAST", std::move(monotonicity), data_types, return_type, diagnostic, cast_type); } REGISTER_FUNCTION(Conversion) @@ -153,4 +146,54 @@ REGISTER_FUNCTION(Conversion) factory.registerFunction>(); } + +MonotonicityHelper::MonotonicityForRange MonotonicityHelper::getMonotonicityInformation(const DataTypePtr & from_type, const IDataType * to_type) +{ + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (isEnum(from_type)) + { + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + } + /// other types like Null, FixedString, Array and Tuple have no monotonicity defined + return {}; +} + } diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 689b72dc917..0c4b7b41b93 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -3170,13 +3170,17 @@ private: }; +struct FunctionCastName +{ + static constexpr auto name = "CAST"; +}; + class FunctionCastBase : public IFunctionBase { public: using MonotonicityForRange = 
std::function; }; -template class FunctionCast final : public FunctionCastBase { public: @@ -3204,7 +3208,7 @@ public: try { return std::make_unique( - prepareUnpackDictionaries(getArgumentTypes()[0], getResultType()), cast_name, diagnostic); + prepareUnpackDictionaries(getArgumentTypes()[0], getResultType()), cast_name, diagnostic); } catch (Exception & e) { @@ -3278,7 +3282,7 @@ private: { /// In case when converting to Nullable type, we apply different parsing rule, /// that will not throw an exception but return NULL in case of malformed input. - FunctionPtr function = FunctionConvertFromString::create(); + FunctionPtr function = FunctionConvertFromString::create(); return createFunctionAdaptor(function, from_type); } else if (!can_apply_accurate_cast) @@ -3304,7 +3308,7 @@ private: { #define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE, ADDITIONS) \ case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \ - result_column = ConvertImpl::execute( \ + result_column = ConvertImpl::execute( \ arguments, result_type, input_rows_count, ADDITIONS()); \ break; if (wrapper_cast_type == CastType::accurate) @@ -3334,7 +3338,7 @@ private: { #define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE, ADDITIONS) \ case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \ - result_column = ConvertImpl::template execute( \ + result_column = ConvertImpl::template execute( \ arguments, result_type, input_rows_count); \ break; if (wrapper_cast_type == CastType::accurate) @@ -3368,7 +3372,7 @@ arguments, result_type, input_rows_count); \ { if (wrapper_cast_type == CastType::accurateOrNull) { - auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper(); + auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper(); return nullable_column_wrapper(arguments, result_type, column_nullable, input_rows_count); } else @@ -3494,7 +3498,7 @@ arguments, result_type, input_rows_count); \ { AccurateConvertStrategyAdditions additions; additions.scale = scale; - result_column = ConvertImpl::execute( + result_column = ConvertImpl::execute( arguments, result_type, input_rows_count, additions); return true; @@ -3503,7 +3507,7 @@ arguments, result_type, input_rows_count); \ { AccurateOrNullConvertStrategyAdditions additions; additions.scale = scale; - result_column = ConvertImpl::execute( + result_column = ConvertImpl::execute( arguments, result_type, input_rows_count, additions); return true; @@ -3516,14 +3520,14 @@ arguments, result_type, input_rows_count); \ /// Consistent with CAST(Nullable(String) AS Nullable(Numbers)) /// In case when converting to Nullable type, we apply different parsing rule, /// that will not throw an exception but return NULL in case of malformed input. 
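The comment above is the heart of accurateCastOrNull: parse errors become NULLs rather than exceptions. A self-contained sketch of that contract for a single integer type, with std::from_chars standing in for ClickHouse's ReadBuffer-based parsers:

``` cpp
#include <charconv>
#include <cstdint>
#include <optional>
#include <string_view>

/// Malformed or only partially consumed input yields NULL (nullopt); nothing throws.
std::optional<int32_t> parseInt32OrNull(std::string_view s)
{
    int32_t value{};
    const auto [ptr, ec] = std::from_chars(s.data(), s.data() + s.size(), value);
    if (ec != std::errc() || ptr != s.data() + s.size())
        return std::nullopt;
    return value;
}
```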
- result_column = ConvertImpl::execute( + result_column = ConvertImpl::execute( arguments, result_type, input_rows_count, scale); return true; } } - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); return true; }); @@ -3533,7 +3537,7 @@ arguments, result_type, input_rows_count); \ { if (wrapper_cast_type == CastType::accurateOrNull) { - auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper(); + auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper(); return nullable_column_wrapper(arguments, result_type, column_nullable, input_rows_count); } else @@ -4922,54 +4926,7 @@ public: return FunctionTo::Type::Monotonic::get; } - static MonotonicityForRange getMonotonicityInformation(const DataTypePtr & from_type, const IDataType * to_type) - { - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (isEnum(from_type)) - { - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - } - /// other types like Null, FixedString, Array and Tuple have no monotonicity defined - return {}; - } + static MonotonicityForRange getMonotonicityInformation(const DataTypePtr & from_type, const IDataType * to_type); }; FunctionBasePtr createFunctionBaseCast( diff --git a/src/Functions/concat.cpp b/src/Functions/concat.cpp index d68f5256f6d..c75a806559c 100644 --- a/src/Functions/concat.cpp +++ b/src/Functions/concat.cpp @@ -1,8 +1,8 @@ #include +#include #include #include #include -#include #include #include #include From 804c07156d90c0b4ce6b632f30b5481077d0f5d3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 08:24:53 +0100 Subject: [PATCH 059/283] Fix garbage --- .../Serializations/SerializationObject.cpp | 20 +++- 
src/Functions/FunctionsConversion.h | 97 ++++++++----------- 2 files changed, 57 insertions(+), 60 deletions(-) diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index e6dc16ef5a0..cd186ec2c46 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -5,13 +5,11 @@ #include #include #include -#include #include #include -#include #include +#include #include -#include #include #include @@ -30,6 +28,7 @@ namespace ErrorCodes extern const int CANNOT_READ_ALL_DATA; extern const int ARGUMENT_OUT_OF_BOUND; extern const int LOGICAL_ERROR; + extern const int CANNOT_PARSE_TEXT; } template @@ -344,7 +343,20 @@ void SerializationObject::deserializeBinaryBulkFromString( state.nested_serialization->deserializeBinaryBulkWithMultipleStreams( column_string, limit, settings, state.nested_state, cache); - ConvertImplGenericFromString::executeImpl(*column_string, column_object, *this, column_string->size()); + size_t input_rows_count = column_string->size(); + column_object.reserve(input_rows_count); + + FormatSettings format_settings; + for (size_t i = 0; i < input_rows_count; ++i) + { + const auto & val = column_string->getDataAt(i); + ReadBufferFromMemory read_buffer(val.data, val.size); + deserializeWholeText(column_object, read_buffer, format_settings); + + if (!read_buffer.eof()) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, + "Cannot parse string to column Object. Expected eof"); + } } template diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 0c4b7b41b93..c21e85fb40e 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -1818,14 +1818,10 @@ struct ConvertImpl {}; /// Generic conversion of any type from String. Used for complex types: Array and Tuple or types with custom serialization. 
-template struct ConvertImplGenericFromString { static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) { - static_assert(std::is_same_v || std::is_same_v, - "Can be used only to parse from ColumnString or ColumnFixedString"); - const IColumn & column_from = *arguments[0].column; const IDataType & data_type_to = *result_type; auto res = data_type_to.createColumn(); @@ -1841,63 +1837,52 @@ struct ConvertImplGenericFromString IColumn & column_to, const ISerialization & serialization_from, size_t input_rows_count, - const PaddedPODArray * null_map = nullptr, - const IDataType * result_type = nullptr) + const PaddedPODArray * null_map, + const IDataType * result_type) { - static_assert(std::is_same_v || std::is_same_v, - "Can be used only to parse from ColumnString or ColumnFixedString"); + column_to.reserve(input_rows_count); - if (const StringColumnType * col_from_string = checkAndGetColumn(&column_from)) + FormatSettings format_settings; + for (size_t i = 0; i < input_rows_count; ++i) { - column_to.reserve(input_rows_count); - - FormatSettings format_settings; - for (size_t i = 0; i < input_rows_count; ++i) + if (null_map && (*null_map)[i]) { - if (null_map && (*null_map)[i]) + column_to.insertDefault(); + continue; + } + + const auto & val = column_from.getDataAt(i); + ReadBufferFromMemory read_buffer(val.data, val.size); + try + { + serialization_from.deserializeWholeText(column_to, read_buffer, format_settings); + } + catch (const Exception & e) + { + auto * nullable_column = typeid_cast(&column_to); + if (e.code() == ErrorCodes::CANNOT_PARSE_BOOL && nullable_column) { - column_to.insertDefault(); + auto & col_nullmap = nullable_column->getNullMapData(); + if (col_nullmap.size() != nullable_column->size()) + col_nullmap.resize_fill(nullable_column->size()); + if (nullable_column->size() == (i + 1)) + nullable_column->popBack(1); + nullable_column->insertDefault(); continue; } + throw; + } - const auto & val = col_from_string->getDataAt(i); - ReadBufferFromMemory read_buffer(val.data, val.size); - try - { - serialization_from.deserializeWholeText(column_to, read_buffer, format_settings); - } - catch (const Exception & e) - { - auto * nullable_column = typeid_cast(&column_to); - if (e.code() == ErrorCodes::CANNOT_PARSE_BOOL && nullable_column) - { - auto & col_nullmap = nullable_column->getNullMapData(); - if (col_nullmap.size() != nullable_column->size()) - col_nullmap.resize_fill(nullable_column->size()); - if (nullable_column->size() == (i + 1)) - nullable_column->popBack(1); - nullable_column->insertDefault(); - continue; - } - throw; - } - - if (!read_buffer.eof()) - { - if (result_type) - throwExceptionForIncompletelyParsedValue(read_buffer, *result_type); - else - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, - "Cannot parse string to column {}. Expected eof", column_to.getName()); - } + if (!read_buffer.eof()) + { + if (result_type) + throwExceptionForIncompletelyParsedValue(read_buffer, *result_type); + else + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, + "Cannot parse string to column {}. 
Expected eof", column_to.getName()); } } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column {} of first argument of conversion function from string", - column_from.getName()); } - }; @@ -3392,7 +3377,7 @@ arguments, result_type, input_rows_count); \ { if (checkAndGetDataType(from_type.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } return createWrapper(from_type, to_type, requested_result_is_nullable); @@ -3555,7 +3540,7 @@ arguments, result_type, input_rows_count); \ /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } else if (const auto * agg_type = checkAndGetDataType(from_type_untyped.get())) { @@ -3598,7 +3583,7 @@ arguments, result_type, input_rows_count); \ /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } DataTypePtr from_type_holder; @@ -3689,7 +3674,7 @@ arguments, result_type, input_rows_count); \ /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } const auto * from_type = checkAndGetDataType(from_type_untyped.get()); @@ -4034,7 +4019,7 @@ arguments, result_type, input_rows_count); \ { return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) { - auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); + auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); res->finalize(); return res; }; @@ -4831,7 +4816,7 @@ arguments, result_type, input_rows_count); \ auto wrapped_result_type = result_type; if (requested_result_is_nullable) wrapped_result_type = makeNullable(result_type); - return ConvertImplGenericFromString::execute( + return ConvertImplGenericFromString::execute( arguments, wrapped_result_type, column_nullable, input_rows_count); }; return true; From f2c4a5bb94ea319b38caf35ac90a9b9208cdc745 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 9 Mar 2024 08:30:13 +0100 Subject: [PATCH 060/283] Prevent garbage from appearing --- src/Functions/FunctionsConversion.cpp | 5008 ++++++++++++++++++++++++- src/Functions/FunctionsConversion.h | 4924 ------------------------ 2 files changed, 4957 insertions(+), 4975 deletions(-) delete mode 100644 src/Functions/FunctionsConversion.h diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index ebb63f1b25d..865f7db8e12 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1,10 +1,4966 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB { +namespace ErrorCodes +{ + extern const int ATTEMPT_TO_READ_AFTER_EOF; + extern const int CANNOT_PARSE_NUMBER; + extern const int CANNOT_READ_ARRAY_FROM_TEXT; + extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; + extern const int CANNOT_PARSE_QUOTED_STRING; + extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; + extern const int CANNOT_PARSE_DATE; + extern const int CANNOT_PARSE_DATETIME; + extern const int CANNOT_PARSE_TEXT; + extern const int CANNOT_PARSE_UUID; + extern const int CANNOT_PARSE_IPV4; + extern const int CANNOT_PARSE_IPV6; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; + extern const int LOGICAL_ERROR; + extern const int TYPE_MISMATCH; + extern const int CANNOT_CONVERT_TYPE; + extern const int ILLEGAL_COLUMN; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NOT_IMPLEMENTED; + extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN; + extern const int CANNOT_PARSE_BOOL; + extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; +} + +/** Type conversion functions. + * toType - conversion in "natural way"; + */ + +UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column); + +/// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type. +struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; }; + +struct AccurateConvertStrategyAdditions +{ + UInt32 scale { 0 }; +}; + +struct AccurateOrNullConvertStrategyAdditions +{ + UInt32 scale { 0 }; +}; + + +struct ConvertDefaultBehaviorTag {}; +struct ConvertReturnNullOnErrorTag {}; +struct ConvertReturnZeroOnErrorTag {}; + +/** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment. 
+ * (Date is represented internally as number of days from some day; DateTime - as unix timestamp) + */ +template +struct ConvertImpl +{ + using FromFieldType = typename FromDataType::FieldType; + using ToFieldType = typename ToDataType::FieldType; + + template + static ColumnPtr NO_SANITIZE_UNDEFINED execute( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type [[maybe_unused]], size_t input_rows_count, + Additions additions [[maybe_unused]] = Additions()) + { + const ColumnWithTypeAndName & named_from = arguments[0]; + + using ColVecFrom = typename FromDataType::ColumnType; + using ColVecTo = typename ToDataType::ColumnType; + + if constexpr ((IsDataTypeDecimal || IsDataTypeDecimal) + && !(std::is_same_v || std::is_same_v)) + { + if constexpr (!IsDataTypeDecimalOrNumber || !IsDataTypeDecimalOrNumber) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + named_from.column->getName(), Name::name); + } + } + + if (const ColVecFrom * col_from = checkAndGetColumn(named_from.column.get())) + { + typename ColVecTo::MutablePtr col_to = nullptr; + + if constexpr (IsDataTypeDecimal) + { + UInt32 scale; + + if constexpr (std::is_same_v + || std::is_same_v) + { + scale = additions.scale; + } + else + { + scale = additions; + } + + col_to = ColVecTo::create(0, scale); + } + else + col_to = ColVecTo::create(); + + const auto & vec_from = col_from->getData(); + auto & vec_to = col_to->getData(); + vec_to.resize(input_rows_count); + + ColumnUInt8::MutablePtr col_null_map_to; + ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr; + if constexpr (std::is_same_v) + { + col_null_map_to = ColumnUInt8::create(input_rows_count, false); + vec_null_map_to = &col_null_map_to->getData(); + } + + bool result_is_bool = isBool(result_type); + for (size_t i = 0; i < input_rows_count; ++i) + { + if constexpr (std::is_same_v) + { + if (result_is_bool) + { + vec_to[i] = vec_from[i] != FromFieldType(0); + continue; + } + } + + if constexpr (std::is_same_v && std::is_same_v) + { + static_assert( + std::is_same_v, + "UInt128 and UUID types must be same"); + + vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; + vec_to[i].items[0] = vec_from[i].toUnderType().items[1]; + + continue; + } + + if constexpr (std::is_same_v && std::is_same_v) + { + static_assert( + std::is_same_v, + "UInt128 and IPv6 types must be same"); + + vec_to[i].items[1] = std::byteswap(vec_from[i].toUnderType().items[0]); + vec_to[i].items[0] = std::byteswap(vec_from[i].toUnderType().items[1]); + + continue; + } + + if constexpr (std::is_same_v != std::is_same_v) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Conversion between numeric types and UUID is not supported. " + "Probably the passed UUID is unquoted"); + } + else if constexpr ( + (std::is_same_v != std::is_same_v) + && !(is_any_of || is_any_of) + ) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion from {} to {} is not supported", + TypeName, TypeName); + } + else if constexpr (std::is_same_v != std::is_same_v && !(std::is_same_v || std::is_same_v)) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Conversion between numeric types and IPv6 is not supported. 
" + "Probably the passed IPv6 is unquoted"); + } + else + { + if constexpr (IsDataTypeDecimal || IsDataTypeDecimal) + { + if constexpr (std::is_same_v) + { + ToFieldType result; + bool convert_result = false; + + if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) + convert_result = tryConvertDecimals(vec_from[i], col_from->getScale(), col_to->getScale(), result); + else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) + convert_result = tryConvertFromDecimal(vec_from[i], col_from->getScale(), result); + else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) + convert_result = tryConvertToDecimal(vec_from[i], col_to->getScale(), result); + + if (convert_result) + vec_to[i] = result; + else + { + vec_to[i] = static_cast(0); + (*vec_null_map_to)[i] = true; + } + } + else + { + if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) + vec_to[i] = convertDecimals(vec_from[i], col_from->getScale(), col_to->getScale()); + else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) + vec_to[i] = convertFromDecimal(vec_from[i], col_from->getScale()); + else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) + vec_to[i] = convertToDecimal(vec_from[i], col_to->getScale()); + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unsupported data type in conversion function"); + } + } + else + { + /// If From Data is Nan or Inf and we convert to integer type, throw exception + if constexpr (std::is_floating_point_v && !std::is_floating_point_v) + { + if (!isFinite(vec_from[i])) + { + if constexpr (std::is_same_v) + { + vec_to[i] = 0; + (*vec_null_map_to)[i] = true; + continue; + } + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unexpected inf or nan to integer conversion"); + } + } + + if constexpr (std::is_same_v + || std::is_same_v) + { + bool convert_result = accurate::convertNumeric(vec_from[i], vec_to[i]); + + if (!convert_result) + { + if (std::is_same_v) + { + vec_to[i] = 0; + (*vec_null_map_to)[i] = true; + } + else + { + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Value in column {} cannot be safely converted into type {}", + named_from.column->getName(), result_type->getName()); + } + } + } + else + { + if constexpr (std::is_same_v && std::is_same_v) + { + const uint8_t ip4_cidr[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}; + const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); + if (!matchIPv6Subnet(src, ip4_cidr, 96)) + { + char addr[IPV6_MAX_TEXT_LENGTH + 1] {}; + char * paddr = addr; + formatIPv6(src, paddr); + + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 {} in column {} is not in IPv4 mapping block", addr, named_from.column->getName()); + } + + uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); + if constexpr (std::endian::native == std::endian::little) + { + dst[0] = src[15]; + dst[1] = src[14]; + dst[2] = src[13]; + dst[3] = src[12]; + } + else + { + dst[0] = src[12]; + dst[1] = src[13]; + dst[2] = src[14]; + dst[3] = src[15]; + } + } + else if constexpr (std::is_same_v && std::is_same_v) + { + const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); + uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); + std::memset(dst, '\0', IPV6_BINARY_LENGTH); + dst[10] = dst[11] = 0xff; + + if constexpr (std::endian::native == std::endian::little) + { + dst[12] = src[3]; + dst[13] = src[2]; + dst[14] = src[1]; + dst[15] = src[0]; + } + else + { + dst[12] = src[0]; + dst[13] = src[1]; + dst[14] = src[2]; + dst[15] = src[3]; + } + } + else if constexpr 
(std::is_same_v && std::is_same_v) + vec_to[i] = static_cast(static_cast(vec_from[i])); + else if constexpr (std::is_same_v && (std::is_same_v || std::is_same_v)) + vec_to[i] = static_cast(vec_from[i] * DATE_SECONDS_PER_DAY); + else + vec_to[i] = static_cast(vec_from[i]); + } + } + } + } + + if constexpr (std::is_same_v) + return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); + else + return col_to; + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + named_from.column->getName(), Name::name); + } +}; + +/** Conversion of DateTime to Date: throw off time component. + */ +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +/** Conversion of DateTime to Date32: throw off time component. + */ +template +struct ConvertImpl + : DateTimeTransformImpl {}; + +/** Conversion of Date to DateTime: adding 00:00:00 time component. + */ +template +struct ToDateTimeImpl +{ + static constexpr auto name = "toDateTime"; + + static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (d > MAX_DATETIME_DAY_NUM) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Day number {} is out of bounds of type DateTime", d); + } + else if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) + { + if (d > MAX_DATETIME_DAY_NUM) + d = MAX_DATETIME_DAY_NUM; + } + return static_cast(time_zone.fromDayNum(DayNum(d))); + } + + static UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) + { + if (d < 0) + return 0; + else if (d > MAX_DATETIME_DAY_NUM) + d = MAX_DATETIME_DAY_NUM; + } + else if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (d < 0 || d > MAX_DATETIME_DAY_NUM) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type DateTime", d); + } + return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); + } + + static UInt32 execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) + { + return dt; + } + + static UInt32 execute(Int64 dt64, const DateLUTImpl & /*time_zone*/) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Ignore) + return static_cast(dt64); + else + { + if (dt64 < 0 || dt64 >= MAX_DATETIME_TIMESTAMP) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) + return dt64 < 0 ? 0 : std::numeric_limits::max(); + else + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type DateTime", dt64); + } + else + return static_cast(dt64); + } + } +}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +/// Implementation of toDate function. 
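The next group of transforms implements toDate. As a standalone sketch of the dual interpretation described later in this hunk (small values are day numbers, large ones unix timestamps), with a hard-coded constant standing in for DATE_LUT_MAX_DAY_NUM and plain division standing in for the timezone-aware DateLUTImpl lookup:

``` cpp
#include <algorithm>
#include <cstdint>

/// Sketch: toDate treats small integers as day numbers and large ones as unix
/// timestamps, saturating at the last representable Date instead of wrapping.
uint16_t toDateSketch(uint64_t from)
{
    constexpr uint64_t max_day_num = 65535;                      /// assumed DATE_LUT_MAX_DAY_NUM
    constexpr uint64_t max_date_timestamp = max_day_num * 86400; /// first second of the last Date

    if (from <= max_day_num)
        return static_cast<uint16_t>(from);                      /// already a day number
    return static_cast<uint16_t>(std::min(from, max_date_timestamp) / 86400);  /// timestamp -> days
}
```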
+ +template +struct ToDateTransform32Or64 +{ + static constexpr auto name = "toDate"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from > MAX_DATETIME_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); + } + /// if value is smaller (or equal) than maximum day value for Date, than treat it as day num, + /// otherwise treat it as unix timestamp. This is a bit weird, but we leave this behavior. + if (from <= DATE_LUT_MAX_DAY_NUM) + return from; + else + return time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); + } +}; + +/** Conversion of Date32 to Date. + */ +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ToDateTransform32Or64Signed +{ + static constexpr auto name = "toDate"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) + { + // TODO: decide narrow or extended range based on FromType + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from < 0 || from > MAX_DATE_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); + } + else + { + if (from < 0) + return 0; + } + return (from <= DATE_LUT_MAX_DAY_NUM) + ? static_cast(from) + : time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATE_TIMESTAMP))); + } +}; + +template +struct ToDateTransform8Or16Signed +{ + static constexpr auto name = "toDate"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + if (from < 0) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); + else + return 0; + } + return from; + } +}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +/// Implementation of toDate32 function. 
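Before the toDate32 transforms, a sketch of the extended range they honor; both boundary constants are assumptions standing in for getDayNumOffsetEpoch() and DATE_LUT_MAX_EXTEND_DAY_NUM, and the real conversion goes through the timezone LUT rather than integer division:

``` cpp
#include <algorithm>
#include <cstdint>

/// Sketch: Date32 stores a signed day number, so toDate32 can represent dates
/// before 1970 and clamps to the supported window instead of wrapping.
int32_t toDate32Sketch(int64_t from)
{
    constexpr int64_t min_day_num = -25567;   /// assumption: days from 1900-01-01 to the epoch
    constexpr int64_t max_day_num = 120529;   /// assumption: last extended day number (~2299-12-31)

    if (from < min_day_num)
        return static_cast<int32_t>(min_day_num);                /// clamp below the range
    if (from < max_day_num)
        return static_cast<int32_t>(from);                       /// plain day number
    return static_cast<int32_t>(std::min<int64_t>(from / 86400, max_day_num));  /// timestamp -> days
}
```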
+ +template +struct ToDate32Transform32Or64 +{ + static constexpr auto name = "toDate32"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) + { + if (from < DATE_LUT_MAX_EXTEND_DAY_NUM) + return static_cast(from); + else + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type Date32", from); + } + return time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATETIME64_TIMESTAMP))); + } + } +}; + +template +struct ToDate32Transform32Or64Signed +{ + static constexpr auto name = "toDate32"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) + { + static const Int32 daynum_min_offset = -static_cast(time_zone.getDayNumOffsetEpoch()); + + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from < daynum_min_offset || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type Date32", from); + } + + if (from < daynum_min_offset) + return daynum_min_offset; + + return (from < DATE_LUT_MAX_EXTEND_DAY_NUM) + ? static_cast(from) + : time_zone.toDayNum(std::min(time_t(Int64(from)), time_t(MAX_DATETIME64_TIMESTAMP))); + } +}; + +template +struct ToDate32Transform8Or16Signed +{ + static constexpr auto name = "toDate32"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + return from; + } +}; + +/** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience, + * Float32, Float64) to Date. If the + * number is less than 65536, then it is treated as DayNum, and if it's greater or equals to 65536, + * then treated as unix timestamp. If the number exceeds UInt32, saturate to MAX_UINT32 then as DayNum. + * It's a bit illogical, as we actually have two functions in one. + * But allows to support frequent case, + * when user write toDate(UInt32), expecting conversion of unix timestamp to Date. + * (otherwise such usage would be frequent mistake). 
+ */ +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + + +template +struct ToDateTimeTransform64 +{ + static constexpr auto name = "toDateTime"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from > MAX_DATETIME_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); + } + return static_cast(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); + } +}; + +template +struct ToDateTimeTransformSigned +{ + static constexpr auto name = "toDateTime"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + if (from < 0) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); + else + return 0; + } + return from; + } +}; + +template +struct ToDateTimeTransform64Signed +{ + static constexpr auto name = "toDateTime"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from < 0 || from > MAX_DATETIME_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); + } + + if (from < 0) + return 0; + return static_cast(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); + } +}; + +/// Special case of converting Int8, Int16, Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to DateTime. 
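A standalone sketch of the saturating branch these special cases implement when the overflow behavior is Saturate; the constant is an assumption mirroring MAX_DATETIME_TIMESTAMP (DateTime is stored as UInt32):

``` cpp
#include <algorithm>
#include <cstdint>

/// Sketch: signed or wide integers clamp rather than wrap when converted to
/// DateTime: negatives go to the epoch, oversized values to the maximum.
uint32_t toDateTimeSaturateSketch(int64_t from)
{
    constexpr int64_t max_datetime_timestamp = 0xFFFFFFFF;  /// assumed MAX_DATETIME_TIMESTAMP

    if (from < 0)
        return 0;
    return static_cast<uint32_t>(std::min(from, max_datetime_timestamp));
}
```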
+template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +/** Conversion of numeric to DateTime64 + */ + +template +struct ToDateTime64TransformUnsigned +{ + static constexpr auto name = "toDateTime64"; + + const DateTime64::NativeType scale_multiplier = 1; + + ToDateTime64TransformUnsigned(UInt32 scale = 0) /// NOLINT + : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) + {} + + NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); + else + return DecimalUtils::decimalFromComponentsWithMultiplier(from, 0, scale_multiplier); + } + else + return DecimalUtils::decimalFromComponentsWithMultiplier(std::min(from, MAX_DATETIME64_TIMESTAMP), 0, scale_multiplier); + } +}; +template +struct ToDateTime64TransformSigned +{ + static constexpr auto name = "toDateTime64"; + + const DateTime64::NativeType scale_multiplier = 1; + + ToDateTime64TransformSigned(UInt32 scale = 0) /// NOLINT + : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) + {} + + NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from < MIN_DATETIME64_TIMESTAMP || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); + } + from = static_cast(std::max(from, MIN_DATETIME64_TIMESTAMP)); + from = static_cast(std::min(from, MAX_DATETIME64_TIMESTAMP)); + + return DecimalUtils::decimalFromComponentsWithMultiplier(from, 0, scale_multiplier); + } +}; +template +struct ToDateTime64TransformFloat +{ + static constexpr auto name = "toDateTime64"; + + const UInt32 scale = 1; + + ToDateTime64TransformFloat(UInt32 scale_ = 0) /// NOLINT + : scale(scale_) + {} + + NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from < MIN_DATETIME64_TIMESTAMP || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); + } + + from = std::max(from, static_cast(MIN_DATETIME64_TIMESTAMP)); + from = std::min(from, static_cast(MAX_DATETIME64_TIMESTAMP)); + return convertToDecimal(from, scale); + } +}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct 
ConvertImpl + : DateTimeTransformImpl, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl, false> {}; + + +/** Conversion of DateTime64 to Date or DateTime: discards fractional part. + */ +template +struct FromDateTime64Transform +{ + static constexpr auto name = Transform::name; + + const DateTime64::NativeType scale_multiplier = 1; + + FromDateTime64Transform(UInt32 scale) /// NOLINT + : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) + {} + + auto execute(DateTime64::NativeType dt, const DateLUTImpl & time_zone) const + { + const auto c = DecimalUtils::splitWithScaleMultiplier(DateTime64(dt), scale_multiplier); + return Transform::execute(static_cast(c.whole), time_zone); + } +}; + +/** Conversion of DateTime64 to Date or DateTime: discards fractional part. + */ +template +struct ConvertImpl + : DateTimeTransformImpl>, false> {}; + +template +struct ConvertImpl + : DateTimeTransformImpl>, false> {}; + +struct ToDateTime64Transform +{ + static constexpr auto name = "toDateTime64"; + + const DateTime64::NativeType scale_multiplier = 1; + + ToDateTime64Transform(UInt32 scale = 0) /// NOLINT + : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) + {} + + DateTime64::NativeType execute(UInt16 d, const DateLUTImpl & time_zone) const + { + const auto dt = ToDateTimeImpl<>::execute(d, time_zone); + return execute(dt, time_zone); + } + + DateTime64::NativeType execute(Int32 d, const DateLUTImpl & time_zone) const + { + Int64 dt = static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); + return DecimalUtils::decimalFromComponentsWithMultiplier(dt, 0, scale_multiplier); + } + + DateTime64::NativeType execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) const + { + return DecimalUtils::decimalFromComponentsWithMultiplier(dt, 0, scale_multiplier); + } +}; + +/** Conversion of Date or DateTime to DateTime64: add zero sub-second part. + */ +template +struct ConvertImpl + : DateTimeTransformImpl {}; + +template +struct ConvertImpl + : DateTimeTransformImpl {}; + +template +struct ConvertImpl + : DateTimeTransformImpl {}; + + +/** Transformation of numbers, dates, datetimes to strings: through formatting. 
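+ *
+ * A sketch of the observable behaviour (examples assume a UTC session timezone):
+ *   toString(42) -> '42'
+ *   toString(toDate('2023-01-02')) -> '2023-01-02'
+ *   toString(toDateTime64('2023-01-02 03:04:05.678', 3)) -> '2023-01-02 03:04:05.678'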
+ */ +template +struct FormatImpl +{ + template + static ReturnType execute(const typename DataType::FieldType x, WriteBuffer & wb, const DataType *, const DateLUTImpl *) + { + writeText(x, wb); + return ReturnType(true); + } +}; + +template <> +struct FormatImpl +{ + template + static ReturnType execute(const DataTypeDate::FieldType x, WriteBuffer & wb, const DataTypeDate *, const DateLUTImpl * time_zone) + { + writeDateText(DayNum(x), wb, *time_zone); + return ReturnType(true); + } +}; + +template <> +struct FormatImpl +{ + template + static ReturnType execute(const DataTypeDate32::FieldType x, WriteBuffer & wb, const DataTypeDate32 *, const DateLUTImpl * time_zone) + { + writeDateText(ExtendedDayNum(x), wb, *time_zone); + return ReturnType(true); + } +}; + +template <> +struct FormatImpl +{ + template + static ReturnType execute(const DataTypeDateTime::FieldType x, WriteBuffer & wb, const DataTypeDateTime *, const DateLUTImpl * time_zone) + { + writeDateTimeText(x, wb, *time_zone); + return ReturnType(true); + } +}; + +template <> +struct FormatImpl +{ + template + static ReturnType execute(const DataTypeDateTime64::FieldType x, WriteBuffer & wb, const DataTypeDateTime64 * type, const DateLUTImpl * time_zone) + { + writeDateTimeText(DateTime64(x), type->getScale(), wb, *time_zone); + return ReturnType(true); + } +}; + + +template +struct FormatImpl> +{ + template + static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeEnum * type, const DateLUTImpl *) + { + static constexpr bool throw_exception = std::is_same_v; + + if constexpr (throw_exception) + { + writeString(type->getNameForValue(x), wb); + } + else + { + StringRef res; + bool is_ok = type->getNameForValue(x, res); + if (is_ok) + writeString(res, wb); + return ReturnType(is_ok); + } + } +}; + +template +struct FormatImpl> +{ + template + static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeDecimal * type, const DateLUTImpl *) + { + writeText(x, type->getScale(), wb, false); + return ReturnType(true); + } +}; + + +/// DataTypeEnum to DataType free conversion +template +struct ConvertImpl, DataTypeNumber, Name, ConvertDefaultBehaviorTag> +{ + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) + { + return arguments[0].column; + } +}; + +static inline ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) +{ + ColumnUInt8::MutablePtr null_map = nullptr; + if (const auto * col_null = checkAndGetColumn(col.get())) + { + null_map = ColumnUInt8::create(); + null_map->insertRangeFrom(col_null->getNullMapColumn(), 0, col_null->size()); + } + return null_map; +} + +template +requires (!std::is_same_v) +struct ConvertImpl +{ + using FromFieldType = typename FromDataType::FieldType; + using ColVecType = ColumnVectorOrDecimal; + + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) + { + if constexpr (IsDataTypeDateOrDateTime) + { + auto datetime_arg = arguments[0]; + + const DateLUTImpl * time_zone = nullptr; + const ColumnConst * time_zone_column = nullptr; + + if (arguments.size() == 1) + { + auto non_null_args = createBlockWithNestedColumns(arguments); + time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); + } + else /// When we have a column for timezone + { + datetime_arg.column = datetime_arg.column->convertToFullColumnIfConst(); + + if constexpr (std::is_same_v || std::is_same_v) + time_zone = &DateLUT::instance(); + /// For argument of Date or 
DateTime type, second argument with time zone could be specified. + if constexpr (std::is_same_v || std::is_same_v) + { + if ((time_zone_column = checkAndGetColumnConst(arguments[1].column.get()))) + { + auto non_null_args = createBlockWithNestedColumns(arguments); + time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); + } + } + } + const auto & col_with_type_and_name = columnGetNested(datetime_arg); + + if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) + { + auto col_to = ColumnString::create(); + + const typename ColVecType::Container & vec_from = col_from->getData(); + ColumnString::Chars & data_to = col_to->getChars(); + ColumnString::Offsets & offsets_to = col_to->getOffsets(); + size_t size = vec_from.size(); + + if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); + else if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); + else if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss") + 1)); + else if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss.") + col_from->getScale() + 1)); + else + data_to.resize(size * 3); /// Arbitrary + + offsets_to.resize(size); + + WriteBufferFromVector write_buffer(data_to); + const auto & type = static_cast(*col_with_type_and_name.type); + + ColumnUInt8::MutablePtr null_map = copyNullMap(datetime_arg.column); + + if (!null_map && arguments.size() > 1) + null_map = copyNullMap(arguments[1].column->convertToFullColumnIfConst()); + + if (null_map) + { + for (size_t i = 0; i < size; ++i) + { + if (!time_zone_column && arguments.size() > 1) + { + if (!arguments[1].column.get()->getDataAt(i).toString().empty()) + time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); + } + bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); + null_map->getData()[i] |= !is_ok; + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + else + { + for (size_t i = 0; i < size; ++i) + { + if (!time_zone_column && arguments.size() > 1) + { + if (!arguments[1].column.get()->getDataAt(i).toString().empty()) + time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); + } + FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + + write_buffer.finalize(); + + if (null_map) + return ColumnNullable::create(std::move(col_to), std::move(null_map)); + return col_to; + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), Name::name); + } + else + { + ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); + + const auto & col_with_type_and_name = columnGetNested(arguments[0]); + const auto & type = static_cast(*col_with_type_and_name.type); + + if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) + { + auto col_to = ColumnString::create(); + + const typename ColVecType::Container & vec_from = col_from->getData(); + ColumnString::Chars & data_to = col_to->getChars(); + ColumnString::Offsets & offsets_to = col_to->getOffsets(); + size_t size = vec_from.size(); + + 
data_to.resize(size * 3); + offsets_to.resize(size); + + WriteBufferFromVector write_buffer(data_to); + + if (null_map) + { + for (size_t i = 0; i < size; ++i) + { + bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, nullptr); + /// We don't use timezones in this branch + null_map->getData()[i] |= !is_ok; + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + else + { + for (size_t i = 0; i < size; ++i) + { + FormatImpl::template execute(vec_from[i], write_buffer, &type, nullptr); + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + + write_buffer.finalize(); + + if (null_map) + return ColumnNullable::create(std::move(col_to), std::move(null_map)); + return col_to; + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), Name::name); + } + } +}; + + +/// Generic conversion of any type to String or FixedString via serialization to text. +template +struct ConvertImplGenericToString +{ + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) + { + static_assert(std::is_same_v || std::is_same_v, + "Can be used only to serialize to ColumnString or ColumnFixedString"); + + ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); + + const auto & col_with_type_and_name = columnGetNested(arguments[0]); + const IDataType & type = *col_with_type_and_name.type; + const IColumn & col_from = *col_with_type_and_name.column; + + size_t size = col_from.size(); + auto col_to = removeNullable(result_type)->createColumn(); + + { + ColumnStringHelpers::WriteHelper write_helper( + assert_cast(*col_to), + size); + + auto & write_buffer = write_helper.getWriteBuffer(); + + FormatSettings format_settings; + auto serialization = type.getDefaultSerialization(); + for (size_t row = 0; row < size; ++row) + { + serialization->serializeText(col_from, row, write_buffer, format_settings); + write_helper.rowWritten(); + } + + write_helper.finalize(); + } + + if (result_type->isNullable() && null_map) + return ColumnNullable::create(std::move(col_to), std::move(null_map)); + return col_to; + } +}; + +/** Conversion of time_t to UInt16, Int32, UInt32 + */ +template +void convertFromTime(typename DataType::FieldType & x, time_t & time) +{ + x = time; +} + +template <> +inline void convertFromTime(DataTypeDate::FieldType & x, time_t & time) +{ + if (unlikely(time < 0)) + x = 0; + else if (unlikely(time > 0xFFFF)) + x = 0xFFFF; + else + x = time; +} + +template <> +inline void convertFromTime(DataTypeDate32::FieldType & x, time_t & time) +{ + x = static_cast(time); +} + +template <> +inline void convertFromTime(DataTypeDateTime::FieldType & x, time_t & time) +{ + if (unlikely(time < 0)) + x = 0; + else if (unlikely(time > MAX_DATETIME_TIMESTAMP)) + x = MAX_DATETIME_TIMESTAMP; + else + x = static_cast(time); +} + +/** Conversion of strings to numbers, dates, datetimes: through parsing. 
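+ *
+ * E.g. toUInt32('123') parses 123 and toDate('2023-01-02') parses a Date. For FixedString
+ * inputs, trailing zero bytes are skipped before the "fully parsed" check (see isAllRead below).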
+ */ +template +void parseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool precise_float_parsing) +{ + if constexpr (std::is_floating_point_v) + { + if (precise_float_parsing) + readFloatTextPrecise(x, rb); + else + readFloatTextFast(x, rb); + } + else + readText(x, rb); +} + +template <> +inline void parseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) +{ + DayNum tmp(0); + readDateText(tmp, rb, *time_zone); + x = tmp; +} + +template <> +inline void parseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) +{ + ExtendedDayNum tmp(0); + readDateText(tmp, rb, *time_zone); + x = tmp; +} + + +// NOTE: no need of extra overload of DateTime64, since readDateTimeText64 has different signature and that case is explicitly handled in the calling code. +template <> +inline void parseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) +{ + time_t time = 0; + readDateTimeText(time, rb, *time_zone); + convertFromTime(x, time); +} + +template <> +inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) +{ + UUID tmp; + readUUIDText(tmp, rb); + x = tmp.toUnderType(); +} + +template <> +inline void parseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) +{ + IPv4 tmp; + readIPv4Text(tmp, rb); + x = tmp.toUnderType(); +} + +template <> +inline void parseImpl(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) +{ + IPv6 tmp; + readIPv6Text(tmp, rb); + x = tmp; +} + +template +bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool precise_float_parsing) +{ + if constexpr (std::is_floating_point_v) + { + if (precise_float_parsing) + return tryReadFloatTextPrecise(x, rb); + else + return tryReadFloatTextFast(x, rb); + } + else /*if constexpr (is_integer_v)*/ + return tryReadIntText(x, rb); +} + +template <> +inline bool tryParseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) +{ + DayNum tmp(0); + if (!tryReadDateText(tmp, rb, *time_zone)) + return false; + x = tmp; + return true; +} + +template <> +inline bool tryParseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) +{ + ExtendedDayNum tmp(0); + if (!tryReadDateText(tmp, rb, *time_zone)) + return false; + x = tmp; + return true; +} + +template <> +inline bool tryParseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) +{ + time_t time = 0; + if (!tryReadDateTimeText(time, rb, *time_zone)) + return false; + convertFromTime(x, time); + return true; +} + +template <> +inline bool tryParseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) +{ + UUID tmp; + if (!tryReadUUIDText(tmp, rb)) + return false; + + x = tmp.toUnderType(); + return true; +} + +template <> +inline bool tryParseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) +{ + IPv4 tmp; + if (!tryReadIPv4Text(tmp, rb)) + return false; + + x = tmp.toUnderType(); + return true; +} + +template <> +inline bool tryParseImpl(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) +{ + IPv6 tmp; + if (!tryReadIPv6Text(tmp, rb)) + return false; + + x = tmp; + return true; +} + + +/** Throw exception with verbose message when string value is not parsed completely. 
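+ *
+ * E.g. SELECT toUInt8('1x') fails with CANNOT_PARSE_TEXT, reporting how much of the input was
+ * parsed and pointing to the toUInt8OrZero / toUInt8OrNull alternatives.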
+ */
+[[noreturn]] inline void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, const IDataType & result_type)
+{
+ WriteBufferFromOwnString message_buf;
+ message_buf << "Cannot parse string " << quote << String(read_buffer.buffer().begin(), read_buffer.buffer().size())
+ << " as " << result_type.getName()
+ << ": syntax error";
+
+ if (read_buffer.offset())
+ message_buf << " at position " << read_buffer.offset()
+ << " (parsed just " << quote << String(read_buffer.buffer().begin(), read_buffer.offset()) << ")";
+ else
+ message_buf << " at the beginning of the string";
+
+ // Currently there are no functions toIPv{4,6}Or{Null,Zero}
+ if (isNativeNumber(result_type) && !(result_type.getName() == "IPv4" || result_type.getName() == "IPv6"))
+ message_buf << ". Note: there are to" << result_type.getName() << "OrZero and to" << result_type.getName() << "OrNull functions, which return zero/NULL instead of throwing an exception.";
+
+ throw Exception(PreformattedMessage{message_buf.str(), "Cannot parse string {} as {}: syntax error {}"}, ErrorCodes::CANNOT_PARSE_TEXT);
+}
+
+
+enum class ConvertFromStringExceptionMode
+{
+ Throw, /// Throw exception if value cannot be parsed.
+ Zero, /// Fill with zero or default if value cannot be parsed.
+ Null /// Return ColumnNullable with NULLs when value cannot be parsed.
+};
+
+enum class ConvertFromStringParsingMode
+{
+ Normal,
+ BestEffort, /// Only applicable for DateTime. Uses a sophisticated (and slower) parsing method.
+ BestEffortUS
+};
+
+template <typename FromDataType, typename ToDataType, typename Name, ConvertFromStringExceptionMode exception_mode, ConvertFromStringParsingMode parsing_mode>
+struct ConvertThroughParsing
+{
+ static_assert(std::is_same_v<FromDataType, DataTypeString> || std::is_same_v<FromDataType, DataTypeFixedString>,
+ "ConvertThroughParsing is only applicable for String or FixedString data types");
+
+ static constexpr bool to_datetime64 = std::is_same_v<ToDataType, DataTypeDateTime64>;
+
+ static bool isAllRead(ReadBuffer & in)
+ {
+ /// In case of FixedString, skip zero bytes at end.
+ if constexpr (std::is_same_v<FromDataType, DataTypeFixedString>)
+ while (!in.eof() && *in.position() == 0)
+ ++in.position();
+
+ if (in.eof())
+ return true;
+
+ /// Special case that allows parsing a string with DateTime or DateTime64 as Date or Date32.
+ if constexpr (std::is_same_v<ToDataType, DataTypeDate> || std::is_same_v<ToDataType, DataTypeDate32>)
+ {
+ if (!in.eof() && (*in.position() == ' ' || *in.position() == 'T'))
+ {
+ if (in.buffer().size() == strlen("YYYY-MM-DD hh:mm:ss"))
+ return true;
+
+ if (in.buffer().size() >= strlen("YYYY-MM-DD hh:mm:ss.x")
+ && in.buffer().begin()[19] == '.')
+ {
+ in.position() = in.buffer().begin() + 20;
+
+ while (!in.eof() && isNumericASCII(*in.position()))
+ ++in.position();
+
+ if (in.eof())
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
+
+ template <typename Additions = void *>
+ static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, size_t input_rows_count,
+ Additions additions [[maybe_unused]] = Additions())
+ {
+ using ColVecTo = typename ToDataType::ColumnType;
+
+ const DateLUTImpl * local_time_zone [[maybe_unused]] = nullptr;
+ const DateLUTImpl * utc_time_zone [[maybe_unused]] = nullptr;
+
+ /// For conversion to Date or DateTime type, second argument with time zone could be specified.
+ if constexpr (std::is_same_v<ToDataType, DataTypeDateTime> || to_datetime64)
+ {
+ const auto result_type = removeNullable(res_type);
+ // Time zone is already figured out during result type resolution, no need to do it here.
+ if (const auto dt_col = checkAndGetDataType(result_type.get())) + local_time_zone = &dt_col->getTimeZone(); + else + local_time_zone = &extractTimeZoneFromFunctionArguments(arguments, 1, 0); + + if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort || parsing_mode == ConvertFromStringParsingMode::BestEffortUS) + utc_time_zone = &DateLUT::instance("UTC"); + } + else if constexpr (std::is_same_v || std::is_same_v) + { + // Timezone is more or less dummy when parsing Date/Date32 from string. + local_time_zone = &DateLUT::instance(); + utc_time_zone = &DateLUT::instance("UTC"); + } + + const IColumn * col_from = arguments[0].column.get(); + const ColumnString * col_from_string = checkAndGetColumn(col_from); + const ColumnFixedString * col_from_fixed_string = checkAndGetColumn(col_from); + + if (std::is_same_v && !col_from_string) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + col_from->getName(), Name::name); + + if (std::is_same_v && !col_from_fixed_string) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + col_from->getName(), Name::name); + + size_t size = input_rows_count; + typename ColVecTo::MutablePtr col_to = nullptr; + + if constexpr (IsDataTypeDecimal) + { + UInt32 scale = additions; + if constexpr (to_datetime64) + { + ToDataType check_bounds_in_ctor(scale, local_time_zone ? local_time_zone->getTimeZone() : String{}); + } + else + { + ToDataType check_bounds_in_ctor(ToDataType::maxPrecision(), scale); + } + col_to = ColVecTo::create(size, scale); + } + else + col_to = ColVecTo::create(size); + + typename ColVecTo::Container & vec_to = col_to->getData(); + + ColumnUInt8::MutablePtr col_null_map_to; + ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr; + if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) + { + col_null_map_to = ColumnUInt8::create(size); + vec_null_map_to = &col_null_map_to->getData(); + } + + const ColumnString::Chars * chars = nullptr; + const IColumn::Offsets * offsets = nullptr; + size_t fixed_string_size = 0; + + if constexpr (std::is_same_v) + { + chars = &col_from_string->getChars(); + offsets = &col_from_string->getOffsets(); + } + else + { + chars = &col_from_fixed_string->getChars(); + fixed_string_size = col_from_fixed_string->getN(); + } + + size_t current_offset = 0; + + bool precise_float_parsing = false; + + if (DB::CurrentThread::isInitialized()) + { + const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext(); + + if (query_context) + precise_float_parsing = query_context->getSettingsRef().precise_float_parsing; + } + + for (size_t i = 0; i < size; ++i) + { + size_t next_offset = std::is_same_v ? (*offsets)[i] : (current_offset + fixed_string_size); + size_t string_size = std::is_same_v ? 
next_offset - current_offset - 1 : fixed_string_size; + + ReadBufferFromMemory read_buffer(&(*chars)[current_offset], string_size); + + if constexpr (exception_mode == ConvertFromStringExceptionMode::Throw) + { + if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort) + { + if constexpr (to_datetime64) + { + DateTime64 res = 0; + parseDateTime64BestEffort(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); + vec_to[i] = res; + } + else + { + time_t res; + parseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone); + convertFromTime(vec_to[i], res); + } + } + else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS) + { + if constexpr (to_datetime64) + { + DateTime64 res = 0; + parseDateTime64BestEffortUS(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); + vec_to[i] = res; + } + else + { + time_t res; + parseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone); + convertFromTime(vec_to[i], res); + } + } + else + { + if constexpr (to_datetime64) + { + DateTime64 value = 0; + readDateTime64Text(value, col_to->getScale(), read_buffer, *local_time_zone); + vec_to[i] = value; + } + else if constexpr (IsDataTypeDecimal) + { + SerializationDecimal::readText( + vec_to[i], read_buffer, ToDataType::maxPrecision(), col_to->getScale()); + } + else + { + /// we want to utilize constexpr condition here, which is not mixable with value comparison + do + { + if constexpr (std::is_same_v && std::is_same_v) + { + if (fixed_string_size == IPV6_BINARY_LENGTH) + { + readBinary(vec_to[i], read_buffer); + break; + } + } + parseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing); + } while (false); + } + } + + if (!isAllRead(read_buffer)) + throwExceptionForIncompletelyParsedValue(read_buffer, *res_type); + } + else + { + bool parsed; + + if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort) + { + if constexpr (to_datetime64) + { + DateTime64 res = 0; + parsed = tryParseDateTime64BestEffort(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); + vec_to[i] = res; + } + else + { + time_t res; + parsed = tryParseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone); + convertFromTime(vec_to[i],res); + } + } + else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS) + { + if constexpr (to_datetime64) + { + DateTime64 res = 0; + parsed = tryParseDateTime64BestEffortUS(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); + vec_to[i] = res; + } + else + { + time_t res; + parsed = tryParseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone); + convertFromTime(vec_to[i],res); + } + } + else + { + if constexpr (to_datetime64) + { + DateTime64 value = 0; + parsed = tryReadDateTime64Text(value, col_to->getScale(), read_buffer, *local_time_zone); + vec_to[i] = value; + } + else if constexpr (IsDataTypeDecimal) + { + parsed = SerializationDecimal::tryReadText( + vec_to[i], read_buffer, ToDataType::maxPrecision(), col_to->getScale()); + } + else + { + /// we want to utilize constexpr condition here, which is not mixable with value comparison + do + { + if constexpr (std::is_same_v && std::is_same_v) + { + if (fixed_string_size == IPV6_BINARY_LENGTH) + { + readBinary(vec_to[i], read_buffer); + parsed = true; + break; + } + } + + parsed = tryParseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing); + } while (false); + } + } + + if (!isAllRead(read_buffer)) + parsed = 
false; + + if (!parsed) + { + if constexpr (std::is_same_v) + { + vec_to[i] = -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); + } + else + { + vec_to[i] = static_cast(0); + } + } + + if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) + (*vec_null_map_to)[i] = !parsed; + } + + current_offset = next_offset; + } + + if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) + return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); + else + return col_to; + } +}; + + +template +requires (!std::is_same_v) +struct ConvertImpl + : ConvertThroughParsing {}; + +template +requires (!std::is_same_v) +struct ConvertImpl + : ConvertThroughParsing {}; + +template +requires (!std::is_same_v) +struct ConvertImpl + : ConvertThroughParsing {}; + +template +requires (!std::is_same_v) +struct ConvertImpl + : ConvertThroughParsing {}; + +template +requires (is_any_of && is_any_of) +struct ConvertImpl + : ConvertThroughParsing {}; + +/// Generic conversion of any type from String. Used for complex types: Array and Tuple or types with custom serialization. +struct ConvertImplGenericFromString +{ + static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) + { + const IColumn & column_from = *arguments[0].column; + const IDataType & data_type_to = *result_type; + auto res = data_type_to.createColumn(); + auto serialization = data_type_to.getDefaultSerialization(); + const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; + + executeImpl(column_from, *res, *serialization, input_rows_count, null_map, result_type.get()); + return res; + } + + static void executeImpl( + const IColumn & column_from, + IColumn & column_to, + const ISerialization & serialization_from, + size_t input_rows_count, + const PaddedPODArray * null_map, + const IDataType * result_type) + { + column_to.reserve(input_rows_count); + + FormatSettings format_settings; + for (size_t i = 0; i < input_rows_count; ++i) + { + if (null_map && (*null_map)[i]) + { + column_to.insertDefault(); + continue; + } + + const auto & val = column_from.getDataAt(i); + ReadBufferFromMemory read_buffer(val.data, val.size); + try + { + serialization_from.deserializeWholeText(column_to, read_buffer, format_settings); + } + catch (const Exception & e) + { + auto * nullable_column = typeid_cast(&column_to); + if (e.code() == ErrorCodes::CANNOT_PARSE_BOOL && nullable_column) + { + auto & col_nullmap = nullable_column->getNullMapData(); + if (col_nullmap.size() != nullable_column->size()) + col_nullmap.resize_fill(nullable_column->size()); + if (nullable_column->size() == (i + 1)) + nullable_column->popBack(1); + nullable_column->insertDefault(); + continue; + } + throw; + } + + if (!read_buffer.eof()) + { + if (result_type) + throwExceptionForIncompletelyParsedValue(read_buffer, *result_type); + else + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, + "Cannot parse string to column {}. Expected eof", column_to.getName()); + } + } + } +}; + + +template <> +struct ConvertImpl + : ConvertImpl {}; + +template <> +struct ConvertImpl + : ConvertImpl {}; + +/** If types are identical, just take reference to column. 
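+ *
+ * E.g. toString(s) for a String argument returns the input column as-is, without copying.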
+ */ +template +requires (!T::is_parametric) +struct ConvertImpl +{ + template + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, + Additions additions [[maybe_unused]] = Additions()) + { + return arguments[0].column; + } +}; + +template +struct ConvertImpl +{ + template + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, + Additions additions [[maybe_unused]] = Additions()) + { + + return arguments[0].column; + } +}; + + +/** Conversion from FixedString to String. + * Cutting sequences of zero bytes from end of strings. + */ +template +struct ConvertImpl +{ + static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type, size_t /*input_rows_count*/) + { + ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); + const auto & nested = columnGetNested(arguments[0]); + if (const ColumnFixedString * col_from = checkAndGetColumn(nested.column.get())) + { + auto col_to = ColumnString::create(); + + const ColumnFixedString::Chars & data_from = col_from->getChars(); + ColumnString::Chars & data_to = col_to->getChars(); + ColumnString::Offsets & offsets_to = col_to->getOffsets(); + size_t size = col_from->size(); + size_t n = col_from->getN(); + data_to.resize(size * (n + 1)); /// + 1 - zero terminator + offsets_to.resize(size); + + size_t offset_from = 0; + size_t offset_to = 0; + for (size_t i = 0; i < size; ++i) + { + if (!null_map || !null_map->getData()[i]) + { + size_t bytes_to_copy = n; + while (bytes_to_copy > 0 && data_from[offset_from + bytes_to_copy - 1] == 0) + --bytes_to_copy; + + memcpy(&data_to[offset_to], &data_from[offset_from], bytes_to_copy); + offset_to += bytes_to_copy; + } + data_to[offset_to] = 0; + ++offset_to; + offsets_to[i] = offset_to; + offset_from += n; + } + + data_to.resize(offset_to); + if (return_type->isNullable() && null_map) + return ColumnNullable::create(std::move(col_to), std::move(null_map)); + return col_to; + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), Name::name); + } +}; + + +/// Declared early because used below. 
+struct NameToDate { static constexpr auto name = "toDate"; }; +struct NameToDate32 { static constexpr auto name = "toDate32"; }; +struct NameToDateTime { static constexpr auto name = "toDateTime"; }; +struct NameToDateTime32 { static constexpr auto name = "toDateTime32"; }; +struct NameToDateTime64 { static constexpr auto name = "toDateTime64"; }; +struct NameToString { static constexpr auto name = "toString"; }; +struct NameToDecimal32 { static constexpr auto name = "toDecimal32"; }; +struct NameToDecimal64 { static constexpr auto name = "toDecimal64"; }; +struct NameToDecimal128 { static constexpr auto name = "toDecimal128"; }; +struct NameToDecimal256 { static constexpr auto name = "toDecimal256"; }; + + +#define DEFINE_NAME_TO_INTERVAL(INTERVAL_KIND) \ + struct NameToInterval ## INTERVAL_KIND \ + { \ + static constexpr auto name = "toInterval" #INTERVAL_KIND; \ + static constexpr auto kind = IntervalKind::Kind::INTERVAL_KIND; \ + }; + +DEFINE_NAME_TO_INTERVAL(Nanosecond) +DEFINE_NAME_TO_INTERVAL(Microsecond) +DEFINE_NAME_TO_INTERVAL(Millisecond) +DEFINE_NAME_TO_INTERVAL(Second) +DEFINE_NAME_TO_INTERVAL(Minute) +DEFINE_NAME_TO_INTERVAL(Hour) +DEFINE_NAME_TO_INTERVAL(Day) +DEFINE_NAME_TO_INTERVAL(Week) +DEFINE_NAME_TO_INTERVAL(Month) +DEFINE_NAME_TO_INTERVAL(Quarter) +DEFINE_NAME_TO_INTERVAL(Year) + +#undef DEFINE_NAME_TO_INTERVAL + +struct NameParseDateTimeBestEffort; +struct NameParseDateTimeBestEffortOrZero; +struct NameParseDateTimeBestEffortOrNull; + +template +static inline bool isDateTime64(const ColumnsWithTypeAndName & arguments) +{ + if constexpr (std::is_same_v) + return true; + else if constexpr (std::is_same_v || std::is_same_v + || std::is_same_v || std::is_same_v) + { + return (arguments.size() == 2 && isUInt(arguments[1].type)) || arguments.size() == 3; + } + + return false; +} + +template +class FunctionConvert : public IFunction +{ +public: + using Monotonic = MonotonicityImpl; + + static constexpr auto name = Name::name; + static constexpr bool to_decimal = + std::is_same_v || std::is_same_v + || std::is_same_v || std::is_same_v; + + static constexpr bool to_datetime64 = std::is_same_v; + + static constexpr bool to_string_or_fixed_string = std::is_same_v || + std::is_same_v; + + static constexpr bool to_date_or_datetime = std::is_same_v || + std::is_same_v || + std::is_same_v; + + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + static FunctionPtr create() { return std::make_shared(); } + + FunctionConvert() = default; + explicit FunctionConvert(ContextPtr context_) : context(context_) {} + + String getName() const override + { + return name; + } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool isInjective(const ColumnsWithTypeAndName &) const override { return std::is_same_v; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & arguments) const override + { + /// TODO: We can make more optimizations here. 
+ return !(to_date_or_datetime && isNumber(*arguments[0].type)); + } + + using DefaultReturnTypeGetter = std::function; + static DataTypePtr getReturnTypeDefaultImplementationForNulls(const ColumnsWithTypeAndName & arguments, const DefaultReturnTypeGetter & getter) + { + NullPresence null_presence = getNullPresense(arguments); + + if (null_presence.has_null_constant) + { + return makeNullable(std::make_shared()); + } + if (null_presence.has_nullable) + { + auto nested_columns = Block(createBlockWithNestedColumns(arguments)); + auto return_type = getter(ColumnsWithTypeAndName(nested_columns.begin(), nested_columns.end())); + return makeNullable(return_type); + } + + return getter(arguments); + } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + auto getter = [&] (const auto & args) { return getReturnTypeImplRemovedNullable(args); }; + auto res = getReturnTypeDefaultImplementationForNulls(arguments, getter); + to_nullable = res->isNullable(); + checked_return_type = true; + return res; + } + + DataTypePtr getReturnTypeImplRemovedNullable(const ColumnsWithTypeAndName & arguments) const + { + FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; + FunctionArgumentDescriptors optional_args; + + if constexpr (to_decimal) + { + mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); + } + + if (!to_decimal && isDateTime64(arguments)) + { + mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); + } + + // toString(DateTime or DateTime64, [timezone: String]) + if ((std::is_same_v && !arguments.empty() && (isDateTime64(arguments[0].type) || isDateTime(arguments[0].type))) + // toUnixTimestamp(value[, timezone : String]) + || std::is_same_v + // toDate(value[, timezone : String]) + || std::is_same_v // TODO: shall we allow timestamp argument for toDate? DateTime knows nothing about timezones and this argument is ignored below. + // toDate32(value[, timezone : String]) + || std::is_same_v + // toDateTime(value[, timezone: String]) + || std::is_same_v + // toDateTime64(value, scale : Integer[, timezone: String]) + || std::is_same_v) + { + optional_args.push_back({"timezone", static_cast(&isString), nullptr, "String"}); + } + + validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); + + if constexpr (std::is_same_v) + { + return std::make_shared(Name::kind); + } + else if constexpr (to_decimal) + { + UInt64 scale = extractToDecimalScale(arguments[1]); + + if constexpr (std::is_same_v) + return createDecimalMaxPrecision(scale); + else if constexpr (std::is_same_v) + return createDecimalMaxPrecision(scale); + else if constexpr (std::is_same_v) + return createDecimalMaxPrecision(scale); + else if constexpr (std::is_same_v) + return createDecimalMaxPrecision(scale); + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected branch in code of conversion function: it is a bug."); + } + else + { + // Optional second argument with time zone for DateTime. + UInt8 timezone_arg_position = 1; + UInt32 scale [[maybe_unused]] = DataTypeDateTime64::default_scale; + + // DateTime64 requires more arguments: scale and timezone. Since timezone is optional, scale should be first. 
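+ // E.g. toDateTime64('2023-01-02 03:04:05.678', 3, 'UTC') - scale 3 with an explicit timezone.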
+ if (isDateTime64(arguments)) + { + timezone_arg_position += 1; + scale = static_cast(arguments[1].column->get64(0)); + + if (to_datetime64 || scale != 0) /// toDateTime('xxxx-xx-xx xx:xx:xx', 0) return DateTime + return std::make_shared(scale, + extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); + + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); + } + + if constexpr (std::is_same_v) + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); + else if constexpr (std::is_same_v) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected branch in code of conversion function: it is a bug."); + else + return std::make_shared(); + } + } + + /// Function actually uses default implementation for nulls, + /// but we need to know if return type is Nullable or not, + /// so we use checked_return_type only to intercept the first call to getReturnTypeImpl(...). + bool useDefaultImplementationForNulls() const override + { + bool to_nullable_string = to_nullable && std::is_same_v; + return checked_return_type && !to_nullable_string; + } + + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override + { + if constexpr (std::is_same_v) + return {}; + else if constexpr (std::is_same_v) + return {2}; + return {1}; + } + bool canBeExecutedOnDefaultArguments() const override { return false; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + try + { + return executeInternal(arguments, result_type, input_rows_count); + } + catch (Exception & e) + { + /// More convenient error message. 
+ if (e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF) + { + e.addMessage("Cannot parse " + + result_type->getName() + " from " + + arguments[0].type->getName() + + ", because value is too short"); + } + else if (e.code() == ErrorCodes::CANNOT_PARSE_NUMBER + || e.code() == ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT + || e.code() == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED + || e.code() == ErrorCodes::CANNOT_PARSE_QUOTED_STRING + || e.code() == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE + || e.code() == ErrorCodes::CANNOT_PARSE_DATE + || e.code() == ErrorCodes::CANNOT_PARSE_DATETIME + || e.code() == ErrorCodes::CANNOT_PARSE_UUID + || e.code() == ErrorCodes::CANNOT_PARSE_IPV4 + || e.code() == ErrorCodes::CANNOT_PARSE_IPV6) + { + e.addMessage("Cannot parse " + + result_type->getName() + " from " + + arguments[0].type->getName()); + } + + throw; + } + } + + bool hasInformationAboutMonotonicity() const override + { + return Monotonic::has(); + } + + Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override + { + return Monotonic::get(type, left, right); + } + +private: + ContextPtr context; + mutable bool checked_return_type = false; + mutable bool to_nullable = false; + + ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const + { + if (arguments.empty()) + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least 1 argument", getName()); + + if (result_type->onlyNull()) + return result_type->createColumnConstWithDefaultValue(input_rows_count); + + const DataTypePtr from_type = removeNullable(arguments[0].type); + ColumnPtr result_column; + + [[maybe_unused]] FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; + + if (context) + date_time_overflow_behavior = context->getSettingsRef().date_time_overflow_behavior.value; + + auto call = [&](const auto & types, const auto & tag) -> bool + { + using Types = std::decay_t; + using LeftDataType = typename Types::LeftType; + using RightDataType = typename Types::RightType; + using SpecialTag = std::decay_t; + + if constexpr (IsDataTypeDecimal) + { + if constexpr (std::is_same_v) + { + /// Account for optional timezone argument. 
+ if (arguments.size() != 2 && arguments.size() != 3) + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects 2 or 3 arguments for DataTypeDateTime64.", getName()); + } + else if (arguments.size() != 2) + { + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects 2 arguments for Decimal.", getName()); + } + + const ColumnWithTypeAndName & scale_column = arguments[1]; + UInt32 scale = extractToDecimalScale(scale_column); + + switch (date_time_overflow_behavior) + { + case FormatSettings::DateTimeOverflowBehavior::Throw: + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); + break; + case FormatSettings::DateTimeOverflowBehavior::Ignore: + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); + break; + case FormatSettings::DateTimeOverflowBehavior::Saturate: + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); + break; + } + + } + else if constexpr (IsDataTypeDateOrDateTime && std::is_same_v) + { + const auto * dt64 = assert_cast(arguments[0].type.get()); + switch (date_time_overflow_behavior) + { + case FormatSettings::DateTimeOverflowBehavior::Throw: + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, dt64->getScale()); + break; + case FormatSettings::DateTimeOverflowBehavior::Ignore: + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, dt64->getScale()); + break; + case FormatSettings::DateTimeOverflowBehavior::Saturate: + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, dt64->getScale()); + break; + } + } +#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE) \ + case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \ + result_column = ConvertImpl::execute( \ + arguments, result_type, input_rows_count); \ + break; + + else if constexpr (IsDataTypeDecimalOrNumber && IsDataTypeDecimalOrNumber) + { + using LeftT = typename LeftDataType::FieldType; + using RightT = typename RightDataType::FieldType; + + static constexpr bool bad_left = + is_decimal || std::is_floating_point_v || is_big_int_v || is_signed_v; + static constexpr bool bad_right = + is_decimal || std::is_floating_point_v || is_big_int_v || is_signed_v; + + /// Disallow int vs UUID conversion (but support int vs UInt128 conversion) + if constexpr ((bad_left && std::is_same_v) || + (bad_right && std::is_same_v)) + { + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Wrong UUID conversion"); + } + else + { + switch (date_time_overflow_behavior) + { + GENERATE_OVERFLOW_MODE_CASE(Throw) + GENERATE_OVERFLOW_MODE_CASE(Ignore) + GENERATE_OVERFLOW_MODE_CASE(Saturate) + } + } + } + else if constexpr ((IsDataTypeNumber || IsDataTypeDateOrDateTime) + && IsDataTypeDateOrDateTime) + { + switch (date_time_overflow_behavior) + { + GENERATE_OVERFLOW_MODE_CASE(Throw) + GENERATE_OVERFLOW_MODE_CASE(Ignore) + GENERATE_OVERFLOW_MODE_CASE(Saturate) + } + } +#undef GENERATE_OVERFLOW_MODE_CASE + else + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count); + + return true; + }; + + if (isDateTime64(arguments)) + { + /// For toDateTime('xxxx-xx-xx xx:xx:xx.00', 2[, 'timezone']) we need to it convert to DateTime64 + const ColumnWithTypeAndName & scale_column = arguments[1]; + UInt32 scale = extractToDecimalScale(scale_column); + + if (to_datetime64 || scale != 0) /// When scale = 0, the data type is DateTime otherwise the data type is DateTime64 + { + if 
(!callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{})) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[0].type->getName(), getName()); + + return result_column; + } + } + + if constexpr (std::is_same_v) + { + if (from_type->getCustomSerialization()) + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + } + + bool done = false; + if constexpr (to_string_or_fixed_string) + { + done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{}); + } + else + { + bool cast_ipv4_ipv6_default_on_conversion_error = false; + if constexpr (is_any_of) + if (context && (cast_ipv4_ipv6_default_on_conversion_error = context->getSettingsRef().cast_ipv4_ipv6_default_on_conversion_error)) + done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertReturnZeroOnErrorTag{}); + + if (!cast_ipv4_ipv6_default_on_conversion_error) + { + /// We should use ConvertFromStringExceptionMode::Null mode when converting from String (or FixedString) + /// to Nullable type, to avoid 'value is too short' error on attempt to parse empty string from NULL values. + if (to_nullable && WhichDataType(from_type).isStringOrFixedString()) + done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertReturnNullOnErrorTag{}); + else + done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{}); + } + } + + if (!done) + { + /// Generic conversion of any type to String. + if (std::is_same_v) + { + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + } + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[0].type->getName(), getName()); + } + + return result_column; + } +}; + + +/** Function toTOrZero (where T is number of date or datetime type): + * try to convert from String to type T through parsing, + * if cannot parse, return default value instead of throwing exception. + * Function toTOrNull will return Nullable type with NULL when cannot parse. + * NOTE Also need to implement tryToUnixTimestamp with timezone. 
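+ *
+ * E.g. toUInt32OrZero('abc') returns 0 and toUInt32OrNull('abc') returns NULL,
+ * while toUInt32('abc') throws.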
+ */
+template <typename ToDataType, typename Name, ConvertFromStringExceptionMode exception_mode, ConvertFromStringParsingMode parsing_mode = ConvertFromStringParsingMode::Normal>
+class FunctionConvertFromString : public IFunction
+{
+public:
+ static constexpr auto name = Name::name;
+ static constexpr bool to_decimal =
+ std::is_same_v<ToDataType, DataTypeDecimal<Decimal32>> ||
+ std::is_same_v<ToDataType, DataTypeDecimal<Decimal64>> ||
+ std::is_same_v<ToDataType, DataTypeDecimal<Decimal128>> ||
+ std::is_same_v<ToDataType, DataTypeDecimal<Decimal256>>;
+
+ static constexpr bool to_datetime64 = std::is_same_v<ToDataType, DataTypeDateTime64>;
+
+ static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionConvertFromString>(); }
+ static FunctionPtr create() { return std::make_shared<FunctionConvertFromString>(); }
+
+ String getName() const override
+ {
+ return name;
+ }
+
+ bool isVariadic() const override { return true; }
+ bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
+ size_t getNumberOfArguments() const override { return 0; }
+
+ bool useDefaultImplementationForConstants() const override { return true; }
+ bool canBeExecutedOnDefaultArguments() const override { return false; }
+
+ ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
+
+ DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
+ {
+ DataTypePtr res;
+
+ if (isDateTime64<Name, ToDataType>(arguments))
+ {
+ validateFunctionArgumentTypes(*this, arguments,
+ FunctionArgumentDescriptors{{"string", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), nullptr, "String or FixedString"}},
+ // optional
+ FunctionArgumentDescriptors{
+ {"precision", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isUInt8), isColumnConst, "const UInt8"},
+ {"timezone", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isStringOrFixedString), isColumnConst, "const String or FixedString"},
+ });
+
+ UInt64 scale = to_datetime64 ? DataTypeDateTime64::default_scale : 0;
+ if (arguments.size() > 1)
+ scale = extractToDecimalScale(arguments[1]);
+ const auto timezone = extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false);
+
+ if (scale == 0)
+ res = std::make_shared<DataTypeDateTime>(timezone);
+ else
+ res = std::make_shared<DataTypeDateTime64>(scale, timezone);
+ }
+ else
+ {
+ if ((arguments.size() != 1 && arguments.size() != 2) || (to_decimal && arguments.size() != 2))
+ throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+ "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2. "
+ "Second argument only makes sense for DateTime (time zone, optional) and Decimal (scale).",
+ getName(), arguments.size());
+
+ if (!isStringOrFixedString(arguments[0].type))
+ {
+ if (this->getName().find("OrZero") != std::string::npos ||
+ this->getName().find("OrNull") != std::string::npos)
+ throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. 
" + "Conversion functions with postfix 'OrZero' or 'OrNull' should take String argument", + arguments[0].type->getName(), getName()); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", + arguments[0].type->getName(), getName()); + } + + if (arguments.size() == 2) + { + if constexpr (std::is_same_v) + { + if (!isString(arguments[1].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}", + arguments[1].type->getName(), getName()); + } + else if constexpr (to_decimal) + { + if (!isInteger(arguments[1].type)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}", + arguments[1].type->getName(), getName()); + if (!arguments[1].column) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be constant", getName()); + } + else + { + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1. " + "Second argument makes sense only for DateTime and Decimal.", + getName(), arguments.size()); + } + } + + if constexpr (std::is_same_v) + res = std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0, false)); + else if constexpr (std::is_same_v) + throw Exception(ErrorCodes::LOGICAL_ERROR, "MaterializedMySQL is a bug."); + else if constexpr (to_decimal) + { + UInt64 scale = extractToDecimalScale(arguments[1]); + res = createDecimalMaxPrecision(scale); + if (!res) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Something wrong with toDecimalNNOrZero() or toDecimalNNOrNull()"); + } + else + res = std::make_shared(); + } + + if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) + res = std::make_shared(res); + + return res; + } + + template + ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, UInt32 scale = 0) const + { + const IDataType * from_type = arguments[0].type.get(); + + if (checkAndGetDataType(from_type)) + { + return ConvertThroughParsing::execute( + arguments, result_type, input_rows_count, scale); + } + else if (checkAndGetDataType(from_type)) + { + return ConvertThroughParsing::execute( + arguments, result_type, input_rows_count, scale); + } + + return nullptr; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + ColumnPtr result_column; + + if constexpr (to_decimal) + result_column = executeInternal(arguments, result_type, input_rows_count, + assert_cast(*removeNullable(result_type)).getScale()); + else + { + if (isDateTime64(arguments)) + { + UInt64 scale = to_datetime64 ? DataTypeDateTime64::default_scale : 0; + if (arguments.size() > 1) + scale = extractToDecimalScale(arguments[1]); + + if (scale == 0) + result_column = executeInternal(arguments, result_type, input_rows_count); + else + { + result_column = executeInternal(arguments, result_type, input_rows_count, static_cast(scale)); + } + } + else + { + result_column = executeInternal(arguments, result_type, input_rows_count); + } + } + + if (!result_column) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " + "Only String or FixedString argument is accepted for try-conversion function. 
For other arguments, " + "use function without 'orZero' or 'orNull'.", arguments[0].type->getName(), getName()); + + return result_column; + } +}; + + +/// Monotonicity. + +struct PositiveMonotonicity +{ + static bool has() { return true; } + static IFunction::Monotonicity get(const IDataType &, const Field &, const Field &) + { + return { .is_monotonic = true }; + } +}; + +struct UnknownMonotonicity +{ + static bool has() { return false; } + static IFunction::Monotonicity get(const IDataType &, const Field &, const Field &) + { + return { }; + } +}; + +template +struct ToNumberMonotonicity +{ + static bool has() { return true; } + + static UInt64 divideByRangeOfType(UInt64 x) + { + if constexpr (sizeof(T) < sizeof(UInt64)) + return x >> (sizeof(T) * 8); + else + return 0; + } + + static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) + { + if (!type.isValueRepresentedByNumber()) + return {}; + + /// If type is same, the conversion is always monotonic. + /// (Enum has separate case, because it is different data type) + if (checkAndGetDataType>(&type) || + checkAndGetDataType>(&type)) + return { .is_monotonic = true, .is_always_monotonic = true }; + + /// Float cases. + + /// When converting to Float, the conversion is always monotonic. + if constexpr (std::is_floating_point_v) + return { .is_monotonic = true, .is_always_monotonic = true }; + + const auto * low_cardinality = typeid_cast(&type); + const IDataType * low_cardinality_dictionary_type = nullptr; + if (low_cardinality) + low_cardinality_dictionary_type = low_cardinality->getDictionaryType().get(); + + WhichDataType which_type(type); + WhichDataType which_inner_type = low_cardinality + ? WhichDataType(low_cardinality_dictionary_type) + : WhichDataType(type); + + /// If converting from Float, for monotonicity, arguments must fit in range of result type. + if (which_inner_type.isFloat()) + { + if (left.isNull() || right.isNull()) + return {}; + + Float64 left_float = left.get(); + Float64 right_float = right.get(); + + if (left_float >= static_cast(std::numeric_limits::min()) + && left_float <= static_cast(std::numeric_limits::max()) + && right_float >= static_cast(std::numeric_limits::min()) + && right_float <= static_cast(std::numeric_limits::max())) + return { .is_monotonic = true }; + + return {}; + } + + /// Integer cases. + + /// Only support types represented by native integers. + /// It can be extended to big integers, decimals and DateTime64 later. + /// By the way, NULLs are representing unbounded ranges. + if (!((left.isNull() || left.getType() == Field::Types::UInt64 || left.getType() == Field::Types::Int64) + && (right.isNull() || right.getType() == Field::Types::UInt64 || right.getType() == Field::Types::Int64))) + return {}; + + const bool from_is_unsigned = type.isValueRepresentedByUnsignedInteger(); + const bool to_is_unsigned = is_unsigned_v; + + const size_t size_of_from = type.getSizeOfValueInMemory(); + const size_t size_of_to = sizeof(T); + + const bool left_in_first_half = left.isNull() + ? from_is_unsigned + : (left.get() >= 0); + + const bool right_in_first_half = right.isNull() + ? !from_is_unsigned + : (right.get() >= 0); + + /// Size of type is the same. + if (size_of_from == size_of_to) + { + if (from_is_unsigned == to_is_unsigned) + return { .is_monotonic = true, .is_always_monotonic = true }; + + if (left_in_first_half == right_in_first_half) + return { .is_monotonic = true }; + + return {}; + } + + /// Size of type is expanded. 
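+ /// E.g. Int32 -> Int64 or UInt8 -> UInt16: widening within the same signedness is always monotonic.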
+ if (size_of_from < size_of_to) + { + if (from_is_unsigned == to_is_unsigned) + return { .is_monotonic = true, .is_always_monotonic = true }; + + if (!to_is_unsigned) + return { .is_monotonic = true, .is_always_monotonic = true }; + + /// signed -> unsigned. If arguments from the same half, then function is monotonic. + if (left_in_first_half == right_in_first_half) + return { .is_monotonic = true }; + + return {}; + } + + /// Size of type is shrunk. + if (size_of_from > size_of_to) + { + /// Function cannot be monotonic on unbounded ranges. + if (left.isNull() || right.isNull()) + return {}; + + /// Function cannot be monotonic when left and right are not on the same ranges. + if (divideByRangeOfType(left.get()) != divideByRangeOfType(right.get())) + return {}; + + if (to_is_unsigned) + return { .is_monotonic = true }; + else + { + // If To is signed, it's possible that the signedness is different after conversion. So we check it explicitly. + const bool is_monotonic = (T(left.get()) >= 0) == (T(right.get()) >= 0); + + return { .is_monotonic = is_monotonic }; + } + } + + UNREACHABLE(); + } +}; + +struct ToDateMonotonicity +{ + static bool has() { return true; } + + static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) + { + auto which = WhichDataType(type); + if (which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() || which.isInt8() || which.isInt16() || which.isUInt8() + || which.isUInt16()) + { + return {.is_monotonic = true, .is_always_monotonic = true}; + } + else if ( + ((left.getType() == Field::Types::UInt64 || left.isNull()) && (right.getType() == Field::Types::UInt64 || right.isNull()) + && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) + || ((left.getType() == Field::Types::Int64 || left.isNull()) && (right.getType() == Field::Types::Int64 || right.isNull()) + && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) + || (( + (left.getType() == Field::Types::Float64 || left.isNull()) + && (right.getType() == Field::Types::Float64 || right.isNull()) + && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF)))) + || !isNativeNumber(type)) + { + return {}; + } + else + { + return {.is_monotonic = true, .is_always_monotonic = true}; + } + } +}; + +struct ToDateTimeMonotonicity +{ + static bool has() { return true; } + + static IFunction::Monotonicity get(const IDataType & type, const Field &, const Field &) + { + if (type.isValueRepresentedByNumber()) + return {.is_monotonic = true, .is_always_monotonic = true}; + else + return {}; + } +}; + +/** The monotonicity for the `toString` function is mainly determined for test purposes. + * It is doubtful that anyone is looking to optimize queries with conditions `toString(CounterID) = 34`. + */ +struct ToStringMonotonicity +{ + static bool has() { return true; } + + static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) + { + IFunction::Monotonicity positive{ .is_monotonic = true }; + IFunction::Monotonicity not_monotonic; + + const auto * type_ptr = &type; + if (const auto * low_cardinality_type = checkAndGetDataType(type_ptr)) + type_ptr = low_cardinality_type->getDictionaryType().get(); + + /// Order on enum values (which is the order on integers) is completely arbitrary in respect to the order on strings. 
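+ /// E.g. for a hypothetical Enum8('z' = 1, 'a' = 2), the integer order 1 < 2 corresponds to the string order 'z' > 'a'.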
+ if (WhichDataType(type).isEnum()) + return not_monotonic; + + /// `toString` function is monotonous if the argument is Date or Date32 or DateTime or String, or non-negative numbers with the same number of symbols. + if (checkDataTypes(type_ptr)) + return positive; + + if (left.isNull() || right.isNull()) + return {}; + + if (left.getType() == Field::Types::UInt64 + && right.getType() == Field::Types::UInt64) + { + return (left.get() == 0 && right.get() == 0) + || (floor(log10(left.get())) == floor(log10(right.get()))) + ? positive : not_monotonic; + } + + if (left.getType() == Field::Types::Int64 + && right.getType() == Field::Types::Int64) + { + return (left.get() == 0 && right.get() == 0) + || (left.get() > 0 && right.get() > 0 && floor(log10(left.get())) == floor(log10(right.get()))) + ? positive : not_monotonic; + } + + return not_monotonic; + } +}; + + +struct NameToUInt8 { static constexpr auto name = "toUInt8"; }; +struct NameToUInt16 { static constexpr auto name = "toUInt16"; }; +struct NameToUInt32 { static constexpr auto name = "toUInt32"; }; +struct NameToUInt64 { static constexpr auto name = "toUInt64"; }; +struct NameToUInt128 { static constexpr auto name = "toUInt128"; }; +struct NameToUInt256 { static constexpr auto name = "toUInt256"; }; +struct NameToInt8 { static constexpr auto name = "toInt8"; }; +struct NameToInt16 { static constexpr auto name = "toInt16"; }; +struct NameToInt32 { static constexpr auto name = "toInt32"; }; +struct NameToInt64 { static constexpr auto name = "toInt64"; }; +struct NameToInt128 { static constexpr auto name = "toInt128"; }; +struct NameToInt256 { static constexpr auto name = "toInt256"; }; +struct NameToFloat32 { static constexpr auto name = "toFloat32"; }; +struct NameToFloat64 { static constexpr auto name = "toFloat64"; }; +struct NameToUUID { static constexpr auto name = "toUUID"; }; +struct NameToIPv4 { static constexpr auto name = "toIPv4"; }; +struct NameToIPv6 { static constexpr auto name = "toIPv6"; }; + +using FunctionToUInt8 = FunctionConvert>; +using FunctionToUInt16 = FunctionConvert>; +using FunctionToUInt32 = FunctionConvert>; +using FunctionToUInt64 = FunctionConvert>; +using FunctionToUInt128 = FunctionConvert>; +using FunctionToUInt256 = FunctionConvert>; +using FunctionToInt8 = FunctionConvert>; +using FunctionToInt16 = FunctionConvert>; +using FunctionToInt32 = FunctionConvert>; +using FunctionToInt64 = FunctionConvert>; +using FunctionToInt128 = FunctionConvert>; +using FunctionToInt256 = FunctionConvert>; +using FunctionToFloat32 = FunctionConvert>; +using FunctionToFloat64 = FunctionConvert>; + +using FunctionToDate = FunctionConvert; + +using FunctionToDate32 = FunctionConvert; + +using FunctionToDateTime = FunctionConvert; + +using FunctionToDateTime32 = FunctionConvert; + +using FunctionToDateTime64 = FunctionConvert; + +using FunctionToUUID = FunctionConvert>; +using FunctionToIPv4 = FunctionConvert>; +using FunctionToIPv6 = FunctionConvert>; +using FunctionToString = FunctionConvert; +using FunctionToUnixTimestamp = FunctionConvert>; +using FunctionToDecimal32 = FunctionConvert, NameToDecimal32, UnknownMonotonicity>; +using FunctionToDecimal64 = FunctionConvert, NameToDecimal64, UnknownMonotonicity>; +using FunctionToDecimal128 = FunctionConvert, NameToDecimal128, UnknownMonotonicity>; +using FunctionToDecimal256 = FunctionConvert, NameToDecimal256, UnknownMonotonicity>; + +template struct FunctionTo; + +template <> struct FunctionTo { using Type = FunctionToUInt8; }; +template <> struct FunctionTo { using Type = 
FunctionToUInt16; }; +template <> struct FunctionTo { using Type = FunctionToUInt32; }; +template <> struct FunctionTo { using Type = FunctionToUInt64; }; +template <> struct FunctionTo { using Type = FunctionToUInt128; }; +template <> struct FunctionTo { using Type = FunctionToUInt256; }; +template <> struct FunctionTo { using Type = FunctionToInt8; }; +template <> struct FunctionTo { using Type = FunctionToInt16; }; +template <> struct FunctionTo { using Type = FunctionToInt32; }; +template <> struct FunctionTo { using Type = FunctionToInt64; }; +template <> struct FunctionTo { using Type = FunctionToInt128; }; +template <> struct FunctionTo { using Type = FunctionToInt256; }; +template <> struct FunctionTo { using Type = FunctionToFloat32; }; +template <> struct FunctionTo { using Type = FunctionToFloat64; }; + +template +struct FunctionTo { using Type = FunctionToDate; }; + +template +struct FunctionTo { using Type = FunctionToDate32; }; + +template +struct FunctionTo { using Type = FunctionToDateTime; }; + +template +struct FunctionTo { using Type = FunctionToDateTime64; }; + +template <> struct FunctionTo { using Type = FunctionToUUID; }; +template <> struct FunctionTo { using Type = FunctionToIPv4; }; +template <> struct FunctionTo { using Type = FunctionToIPv6; }; +template <> struct FunctionTo { using Type = FunctionToString; }; +template <> struct FunctionTo { using Type = FunctionToFixedString; }; +template <> struct FunctionTo> { using Type = FunctionToDecimal32; }; +template <> struct FunctionTo> { using Type = FunctionToDecimal64; }; +template <> struct FunctionTo> { using Type = FunctionToDecimal128; }; +template <> struct FunctionTo> { using Type = FunctionToDecimal256; }; + +template struct FunctionTo> + : FunctionTo> +{ +}; + +struct NameToUInt8OrZero { static constexpr auto name = "toUInt8OrZero"; }; +struct NameToUInt16OrZero { static constexpr auto name = "toUInt16OrZero"; }; +struct NameToUInt32OrZero { static constexpr auto name = "toUInt32OrZero"; }; +struct NameToUInt64OrZero { static constexpr auto name = "toUInt64OrZero"; }; +struct NameToUInt128OrZero { static constexpr auto name = "toUInt128OrZero"; }; +struct NameToUInt256OrZero { static constexpr auto name = "toUInt256OrZero"; }; +struct NameToInt8OrZero { static constexpr auto name = "toInt8OrZero"; }; +struct NameToInt16OrZero { static constexpr auto name = "toInt16OrZero"; }; +struct NameToInt32OrZero { static constexpr auto name = "toInt32OrZero"; }; +struct NameToInt64OrZero { static constexpr auto name = "toInt64OrZero"; }; +struct NameToInt128OrZero { static constexpr auto name = "toInt128OrZero"; }; +struct NameToInt256OrZero { static constexpr auto name = "toInt256OrZero"; }; +struct NameToFloat32OrZero { static constexpr auto name = "toFloat32OrZero"; }; +struct NameToFloat64OrZero { static constexpr auto name = "toFloat64OrZero"; }; +struct NameToDateOrZero { static constexpr auto name = "toDateOrZero"; }; +struct NameToDate32OrZero { static constexpr auto name = "toDate32OrZero"; }; +struct NameToDateTimeOrZero { static constexpr auto name = "toDateTimeOrZero"; }; +struct NameToDateTime64OrZero { static constexpr auto name = "toDateTime64OrZero"; }; +struct NameToDecimal32OrZero { static constexpr auto name = "toDecimal32OrZero"; }; +struct NameToDecimal64OrZero { static constexpr auto name = "toDecimal64OrZero"; }; +struct NameToDecimal128OrZero { static constexpr auto name = "toDecimal128OrZero"; }; +struct NameToDecimal256OrZero { static constexpr auto name = "toDecimal256OrZero"; }; +struct 
NameToUUIDOrZero { static constexpr auto name = "toUUIDOrZero"; }; +struct NameToIPv4OrZero { static constexpr auto name = "toIPv4OrZero"; }; +struct NameToIPv6OrZero { static constexpr auto name = "toIPv6OrZero"; }; + +using FunctionToUInt8OrZero = FunctionConvertFromString; +using FunctionToUInt16OrZero = FunctionConvertFromString; +using FunctionToUInt32OrZero = FunctionConvertFromString; +using FunctionToUInt64OrZero = FunctionConvertFromString; +using FunctionToUInt128OrZero = FunctionConvertFromString; +using FunctionToUInt256OrZero = FunctionConvertFromString; +using FunctionToInt8OrZero = FunctionConvertFromString; +using FunctionToInt16OrZero = FunctionConvertFromString; +using FunctionToInt32OrZero = FunctionConvertFromString; +using FunctionToInt64OrZero = FunctionConvertFromString; +using FunctionToInt128OrZero = FunctionConvertFromString; +using FunctionToInt256OrZero = FunctionConvertFromString; +using FunctionToFloat32OrZero = FunctionConvertFromString; +using FunctionToFloat64OrZero = FunctionConvertFromString; +using FunctionToDateOrZero = FunctionConvertFromString; +using FunctionToDate32OrZero = FunctionConvertFromString; +using FunctionToDateTimeOrZero = FunctionConvertFromString; +using FunctionToDateTime64OrZero = FunctionConvertFromString; +using FunctionToDecimal32OrZero = FunctionConvertFromString, NameToDecimal32OrZero, ConvertFromStringExceptionMode::Zero>; +using FunctionToDecimal64OrZero = FunctionConvertFromString, NameToDecimal64OrZero, ConvertFromStringExceptionMode::Zero>; +using FunctionToDecimal128OrZero = FunctionConvertFromString, NameToDecimal128OrZero, ConvertFromStringExceptionMode::Zero>; +using FunctionToDecimal256OrZero = FunctionConvertFromString, NameToDecimal256OrZero, ConvertFromStringExceptionMode::Zero>; +using FunctionToUUIDOrZero = FunctionConvertFromString; +using FunctionToIPv4OrZero = FunctionConvertFromString; +using FunctionToIPv6OrZero = FunctionConvertFromString; + +struct NameToUInt8OrNull { static constexpr auto name = "toUInt8OrNull"; }; +struct NameToUInt16OrNull { static constexpr auto name = "toUInt16OrNull"; }; +struct NameToUInt32OrNull { static constexpr auto name = "toUInt32OrNull"; }; +struct NameToUInt64OrNull { static constexpr auto name = "toUInt64OrNull"; }; +struct NameToUInt128OrNull { static constexpr auto name = "toUInt128OrNull"; }; +struct NameToUInt256OrNull { static constexpr auto name = "toUInt256OrNull"; }; +struct NameToInt8OrNull { static constexpr auto name = "toInt8OrNull"; }; +struct NameToInt16OrNull { static constexpr auto name = "toInt16OrNull"; }; +struct NameToInt32OrNull { static constexpr auto name = "toInt32OrNull"; }; +struct NameToInt64OrNull { static constexpr auto name = "toInt64OrNull"; }; +struct NameToInt128OrNull { static constexpr auto name = "toInt128OrNull"; }; +struct NameToInt256OrNull { static constexpr auto name = "toInt256OrNull"; }; +struct NameToFloat32OrNull { static constexpr auto name = "toFloat32OrNull"; }; +struct NameToFloat64OrNull { static constexpr auto name = "toFloat64OrNull"; }; +struct NameToDateOrNull { static constexpr auto name = "toDateOrNull"; }; +struct NameToDate32OrNull { static constexpr auto name = "toDate32OrNull"; }; +struct NameToDateTimeOrNull { static constexpr auto name = "toDateTimeOrNull"; }; +struct NameToDateTime64OrNull { static constexpr auto name = "toDateTime64OrNull"; }; +struct NameToDecimal32OrNull { static constexpr auto name = "toDecimal32OrNull"; }; +struct NameToDecimal64OrNull { static constexpr auto name = "toDecimal64OrNull"; }; 
+struct NameToDecimal128OrNull { static constexpr auto name = "toDecimal128OrNull"; }; +struct NameToDecimal256OrNull { static constexpr auto name = "toDecimal256OrNull"; }; +struct NameToUUIDOrNull { static constexpr auto name = "toUUIDOrNull"; }; +struct NameToIPv4OrNull { static constexpr auto name = "toIPv4OrNull"; }; +struct NameToIPv6OrNull { static constexpr auto name = "toIPv6OrNull"; }; + +using FunctionToUInt8OrNull = FunctionConvertFromString; +using FunctionToUInt16OrNull = FunctionConvertFromString; +using FunctionToUInt32OrNull = FunctionConvertFromString; +using FunctionToUInt64OrNull = FunctionConvertFromString; +using FunctionToUInt128OrNull = FunctionConvertFromString; +using FunctionToUInt256OrNull = FunctionConvertFromString; +using FunctionToInt8OrNull = FunctionConvertFromString; +using FunctionToInt16OrNull = FunctionConvertFromString; +using FunctionToInt32OrNull = FunctionConvertFromString; +using FunctionToInt64OrNull = FunctionConvertFromString; +using FunctionToInt128OrNull = FunctionConvertFromString; +using FunctionToInt256OrNull = FunctionConvertFromString; +using FunctionToFloat32OrNull = FunctionConvertFromString; +using FunctionToFloat64OrNull = FunctionConvertFromString; +using FunctionToDateOrNull = FunctionConvertFromString; +using FunctionToDate32OrNull = FunctionConvertFromString; +using FunctionToDateTimeOrNull = FunctionConvertFromString; +using FunctionToDateTime64OrNull = FunctionConvertFromString; +using FunctionToDecimal32OrNull = FunctionConvertFromString, NameToDecimal32OrNull, ConvertFromStringExceptionMode::Null>; +using FunctionToDecimal64OrNull = FunctionConvertFromString, NameToDecimal64OrNull, ConvertFromStringExceptionMode::Null>; +using FunctionToDecimal128OrNull = FunctionConvertFromString, NameToDecimal128OrNull, ConvertFromStringExceptionMode::Null>; +using FunctionToDecimal256OrNull = FunctionConvertFromString, NameToDecimal256OrNull, ConvertFromStringExceptionMode::Null>; +using FunctionToUUIDOrNull = FunctionConvertFromString; +using FunctionToIPv4OrNull = FunctionConvertFromString; +using FunctionToIPv6OrNull = FunctionConvertFromString; + +struct NameParseDateTimeBestEffort { static constexpr auto name = "parseDateTimeBestEffort"; }; +struct NameParseDateTimeBestEffortOrZero { static constexpr auto name = "parseDateTimeBestEffortOrZero"; }; +struct NameParseDateTimeBestEffortOrNull { static constexpr auto name = "parseDateTimeBestEffortOrNull"; }; +struct NameParseDateTimeBestEffortUS { static constexpr auto name = "parseDateTimeBestEffortUS"; }; +struct NameParseDateTimeBestEffortUSOrZero { static constexpr auto name = "parseDateTimeBestEffortUSOrZero"; }; +struct NameParseDateTimeBestEffortUSOrNull { static constexpr auto name = "parseDateTimeBestEffortUSOrNull"; }; +struct NameParseDateTime32BestEffort { static constexpr auto name = "parseDateTime32BestEffort"; }; +struct NameParseDateTime32BestEffortOrZero { static constexpr auto name = "parseDateTime32BestEffortOrZero"; }; +struct NameParseDateTime32BestEffortOrNull { static constexpr auto name = "parseDateTime32BestEffortOrNull"; }; +struct NameParseDateTime64BestEffort { static constexpr auto name = "parseDateTime64BestEffort"; }; +struct NameParseDateTime64BestEffortOrZero { static constexpr auto name = "parseDateTime64BestEffortOrZero"; }; +struct NameParseDateTime64BestEffortOrNull { static constexpr auto name = "parseDateTime64BestEffortOrNull"; }; +struct NameParseDateTime64BestEffortUS { static constexpr auto name = "parseDateTime64BestEffortUS"; }; +struct 
NameParseDateTime64BestEffortUSOrZero { static constexpr auto name = "parseDateTime64BestEffortUSOrZero"; }; +struct NameParseDateTime64BestEffortUSOrNull { static constexpr auto name = "parseDateTime64BestEffortUSOrNull"; }; + + +using FunctionParseDateTimeBestEffort = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTimeBestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>; +using FunctionParseDateTimeBestEffortOrZero = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTimeBestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>; +using FunctionParseDateTimeBestEffortOrNull = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTimeBestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>; + +using FunctionParseDateTimeBestEffortUS = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTimeBestEffortUS, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffortUS>; +using FunctionParseDateTimeBestEffortUSOrZero = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTimeBestEffortUSOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffortUS>; +using FunctionParseDateTimeBestEffortUSOrNull = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTimeBestEffortUSOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffortUS>; + +using FunctionParseDateTime32BestEffort = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTime32BestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>; +using FunctionParseDateTime32BestEffortOrZero = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTime32BestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>; +using FunctionParseDateTime32BestEffortOrNull = FunctionConvertFromString< + DataTypeDateTime, NameParseDateTime32BestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>; + +using FunctionParseDateTime64BestEffort = FunctionConvertFromString< + DataTypeDateTime64, NameParseDateTime64BestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>; +using FunctionParseDateTime64BestEffortOrZero = FunctionConvertFromString< + DataTypeDateTime64, NameParseDateTime64BestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>; +using FunctionParseDateTime64BestEffortOrNull = FunctionConvertFromString< + DataTypeDateTime64, NameParseDateTime64BestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>; + +using FunctionParseDateTime64BestEffortUS = FunctionConvertFromString< + DataTypeDateTime64, NameParseDateTime64BestEffortUS, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffortUS>; +using FunctionParseDateTime64BestEffortUSOrZero = FunctionConvertFromString< + DataTypeDateTime64, NameParseDateTime64BestEffortUSOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffortUS>; +using FunctionParseDateTime64BestEffortUSOrNull = FunctionConvertFromString< + DataTypeDateTime64, NameParseDateTime64BestEffortUSOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffortUS>; + + +class ExecutableFunctionCast : public IExecutableFunction +{ +public: + using WrapperType = std::function; + + explicit 
ExecutableFunctionCast( + WrapperType && wrapper_function_, const char * name_, std::optional diagnostic_) + : wrapper_function(std::move(wrapper_function_)), name(name_), diagnostic(std::move(diagnostic_)) {} + + String getName() const override { return name; } + +protected: + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + /// drop second argument, pass others + ColumnsWithTypeAndName new_arguments{arguments.front()}; + if (arguments.size() > 2) + new_arguments.insert(std::end(new_arguments), std::next(std::begin(arguments), 2), std::end(arguments)); + + try + { + return wrapper_function(new_arguments, result_type, nullptr, input_rows_count); + } + catch (Exception & e) + { + if (diagnostic) + e.addMessage("while converting source column " + backQuoteIfNeed(diagnostic->column_from) + + " to destination column " + backQuoteIfNeed(diagnostic->column_to)); + throw; + } + } + + bool useDefaultImplementationForNulls() const override { return false; } + /// CAST(Nothing, T) -> T + bool useDefaultImplementationForNothing() const override { return false; } + bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + +private: + WrapperType wrapper_function; + const char * name; + std::optional diagnostic; +}; + + +struct FunctionCastName +{ + static constexpr auto name = "CAST"; +}; + +class FunctionCastBase : public IFunctionBase +{ +public: + using MonotonicityForRange = std::function; +}; + +class FunctionCast final : public FunctionCastBase +{ +public: + using WrapperType = std::function; + + FunctionCast(ContextPtr context_ + , const char * cast_name_ + , MonotonicityForRange && monotonicity_for_range_ + , const DataTypes & argument_types_ + , const DataTypePtr & return_type_ + , std::optional diagnostic_ + , CastType cast_type_) + : cast_name(cast_name_), monotonicity_for_range(std::move(monotonicity_for_range_)) + , argument_types(argument_types_), return_type(return_type_), diagnostic(std::move(diagnostic_)) + , cast_type(cast_type_) + , context(context_) + { + } + + const DataTypes & getArgumentTypes() const override { return argument_types; } + const DataTypePtr & getResultType() const override { return return_type; } + + ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName & /*sample_columns*/) const override + { + try + { + return std::make_unique( + prepareUnpackDictionaries(getArgumentTypes()[0], getResultType()), cast_name, diagnostic); + } + catch (Exception & e) + { + if (diagnostic) + e.addMessage("while converting source column " + backQuoteIfNeed(diagnostic->column_from) + + " to destination column " + backQuoteIfNeed(diagnostic->column_to)); + throw; + } + } + + String getName() const override { return cast_name; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + bool hasInformationAboutMonotonicity() const override + { + return static_cast(monotonicity_for_range); + } + + Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override + { + return monotonicity_for_range(type, left, right); + } + +private: + + const char * cast_name; + MonotonicityForRange monotonicity_for_range; + + DataTypes argument_types; + DataTypePtr return_type; + + 
std::optional diagnostic; + CastType cast_type; + ContextPtr context; + + static WrapperType createFunctionAdaptor(FunctionPtr function, const DataTypePtr & from_type) + { + auto function_adaptor = std::make_unique(function)->build({ColumnWithTypeAndName{nullptr, from_type, ""}}); + + return [function_adaptor] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) + { + return function_adaptor->execute(arguments, result_type, input_rows_count); + }; + } + + static WrapperType createToNullableColumnWrapper() + { + return [] (ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) + { + ColumnPtr res = result_type->createColumn(); + ColumnUInt8::Ptr col_null_map_to = ColumnUInt8::create(input_rows_count, true); + return ColumnNullable::create(res->cloneResized(input_rows_count), std::move(col_null_map_to)); + }; + } + + template + WrapperType createWrapper(const DataTypePtr & from_type, const ToDataType * const to_type, bool requested_result_is_nullable) const + { + TypeIndex from_type_index = from_type->getTypeId(); + WhichDataType which(from_type_index); + bool can_apply_accurate_cast = (cast_type == CastType::accurate || cast_type == CastType::accurateOrNull) + && (which.isInt() || which.isUInt() || which.isFloat()); + + FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; + if (context) + date_time_overflow_behavior = context->getSettingsRef().date_time_overflow_behavior; + + if (requested_result_is_nullable && checkAndGetDataType(from_type.get())) + { + /// In case when converting to Nullable type, we apply different parsing rule, + /// that will not throw an exception but return NULL in case of malformed input. 
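+            /// For example, CAST('abc' AS Nullable(UInt8)) is expected to return NULL
+            /// with this wrapper, while CAST('abc' AS UInt8) throws a parsing exception.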
+ FunctionPtr function = FunctionConvertFromString::create(); + return createFunctionAdaptor(function, from_type); + } + else if (!can_apply_accurate_cast) + { + FunctionPtr function = FunctionTo::Type::create(context); + return createFunctionAdaptor(function, from_type); + } + + auto wrapper_cast_type = cast_type; + + return [wrapper_cast_type, from_type_index, to_type, date_time_overflow_behavior] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *column_nullable, size_t input_rows_count) + { + ColumnPtr result_column; + auto res = callOnIndexAndDataType(from_type_index, [&](const auto & types) -> bool { + using Types = std::decay_t; + using LeftDataType = typename Types::LeftType; + using RightDataType = typename Types::RightType; + + if constexpr (IsDataTypeNumber) + { + if constexpr (IsDataTypeNumber) + { +#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE, ADDITIONS) \ + case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \ + result_column = ConvertImpl::execute( \ + arguments, result_type, input_rows_count, ADDITIONS()); \ + break; + if (wrapper_cast_type == CastType::accurate) + { + switch (date_time_overflow_behavior) + { + GENERATE_OVERFLOW_MODE_CASE(Throw, AccurateConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Ignore, AccurateConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Saturate, AccurateConvertStrategyAdditions) + } + } + else + { + switch (date_time_overflow_behavior) + { + GENERATE_OVERFLOW_MODE_CASE(Throw, AccurateOrNullConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Ignore, AccurateOrNullConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Saturate, AccurateOrNullConvertStrategyAdditions) + } + } +#undef GENERATE_OVERFLOW_MODE_CASE + + return true; + } + + if constexpr (std::is_same_v || std::is_same_v) + { +#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE, ADDITIONS) \ + case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \ + result_column = ConvertImpl::template execute( \ +arguments, result_type, input_rows_count); \ + break; + if (wrapper_cast_type == CastType::accurate) + { + switch (date_time_overflow_behavior) + { + GENERATE_OVERFLOW_MODE_CASE(Throw, DateTimeAccurateConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Ignore, DateTimeAccurateConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Saturate, DateTimeAccurateConvertStrategyAdditions) + } + } + else + { + switch (date_time_overflow_behavior) + { + GENERATE_OVERFLOW_MODE_CASE(Throw, DateTimeAccurateOrNullConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Ignore, DateTimeAccurateOrNullConvertStrategyAdditions) + GENERATE_OVERFLOW_MODE_CASE(Saturate, DateTimeAccurateOrNullConvertStrategyAdditions) + } + } +#undef GENERATE_OVERFLOW_MODE_CASE + return true; + } + } + + return false; + }); + + /// Additionally check if callOnIndexAndDataType wasn't called at all. 
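+            /// `res` is false when the source type matched none of the number and
+            /// date/datetime cases above (e.g. a String source): depending on the
+            /// cast type this either yields an all-NULL column or throws.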
+ if (!res) + { + if (wrapper_cast_type == CastType::accurateOrNull) + { + auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper(); + return nullable_column_wrapper(arguments, result_type, column_nullable, input_rows_count); + } + else + { + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, + "Conversion from {} to {} is not supported", + from_type_index, to_type->getName()); + } + } + + return result_column; + }; + } + + template + WrapperType createBoolWrapper(const DataTypePtr & from_type, const ToDataType * const to_type, bool requested_result_is_nullable) const + { + if (checkAndGetDataType(from_type.get())) + { + return &ConvertImplGenericFromString::execute; + } + + return createWrapper(from_type, to_type, requested_result_is_nullable); + } + + WrapperType createUInt8ToBoolWrapper(const DataTypePtr from_type, const DataTypePtr to_type) const + { + return [from_type, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) -> ColumnPtr + { + /// Special case when we convert UInt8 column to Bool column. + /// both columns have type UInt8, but we shouldn't use identity wrapper, + /// because Bool column can contain only 0 and 1. + auto res_column = to_type->createColumn(); + const auto & data_from = checkAndGetColumn(arguments[0].column.get())->getData(); + auto & data_to = assert_cast(res_column.get())->getData(); + data_to.resize(data_from.size()); + for (size_t i = 0; i != data_from.size(); ++i) + data_to[i] = static_cast(data_from[i]); + return res_column; + }; + } + + static WrapperType createStringWrapper(const DataTypePtr & from_type) + { + FunctionPtr function = FunctionToString::create(); + return createFunctionAdaptor(function, from_type); + } + + WrapperType createFixedStringWrapper(const DataTypePtr & from_type, const size_t N) const + { + if (!isStringOrFixedString(from_type)) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CAST AS FixedString is only implemented for types String and FixedString"); + + bool exception_mode_null = cast_type == CastType::accurateOrNull; + return [exception_mode_null, N] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) + { + if (exception_mode_null) + return FunctionToFixedString::executeForN(arguments, N); + else + return FunctionToFixedString::executeForN(arguments, N); + }; + } + +#define GENERATE_INTERVAL_CASE(INTERVAL_KIND) \ + case IntervalKind::Kind::INTERVAL_KIND: \ + return createFunctionAdaptor(FunctionConvert::create(), from_type); + + static WrapperType createIntervalWrapper(const DataTypePtr & from_type, IntervalKind kind) + { + switch (kind) + { + GENERATE_INTERVAL_CASE(Nanosecond) + GENERATE_INTERVAL_CASE(Microsecond) + GENERATE_INTERVAL_CASE(Millisecond) + GENERATE_INTERVAL_CASE(Second) + GENERATE_INTERVAL_CASE(Minute) + GENERATE_INTERVAL_CASE(Hour) + GENERATE_INTERVAL_CASE(Day) + GENERATE_INTERVAL_CASE(Week) + GENERATE_INTERVAL_CASE(Month) + GENERATE_INTERVAL_CASE(Quarter) + GENERATE_INTERVAL_CASE(Year) + } + throw Exception{ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion to unexpected IntervalKind: {}", kind.toString()}; + } + +#undef GENERATE_INTERVAL_CASE + + template + requires IsDataTypeDecimal + WrapperType createDecimalWrapper(const DataTypePtr & from_type, const ToDataType * to_type, bool requested_result_is_nullable) const + { + TypeIndex type_index = from_type->getTypeId(); + UInt32 scale = to_type->getScale(); + + WhichDataType which(type_index); + bool ok = 
which.isNativeInt() || which.isNativeUInt() || which.isDecimal() || which.isFloat() || which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() + || which.isStringOrFixedString(); + if (!ok) + { + if (cast_type == CastType::accurateOrNull) + return createToNullableColumnWrapper(); + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", + from_type->getName(), to_type->getName()); + } + + auto wrapper_cast_type = cast_type; + + return [wrapper_cast_type, type_index, scale, to_type, requested_result_is_nullable] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *column_nullable, size_t input_rows_count) + { + ColumnPtr result_column; + auto res = callOnIndexAndDataType(type_index, [&](const auto & types) -> bool + { + using Types = std::decay_t; + using LeftDataType = typename Types::LeftType; + using RightDataType = typename Types::RightType; + + if constexpr (IsDataTypeDecimalOrNumber && IsDataTypeDecimalOrNumber && !std::is_same_v) + { + if (wrapper_cast_type == CastType::accurate) + { + AccurateConvertStrategyAdditions additions; + additions.scale = scale; + result_column = ConvertImpl::execute( + arguments, result_type, input_rows_count, additions); + + return true; + } + else if (wrapper_cast_type == CastType::accurateOrNull) + { + AccurateOrNullConvertStrategyAdditions additions; + additions.scale = scale; + result_column = ConvertImpl::execute( + arguments, result_type, input_rows_count, additions); + + return true; + } + } + else if constexpr (std::is_same_v) + { + if (requested_result_is_nullable) + { + /// Consistent with CAST(Nullable(String) AS Nullable(Numbers)) + /// In case when converting to Nullable type, we apply different parsing rule, + /// that will not throw an exception but return NULL in case of malformed input. + result_column = ConvertImpl::execute( + arguments, result_type, input_rows_count, scale); + + return true; + } + } + + result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); + + return true; + }); + + /// Additionally check if callOnIndexAndDataType wasn't called at all. + if (!res) + { + if (wrapper_cast_type == CastType::accurateOrNull) + { + auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper(); + return nullable_column_wrapper(arguments, result_type, column_nullable, input_rows_count); + } + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, + "Conversion from {} to {} is not supported", + type_index, to_type->getName()); + } + + return result_column; + }; + } + + WrapperType createAggregateFunctionWrapper(const DataTypePtr & from_type_untyped, const DataTypeAggregateFunction * to_type) const + { + /// Conversion from String through parsing. 
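+        /// E.g. casting a String that holds a serialized aggregation state back to
+        /// AggregateFunction(...) goes through the target type's text deserialization
+        /// (the exact serialization format is defined by that type, not by this wrapper).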
+ if (checkAndGetDataType(from_type_untyped.get())) + { + return &ConvertImplGenericFromString::execute; + } + else if (const auto * agg_type = checkAndGetDataType(from_type_untyped.get())) + { + if (agg_type->getFunction()->haveSameStateRepresentation(*to_type->getFunction())) + { + return [function = to_type->getFunction()]( + ColumnsWithTypeAndName & arguments, + const DataTypePtr & /* result_type */, + const ColumnNullable * /* nullable_source */, + size_t /*input_rows_count*/) -> ColumnPtr + { + const auto & argument_column = arguments.front(); + const auto * col_agg = checkAndGetColumn(argument_column.column.get()); + if (col_agg) + { + auto new_col_agg = ColumnAggregateFunction::create(*col_agg); + new_col_agg->set(function); + return new_col_agg; + } + else + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Illegal column {} for function CAST AS AggregateFunction", + argument_column.column->getName()); + } + }; + } + } + + if (cast_type == CastType::accurateOrNull) + return createToNullableColumnWrapper(); + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", + from_type_untyped->getName(), to_type->getName()); + } + + WrapperType createArrayWrapper(const DataTypePtr & from_type_untyped, const DataTypeArray & to_type) const + { + /// Conversion from String through parsing. + if (checkAndGetDataType(from_type_untyped.get())) + { + return &ConvertImplGenericFromString::execute; + } + + DataTypePtr from_type_holder; + const auto * from_type = checkAndGetDataType(from_type_untyped.get()); + const auto * from_type_map = checkAndGetDataType(from_type_untyped.get()); + + /// Convert from Map + if (from_type_map) + { + /// Recreate array of unnamed tuples because otherwise it may work + /// unexpectedly while converting to array of named tuples. 
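+            /// E.g. Map(String, UInt64) is backed by an Array of a named Tuple
+            /// (presumably Tuple(keys String, values UInt64)); the names are dropped
+            /// here so that the tuple conversion below matches elements by position,
+            /// not by name, when the target is an Array of named tuples.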
+ from_type_holder = from_type_map->getNestedTypeWithUnnamedTuple(); + from_type = assert_cast(from_type_holder.get()); + } + + if (!from_type) + { + throw Exception(ErrorCodes::TYPE_MISMATCH, + "CAST AS Array can only be performed between same-dimensional Array, Map or String types"); + } + + DataTypePtr from_nested_type = from_type->getNestedType(); + + /// In query SELECT CAST([] AS Array(Array(String))) from type is Array(Nothing) + bool from_empty_array = isNothing(from_nested_type); + + if (from_type->getNumberOfDimensions() != to_type.getNumberOfDimensions() && !from_empty_array) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "CAST AS Array can only be performed between same-dimensional array types"); + + const DataTypePtr & to_nested_type = to_type.getNestedType(); + + /// Prepare nested type conversion + const auto nested_function = prepareUnpackDictionaries(from_nested_type, to_nested_type); + + return [nested_function, from_nested_type, to_nested_type]( + ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr + { + const auto & argument_column = arguments.front(); + + const ColumnArray * col_array = nullptr; + + if (const ColumnMap * col_map = checkAndGetColumn(argument_column.column.get())) + col_array = &col_map->getNestedColumn(); + else + col_array = checkAndGetColumn(argument_column.column.get()); + + if (col_array) + { + /// create columns for converting nested column containing original and result columns + ColumnsWithTypeAndName nested_columns{{ col_array->getDataPtr(), from_nested_type, "" }}; + + /// convert nested column + auto result_column = nested_function(nested_columns, to_nested_type, nullable_source, nested_columns.front().column->size()); + + /// set converted nested column to result + return ColumnArray::create(result_column, col_array->getOffsetsPtr()); + } + else + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Illegal column {} for function CAST AS Array", + argument_column.column->getName()); + } + }; + } + + using ElementWrappers = std::vector; + + ElementWrappers getElementWrappers(const DataTypes & from_element_types, const DataTypes & to_element_types) const + { + ElementWrappers element_wrappers; + element_wrappers.reserve(from_element_types.size()); + + /// Create conversion wrapper for each element in tuple + for (size_t i = 0; i < from_element_types.size(); ++i) + { + const DataTypePtr & from_element_type = from_element_types[i]; + const DataTypePtr & to_element_type = to_element_types[i]; + element_wrappers.push_back(prepareUnpackDictionaries(from_element_type, to_element_type)); + } + + return element_wrappers; + } + + WrapperType createTupleWrapper(const DataTypePtr & from_type_untyped, const DataTypeTuple * to_type) const + { + /// Conversion from String through parsing. + if (checkAndGetDataType(from_type_untyped.get())) + { + return &ConvertImplGenericFromString::execute; + } + + const auto * from_type = checkAndGetDataType(from_type_untyped.get()); + if (!from_type) + throw Exception(ErrorCodes::TYPE_MISMATCH, "CAST AS Tuple can only be performed between tuple types or from String.\n" + "Left type: {}, right type: {}", from_type_untyped->getName(), to_type->getName()); + + const auto & from_element_types = from_type->getElements(); + const auto & to_element_types = to_type->getElements(); + + std::vector element_wrappers; + std::vector> to_reverse_index; + + /// For named tuples allow conversions for tuples with + /// different sets of elements. 
If element exists in @to_type
+    /// and doesn't exist in @from_type it will be filled by default values.
+    if (from_type->haveExplicitNames() && to_type->haveExplicitNames())
+    {
+        const auto & from_names = from_type->getElementNames();
+        std::unordered_map<String, size_t> from_positions;
+        from_positions.reserve(from_names.size());
+        for (size_t i = 0; i < from_names.size(); ++i)
+            from_positions[from_names[i]] = i;
+
+        const auto & to_names = to_type->getElementNames();
+        element_wrappers.reserve(to_names.size());
+        to_reverse_index.reserve(from_names.size());
+
+        for (size_t i = 0; i < to_names.size(); ++i)
+        {
+            auto it = from_positions.find(to_names[i]);
+            if (it != from_positions.end())
+            {
+                element_wrappers.emplace_back(prepareUnpackDictionaries(from_element_types[it->second], to_element_types[i]));
+                to_reverse_index.emplace_back(it->second);
+            }
+            else
+            {
+                element_wrappers.emplace_back();
+                to_reverse_index.emplace_back();
+            }
+        }
+    }
+    else
+    {
+        if (from_element_types.size() != to_element_types.size())
+            throw Exception(ErrorCodes::TYPE_MISMATCH, "CAST AS Tuple can only be performed between tuple types "
+                "with the same number of elements or from String.\nLeft type: {}, right type: {}",
+                from_type->getName(), to_type->getName());
+
+        element_wrappers = getElementWrappers(from_element_types, to_element_types);
+        to_reverse_index.reserve(to_element_types.size());
+        for (size_t i = 0; i < to_element_types.size(); ++i)
+            to_reverse_index.emplace_back(i);
+    }
+
+    return [element_wrappers, from_element_types, to_element_types, to_reverse_index]
+        (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count) -> ColumnPtr
+    {
+        const auto * col = arguments.front().column.get();
+
+        size_t tuple_size = to_element_types.size();
+        const ColumnTuple & column_tuple = typeid_cast<const ColumnTuple &>(*col);
+
+        Columns converted_columns(tuple_size);
+
+        /// invoke conversion for each element
+        for (size_t i = 0; i < tuple_size; ++i)
+        {
+            if (to_reverse_index[i])
+            {
+                size_t from_idx = *to_reverse_index[i];
+                ColumnsWithTypeAndName element = {{column_tuple.getColumns()[from_idx], from_element_types[from_idx], "" }};
+                converted_columns[i] = element_wrappers[i](element, to_element_types[i], nullable_source, input_rows_count);
+            }
+            else
+            {
+                converted_columns[i] = to_element_types[i]->createColumn()->cloneResized(input_rows_count);
+            }
+        }
+
+        return ColumnTuple::create(converted_columns);
+    };
+}
+
+/// The case of: tuple([key1, key2, ..., key_n], [value1, value2, ..., value_n])
+WrapperType createTupleToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const
+{
+    return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types]
+        (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr
+    {
+        const auto * col = arguments.front().column.get();
+        const auto & column_tuple = assert_cast<const ColumnTuple &>(*col);
+
+        Columns offsets(2);
+        Columns converted_columns(2);
+        for (size_t i = 0; i < 2; ++i)
+        {
+            const auto & column_array = assert_cast<const ColumnArray &>(column_tuple.getColumn(i));
+            ColumnsWithTypeAndName element = {{column_array.getDataPtr(), from_kv_types[i], ""}};
+            converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size());
+            offsets[i] = column_array.getOffsetsPtr();
+        }
+
+        const auto & keys_offsets = assert_cast<const ColumnArray::ColumnOffsets &>(*offsets[0]).getData();
+        const auto & values_offsets =
assert_cast(*offsets[1]).getData(); + if (keys_offsets != values_offsets) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "CAST AS Map can only be performed from tuple of arrays with equal sizes."); + + return ColumnMap::create(converted_columns[0], converted_columns[1], offsets[0]); + }; + } + + WrapperType createMapToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const + { + return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr + { + const auto * col = arguments.front().column.get(); + const auto & column_map = typeid_cast(*col); + const auto & nested_data = column_map.getNestedData(); + + Columns converted_columns(2); + for (size_t i = 0; i < 2; ++i) + { + ColumnsWithTypeAndName element = {{nested_data.getColumnPtr(i), from_kv_types[i], ""}}; + converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size()); + } + + return ColumnMap::create(converted_columns[0], converted_columns[1], column_map.getNestedColumn().getOffsetsPtr()); + }; + } + + /// The case of: [(key1, value1), (key2, value2), ...] + WrapperType createArrayToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const + { + return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr + { + const auto * col = arguments.front().column.get(); + const auto & column_array = typeid_cast(*col); + const auto & nested_data = typeid_cast(column_array.getData()); + + Columns converted_columns(2); + for (size_t i = 0; i < 2; ++i) + { + ColumnsWithTypeAndName element = {{nested_data.getColumnPtr(i), from_kv_types[i], ""}}; + converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size()); + } + + return ColumnMap::create(converted_columns[0], converted_columns[1], column_array.getOffsetsPtr()); + }; + } + + + WrapperType createMapWrapper(const DataTypePtr & from_type_untyped, const DataTypeMap * to_type) const + { + if (const auto * from_tuple = checkAndGetDataType(from_type_untyped.get())) + { + if (from_tuple->getElements().size() != 2) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "CAST AS Map from tuple requires 2 elements. " + "Left type: {}, right type: {}", + from_tuple->getName(), + to_type->getName()); + + DataTypes from_kv_types; + const auto & to_kv_types = to_type->getKeyValueTypes(); + + for (const auto & elem : from_tuple->getElements()) + { + const auto * type_array = checkAndGetDataType(elem.get()); + if (!type_array) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "CAST AS Map can only be performed from tuples of array. Got: {}", from_tuple->getName()); + + from_kv_types.push_back(type_array->getNestedType()); + } + + return createTupleToMapWrapper(from_kv_types, to_kv_types); + } + else if (const auto * from_array = typeid_cast(from_type_untyped.get())) + { + const auto * nested_tuple = typeid_cast(from_array->getNestedType().get()); + if (!nested_tuple || nested_tuple->getElements().size() != 2) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "CAST AS Map from array requires nested tuple of 2 elements. 
" + "Left type: {}, right type: {}", + from_array->getName(), + to_type->getName()); + + return createArrayToMapWrapper(nested_tuple->getElements(), to_type->getKeyValueTypes()); + } + else if (const auto * from_type = checkAndGetDataType(from_type_untyped.get())) + { + return createMapToMapWrapper(from_type->getKeyValueTypes(), to_type->getKeyValueTypes()); + } + else + { + throw Exception(ErrorCodes::TYPE_MISMATCH, "Unsupported types to CAST AS Map. " + "Left type: {}, right type: {}", from_type_untyped->getName(), to_type->getName()); + } + } + + WrapperType createTupleToObjectWrapper(const DataTypeTuple & from_tuple, bool has_nullable_subcolumns) const + { + if (!from_tuple.haveExplicitNames()) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Cast to Object can be performed only from flatten Named Tuple. Got: {}", from_tuple.getName()); + + PathsInData paths; + DataTypes from_types; + + std::tie(paths, from_types) = flattenTuple(from_tuple.getPtr()); + auto to_types = from_types; + + for (auto & type : to_types) + { + if (isTuple(type) || isNested(type)) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Cast to Object can be performed only from flatten Named Tuple. Got: {}", + from_tuple.getName()); + + type = recursiveRemoveLowCardinality(type); + } + + return [element_wrappers = getElementWrappers(from_types, to_types), + has_nullable_subcolumns, from_types, to_types, paths] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count) + { + size_t tuple_size = to_types.size(); + auto flattened_column = flattenTuple(arguments.front().column); + const auto & column_tuple = assert_cast(*flattened_column); + + if (tuple_size != column_tuple.getColumns().size()) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Expected tuple with {} subcolumn, but got {} subcolumns", + tuple_size, column_tuple.getColumns().size()); + + auto res = ColumnObject::create(has_nullable_subcolumns); + for (size_t i = 0; i < tuple_size; ++i) + { + ColumnsWithTypeAndName element = {{column_tuple.getColumns()[i], from_types[i], "" }}; + auto converted_column = element_wrappers[i](element, to_types[i], nullable_source, input_rows_count); + res->addSubcolumn(paths[i], converted_column->assumeMutable()); + } + + return res; + }; + } + + WrapperType createMapToObjectWrapper(const DataTypeMap & from_map, bool has_nullable_subcolumns) const + { + auto key_value_types = from_map.getKeyValueTypes(); + + if (!isStringOrFixedString(key_value_types[0])) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Cast to Object from Map can be performed only from Map " + "with String or FixedString key. 
Got: {}", from_map.getName()); + + const auto & value_type = key_value_types[1]; + auto to_value_type = value_type; + + if (!has_nullable_subcolumns && value_type->isNullable()) + to_value_type = removeNullable(value_type); + + if (has_nullable_subcolumns && !value_type->isNullable()) + to_value_type = makeNullable(value_type); + + DataTypes to_key_value_types{std::make_shared(), std::move(to_value_type)}; + auto element_wrappers = getElementWrappers(key_value_types, to_key_value_types); + + return [has_nullable_subcolumns, element_wrappers, key_value_types, to_key_value_types] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t) -> ColumnPtr + { + const auto & column_map = assert_cast(*arguments.front().column); + const auto & offsets = column_map.getNestedColumn().getOffsets(); + auto key_value_columns = column_map.getNestedData().getColumnsCopy(); + + for (size_t i = 0; i < 2; ++i) + { + ColumnsWithTypeAndName element{{key_value_columns[i], key_value_types[i], ""}}; + key_value_columns[i] = element_wrappers[i](element, to_key_value_types[i], nullable_source, key_value_columns[i]->size()); + } + + const auto & key_column_str = assert_cast(*key_value_columns[0]); + const auto & value_column = *key_value_columns[1]; + + using SubcolumnsMap = HashMap; + SubcolumnsMap subcolumns; + + for (size_t row = 0; row < offsets.size(); ++row) + { + for (size_t i = offsets[static_cast(row) - 1]; i < offsets[row]; ++i) + { + auto ref = key_column_str.getDataAt(i); + + bool inserted; + SubcolumnsMap::LookupResult it; + subcolumns.emplace(ref, it, inserted); + auto & subcolumn = it->getMapped(); + + if (inserted) + subcolumn = value_column.cloneEmpty()->cloneResized(row); + + /// Map can have duplicated keys. We insert only first one. + if (subcolumn->size() == row) + subcolumn->insertFrom(value_column, i); + } + + /// Insert default values for keys missed in current row. 
+ for (const auto & [_, subcolumn] : subcolumns) + if (subcolumn->size() == row) + subcolumn->insertDefault(); + } + + auto column_object = ColumnObject::create(has_nullable_subcolumns); + for (auto && [key, subcolumn] : subcolumns) + { + PathInData path(key.toView()); + column_object->addSubcolumn(path, std::move(subcolumn)); + } + + return column_object; + }; + } + + WrapperType createObjectWrapper(const DataTypePtr & from_type, const DataTypeObject * to_type) const + { + if (const auto * from_tuple = checkAndGetDataType(from_type.get())) + { + return createTupleToObjectWrapper(*from_tuple, to_type->hasNullableSubcolumns()); + } + else if (const auto * from_map = checkAndGetDataType(from_type.get())) + { + return createMapToObjectWrapper(*from_map, to_type->hasNullableSubcolumns()); + } + else if (checkAndGetDataType(from_type.get())) + { + return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) + { + auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); + res->finalize(); + return res; + }; + } + else if (checkAndGetDataType(from_type.get())) + { + return [is_nullable = to_type->hasNullableSubcolumns()] (ColumnsWithTypeAndName & arguments, const DataTypePtr & , const ColumnNullable * , size_t) -> ColumnPtr + { + auto & column_object = assert_cast(*arguments.front().column); + auto res = ColumnObject::create(is_nullable); + for (size_t i = 0; i < column_object.size(); i++) + res->insert(column_object[i]); + + res->finalize(); + return res; + }; + } + + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Cast to Object can be performed only from flatten named Tuple, Map or String. Got: {}", from_type->getName()); + } + + WrapperType createVariantToVariantWrapper(const DataTypeVariant & from_variant, const DataTypeVariant & to_variant) const + { + /// We support only extension of variant type, so, only new types can be added. + /// For example: Variant(T1, T2) -> Variant(T1, T2, T3) is supported, but Variant(T1, T2) -> Variant(T1, T3) is not supported. + /// We want to extend Variant type for free without rewriting the data, but we sort data types inside Variant during type creation + /// (we do it because we want Variant(T1, T2) to be the same as Variant(T2, T1)), but after extension the order of variant types + /// (and so their discriminators) can be different. For example: Variant(T1, T3) -> Variant(T1, T2, T3). + /// To avoid full rewrite of discriminators column, ColumnVariant supports it's local order of variant columns (and so local + /// discriminators) and stores mapping global order -> local order. + /// So, to extend Variant with new types for free, we should keep old local order for old variants, append new variants and change + /// mapping global order -> local order according to the new global order. + + /// Create map (new variant type) -> (it's global discriminator in new order). + const auto & new_variants = to_variant.getVariants(); + std::unordered_map new_variant_types_to_new_global_discriminator; + new_variant_types_to_new_global_discriminator.reserve(new_variants.size()); + for (size_t i = 0; i != new_variants.size(); ++i) + new_variant_types_to_new_global_discriminator[new_variants[i]->getName()] = i; + + /// Create set of old variant types. 
+ const auto & old_variants = from_variant.getVariants(); + std::unordered_map old_variant_types_to_old_global_discriminator; + old_variant_types_to_old_global_discriminator.reserve(old_variants.size()); + for (size_t i = 0; i != old_variants.size(); ++i) + old_variant_types_to_old_global_discriminator[old_variants[i]->getName()] = i; + + /// Check that the set of old variants types is a subset of new variant types and collect new global discriminator for each old global discriminator. + std::unordered_map old_global_discriminator_to_new; + old_global_discriminator_to_new.reserve(old_variants.size()); + for (const auto & [old_variant_type, old_discriminator] : old_variant_types_to_old_global_discriminator) + { + auto it = new_variant_types_to_new_global_discriminator.find(old_variant_type); + if (it == new_variant_types_to_new_global_discriminator.end()) + throw Exception( + ErrorCodes::CANNOT_CONVERT_TYPE, + "Cannot convert type {} to {}. Conversion between Variant types is allowed only when new Variant type is an extension " + "of an initial one", from_variant.getName(), to_variant.getName()); + old_global_discriminator_to_new[old_discriminator] = it->second; + } + + /// Collect variant types and their global discriminators that should be added to the old Variant to get the new Variant. + std::vector> variant_types_and_discriminators_to_add; + variant_types_and_discriminators_to_add.reserve(new_variants.size() - old_variants.size()); + for (size_t i = 0; i != new_variants.size(); ++i) + { + if (!old_variant_types_to_old_global_discriminator.contains(new_variants[i]->getName())) + variant_types_and_discriminators_to_add.emplace_back(new_variants[i], i); + } + + return [old_global_discriminator_to_new, variant_types_and_discriminators_to_add] + (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr + { + const auto & column_variant = assert_cast(*arguments.front().column.get()); + size_t num_old_variants = column_variant.getNumVariants(); + Columns new_variant_columns; + new_variant_columns.reserve(num_old_variants + variant_types_and_discriminators_to_add.size()); + std::vector new_local_to_global_discriminators; + new_local_to_global_discriminators.reserve(num_old_variants + variant_types_and_discriminators_to_add.size()); + for (size_t i = 0; i != num_old_variants; ++i) + { + new_variant_columns.push_back(column_variant.getVariantPtrByLocalDiscriminator(i)); + new_local_to_global_discriminators.push_back(old_global_discriminator_to_new.at(column_variant.globalDiscriminatorByLocal(i))); + } + + for (const auto & [new_variant_type, new_global_discriminator] : variant_types_and_discriminators_to_add) + { + new_variant_columns.push_back(new_variant_type->createColumn()); + new_local_to_global_discriminators.push_back(new_global_discriminator); + } + + return ColumnVariant::create(column_variant.getLocalDiscriminatorsPtr(), column_variant.getOffsetsPtr(), new_variant_columns, new_local_to_global_discriminators); + }; + } + + WrapperType createVariantToColumnWrapper(const DataTypeVariant & from_variant, const DataTypePtr & to_type) const + { + const auto & variant_types = from_variant.getVariants(); + std::vector variant_wrappers; + variant_wrappers.reserve(variant_types.size()); + + /// Create conversion wrapper for each variant. 
+ for (const auto & variant_type : variant_types) + variant_wrappers.push_back(prepareUnpackDictionaries(variant_type, to_type)); + + return [variant_wrappers, variant_types, to_type] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + const auto & column_variant = assert_cast(*arguments.front().column.get()); + + /// First, cast each variant to the result type. + std::vector casted_variant_columns; + casted_variant_columns.reserve(variant_types.size()); + for (size_t i = 0; i != variant_types.size(); ++i) + { + auto variant_col = column_variant.getVariantPtrByLocalDiscriminator(i); + ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], "" }}; + const auto & variant_wrapper = variant_wrappers[column_variant.globalDiscriminatorByLocal(i)]; + casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size())); + } + + /// Second, construct resulting column from casted variant columns according to discriminators. + const auto & local_discriminators = column_variant.getLocalDiscriminators(); + auto res = result_type->createColumn(); + res->reserve(input_rows_count); + for (size_t i = 0; i != input_rows_count; ++i) + { + auto local_discr = local_discriminators[i]; + if (local_discr == ColumnVariant::NULL_DISCRIMINATOR) + res->insertDefault(); + else + res->insertFrom(*casted_variant_columns[local_discr], column_variant.offsetAt(i)); + } + + return res; + }; + } + + static ColumnPtr createVariantFromDescriptorsAndOneNonEmptyVariant(const DataTypes & variant_types, const ColumnPtr & discriminators, const ColumnPtr & variant, ColumnVariant::Discriminator variant_discr) + { + Columns variants; + variants.reserve(variant_types.size()); + for (size_t i = 0; i != variant_types.size(); ++i) + { + if (i == variant_discr) + variants.emplace_back(variant); + else + variants.push_back(variant_types[i]->createColumn()); + } + + return ColumnVariant::create(discriminators, variants); + } + + WrapperType createColumnToVariantWrapper(const DataTypePtr & from_type, const DataTypeVariant & to_variant) const + { + /// We allow converting NULL to Variant(...) as Variant can store NULLs. + if (from_type->onlyNull()) + { + return [](ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + auto result_column = result_type->createColumn(); + result_column->insertManyDefaults(input_rows_count); + return result_column; + }; + } + + auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(*removeNullableOrLowCardinalityNullable(from_type)); + if (!variant_discr_opt) + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert type {} to {}. 
Conversion to Variant allowed only for types from this Variant", from_type->getName(), to_variant.getName()); + + return [variant_discr = *variant_discr_opt] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t) -> ColumnPtr + { + const auto & result_variant_type = assert_cast(*result_type); + const auto & variant_types = result_variant_type.getVariants(); + if (const ColumnNullable * col_nullable = typeid_cast(arguments.front().column.get())) + { + const auto & column = col_nullable->getNestedColumnPtr(); + const auto & null_map = col_nullable->getNullMapData(); + IColumn::Filter filter; + filter.reserve(column->size()); + auto discriminators = ColumnVariant::ColumnDiscriminators::create(); + auto & discriminators_data = discriminators->getData(); + discriminators_data.reserve(column->size()); + size_t variant_size_hint = 0; + for (size_t i = 0; i != column->size(); ++i) + { + if (null_map[i]) + { + discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR); + filter.push_back(0); + } + else + { + discriminators_data.push_back(variant_discr); + filter.push_back(1); + ++variant_size_hint; + } + } + + ColumnPtr variant_column; + /// If there were no NULLs, just use the column. + if (variant_size_hint == column->size()) + variant_column = column; + /// Otherwise we should use filtered column. + else + variant_column = column->filter(filter, variant_size_hint); + return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), variant_column, variant_discr); + } + else if (isColumnLowCardinalityNullable(*arguments.front().column)) + { + const auto & column = arguments.front().column; + + /// Variant column cannot have LowCardinality(Nullable(...)) variant, as Variant column stores NULLs itself. + /// We should create a null-map, insert NULL_DISCRIMINATOR on NULL values and filter initial column. + const auto & col_lc = assert_cast(*column); + const auto & indexes = col_lc.getIndexes(); + auto null_index = col_lc.getDictionary().getNullValueIndex(); + IColumn::Filter filter; + filter.reserve(col_lc.size()); + auto discriminators = ColumnVariant::ColumnDiscriminators::create(); + auto & discriminators_data = discriminators->getData(); + discriminators_data.reserve(col_lc.size()); + size_t variant_size_hint = 0; + for (size_t i = 0; i != col_lc.size(); ++i) + { + if (indexes.getUInt(i) == null_index) + { + discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR); + filter.push_back(0); + } + else + { + discriminators_data.push_back(variant_discr); + filter.push_back(1); + ++variant_size_hint; + } + } + + MutableColumnPtr variant_column; + /// If there were no NULLs, we can just clone the column. + if (variant_size_hint == col_lc.size()) + variant_column = IColumn::mutate(column); + /// Otherwise we should filter column. 
+ else + variant_column = column->filter(filter, variant_size_hint)->assumeMutable(); + + assert_cast(*variant_column).nestedRemoveNullable(); + return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), std::move(variant_column), variant_discr); + } + else + { + const auto & column = arguments.front().column; + auto discriminators = ColumnVariant::ColumnDiscriminators::create(); + discriminators->getData().resize_fill(column->size(), variant_discr); + return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), column, variant_discr); + } + }; + } + + /// Wrapper for conversion to/from Variant type + WrapperType createVariantWrapper(const DataTypePtr & from_type, const DataTypePtr & to_type) const + { + if (const auto * from_variant = checkAndGetDataType(from_type.get())) + { + if (const auto * to_variant = checkAndGetDataType(to_type.get())) + return createVariantToVariantWrapper(*from_variant, *to_variant); + + return createVariantToColumnWrapper(*from_variant, to_type); + } + + return createColumnToVariantWrapper(from_type, assert_cast(*to_type)); + } + + template + WrapperType createEnumWrapper(const DataTypePtr & from_type, const DataTypeEnum * to_type) const + { + using EnumType = DataTypeEnum; + using Function = typename FunctionTo::Type; + + if (const auto * from_enum8 = checkAndGetDataType(from_type.get())) + checkEnumToEnumConversion(from_enum8, to_type); + else if (const auto * from_enum16 = checkAndGetDataType(from_type.get())) + checkEnumToEnumConversion(from_enum16, to_type); + + if (checkAndGetDataType(from_type.get())) + return createStringToEnumWrapper(); + else if (checkAndGetDataType(from_type.get())) + return createStringToEnumWrapper(); + else if (isNativeNumber(from_type) || isEnum(from_type)) + { + auto function = Function::create(); + return createFunctionAdaptor(function, from_type); + } + else + { + if (cast_type == CastType::accurateOrNull) + return createToNullableColumnWrapper(); + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", + from_type->getName(), to_type->getName()); + } + } + + template + void checkEnumToEnumConversion(const EnumTypeFrom * from_type, const EnumTypeTo * to_type) const + { + const auto & from_values = from_type->getValues(); + const auto & to_values = to_type->getValues(); + + using ValueType = std::common_type_t; + using NameValuePair = std::pair; + using EnumValues = std::vector; + + EnumValues name_intersection; + std::set_intersection(std::begin(from_values), std::end(from_values), + std::begin(to_values), std::end(to_values), std::back_inserter(name_intersection), + [] (auto && from, auto && to) { return from.first < to.first; }); + + for (const auto & name_value : name_intersection) + { + const auto & old_value = name_value.second; + const auto & new_value = to_type->getValue(name_value.first); + if (old_value != new_value) + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Enum conversion changes value for element '{}' from {} to {}", + name_value.first, toString(old_value), toString(new_value)); + } + } + + template + WrapperType createStringToEnumWrapper() const + { + const char * function_name = cast_name; + return [function_name] ( + ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, const ColumnNullable * nullable_col, size_t /*input_rows_count*/) + { + const auto & first_col = arguments.front().column.get(); + const auto & result_type = typeid_cast(*res_type); + + const 
ColumnStringType * col = typeid_cast(first_col); + + if (col && nullable_col && nullable_col->size() != col->size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnNullable is not compatible with original"); + + if (col) + { + const auto size = col->size(); + + auto res = result_type.createColumn(); + auto & out_data = static_cast(*res).getData(); + out_data.resize(size); + + auto default_enum_value = result_type.getValues().front().second; + + if (nullable_col) + { + for (size_t i = 0; i < size; ++i) + { + if (!nullable_col->isNullAt(i)) + out_data[i] = result_type.getValue(col->getDataAt(i)); + else + out_data[i] = default_enum_value; + } + } + else + { + for (size_t i = 0; i < size; ++i) + out_data[i] = result_type.getValue(col->getDataAt(i)); + } + + return res; + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected column {} as first argument of function {}", + first_col->getName(), function_name); + }; + } + + template + WrapperType createEnumToStringWrapper() const + { + const char * function_name = cast_name; + return [function_name] ( + ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, const ColumnNullable * nullable_col, size_t /*input_rows_count*/) + { + using ColumnEnumType = EnumType::ColumnType; + + const auto & first_col = arguments.front().column.get(); + const auto & first_type = arguments.front().type.get(); + + const ColumnEnumType * enum_col = typeid_cast(first_col); + const EnumType * enum_type = typeid_cast(first_type); + + if (enum_col && nullable_col && nullable_col->size() != enum_col->size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnNullable is not compatible with original"); + + if (enum_col && enum_type) + { + const auto size = enum_col->size(); + const auto & enum_data = enum_col->getData(); + + auto res = res_type->createColumn(); + + if (nullable_col) + { + for (size_t i = 0; i < size; ++i) + { + if (!nullable_col->isNullAt(i)) + { + const auto & value = enum_type->getNameForValue(enum_data[i]); + res->insertData(value.data, value.size); + } + else + res->insertDefault(); + } + } + else + { + for (size_t i = 0; i < size; ++i) + { + const auto & value = enum_type->getNameForValue(enum_data[i]); + res->insertData(value.data, value.size); + } + } + + return res; + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected column {} as first argument of function {}", + first_col->getName(), function_name); + }; + } + + static WrapperType createIdentityWrapper(const DataTypePtr &) + { + return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) + { + return arguments.front().column; + }; + } + + static WrapperType createNothingWrapper(const IDataType * to_type) + { + ColumnPtr res = to_type->createColumnConstWithDefaultValue(1); + return [res] (ColumnsWithTypeAndName &, const DataTypePtr &, const ColumnNullable *, size_t input_rows_count) + { + /// Column of Nothing type is trivially convertible to any other column + return res->cloneResized(input_rows_count)->convertToFullColumnIfConst(); + }; + } + + WrapperType prepareUnpackDictionaries(const DataTypePtr & from_type, const DataTypePtr & to_type) const + { + /// Conversion from/to Variant data type is processed in a special way. + /// We don't need to remove LowCardinality/Nullable. 
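Both wrappers here reduce to a two-way lookup between enum names and their underlying values, with NULL rows falling back to the default (first) element on the string-to-enum side. A standalone sketch of that lookup, with illustrative types standing in for ClickHouse's enum machinery:

``` cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>

/// Two-way Enum8 lookup as relied on by the String <-> Enum wrappers.
struct EnumValues
{
    std::map<std::string, int8_t> name_to_value;
    std::map<int8_t, std::string> value_to_name;

    int8_t getValue(const std::string & name) const
    {
        auto it = name_to_value.find(name);
        if (it == name_to_value.end())
            throw std::invalid_argument("Unknown element '" + name + "' for enum");
        return it->second;
    }

    const std::string & getNameForValue(int8_t value) const
    {
        return value_to_name.at(value);
    }
};

int main()
{
    EnumValues e{{{"hello", 1}, {"world", 2}}, {{1, "hello"}, {2, "world"}}};
    std::cout << int(e.getValue("world")) << '\n'; /// String -> Enum8: 2
    std::cout << e.getNameForValue(1) << '\n';     /// Enum8 -> String: hello
}
```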
+ if (isVariant(to_type) || isVariant(from_type)) + return createVariantWrapper(from_type, to_type); + + const auto * from_low_cardinality = typeid_cast(from_type.get()); + const auto * to_low_cardinality = typeid_cast(to_type.get()); + const auto & from_nested = from_low_cardinality ? from_low_cardinality->getDictionaryType() : from_type; + const auto & to_nested = to_low_cardinality ? to_low_cardinality->getDictionaryType() : to_type; + + if (from_type->onlyNull()) + { + if (!to_nested->isNullable() && !isVariant(to_type)) + { + if (cast_type == CastType::accurateOrNull) + { + return createToNullableColumnWrapper(); + } + else + { + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert NULL to a non-nullable type"); + } + } + + return [](ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) + { + return result_type->createColumnConstWithDefaultValue(input_rows_count)->convertToFullColumnIfConst(); + }; + } + + bool skip_not_null_check = false; + + if (from_low_cardinality && from_nested->isNullable() && !to_nested->isNullable()) + /// Disable check for dictionary. Will check that column doesn't contain NULL in wrapper below. + skip_not_null_check = true; + + auto wrapper = prepareRemoveNullable(from_nested, to_nested, skip_not_null_check); + if (!from_low_cardinality && !to_low_cardinality) + return wrapper; + + return [wrapper, from_low_cardinality, to_low_cardinality, skip_not_null_check] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) -> ColumnPtr + { + ColumnsWithTypeAndName args = {arguments[0]}; + auto & arg = args.front(); + auto res_type = result_type; + + ColumnPtr converted_column; + + ColumnPtr res_indexes; + /// For some types default can't be casted (for example, String to Int). In that case convert column to full. + bool src_converted_to_full_column = false; + + { + auto tmp_rows_count = input_rows_count; + + if (to_low_cardinality) + res_type = to_low_cardinality->getDictionaryType(); + + if (from_low_cardinality) + { + const auto * col_low_cardinality = typeid_cast(arguments[0].column.get()); + + if (skip_not_null_check && col_low_cardinality->containsNull()) + throw Exception(ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN, "Cannot convert NULL value to non-Nullable type"); + + arg.column = col_low_cardinality->getDictionary().getNestedColumn(); + arg.type = from_low_cardinality->getDictionaryType(); + + /// TODO: Make map with defaults conversion. + src_converted_to_full_column = !removeNullable(arg.type)->equals(*removeNullable(res_type)); + if (src_converted_to_full_column) + arg.column = arg.column->index(col_low_cardinality->getIndexes(), 0); + else + res_indexes = col_low_cardinality->getIndexesPtr(); + + tmp_rows_count = arg.column->size(); + } + + /// Perform the requested conversion. 
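prepareUnpackDictionaries converts a LowCardinality column by casting its dictionary once and reusing the per-row indexes, rather than casting every row of the materialized column. A standalone sketch of that idea, assuming a simple string-to-integer cast over a dictionary-encoded column (all names illustrative):

``` cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

/// Dictionary-encoded conversion: cast the (small) dictionary once and keep
/// the per-row indexes, instead of casting the full column row by row.
int main()
{
    std::vector<std::string> dictionary = {"1", "2", "3"}; /// unique values
    std::vector<uint32_t> indexes = {0, 2, 2, 1, 0};       /// one entry per row

    std::vector<int64_t> converted_dictionary;
    for (const auto & s : dictionary)
        converted_dictionary.push_back(std::stoll(s));     /// the nested "wrapper" call

    /// Materialize the result by indexing into the converted dictionary.
    for (uint32_t idx : indexes)
        std::cout << converted_dictionary[idx] << ' ';     /// 1 3 3 2 1
    std::cout << '\n';
}
```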
+ converted_column = wrapper(args, res_type, nullable_source, tmp_rows_count); + } + + if (to_low_cardinality) + { + auto res_column = to_low_cardinality->createColumn(); + auto * col_low_cardinality = typeid_cast(res_column.get()); + + if (from_low_cardinality && !src_converted_to_full_column) + { + col_low_cardinality->insertRangeFromDictionaryEncodedColumn(*converted_column, *res_indexes); + } + else + col_low_cardinality->insertRangeFromFullColumn(*converted_column, 0, converted_column->size()); + + return res_column; + } + else if (!src_converted_to_full_column) + return converted_column->index(*res_indexes, 0); + else + return converted_column; + }; + } + + WrapperType prepareRemoveNullable(const DataTypePtr & from_type, const DataTypePtr & to_type, bool skip_not_null_check) const + { + /// Determine whether pre-processing and/or post-processing must take place during conversion. + + bool source_is_nullable = from_type->isNullable(); + bool result_is_nullable = to_type->isNullable(); + + auto wrapper = prepareImpl(removeNullable(from_type), removeNullable(to_type), result_is_nullable); + + if (result_is_nullable) + { + return [wrapper, source_is_nullable] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + /// Create a temporary columns on which to perform the operation. + const auto & nullable_type = static_cast(*result_type); + const auto & nested_type = nullable_type.getNestedType(); + + ColumnsWithTypeAndName tmp_args; + if (source_is_nullable) + tmp_args = createBlockWithNestedColumns(arguments); + else + tmp_args = arguments; + + const ColumnNullable * nullable_source = nullptr; + + /// Add original ColumnNullable for createStringToEnumWrapper() + if (source_is_nullable) + { + if (arguments.size() != 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid number of arguments"); + nullable_source = typeid_cast(arguments.front().column.get()); + } + + /// Perform the requested conversion. + auto tmp_res = wrapper(tmp_args, nested_type, nullable_source, input_rows_count); + + /// May happen in fuzzy tests. For debug purpose. + if (!tmp_res) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Couldn't convert {} to {} in prepareRemoveNullable wrapper.", + arguments[0].type->getName(), nested_type->getName()); + + return wrapInNullable(tmp_res, arguments, nested_type, input_rows_count); + }; + } + else if (source_is_nullable) + { + /// Conversion from Nullable to non-Nullable. + + return [wrapper, skip_not_null_check] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + auto tmp_args = createBlockWithNestedColumns(arguments); + auto nested_type = removeNullable(result_type); + + /// Check that all values are not-NULL. + /// Check can be skipped in case if LowCardinality dictionary is transformed. + /// In that case, correctness will be checked beforehand. 
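prepareRemoveNullable wraps the nested conversion: the null map is stripped before converting and re-attached afterwards, and when the target is non-Nullable any set bit in the null map is an error. A standalone sketch of the Nullable round trip, assuming a string-to-integer nested conversion:

``` cpp
#include <iostream>
#include <optional>
#include <string>
#include <vector>

/// Nullable handling around a conversion: strip the null map, convert the
/// nested values, then re-attach the null map to the result.
int main()
{
    std::vector<std::string> nested = {"1", "2", "0"}; /// values at NULL rows are dummies
    std::vector<bool> null_map = {false, true, false};

    std::vector<std::optional<long>> result;
    for (size_t i = 0; i != nested.size(); ++i)
    {
        if (null_map[i])
            result.push_back(std::nullopt);            /// NULL stays NULL
        else
            result.push_back(std::stol(nested[i]));    /// nested conversion
    }

    for (const auto & v : result)
        std::cout << (v ? std::to_string(*v) : "NULL") << ' ';
    std::cout << '\n';                                 /// 1 NULL 0
}
```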
+ if (!skip_not_null_check) + { + const auto & col = arguments[0].column; + const auto & nullable_col = assert_cast(*col); + const auto & null_map = nullable_col.getNullMapData(); + + if (!memoryIsZero(null_map.data(), 0, null_map.size())) + throw Exception(ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN, "Cannot convert NULL value to non-Nullable type"); + } + const ColumnNullable * nullable_source = typeid_cast(arguments.front().column.get()); + return wrapper(tmp_args, nested_type, nullable_source, input_rows_count); + }; + } + else + return wrapper; + } + + /// 'from_type' and 'to_type' are nested types in case of Nullable. + /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested. + WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const + { + if (isUInt8(from_type) && isBool(to_type)) + return createUInt8ToBoolWrapper(from_type, to_type); + + /// We can cast IPv6 into IPv6, IPv4 into IPv4, but we should not allow to cast FixedString(16) into IPv6 as part of identity cast + bool safe_convert_custom_types = true; + + if (const auto * to_type_custom_name = to_type->getCustomName()) + safe_convert_custom_types = from_type->getCustomName() && from_type->getCustomName()->getName() == to_type_custom_name->getName(); + else if (const auto * from_type_custom_name = from_type->getCustomName()) + safe_convert_custom_types = to_type->getCustomName() && from_type_custom_name->getName() == to_type->getCustomName()->getName(); + + if (from_type->equals(*to_type) && safe_convert_custom_types) + { + /// We can only use identity conversion for DataTypeAggregateFunction when they are strictly equivalent. + if (typeid_cast(from_type.get())) + { + if (DataTypeAggregateFunction::strictEquals(from_type, to_type)) + return createIdentityWrapper(from_type); + } + else + return createIdentityWrapper(from_type); + } + else if (WhichDataType(from_type).isNothing()) + return createNothingWrapper(to_type.get()); + + WrapperType ret; + + auto make_default_wrapper = [&](const auto & types) -> bool + { + using Types = std::decay_t; + using ToDataType = typename Types::LeftType; + + if constexpr ( + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) + { + ret = createWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); + return true; + } + if constexpr (std::is_same_v) + { + if (isBool(to_type)) + ret = createBoolWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); + else + ret = createWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); + return true; + } + if constexpr ( + std::is_same_v || + std::is_same_v) + { + ret = createEnumWrapper(from_type, checkAndGetDataType(to_type.get())); + return true; + } + if constexpr ( + std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v) + { + ret = createDecimalWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); + return true; + } + + return false; + }; + + bool cast_ipv4_ipv6_default_on_conversion_error_value = context && 
context->getSettingsRef().cast_ipv4_ipv6_default_on_conversion_error; + bool input_format_ipv4_default_on_conversion_error_value = context && context->getSettingsRef().input_format_ipv4_default_on_conversion_error; + bool input_format_ipv6_default_on_conversion_error_value = context && context->getSettingsRef().input_format_ipv6_default_on_conversion_error; + + auto make_custom_serialization_wrapper = [&, cast_ipv4_ipv6_default_on_conversion_error_value, input_format_ipv4_default_on_conversion_error_value, input_format_ipv6_default_on_conversion_error_value](const auto & types) -> bool + { + using Types = std::decay_t; + using ToDataType = typename Types::RightType; + using FromDataType = typename Types::LeftType; + + if constexpr (WhichDataType(FromDataType::type_id).isStringOrFixedString()) + { + if constexpr (std::is_same_v) + { + ret = [cast_ipv4_ipv6_default_on_conversion_error_value, + input_format_ipv4_default_on_conversion_error_value, + requested_result_is_nullable]( + ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const ColumnNullable * column_nullable, + size_t) -> ColumnPtr + { + if (!WhichDataType(result_type).isIPv4()) + throw Exception(ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName()); + + const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; + if (requested_result_is_nullable) + return convertToIPv4(arguments[0].column, null_map); + else if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv4_default_on_conversion_error_value) + return convertToIPv4(arguments[0].column, null_map); + else + return convertToIPv4(arguments[0].column, null_map); + }; + + return true; + } + + if constexpr (std::is_same_v) + { + ret = [cast_ipv4_ipv6_default_on_conversion_error_value, + input_format_ipv6_default_on_conversion_error_value, + requested_result_is_nullable]( + ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const ColumnNullable * column_nullable, + size_t) -> ColumnPtr + { + if (!WhichDataType(result_type).isIPv6()) + throw Exception( + ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv6", result_type->getName()); + + const auto * null_map = column_nullable ? 
&column_nullable->getNullMapData() : nullptr; + if (requested_result_is_nullable) + return convertToIPv6(arguments[0].column, null_map); + else if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv6_default_on_conversion_error_value) + return convertToIPv6(arguments[0].column, null_map); + else + return convertToIPv6(arguments[0].column, null_map); + }; + + return true; + } + + if (to_type->getCustomSerialization() && to_type->getCustomName()) + { + ret = [requested_result_is_nullable]( + ColumnsWithTypeAndName & arguments, + const DataTypePtr & result_type, + const ColumnNullable * column_nullable, + size_t input_rows_count) -> ColumnPtr + { + auto wrapped_result_type = result_type; + if (requested_result_is_nullable) + wrapped_result_type = makeNullable(result_type); + return ConvertImplGenericFromString::execute( + arguments, wrapped_result_type, column_nullable, input_rows_count); + }; + return true; + } + } + else if constexpr (WhichDataType(FromDataType::type_id).isIPv6() && WhichDataType(ToDataType::type_id).isIPv4()) + { + ret = [cast_ipv4_ipv6_default_on_conversion_error_value, requested_result_is_nullable]( + ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t) + -> ColumnPtr + { + if (!WhichDataType(result_type).isIPv4()) + throw Exception( + ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName()); + + const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; + if (requested_result_is_nullable) + return convertIPv6ToIPv4(arguments[0].column, null_map); + else if (cast_ipv4_ipv6_default_on_conversion_error_value) + return convertIPv6ToIPv4(arguments[0].column, null_map); + else + return convertIPv6ToIPv4(arguments[0].column, null_map); + }; + + return true; + } + + if constexpr (WhichDataType(ToDataType::type_id).isStringOrFixedString()) + { + if constexpr (WhichDataType(FromDataType::type_id).isEnum()) + { + ret = createEnumToStringWrapper(); + return true; + } + else if (from_type->getCustomSerialization()) + { + ret = [](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); + }; + return true; + } + } + + return false; + }; + + if (callOnTwoTypeIndexes(from_type->getTypeId(), to_type->getTypeId(), make_custom_serialization_wrapper)) + return ret; + + if (callOnIndexAndDataType(to_type->getTypeId(), make_default_wrapper)) + return ret; + + switch (to_type->getTypeId()) + { + case TypeIndex::String: + return createStringWrapper(from_type); + case TypeIndex::FixedString: + return createFixedStringWrapper(from_type, checkAndGetDataType(to_type.get())->getN()); + case TypeIndex::Array: + return createArrayWrapper(from_type, static_cast(*to_type)); + case TypeIndex::Tuple: + return createTupleWrapper(from_type, checkAndGetDataType(to_type.get())); + case TypeIndex::Map: + return createMapWrapper(from_type, checkAndGetDataType(to_type.get())); + case TypeIndex::Object: + return createObjectWrapper(from_type, checkAndGetDataType(to_type.get())); + case TypeIndex::AggregateFunction: + return createAggregateFunctionWrapper(from_type, checkAndGetDataType(to_type.get())); + case TypeIndex::Interval: + return createIntervalWrapper(from_type, checkAndGetDataType(to_type.get())->getKind()); + default: + break; + } + + if (cast_type == CastType::accurateOrNull) + return 
createToNullableColumnWrapper(); + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", + from_type->getName(), to_type->getName()); + } +}; + +class MonotonicityHelper +{ +public: + using MonotonicityForRange = FunctionCastBase::MonotonicityForRange; + + template + static auto monotonicityForType(const DataType * const) + { + return FunctionTo::Type::Monotonic::get; + } + + static MonotonicityForRange getMonotonicityInformation(const DataTypePtr & from_type, const IDataType * to_type) + { + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (isEnum(from_type)) + { + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + if (const auto * type = checkAndGetDataType(to_type)) + return monotonicityForType(type); + } + /// other types like Null, FixedString, Array and Tuple have no monotonicity defined + return {}; + } +}; + + FunctionBasePtr createFunctionBaseCast( ContextPtr context , const ColumnsWithTypeAndName & arguments @@ -146,54 +5102,4 @@ REGISTER_FUNCTION(Conversion) factory.registerFunction>(); } - -MonotonicityHelper::MonotonicityForRange MonotonicityHelper::getMonotonicityInformation(const DataTypePtr & from_type, const IDataType * to_type) -{ - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = 
checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (isEnum(from_type)) - { - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - } - /// other types like Null, FixedString, Array and Tuple have no monotonicity defined - return {}; -} - } diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h deleted file mode 100644 index c21e85fb40e..00000000000 --- a/src/Functions/FunctionsConversion.h +++ /dev/null @@ -1,4924 +0,0 @@ -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ATTEMPT_TO_READ_AFTER_EOF; - extern const int CANNOT_PARSE_NUMBER; - extern const int CANNOT_READ_ARRAY_FROM_TEXT; - extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; - extern const int CANNOT_PARSE_QUOTED_STRING; - extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; - extern const int CANNOT_PARSE_DATE; - extern const int CANNOT_PARSE_DATETIME; - extern const int CANNOT_PARSE_TEXT; - extern const int CANNOT_PARSE_UUID; - extern const int CANNOT_PARSE_IPV4; - extern const int CANNOT_PARSE_IPV6; - extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; - extern const int LOGICAL_ERROR; - extern const int TYPE_MISMATCH; - extern const int CANNOT_CONVERT_TYPE; - extern const int ILLEGAL_COLUMN; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NOT_IMPLEMENTED; - extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN; - extern const int CANNOT_PARSE_BOOL; - extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; -} - -/** Type conversion functions. - * toType - conversion in "natural way"; - */ - -UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column); - -/// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type. 
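For context on the Accurate/AccurateOrNull strategy tags defined just below: under accurateCastOrNull, a value the target type cannot represent yields NULL (a set null-map bit) instead of silently wrapping. A standalone sketch of that behavior, with illustrative names:

``` cpp
#include <cstdint>
#include <iostream>
#include <limits>
#include <optional>
#include <string>

/// accurateCastOrNull in miniature: a narrowing numeric conversion returns
/// nullopt (-> NULL plus a null-map bit) for unrepresentable values.
std::optional<uint8_t> accurateToUInt8(int64_t value)
{
    if (value < 0 || value > std::numeric_limits<uint8_t>::max())
        return std::nullopt; /// cannot be represented: the row becomes NULL
    return static_cast<uint8_t>(value);
}

int main()
{
    auto a = accurateToUInt8(200); /// fits
    auto b = accurateToUInt8(300); /// out of range -> NULL
    std::cout << (a ? std::to_string(*a) : "NULL") << ' '
              << (b ? std::to_string(*b) : "NULL") << '\n';
}
```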
-struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; }; - -struct AccurateConvertStrategyAdditions -{ - UInt32 scale { 0 }; -}; - -struct AccurateOrNullConvertStrategyAdditions -{ - UInt32 scale { 0 }; -}; - - -struct ConvertDefaultBehaviorTag {}; -struct ConvertReturnNullOnErrorTag {}; -struct ConvertReturnZeroOnErrorTag {}; - -/** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment. - * (Date is represented internally as number of days from some day; DateTime - as unix timestamp) - */ -template -struct ConvertImpl -{ - using FromFieldType = typename FromDataType::FieldType; - using ToFieldType = typename ToDataType::FieldType; - - template - static ColumnPtr NO_SANITIZE_UNDEFINED execute( - const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type [[maybe_unused]], size_t input_rows_count, - Additions additions [[maybe_unused]] = Additions()) - { - const ColumnWithTypeAndName & named_from = arguments[0]; - - using ColVecFrom = typename FromDataType::ColumnType; - using ColVecTo = typename ToDataType::ColumnType; - - if constexpr ((IsDataTypeDecimal || IsDataTypeDecimal) - && !(std::is_same_v || std::is_same_v)) - { - if constexpr (!IsDataTypeDecimalOrNumber || !IsDataTypeDecimalOrNumber) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - named_from.column->getName(), Name::name); - } - } - - if (const ColVecFrom * col_from = checkAndGetColumn(named_from.column.get())) - { - typename ColVecTo::MutablePtr col_to = nullptr; - - if constexpr (IsDataTypeDecimal) - { - UInt32 scale; - - if constexpr (std::is_same_v - || std::is_same_v) - { - scale = additions.scale; - } - else - { - scale = additions; - } - - col_to = ColVecTo::create(0, scale); - } - else - col_to = ColVecTo::create(); - - const auto & vec_from = col_from->getData(); - auto & vec_to = col_to->getData(); - vec_to.resize(input_rows_count); - - ColumnUInt8::MutablePtr col_null_map_to; - ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr; - if constexpr (std::is_same_v) - { - col_null_map_to = ColumnUInt8::create(input_rows_count, false); - vec_null_map_to = &col_null_map_to->getData(); - } - - bool result_is_bool = isBool(result_type); - for (size_t i = 0; i < input_rows_count; ++i) - { - if constexpr (std::is_same_v) - { - if (result_is_bool) - { - vec_to[i] = vec_from[i] != FromFieldType(0); - continue; - } - } - - if constexpr (std::is_same_v && std::is_same_v) - { - static_assert( - std::is_same_v, - "UInt128 and UUID types must be same"); - - vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; - vec_to[i].items[0] = vec_from[i].toUnderType().items[1]; - - continue; - } - - if constexpr (std::is_same_v && std::is_same_v) - { - static_assert( - std::is_same_v, - "UInt128 and IPv6 types must be same"); - - vec_to[i].items[1] = std::byteswap(vec_from[i].toUnderType().items[0]); - vec_to[i].items[0] = std::byteswap(vec_from[i].toUnderType().items[1]); - - continue; - } - - if constexpr (std::is_same_v != std::is_same_v) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Conversion between numeric types and UUID is not supported. 
" - "Probably the passed UUID is unquoted"); - } - else if constexpr ( - (std::is_same_v != std::is_same_v) - && !(is_any_of || is_any_of) - ) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion from {} to {} is not supported", - TypeName, TypeName); - } - else if constexpr (std::is_same_v != std::is_same_v && !(std::is_same_v || std::is_same_v)) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Conversion between numeric types and IPv6 is not supported. " - "Probably the passed IPv6 is unquoted"); - } - else - { - if constexpr (IsDataTypeDecimal || IsDataTypeDecimal) - { - if constexpr (std::is_same_v) - { - ToFieldType result; - bool convert_result = false; - - if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) - convert_result = tryConvertDecimals(vec_from[i], col_from->getScale(), col_to->getScale(), result); - else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) - convert_result = tryConvertFromDecimal(vec_from[i], col_from->getScale(), result); - else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) - convert_result = tryConvertToDecimal(vec_from[i], col_to->getScale(), result); - - if (convert_result) - vec_to[i] = result; - else - { - vec_to[i] = static_cast(0); - (*vec_null_map_to)[i] = true; - } - } - else - { - if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) - vec_to[i] = convertDecimals(vec_from[i], col_from->getScale(), col_to->getScale()); - else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) - vec_to[i] = convertFromDecimal(vec_from[i], col_from->getScale()); - else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) - vec_to[i] = convertToDecimal(vec_from[i], col_to->getScale()); - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unsupported data type in conversion function"); - } - } - else - { - /// If From Data is Nan or Inf and we convert to integer type, throw exception - if constexpr (std::is_floating_point_v && !std::is_floating_point_v) - { - if (!isFinite(vec_from[i])) - { - if constexpr (std::is_same_v) - { - vec_to[i] = 0; - (*vec_null_map_to)[i] = true; - continue; - } - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unexpected inf or nan to integer conversion"); - } - } - - if constexpr (std::is_same_v - || std::is_same_v) - { - bool convert_result = accurate::convertNumeric(vec_from[i], vec_to[i]); - - if (!convert_result) - { - if (std::is_same_v) - { - vec_to[i] = 0; - (*vec_null_map_to)[i] = true; - } - else - { - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Value in column {} cannot be safely converted into type {}", - named_from.column->getName(), result_type->getName()); - } - } - } - else - { - if constexpr (std::is_same_v && std::is_same_v) - { - const uint8_t ip4_cidr[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}; - const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); - if (!matchIPv6Subnet(src, ip4_cidr, 96)) - { - char addr[IPV6_MAX_TEXT_LENGTH + 1] {}; - char * paddr = addr; - formatIPv6(src, paddr); - - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 {} in column {} is not in IPv4 mapping block", addr, named_from.column->getName()); - } - - uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); - if constexpr (std::endian::native == std::endian::little) - { - dst[0] = src[15]; - dst[1] = src[14]; - dst[2] = src[13]; - dst[3] = src[12]; - } - else - { - dst[0] = src[12]; - dst[1] = src[13]; - dst[2] = src[14]; - dst[3] = src[15]; - } - } - else if constexpr (std::is_same_v && std::is_same_v) - { - const 
uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); - uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); - std::memset(dst, '\0', IPV6_BINARY_LENGTH); - dst[10] = dst[11] = 0xff; - - if constexpr (std::endian::native == std::endian::little) - { - dst[12] = src[3]; - dst[13] = src[2]; - dst[14] = src[1]; - dst[15] = src[0]; - } - else - { - dst[12] = src[0]; - dst[13] = src[1]; - dst[14] = src[2]; - dst[15] = src[3]; - } - } - else if constexpr (std::is_same_v && std::is_same_v) - vec_to[i] = static_cast(static_cast(vec_from[i])); - else if constexpr (std::is_same_v && (std::is_same_v || std::is_same_v)) - vec_to[i] = static_cast(vec_from[i] * DATE_SECONDS_PER_DAY); - else - vec_to[i] = static_cast(vec_from[i]); - } - } - } - } - - if constexpr (std::is_same_v) - return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); - else - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - named_from.column->getName(), Name::name); - } -}; - -/** Conversion of DateTime to Date: throw off time component. - */ -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -/** Conversion of DateTime to Date32: throw off time component. - */ -template -struct ConvertImpl - : DateTimeTransformImpl {}; - -/** Conversion of Date to DateTime: adding 00:00:00 time component. - */ -template -struct ToDateTimeImpl -{ - static constexpr auto name = "toDateTime"; - - static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (d > MAX_DATETIME_DAY_NUM) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Day number {} is out of bounds of type DateTime", d); - } - else if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) - { - if (d > MAX_DATETIME_DAY_NUM) - d = MAX_DATETIME_DAY_NUM; - } - return static_cast(time_zone.fromDayNum(DayNum(d))); - } - - static UInt32 execute(Int32 d, const DateLUTImpl & time_zone) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) - { - if (d < 0) - return 0; - else if (d > MAX_DATETIME_DAY_NUM) - d = MAX_DATETIME_DAY_NUM; - } - else if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (d < 0 || d > MAX_DATETIME_DAY_NUM) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type DateTime", d); - } - return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); - } - - static UInt32 execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) - { - return dt; - } - - static UInt32 execute(Int64 dt64, const DateLUTImpl & /*time_zone*/) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Ignore) - return static_cast(dt64); - else - { - if (dt64 < 0 || dt64 >= MAX_DATETIME_TIMESTAMP) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) - return dt64 < 0 ? 0 : std::numeric_limits::max(); - else - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type DateTime", dt64); - } - else - return static_cast(dt64); - } - } -}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -/// Implementation of toDate function. 
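The IPv4-to-IPv6 branch above writes the address into the IPv4-mapped IPv6 block ::ffff:0:0/96: ten zero bytes, two 0xff bytes, then the four IPv4 bytes (byte-swapped on little-endian hosts). A standalone sketch of the forward mapping:

``` cpp
#include <array>
#include <cstdint>
#include <cstdio>

/// IPv4 -> IPv6: embed the address into the IPv4-mapped block ::ffff:0:0/96.
std::array<uint8_t, 16> ipv4ToMappedIPv6(uint32_t ipv4) /// host byte order
{
    std::array<uint8_t, 16> dst{}; /// bytes 0..9 stay zero
    dst[10] = dst[11] = 0xff;
    dst[12] = static_cast<uint8_t>(ipv4 >> 24);
    dst[13] = static_cast<uint8_t>(ipv4 >> 16);
    dst[14] = static_cast<uint8_t>(ipv4 >> 8);
    dst[15] = static_cast<uint8_t>(ipv4);
    return dst;
}

int main()
{
    auto v6 = ipv4ToMappedIPv6(0x7f000001); /// 127.0.0.1
    for (size_t i = 0; i < v6.size(); i += 2)
        std::printf("%02x%02x%c", v6[i], v6[i + 1], i + 2 < v6.size() ? ':' : '\n');
    /// 0000:0000:0000:0000:0000:ffff:7f00:0001
}
```

The reverse direction is only valid for addresses inside that /96 block, which is why the IPv6-to-IPv4 branch checks matchIPv6Subnet first and throws otherwise.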
- -template -struct ToDateTransform32Or64 -{ - static constexpr auto name = "toDate"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); - } - /// if value is smaller (or equal) than maximum day value for Date, than treat it as day num, - /// otherwise treat it as unix timestamp. This is a bit weird, but we leave this behavior. - if (from <= DATE_LUT_MAX_DAY_NUM) - return from; - else - return time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); - } -}; - -/** Conversion of Date32 to Date. - */ -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ToDateTransform32Or64Signed -{ - static constexpr auto name = "toDate"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - // TODO: decide narrow or extended range based on FromType - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < 0 || from > MAX_DATE_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); - } - else - { - if (from < 0) - return 0; - } - return (from <= DATE_LUT_MAX_DAY_NUM) - ? static_cast(from) - : time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATE_TIMESTAMP))); - } -}; - -template -struct ToDateTransform8Or16Signed -{ - static constexpr auto name = "toDate"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if (from < 0) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); - else - return 0; - } - return from; - } -}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -/// Implementation of toDate32 function. 
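The toDate transforms above fold two functions into one: inputs up to the maximum day number are taken as day numbers, larger ones as unix timestamps, saturating at the type bounds. A standalone sketch; the constants are assumptions mirroring the DATE_LUT_MAX_DAY_NUM / MAX_DATE_TIMESTAMP limits referenced above, and the real code maps timestamps to days through the timezone LUT rather than dividing by 86400:

``` cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

constexpr int64_t kMaxDayNum = 65535;             /// assumption: Date's day-number ceiling
constexpr int64_t kMaxDateTimestamp = 5662310399; /// assumption: last second of 2149-06-06

uint16_t toDate(int64_t from)
{
    if (from < 0)
        return 0;                            /// saturate negative inputs
    if (from <= kMaxDayNum)
        return static_cast<uint16_t>(from);  /// small value: already a day number
    int64_t ts = std::min(from, kMaxDateTimestamp);
    return static_cast<uint16_t>(ts / 86400); /// large value: treat as unix timestamp
}

int main()
{
    std::cout << toDate(100) << ' ' << toDate(1700000000) << '\n'; /// 100 and 19675
}
```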
- -template -struct ToDate32Transform32Or64 -{ - static constexpr auto name = "toDate32"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - if (from < DATE_LUT_MAX_EXTEND_DAY_NUM) - return static_cast(from); - else - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type Date32", from); - } - return time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATETIME64_TIMESTAMP))); - } - } -}; - -template -struct ToDate32Transform32Or64Signed -{ - static constexpr auto name = "toDate32"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - static const Int32 daynum_min_offset = -static_cast(time_zone.getDayNumOffsetEpoch()); - - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < daynum_min_offset || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type Date32", from); - } - - if (from < daynum_min_offset) - return daynum_min_offset; - - return (from < DATE_LUT_MAX_EXTEND_DAY_NUM) - ? static_cast(from) - : time_zone.toDayNum(std::min(time_t(Int64(from)), time_t(MAX_DATETIME64_TIMESTAMP))); - } -}; - -template -struct ToDate32Transform8Or16Signed -{ - static constexpr auto name = "toDate32"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - return from; - } -}; - -/** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience, - * Float32, Float64) to Date. If the - * number is less than 65536, then it is treated as DayNum, and if it's greater or equals to 65536, - * then treated as unix timestamp. If the number exceeds UInt32, saturate to MAX_UINT32 then as DayNum. - * It's a bit illogical, as we actually have two functions in one. - * But allows to support frequent case, - * when user write toDate(UInt32), expecting conversion of unix timestamp to Date. - * (otherwise such usage would be frequent mistake). 
- */ -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - - -template -struct ToDateTimeTransform64 -{ - static constexpr auto name = "toDateTime"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); - } - return static_cast(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); - } -}; - -template -struct ToDateTimeTransformSigned -{ - static constexpr auto name = "toDateTime"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if (from < 0) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); - else - return 0; - } - return from; - } -}; - -template -struct ToDateTimeTransform64Signed -{ - static constexpr auto name = "toDateTime"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < 0 || from > MAX_DATETIME_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); - } - - if (from < 0) - return 0; - return static_cast(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); - } -}; - -/// Special case of converting Int8, Int16, Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to DateTime. 
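The ToDateTimeTransform* structs above parameterize a single bounds check by the three DateTimeOverflowBehavior policies. A standalone sketch of the policy split for Int64 -> DateTime; the range cap is an assumption mirroring MAX_DATETIME_TIMESTAMP:

``` cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>

enum class OverflowBehavior { Throw, Saturate, Ignore };

constexpr int64_t kMaxDateTimeTimestamp = 4294967295; /// assumption: UInt32 range cap

template <OverflowBehavior behavior>
uint32_t toDateTime(int64_t from)
{
    if constexpr (behavior == OverflowBehavior::Throw)
    {
        if (from < 0 || from > kMaxDateTimeTimestamp)
            throw std::out_of_range("Timestamp value is out of bounds of type DateTime");
    }
    else if constexpr (behavior == OverflowBehavior::Saturate)
    {
        if (from < 0)
            return 0;
        if (from > kMaxDateTimeTimestamp)
            return static_cast<uint32_t>(kMaxDateTimeTimestamp);
    }
    /// Ignore: plain truncating cast, wrap-around is accepted.
    return static_cast<uint32_t>(from);
}

int main()
{
    std::cout << toDateTime<OverflowBehavior::Saturate>(-5) << '\n'; /// 0
    std::cout << toDateTime<OverflowBehavior::Ignore>(1700000000) << '\n';
}
```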
-template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -/** Conversion of numeric to DateTime64 - */ - -template -struct ToDateTime64TransformUnsigned -{ - static constexpr auto name = "toDateTime64"; - - const DateTime64::NativeType scale_multiplier = 1; - - ToDateTime64TransformUnsigned(UInt32 scale = 0) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); - else - return DecimalUtils::decimalFromComponentsWithMultiplier(from, 0, scale_multiplier); - } - else - return DecimalUtils::decimalFromComponentsWithMultiplier(std::min(from, MAX_DATETIME64_TIMESTAMP), 0, scale_multiplier); - } -}; -template -struct ToDateTime64TransformSigned -{ - static constexpr auto name = "toDateTime64"; - - const DateTime64::NativeType scale_multiplier = 1; - - ToDateTime64TransformSigned(UInt32 scale = 0) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < MIN_DATETIME64_TIMESTAMP || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); - } - from = static_cast(std::max(from, MIN_DATETIME64_TIMESTAMP)); - from = static_cast(std::min(from, MAX_DATETIME64_TIMESTAMP)); - - return DecimalUtils::decimalFromComponentsWithMultiplier(from, 0, scale_multiplier); - } -}; -template -struct ToDateTime64TransformFloat -{ - static constexpr auto name = "toDateTime64"; - - const UInt32 scale = 1; - - ToDateTime64TransformFloat(UInt32 scale_ = 0) /// NOLINT - : scale(scale_) - {} - - NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < MIN_DATETIME64_TIMESTAMP || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); - } - - from = std::max(from, static_cast(MIN_DATETIME64_TIMESTAMP)); - from = std::min(from, static_cast(MAX_DATETIME64_TIMESTAMP)); - return convertToDecimal(from, scale); - } -}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct 
ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - - -/** Conversion of DateTime64 to Date or DateTime: discards fractional part. - */ -template -struct FromDateTime64Transform -{ - static constexpr auto name = Transform::name; - - const DateTime64::NativeType scale_multiplier = 1; - - FromDateTime64Transform(UInt32 scale) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - auto execute(DateTime64::NativeType dt, const DateLUTImpl & time_zone) const - { - const auto c = DecimalUtils::splitWithScaleMultiplier(DateTime64(dt), scale_multiplier); - return Transform::execute(static_cast(c.whole), time_zone); - } -}; - -/** Conversion of DateTime64 to Date or DateTime: discards fractional part. - */ -template -struct ConvertImpl - : DateTimeTransformImpl>, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl>, false> {}; - -struct ToDateTime64Transform -{ - static constexpr auto name = "toDateTime64"; - - const DateTime64::NativeType scale_multiplier = 1; - - ToDateTime64Transform(UInt32 scale = 0) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - DateTime64::NativeType execute(UInt16 d, const DateLUTImpl & time_zone) const - { - const auto dt = ToDateTimeImpl<>::execute(d, time_zone); - return execute(dt, time_zone); - } - - DateTime64::NativeType execute(Int32 d, const DateLUTImpl & time_zone) const - { - Int64 dt = static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); - return DecimalUtils::decimalFromComponentsWithMultiplier(dt, 0, scale_multiplier); - } - - DateTime64::NativeType execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) const - { - return DecimalUtils::decimalFromComponentsWithMultiplier(dt, 0, scale_multiplier); - } -}; - -/** Conversion of Date or DateTime to DateTime64: add zero sub-second part. - */ -template -struct ConvertImpl - : DateTimeTransformImpl {}; - -template -struct ConvertImpl - : DateTimeTransformImpl {}; - -template -struct ConvertImpl - : DateTimeTransformImpl {}; - - -/** Transformation of numbers, dates, datetimes to strings: through formatting. 
- */ -template -struct FormatImpl -{ - template - static ReturnType execute(const typename DataType::FieldType x, WriteBuffer & wb, const DataType *, const DateLUTImpl *) - { - writeText(x, wb); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDate::FieldType x, WriteBuffer & wb, const DataTypeDate *, const DateLUTImpl * time_zone) - { - writeDateText(DayNum(x), wb, *time_zone); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDate32::FieldType x, WriteBuffer & wb, const DataTypeDate32 *, const DateLUTImpl * time_zone) - { - writeDateText(ExtendedDayNum(x), wb, *time_zone); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDateTime::FieldType x, WriteBuffer & wb, const DataTypeDateTime *, const DateLUTImpl * time_zone) - { - writeDateTimeText(x, wb, *time_zone); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDateTime64::FieldType x, WriteBuffer & wb, const DataTypeDateTime64 * type, const DateLUTImpl * time_zone) - { - writeDateTimeText(DateTime64(x), type->getScale(), wb, *time_zone); - return ReturnType(true); - } -}; - - -template -struct FormatImpl> -{ - template - static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeEnum * type, const DateLUTImpl *) - { - static constexpr bool throw_exception = std::is_same_v; - - if constexpr (throw_exception) - { - writeString(type->getNameForValue(x), wb); - } - else - { - StringRef res; - bool is_ok = type->getNameForValue(x, res); - if (is_ok) - writeString(res, wb); - return ReturnType(is_ok); - } - } -}; - -template -struct FormatImpl> -{ - template - static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeDecimal * type, const DateLUTImpl *) - { - writeText(x, type->getScale(), wb, false); - return ReturnType(true); - } -}; - - -/// DataTypeEnum to DataType free conversion -template -struct ConvertImpl, DataTypeNumber, Name, ConvertDefaultBehaviorTag> -{ - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) - { - return arguments[0].column; - } -}; - -static inline ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) -{ - ColumnUInt8::MutablePtr null_map = nullptr; - if (const auto * col_null = checkAndGetColumn(col.get())) - { - null_map = ColumnUInt8::create(); - null_map->insertRangeFrom(col_null->getNullMapColumn(), 0, col_null->size()); - } - return null_map; -} - -template -requires (!std::is_same_v) -struct ConvertImpl -{ - using FromFieldType = typename FromDataType::FieldType; - using ColVecType = ColumnVectorOrDecimal; - - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) - { - if constexpr (IsDataTypeDateOrDateTime) - { - auto datetime_arg = arguments[0]; - - const DateLUTImpl * time_zone = nullptr; - const ColumnConst * time_zone_column = nullptr; - - if (arguments.size() == 1) - { - auto non_null_args = createBlockWithNestedColumns(arguments); - time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); - } - else /// When we have a column for timezone - { - datetime_arg.column = datetime_arg.column->convertToFullColumnIfConst(); - - if constexpr (std::is_same_v || std::is_same_v) - time_zone = &DateLUT::instance(); - /// For argument of Date or 
DateTime type, second argument with time zone could be specified. - if constexpr (std::is_same_v || std::is_same_v) - { - if ((time_zone_column = checkAndGetColumnConst(arguments[1].column.get()))) - { - auto non_null_args = createBlockWithNestedColumns(arguments); - time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); - } - } - } - const auto & col_with_type_and_name = columnGetNested(datetime_arg); - - if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) - { - auto col_to = ColumnString::create(); - - const typename ColVecType::Container & vec_from = col_from->getData(); - ColumnString::Chars & data_to = col_to->getChars(); - ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - - if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); - else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); - else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss") + 1)); - else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss.") + col_from->getScale() + 1)); - else - data_to.resize(size * 3); /// Arbitrary - - offsets_to.resize(size); - - WriteBufferFromVector write_buffer(data_to); - const auto & type = static_cast(*col_with_type_and_name.type); - - ColumnUInt8::MutablePtr null_map = copyNullMap(datetime_arg.column); - - if (!null_map && arguments.size() > 1) - null_map = copyNullMap(arguments[1].column->convertToFullColumnIfConst()); - - if (null_map) - { - for (size_t i = 0; i < size; ++i) - { - if (!time_zone_column && arguments.size() > 1) - { - if (!arguments[1].column.get()->getDataAt(i).toString().empty()) - time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); - } - bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); - null_map->getData()[i] |= !is_ok; - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - else - { - for (size_t i = 0; i < size; ++i) - { - if (!time_zone_column && arguments.size() > 1) - { - if (!arguments[1].column.get()->getDataAt(i).toString().empty()) - time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); - } - FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - - write_buffer.finalize(); - - if (null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), Name::name); - } - else - { - ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); - - const auto & col_with_type_and_name = columnGetNested(arguments[0]); - const auto & type = static_cast(*col_with_type_and_name.type); - - if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) - { - auto col_to = ColumnString::create(); - - const typename ColVecType::Container & vec_from = col_from->getData(); - ColumnString::Chars & data_to = col_to->getChars(); - ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - - 
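- /// No fixed-width text estimate exists for generic numeric types, so the code below starts
- /// from an arbitrary guess of 3 bytes per value; WriteBufferFromVector grows the buffer on demand.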
data_to.resize(size * 3); - offsets_to.resize(size); - - WriteBufferFromVector write_buffer(data_to); - - if (null_map) - { - for (size_t i = 0; i < size; ++i) - { - bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, nullptr); - /// We don't use timezones in this branch - null_map->getData()[i] |= !is_ok; - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - else - { - for (size_t i = 0; i < size; ++i) - { - FormatImpl::template execute(vec_from[i], write_buffer, &type, nullptr); - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - - write_buffer.finalize(); - - if (null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), Name::name); - } - } -}; - - -/// Generic conversion of any type to String or FixedString via serialization to text. -template -struct ConvertImplGenericToString -{ - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) - { - static_assert(std::is_same_v || std::is_same_v, - "Can be used only to serialize to ColumnString or ColumnFixedString"); - - ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); - - const auto & col_with_type_and_name = columnGetNested(arguments[0]); - const IDataType & type = *col_with_type_and_name.type; - const IColumn & col_from = *col_with_type_and_name.column; - - size_t size = col_from.size(); - auto col_to = removeNullable(result_type)->createColumn(); - - { - ColumnStringHelpers::WriteHelper write_helper( - assert_cast(*col_to), - size); - - auto & write_buffer = write_helper.getWriteBuffer(); - - FormatSettings format_settings; - auto serialization = type.getDefaultSerialization(); - for (size_t row = 0; row < size; ++row) - { - serialization->serializeText(col_from, row, write_buffer, format_settings); - write_helper.rowWritten(); - } - - write_helper.finalize(); - } - - if (result_type->isNullable() && null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; - } -}; - -/** Conversion of time_t to UInt16, Int32, UInt32 - */ -template -void convertFromTime(typename DataType::FieldType & x, time_t & time) -{ - x = time; -} - -template <> -inline void convertFromTime(DataTypeDate::FieldType & x, time_t & time) -{ - if (unlikely(time < 0)) - x = 0; - else if (unlikely(time > 0xFFFF)) - x = 0xFFFF; - else - x = time; -} - -template <> -inline void convertFromTime(DataTypeDate32::FieldType & x, time_t & time) -{ - x = static_cast(time); -} - -template <> -inline void convertFromTime(DataTypeDateTime::FieldType & x, time_t & time) -{ - if (unlikely(time < 0)) - x = 0; - else if (unlikely(time > MAX_DATETIME_TIMESTAMP)) - x = MAX_DATETIME_TIMESTAMP; - else - x = static_cast(time); -} - -/** Conversion of strings to numbers, dates, datetimes: through parsing. 
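- * Each parseImpl specialization below consumes one value from a ReadBuffer
- * (e.g. readDateText for Date, readUUIDText for UUID) and stores it into the native field type.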
- */ -template -void parseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool precise_float_parsing) -{ - if constexpr (std::is_floating_point_v) - { - if (precise_float_parsing) - readFloatTextPrecise(x, rb); - else - readFloatTextFast(x, rb); - } - else - readText(x, rb); -} - -template <> -inline void parseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - DayNum tmp(0); - readDateText(tmp, rb, *time_zone); - x = tmp; -} - -template <> -inline void parseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - ExtendedDayNum tmp(0); - readDateText(tmp, rb, *time_zone); - x = tmp; -} - - -// NOTE: no need of extra overload of DateTime64, since readDateTimeText64 has different signature and that case is explicitly handled in the calling code. -template <> -inline void parseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - time_t time = 0; - readDateTimeText(time, rb, *time_zone); - convertFromTime(x, time); -} - -template <> -inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - UUID tmp; - readUUIDText(tmp, rb); - x = tmp.toUnderType(); -} - -template <> -inline void parseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - IPv4 tmp; - readIPv4Text(tmp, rb); - x = tmp.toUnderType(); -} - -template <> -inline void parseImpl(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - IPv6 tmp; - readIPv6Text(tmp, rb); - x = tmp; -} - -template -bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool precise_float_parsing) -{ - if constexpr (std::is_floating_point_v) - { - if (precise_float_parsing) - return tryReadFloatTextPrecise(x, rb); - else - return tryReadFloatTextFast(x, rb); - } - else /*if constexpr (is_integer_v)*/ - return tryReadIntText(x, rb); -} - -template <> -inline bool tryParseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - DayNum tmp(0); - if (!tryReadDateText(tmp, rb, *time_zone)) - return false; - x = tmp; - return true; -} - -template <> -inline bool tryParseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - ExtendedDayNum tmp(0); - if (!tryReadDateText(tmp, rb, *time_zone)) - return false; - x = tmp; - return true; -} - -template <> -inline bool tryParseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - time_t time = 0; - if (!tryReadDateTimeText(time, rb, *time_zone)) - return false; - convertFromTime(x, time); - return true; -} - -template <> -inline bool tryParseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - UUID tmp; - if (!tryReadUUIDText(tmp, rb)) - return false; - - x = tmp.toUnderType(); - return true; -} - -template <> -inline bool tryParseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - IPv4 tmp; - if (!tryReadIPv4Text(tmp, rb)) - return false; - - x = tmp.toUnderType(); - return true; -} - -template <> -inline bool tryParseImpl(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - IPv6 tmp; - if (!tryReadIPv6Text(tmp, rb)) - return false; - - x = tmp; - return true; -} - - -/** Throw exception with verbose message when string value is not parsed completely. 
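- * The message cites the offending string, the target type, and the position of the first
- * unparsed byte, e.g.: Cannot parse string '123abc' as UInt32: syntax error at position 3 (parsed just '123').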
- */
-[[noreturn]] inline void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, const IDataType & result_type)
-{
-    WriteBufferFromOwnString message_buf;
-    message_buf << "Cannot parse string " << quote << String(read_buffer.buffer().begin(), read_buffer.buffer().size())
-                << " as " << result_type.getName()
-                << ": syntax error";
-
-    if (read_buffer.offset())
-        message_buf << " at position " << read_buffer.offset()
-                    << " (parsed just " << quote << String(read_buffer.buffer().begin(), read_buffer.offset()) << ")";
-    else
-        message_buf << " at begin of string";
-
-    // Currently there are no functions toIPv{4,6}Or{Null,Zero}
-    if (isNativeNumber(result_type) && !(result_type.getName() == "IPv4" || result_type.getName() == "IPv6"))
-        message_buf << ". Note: there are to" << result_type.getName() << "OrZero and to" << result_type.getName() << "OrNull functions, which return zero/NULL instead of throwing exception.";
-
-    throw Exception(PreformattedMessage{message_buf.str(), "Cannot parse string {} as {}: syntax error {}"}, ErrorCodes::CANNOT_PARSE_TEXT);
-}
-
-
-enum class ConvertFromStringExceptionMode
-{
-    Throw, /// Throw exception if value cannot be parsed.
-    Zero,  /// Fill with zero or default if value cannot be parsed.
-    Null   /// Return ColumnNullable with NULLs when value cannot be parsed.
-};
-
-enum class ConvertFromStringParsingMode
-{
-    Normal,
-    BestEffort, /// Only applicable for DateTime. Will use a more sophisticated method that is slower.
-    BestEffortUS
-};
-
-template <typename FromDataType, typename ToDataType, typename Name, ConvertFromStringExceptionMode exception_mode, ConvertFromStringParsingMode parsing_mode>
-struct ConvertThroughParsing
-{
-    static_assert(std::is_same_v<FromDataType, DataTypeString> || std::is_same_v<FromDataType, DataTypeFixedString>,
-        "ConvertThroughParsing is only applicable for String or FixedString data types");
-
-    static constexpr bool to_datetime64 = std::is_same_v<ToDataType, DataTypeDateTime64>;
-
-    static bool isAllRead(ReadBuffer & in)
-    {
-        /// In case of FixedString, skip zero bytes at end.
-        if constexpr (std::is_same_v<FromDataType, DataTypeFixedString>)
-            while (!in.eof() && *in.position() == 0)
-                ++in.position();
-
-        if (in.eof())
-            return true;
-
-        /// Special case, that allows to parse string with DateTime or DateTime64 as Date or Date32.
-        if constexpr (std::is_same_v<ToDataType, DataTypeDate> || std::is_same_v<ToDataType, DataTypeDate32>)
-        {
-            if (!in.eof() && (*in.position() == ' ' || *in.position() == 'T'))
-            {
-                if (in.buffer().size() == strlen("YYYY-MM-DD hh:mm:ss"))
-                    return true;
-
-                if (in.buffer().size() >= strlen("YYYY-MM-DD hh:mm:ss.x")
-                    && in.buffer().begin()[19] == '.')
-                {
-                    in.position() = in.buffer().begin() + 20;
-
-                    while (!in.eof() && isNumericASCII(*in.position()))
-                        ++in.position();
-
-                    if (in.eof())
-                        return true;
-                }
-            }
-        }
-
-        return false;
-    }
-
-    template <typename Additions = void *>
-    static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, size_t input_rows_count,
-                        Additions additions [[maybe_unused]] = Additions())
-    {
-        using ColVecTo = typename ToDataType::ColumnType;
-
-        const DateLUTImpl * local_time_zone [[maybe_unused]] = nullptr;
-        const DateLUTImpl * utc_time_zone [[maybe_unused]] = nullptr;
-
-        /// For conversion to Date or DateTime type, second argument with time zone could be specified.
-        if constexpr (std::is_same_v<ToDataType, DataTypeDateTime> || to_datetime64)
-        {
-            const auto result_type = removeNullable(res_type);
-            // Time zone is already figured out during result type resolution, no need to do it here.
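- // (The DateTime/DateTime64 result type already carries its time zone; fall back to reading
- // the timezone argument only when the result type cannot provide one.)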
- if (const auto dt_col = checkAndGetDataType(result_type.get())) - local_time_zone = &dt_col->getTimeZone(); - else - local_time_zone = &extractTimeZoneFromFunctionArguments(arguments, 1, 0); - - if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort || parsing_mode == ConvertFromStringParsingMode::BestEffortUS) - utc_time_zone = &DateLUT::instance("UTC"); - } - else if constexpr (std::is_same_v || std::is_same_v) - { - // Timezone is more or less dummy when parsing Date/Date32 from string. - local_time_zone = &DateLUT::instance(); - utc_time_zone = &DateLUT::instance("UTC"); - } - - const IColumn * col_from = arguments[0].column.get(); - const ColumnString * col_from_string = checkAndGetColumn(col_from); - const ColumnFixedString * col_from_fixed_string = checkAndGetColumn(col_from); - - if (std::is_same_v && !col_from_string) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - col_from->getName(), Name::name); - - if (std::is_same_v && !col_from_fixed_string) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - col_from->getName(), Name::name); - - size_t size = input_rows_count; - typename ColVecTo::MutablePtr col_to = nullptr; - - if constexpr (IsDataTypeDecimal) - { - UInt32 scale = additions; - if constexpr (to_datetime64) - { - ToDataType check_bounds_in_ctor(scale, local_time_zone ? local_time_zone->getTimeZone() : String{}); - } - else - { - ToDataType check_bounds_in_ctor(ToDataType::maxPrecision(), scale); - } - col_to = ColVecTo::create(size, scale); - } - else - col_to = ColVecTo::create(size); - - typename ColVecTo::Container & vec_to = col_to->getData(); - - ColumnUInt8::MutablePtr col_null_map_to; - ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr; - if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) - { - col_null_map_to = ColumnUInt8::create(size); - vec_null_map_to = &col_null_map_to->getData(); - } - - const ColumnString::Chars * chars = nullptr; - const IColumn::Offsets * offsets = nullptr; - size_t fixed_string_size = 0; - - if constexpr (std::is_same_v) - { - chars = &col_from_string->getChars(); - offsets = &col_from_string->getOffsets(); - } - else - { - chars = &col_from_fixed_string->getChars(); - fixed_string_size = col_from_fixed_string->getN(); - } - - size_t current_offset = 0; - - bool precise_float_parsing = false; - - if (DB::CurrentThread::isInitialized()) - { - const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext(); - - if (query_context) - precise_float_parsing = query_context->getSettingsRef().precise_float_parsing; - } - - for (size_t i = 0; i < size; ++i) - { - size_t next_offset = std::is_same_v ? (*offsets)[i] : (current_offset + fixed_string_size); - size_t string_size = std::is_same_v ? 
next_offset - current_offset - 1 : fixed_string_size; - - ReadBufferFromMemory read_buffer(&(*chars)[current_offset], string_size); - - if constexpr (exception_mode == ConvertFromStringExceptionMode::Throw) - { - if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort) - { - if constexpr (to_datetime64) - { - DateTime64 res = 0; - parseDateTime64BestEffort(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; - } - else - { - time_t res; - parseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone); - convertFromTime(vec_to[i], res); - } - } - else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS) - { - if constexpr (to_datetime64) - { - DateTime64 res = 0; - parseDateTime64BestEffortUS(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; - } - else - { - time_t res; - parseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone); - convertFromTime(vec_to[i], res); - } - } - else - { - if constexpr (to_datetime64) - { - DateTime64 value = 0; - readDateTime64Text(value, col_to->getScale(), read_buffer, *local_time_zone); - vec_to[i] = value; - } - else if constexpr (IsDataTypeDecimal) - { - SerializationDecimal::readText( - vec_to[i], read_buffer, ToDataType::maxPrecision(), col_to->getScale()); - } - else - { - /// we want to utilize constexpr condition here, which is not mixable with value comparison - do - { - if constexpr (std::is_same_v && std::is_same_v) - { - if (fixed_string_size == IPV6_BINARY_LENGTH) - { - readBinary(vec_to[i], read_buffer); - break; - } - } - parseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing); - } while (false); - } - } - - if (!isAllRead(read_buffer)) - throwExceptionForIncompletelyParsedValue(read_buffer, *res_type); - } - else - { - bool parsed; - - if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort) - { - if constexpr (to_datetime64) - { - DateTime64 res = 0; - parsed = tryParseDateTime64BestEffort(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; - } - else - { - time_t res; - parsed = tryParseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone); - convertFromTime(vec_to[i],res); - } - } - else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS) - { - if constexpr (to_datetime64) - { - DateTime64 res = 0; - parsed = tryParseDateTime64BestEffortUS(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; - } - else - { - time_t res; - parsed = tryParseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone); - convertFromTime(vec_to[i],res); - } - } - else - { - if constexpr (to_datetime64) - { - DateTime64 value = 0; - parsed = tryReadDateTime64Text(value, col_to->getScale(), read_buffer, *local_time_zone); - vec_to[i] = value; - } - else if constexpr (IsDataTypeDecimal) - { - parsed = SerializationDecimal::tryReadText( - vec_to[i], read_buffer, ToDataType::maxPrecision(), col_to->getScale()); - } - else - { - /// we want to utilize constexpr condition here, which is not mixable with value comparison - do - { - if constexpr (std::is_same_v && std::is_same_v) - { - if (fixed_string_size == IPV6_BINARY_LENGTH) - { - readBinary(vec_to[i], read_buffer); - parsed = true; - break; - } - } - - parsed = tryParseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing); - } while (false); - } - } - - if (!isAllRead(read_buffer)) - parsed = 
false; - - if (!parsed) - { - if constexpr (std::is_same_v) - { - vec_to[i] = -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); - } - else - { - vec_to[i] = static_cast(0); - } - } - - if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) - (*vec_null_map_to)[i] = !parsed; - } - - current_offset = next_offset; - } - - if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) - return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); - else - return col_to; - } -}; - - -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (is_any_of && is_any_of) -struct ConvertImpl - : ConvertThroughParsing {}; - -/// Generic conversion of any type from String. Used for complex types: Array and Tuple or types with custom serialization. -struct ConvertImplGenericFromString -{ - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) - { - const IColumn & column_from = *arguments[0].column; - const IDataType & data_type_to = *result_type; - auto res = data_type_to.createColumn(); - auto serialization = data_type_to.getDefaultSerialization(); - const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; - - executeImpl(column_from, *res, *serialization, input_rows_count, null_map, result_type.get()); - return res; - } - - static void executeImpl( - const IColumn & column_from, - IColumn & column_to, - const ISerialization & serialization_from, - size_t input_rows_count, - const PaddedPODArray * null_map, - const IDataType * result_type) - { - column_to.reserve(input_rows_count); - - FormatSettings format_settings; - for (size_t i = 0; i < input_rows_count; ++i) - { - if (null_map && (*null_map)[i]) - { - column_to.insertDefault(); - continue; - } - - const auto & val = column_from.getDataAt(i); - ReadBufferFromMemory read_buffer(val.data, val.size); - try - { - serialization_from.deserializeWholeText(column_to, read_buffer, format_settings); - } - catch (const Exception & e) - { - auto * nullable_column = typeid_cast(&column_to); - if (e.code() == ErrorCodes::CANNOT_PARSE_BOOL && nullable_column) - { - auto & col_nullmap = nullable_column->getNullMapData(); - if (col_nullmap.size() != nullable_column->size()) - col_nullmap.resize_fill(nullable_column->size()); - if (nullable_column->size() == (i + 1)) - nullable_column->popBack(1); - nullable_column->insertDefault(); - continue; - } - throw; - } - - if (!read_buffer.eof()) - { - if (result_type) - throwExceptionForIncompletelyParsedValue(read_buffer, *result_type); - else - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, - "Cannot parse string to column {}. Expected eof", column_to.getName()); - } - } - } -}; - - -template <> -struct ConvertImpl - : ConvertImpl {}; - -template <> -struct ConvertImpl - : ConvertImpl {}; - -/** If types are identical, just take reference to column. 
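- * e.g. toString applied to a String column returns the very same column, without copying the data.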
- */ -template -requires (!T::is_parametric) -struct ConvertImpl -{ - template - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, - Additions additions [[maybe_unused]] = Additions()) - { - return arguments[0].column; - } -}; - -template -struct ConvertImpl -{ - template - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, - Additions additions [[maybe_unused]] = Additions()) - { - - return arguments[0].column; - } -}; - - -/** Conversion from FixedString to String. - * Cutting sequences of zero bytes from end of strings. - */ -template -struct ConvertImpl -{ - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type, size_t /*input_rows_count*/) - { - ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); - const auto & nested = columnGetNested(arguments[0]); - if (const ColumnFixedString * col_from = checkAndGetColumn(nested.column.get())) - { - auto col_to = ColumnString::create(); - - const ColumnFixedString::Chars & data_from = col_from->getChars(); - ColumnString::Chars & data_to = col_to->getChars(); - ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = col_from->size(); - size_t n = col_from->getN(); - data_to.resize(size * (n + 1)); /// + 1 - zero terminator - offsets_to.resize(size); - - size_t offset_from = 0; - size_t offset_to = 0; - for (size_t i = 0; i < size; ++i) - { - if (!null_map || !null_map->getData()[i]) - { - size_t bytes_to_copy = n; - while (bytes_to_copy > 0 && data_from[offset_from + bytes_to_copy - 1] == 0) - --bytes_to_copy; - - memcpy(&data_to[offset_to], &data_from[offset_from], bytes_to_copy); - offset_to += bytes_to_copy; - } - data_to[offset_to] = 0; - ++offset_to; - offsets_to[i] = offset_to; - offset_from += n; - } - - data_to.resize(offset_to); - if (return_type->isNullable() && null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), Name::name); - } -}; - - -/// Declared early because used below. 
-struct NameToDate { static constexpr auto name = "toDate"; }; -struct NameToDate32 { static constexpr auto name = "toDate32"; }; -struct NameToDateTime { static constexpr auto name = "toDateTime"; }; -struct NameToDateTime32 { static constexpr auto name = "toDateTime32"; }; -struct NameToDateTime64 { static constexpr auto name = "toDateTime64"; }; -struct NameToString { static constexpr auto name = "toString"; }; -struct NameToDecimal32 { static constexpr auto name = "toDecimal32"; }; -struct NameToDecimal64 { static constexpr auto name = "toDecimal64"; }; -struct NameToDecimal128 { static constexpr auto name = "toDecimal128"; }; -struct NameToDecimal256 { static constexpr auto name = "toDecimal256"; }; - - -#define DEFINE_NAME_TO_INTERVAL(INTERVAL_KIND) \ - struct NameToInterval ## INTERVAL_KIND \ - { \ - static constexpr auto name = "toInterval" #INTERVAL_KIND; \ - static constexpr auto kind = IntervalKind::Kind::INTERVAL_KIND; \ - }; - -DEFINE_NAME_TO_INTERVAL(Nanosecond) -DEFINE_NAME_TO_INTERVAL(Microsecond) -DEFINE_NAME_TO_INTERVAL(Millisecond) -DEFINE_NAME_TO_INTERVAL(Second) -DEFINE_NAME_TO_INTERVAL(Minute) -DEFINE_NAME_TO_INTERVAL(Hour) -DEFINE_NAME_TO_INTERVAL(Day) -DEFINE_NAME_TO_INTERVAL(Week) -DEFINE_NAME_TO_INTERVAL(Month) -DEFINE_NAME_TO_INTERVAL(Quarter) -DEFINE_NAME_TO_INTERVAL(Year) - -#undef DEFINE_NAME_TO_INTERVAL - -struct NameParseDateTimeBestEffort; -struct NameParseDateTimeBestEffortOrZero; -struct NameParseDateTimeBestEffortOrNull; - -template -static inline bool isDateTime64(const ColumnsWithTypeAndName & arguments) -{ - if constexpr (std::is_same_v) - return true; - else if constexpr (std::is_same_v || std::is_same_v - || std::is_same_v || std::is_same_v) - { - return (arguments.size() == 2 && isUInt(arguments[1].type)) || arguments.size() == 3; - } - - return false; -} - -template -class FunctionConvert : public IFunction -{ -public: - using Monotonic = MonotonicityImpl; - - static constexpr auto name = Name::name; - static constexpr bool to_decimal = - std::is_same_v || std::is_same_v - || std::is_same_v || std::is_same_v; - - static constexpr bool to_datetime64 = std::is_same_v; - - static constexpr bool to_string_or_fixed_string = std::is_same_v || - std::is_same_v; - - static constexpr bool to_date_or_datetime = std::is_same_v || - std::is_same_v || - std::is_same_v; - - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - static FunctionPtr create() { return std::make_shared(); } - - FunctionConvert() = default; - explicit FunctionConvert(ContextPtr context_) : context(context_) {} - - String getName() const override - { - return name; - } - - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return std::is_same_v; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & arguments) const override - { - /// TODO: We can make more optimizations here. 
- return !(to_date_or_datetime && isNumber(*arguments[0].type)); - } - - using DefaultReturnTypeGetter = std::function; - static DataTypePtr getReturnTypeDefaultImplementationForNulls(const ColumnsWithTypeAndName & arguments, const DefaultReturnTypeGetter & getter) - { - NullPresence null_presence = getNullPresense(arguments); - - if (null_presence.has_null_constant) - { - return makeNullable(std::make_shared()); - } - if (null_presence.has_nullable) - { - auto nested_columns = Block(createBlockWithNestedColumns(arguments)); - auto return_type = getter(ColumnsWithTypeAndName(nested_columns.begin(), nested_columns.end())); - return makeNullable(return_type); - } - - return getter(arguments); - } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - auto getter = [&] (const auto & args) { return getReturnTypeImplRemovedNullable(args); }; - auto res = getReturnTypeDefaultImplementationForNulls(arguments, getter); - to_nullable = res->isNullable(); - checked_return_type = true; - return res; - } - - DataTypePtr getReturnTypeImplRemovedNullable(const ColumnsWithTypeAndName & arguments) const - { - FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; - FunctionArgumentDescriptors optional_args; - - if constexpr (to_decimal) - { - mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); - } - - if (!to_decimal && isDateTime64(arguments)) - { - mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); - } - - // toString(DateTime or DateTime64, [timezone: String]) - if ((std::is_same_v && !arguments.empty() && (isDateTime64(arguments[0].type) || isDateTime(arguments[0].type))) - // toUnixTimestamp(value[, timezone : String]) - || std::is_same_v - // toDate(value[, timezone : String]) - || std::is_same_v // TODO: shall we allow timestamp argument for toDate? DateTime knows nothing about timezones and this argument is ignored below. - // toDate32(value[, timezone : String]) - || std::is_same_v - // toDateTime(value[, timezone: String]) - || std::is_same_v - // toDateTime64(value, scale : Integer[, timezone: String]) - || std::is_same_v) - { - optional_args.push_back({"timezone", static_cast(&isString), nullptr, "String"}); - } - - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); - - if constexpr (std::is_same_v) - { - return std::make_shared(Name::kind); - } - else if constexpr (to_decimal) - { - UInt64 scale = extractToDecimalScale(arguments[1]); - - if constexpr (std::is_same_v) - return createDecimalMaxPrecision(scale); - else if constexpr (std::is_same_v) - return createDecimalMaxPrecision(scale); - else if constexpr (std::is_same_v) - return createDecimalMaxPrecision(scale); - else if constexpr (std::is_same_v) - return createDecimalMaxPrecision(scale); - - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected branch in code of conversion function: it is a bug."); - } - else - { - // Optional second argument with time zone for DateTime. - UInt8 timezone_arg_position = 1; - UInt32 scale [[maybe_unused]] = DataTypeDateTime64::default_scale; - - // DateTime64 requires more arguments: scale and timezone. Since timezone is optional, scale should be first. 
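- // e.g. toDateTime64(value, 3, 'UTC'): argument 1 is the scale, argument 2 the optional time zone.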
- if (isDateTime64(arguments)) - { - timezone_arg_position += 1; - scale = static_cast(arguments[1].column->get64(0)); - - if (to_datetime64 || scale != 0) /// toDateTime('xxxx-xx-xx xx:xx:xx', 0) return DateTime - return std::make_shared(scale, - extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); - - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); - } - - if constexpr (std::is_same_v) - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); - else if constexpr (std::is_same_v) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected branch in code of conversion function: it is a bug."); - else - return std::make_shared(); - } - } - - /// Function actually uses default implementation for nulls, - /// but we need to know if return type is Nullable or not, - /// so we use checked_return_type only to intercept the first call to getReturnTypeImpl(...). - bool useDefaultImplementationForNulls() const override - { - bool to_nullable_string = to_nullable && std::is_same_v; - return checked_return_type && !to_nullable_string; - } - - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override - { - if constexpr (std::is_same_v) - return {}; - else if constexpr (std::is_same_v) - return {2}; - return {1}; - } - bool canBeExecutedOnDefaultArguments() const override { return false; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - try - { - return executeInternal(arguments, result_type, input_rows_count); - } - catch (Exception & e) - { - /// More convenient error message. 
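- // e.g. "Cannot parse DateTime from String, because value is too short" instead of a bare EOF error.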
- if (e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF) - { - e.addMessage("Cannot parse " - + result_type->getName() + " from " - + arguments[0].type->getName() - + ", because value is too short"); - } - else if (e.code() == ErrorCodes::CANNOT_PARSE_NUMBER - || e.code() == ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT - || e.code() == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED - || e.code() == ErrorCodes::CANNOT_PARSE_QUOTED_STRING - || e.code() == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE - || e.code() == ErrorCodes::CANNOT_PARSE_DATE - || e.code() == ErrorCodes::CANNOT_PARSE_DATETIME - || e.code() == ErrorCodes::CANNOT_PARSE_UUID - || e.code() == ErrorCodes::CANNOT_PARSE_IPV4 - || e.code() == ErrorCodes::CANNOT_PARSE_IPV6) - { - e.addMessage("Cannot parse " - + result_type->getName() + " from " - + arguments[0].type->getName()); - } - - throw; - } - } - - bool hasInformationAboutMonotonicity() const override - { - return Monotonic::has(); - } - - Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override - { - return Monotonic::get(type, left, right); - } - -private: - ContextPtr context; - mutable bool checked_return_type = false; - mutable bool to_nullable = false; - - ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const - { - if (arguments.empty()) - throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least 1 argument", getName()); - - if (result_type->onlyNull()) - return result_type->createColumnConstWithDefaultValue(input_rows_count); - - const DataTypePtr from_type = removeNullable(arguments[0].type); - ColumnPtr result_column; - - [[maybe_unused]] FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; - - if (context) - date_time_overflow_behavior = context->getSettingsRef().date_time_overflow_behavior.value; - - auto call = [&](const auto & types, const auto & tag) -> bool - { - using Types = std::decay_t; - using LeftDataType = typename Types::LeftType; - using RightDataType = typename Types::RightType; - using SpecialTag = std::decay_t; - - if constexpr (IsDataTypeDecimal) - { - if constexpr (std::is_same_v) - { - /// Account for optional timezone argument. 
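- // toDecimal*(value, scale) always takes exactly 2 arguments, while toDateTime64(value, scale[, timezone]) takes 2 or 3.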
- if (arguments.size() != 2 && arguments.size() != 3) - throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects 2 or 3 arguments for DataTypeDateTime64.", getName()); - } - else if (arguments.size() != 2) - { - throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects 2 arguments for Decimal.", getName()); - } - - const ColumnWithTypeAndName & scale_column = arguments[1]; - UInt32 scale = extractToDecimalScale(scale_column); - - switch (date_time_overflow_behavior) - { - case FormatSettings::DateTimeOverflowBehavior::Throw: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); - break; - case FormatSettings::DateTimeOverflowBehavior::Ignore: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); - break; - case FormatSettings::DateTimeOverflowBehavior::Saturate: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); - break; - } - - } - else if constexpr (IsDataTypeDateOrDateTime && std::is_same_v) - { - const auto * dt64 = assert_cast(arguments[0].type.get()); - switch (date_time_overflow_behavior) - { - case FormatSettings::DateTimeOverflowBehavior::Throw: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, dt64->getScale()); - break; - case FormatSettings::DateTimeOverflowBehavior::Ignore: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, dt64->getScale()); - break; - case FormatSettings::DateTimeOverflowBehavior::Saturate: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, dt64->getScale()); - break; - } - } -#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE) \ - case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \ - result_column = ConvertImpl::execute( \ - arguments, result_type, input_rows_count); \ - break; - - else if constexpr (IsDataTypeDecimalOrNumber && IsDataTypeDecimalOrNumber) - { - using LeftT = typename LeftDataType::FieldType; - using RightT = typename RightDataType::FieldType; - - static constexpr bool bad_left = - is_decimal || std::is_floating_point_v || is_big_int_v || is_signed_v; - static constexpr bool bad_right = - is_decimal || std::is_floating_point_v || is_big_int_v || is_signed_v; - - /// Disallow int vs UUID conversion (but support int vs UInt128 conversion) - if constexpr ((bad_left && std::is_same_v) || - (bad_right && std::is_same_v)) - { - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Wrong UUID conversion"); - } - else - { - switch (date_time_overflow_behavior) - { - GENERATE_OVERFLOW_MODE_CASE(Throw) - GENERATE_OVERFLOW_MODE_CASE(Ignore) - GENERATE_OVERFLOW_MODE_CASE(Saturate) - } - } - } - else if constexpr ((IsDataTypeNumber || IsDataTypeDateOrDateTime) - && IsDataTypeDateOrDateTime) - { - switch (date_time_overflow_behavior) - { - GENERATE_OVERFLOW_MODE_CASE(Throw) - GENERATE_OVERFLOW_MODE_CASE(Ignore) - GENERATE_OVERFLOW_MODE_CASE(Saturate) - } - } -#undef GENERATE_OVERFLOW_MODE_CASE - else - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count); - - return true; - }; - - if (isDateTime64(arguments)) - { - /// For toDateTime('xxxx-xx-xx xx:xx:xx.00', 2[, 'timezone']) we need to it convert to DateTime64 - const ColumnWithTypeAndName & scale_column = arguments[1]; - UInt32 scale = extractToDecimalScale(scale_column); - - if (to_datetime64 || scale != 0) /// When scale = 0, the data type is DateTime otherwise the data type is DateTime64 - { - if 
(!callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{})) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", - arguments[0].type->getName(), getName()); - - return result_column; - } - } - - if constexpr (std::is_same_v) - { - if (from_type->getCustomSerialization()) - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); - } - - bool done = false; - if constexpr (to_string_or_fixed_string) - { - done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{}); - } - else - { - bool cast_ipv4_ipv6_default_on_conversion_error = false; - if constexpr (is_any_of) - if (context && (cast_ipv4_ipv6_default_on_conversion_error = context->getSettingsRef().cast_ipv4_ipv6_default_on_conversion_error)) - done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertReturnZeroOnErrorTag{}); - - if (!cast_ipv4_ipv6_default_on_conversion_error) - { - /// We should use ConvertFromStringExceptionMode::Null mode when converting from String (or FixedString) - /// to Nullable type, to avoid 'value is too short' error on attempt to parse empty string from NULL values. - if (to_nullable && WhichDataType(from_type).isStringOrFixedString()) - done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertReturnNullOnErrorTag{}); - else - done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{}); - } - } - - if (!done) - { - /// Generic conversion of any type to String. - if (std::is_same_v) - { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); - } - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", - arguments[0].type->getName(), getName()); - } - - return result_column; - } -}; - - -/** Function toTOrZero (where T is number of date or datetime type): - * try to convert from String to type T through parsing, - * if cannot parse, return default value instead of throwing exception. - * Function toTOrNull will return Nullable type with NULL when cannot parse. - * NOTE Also need to implement tryToUnixTimestamp with timezone. 
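- * e.g. toUInt32OrZero('abc') returns 0 and toUInt32OrNull('abc') returns NULL,
- * whereas toUInt32('abc') throws an exception.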
- */ -template -class FunctionConvertFromString : public IFunction -{ -public: - static constexpr auto name = Name::name; - static constexpr bool to_decimal = - std::is_same_v> || - std::is_same_v> || - std::is_same_v> || - std::is_same_v>; - - static constexpr bool to_datetime64 = std::is_same_v; - - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - static FunctionPtr create() { return std::make_shared(); } - - String getName() const override - { - return name; - } - - bool isVariadic() const override { return true; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - - bool useDefaultImplementationForConstants() const override { return true; } - bool canBeExecutedOnDefaultArguments() const override { return false; } - - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - DataTypePtr res; - - if (isDateTime64(arguments)) - { - validateFunctionArgumentTypes(*this, arguments, - FunctionArgumentDescriptors{{"string", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}}, - // optional - FunctionArgumentDescriptors{ - {"precision", static_cast(&isUInt8), isColumnConst, "const UInt8"}, - {"timezone", static_cast(&isStringOrFixedString), isColumnConst, "const String or FixedString"}, - }); - - UInt64 scale = to_datetime64 ? DataTypeDateTime64::default_scale : 0; - if (arguments.size() > 1) - scale = extractToDecimalScale(arguments[1]); - const auto timezone = extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false); - - res = scale == 0 ? res = std::make_shared(timezone) : std::make_shared(scale, timezone); - } - else - { - if ((arguments.size() != 1 && arguments.size() != 2) || (to_decimal && arguments.size() != 2)) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2. " - "Second argument only make sense for DateTime (time zone, optional) and Decimal (scale).", - getName(), arguments.size()); - - if (!isStringOrFixedString(arguments[0].type)) - { - if (this->getName().find("OrZero") != std::string::npos || - this->getName().find("OrNull") != std::string::npos) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. 
" - "Conversion functions with postfix 'OrZero' or 'OrNull' should take String argument", - arguments[0].type->getName(), getName()); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", - arguments[0].type->getName(), getName()); - } - - if (arguments.size() == 2) - { - if constexpr (std::is_same_v) - { - if (!isString(arguments[1].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}", - arguments[1].type->getName(), getName()); - } - else if constexpr (to_decimal) - { - if (!isInteger(arguments[1].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}", - arguments[1].type->getName(), getName()); - if (!arguments[1].column) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be constant", getName()); - } - else - { - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 1. " - "Second argument makes sense only for DateTime and Decimal.", - getName(), arguments.size()); - } - } - - if constexpr (std::is_same_v) - res = std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0, false)); - else if constexpr (std::is_same_v) - throw Exception(ErrorCodes::LOGICAL_ERROR, "MaterializedMySQL is a bug."); - else if constexpr (to_decimal) - { - UInt64 scale = extractToDecimalScale(arguments[1]); - res = createDecimalMaxPrecision(scale); - if (!res) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Something wrong with toDecimalNNOrZero() or toDecimalNNOrNull()"); - } - else - res = std::make_shared(); - } - - if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) - res = std::make_shared(res); - - return res; - } - - template - ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, UInt32 scale = 0) const - { - const IDataType * from_type = arguments[0].type.get(); - - if (checkAndGetDataType(from_type)) - { - return ConvertThroughParsing::execute( - arguments, result_type, input_rows_count, scale); - } - else if (checkAndGetDataType(from_type)) - { - return ConvertThroughParsing::execute( - arguments, result_type, input_rows_count, scale); - } - - return nullptr; - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - ColumnPtr result_column; - - if constexpr (to_decimal) - result_column = executeInternal(arguments, result_type, input_rows_count, - assert_cast(*removeNullable(result_type)).getScale()); - else - { - if (isDateTime64(arguments)) - { - UInt64 scale = to_datetime64 ? DataTypeDateTime64::default_scale : 0; - if (arguments.size() > 1) - scale = extractToDecimalScale(arguments[1]); - - if (scale == 0) - result_column = executeInternal(arguments, result_type, input_rows_count); - else - { - result_column = executeInternal(arguments, result_type, input_rows_count, static_cast(scale)); - } - } - else - { - result_column = executeInternal(arguments, result_type, input_rows_count); - } - } - - if (!result_column) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " - "Only String or FixedString argument is accepted for try-conversion function. 
For other arguments, " - "use function without 'orZero' or 'orNull'.", arguments[0].type->getName(), getName()); - - return result_column; - } -}; - - -/// Monotonicity. - -struct PositiveMonotonicity -{ - static bool has() { return true; } - static IFunction::Monotonicity get(const IDataType &, const Field &, const Field &) - { - return { .is_monotonic = true }; - } -}; - -struct UnknownMonotonicity -{ - static bool has() { return false; } - static IFunction::Monotonicity get(const IDataType &, const Field &, const Field &) - { - return { }; - } -}; - -template -struct ToNumberMonotonicity -{ - static bool has() { return true; } - - static UInt64 divideByRangeOfType(UInt64 x) - { - if constexpr (sizeof(T) < sizeof(UInt64)) - return x >> (sizeof(T) * 8); - else - return 0; - } - - static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) - { - if (!type.isValueRepresentedByNumber()) - return {}; - - /// If type is same, the conversion is always monotonic. - /// (Enum has separate case, because it is different data type) - if (checkAndGetDataType>(&type) || - checkAndGetDataType>(&type)) - return { .is_monotonic = true, .is_always_monotonic = true }; - - /// Float cases. - - /// When converting to Float, the conversion is always monotonic. - if constexpr (std::is_floating_point_v) - return { .is_monotonic = true, .is_always_monotonic = true }; - - const auto * low_cardinality = typeid_cast(&type); - const IDataType * low_cardinality_dictionary_type = nullptr; - if (low_cardinality) - low_cardinality_dictionary_type = low_cardinality->getDictionaryType().get(); - - WhichDataType which_type(type); - WhichDataType which_inner_type = low_cardinality - ? WhichDataType(low_cardinality_dictionary_type) - : WhichDataType(type); - - /// If converting from Float, for monotonicity, arguments must fit in range of result type. - if (which_inner_type.isFloat()) - { - if (left.isNull() || right.isNull()) - return {}; - - Float64 left_float = left.get(); - Float64 right_float = right.get(); - - if (left_float >= static_cast(std::numeric_limits::min()) - && left_float <= static_cast(std::numeric_limits::max()) - && right_float >= static_cast(std::numeric_limits::min()) - && right_float <= static_cast(std::numeric_limits::max())) - return { .is_monotonic = true }; - - return {}; - } - - /// Integer cases. - - /// Only support types represented by native integers. - /// It can be extended to big integers, decimals and DateTime64 later. - /// By the way, NULLs are representing unbounded ranges. - if (!((left.isNull() || left.getType() == Field::Types::UInt64 || left.getType() == Field::Types::Int64) - && (right.isNull() || right.getType() == Field::Types::UInt64 || right.getType() == Field::Types::Int64))) - return {}; - - const bool from_is_unsigned = type.isValueRepresentedByUnsignedInteger(); - const bool to_is_unsigned = is_unsigned_v; - - const size_t size_of_from = type.getSizeOfValueInMemory(); - const size_t size_of_to = sizeof(T); - - const bool left_in_first_half = left.isNull() - ? from_is_unsigned - : (left.get() >= 0); - - const bool right_in_first_half = right.isNull() - ? !from_is_unsigned - : (right.get() >= 0); - - /// Size of type is the same. - if (size_of_from == size_of_to) - { - if (from_is_unsigned == to_is_unsigned) - return { .is_monotonic = true, .is_always_monotonic = true }; - - if (left_in_first_half == right_in_first_half) - return { .is_monotonic = true }; - - return {}; - } - - /// Size of type is expanded. 
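- /// e.g. Int8 -> Int64 or UInt8 -> UInt64: widening within the same signedness preserves order
- /// on the whole domain, so the conversion is always monotonic.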
- if (size_of_from < size_of_to) - { - if (from_is_unsigned == to_is_unsigned) - return { .is_monotonic = true, .is_always_monotonic = true }; - - if (!to_is_unsigned) - return { .is_monotonic = true, .is_always_monotonic = true }; - - /// signed -> unsigned. If arguments from the same half, then function is monotonic. - if (left_in_first_half == right_in_first_half) - return { .is_monotonic = true }; - - return {}; - } - - /// Size of type is shrunk. - if (size_of_from > size_of_to) - { - /// Function cannot be monotonic on unbounded ranges. - if (left.isNull() || right.isNull()) - return {}; - - /// Function cannot be monotonic when left and right are not on the same ranges. - if (divideByRangeOfType(left.get()) != divideByRangeOfType(right.get())) - return {}; - - if (to_is_unsigned) - return { .is_monotonic = true }; - else - { - // If To is signed, it's possible that the signedness is different after conversion. So we check it explicitly. - const bool is_monotonic = (T(left.get()) >= 0) == (T(right.get()) >= 0); - - return { .is_monotonic = is_monotonic }; - } - } - - UNREACHABLE(); - } -}; - -struct ToDateMonotonicity -{ - static bool has() { return true; } - - static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) - { - auto which = WhichDataType(type); - if (which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() || which.isInt8() || which.isInt16() || which.isUInt8() - || which.isUInt16()) - { - return {.is_monotonic = true, .is_always_monotonic = true}; - } - else if ( - ((left.getType() == Field::Types::UInt64 || left.isNull()) && (right.getType() == Field::Types::UInt64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) - || ((left.getType() == Field::Types::Int64 || left.isNull()) && (right.getType() == Field::Types::Int64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) - || (( - (left.getType() == Field::Types::Float64 || left.isNull()) - && (right.getType() == Field::Types::Float64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF)))) - || !isNativeNumber(type)) - { - return {}; - } - else - { - return {.is_monotonic = true, .is_always_monotonic = true}; - } - } -}; - -struct ToDateTimeMonotonicity -{ - static bool has() { return true; } - - static IFunction::Monotonicity get(const IDataType & type, const Field &, const Field &) - { - if (type.isValueRepresentedByNumber()) - return {.is_monotonic = true, .is_always_monotonic = true}; - else - return {}; - } -}; - -/** The monotonicity for the `toString` function is mainly determined for test purposes. - * It is doubtful that anyone is looking to optimize queries with conditions `toString(CounterID) = 34`. - */ -struct ToStringMonotonicity -{ - static bool has() { return true; } - - static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) - { - IFunction::Monotonicity positive{ .is_monotonic = true }; - IFunction::Monotonicity not_monotonic; - - const auto * type_ptr = &type; - if (const auto * low_cardinality_type = checkAndGetDataType(type_ptr)) - type_ptr = low_cardinality_type->getDictionaryType().get(); - - /// Order on enum values (which is the order on integers) is completely arbitrary in respect to the order on strings. 
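- /// e.g. for Enum8('b' = 1, 'a' = 2) the numeric order (1 < 2) disagrees with the
- /// lexicographic order of the names ('a' < 'b').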
-        if (WhichDataType(type).isEnum())
-            return not_monotonic;
-
-        /// `toString` function is monotonic if the argument is Date or Date32 or DateTime or String, or non-negative numbers with the same number of digits.
-        if (checkDataTypes<DataTypeDate, DataTypeDate32, DataTypeDateTime, DataTypeString>(type_ptr))
-            return positive;
-
-        if (left.isNull() || right.isNull())
-            return {};
-
-        if (left.getType() == Field::Types::UInt64
-            && right.getType() == Field::Types::UInt64)
-        {
-            return (left.get<Int64>() == 0 && right.get<Int64>() == 0)
-                || (floor(log10(left.get<UInt64>())) == floor(log10(right.get<UInt64>())))
-                ? positive : not_monotonic;
-        }
-
-        if (left.getType() == Field::Types::Int64
-            && right.getType() == Field::Types::Int64)
-        {
-            return (left.get<Int64>() == 0 && right.get<Int64>() == 0)
-                || (left.get<Int64>() > 0 && right.get<Int64>() > 0 && floor(log10(left.get<Int64>())) == floor(log10(right.get<Int64>())))
-                ? positive : not_monotonic;
-        }
-
-        return not_monotonic;
-    }
-};
-
-
-struct NameToUInt8 { static constexpr auto name = "toUInt8"; };
-struct NameToUInt16 { static constexpr auto name = "toUInt16"; };
-struct NameToUInt32 { static constexpr auto name = "toUInt32"; };
-struct NameToUInt64 { static constexpr auto name = "toUInt64"; };
-struct NameToUInt128 { static constexpr auto name = "toUInt128"; };
-struct NameToUInt256 { static constexpr auto name = "toUInt256"; };
-struct NameToInt8 { static constexpr auto name = "toInt8"; };
-struct NameToInt16 { static constexpr auto name = "toInt16"; };
-struct NameToInt32 { static constexpr auto name = "toInt32"; };
-struct NameToInt64 { static constexpr auto name = "toInt64"; };
-struct NameToInt128 { static constexpr auto name = "toInt128"; };
-struct NameToInt256 { static constexpr auto name = "toInt256"; };
-struct NameToFloat32 { static constexpr auto name = "toFloat32"; };
-struct NameToFloat64 { static constexpr auto name = "toFloat64"; };
-struct NameToUUID { static constexpr auto name = "toUUID"; };
-struct NameToIPv4 { static constexpr auto name = "toIPv4"; };
-struct NameToIPv6 { static constexpr auto name = "toIPv6"; };
-
-using FunctionToUInt8 = FunctionConvert<DataTypeUInt8, NameToUInt8, ToNumberMonotonicity<UInt8>>;
-using FunctionToUInt16 = FunctionConvert<DataTypeUInt16, NameToUInt16, ToNumberMonotonicity<UInt16>>;
-using FunctionToUInt32 = FunctionConvert<DataTypeUInt32, NameToUInt32, ToNumberMonotonicity<UInt32>>;
-using FunctionToUInt64 = FunctionConvert<DataTypeUInt64, NameToUInt64, ToNumberMonotonicity<UInt64>>;
-using FunctionToUInt128 = FunctionConvert<DataTypeUInt128, NameToUInt128, ToNumberMonotonicity<UInt128>>;
-using FunctionToUInt256 = FunctionConvert<DataTypeUInt256, NameToUInt256, ToNumberMonotonicity<UInt256>>;
-using FunctionToInt8 = FunctionConvert<DataTypeInt8, NameToInt8, ToNumberMonotonicity<Int8>>;
-using FunctionToInt16 = FunctionConvert<DataTypeInt16, NameToInt16, ToNumberMonotonicity<Int16>>;
-using FunctionToInt32 = FunctionConvert<DataTypeInt32, NameToInt32, ToNumberMonotonicity<Int32>>;
-using FunctionToInt64 = FunctionConvert<DataTypeInt64, NameToInt64, ToNumberMonotonicity<Int64>>;
-using FunctionToInt128 = FunctionConvert<DataTypeInt128, NameToInt128, ToNumberMonotonicity<Int128>>;
-using FunctionToInt256 = FunctionConvert<DataTypeInt256, NameToInt256, ToNumberMonotonicity<Int256>>;
-using FunctionToFloat32 = FunctionConvert<DataTypeFloat32, NameToFloat32, ToNumberMonotonicity<Float32>>;
-using FunctionToFloat64 = FunctionConvert<DataTypeFloat64, NameToFloat64, ToNumberMonotonicity<Float64>>;
-
-using FunctionToDate = FunctionConvert<DataTypeDate, NameToDate, ToDateMonotonicity>;
-
-using FunctionToDate32 = FunctionConvert<DataTypeDate32, NameToDate32, ToDateMonotonicity>;
-
-using FunctionToDateTime = FunctionConvert<DataTypeDateTime, NameToDateTime, ToDateTimeMonotonicity>;
-
-using FunctionToDateTime32 = FunctionConvert<DataTypeDateTime, NameToDateTime32, ToDateTimeMonotonicity>;
-
-using FunctionToDateTime64 = FunctionConvert<DataTypeDateTime64, NameToDateTime64, UnknownMonotonicity>;
-
-using FunctionToUUID = FunctionConvert<DataTypeUUID, NameToUUID, ToNumberMonotonicity<UInt128>>;
-using FunctionToIPv4 = FunctionConvert<DataTypeIPv4, NameToIPv4, ToNumberMonotonicity<UInt32>>;
-using FunctionToIPv6 = FunctionConvert<DataTypeIPv6, NameToIPv6, ToNumberMonotonicity<UInt128>>;
-using FunctionToString = FunctionConvert<DataTypeString, NameToString, ToStringMonotonicity>;
-using FunctionToUnixTimestamp = FunctionConvert<DataTypeUInt32, NameToUnixTimestamp, ToNumberMonotonicity<UInt32>>;
-using FunctionToDecimal32 = FunctionConvert<DataTypeDecimal<Decimal32>, NameToDecimal32, UnknownMonotonicity>;
-using FunctionToDecimal64 = FunctionConvert<DataTypeDecimal<Decimal64>, NameToDecimal64, UnknownMonotonicity>;
-using FunctionToDecimal128 = FunctionConvert<DataTypeDecimal<Decimal128>, NameToDecimal128, UnknownMonotonicity>;
-using FunctionToDecimal256 = FunctionConvert<DataTypeDecimal<Decimal256>, NameToDecimal256, UnknownMonotonicity>;
-
-template <typename DataType> struct FunctionTo;
-
-template <> struct FunctionTo<DataTypeUInt8> { using Type = FunctionToUInt8; };
-template <> struct FunctionTo<DataTypeUInt16> { using Type = FunctionToUInt16; };
-template <> struct FunctionTo<DataTypeUInt32> { using Type = FunctionToUInt32; };
-template <> struct FunctionTo<DataTypeUInt64> { using Type = FunctionToUInt64; };
-template <> struct FunctionTo<DataTypeUInt128> { using Type = FunctionToUInt128; };
-template <> struct FunctionTo<DataTypeUInt256> { using Type = FunctionToUInt256; };
-template <> struct FunctionTo<DataTypeInt8> { using Type = FunctionToInt8; };
-template <> struct FunctionTo<DataTypeInt16> { using Type = FunctionToInt16; };
-template <> struct FunctionTo<DataTypeInt32> { using Type = FunctionToInt32; };
-template <> struct FunctionTo<DataTypeInt64> { using Type = FunctionToInt64; };
-template <> struct FunctionTo<DataTypeInt128> { using Type = FunctionToInt128; };
-template <> struct FunctionTo<DataTypeInt256> { using Type = FunctionToInt256; };
-template <> struct FunctionTo<DataTypeFloat32> { using Type = FunctionToFloat32; };
-template <> struct FunctionTo<DataTypeFloat64> { using Type = FunctionToFloat64; };
-
-template <>
-struct FunctionTo<DataTypeDate> { using Type = FunctionToDate; };
-
-template <>
-struct FunctionTo<DataTypeDate32> { using Type = FunctionToDate32; };
-
-template <>
-struct FunctionTo<DataTypeDateTime> { using Type = FunctionToDateTime; };
-
-template <>
-struct FunctionTo<DataTypeDateTime64> { using Type = FunctionToDateTime64; };
-
-template <> struct FunctionTo<DataTypeUUID> { using Type = FunctionToUUID; };
-template <> struct FunctionTo<DataTypeIPv4> { using Type = FunctionToIPv4; };
-template <> struct FunctionTo<DataTypeIPv6> { using Type = FunctionToIPv6; };
-template <> struct FunctionTo<DataTypeString> { using Type = FunctionToString; };
-template <> struct FunctionTo<DataTypeFixedString> { using Type = FunctionToFixedString; };
-template <> struct FunctionTo<DataTypeDecimal<Decimal32>> { using Type = FunctionToDecimal32; };
-template <> struct FunctionTo<DataTypeDecimal<Decimal64>> { using Type = FunctionToDecimal64; };
-template <> struct FunctionTo<DataTypeDecimal<Decimal128>> { using Type = FunctionToDecimal128; };
-template <> struct FunctionTo<DataTypeDecimal<Decimal256>> { using Type = FunctionToDecimal256; };
-
-template <typename FieldType> struct FunctionTo<DataTypeEnum<FieldType>>
-    : FunctionTo<DataTypeNumber<FieldType>>
-{
-};
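The `FunctionTo` trait above maps a target data type to its conversion function at compile time, so generic code can simply write `FunctionTo<ToDataType>::Type::create()`. A minimal self-contained sketch of the same trait pattern (illustrative only; `ConverterFor`, `Int32Converter`, and `StringConverter` are hypothetical names, not ClickHouse API):

```cpp
#include <iostream>
#include <string>

// Hypothetical converters standing in for FunctionToInt32, FunctionToString, ...
struct Int32Converter  { static std::string name() { return "toInt32"; } };
struct StringConverter { static std::string name() { return "toString"; } };

// The primary template is declared but never defined: asking for an unmapped
// type fails to compile, just as FunctionTo does for unsupported targets.
template <typename DataType> struct ConverterFor;

template <> struct ConverterFor<int>         { using Type = Int32Converter; };
template <> struct ConverterFor<std::string> { using Type = StringConverter; };

int main()
{
    // Generic code picks the right converter purely from the type.
    std::cout << ConverterFor<int>::Type::name() << '\n';         // toInt32
    std::cout << ConverterFor<std::string>::Type::name() << '\n'; // toString
}
```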
-struct NameToUInt8OrZero { static constexpr auto name = "toUInt8OrZero"; };
-struct NameToUInt16OrZero { static constexpr auto name = "toUInt16OrZero"; };
-struct NameToUInt32OrZero { static constexpr auto name = "toUInt32OrZero"; };
-struct NameToUInt64OrZero { static constexpr auto name = "toUInt64OrZero"; };
-struct NameToUInt128OrZero { static constexpr auto name = "toUInt128OrZero"; };
-struct NameToUInt256OrZero { static constexpr auto name = "toUInt256OrZero"; };
-struct NameToInt8OrZero { static constexpr auto name = "toInt8OrZero"; };
-struct NameToInt16OrZero { static constexpr auto name = "toInt16OrZero"; };
-struct NameToInt32OrZero { static constexpr auto name = "toInt32OrZero"; };
-struct NameToInt64OrZero { static constexpr auto name = "toInt64OrZero"; };
-struct NameToInt128OrZero { static constexpr auto name = "toInt128OrZero"; };
-struct NameToInt256OrZero { static constexpr auto name = "toInt256OrZero"; };
-struct NameToFloat32OrZero { static constexpr auto name = "toFloat32OrZero"; };
-struct NameToFloat64OrZero { static constexpr auto name = "toFloat64OrZero"; };
-struct NameToDateOrZero { static constexpr auto name = "toDateOrZero"; };
-struct NameToDate32OrZero { static constexpr auto name = "toDate32OrZero"; };
-struct NameToDateTimeOrZero { static constexpr auto name = "toDateTimeOrZero"; };
-struct NameToDateTime64OrZero { static constexpr auto name = "toDateTime64OrZero"; };
-struct NameToDecimal32OrZero { static constexpr auto name = "toDecimal32OrZero"; };
-struct NameToDecimal64OrZero { static constexpr auto name = "toDecimal64OrZero"; };
-struct NameToDecimal128OrZero { static constexpr auto name = "toDecimal128OrZero"; };
-struct NameToDecimal256OrZero { static constexpr auto name = "toDecimal256OrZero"; };
-struct NameToUUIDOrZero { static constexpr auto name = "toUUIDOrZero"; };
-struct NameToIPv4OrZero { static constexpr auto name = "toIPv4OrZero"; };
-struct NameToIPv6OrZero { static constexpr auto name = "toIPv6OrZero"; };
-
-using FunctionToUInt8OrZero = FunctionConvertFromString<DataTypeUInt8, NameToUInt8OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToUInt16OrZero = FunctionConvertFromString<DataTypeUInt16, NameToUInt16OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToUInt32OrZero = FunctionConvertFromString<DataTypeUInt32, NameToUInt32OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToUInt64OrZero = FunctionConvertFromString<DataTypeUInt64, NameToUInt64OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToUInt128OrZero = FunctionConvertFromString<DataTypeUInt128, NameToUInt128OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToUInt256OrZero = FunctionConvertFromString<DataTypeUInt256, NameToUInt256OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToInt8OrZero = FunctionConvertFromString<DataTypeInt8, NameToInt8OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToInt16OrZero = FunctionConvertFromString<DataTypeInt16, NameToInt16OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToInt32OrZero = FunctionConvertFromString<DataTypeInt32, NameToInt32OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToInt64OrZero = FunctionConvertFromString<DataTypeInt64, NameToInt64OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToInt128OrZero = FunctionConvertFromString<DataTypeInt128, NameToInt128OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToInt256OrZero = FunctionConvertFromString<DataTypeInt256, NameToInt256OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToFloat32OrZero = FunctionConvertFromString<DataTypeFloat32, NameToFloat32OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToFloat64OrZero = FunctionConvertFromString<DataTypeFloat64, NameToFloat64OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDateOrZero = FunctionConvertFromString<DataTypeDate, NameToDateOrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDate32OrZero = FunctionConvertFromString<DataTypeDate32, NameToDate32OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDateTimeOrZero = FunctionConvertFromString<DataTypeDateTime, NameToDateTimeOrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDateTime64OrZero = FunctionConvertFromString<DataTypeDateTime64, NameToDateTime64OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDecimal32OrZero = FunctionConvertFromString<DataTypeDecimal<Decimal32>, NameToDecimal32OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDecimal64OrZero = FunctionConvertFromString<DataTypeDecimal<Decimal64>, NameToDecimal64OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDecimal128OrZero = FunctionConvertFromString<DataTypeDecimal<Decimal128>, NameToDecimal128OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDecimal256OrZero = FunctionConvertFromString<DataTypeDecimal<Decimal256>, NameToDecimal256OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToUUIDOrZero = FunctionConvertFromString<DataTypeUUID, NameToUUIDOrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToIPv4OrZero = FunctionConvertFromString<DataTypeIPv4, NameToIPv4OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToIPv6OrZero = FunctionConvertFromString<DataTypeIPv6, NameToIPv6OrZero, ConvertFromStringExceptionMode::Zero>;
-
-struct NameToUInt8OrNull { static constexpr auto name = "toUInt8OrNull"; };
-struct NameToUInt16OrNull { static constexpr auto name = "toUInt16OrNull"; };
-struct NameToUInt32OrNull { static constexpr auto name = "toUInt32OrNull"; };
-struct NameToUInt64OrNull { static constexpr auto name = "toUInt64OrNull"; };
-struct NameToUInt128OrNull { static constexpr auto name = "toUInt128OrNull"; };
-struct NameToUInt256OrNull { static constexpr auto name = "toUInt256OrNull"; };
-struct NameToInt8OrNull { static constexpr auto name = "toInt8OrNull"; };
-struct NameToInt16OrNull { static constexpr auto name = "toInt16OrNull"; };
-struct NameToInt32OrNull { static constexpr auto name = "toInt32OrNull"; };
-struct NameToInt64OrNull { static constexpr auto name = "toInt64OrNull"; };
-struct NameToInt128OrNull { static constexpr auto name = "toInt128OrNull"; };
-struct NameToInt256OrNull { static constexpr auto name = "toInt256OrNull"; };
-struct NameToFloat32OrNull { static constexpr auto name = "toFloat32OrNull"; };
-struct NameToFloat64OrNull { static constexpr auto name = "toFloat64OrNull"; };
-struct NameToDateOrNull { static constexpr auto name = "toDateOrNull"; };
-struct NameToDate32OrNull { static constexpr auto name = "toDate32OrNull"; };
-struct NameToDateTimeOrNull { static constexpr auto name = "toDateTimeOrNull"; };
-struct NameToDateTime64OrNull { static constexpr auto name = "toDateTime64OrNull"; };
-struct NameToDecimal32OrNull { static constexpr auto name = "toDecimal32OrNull"; };
-struct NameToDecimal64OrNull { static constexpr auto name = "toDecimal64OrNull"; };
-struct NameToDecimal128OrNull { static constexpr auto name = "toDecimal128OrNull"; };
-struct NameToDecimal256OrNull { static constexpr auto name = "toDecimal256OrNull"; };
-struct NameToUUIDOrNull { static constexpr auto name = "toUUIDOrNull"; };
-struct NameToIPv4OrNull { static constexpr auto name = "toIPv4OrNull"; };
-struct NameToIPv6OrNull { static constexpr auto name = "toIPv6OrNull"; };
-
-using FunctionToUInt8OrNull = FunctionConvertFromString<DataTypeUInt8, NameToUInt8OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToUInt16OrNull = FunctionConvertFromString<DataTypeUInt16, NameToUInt16OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToUInt32OrNull = FunctionConvertFromString<DataTypeUInt32, NameToUInt32OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToUInt64OrNull = FunctionConvertFromString<DataTypeUInt64, NameToUInt64OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToUInt128OrNull = FunctionConvertFromString<DataTypeUInt128, NameToUInt128OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToUInt256OrNull = FunctionConvertFromString<DataTypeUInt256, NameToUInt256OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToInt8OrNull = FunctionConvertFromString<DataTypeInt8, NameToInt8OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToInt16OrNull = FunctionConvertFromString<DataTypeInt16, NameToInt16OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToInt32OrNull = FunctionConvertFromString<DataTypeInt32, NameToInt32OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToInt64OrNull = FunctionConvertFromString<DataTypeInt64, NameToInt64OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToInt128OrNull = FunctionConvertFromString<DataTypeInt128, NameToInt128OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToInt256OrNull = FunctionConvertFromString<DataTypeInt256, NameToInt256OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToFloat32OrNull = FunctionConvertFromString<DataTypeFloat32, NameToFloat32OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToFloat64OrNull = FunctionConvertFromString<DataTypeFloat64, NameToFloat64OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDateOrNull = FunctionConvertFromString<DataTypeDate, NameToDateOrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDate32OrNull = FunctionConvertFromString<DataTypeDate32, NameToDate32OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDateTimeOrNull = FunctionConvertFromString<DataTypeDateTime, NameToDateTimeOrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDateTime64OrNull = FunctionConvertFromString<DataTypeDateTime64, NameToDateTime64OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDecimal32OrNull = FunctionConvertFromString<DataTypeDecimal<Decimal32>, NameToDecimal32OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDecimal64OrNull = FunctionConvertFromString<DataTypeDecimal<Decimal64>, NameToDecimal64OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDecimal128OrNull = FunctionConvertFromString<DataTypeDecimal<Decimal128>, NameToDecimal128OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDecimal256OrNull = FunctionConvertFromString<DataTypeDecimal<Decimal256>, NameToDecimal256OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToUUIDOrNull = FunctionConvertFromString<DataTypeUUID, NameToUUIDOrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToIPv4OrNull = FunctionConvertFromString<DataTypeIPv4, NameToIPv4OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToIPv6OrNull = FunctionConvertFromString<DataTypeIPv6, NameToIPv6OrNull, ConvertFromStringExceptionMode::Null>;
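Each `*OrZero`/`*OrNull` alias differs from the plain conversion only in its `ConvertFromStringExceptionMode`: throw on malformed input, substitute a default, or produce NULL. A compilable sketch of the three failure policies, using `std::optional` in place of a nullable column (the names here are illustrative, not the ClickHouse implementation):

```cpp
#include <charconv>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string_view>

enum class ExceptionMode { Throw, Zero, Null };

// Parse an int with one of three policies: throw on error, return 0, or return empty.
template <ExceptionMode mode>
std::optional<int> parseInt(std::string_view s)
{
    int value = 0;
    auto [ptr, ec] = std::from_chars(s.data(), s.data() + s.size(), value);
    if (ec == std::errc() && ptr == s.data() + s.size())
        return value;
    if constexpr (mode == ExceptionMode::Throw)
        throw std::invalid_argument("cannot parse as Int32");
    else if constexpr (mode == ExceptionMode::Zero)
        return 0;
    else
        return std::nullopt; // becomes NULL in the result column
}

int main()
{
    std::cout << *parseInt<ExceptionMode::Zero>("abc") << '\n';            // 0
    std::cout << parseInt<ExceptionMode::Null>("abc").has_value() << '\n'; // 0 (empty)
    std::cout << *parseInt<ExceptionMode::Throw>("42") << '\n';            // 42
}
```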
-struct NameParseDateTimeBestEffort { static constexpr auto name = "parseDateTimeBestEffort"; };
-struct NameParseDateTimeBestEffortOrZero { static constexpr auto name = "parseDateTimeBestEffortOrZero"; };
-struct NameParseDateTimeBestEffortOrNull { static constexpr auto name = "parseDateTimeBestEffortOrNull"; };
-struct NameParseDateTimeBestEffortUS { static constexpr auto name = "parseDateTimeBestEffortUS"; };
-struct NameParseDateTimeBestEffortUSOrZero { static constexpr auto name = "parseDateTimeBestEffortUSOrZero"; };
-struct NameParseDateTimeBestEffortUSOrNull { static constexpr auto name = "parseDateTimeBestEffortUSOrNull"; };
-struct NameParseDateTime32BestEffort { static constexpr auto name = "parseDateTime32BestEffort"; };
-struct NameParseDateTime32BestEffortOrZero { static constexpr auto name = "parseDateTime32BestEffortOrZero"; };
-struct NameParseDateTime32BestEffortOrNull { static constexpr auto name = "parseDateTime32BestEffortOrNull"; };
-struct NameParseDateTime64BestEffort { static constexpr auto name = "parseDateTime64BestEffort"; };
-struct NameParseDateTime64BestEffortOrZero { static constexpr auto name = "parseDateTime64BestEffortOrZero"; };
-struct NameParseDateTime64BestEffortOrNull { static constexpr auto name = "parseDateTime64BestEffortOrNull"; };
-struct NameParseDateTime64BestEffortUS { static constexpr auto name = "parseDateTime64BestEffortUS"; };
-struct NameParseDateTime64BestEffortUSOrZero { static constexpr auto name = "parseDateTime64BestEffortUSOrZero"; };
-struct NameParseDateTime64BestEffortUSOrNull { static constexpr auto name = "parseDateTime64BestEffortUSOrNull"; };
-
-
-using FunctionParseDateTimeBestEffort = FunctionConvertFromString<
-    DataTypeDateTime, NameParseDateTimeBestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>;
-using FunctionParseDateTimeBestEffortOrZero = FunctionConvertFromString<
-    DataTypeDateTime, NameParseDateTimeBestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>;
-using FunctionParseDateTimeBestEffortOrNull = FunctionConvertFromString<
-    DataTypeDateTime, NameParseDateTimeBestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>;
-
-using FunctionParseDateTimeBestEffortUS = FunctionConvertFromString<
-    DataTypeDateTime, NameParseDateTimeBestEffortUS, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffortUS>;
-using FunctionParseDateTimeBestEffortUSOrZero = FunctionConvertFromString<
-    DataTypeDateTime, NameParseDateTimeBestEffortUSOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffortUS>;
-using FunctionParseDateTimeBestEffortUSOrNull = FunctionConvertFromString<
-    DataTypeDateTime, NameParseDateTimeBestEffortUSOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffortUS>;
-
-using FunctionParseDateTime32BestEffort = FunctionConvertFromString<
-    DataTypeDateTime, NameParseDateTime32BestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>;
-using FunctionParseDateTime32BestEffortOrZero = FunctionConvertFromString<
-    DataTypeDateTime, NameParseDateTime32BestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>;
-using FunctionParseDateTime32BestEffortOrNull = FunctionConvertFromString<
-    DataTypeDateTime, NameParseDateTime32BestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>;
-
-using FunctionParseDateTime64BestEffort = FunctionConvertFromString<
-    DataTypeDateTime64, NameParseDateTime64BestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>;
-using FunctionParseDateTime64BestEffortOrZero = FunctionConvertFromString<
-    DataTypeDateTime64, NameParseDateTime64BestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>;
-using FunctionParseDateTime64BestEffortOrNull = FunctionConvertFromString<
-    DataTypeDateTime64, NameParseDateTime64BestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>;
-
-using FunctionParseDateTime64BestEffortUS = FunctionConvertFromString<
-    DataTypeDateTime64, NameParseDateTime64BestEffortUS, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffortUS>;
-using FunctionParseDateTime64BestEffortUSOrZero = FunctionConvertFromString<
-    DataTypeDateTime64, NameParseDateTime64BestEffortUSOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffortUS>;
-using FunctionParseDateTime64BestEffortUSOrNull = FunctionConvertFromString<
-    DataTypeDateTime64, NameParseDateTime64BestEffortUSOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffortUS>;
-
-
-class ExecutableFunctionCast : public IExecutableFunction
-{
-public:
-    using WrapperType = std::function<ColumnPtr(ColumnsWithTypeAndName &, const DataTypePtr &, const ColumnNullable *, size_t)>;
-
-    explicit ExecutableFunctionCast(
-            WrapperType && wrapper_function_, const char * name_, std::optional<CastDiagnostic> diagnostic_)
-            : wrapper_function(std::move(wrapper_function_)), name(name_), diagnostic(std::move(diagnostic_)) {}
-
-    String getName() const override { return name; }
-
-protected:
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
-    {
-        /// drop second argument, pass others
-        ColumnsWithTypeAndName new_arguments{arguments.front()};
-        if (arguments.size() > 2)
-            new_arguments.insert(std::end(new_arguments), std::next(std::begin(arguments), 2), std::end(arguments));
-
-        try
-        {
-            return wrapper_function(new_arguments, result_type, nullptr, input_rows_count);
-        }
-        catch (Exception & e)
-        {
-            if (diagnostic)
-                e.addMessage("while converting source column " + backQuoteIfNeed(diagnostic->column_from) +
-                             " to destination column " + backQuoteIfNeed(diagnostic->column_to));
-            throw;
-        }
-    }
-
-    bool useDefaultImplementationForNulls() const override { return false; }
-    /// CAST(Nothing, T) -> T
-    bool useDefaultImplementationForNothing() const override { return false; }
-    bool useDefaultImplementationForConstants() const override { return true; }
-    bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
-    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
-
-private:
-    WrapperType wrapper_function;
-    const char * name;
-    std::optional<CastDiagnostic> diagnostic;
-};
-
-
-struct FunctionCastName
-{
-    static constexpr auto name = "CAST";
-};
-
-class FunctionCastBase : public IFunctionBase
-{
-public:
-    using MonotonicityForRange = std::function<Monotonicity(const IDataType &, const Field &, const Field &)>;
-};
-
-class FunctionCast final : public FunctionCastBase
-{
-public:
-    using WrapperType = std::function<ColumnPtr(ColumnsWithTypeAndName &, const DataTypePtr &, const ColumnNullable *, size_t)>;
-
-    FunctionCast(ContextPtr context_
-            , const char * cast_name_
-            , MonotonicityForRange && monotonicity_for_range_
-            , const DataTypes & argument_types_
-            , const DataTypePtr & return_type_
-            , std::optional<CastDiagnostic> diagnostic_
-            , CastType cast_type_)
-        : cast_name(cast_name_), monotonicity_for_range(std::move(monotonicity_for_range_))
-        , argument_types(argument_types_), return_type(return_type_), diagnostic(std::move(diagnostic_))
-        , cast_type(cast_type_)
-        , context(context_)
-    {
-    }
-
-    const DataTypes & getArgumentTypes() const override { return argument_types; }
-    const DataTypePtr & getResultType() const override { return return_type; }
-
-    ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName & /*sample_columns*/) const override
-    {
-        try
-        {
-            return std::make_unique<ExecutableFunctionCast>(
-                    prepareUnpackDictionaries(getArgumentTypes()[0], getResultType()), cast_name, diagnostic);
-        }
-        catch (Exception & e)
-        {
-            if (diagnostic)
-                e.addMessage("while converting source column " + backQuoteIfNeed(diagnostic->column_from) +
-                             " to destination column " + backQuoteIfNeed(diagnostic->column_to));
-            throw;
-        }
-    }
-
-    String getName() const override { return cast_name; }
-
-    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
-
-    bool hasInformationAboutMonotonicity() const override
-    {
-        return static_cast<bool>(monotonicity_for_range);
-    }
-
-    Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override
-    {
-        return monotonicity_for_range(type, left, right);
-    }
-
-private:
-
-    const char * cast_name;
-    MonotonicityForRange monotonicity_for_range;
-
-    DataTypes argument_types;
-    DataTypePtr return_type;
-
-    std::optional<CastDiagnostic> diagnostic;
-    CastType cast_type;
-    ContextPtr context;
-
-    static WrapperType createFunctionAdaptor(FunctionPtr function, const DataTypePtr & from_type)
-    {
-        auto function_adaptor = std::make_unique<FunctionToOverloadResolverAdaptor>(function)->build({ColumnWithTypeAndName{nullptr, from_type, ""}});
-
-        return [function_adaptor]
-            (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count)
-        {
-            return function_adaptor->execute(arguments, result_type, input_rows_count);
-        };
-    }
-
-    static WrapperType createToNullableColumnWrapper()
-    {
-        return [] (ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count)
-        {
-            ColumnPtr res = result_type->createColumn();
-            ColumnUInt8::Ptr col_null_map_to = ColumnUInt8::create(input_rows_count, true);
-            return ColumnNullable::create(res->cloneResized(input_rows_count), std::move(col_null_map_to));
-        };
-    }
-
-    template <typename ToDataType>
-    WrapperType createWrapper(const DataTypePtr & from_type, const ToDataType * const to_type, bool requested_result_is_nullable) const
-    {
-        TypeIndex from_type_index = from_type->getTypeId();
-        WhichDataType which(from_type_index);
-        bool can_apply_accurate_cast = (cast_type == CastType::accurate || cast_type == CastType::accurateOrNull)
-            && (which.isInt() || which.isUInt() || which.isFloat());
-
-        FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior;
-        if (context)
-            date_time_overflow_behavior = context->getSettingsRef().date_time_overflow_behavior;
-
-        if (requested_result_is_nullable && checkAndGetDataType<DataTypeString>(from_type.get()))
-        {
-            /// In case when converting to Nullable type, we apply different parsing rule,
-            /// that will not throw an exception but return NULL in case of malformed input.
-            FunctionPtr function = FunctionConvertFromString<ToDataType, FunctionCastName, ConvertFromStringExceptionMode::Null>::create();
-            return createFunctionAdaptor(function, from_type);
-        }
-        else if (!can_apply_accurate_cast)
-        {
-            FunctionPtr function = FunctionTo<ToDataType>::Type::create(context);
-            return createFunctionAdaptor(function, from_type);
-        }
-
-        auto wrapper_cast_type = cast_type;
-
-        return [wrapper_cast_type, from_type_index, to_type, date_time_overflow_behavior]
-            (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *column_nullable, size_t input_rows_count)
-        {
-            ColumnPtr result_column;
-            auto res = callOnIndexAndDataType<ToDataType>(from_type_index, [&](const auto & types) -> bool {
-                using Types = std::decay_t<decltype(types)>;
-                using LeftDataType = typename Types::LeftType;
-                using RightDataType = typename Types::RightType;
-
-                if constexpr (IsDataTypeNumber<LeftDataType>)
-                {
-                    if constexpr (IsDataTypeNumber<RightDataType>)
-                    {
-#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE, ADDITIONS) \
-            case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \
-                result_column = ConvertImpl<LeftDataType, RightDataType, FunctionCastName, ConvertDefaultBehaviorTag, FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE>::execute( \
-                    arguments, result_type, input_rows_count, ADDITIONS()); \
-                break;
-                        if (wrapper_cast_type == CastType::accurate)
-                        {
-                            switch (date_time_overflow_behavior)
-                            {
-                                GENERATE_OVERFLOW_MODE_CASE(Throw, AccurateConvertStrategyAdditions)
-                                GENERATE_OVERFLOW_MODE_CASE(Ignore, AccurateConvertStrategyAdditions)
-                                GENERATE_OVERFLOW_MODE_CASE(Saturate, AccurateConvertStrategyAdditions)
-                            }
-                        }
-                        else
-                        {
-                            switch (date_time_overflow_behavior)
-                            {
-                                GENERATE_OVERFLOW_MODE_CASE(Throw, AccurateOrNullConvertStrategyAdditions)
-                                GENERATE_OVERFLOW_MODE_CASE(Ignore, AccurateOrNullConvertStrategyAdditions)
-                                GENERATE_OVERFLOW_MODE_CASE(Saturate, AccurateOrNullConvertStrategyAdditions)
-                            }
-                        }
-#undef GENERATE_OVERFLOW_MODE_CASE
-
-                        return true;
-                    }
-
-                    if constexpr (std::is_same_v<RightDataType, DataTypeDate> || std::is_same_v<RightDataType, DataTypeDateTime>)
-                    {
-#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE, ADDITIONS) \
-            case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \
-                result_column = ConvertImpl<LeftDataType, RightDataType, FunctionCastName, ConvertDefaultBehaviorTag, FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE>::template execute<ADDITIONS>( \
-                    arguments, result_type, input_rows_count); \
-                break;
-                        if (wrapper_cast_type == CastType::accurate)
-                        {
-                            switch (date_time_overflow_behavior)
-                            {
-                                GENERATE_OVERFLOW_MODE_CASE(Throw, DateTimeAccurateConvertStrategyAdditions)
-                                GENERATE_OVERFLOW_MODE_CASE(Ignore, DateTimeAccurateConvertStrategyAdditions)
-                                GENERATE_OVERFLOW_MODE_CASE(Saturate, DateTimeAccurateConvertStrategyAdditions)
-                            }
-                        }
-                        else
-                        {
-                            switch (date_time_overflow_behavior)
-                            {
-                                GENERATE_OVERFLOW_MODE_CASE(Throw, DateTimeAccurateOrNullConvertStrategyAdditions)
-                                GENERATE_OVERFLOW_MODE_CASE(Ignore, DateTimeAccurateOrNullConvertStrategyAdditions)
-                                GENERATE_OVERFLOW_MODE_CASE(Saturate, DateTimeAccurateOrNullConvertStrategyAdditions)
-                            }
-                        }
-#undef GENERATE_OVERFLOW_MODE_CASE
-                        return true;
-                    }
-                }
-
-                return false;
-            });
-
-            /// Additionally check if callOnIndexAndDataType wasn't called at all.
-            if (!res)
-            {
-                if (wrapper_cast_type == CastType::accurateOrNull)
-                {
-                    auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper();
-                    return nullable_column_wrapper(arguments, result_type, column_nullable, input_rows_count);
-                }
-                else
-                {
-                    throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE,
-                        "Conversion from {} to {} is not supported",
-                        from_type_index, to_type->getName());
-                }
-            }
-
-            return result_column;
-        };
-    }
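`createWrapper` returns a `std::function` closure so that nested casts (arrays, tuples, maps) can compose element conversions without knowing concrete types at the call site. A reduced sketch of that composition idea (hypothetical names; the real wrappers operate on whole columns, not single values):

```cpp
#include <functional>
#include <iostream>
#include <string>
#include <vector>

// A "wrapper" converts one value; the real code converts a whole column per call.
using Wrapper = std::function<std::string(int)>;

// Compose an element wrapper into an array wrapper, mirroring how an outer
// conversion simply maps the inner one over the nested data.
std::function<std::vector<std::string>(const std::vector<int> &)>
makeArrayWrapper(Wrapper element_wrapper)
{
    return [element_wrapper](const std::vector<int> & input)
    {
        std::vector<std::string> out;
        out.reserve(input.size());
        for (int v : input)
            out.push_back(element_wrapper(v));
        return out;
    };
}

int main()
{
    Wrapper to_string = [](int v) { return std::to_string(v); };
    auto array_cast = makeArrayWrapper(to_string);
    for (const auto & s : array_cast({1, 2, 3}))
        std::cout << s << ' '; // 1 2 3
}
```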
-    template <typename ToDataType>
-    WrapperType createBoolWrapper(const DataTypePtr & from_type, const ToDataType * const to_type, bool requested_result_is_nullable) const
-    {
-        if (checkAndGetDataType<DataTypeString>(from_type.get()))
-        {
-            return &ConvertImplGenericFromString<ColumnString>::execute;
-        }
-
-        return createWrapper(from_type, to_type, requested_result_is_nullable);
-    }
-
-    WrapperType createUInt8ToBoolWrapper(const DataTypePtr from_type, const DataTypePtr to_type) const
-    {
-        return [from_type, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) -> ColumnPtr
-        {
-            /// Special case when we convert UInt8 column to Bool column.
-            /// Both columns have type UInt8, but we shouldn't use identity wrapper,
-            /// because Bool column can contain only 0 and 1.
-            auto res_column = to_type->createColumn();
-            const auto & data_from = checkAndGetColumn<ColumnUInt8>(arguments[0].column.get())->getData();
-            auto & data_to = assert_cast<ColumnUInt8 *>(res_column.get())->getData();
-            data_to.resize(data_from.size());
-            for (size_t i = 0; i != data_from.size(); ++i)
-                data_to[i] = static_cast<bool>(data_from[i]);
-            return res_column;
-        };
-    }
-
-    static WrapperType createStringWrapper(const DataTypePtr & from_type)
-    {
-        FunctionPtr function = FunctionToString::create();
-        return createFunctionAdaptor(function, from_type);
-    }
-
-    WrapperType createFixedStringWrapper(const DataTypePtr & from_type, const size_t N) const
-    {
-        if (!isStringOrFixedString(from_type))
-            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CAST AS FixedString is only implemented for types String and FixedString");
-
-        bool exception_mode_null = cast_type == CastType::accurateOrNull;
-        return [exception_mode_null, N] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/)
-        {
-            if (exception_mode_null)
-                return FunctionToFixedString::executeForN<ConvertToFixedStringExceptionMode::Null>(arguments, N);
-            else
-                return FunctionToFixedString::executeForN<ConvertToFixedStringExceptionMode::Throw>(arguments, N);
-        };
-    }
-
-#define GENERATE_INTERVAL_CASE(INTERVAL_KIND) \
-    case IntervalKind::Kind::INTERVAL_KIND: \
-        return createFunctionAdaptor(FunctionConvert<DataTypeInterval, NameToInterval##INTERVAL_KIND, PositiveMonotonicity>::create(), from_type);
-
-    static WrapperType createIntervalWrapper(const DataTypePtr & from_type, IntervalKind kind)
-    {
-        switch (kind)
-        {
-            GENERATE_INTERVAL_CASE(Nanosecond)
-            GENERATE_INTERVAL_CASE(Microsecond)
-            GENERATE_INTERVAL_CASE(Millisecond)
-            GENERATE_INTERVAL_CASE(Second)
-            GENERATE_INTERVAL_CASE(Minute)
-            GENERATE_INTERVAL_CASE(Hour)
-            GENERATE_INTERVAL_CASE(Day)
-            GENERATE_INTERVAL_CASE(Week)
-            GENERATE_INTERVAL_CASE(Month)
-            GENERATE_INTERVAL_CASE(Quarter)
-            GENERATE_INTERVAL_CASE(Year)
-        }
-        throw Exception{ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion to unexpected IntervalKind: {}", kind.toString()};
-    }
-
-#undef GENERATE_INTERVAL_CASE
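The UInt8-to-Bool wrapper above cannot be an identity copy even though both columns physically store UInt8: every non-zero byte must collapse to 1. A tiny standalone illustration of that normalization:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    std::vector<uint8_t> data_from{0, 1, 2, 255};
    std::vector<uint8_t> data_to(data_from.size());

    // static_cast<bool> collapses any non-zero value to 1, as the wrapper does.
    for (size_t i = 0; i != data_from.size(); ++i)
        data_to[i] = static_cast<bool>(data_from[i]);

    for (auto v : data_to)
        std::cout << int(v) << ' '; // 0 1 1 1
}
```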
-    template <typename ToDataType>
-    requires IsDataTypeDecimal<ToDataType>
-    WrapperType createDecimalWrapper(const DataTypePtr & from_type, const ToDataType * to_type, bool requested_result_is_nullable) const
-    {
-        TypeIndex type_index = from_type->getTypeId();
-        UInt32 scale = to_type->getScale();
-
-        WhichDataType which(type_index);
-        bool ok = which.isNativeInt() || which.isNativeUInt() || which.isDecimal() || which.isFloat() || which.isDateOrDate32() || which.isDateTime() || which.isDateTime64()
-            || which.isStringOrFixedString();
-        if (!ok)
-        {
-            if (cast_type == CastType::accurateOrNull)
-                return createToNullableColumnWrapper();
-            else
-                throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported",
-                    from_type->getName(), to_type->getName());
-        }
-
-        auto wrapper_cast_type = cast_type;
-
-        return [wrapper_cast_type, type_index, scale, to_type, requested_result_is_nullable]
-            (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *column_nullable, size_t input_rows_count)
-        {
-            ColumnPtr result_column;
-            auto res = callOnIndexAndDataType<ToDataType>(type_index, [&](const auto & types) -> bool
-            {
-                using Types = std::decay_t<decltype(types)>;
-                using LeftDataType = typename Types::LeftType;
-                using RightDataType = typename Types::RightType;
-
-                if constexpr (IsDataTypeDecimalOrNumber<LeftDataType> && IsDataTypeDecimalOrNumber<RightDataType> && !std::is_same_v<DataTypeDateTime64, RightDataType>)
-                {
-                    if (wrapper_cast_type == CastType::accurate)
-                    {
-                        AccurateConvertStrategyAdditions additions;
-                        additions.scale = scale;
-                        result_column = ConvertImpl<LeftDataType, RightDataType, FunctionCastName>::execute(
-                            arguments, result_type, input_rows_count, additions);
-
-                        return true;
-                    }
-                    else if (wrapper_cast_type == CastType::accurateOrNull)
-                    {
-                        AccurateOrNullConvertStrategyAdditions additions;
-                        additions.scale = scale;
-                        result_column = ConvertImpl<LeftDataType, RightDataType, FunctionCastName>::execute(
-                            arguments, result_type, input_rows_count, additions);
-
-                        return true;
-                    }
-                }
-                else if constexpr (std::is_same_v<LeftDataType, DataTypeString>)
-                {
-                    if (requested_result_is_nullable)
-                    {
-                        /// Consistent with CAST(Nullable(String) AS Nullable(Numbers))
-                        /// In case when converting to Nullable type, we apply different parsing rule,
-                        /// that will not throw an exception but return NULL in case of malformed input.
-                        result_column = ConvertImpl<LeftDataType, RightDataType, FunctionCastName, ConvertReturnNullOnErrorTag>::execute(
-                            arguments, result_type, input_rows_count, scale);
-
-                        return true;
-                    }
-                }
-
-                result_column = ConvertImpl<LeftDataType, RightDataType, FunctionCastName>::execute(arguments, result_type, input_rows_count, scale);
-
-                return true;
-            });
-
-            /// Additionally check if callOnIndexAndDataType wasn't called at all.
-            if (!res)
-            {
-                if (wrapper_cast_type == CastType::accurateOrNull)
-                {
-                    auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper();
-                    return nullable_column_wrapper(arguments, result_type, column_nullable, input_rows_count);
-                }
-                else
-                    throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE,
-                        "Conversion from {} to {} is not supported",
-                        type_index, to_type->getName());
-            }
-
-            return result_column;
-        };
-    }
-
-    WrapperType createAggregateFunctionWrapper(const DataTypePtr & from_type_untyped, const DataTypeAggregateFunction * to_type) const
-    {
-        /// Conversion from String through parsing.
-        if (checkAndGetDataType<DataTypeString>(from_type_untyped.get()))
-        {
-            return &ConvertImplGenericFromString<ColumnString>::execute;
-        }
-        else if (const auto * agg_type = checkAndGetDataType<DataTypeAggregateFunction>(from_type_untyped.get()))
-        {
-            if (agg_type->getFunction()->haveSameStateRepresentation(*to_type->getFunction()))
-            {
-                return [function = to_type->getFunction()](
-                           ColumnsWithTypeAndName & arguments,
-                           const DataTypePtr & /* result_type */,
-                           const ColumnNullable * /* nullable_source */,
-                           size_t /*input_rows_count*/) -> ColumnPtr
-                {
-                    const auto & argument_column = arguments.front();
-                    const auto * col_agg = checkAndGetColumn<ColumnAggregateFunction>(argument_column.column.get());
-                    if (col_agg)
-                    {
-                        auto new_col_agg = ColumnAggregateFunction::create(*col_agg);
-                        new_col_agg->set(function);
-                        return new_col_agg;
-                    }
-                    else
-                    {
-                        throw Exception(
-                            ErrorCodes::LOGICAL_ERROR,
-                            "Illegal column {} for function CAST AS AggregateFunction",
-                            argument_column.column->getName());
-                    }
-                };
-            }
-        }
-
-        if (cast_type == CastType::accurateOrNull)
-            return createToNullableColumnWrapper();
-        else
-            throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported",
-                from_type_untyped->getName(), to_type->getName());
-    }
-
-    WrapperType createArrayWrapper(const DataTypePtr & from_type_untyped, const DataTypeArray & to_type) const
-    {
-        /// Conversion from String through parsing.
-        if (checkAndGetDataType<DataTypeString>(from_type_untyped.get()))
-        {
-            return &ConvertImplGenericFromString<ColumnString>::execute;
-        }
-
-        DataTypePtr from_type_holder;
-        const auto * from_type = checkAndGetDataType<DataTypeArray>(from_type_untyped.get());
-        const auto * from_type_map = checkAndGetDataType<DataTypeMap>(from_type_untyped.get());
-
-        /// Convert from Map
-        if (from_type_map)
-        {
-            /// Recreate array of unnamed tuples because otherwise it may work
-            /// unexpectedly while converting to array of named tuples.
-            from_type_holder = from_type_map->getNestedTypeWithUnnamedTuple();
-            from_type = assert_cast<const DataTypeArray *>(from_type_holder.get());
-        }
-
-        if (!from_type)
-        {
-            throw Exception(ErrorCodes::TYPE_MISMATCH,
-                "CAST AS Array can only be performed between same-dimensional Array, Map or String types");
-        }
-
-        DataTypePtr from_nested_type = from_type->getNestedType();
-
-        /// In query SELECT CAST([] AS Array(Array(String))) from type is Array(Nothing)
-        bool from_empty_array = isNothing(from_nested_type);
-
-        if (from_type->getNumberOfDimensions() != to_type.getNumberOfDimensions() && !from_empty_array)
-            throw Exception(ErrorCodes::TYPE_MISMATCH,
-                "CAST AS Array can only be performed between same-dimensional array types");
-
-        const DataTypePtr & to_nested_type = to_type.getNestedType();
-
-        /// Prepare nested type conversion
-        const auto nested_function = prepareUnpackDictionaries(from_nested_type, to_nested_type);
-
-        return [nested_function, from_nested_type, to_nested_type](
-                ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr
-        {
-            const auto & argument_column = arguments.front();
-
-            const ColumnArray * col_array = nullptr;
-
-            if (const ColumnMap * col_map = checkAndGetColumn<ColumnMap>(argument_column.column.get()))
-                col_array = &col_map->getNestedColumn();
-            else
-                col_array = checkAndGetColumn<ColumnArray>(argument_column.column.get());
-
-            if (col_array)
-            {
-                /// create columns for converting nested column containing original and result columns
-                ColumnsWithTypeAndName nested_columns{{ col_array->getDataPtr(), from_nested_type, "" }};
-
-                /// convert nested column
-                auto result_column = nested_function(nested_columns, to_nested_type, nullable_source, nested_columns.front().column->size());
-
-                /// set converted nested column to result
-                return ColumnArray::create(result_column, col_array->getOffsetsPtr());
-            }
-            else
-            {
-                throw Exception(ErrorCodes::LOGICAL_ERROR,
-                    "Illegal column {} for function CAST AS Array",
-                    argument_column.column->getName());
-            }
-        };
-    }
-
-    using ElementWrappers = std::vector<WrapperType>;
-
-    ElementWrappers getElementWrappers(const DataTypes & from_element_types, const DataTypes & to_element_types) const
-    {
-        ElementWrappers element_wrappers;
-        element_wrappers.reserve(from_element_types.size());
-
-        /// Create conversion wrapper for each element in tuple
-        for (size_t i = 0; i < from_element_types.size(); ++i)
-        {
-            const DataTypePtr & from_element_type = from_element_types[i];
-            const DataTypePtr & to_element_type = to_element_types[i];
-            element_wrappers.push_back(prepareUnpackDictionaries(from_element_type, to_element_type));
-        }
-
-        return element_wrappers;
-    }
-
-    WrapperType createTupleWrapper(const DataTypePtr & from_type_untyped, const DataTypeTuple * to_type) const
-    {
-        /// Conversion from String through parsing.
-        if (checkAndGetDataType<DataTypeString>(from_type_untyped.get()))
-        {
-            return &ConvertImplGenericFromString<ColumnString>::execute;
-        }
-
-        const auto * from_type = checkAndGetDataType<DataTypeTuple>(from_type_untyped.get());
-        if (!from_type)
-            throw Exception(ErrorCodes::TYPE_MISMATCH, "CAST AS Tuple can only be performed between tuple types or from String.\n"
-                            "Left type: {}, right type: {}", from_type_untyped->getName(), to_type->getName());
-
-        const auto & from_element_types = from_type->getElements();
-        const auto & to_element_types = to_type->getElements();
-
-        std::vector<WrapperType> element_wrappers;
-        std::vector<std::optional<size_t>> to_reverse_index;
-
-        /// For named tuples allow conversions for tuples with
-        /// different sets of elements. If an element exists in @to_type
-        /// and doesn't exist in @from_type it will be filled by default values.
-        if (from_type->haveExplicitNames() && to_type->haveExplicitNames())
-        {
-            const auto & from_names = from_type->getElementNames();
-            std::unordered_map<String, size_t> from_positions;
-            from_positions.reserve(from_names.size());
-            for (size_t i = 0; i < from_names.size(); ++i)
-                from_positions[from_names[i]] = i;
-
-            const auto & to_names = to_type->getElementNames();
-            element_wrappers.reserve(to_names.size());
-            to_reverse_index.reserve(from_names.size());
-
-            for (size_t i = 0; i < to_names.size(); ++i)
-            {
-                auto it = from_positions.find(to_names[i]);
-                if (it != from_positions.end())
-                {
-                    element_wrappers.emplace_back(prepareUnpackDictionaries(from_element_types[it->second], to_element_types[i]));
-                    to_reverse_index.emplace_back(it->second);
-                }
-                else
-                {
-                    element_wrappers.emplace_back();
-                    to_reverse_index.emplace_back();
-                }
-            }
-        }
-        else
-        {
-            if (from_element_types.size() != to_element_types.size())
-                throw Exception(ErrorCodes::TYPE_MISMATCH, "CAST AS Tuple can only be performed between tuple types "
-                                "with the same number of elements or from String.\nLeft type: {}, right type: {}",
-                                from_type->getName(), to_type->getName());
-
-            element_wrappers = getElementWrappers(from_element_types, to_element_types);
-            to_reverse_index.reserve(to_element_types.size());
-            for (size_t i = 0; i < to_element_types.size(); ++i)
-                to_reverse_index.emplace_back(i);
-        }
-
-        return [element_wrappers, from_element_types, to_element_types, to_reverse_index]
-            (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count) -> ColumnPtr
-        {
-            const auto * col = arguments.front().column.get();
-
-            size_t tuple_size = to_element_types.size();
-            const ColumnTuple & column_tuple = typeid_cast<const ColumnTuple &>(*col);
-
-            Columns converted_columns(tuple_size);
-
-            /// invoke conversion for each element
-            for (size_t i = 0; i < tuple_size; ++i)
-            {
-                if (to_reverse_index[i])
-                {
-                    size_t from_idx = *to_reverse_index[i];
-                    ColumnsWithTypeAndName element = {{column_tuple.getColumns()[from_idx], from_element_types[from_idx], "" }};
-                    converted_columns[i] = element_wrappers[i](element, to_element_types[i], nullable_source, input_rows_count);
-                }
-                else
-                {
-                    converted_columns[i] = to_element_types[i]->createColumn()->cloneResized(input_rows_count);
-                }
-            }
-
-            return ColumnTuple::create(converted_columns);
-        };
-    }
-
-    /// The case of: tuple([key1, key2, ..., key_n], [value1, value2, ..., value_n])
-    WrapperType createTupleToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const
-    {
-        return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types]
-            (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr
-        {
-            const auto * col = arguments.front().column.get();
-            const auto & column_tuple = assert_cast<const ColumnTuple &>(*col);
-
-            Columns offsets(2);
-            Columns converted_columns(2);
-            for (size_t i = 0; i < 2; ++i)
-            {
-                const auto & column_array = assert_cast<const ColumnArray &>(column_tuple.getColumn(i));
-                ColumnsWithTypeAndName element = {{column_array.getDataPtr(), from_kv_types[i], ""}};
-                converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size());
-                offsets[i] = column_array.getOffsetsPtr();
-            }
-
-            const auto & keys_offsets = assert_cast<const ColumnArray::ColumnOffsets &>(*offsets[0]).getData();
-            const auto & values_offsets = assert_cast<const ColumnArray::ColumnOffsets &>(*offsets[1]).getData();
-            if (keys_offsets != values_offsets)
-                throw Exception(ErrorCodes::TYPE_MISMATCH,
-                    "CAST AS Map can only be performed from tuple of arrays with equal sizes.");
-
-            return ColumnMap::create(converted_columns[0], converted_columns[1], offsets[0]);
-        };
-    }
-
-    WrapperType createMapToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const
-    {
-        return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types]
-            (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr
-        {
-            const auto * col = arguments.front().column.get();
-            const auto & column_map = typeid_cast<const ColumnMap &>(*col);
-            const auto & nested_data = column_map.getNestedData();
-
-            Columns converted_columns(2);
-            for (size_t i = 0; i < 2; ++i)
-            {
-                ColumnsWithTypeAndName element = {{nested_data.getColumnPtr(i), from_kv_types[i], ""}};
-                converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size());
-            }
-
-            return ColumnMap::create(converted_columns[0], converted_columns[1], column_map.getNestedColumn().getOffsetsPtr());
-        };
-    }
-
-    /// The case of: [(key1, value1), (key2, value2), ...]
-    WrapperType createArrayToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const
-    {
-        return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types]
-            (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr
-        {
-            const auto * col = arguments.front().column.get();
-            const auto & column_array = typeid_cast<const ColumnArray &>(*col);
-            const auto & nested_data = typeid_cast<const ColumnTuple &>(column_array.getData());
-
-            Columns converted_columns(2);
-            for (size_t i = 0; i < 2; ++i)
-            {
-                ColumnsWithTypeAndName element = {{nested_data.getColumnPtr(i), from_kv_types[i], ""}};
-                converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size());
-            }
-
-            return ColumnMap::create(converted_columns[0], converted_columns[1], column_array.getOffsetsPtr());
-        };
-    }
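The tuple-to-map path above accepts `([k...], [v...])` only when both arrays have identical row boundaries; otherwise some row would pair a key with no value. A self-contained sketch of the same validation over flat key/value vectors (illustrative, not the column-offset representation ClickHouse uses):

```cpp
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

// Build a map from parallel key/value arrays, rejecting mismatched sizes
// the same way CAST AS Map rejects tuples of arrays with unequal offsets.
std::map<std::string, int> makeMap(const std::vector<std::string> & keys,
                                   const std::vector<int> & values)
{
    if (keys.size() != values.size())
        throw std::invalid_argument("keys and values must have equal sizes");

    std::map<std::string, int> result;
    for (size_t i = 0; i != keys.size(); ++i)
        result.emplace(keys[i], values[i]); // first occurrence of a key wins
    return result;
}

int main()
{
    auto m = makeMap({"a", "b"}, {1, 2});
    std::cout << m.at("b") << '\n'; // 2
}
```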
" - "Left type: {}, right type: {}", - from_array->getName(), - to_type->getName()); - - return createArrayToMapWrapper(nested_tuple->getElements(), to_type->getKeyValueTypes()); - } - else if (const auto * from_type = checkAndGetDataType(from_type_untyped.get())) - { - return createMapToMapWrapper(from_type->getKeyValueTypes(), to_type->getKeyValueTypes()); - } - else - { - throw Exception(ErrorCodes::TYPE_MISMATCH, "Unsupported types to CAST AS Map. " - "Left type: {}, right type: {}", from_type_untyped->getName(), to_type->getName()); - } - } - - WrapperType createTupleToObjectWrapper(const DataTypeTuple & from_tuple, bool has_nullable_subcolumns) const - { - if (!from_tuple.haveExplicitNames()) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Cast to Object can be performed only from flatten Named Tuple. Got: {}", from_tuple.getName()); - - PathsInData paths; - DataTypes from_types; - - std::tie(paths, from_types) = flattenTuple(from_tuple.getPtr()); - auto to_types = from_types; - - for (auto & type : to_types) - { - if (isTuple(type) || isNested(type)) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Cast to Object can be performed only from flatten Named Tuple. Got: {}", - from_tuple.getName()); - - type = recursiveRemoveLowCardinality(type); - } - - return [element_wrappers = getElementWrappers(from_types, to_types), - has_nullable_subcolumns, from_types, to_types, paths] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count) - { - size_t tuple_size = to_types.size(); - auto flattened_column = flattenTuple(arguments.front().column); - const auto & column_tuple = assert_cast(*flattened_column); - - if (tuple_size != column_tuple.getColumns().size()) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Expected tuple with {} subcolumn, but got {} subcolumns", - tuple_size, column_tuple.getColumns().size()); - - auto res = ColumnObject::create(has_nullable_subcolumns); - for (size_t i = 0; i < tuple_size; ++i) - { - ColumnsWithTypeAndName element = {{column_tuple.getColumns()[i], from_types[i], "" }}; - auto converted_column = element_wrappers[i](element, to_types[i], nullable_source, input_rows_count); - res->addSubcolumn(paths[i], converted_column->assumeMutable()); - } - - return res; - }; - } - - WrapperType createMapToObjectWrapper(const DataTypeMap & from_map, bool has_nullable_subcolumns) const - { - auto key_value_types = from_map.getKeyValueTypes(); - - if (!isStringOrFixedString(key_value_types[0])) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Cast to Object from Map can be performed only from Map " - "with String or FixedString key. 
Got: {}", from_map.getName()); - - const auto & value_type = key_value_types[1]; - auto to_value_type = value_type; - - if (!has_nullable_subcolumns && value_type->isNullable()) - to_value_type = removeNullable(value_type); - - if (has_nullable_subcolumns && !value_type->isNullable()) - to_value_type = makeNullable(value_type); - - DataTypes to_key_value_types{std::make_shared(), std::move(to_value_type)}; - auto element_wrappers = getElementWrappers(key_value_types, to_key_value_types); - - return [has_nullable_subcolumns, element_wrappers, key_value_types, to_key_value_types] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t) -> ColumnPtr - { - const auto & column_map = assert_cast(*arguments.front().column); - const auto & offsets = column_map.getNestedColumn().getOffsets(); - auto key_value_columns = column_map.getNestedData().getColumnsCopy(); - - for (size_t i = 0; i < 2; ++i) - { - ColumnsWithTypeAndName element{{key_value_columns[i], key_value_types[i], ""}}; - key_value_columns[i] = element_wrappers[i](element, to_key_value_types[i], nullable_source, key_value_columns[i]->size()); - } - - const auto & key_column_str = assert_cast(*key_value_columns[0]); - const auto & value_column = *key_value_columns[1]; - - using SubcolumnsMap = HashMap; - SubcolumnsMap subcolumns; - - for (size_t row = 0; row < offsets.size(); ++row) - { - for (size_t i = offsets[static_cast(row) - 1]; i < offsets[row]; ++i) - { - auto ref = key_column_str.getDataAt(i); - - bool inserted; - SubcolumnsMap::LookupResult it; - subcolumns.emplace(ref, it, inserted); - auto & subcolumn = it->getMapped(); - - if (inserted) - subcolumn = value_column.cloneEmpty()->cloneResized(row); - - /// Map can have duplicated keys. We insert only first one. - if (subcolumn->size() == row) - subcolumn->insertFrom(value_column, i); - } - - /// Insert default values for keys missed in current row. 
- for (const auto & [_, subcolumn] : subcolumns) - if (subcolumn->size() == row) - subcolumn->insertDefault(); - } - - auto column_object = ColumnObject::create(has_nullable_subcolumns); - for (auto && [key, subcolumn] : subcolumns) - { - PathInData path(key.toView()); - column_object->addSubcolumn(path, std::move(subcolumn)); - } - - return column_object; - }; - } - - WrapperType createObjectWrapper(const DataTypePtr & from_type, const DataTypeObject * to_type) const - { - if (const auto * from_tuple = checkAndGetDataType(from_type.get())) - { - return createTupleToObjectWrapper(*from_tuple, to_type->hasNullableSubcolumns()); - } - else if (const auto * from_map = checkAndGetDataType(from_type.get())) - { - return createMapToObjectWrapper(*from_map, to_type->hasNullableSubcolumns()); - } - else if (checkAndGetDataType(from_type.get())) - { - return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) - { - auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); - res->finalize(); - return res; - }; - } - else if (checkAndGetDataType(from_type.get())) - { - return [is_nullable = to_type->hasNullableSubcolumns()] (ColumnsWithTypeAndName & arguments, const DataTypePtr & , const ColumnNullable * , size_t) -> ColumnPtr - { - auto & column_object = assert_cast(*arguments.front().column); - auto res = ColumnObject::create(is_nullable); - for (size_t i = 0; i < column_object.size(); i++) - res->insert(column_object[i]); - - res->finalize(); - return res; - }; - } - - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Cast to Object can be performed only from flatten named Tuple, Map or String. Got: {}", from_type->getName()); - } - - WrapperType createVariantToVariantWrapper(const DataTypeVariant & from_variant, const DataTypeVariant & to_variant) const - { - /// We support only extension of variant type, so, only new types can be added. - /// For example: Variant(T1, T2) -> Variant(T1, T2, T3) is supported, but Variant(T1, T2) -> Variant(T1, T3) is not supported. - /// We want to extend Variant type for free without rewriting the data, but we sort data types inside Variant during type creation - /// (we do it because we want Variant(T1, T2) to be the same as Variant(T2, T1)), but after extension the order of variant types - /// (and so their discriminators) can be different. For example: Variant(T1, T3) -> Variant(T1, T2, T3). - /// To avoid full rewrite of discriminators column, ColumnVariant supports it's local order of variant columns (and so local - /// discriminators) and stores mapping global order -> local order. - /// So, to extend Variant with new types for free, we should keep old local order for old variants, append new variants and change - /// mapping global order -> local order according to the new global order. - - /// Create map (new variant type) -> (it's global discriminator in new order). - const auto & new_variants = to_variant.getVariants(); - std::unordered_map new_variant_types_to_new_global_discriminator; - new_variant_types_to_new_global_discriminator.reserve(new_variants.size()); - for (size_t i = 0; i != new_variants.size(); ++i) - new_variant_types_to_new_global_discriminator[new_variants[i]->getName()] = i; - - /// Create set of old variant types. 
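The Map-to-Object wrapper above turns each distinct key into a subcolumn and pads rows where a key is absent with defaults, so all subcolumns stay the same length. A compact sketch of that padding algorithm over plain vectors (hypothetical types; the real code uses `HashMap<StringRef, MutableColumnPtr>`):

```cpp
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

using Row = std::vector<std::pair<std::string, int>>;

int main()
{
    std::vector<Row> rows{{{"a", 1}}, {{"b", 2}}, {{"a", 3}, {"b", 4}}};
    std::map<std::string, std::vector<int>> subcolumns;

    for (size_t row = 0; row != rows.size(); ++row)
    {
        for (const auto & [key, value] : rows[row])
        {
            auto & subcolumn = subcolumns[key];
            subcolumn.resize(row, 0);      // pad rows seen before this key appeared
            if (subcolumn.size() == row)   // skip duplicated keys within one row
                subcolumn.push_back(value);
        }
        for (auto & [_, subcolumn] : subcolumns)
            if (subcolumn.size() == row)
                subcolumn.push_back(0);    // default for keys missing in this row
    }

    for (const auto & [key, subcolumn] : subcolumns)
    {
        std::cout << key << ':';
        for (int v : subcolumn)
            std::cout << ' ' << v;
        std::cout << '\n'; // a: 1 0 3  /  b: 0 2 4
    }
}
```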
-    WrapperType createVariantToVariantWrapper(const DataTypeVariant & from_variant, const DataTypeVariant & to_variant) const
-    {
-        /// We support only extension of variant type, so, only new types can be added.
-        /// For example: Variant(T1, T2) -> Variant(T1, T2, T3) is supported, but Variant(T1, T2) -> Variant(T1, T3) is not supported.
-        /// We want to extend Variant type for free without rewriting the data, but we sort data types inside Variant during type creation
-        /// (we do it because we want Variant(T1, T2) to be the same as Variant(T2, T1)), but after extension the order of variant types
-        /// (and so their discriminators) can be different. For example: Variant(T1, T3) -> Variant(T1, T2, T3).
-        /// To avoid full rewrite of discriminators column, ColumnVariant supports its local order of variant columns (and so local
-        /// discriminators) and stores mapping global order -> local order.
-        /// So, to extend Variant with new types for free, we should keep old local order for old variants, append new variants and change
-        /// mapping global order -> local order according to the new global order.
-
-        /// Create map (new variant type) -> (its global discriminator in new order).
-        const auto & new_variants = to_variant.getVariants();
-        std::unordered_map<String, ColumnVariant::Discriminator> new_variant_types_to_new_global_discriminator;
-        new_variant_types_to_new_global_discriminator.reserve(new_variants.size());
-        for (size_t i = 0; i != new_variants.size(); ++i)
-            new_variant_types_to_new_global_discriminator[new_variants[i]->getName()] = i;
-
-        /// Create set of old variant types.
-        const auto & old_variants = from_variant.getVariants();
-        std::unordered_map<String, ColumnVariant::Discriminator> old_variant_types_to_old_global_discriminator;
-        old_variant_types_to_old_global_discriminator.reserve(old_variants.size());
-        for (size_t i = 0; i != old_variants.size(); ++i)
-            old_variant_types_to_old_global_discriminator[old_variants[i]->getName()] = i;
-
-        /// Check that the set of old variant types is a subset of new variant types and collect new global discriminator for each old global discriminator.
-        std::unordered_map<ColumnVariant::Discriminator, ColumnVariant::Discriminator> old_global_discriminator_to_new;
-        old_global_discriminator_to_new.reserve(old_variants.size());
-        for (const auto & [old_variant_type, old_discriminator] : old_variant_types_to_old_global_discriminator)
-        {
-            auto it = new_variant_types_to_new_global_discriminator.find(old_variant_type);
-            if (it == new_variant_types_to_new_global_discriminator.end())
-                throw Exception(
-                    ErrorCodes::CANNOT_CONVERT_TYPE,
-                    "Cannot convert type {} to {}. Conversion between Variant types is allowed only when new Variant type is an extension "
-                    "of an initial one", from_variant.getName(), to_variant.getName());
-            old_global_discriminator_to_new[old_discriminator] = it->second;
-        }
-
-        /// Collect variant types and their global discriminators that should be added to the old Variant to get the new Variant.
-        std::vector<std::pair<DataTypePtr, ColumnVariant::Discriminator>> variant_types_and_discriminators_to_add;
-        variant_types_and_discriminators_to_add.reserve(new_variants.size() - old_variants.size());
-        for (size_t i = 0; i != new_variants.size(); ++i)
-        {
-            if (!old_variant_types_to_old_global_discriminator.contains(new_variants[i]->getName()))
-                variant_types_and_discriminators_to_add.emplace_back(new_variants[i], i);
-        }
-
-        return [old_global_discriminator_to_new, variant_types_and_discriminators_to_add]
-            (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr
-        {
-            const auto & column_variant = assert_cast<const ColumnVariant &>(*arguments.front().column.get());
-            size_t num_old_variants = column_variant.getNumVariants();
-            Columns new_variant_columns;
-            new_variant_columns.reserve(num_old_variants + variant_types_and_discriminators_to_add.size());
-            std::vector<ColumnVariant::Discriminator> new_local_to_global_discriminators;
-            new_local_to_global_discriminators.reserve(num_old_variants + variant_types_and_discriminators_to_add.size());
-            for (size_t i = 0; i != num_old_variants; ++i)
-            {
-                new_variant_columns.push_back(column_variant.getVariantPtrByLocalDiscriminator(i));
-                new_local_to_global_discriminators.push_back(old_global_discriminator_to_new.at(column_variant.globalDiscriminatorByLocal(i)));
-            }
-
-            for (const auto & [new_variant_type, new_global_discriminator] : variant_types_and_discriminators_to_add)
-            {
-                new_variant_columns.push_back(new_variant_type->createColumn());
-                new_local_to_global_discriminators.push_back(new_global_discriminator);
-            }
-
-            return ColumnVariant::create(column_variant.getLocalDiscriminatorsPtr(), column_variant.getOffsetsPtr(), new_variant_columns, new_local_to_global_discriminators);
-        };
-    }
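Extending `Variant(T1, T3)` to `Variant(T1, T2, T3)` only has to rewrite the small local-to-global mapping, never the per-row discriminators column. A sketch of that remapping with plain integers (illustrative of the idea, not the `ColumnVariant` API):

```cpp
#include <iostream>
#include <unordered_map>
#include <vector>

using Discriminator = unsigned;

int main()
{
    // Old Variant(T1, T3): global discriminators T1=0, T3=1.
    // New Variant(T1, T2, T3): T1=0, T2=1, T3=2.
    std::unordered_map<Discriminator, Discriminator> old_to_new{{0, 0}, {1, 2}};

    // Per-column mapping local order -> old global order (one entry per variant).
    std::vector<Discriminator> local_to_global{1, 0}; // local 0 holds T3, local 1 holds T1

    // Rewrite only this tiny mapping; row-level discriminators stay untouched.
    for (auto & global : local_to_global)
        global = old_to_new.at(global);

    local_to_global.push_back(1); // append empty T2 variant with its new global discriminator

    for (auto d : local_to_global)
        std::cout << d << ' '; // 2 0 1
}
```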
-    WrapperType createVariantToColumnWrapper(const DataTypeVariant & from_variant, const DataTypePtr & to_type) const
-    {
-        const auto & variant_types = from_variant.getVariants();
-        std::vector<WrapperType> variant_wrappers;
-        variant_wrappers.reserve(variant_types.size());
-
-        /// Create conversion wrapper for each variant.
-        for (const auto & variant_type : variant_types)
-            variant_wrappers.push_back(prepareUnpackDictionaries(variant_type, to_type));
-
-        return [variant_wrappers, variant_types, to_type]
-            (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr
-        {
-            const auto & column_variant = assert_cast<const ColumnVariant &>(*arguments.front().column.get());
-
-            /// First, cast each variant to the result type.
-            std::vector<ColumnPtr> casted_variant_columns;
-            casted_variant_columns.reserve(variant_types.size());
-            for (size_t i = 0; i != variant_types.size(); ++i)
-            {
-                auto variant_col = column_variant.getVariantPtrByLocalDiscriminator(i);
-                ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], "" }};
-                const auto & variant_wrapper = variant_wrappers[column_variant.globalDiscriminatorByLocal(i)];
-                casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size()));
-            }
-
-            /// Second, construct resulting column from casted variant columns according to discriminators.
-            const auto & local_discriminators = column_variant.getLocalDiscriminators();
-            auto res = result_type->createColumn();
-            res->reserve(input_rows_count);
-            for (size_t i = 0; i != input_rows_count; ++i)
-            {
-                auto local_discr = local_discriminators[i];
-                if (local_discr == ColumnVariant::NULL_DISCRIMINATOR)
-                    res->insertDefault();
-                else
-                    res->insertFrom(*casted_variant_columns[local_discr], column_variant.offsetAt(i));
-            }
-
-            return res;
-        };
-    }
-
-    static ColumnPtr createVariantFromDescriptorsAndOneNonEmptyVariant(const DataTypes & variant_types, const ColumnPtr & discriminators, const ColumnPtr & variant, ColumnVariant::Discriminator variant_discr)
-    {
-        Columns variants;
-        variants.reserve(variant_types.size());
-        for (size_t i = 0; i != variant_types.size(); ++i)
-        {
-            if (i == variant_discr)
-                variants.emplace_back(variant);
-            else
-                variants.push_back(variant_types[i]->createColumn());
-        }
-
-        return ColumnVariant::create(discriminators, variants);
-    }
-
-    WrapperType createColumnToVariantWrapper(const DataTypePtr & from_type, const DataTypeVariant & to_variant) const
-    {
-        /// We allow converting NULL to Variant(...) as Variant can store NULLs.
-        if (from_type->onlyNull())
-        {
-            return [](ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr
-            {
-                auto result_column = result_type->createColumn();
-                result_column->insertManyDefaults(input_rows_count);
-                return result_column;
-            };
-        }
-
-        auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(*removeNullableOrLowCardinalityNullable(from_type));
-        if (!variant_discr_opt)
-            throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert type {} to {}. Conversion to Variant allowed only for types from this Variant", from_type->getName(), to_variant.getName());
-
-        return [variant_discr = *variant_discr_opt]
-            (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t) -> ColumnPtr
-        {
-            const auto & result_variant_type = assert_cast<const DataTypeVariant &>(*result_type);
-            const auto & variant_types = result_variant_type.getVariants();
-            if (const ColumnNullable * col_nullable = typeid_cast<const ColumnNullable *>(arguments.front().column.get()))
-            {
-                const auto & column = col_nullable->getNestedColumnPtr();
-                const auto & null_map = col_nullable->getNullMapData();
-                IColumn::Filter filter;
-                filter.reserve(column->size());
-                auto discriminators = ColumnVariant::ColumnDiscriminators::create();
-                auto & discriminators_data = discriminators->getData();
-                discriminators_data.reserve(column->size());
-                size_t variant_size_hint = 0;
-                for (size_t i = 0; i != column->size(); ++i)
-                {
-                    if (null_map[i])
-                    {
-                        discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR);
-                        filter.push_back(0);
-                    }
-                    else
-                    {
-                        discriminators_data.push_back(variant_discr);
-                        filter.push_back(1);
-                        ++variant_size_hint;
-                    }
-                }
-
-                ColumnPtr variant_column;
-                /// If there were no NULLs, just use the column.
-                if (variant_size_hint == column->size())
-                    variant_column = column;
-                /// Otherwise we should use filtered column.
-                else
-                    variant_column = column->filter(filter, variant_size_hint);
-                return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), variant_column, variant_discr);
-            }
-            else if (isColumnLowCardinalityNullable(*arguments.front().column))
-            {
-                const auto & column = arguments.front().column;
-
-                /// Variant column cannot have LowCardinality(Nullable(...)) variant, as Variant column stores NULLs itself.
-                /// We should create a null-map, insert NULL_DISCRIMINATOR on NULL values and filter initial column.
-                const auto & col_lc = assert_cast<const ColumnLowCardinality &>(*column);
-                const auto & indexes = col_lc.getIndexes();
-                auto null_index = col_lc.getDictionary().getNullValueIndex();
-                IColumn::Filter filter;
-                filter.reserve(col_lc.size());
-                auto discriminators = ColumnVariant::ColumnDiscriminators::create();
-                auto & discriminators_data = discriminators->getData();
-                discriminators_data.reserve(col_lc.size());
-                size_t variant_size_hint = 0;
-                for (size_t i = 0; i != col_lc.size(); ++i)
-                {
-                    if (indexes.getUInt(i) == null_index)
-                    {
-                        discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR);
-                        filter.push_back(0);
-                    }
-                    else
-                    {
-                        discriminators_data.push_back(variant_discr);
-                        filter.push_back(1);
-                        ++variant_size_hint;
-                    }
-                }
-
-                MutableColumnPtr variant_column;
-                /// If there were no NULLs, we can just clone the column.
-                if (variant_size_hint == col_lc.size())
-                    variant_column = IColumn::mutate(column);
-                /// Otherwise we should filter column.
-                else
-                    variant_column = column->filter(filter, variant_size_hint)->assumeMutable();
-
-                assert_cast<ColumnLowCardinality &>(*variant_column).nestedRemoveNullable();
-                return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), std::move(variant_column), variant_discr);
-            }
-            else
-            {
-                const auto & column = arguments.front().column;
-                auto discriminators = ColumnVariant::ColumnDiscriminators::create();
-                discriminators->getData().resize_fill(column->size(), variant_discr);
-                return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), column, variant_discr);
-            }
-        };
-    }
-
-    /// Wrapper for conversion to/from Variant type
-    WrapperType createVariantWrapper(const DataTypePtr & from_type, const DataTypePtr & to_type) const
-    {
-        if (const auto * from_variant = checkAndGetDataType<DataTypeVariant>(from_type.get()))
-        {
-            if (const auto * to_variant = checkAndGetDataType<DataTypeVariant>(to_type.get()))
-                return createVariantToVariantWrapper(*from_variant, *to_variant);
-
-            return createVariantToColumnWrapper(*from_variant, to_type);
-        }
-
-        return createColumnToVariantWrapper(from_type, assert_cast<const DataTypeVariant &>(*to_type));
-    }
-
-    template <typename FieldType>
-    WrapperType createEnumWrapper(const DataTypePtr & from_type, const DataTypeEnum<FieldType> * to_type) const
-    {
-        using EnumType = DataTypeEnum<FieldType>;
-        using Function = typename FunctionTo<EnumType>::Type;
-
-        if (const auto * from_enum8 = checkAndGetDataType<DataTypeEnum8>(from_type.get()))
-            checkEnumToEnumConversion(from_enum8, to_type);
-        else if (const auto * from_enum16 = checkAndGetDataType<DataTypeEnum16>(from_type.get()))
-            checkEnumToEnumConversion(from_enum16, to_type);
-
-        if (checkAndGetDataType<DataTypeString>(from_type.get()))
-            return createStringToEnumWrapper<ColumnString, EnumType>();
-        else if (checkAndGetDataType<DataTypeFixedString>(from_type.get()))
-            return createStringToEnumWrapper<ColumnFixedString, EnumType>();
-        else if (isNativeNumber(from_type) || isEnum(from_type))
-        {
-            auto function = Function::create();
-            return createFunctionAdaptor(function, from_type);
-        }
-        else
-        {
-            if (cast_type == CastType::accurateOrNull)
-                return createToNullableColumnWrapper();
-            else
-                throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported",
-                    from_type->getName(), to_type->getName());
-        }
-    }
-
-    template <typename EnumTypeFrom, typename EnumTypeTo>
-    void checkEnumToEnumConversion(const EnumTypeFrom * from_type, const EnumTypeTo * to_type) const
-    {
-        const auto & from_values = from_type->getValues();
-        const auto & to_values = to_type->getValues();
-
-        using ValueType = std::common_type_t<typename EnumTypeFrom::FieldType, typename EnumTypeTo::FieldType>;
-        using NameValuePair = std::pair<std::string, ValueType>;
-        using EnumValues = std::vector<NameValuePair>;
-
-        EnumValues name_intersection;
-        std::set_intersection(std::begin(from_values), std::end(from_values),
-            std::begin(to_values), std::end(to_values), std::back_inserter(name_intersection),
-            [] (auto && from, auto && to) { return from.first < to.first; });
-
-        for (const auto & name_value : name_intersection)
-        {
-            const auto & old_value = name_value.second;
-            const auto & new_value = to_type->getValue(name_value.first);
-            if (old_value != new_value)
-                throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Enum conversion changes value for element '{}' from {} to {}",
-                    name_value.first, toString(old_value), toString(new_value));
-        }
-    }
ColumnStringType * col = typeid_cast(first_col); - - if (col && nullable_col && nullable_col->size() != col->size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnNullable is not compatible with original"); - - if (col) - { - const auto size = col->size(); - - auto res = result_type.createColumn(); - auto & out_data = static_cast(*res).getData(); - out_data.resize(size); - - auto default_enum_value = result_type.getValues().front().second; - - if (nullable_col) - { - for (size_t i = 0; i < size; ++i) - { - if (!nullable_col->isNullAt(i)) - out_data[i] = result_type.getValue(col->getDataAt(i)); - else - out_data[i] = default_enum_value; - } - } - else - { - for (size_t i = 0; i < size; ++i) - out_data[i] = result_type.getValue(col->getDataAt(i)); - } - - return res; - } - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected column {} as first argument of function {}", - first_col->getName(), function_name); - }; - } - - template - WrapperType createEnumToStringWrapper() const - { - const char * function_name = cast_name; - return [function_name] ( - ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, const ColumnNullable * nullable_col, size_t /*input_rows_count*/) - { - using ColumnEnumType = EnumType::ColumnType; - - const auto & first_col = arguments.front().column.get(); - const auto & first_type = arguments.front().type.get(); - - const ColumnEnumType * enum_col = typeid_cast(first_col); - const EnumType * enum_type = typeid_cast(first_type); - - if (enum_col && nullable_col && nullable_col->size() != enum_col->size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnNullable is not compatible with original"); - - if (enum_col && enum_type) - { - const auto size = enum_col->size(); - const auto & enum_data = enum_col->getData(); - - auto res = res_type->createColumn(); - - if (nullable_col) - { - for (size_t i = 0; i < size; ++i) - { - if (!nullable_col->isNullAt(i)) - { - const auto & value = enum_type->getNameForValue(enum_data[i]); - res->insertData(value.data, value.size); - } - else - res->insertDefault(); - } - } - else - { - for (size_t i = 0; i < size; ++i) - { - const auto & value = enum_type->getNameForValue(enum_data[i]); - res->insertData(value.data, value.size); - } - } - - return res; - } - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected column {} as first argument of function {}", - first_col->getName(), function_name); - }; - } - - static WrapperType createIdentityWrapper(const DataTypePtr &) - { - return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) - { - return arguments.front().column; - }; - } - - static WrapperType createNothingWrapper(const IDataType * to_type) - { - ColumnPtr res = to_type->createColumnConstWithDefaultValue(1); - return [res] (ColumnsWithTypeAndName &, const DataTypePtr &, const ColumnNullable *, size_t input_rows_count) - { - /// Column of Nothing type is trivially convertible to any other column - return res->cloneResized(input_rows_count)->convertToFullColumnIfConst(); - }; - } - - WrapperType prepareUnpackDictionaries(const DataTypePtr & from_type, const DataTypePtr & to_type) const - { - /// Conversion from/to Variant data type is processed in a special way. - /// We don't need to remove LowCardinality/Nullable. 
- if (isVariant(to_type) || isVariant(from_type)) - return createVariantWrapper(from_type, to_type); - - const auto * from_low_cardinality = typeid_cast(from_type.get()); - const auto * to_low_cardinality = typeid_cast(to_type.get()); - const auto & from_nested = from_low_cardinality ? from_low_cardinality->getDictionaryType() : from_type; - const auto & to_nested = to_low_cardinality ? to_low_cardinality->getDictionaryType() : to_type; - - if (from_type->onlyNull()) - { - if (!to_nested->isNullable() && !isVariant(to_type)) - { - if (cast_type == CastType::accurateOrNull) - { - return createToNullableColumnWrapper(); - } - else - { - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert NULL to a non-nullable type"); - } - } - - return [](ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) - { - return result_type->createColumnConstWithDefaultValue(input_rows_count)->convertToFullColumnIfConst(); - }; - } - - bool skip_not_null_check = false; - - if (from_low_cardinality && from_nested->isNullable() && !to_nested->isNullable()) - /// Disable check for dictionary. Will check that column doesn't contain NULL in wrapper below. - skip_not_null_check = true; - - auto wrapper = prepareRemoveNullable(from_nested, to_nested, skip_not_null_check); - if (!from_low_cardinality && !to_low_cardinality) - return wrapper; - - return [wrapper, from_low_cardinality, to_low_cardinality, skip_not_null_check] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) -> ColumnPtr - { - ColumnsWithTypeAndName args = {arguments[0]}; - auto & arg = args.front(); - auto res_type = result_type; - - ColumnPtr converted_column; - - ColumnPtr res_indexes; - /// For some types default can't be casted (for example, String to Int). In that case convert column to full. - bool src_converted_to_full_column = false; - - { - auto tmp_rows_count = input_rows_count; - - if (to_low_cardinality) - res_type = to_low_cardinality->getDictionaryType(); - - if (from_low_cardinality) - { - const auto * col_low_cardinality = typeid_cast(arguments[0].column.get()); - - if (skip_not_null_check && col_low_cardinality->containsNull()) - throw Exception(ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN, "Cannot convert NULL value to non-Nullable type"); - - arg.column = col_low_cardinality->getDictionary().getNestedColumn(); - arg.type = from_low_cardinality->getDictionaryType(); - - /// TODO: Make map with defaults conversion. - src_converted_to_full_column = !removeNullable(arg.type)->equals(*removeNullable(res_type)); - if (src_converted_to_full_column) - arg.column = arg.column->index(col_low_cardinality->getIndexes(), 0); - else - res_indexes = col_low_cardinality->getIndexesPtr(); - - tmp_rows_count = arg.column->size(); - } - - /// Perform the requested conversion. 
- converted_column = wrapper(args, res_type, nullable_source, tmp_rows_count); - } - - if (to_low_cardinality) - { - auto res_column = to_low_cardinality->createColumn(); - auto * col_low_cardinality = typeid_cast(res_column.get()); - - if (from_low_cardinality && !src_converted_to_full_column) - { - col_low_cardinality->insertRangeFromDictionaryEncodedColumn(*converted_column, *res_indexes); - } - else - col_low_cardinality->insertRangeFromFullColumn(*converted_column, 0, converted_column->size()); - - return res_column; - } - else if (!src_converted_to_full_column) - return converted_column->index(*res_indexes, 0); - else - return converted_column; - }; - } - - WrapperType prepareRemoveNullable(const DataTypePtr & from_type, const DataTypePtr & to_type, bool skip_not_null_check) const - { - /// Determine whether pre-processing and/or post-processing must take place during conversion. - - bool source_is_nullable = from_type->isNullable(); - bool result_is_nullable = to_type->isNullable(); - - auto wrapper = prepareImpl(removeNullable(from_type), removeNullable(to_type), result_is_nullable); - - if (result_is_nullable) - { - return [wrapper, source_is_nullable] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr - { - /// Create a temporary columns on which to perform the operation. - const auto & nullable_type = static_cast(*result_type); - const auto & nested_type = nullable_type.getNestedType(); - - ColumnsWithTypeAndName tmp_args; - if (source_is_nullable) - tmp_args = createBlockWithNestedColumns(arguments); - else - tmp_args = arguments; - - const ColumnNullable * nullable_source = nullptr; - - /// Add original ColumnNullable for createStringToEnumWrapper() - if (source_is_nullable) - { - if (arguments.size() != 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid number of arguments"); - nullable_source = typeid_cast(arguments.front().column.get()); - } - - /// Perform the requested conversion. - auto tmp_res = wrapper(tmp_args, nested_type, nullable_source, input_rows_count); - - /// May happen in fuzzy tests. For debug purpose. - if (!tmp_res) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Couldn't convert {} to {} in prepareRemoveNullable wrapper.", - arguments[0].type->getName(), nested_type->getName()); - - return wrapInNullable(tmp_res, arguments, nested_type, input_rows_count); - }; - } - else if (source_is_nullable) - { - /// Conversion from Nullable to non-Nullable. - - return [wrapper, skip_not_null_check] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr - { - auto tmp_args = createBlockWithNestedColumns(arguments); - auto nested_type = removeNullable(result_type); - - /// Check that all values are not-NULL. - /// Check can be skipped in case if LowCardinality dictionary is transformed. - /// In that case, correctness will be checked beforehand. 
- if (!skip_not_null_check) - { - const auto & col = arguments[0].column; - const auto & nullable_col = assert_cast(*col); - const auto & null_map = nullable_col.getNullMapData(); - - if (!memoryIsZero(null_map.data(), 0, null_map.size())) - throw Exception(ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN, "Cannot convert NULL value to non-Nullable type"); - } - const ColumnNullable * nullable_source = typeid_cast(arguments.front().column.get()); - return wrapper(tmp_args, nested_type, nullable_source, input_rows_count); - }; - } - else - return wrapper; - } - - /// 'from_type' and 'to_type' are nested types in case of Nullable. - /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested. - WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const - { - if (isUInt8(from_type) && isBool(to_type)) - return createUInt8ToBoolWrapper(from_type, to_type); - - /// We can cast IPv6 into IPv6, IPv4 into IPv4, but we should not allow to cast FixedString(16) into IPv6 as part of identity cast - bool safe_convert_custom_types = true; - - if (const auto * to_type_custom_name = to_type->getCustomName()) - safe_convert_custom_types = from_type->getCustomName() && from_type->getCustomName()->getName() == to_type_custom_name->getName(); - else if (const auto * from_type_custom_name = from_type->getCustomName()) - safe_convert_custom_types = to_type->getCustomName() && from_type_custom_name->getName() == to_type->getCustomName()->getName(); - - if (from_type->equals(*to_type) && safe_convert_custom_types) - { - /// We can only use identity conversion for DataTypeAggregateFunction when they are strictly equivalent. - if (typeid_cast(from_type.get())) - { - if (DataTypeAggregateFunction::strictEquals(from_type, to_type)) - return createIdentityWrapper(from_type); - } - else - return createIdentityWrapper(from_type); - } - else if (WhichDataType(from_type).isNothing()) - return createNothingWrapper(to_type.get()); - - WrapperType ret; - - auto make_default_wrapper = [&](const auto & types) -> bool - { - using Types = std::decay_t; - using ToDataType = typename Types::LeftType; - - if constexpr ( - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) - { - ret = createWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); - return true; - } - if constexpr (std::is_same_v) - { - if (isBool(to_type)) - ret = createBoolWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); - else - ret = createWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); - return true; - } - if constexpr ( - std::is_same_v || - std::is_same_v) - { - ret = createEnumWrapper(from_type, checkAndGetDataType(to_type.get())); - return true; - } - if constexpr ( - std::is_same_v> || - std::is_same_v> || - std::is_same_v> || - std::is_same_v> || - std::is_same_v) - { - ret = createDecimalWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); - return true; - } - - return false; - }; - - bool cast_ipv4_ipv6_default_on_conversion_error_value = context && 
context->getSettingsRef().cast_ipv4_ipv6_default_on_conversion_error; - bool input_format_ipv4_default_on_conversion_error_value = context && context->getSettingsRef().input_format_ipv4_default_on_conversion_error; - bool input_format_ipv6_default_on_conversion_error_value = context && context->getSettingsRef().input_format_ipv6_default_on_conversion_error; - - auto make_custom_serialization_wrapper = [&, cast_ipv4_ipv6_default_on_conversion_error_value, input_format_ipv4_default_on_conversion_error_value, input_format_ipv6_default_on_conversion_error_value](const auto & types) -> bool - { - using Types = std::decay_t; - using ToDataType = typename Types::RightType; - using FromDataType = typename Types::LeftType; - - if constexpr (WhichDataType(FromDataType::type_id).isStringOrFixedString()) - { - if constexpr (std::is_same_v) - { - ret = [cast_ipv4_ipv6_default_on_conversion_error_value, - input_format_ipv4_default_on_conversion_error_value, - requested_result_is_nullable]( - ColumnsWithTypeAndName & arguments, - const DataTypePtr & result_type, - const ColumnNullable * column_nullable, - size_t) -> ColumnPtr - { - if (!WhichDataType(result_type).isIPv4()) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName()); - - const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; - if (requested_result_is_nullable) - return convertToIPv4(arguments[0].column, null_map); - else if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv4_default_on_conversion_error_value) - return convertToIPv4(arguments[0].column, null_map); - else - return convertToIPv4(arguments[0].column, null_map); - }; - - return true; - } - - if constexpr (std::is_same_v) - { - ret = [cast_ipv4_ipv6_default_on_conversion_error_value, - input_format_ipv6_default_on_conversion_error_value, - requested_result_is_nullable]( - ColumnsWithTypeAndName & arguments, - const DataTypePtr & result_type, - const ColumnNullable * column_nullable, - size_t) -> ColumnPtr - { - if (!WhichDataType(result_type).isIPv6()) - throw Exception( - ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv6", result_type->getName()); - - const auto * null_map = column_nullable ? 
&column_nullable->getNullMapData() : nullptr; - if (requested_result_is_nullable) - return convertToIPv6(arguments[0].column, null_map); - else if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv6_default_on_conversion_error_value) - return convertToIPv6(arguments[0].column, null_map); - else - return convertToIPv6(arguments[0].column, null_map); - }; - - return true; - } - - if (to_type->getCustomSerialization() && to_type->getCustomName()) - { - ret = [requested_result_is_nullable]( - ColumnsWithTypeAndName & arguments, - const DataTypePtr & result_type, - const ColumnNullable * column_nullable, - size_t input_rows_count) -> ColumnPtr - { - auto wrapped_result_type = result_type; - if (requested_result_is_nullable) - wrapped_result_type = makeNullable(result_type); - return ConvertImplGenericFromString::execute( - arguments, wrapped_result_type, column_nullable, input_rows_count); - }; - return true; - } - } - else if constexpr (WhichDataType(FromDataType::type_id).isIPv6() && WhichDataType(ToDataType::type_id).isIPv4()) - { - ret = [cast_ipv4_ipv6_default_on_conversion_error_value, requested_result_is_nullable]( - ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t) - -> ColumnPtr - { - if (!WhichDataType(result_type).isIPv4()) - throw Exception( - ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName()); - - const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; - if (requested_result_is_nullable) - return convertIPv6ToIPv4(arguments[0].column, null_map); - else if (cast_ipv4_ipv6_default_on_conversion_error_value) - return convertIPv6ToIPv4(arguments[0].column, null_map); - else - return convertIPv6ToIPv4(arguments[0].column, null_map); - }; - - return true; - } - - if constexpr (WhichDataType(ToDataType::type_id).isStringOrFixedString()) - { - if constexpr (WhichDataType(FromDataType::type_id).isEnum()) - { - ret = createEnumToStringWrapper(); - return true; - } - else if (from_type->getCustomSerialization()) - { - ret = [](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr - { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); - }; - return true; - } - } - - return false; - }; - - if (callOnTwoTypeIndexes(from_type->getTypeId(), to_type->getTypeId(), make_custom_serialization_wrapper)) - return ret; - - if (callOnIndexAndDataType(to_type->getTypeId(), make_default_wrapper)) - return ret; - - switch (to_type->getTypeId()) - { - case TypeIndex::String: - return createStringWrapper(from_type); - case TypeIndex::FixedString: - return createFixedStringWrapper(from_type, checkAndGetDataType(to_type.get())->getN()); - case TypeIndex::Array: - return createArrayWrapper(from_type, static_cast(*to_type)); - case TypeIndex::Tuple: - return createTupleWrapper(from_type, checkAndGetDataType(to_type.get())); - case TypeIndex::Map: - return createMapWrapper(from_type, checkAndGetDataType(to_type.get())); - case TypeIndex::Object: - return createObjectWrapper(from_type, checkAndGetDataType(to_type.get())); - case TypeIndex::AggregateFunction: - return createAggregateFunctionWrapper(from_type, checkAndGetDataType(to_type.get())); - case TypeIndex::Interval: - return createIntervalWrapper(from_type, checkAndGetDataType(to_type.get())->getKind()); - default: - break; - } - - if (cast_type == CastType::accurateOrNull) - return 
createToNullableColumnWrapper();
-        else
-            throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported",
-                from_type->getName(), to_type->getName());
-    }
-};
-
-class MonotonicityHelper
-{
-public:
-    using MonotonicityForRange = FunctionCastBase::MonotonicityForRange;
-
-    template
-    static auto monotonicityForType(const DataType * const)
-    {
-        return FunctionTo::Type::Monotonic::get;
-    }
-
-    static MonotonicityForRange getMonotonicityInformation(const DataTypePtr & from_type, const IDataType * to_type);
-};
-
-FunctionBasePtr createFunctionBaseCast(
-    ContextPtr context
-    , const ColumnsWithTypeAndName & arguments
-    , const DataTypePtr & return_type
-    , std::optional diagnostic
-    , CastType cast_type);
-
-}

From 70796e497f1970d186d017226e93a67f1c6d170f Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 9 Mar 2024 08:32:13 +0100
Subject: [PATCH 061/283] Miscellaneous

---
 programs/library-bridge/createFunctionBaseCast.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/programs/library-bridge/createFunctionBaseCast.cpp b/programs/library-bridge/createFunctionBaseCast.cpp
index 473aa1ca81d..194fc4bfcf7 100644
--- a/programs/library-bridge/createFunctionBaseCast.cpp
+++ b/programs/library-bridge/createFunctionBaseCast.cpp
@@ -13,7 +13,7 @@ namespace ErrorCodes
 FunctionBasePtr createFunctionBaseCast(
     ContextPtr, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType)
 {
-    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for ODBC Bridge");
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge");
 }
 
 }

From 3cd70f48b0d2ef1584a9351291de4d230e71c021 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 9 Mar 2024 08:49:55 +0100
Subject: [PATCH 062/283] Anonymous

---
 src/Functions/CastOverloadResolver.cpp | 15 -----------
 src/Functions/FunctionsConversion.cpp  | 37 +++++++++++++-------------
 2 files changed, 19 insertions(+), 33 deletions(-)

diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp
index 4a081d684f6..ad4b28d11c1 100644
--- a/src/Functions/CastOverloadResolver.cpp
+++ b/src/Functions/CastOverloadResolver.cpp
@@ -117,21 +117,6 @@ private:
 };
 
 
-UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column)
-{
-    const auto * arg_type = named_column.type.get();
-    bool ok = checkAndGetDataType(arg_type)
-        || checkAndGetDataType(arg_type)
-        || checkAndGetDataType(arg_type)
-        || checkAndGetDataType(arg_type);
-    if (!ok)
-        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of toDecimal() scale {}", named_column.type->getName());
-
-    Field field;
-    named_column.column->get(0, field);
-    return static_cast(field.get());
-}
-
 FunctionOverloadResolverPtr createInternalCastOverloadResolver(CastType type, std::optional diagnostic)
 {
     return CastOverloadResolverImpl::create(ContextPtr{}, type, true, diagnostic);
diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp
index 865f7db8e12..3da6b9abe2d 100644
--- a/src/Functions/FunctionsConversion.cpp
+++ b/src/Functions/FunctionsConversion.cpp
@@ -93,11 +93,27 @@ namespace ErrorCodes
     extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE;
 }
 
+namespace
+{
+
 /** Type conversion functions.
  * toType - conversion in "natural way";
  */
-UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column);
+UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column)
+{
+    const auto * arg_type = named_column.type.get();
+    bool ok = checkAndGetDataType(arg_type)
+        || checkAndGetDataType(arg_type)
+        || checkAndGetDataType(arg_type)
+        || checkAndGetDataType(arg_type);
+    if (!ok)
+        throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of toDecimal() scale {}", named_column.type->getName());
+
+    Field field;
+    named_column.column->get(0, field);
+    return static_cast(field.get());
+}
 
 /// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type.
 struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; };
@@ -1271,23 +1287,6 @@ void convertFromTime(typename DataType::FieldType & x, time_t & time)
     x = time;
 }
 
-template <>
-inline void convertFromTime(DataTypeDate::FieldType & x, time_t & time)
-{
-    if (unlikely(time < 0))
-        x = 0;
-    else if (unlikely(time > 0xFFFF))
-        x = 0xFFFF;
-    else
-        x = time;
-}
-
-template <>
-inline void convertFromTime(DataTypeDate32::FieldType & x, time_t & time)
-{
-    x = static_cast(time);
-}
-
 template <>
 inline void convertFromTime(DataTypeDateTime::FieldType & x, time_t & time)
 {
@@ -4960,6 +4959,8 @@ public:
     }
 };
 
+}
+
 
 FunctionBasePtr createFunctionBaseCast(
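The "Anonymous" commit above relies on a standard C++ property: symbols defined inside an unnamed namespace get internal linkage, so extractToDecimalScale no longer needs a declaration visible to other translation units and cannot collide with a same-named symbol elsewhere; that is also why its duplicate definition could be deleted from CastOverloadResolver.cpp. A minimal self-contained sketch of the pattern follows (illustrative names only, not the actual ClickHouse symbols):

#include <iostream>

namespace
{

/// Internal linkage: this helper is visible only inside this translation unit,
/// so another .cpp file may define its own `extractScale` without an ODR clash.
int extractScale(int type_code)
{
    return type_code % 10;
}

}

int main()
{
    std::cout << extractScale(42) << '\n'; /// prints 2
}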
From d2e19a5da342236eb377e40a79a1fcd467b13b7a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 9 Mar 2024 08:55:53 +0100
Subject: [PATCH 063/283] Remove unused header

---
 src/Functions/CastOverloadResolver.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp
index ad4b28d11c1..1e1fbd06d93 100644
--- a/src/Functions/CastOverloadResolver.cpp
+++ b/src/Functions/CastOverloadResolver.cpp
@@ -2,7 +2,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 

From 8a46eace13458a735ccce5f423f6b750b0db869a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 9 Mar 2024 09:15:56 +0100
Subject: [PATCH 064/283] Less exceptions

---
 utils/check-style/check-large-objects.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/utils/check-style/check-large-objects.sh b/utils/check-style/check-large-objects.sh
index 5c1276e5732..4eb9190512f 100755
--- a/utils/check-style/check-large-objects.sh
+++ b/utils/check-style/check-large-objects.sh
@@ -3,7 +3,6 @@
 # Check that there are no new translation units compiled to an object file larger than a certain size.
 
 TU_EXCLUDES=(
-    CastOverloadResolver
     AggregateFunctionUniq
     FunctionsConversion
 
From 194de2066c1e60a15d48f21333e97e5ec2d2f82b Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 9 Mar 2024 09:22:14 +0100
Subject: [PATCH 065/283] Loosen

---
 CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b55e9810361..9ffb4789dc9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -61,8 +61,8 @@ if (ENABLE_CHECK_HEAVY_BUILDS)
     # set CPU time limit to 1000 seconds
     set (RLIMIT_CPU 1000)
 
-    # -fsanitize=memory is too heavy
-    if (SANITIZE STREQUAL "memory")
+    # -fsanitize=memory and address are too heavy
+    if (SANITIZE)
         set (RLIMIT_DATA 10000000000) # 10G
     endif()
 

From e9ab3ed2dd8ec6e81a5125c27ae4ad30882ecddf Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 9 Mar 2024 09:27:11 +0100
Subject: [PATCH 066/283] Even better

---
 programs/library-bridge/LibraryBridge.h            | 2 +-
 programs/library-bridge/createFunctionBaseCast.cpp | 6 +++++-
 programs/odbc-bridge/createFunctionBaseCast.cpp    | 8 ++++++--
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/programs/library-bridge/LibraryBridge.h b/programs/library-bridge/LibraryBridge.h
index 04860a042a3..a8d15a87e07 100644
--- a/programs/library-bridge/LibraryBridge.h
+++ b/programs/library-bridge/LibraryBridge.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include 
+#include 
 #include 
 #include "LibraryBridgeHandlerFactory.h"
 
diff --git a/programs/library-bridge/createFunctionBaseCast.cpp b/programs/library-bridge/createFunctionBaseCast.cpp
index 194fc4bfcf7..54319ca707a 100644
--- a/programs/library-bridge/createFunctionBaseCast.cpp
+++ b/programs/library-bridge/createFunctionBaseCast.cpp
@@ -1,5 +1,6 @@
-#include 
+#include 
 #include 
+#include 
 
 
 namespace DB
@@ -10,6 +11,9 @@ namespace ErrorCodes
     extern const int NOT_IMPLEMENTED;
 }
 
+class IFunctionBase;
+using FunctionBasePtr = std::shared_ptr;
+
 FunctionBasePtr createFunctionBaseCast(
     ContextPtr, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType)
 {
diff --git a/programs/odbc-bridge/createFunctionBaseCast.cpp b/programs/odbc-bridge/createFunctionBaseCast.cpp
index 473aa1ca81d..54319ca707a 100644
--- a/programs/odbc-bridge/createFunctionBaseCast.cpp
+++ b/programs/odbc-bridge/createFunctionBaseCast.cpp
@@ -1,5 +1,6 @@
-#include 
+#include 
 #include 
+#include 
 
 
 namespace DB
@@ -10,10 +11,13 @@ namespace ErrorCodes
     extern const int NOT_IMPLEMENTED;
 }
 
+class IFunctionBase;
+using FunctionBasePtr = std::shared_ptr;
+
 FunctionBasePtr createFunctionBaseCast(
     ContextPtr, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType)
 {
-    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for ODBC Bridge");
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge");
 }
 
 }

From 3ac4f56cfa65443119bd0fa9d23d528adf73d7bb Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 9 Mar 2024 18:53:31 +0100
Subject: [PATCH 067/283] Fix tests

---
 .../library-bridge/createFunctionBaseCast.cpp |  2 +-
 .../odbc-bridge/createFunctionBaseCast.cpp    |  2 +-
 src/Functions/CastOverloadResolver.cpp        | 20 ++++++++++++-------
 src/Functions/FunctionsConversion.cpp         | 13 ++++++------
 4 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/programs/library-bridge/createFunctionBaseCast.cpp b/programs/library-bridge/createFunctionBaseCast.cpp
index 54319ca707a..dcdd47d79ce 100644
--- a/programs/library-bridge/createFunctionBaseCast.cpp
+++ b/programs/library-bridge/createFunctionBaseCast.cpp
@@ -15,7 +15,7 @@ class IFunctionBase;
 using FunctionBasePtr = std::shared_ptr;
 
 FunctionBasePtr createFunctionBaseCast(
-    ContextPtr, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType)
+    ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType)
 {
     throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge");
 }
diff --git a/programs/odbc-bridge/createFunctionBaseCast.cpp b/programs/odbc-bridge/createFunctionBaseCast.cpp
index 54319ca707a..dcdd47d79ce 100644
--- a/programs/odbc-bridge/createFunctionBaseCast.cpp
+++ b/programs/odbc-bridge/createFunctionBaseCast.cpp
@@ -15,7 +15,7 @@ class IFunctionBase;
 using FunctionBasePtr = std::shared_ptr;
 
 FunctionBasePtr createFunctionBaseCast(
-    ContextPtr, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType)
+    ContextPtr, const char *, const ColumnsWithTypeAndName &, const DataTypePtr &, std::optional, CastType)
 {
     throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type conversions are not implemented for Library Bridge");
 }
diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp
index 1e1fbd06d93..4e8e99b4d95 100644
--- a/src/Functions/CastOverloadResolver.cpp
+++ b/src/Functions/CastOverloadResolver.cpp
@@ -17,11 +17,12 @@ namespace ErrorCodes
 }
 
 FunctionBasePtr createFunctionBaseCast(
-    ContextPtr context
-    , const ColumnsWithTypeAndName & arguments
-    , const DataTypePtr & return_type
-    , std::optional diagnostic
-    , CastType cast_type);
+    ContextPtr context,
+    const char * name,
+    const ColumnsWithTypeAndName & arguments,
+    const DataTypePtr & return_type,
+    std::optional diagnostic,
+    CastType cast_type);
 
 
 /** CastInternal does not preserve nullability of the data type,
@@ -33,7 +34,7 @@ FunctionBasePtr createFunctionBaseCast(
 class CastOverloadResolverImpl : public IFunctionOverloadResolver
 {
 public:
-    String getName() const override
+    const char * getNameImpl() const
     {
         if (cast_type == CastType::accurate)
             return "accurateCast";
@@ -45,6 +46,11 @@ public:
         return "CAST";
     }
 
+    String getName() const override
+    {
+        return getNameImpl();
+    }
+
     size_t getNumberOfArguments() const override { return 2; }
 
     ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
@@ -72,7 +78,7 @@ public:
 protected:
     FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
     {
-        return createFunctionBaseCast(context, arguments, return_type, diagnostic, cast_type);
+        return createFunctionBaseCast(context, getNameImpl(), arguments, return_type, diagnostic, cast_type);
     }
 
     DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp
index 3da6b9abe2d..d1d00780205 100644
--- a/src/Functions/FunctionsConversion.cpp
+++ b/src/Functions/FunctionsConversion.cpp
@@ -4963,11 +4963,12 @@ public:
 
 
 FunctionBasePtr createFunctionBaseCast(
-    ContextPtr context
-    , const ColumnsWithTypeAndName & arguments
-    , const DataTypePtr & return_type
-    , std::optional diagnostic
-    , CastType cast_type)
+    ContextPtr context,
+    const char * name,
+    const ColumnsWithTypeAndName & arguments,
+    const DataTypePtr & return_type,
+    std::optional diagnostic,
+    CastType cast_type)
 {
     DataTypes data_types(arguments.size());
 
     for (size_t i = 0; i < arguments.size(); ++i)
         data_types[i] = arguments[i].type;
 
     auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get());
-    return std::make_unique(context, "CAST", std::move(monotonicity), data_types, return_type, diagnostic, cast_type);
+    return std::make_unique(context, name, std::move(monotonicity), data_types, return_type, diagnostic, cast_type);
 }
 
 REGISTER_FUNCTION(Conversion)
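A note on the "Fix tests" commit above: the resolver now forwards its own name (CAST, accurateCast or accurateCastOrNull) into createFunctionBaseCast instead of the hard-coded "CAST", so the function base built for each cast flavour reports the correct name. A rough standalone sketch of the idea, using hypothetical class names rather than the real ClickHouse interfaces:

#include <iostream>
#include <memory>
#include <string>

/// Stand-in for the function object the resolver builds.
struct FunctionBase
{
    std::string name;
    explicit FunctionBase(std::string name_) : name(std::move(name_)) {}
    const std::string & getName() const { return name; }
};

/// Stand-in for CastOverloadResolverImpl.
struct Resolver
{
    const char * getNameImpl() const { return "accurateCastOrNull"; }

    /// Before the change the base was always constructed as "CAST";
    /// now the concrete resolver's name is threaded through.
    std::unique_ptr<FunctionBase> build() const
    {
        return std::make_unique<FunctionBase>(getNameImpl());
    }
};

int main()
{
    Resolver resolver;
    std::cout << resolver.build()->getName() << '\n'; /// prints "accurateCastOrNull"
}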
From e30a1c9cbcc465370aa3be5c96437598c1a3ac80 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 10 Mar 2024 02:26:18 +0100
Subject: [PATCH 068/283] Remove garbage

---
 src/Functions/FunctionsConversion.cpp | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp
index d1d00780205..5347e1f739d 100644
--- a/src/Functions/FunctionsConversion.cpp
+++ b/src/Functions/FunctionsConversion.cpp
@@ -3158,15 +3158,10 @@ struct FunctionCastName
     static constexpr auto name = "CAST";
 };
 
-class FunctionCastBase : public IFunctionBase
+class FunctionCast final : public IFunctionBase
 {
 public:
     using MonotonicityForRange = std::function;
-};
-
-class FunctionCast final : public FunctionCastBase
-{
-public:
     using WrapperType = std::function;
 
     FunctionCast(ContextPtr context_
@@ -4901,7 +4896,7 @@ arguments, result_type, input_rows_count); \
 class MonotonicityHelper
 {
 public:
-    using MonotonicityForRange = FunctionCastBase::MonotonicityForRange;
+    using MonotonicityForRange = FunctionCast::MonotonicityForRange;
 
     template
     static auto monotonicityForType(const DataType * const)

From bbeecf4ed5361154c2eed61f476ed9ca2c50b773 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 10 Mar 2024 02:51:32 +0100
Subject: [PATCH 069/283] Fix error

---
 src/Functions/CastOverloadResolver.cpp | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp
index 4e8e99b4d95..5ca4b0bc579 100644
--- a/src/Functions/CastOverloadResolver.cpp
+++ b/src/Functions/CastOverloadResolver.cpp
@@ -67,12 +67,15 @@ public:
 
     static FunctionOverloadResolverPtr create(ContextPtr context, CastType cast_type, bool internal, std::optional diagnostic)
     {
-        const auto & settings_ref = context->getSettingsRef();
-
         if (internal)
+        {
            return std::make_unique(context, cast_type, internal, diagnostic, false /*keep_nullable*/, DataTypeValidationSettings{});
+        }
         else
+        {
+            const auto & settings_ref = context->getSettingsRef();
             return std::make_unique(context, cast_type, internal, diagnostic, settings_ref.cast_keep_nullable, DataTypeValidationSettings(settings_ref));
+        }
     }
 
 protected:

From 6c0f8773426021d93229b2c713641e189c4ffe3e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 10 Mar 2024 03:02:38 +0100
Subject: [PATCH 070/283] I did not understand this code and removed it

---
 src/Functions/FunctionsConversion.cpp | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp
index 5347e1f739d..38c18ddf850 100644
--- a/src/Functions/FunctionsConversion.cpp
+++ b/src/Functions/FunctionsConversion.cpp
@@ -1906,18 +1906,6 @@ struct ConvertImpl
     }
 };
 
-template
-struct ConvertImpl
-{
-    template
-    static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/,
-        Additions additions [[maybe_unused]] = Additions())
-    {
-
-        return arguments[0].column;
-    }
-};
-
 /** Conversion from FixedString to String.
* Cutting sequences of zero bytes from end of strings. From 7c8a97a91a0842556e1ed67234c85c7aead67700 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Mar 2024 03:13:15 +0100 Subject: [PATCH 071/283] Maybe better --- src/Functions/FunctionsConversion.cpp | 425 +++++++++++++------------- 1 file changed, 206 insertions(+), 219 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 38c18ddf850..1d849446254 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -151,255 +151,256 @@ struct ConvertImpl { const ColumnWithTypeAndName & named_from = arguments[0]; + /// If types are the same, reuse the columns. + if (result_type->equals(*named_from.type)) + return named_from.column; + using ColVecFrom = typename FromDataType::ColumnType; using ColVecTo = typename ToDataType::ColumnType; if constexpr ((IsDataTypeDecimal || IsDataTypeDecimal) - && !(std::is_same_v || std::is_same_v)) + && !(std::is_same_v || std::is_same_v) + && (!IsDataTypeDecimalOrNumber || !IsDataTypeDecimalOrNumber)) { - if constexpr (!IsDataTypeDecimalOrNumber || !IsDataTypeDecimalOrNumber) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - named_from.column->getName(), Name::name); - } + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + named_from.column->getName(), Name::name); } - if (const ColVecFrom * col_from = checkAndGetColumn(named_from.column.get())) + const ColVecFrom * col_from = checkAndGetColumn(named_from.column.get()); + if (!col_from) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + named_from.column->getName(), Name::name); + + typename ColVecTo::MutablePtr col_to = nullptr; + + if constexpr (IsDataTypeDecimal) { - typename ColVecTo::MutablePtr col_to = nullptr; + UInt32 scale; - if constexpr (IsDataTypeDecimal) + if constexpr (std::is_same_v + || std::is_same_v) { - UInt32 scale; - - if constexpr (std::is_same_v - || std::is_same_v) - { - scale = additions.scale; - } - else - { - scale = additions; - } - - col_to = ColVecTo::create(0, scale); + scale = additions.scale; } else - col_to = ColVecTo::create(); - - const auto & vec_from = col_from->getData(); - auto & vec_to = col_to->getData(); - vec_to.resize(input_rows_count); - - ColumnUInt8::MutablePtr col_null_map_to; - ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr; - if constexpr (std::is_same_v) { - col_null_map_to = ColumnUInt8::create(input_rows_count, false); - vec_null_map_to = &col_null_map_to->getData(); + scale = additions; } - bool result_is_bool = isBool(result_type); - for (size_t i = 0; i < input_rows_count; ++i) + col_to = ColVecTo::create(0, scale); + } + else + col_to = ColVecTo::create(); + + const auto & vec_from = col_from->getData(); + auto & vec_to = col_to->getData(); + vec_to.resize(input_rows_count); + + ColumnUInt8::MutablePtr col_null_map_to; + ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr; + if constexpr (std::is_same_v) + { + col_null_map_to = ColumnUInt8::create(input_rows_count, false); + vec_null_map_to = &col_null_map_to->getData(); + } + + bool result_is_bool = isBool(result_type); + for (size_t i = 0; i < input_rows_count; ++i) + { + if constexpr (std::is_same_v) { - if constexpr (std::is_same_v) + if (result_is_bool) { - if (result_is_bool) - { - vec_to[i] = vec_from[i] != FromFieldType(0); - continue; - } - } - - if 
constexpr (std::is_same_v && std::is_same_v) - { - static_assert( - std::is_same_v, - "UInt128 and UUID types must be same"); - - vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; - vec_to[i].items[0] = vec_from[i].toUnderType().items[1]; - + vec_to[i] = vec_from[i] != FromFieldType(0); continue; } + } - if constexpr (std::is_same_v && std::is_same_v) - { - static_assert( - std::is_same_v, - "UInt128 and IPv6 types must be same"); + if constexpr (std::is_same_v && std::is_same_v) + { + static_assert( + std::is_same_v, + "UInt128 and UUID types must be same"); - vec_to[i].items[1] = std::byteswap(vec_from[i].toUnderType().items[0]); - vec_to[i].items[0] = std::byteswap(vec_from[i].toUnderType().items[1]); + vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; + vec_to[i].items[0] = vec_from[i].toUnderType().items[1]; - continue; - } + continue; + } - if constexpr (std::is_same_v != std::is_same_v) + if constexpr (std::is_same_v && std::is_same_v) + { + static_assert( + std::is_same_v, + "UInt128 and IPv6 types must be same"); + + vec_to[i].items[1] = std::byteswap(vec_from[i].toUnderType().items[0]); + vec_to[i].items[0] = std::byteswap(vec_from[i].toUnderType().items[1]); + + continue; + } + + if constexpr (std::is_same_v != std::is_same_v) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Conversion between numeric types and UUID is not supported. " + "Probably the passed UUID is unquoted"); + } + else if constexpr ( + (std::is_same_v != std::is_same_v) + && !(is_any_of || is_any_of) + ) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion from {} to {} is not supported", + TypeName, TypeName); + } + else if constexpr (std::is_same_v != std::is_same_v && !(std::is_same_v || std::is_same_v)) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Conversion between numeric types and IPv6 is not supported. " + "Probably the passed IPv6 is unquoted"); + } + else + { + if constexpr (IsDataTypeDecimal || IsDataTypeDecimal) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Conversion between numeric types and UUID is not supported. " - "Probably the passed UUID is unquoted"); - } - else if constexpr ( - (std::is_same_v != std::is_same_v) - && !(is_any_of || is_any_of) - ) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion from {} to {} is not supported", - TypeName, TypeName); - } - else if constexpr (std::is_same_v != std::is_same_v && !(std::is_same_v || std::is_same_v)) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Conversion between numeric types and IPv6 is not supported. 
" - "Probably the passed IPv6 is unquoted"); - } - else - { - if constexpr (IsDataTypeDecimal || IsDataTypeDecimal) + if constexpr (std::is_same_v) { - if constexpr (std::is_same_v) - { - ToFieldType result; - bool convert_result = false; + ToFieldType result; + bool convert_result = false; - if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) - convert_result = tryConvertDecimals(vec_from[i], col_from->getScale(), col_to->getScale(), result); - else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) - convert_result = tryConvertFromDecimal(vec_from[i], col_from->getScale(), result); - else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) - convert_result = tryConvertToDecimal(vec_from[i], col_to->getScale(), result); + if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) + convert_result = tryConvertDecimals(vec_from[i], col_from->getScale(), col_to->getScale(), result); + else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) + convert_result = tryConvertFromDecimal(vec_from[i], col_from->getScale(), result); + else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) + convert_result = tryConvertToDecimal(vec_from[i], col_to->getScale(), result); - if (convert_result) - vec_to[i] = result; - else - { - vec_to[i] = static_cast(0); - (*vec_null_map_to)[i] = true; - } - } + if (convert_result) + vec_to[i] = result; else { - if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) - vec_to[i] = convertDecimals(vec_from[i], col_from->getScale(), col_to->getScale()); - else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) - vec_to[i] = convertFromDecimal(vec_from[i], col_from->getScale()); - else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) - vec_to[i] = convertToDecimal(vec_from[i], col_to->getScale()); - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unsupported data type in conversion function"); + vec_to[i] = static_cast(0); + (*vec_null_map_to)[i] = true; } } else { - /// If From Data is Nan or Inf and we convert to integer type, throw exception - if constexpr (std::is_floating_point_v && !std::is_floating_point_v) - { - if (!isFinite(vec_from[i])) - { - if constexpr (std::is_same_v) - { - vec_to[i] = 0; - (*vec_null_map_to)[i] = true; - continue; - } - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unexpected inf or nan to integer conversion"); - } - } - - if constexpr (std::is_same_v - || std::is_same_v) - { - bool convert_result = accurate::convertNumeric(vec_from[i], vec_to[i]); - - if (!convert_result) - { - if (std::is_same_v) - { - vec_to[i] = 0; - (*vec_null_map_to)[i] = true; - } - else - { - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Value in column {} cannot be safely converted into type {}", - named_from.column->getName(), result_type->getName()); - } - } - } + if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) + vec_to[i] = convertDecimals(vec_from[i], col_from->getScale(), col_to->getScale()); + else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) + vec_to[i] = convertFromDecimal(vec_from[i], col_from->getScale()); + else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) + vec_to[i] = convertToDecimal(vec_from[i], col_to->getScale()); else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unsupported data type in conversion function"); + } + } + else + { + /// If From Data is Nan or Inf and we convert to integer type, throw exception + if constexpr (std::is_floating_point_v && !std::is_floating_point_v) + { + if (!isFinite(vec_from[i])) { - if constexpr (std::is_same_v && std::is_same_v) + if constexpr 
(std::is_same_v) { - const uint8_t ip4_cidr[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}; - const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); - if (!matchIPv6Subnet(src, ip4_cidr, 96)) - { - char addr[IPV6_MAX_TEXT_LENGTH + 1] {}; - char * paddr = addr; - formatIPv6(src, paddr); - - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 {} in column {} is not in IPv4 mapping block", addr, named_from.column->getName()); - } - - uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); - if constexpr (std::endian::native == std::endian::little) - { - dst[0] = src[15]; - dst[1] = src[14]; - dst[2] = src[13]; - dst[3] = src[12]; - } - else - { - dst[0] = src[12]; - dst[1] = src[13]; - dst[2] = src[14]; - dst[3] = src[15]; - } + vec_to[i] = 0; + (*vec_null_map_to)[i] = true; + continue; } - else if constexpr (std::is_same_v && std::is_same_v) - { - const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); - uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); - std::memset(dst, '\0', IPV6_BINARY_LENGTH); - dst[10] = dst[11] = 0xff; - - if constexpr (std::endian::native == std::endian::little) - { - dst[12] = src[3]; - dst[13] = src[2]; - dst[14] = src[1]; - dst[15] = src[0]; - } - else - { - dst[12] = src[0]; - dst[13] = src[1]; - dst[14] = src[2]; - dst[15] = src[3]; - } - } - else if constexpr (std::is_same_v && std::is_same_v) - vec_to[i] = static_cast(static_cast(vec_from[i])); - else if constexpr (std::is_same_v && (std::is_same_v || std::is_same_v)) - vec_to[i] = static_cast(vec_from[i] * DATE_SECONDS_PER_DAY); else - vec_to[i] = static_cast(vec_from[i]); + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unexpected inf or nan to integer conversion"); } } + + if constexpr (std::is_same_v + || std::is_same_v) + { + bool convert_result = accurate::convertNumeric(vec_from[i], vec_to[i]); + + if (!convert_result) + { + if (std::is_same_v) + { + vec_to[i] = 0; + (*vec_null_map_to)[i] = true; + } + else + { + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Value in column {} cannot be safely converted into type {}", + named_from.column->getName(), result_type->getName()); + } + } + } + else + { + if constexpr (std::is_same_v && std::is_same_v) + { + const uint8_t ip4_cidr[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}; + const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); + if (!matchIPv6Subnet(src, ip4_cidr, 96)) + { + char addr[IPV6_MAX_TEXT_LENGTH + 1] {}; + char * paddr = addr; + formatIPv6(src, paddr); + + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 {} in column {} is not in IPv4 mapping block", addr, named_from.column->getName()); + } + + uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); + if constexpr (std::endian::native == std::endian::little) + { + dst[0] = src[15]; + dst[1] = src[14]; + dst[2] = src[13]; + dst[3] = src[12]; + } + else + { + dst[0] = src[12]; + dst[1] = src[13]; + dst[2] = src[14]; + dst[3] = src[15]; + } + } + else if constexpr (std::is_same_v && std::is_same_v) + { + const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); + uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); + std::memset(dst, '\0', IPV6_BINARY_LENGTH); + dst[10] = dst[11] = 0xff; + + if constexpr (std::endian::native == std::endian::little) + { + dst[12] = src[3]; + dst[13] = src[2]; + dst[14] = src[1]; + dst[15] = src[0]; + } + else + { + dst[12] = src[0]; + dst[13] = src[1]; + dst[14] = src[2]; + 
dst[15] = src[3]; + } + } + else if constexpr (std::is_same_v && std::is_same_v) + vec_to[i] = static_cast(static_cast(vec_from[i])); + else if constexpr (std::is_same_v && (std::is_same_v || std::is_same_v)) + vec_to[i] = static_cast(vec_from[i] * DATE_SECONDS_PER_DAY); + else + vec_to[i] = static_cast(vec_from[i]); + } } } - - if constexpr (std::is_same_v) - return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); - else - return col_to; } + + if constexpr (std::is_same_v) + return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - named_from.column->getName(), Name::name); + return col_to; } }; @@ -1892,20 +1893,6 @@ template <> struct ConvertImpl : ConvertImpl {}; -/** If types are identical, just take reference to column. - */ -template -requires (!T::is_parametric) -struct ConvertImpl -{ - template - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, - Additions additions [[maybe_unused]] = Additions()) - { - return arguments[0].column; - } -}; - /** Conversion from FixedString to String. * Cutting sequences of zero bytes from end of strings. From 3c9e6cfc96c8ba7cbc805260bfdec9ca18776ad0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Mar 2024 03:52:14 +0100 Subject: [PATCH 072/283] Maybe better --- src/Functions/FunctionsConversion.cpp | 322 +++++++++++++------------- 1 file changed, 161 insertions(+), 161 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 1d849446254..66ba2d18f08 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -152,117 +152,113 @@ struct ConvertImpl const ColumnWithTypeAndName & named_from = arguments[0]; /// If types are the same, reuse the columns. 
- if (result_type->equals(*named_from.type)) + if constexpr (std::is_same_v && !FromDataType::is_parametric) + { return named_from.column; - - using ColVecFrom = typename FromDataType::ColumnType; - using ColVecTo = typename ToDataType::ColumnType; - - if constexpr ((IsDataTypeDecimal || IsDataTypeDecimal) - && !(std::is_same_v || std::is_same_v) - && (!IsDataTypeDecimalOrNumber || !IsDataTypeDecimalOrNumber)) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - named_from.column->getName(), Name::name); - } - - const ColVecFrom * col_from = checkAndGetColumn(named_from.column.get()); - if (!col_from) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - named_from.column->getName(), Name::name); - - typename ColVecTo::MutablePtr col_to = nullptr; - - if constexpr (IsDataTypeDecimal) - { - UInt32 scale; - - if constexpr (std::is_same_v - || std::is_same_v) - { - scale = additions.scale; - } - else - { - scale = additions; - } - - col_to = ColVecTo::create(0, scale); } else - col_to = ColVecTo::create(); - - const auto & vec_from = col_from->getData(); - auto & vec_to = col_to->getData(); - vec_to.resize(input_rows_count); - - ColumnUInt8::MutablePtr col_null_map_to; - ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr; - if constexpr (std::is_same_v) { - col_null_map_to = ColumnUInt8::create(input_rows_count, false); - vec_null_map_to = &col_null_map_to->getData(); - } + using ColVecFrom = typename FromDataType::ColumnType; + using ColVecTo = typename ToDataType::ColumnType; - bool result_is_bool = isBool(result_type); - for (size_t i = 0; i < input_rows_count; ++i) - { - if constexpr (std::is_same_v) + if constexpr ((IsDataTypeDecimal || IsDataTypeDecimal) + && !(std::is_same_v || std::is_same_v) + && (!IsDataTypeDecimalOrNumber || !IsDataTypeDecimalOrNumber)) { - if (result_is_bool) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + named_from.column->getName(), Name::name); + } + + const ColVecFrom * col_from = checkAndGetColumn(named_from.column.get()); + if (!col_from) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + named_from.column->getName(), Name::name); + + typename ColVecTo::MutablePtr col_to = nullptr; + + if constexpr (IsDataTypeDecimal) + { + UInt32 scale; + + if constexpr (std::is_same_v + || std::is_same_v) { - vec_to[i] = vec_from[i] != FromFieldType(0); - continue; + scale = additions.scale; + } + else + { + scale = additions; } - } - if constexpr (std::is_same_v && std::is_same_v) - { - static_assert( - std::is_same_v, - "UInt128 and UUID types must be same"); - - vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; - vec_to[i].items[0] = vec_from[i].toUnderType().items[1]; - - continue; - } - - if constexpr (std::is_same_v && std::is_same_v) - { - static_assert( - std::is_same_v, - "UInt128 and IPv6 types must be same"); - - vec_to[i].items[1] = std::byteswap(vec_from[i].toUnderType().items[0]); - vec_to[i].items[0] = std::byteswap(vec_from[i].toUnderType().items[1]); - - continue; - } - - if constexpr (std::is_same_v != std::is_same_v) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Conversion between numeric types and UUID is not supported. 
" - "Probably the passed UUID is unquoted"); - } - else if constexpr ( - (std::is_same_v != std::is_same_v) - && !(is_any_of || is_any_of) - ) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion from {} to {} is not supported", - TypeName, TypeName); - } - else if constexpr (std::is_same_v != std::is_same_v && !(std::is_same_v || std::is_same_v)) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Conversion between numeric types and IPv6 is not supported. " - "Probably the passed IPv6 is unquoted"); + col_to = ColVecTo::create(0, scale); } else + col_to = ColVecTo::create(); + + const auto & vec_from = col_from->getData(); + auto & vec_to = col_to->getData(); + vec_to.resize(input_rows_count); + + ColumnUInt8::MutablePtr col_null_map_to; + ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr; + if constexpr (std::is_same_v) { - if constexpr (IsDataTypeDecimal || IsDataTypeDecimal) + col_null_map_to = ColumnUInt8::create(input_rows_count, false); + vec_null_map_to = &col_null_map_to->getData(); + } + + bool result_is_bool = isBool(result_type); + for (size_t i = 0; i < input_rows_count; ++i) + { + if constexpr (std::is_same_v) + { + if (result_is_bool) + { + vec_to[i] = vec_from[i] != FromFieldType(0); + continue; + } + } + + if constexpr (std::is_same_v && std::is_same_v) + { + static_assert( + std::is_same_v, + "UInt128 and UUID types must be same"); + + vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; + vec_to[i].items[0] = vec_from[i].toUnderType().items[1]; + } + else if constexpr (std::is_same_v && std::is_same_v) + { + static_assert( + std::is_same_v, + "UInt128 and IPv6 types must be same"); + + vec_to[i].items[1] = std::byteswap(vec_from[i].toUnderType().items[0]); + vec_to[i].items[0] = std::byteswap(vec_from[i].toUnderType().items[1]); + } + else if constexpr (std::is_same_v != std::is_same_v) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Conversion between numeric types and UUID is not supported. " + "Probably the passed UUID is unquoted"); + } + else if constexpr ( + (std::is_same_v != std::is_same_v) + && !(is_any_of + || is_any_of)) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion from {} to {} is not supported", + TypeName, TypeName); + } + else if constexpr (std::is_same_v != std::is_same_v + && !(std::is_same_v || std::is_same_v)) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Conversion between numeric types and IPv6 is not supported. 
" + "Probably the passed IPv6 is unquoted"); + } + else if constexpr (IsDataTypeDecimal || IsDataTypeDecimal) { if constexpr (std::is_same_v) { @@ -296,6 +292,66 @@ struct ConvertImpl throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unsupported data type in conversion function"); } } + else if constexpr (std::is_same_v && std::is_same_v) + { + const uint8_t ip4_cidr[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}; + const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); + if (!matchIPv6Subnet(src, ip4_cidr, 96)) + { + char addr[IPV6_MAX_TEXT_LENGTH + 1] {}; + char * paddr = addr; + formatIPv6(src, paddr); + + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 {} in column {} is not in IPv4 mapping block", addr, named_from.column->getName()); + } + + uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); + if constexpr (std::endian::native == std::endian::little) + { + dst[0] = src[15]; + dst[1] = src[14]; + dst[2] = src[13]; + dst[3] = src[12]; + } + else + { + dst[0] = src[12]; + dst[1] = src[13]; + dst[2] = src[14]; + dst[3] = src[15]; + } + } + else if constexpr (std::is_same_v && std::is_same_v) + { + const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); + uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); + std::memset(dst, '\0', IPV6_BINARY_LENGTH); + dst[10] = dst[11] = 0xff; + + if constexpr (std::endian::native == std::endian::little) + { + dst[12] = src[3]; + dst[13] = src[2]; + dst[14] = src[1]; + dst[15] = src[0]; + } + else + { + dst[12] = src[0]; + dst[13] = src[1]; + dst[14] = src[2]; + dst[15] = src[3]; + } + } + else if constexpr (std::is_same_v && std::is_same_v) + { + vec_to[i] = static_cast(static_cast(vec_from[i])); + } + else if constexpr (std::is_same_v + && (std::is_same_v || std::is_same_v)) + { + vec_to[i] = static_cast(vec_from[i] * DATE_SECONDS_PER_DAY); + } else { /// If From Data is Nan or Inf and we convert to integer type, throw exception @@ -335,72 +391,16 @@ struct ConvertImpl } else { - if constexpr (std::is_same_v && std::is_same_v) - { - const uint8_t ip4_cidr[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}; - const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); - if (!matchIPv6Subnet(src, ip4_cidr, 96)) - { - char addr[IPV6_MAX_TEXT_LENGTH + 1] {}; - char * paddr = addr; - formatIPv6(src, paddr); - - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 {} in column {} is not in IPv4 mapping block", addr, named_from.column->getName()); - } - - uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); - if constexpr (std::endian::native == std::endian::little) - { - dst[0] = src[15]; - dst[1] = src[14]; - dst[2] = src[13]; - dst[3] = src[12]; - } - else - { - dst[0] = src[12]; - dst[1] = src[13]; - dst[2] = src[14]; - dst[3] = src[15]; - } - } - else if constexpr (std::is_same_v && std::is_same_v) - { - const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); - uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); - std::memset(dst, '\0', IPV6_BINARY_LENGTH); - dst[10] = dst[11] = 0xff; - - if constexpr (std::endian::native == std::endian::little) - { - dst[12] = src[3]; - dst[13] = src[2]; - dst[14] = src[1]; - dst[15] = src[0]; - } - else - { - dst[12] = src[0]; - dst[13] = src[1]; - dst[14] = src[2]; - dst[15] = src[3]; - } - } - else if constexpr (std::is_same_v && std::is_same_v) - vec_to[i] = static_cast(static_cast(vec_from[i])); - else if constexpr (std::is_same_v 
&& (std::is_same_v || std::is_same_v)) - vec_to[i] = static_cast(vec_from[i] * DATE_SECONDS_PER_DAY); - else - vec_to[i] = static_cast(vec_from[i]); + vec_to[i] = static_cast(vec_from[i]); } } } - } - if constexpr (std::is_same_v) - return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); - else - return col_to; + if constexpr (std::is_same_v) + return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); + else + return col_to; + } } }; From d2e29525c85458c07f385208582e820de10297e4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Mar 2024 04:08:34 +0100 Subject: [PATCH 073/283] Maybe better --- src/Functions/FunctionsConversion.cpp | 176 +++++++++++++------------- 1 file changed, 87 insertions(+), 89 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 66ba2d18f08..d2d72558500 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1,4 +1,3 @@ -#include #include #include @@ -115,6 +114,71 @@ UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column) return static_cast(field.get()); } + +/** Conversion of Date to DateTime: adding 00:00:00 time component. + */ +template +struct ToDateTimeImpl +{ + static constexpr auto name = "toDateTime"; + + static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (d > MAX_DATETIME_DAY_NUM) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Day number {} is out of bounds of type DateTime", d); + } + else if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) + { + if (d > MAX_DATETIME_DAY_NUM) + d = MAX_DATETIME_DAY_NUM; + } + return static_cast(time_zone.fromDayNum(DayNum(d))); + } + + static UInt32 execute(Int32 d, const DateLUTImpl & time_zone) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) + { + if (d < 0) + return 0; + else if (d > MAX_DATETIME_DAY_NUM) + d = MAX_DATETIME_DAY_NUM; + } + else if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (d < 0 || d > MAX_DATETIME_DAY_NUM) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type DateTime", d); + } + return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); + } + + static UInt32 execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) + { + return dt; + } + + static UInt32 execute(Int64 dt64, const DateLUTImpl & /*time_zone*/) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Ignore) + return static_cast(dt64); + else + { + if (dt64 < 0 || dt64 >= MAX_DATETIME_TIMESTAMP) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) + return dt64 < 0 ? 0 : std::numeric_limits::max(); + else + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type DateTime", dt64); + } + else + return static_cast(dt64); + } + } +}; + + /// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type. struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; }; @@ -151,11 +215,32 @@ struct ConvertImpl { const ColumnWithTypeAndName & named_from = arguments[0]; - /// If types are the same, reuse the columns. 
if constexpr (std::is_same_v && !FromDataType::is_parametric) { + /// If types are the same, reuse the columns. return named_from.column; } + else if constexpr ((std::is_same_v || std::is_same_v) + && std::is_same_v) + { + /// Conversion of DateTime to Date: throw off time component. + /// Conversion of Date32 to Date. + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (std::is_same_v && std::is_same_v) + { + /// Conversion of DateTime to Date: throw off time component. + return DateTimeTransformImpl::execute( + arguments, result_type, input_rows_count); + } + else if constexpr ((std::is_same_v || std::is_same_v) + && std::is_same_v) + { + /// Conversion from Date/Date32 to DateTime. + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } else { using ColVecFrom = typename FromDataType::ColumnType; @@ -404,88 +489,6 @@ struct ConvertImpl } }; -/** Conversion of DateTime to Date: throw off time component. - */ -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -/** Conversion of DateTime to Date32: throw off time component. - */ -template -struct ConvertImpl - : DateTimeTransformImpl {}; - -/** Conversion of Date to DateTime: adding 00:00:00 time component. - */ -template -struct ToDateTimeImpl -{ - static constexpr auto name = "toDateTime"; - - static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (d > MAX_DATETIME_DAY_NUM) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Day number {} is out of bounds of type DateTime", d); - } - else if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) - { - if (d > MAX_DATETIME_DAY_NUM) - d = MAX_DATETIME_DAY_NUM; - } - return static_cast(time_zone.fromDayNum(DayNum(d))); - } - - static UInt32 execute(Int32 d, const DateLUTImpl & time_zone) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) - { - if (d < 0) - return 0; - else if (d > MAX_DATETIME_DAY_NUM) - d = MAX_DATETIME_DAY_NUM; - } - else if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (d < 0 || d > MAX_DATETIME_DAY_NUM) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type DateTime", d); - } - return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); - } - - static UInt32 execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) - { - return dt; - } - - static UInt32 execute(Int64 dt64, const DateLUTImpl & /*time_zone*/) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Ignore) - return static_cast(dt64); - else - { - if (dt64 < 0 || dt64 >= MAX_DATETIME_TIMESTAMP) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) - return dt64 < 0 ? 0 : std::numeric_limits::max(); - else - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type DateTime", dt64); - } - else - return static_cast(dt64); - } - } -}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; /// Implementation of toDate function. @@ -510,11 +513,6 @@ struct ToDateTransform32Or64 } }; -/** Conversion of Date32 to Date. 
- */ -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; template struct ToDateTransform32Or64Signed From 24a248859d97f7faf78a1dc98a056332cde97401 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Mar 2024 06:52:47 +0100 Subject: [PATCH 074/283] Remove unneeded file --- src/Compression/examples/CMakeLists.txt | 3 - .../cached_compressed_read_buffer.cpp | 79 ------------------- .../examples/compressed_buffer.cpp | 3 - 3 files changed, 85 deletions(-) delete mode 100644 src/Compression/examples/cached_compressed_read_buffer.cpp diff --git a/src/Compression/examples/CMakeLists.txt b/src/Compression/examples/CMakeLists.txt index 7bf68e8845e..86fa5c3f78d 100644 --- a/src/Compression/examples/CMakeLists.txt +++ b/src/Compression/examples/CMakeLists.txt @@ -1,5 +1,2 @@ clickhouse_add_executable (compressed_buffer compressed_buffer.cpp) target_link_libraries (compressed_buffer PRIVATE dbms) - -clickhouse_add_executable (cached_compressed_read_buffer cached_compressed_read_buffer.cpp) -target_link_libraries (cached_compressed_read_buffer PRIVATE dbms) diff --git a/src/Compression/examples/cached_compressed_read_buffer.cpp b/src/Compression/examples/cached_compressed_read_buffer.cpp deleted file mode 100644 index a8e14ac7271..00000000000 --- a/src/Compression/examples/cached_compressed_read_buffer.cpp +++ /dev/null @@ -1,79 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include -#include - -#include - - -int main(int argc, char ** argv) -{ - using namespace DB; - - if (argc < 2) - { - std::cerr << "Usage: program path\n"; - return 1; - } - - try - { - UncompressedCache cache("SLRU", 1024, 0.5); - std::string path = argv[1]; - - std::cerr << std::fixed << std::setprecision(3); - - size_t hits = 0; - size_t misses = 0; - - { - Stopwatch watch; - CachedCompressedReadBuffer in( - path, - [&]() - { - return createReadBufferFromFileBase(path, {}); - }, - &cache - ); - WriteBufferFromFile out("/dev/null"); - copyData(in, out); - - std::cerr << "Elapsed: " << watch.elapsedSeconds() << std::endl; - } - - cache.getStats(hits, misses); - std::cerr << "Hits: " << hits << ", misses: " << misses << std::endl; - - { - Stopwatch watch; - CachedCompressedReadBuffer in( - path, - [&]() - { - return createReadBufferFromFileBase(path, {}); - }, - &cache - ); - WriteBufferFromFile out("/dev/null"); - copyData(in, out); - - std::cerr << "Elapsed: " << watch.elapsedSeconds() << std::endl; - } - - cache.getStats(hits, misses); - std::cerr << "Hits: " << hits << ", misses: " << misses << std::endl; - } - catch (const Exception & e) - { - std::cerr << e.what() << ", " << e.displayText() << std::endl; - return 1; - } - - return 0; -} diff --git a/src/Compression/examples/compressed_buffer.cpp b/src/Compression/examples/compressed_buffer.cpp index 74646ff0f28..530f0938662 100644 --- a/src/Compression/examples/compressed_buffer.cpp +++ b/src/Compression/examples/compressed_buffer.cpp @@ -1,7 +1,4 @@ -#include - #include -#include #include #include From a231f7de9ae7b86c41204c774dda40c3fbe73ac6 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Mar 2024 06:55:09 +0100 Subject: [PATCH 075/283] Remove unneeded file --- src/Compression/examples/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Compression/examples/CMakeLists.txt b/src/Compression/examples/CMakeLists.txt index 86fa5c3f78d..a924075d0dc 100644 --- a/src/Compression/examples/CMakeLists.txt +++ b/src/Compression/examples/CMakeLists.txt @@ -1,2 +1,2 @@ 
clickhouse_add_executable (compressed_buffer compressed_buffer.cpp) -target_link_libraries (compressed_buffer PRIVATE dbms) +target_link_libraries (compressed_buffer PRIVATE clickhouse_common_io) From 69cab686e4e418c0e7cfa74dfea459671d14bc49 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Mar 2024 10:03:31 +0100 Subject: [PATCH 076/283] Fix build of examples --- src/Compression/examples/CMakeLists.txt | 2 +- src/Core/examples/CMakeLists.txt | 3 - src/Core/examples/mysql_protocol.cpp | 390 ------------------ src/Interpreters/examples/CMakeLists.txt | 22 +- src/Interpreters/examples/hash_map.cpp | 1 - src/Interpreters/examples/hash_map_lookup.cpp | 2 - src/Processors/CMakeLists.txt | 4 - src/Processors/examples/CMakeLists.txt | 4 - .../examples/comma_separated_streams.cpp | 117 ------ src/Processors/examples/test_in | 8 - src/Storages/MergeTree/CMakeLists.txt | 3 - .../MergeTree/examples/CMakeLists.txt | 2 - .../examples/wal_action_metadata.cpp | 61 --- src/Storages/examples/CMakeLists.txt | 8 - src/Storages/examples/active_parts.py | 41 -- .../examples/async_read_buffer_from_hdfs.cpp | 37 -- ...get_abandonable_lock_in_all_partitions.cpp | 71 ---- 17 files changed, 12 insertions(+), 764 deletions(-) delete mode 100644 src/Core/examples/mysql_protocol.cpp delete mode 100644 src/Processors/examples/CMakeLists.txt delete mode 100644 src/Processors/examples/comma_separated_streams.cpp delete mode 100644 src/Processors/examples/test_in delete mode 100644 src/Storages/MergeTree/examples/CMakeLists.txt delete mode 100644 src/Storages/MergeTree/examples/wal_action_metadata.cpp delete mode 100644 src/Storages/examples/active_parts.py delete mode 100644 src/Storages/examples/async_read_buffer_from_hdfs.cpp delete mode 100644 src/Storages/examples/get_abandonable_lock_in_all_partitions.cpp diff --git a/src/Compression/examples/CMakeLists.txt b/src/Compression/examples/CMakeLists.txt index a924075d0dc..a7cc6bebf42 100644 --- a/src/Compression/examples/CMakeLists.txt +++ b/src/Compression/examples/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable (compressed_buffer compressed_buffer.cpp) -target_link_libraries (compressed_buffer PRIVATE clickhouse_common_io) +target_link_libraries (compressed_buffer PRIVATE clickhouse_common_io clickhouse_compression) diff --git a/src/Core/examples/CMakeLists.txt b/src/Core/examples/CMakeLists.txt index 2326eada96d..f30ee25491f 100644 --- a/src/Core/examples/CMakeLists.txt +++ b/src/Core/examples/CMakeLists.txt @@ -6,6 +6,3 @@ target_link_libraries (field PRIVATE dbms) clickhouse_add_executable (string_ref_hash string_ref_hash.cpp) target_link_libraries (string_ref_hash PRIVATE clickhouse_common_io) - -clickhouse_add_executable (mysql_protocol mysql_protocol.cpp) -target_link_libraries (mysql_protocol PRIVATE dbms) diff --git a/src/Core/examples/mysql_protocol.cpp b/src/Core/examples/mysql_protocol.cpp deleted file mode 100644 index a6247418e87..00000000000 --- a/src/Core/examples/mysql_protocol.cpp +++ /dev/null @@ -1,390 +0,0 @@ -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -int main(int argc, char ** argv) -{ - using namespace DB; - using namespace MySQLProtocol; - using namespace MySQLProtocol::Generic; - using namespace MySQLProtocol::Authentication; - using namespace MySQLProtocol::ConnectionPhase; - using namespace MySQLProtocol::ProtocolText; - - - uint8_t server_sequence_id = 1; - uint8_t client_sequence_id = 1; - String user = "default"; - String password = "123"; - String 
database; - - UInt8 charset_utf8 = 33; - UInt32 max_packet_size = MAX_PACKET_LENGTH; - String mysql_native_password = "mysql_native_password"; - - UInt32 server_capability_flags = CLIENT_PROTOCOL_41 | CLIENT_SECURE_CONNECTION | CLIENT_PLUGIN_AUTH - | CLIENT_PLUGIN_AUTH_LENENC_CLIENT_DATA | CLIENT_CONNECT_WITH_DB | CLIENT_DEPRECATE_EOF; - - UInt32 client_capability_flags = CLIENT_PROTOCOL_41 | CLIENT_PLUGIN_AUTH | CLIENT_SECURE_CONNECTION; - - /// Handshake packet - { - /// 1. Greeting: - /// 1.1 Server writes greeting to client - std::string s0; - WriteBufferFromString out0(s0); - - Handshake server_handshake( - server_capability_flags, -1, "ClickHouse", "mysql_native_password", "aaaaaaaaaaaaaaaaaaaaa", CharacterSet::utf8_general_ci); - server_handshake.writePayload(out0, server_sequence_id); - - /// 1.2 Client reads the greeting - ReadBufferFromString in0(s0); - Handshake client_handshake; - client_handshake.readPayload(in0, client_sequence_id); - - /// Check packet - ASSERT(server_handshake.capability_flags == client_handshake.capability_flags) - ASSERT(server_handshake.status_flags == client_handshake.status_flags) - ASSERT(server_handshake.server_version == client_handshake.server_version) - ASSERT(server_handshake.protocol_version == client_handshake.protocol_version) - ASSERT(server_handshake.auth_plugin_data.substr(0, 20) == client_handshake.auth_plugin_data) - ASSERT(server_handshake.auth_plugin_name == client_handshake.auth_plugin_name) - - /// 2. Greeting Response: - std::string s1; - WriteBufferFromString out1(s1); - - /// 2.1 Client writes to server - Native41 native41(password, client_handshake.auth_plugin_data); - String auth_plugin_data = native41.getAuthPluginData(); - HandshakeResponse client_handshake_response( - client_capability_flags, max_packet_size, charset_utf8, user, database, auth_plugin_data, mysql_native_password); - client_handshake_response.writePayload(out1, client_sequence_id); - - /// 2.2 Server reads the response - ReadBufferFromString in1(s1); - HandshakeResponse server_handshake_response; - server_handshake_response.readPayload(in1, server_sequence_id); - - /// Check - ASSERT(server_handshake_response.capability_flags == client_handshake_response.capability_flags) - ASSERT(server_handshake_response.character_set == client_handshake_response.character_set) - ASSERT(server_handshake_response.username == client_handshake_response.username) - ASSERT(server_handshake_response.database == client_handshake_response.database) - ASSERT(server_handshake_response.auth_response == client_handshake_response.auth_response) - ASSERT(server_handshake_response.auth_plugin_name == client_handshake_response.auth_plugin_name) - } - - /// OK Packet - { - // 1. Server writes packet - std::string s0; - WriteBufferFromString out0(s0); - OKPacket server(0x00, server_capability_flags, 0, 0, 0, "", ""); - server.writePayload(out0, server_sequence_id); - - // 2. Client reads packet - ReadBufferFromString in0(s0); - ResponsePacket client(server_capability_flags); - client.readPayload(in0, client_sequence_id); - - // Check - ASSERT(client.getType() == PACKET_OK) - ASSERT(client.ok.header == server.header) - ASSERT(client.ok.status_flags == server.status_flags) - ASSERT(client.ok.capabilities == server.capabilities) - } - - /// ERR Packet - { - // 1. Server writes packet - std::string s0; - WriteBufferFromString out0(s0); - ERRPacket server(123, "12345", "This is the error message"); - server.writePayload(out0, server_sequence_id); - - // 2. 
Client reads packet - ReadBufferFromString in0(s0); - ResponsePacket client(server_capability_flags); - client.readPayload(in0, client_sequence_id); - - // Check - ASSERT(client.getType() == PACKET_ERR) - ASSERT(client.err.header == server.header) - ASSERT(client.err.error_code == server.error_code) - ASSERT(client.err.sql_state == server.sql_state) - ASSERT(client.err.error_message == server.error_message) - } - - /// EOF Packet - { - // 1. Server writes packet - std::string s0; - WriteBufferFromString out0(s0); - EOFPacket server(1, 1); - server.writePayload(out0, server_sequence_id); - - // 2. Client reads packet - ReadBufferFromString in0(s0); - ResponsePacket client(server_capability_flags); - client.readPayload(in0, client_sequence_id); - - // Check - ASSERT(client.getType() == PACKET_EOF) - ASSERT(client.eof.header == server.header) - ASSERT(client.eof.warnings == server.warnings) - ASSERT(client.eof.status_flags == server.status_flags) - } - - /// ColumnDefinition Packet - { - // 1. Server writes packet - std::string s0; - WriteBufferFromString out0(s0); - ColumnDefinition server("schema", "tbl", "org_tbl", "name", "org_name", 33, 0x00, MYSQL_TYPE_STRING, 0x00, 0x00); - server.writePayload(out0, server_sequence_id); - - // 2. Client reads packet - ReadBufferFromString in0(s0); - ColumnDefinition client; - client.readPayload(in0, client_sequence_id); - - // Check - ASSERT(client.column_type == server.column_type) - ASSERT(client.column_length == server.column_length) - ASSERT(client.next_length == server.next_length) - ASSERT(client.character_set == server.character_set) - ASSERT(client.decimals == server.decimals) - ASSERT(client.name == server.name) - ASSERT(client.org_name == server.org_name) - ASSERT(client.table == server.table) - ASSERT(client.org_table == server.org_table) - ASSERT(client.schema == server.schema) - } - - /// GTID sets tests. 
- { - struct Testcase - { - String name; - String sets; - String want; - }; - - Testcase cases[] = { - {"gtid-sets-without-whitespace", - "2c5adab4-d64a-11e5-82df-ac162d72dac0:1-247743812,9f58c169-d121-11e7-835b-ac162db9c048:1-56060985:56060987-56061175:56061177-" - "56061224:56061226-75201528:75201530-75201755:75201757-75201983:75201985-75407550:75407552-75407604:75407606-75407661:" - "75407663-87889848:87889850-87889935:87889937-87890042:87890044-88391955:88391957-88392125:88392127-88392245:88392247-" - "88755771:88755773-88755826:88755828-88755921:88755923-100279047:100279049-100279126:100279128-100279247:100279249-121672430:" - "121672432-121672503:121672505-121672524:121672526-122946019:122946021-122946291:122946293-122946469:122946471-134313284:" - "134313286-134313415:134313417-134313648:134313650-136492728:136492730-136492784:136492786-136492904:136492906-145582402:" - "145582404-145582439:145582441-145582463:145582465-147455222:147455224-147455262:147455264-147455277:147455279-149319049:" - "149319051-149319261:149319263-150635915,a6d83ff6-bfcf-11e7-8c93-246e96158550:1-126618302", - "2c5adab4-d64a-11e5-82df-ac162d72dac0:1-247743812,9f58c169-d121-11e7-835b-ac162db9c048:1-56060985:56060987-56061175:56061177-" - "56061224:56061226-75201528:75201530-75201755:75201757-75201983:75201985-75407550:75407552-75407604:75407606-75407661:" - "75407663-87889848:87889850-87889935:87889937-87890042:87890044-88391955:88391957-88392125:88392127-88392245:88392247-" - "88755771:88755773-88755826:88755828-88755921:88755923-100279047:100279049-100279126:100279128-100279247:100279249-121672430:" - "121672432-121672503:121672505-121672524:121672526-122946019:122946021-122946291:122946293-122946469:122946471-134313284:" - "134313286-134313415:134313417-134313648:134313650-136492728:136492730-136492784:136492786-136492904:136492906-145582402:" - "145582404-145582439:145582441-145582463:145582465-147455222:147455224-147455262:147455264-147455277:147455279-149319049:" - "149319051-149319261:149319263-150635915,a6d83ff6-bfcf-11e7-8c93-246e96158550:1-126618302"}, - - {"gtid-sets-with-whitespace", - "2c5adab4-d64a-11e5-82df-ac162d72dac0:1-247743812, 9f58c169-d121-11e7-835b-ac162db9c048:1-56060985:56060987-56061175:56061177", - "2c5adab4-d64a-11e5-82df-ac162d72dac0:1-247743812,9f58c169-d121-11e7-835b-ac162db9c048:1-56060985:56060987-56061175:56061177"}, - - {"gtid-sets-single", "2c5adab4-d64a-11e5-82df-ac162d72dac0:1-247743812", "2c5adab4-d64a-11e5-82df-ac162d72dac0:1-247743812"}}; - - for (auto & tc : cases) - { - GTIDSets gtid_sets; - gtid_sets.parse(tc.sets); - - String want = tc.want; - String got = gtid_sets.toString(); - ASSERT(want == got) - } - } - - { - struct Testcase - { - String name; - String gtid_sets; - String gtid_str; - String want; - }; - - Testcase cases[] = { - {"merge", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-2:4-7", - "10662d71-9d91-11ea-bbc2-0242ac110003:3", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-7"}, - - {"merge-front", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-2:5-7", - "10662d71-9d91-11ea-bbc2-0242ac110003:3", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-3:5-7"}, - - {"extend-interval", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-2:6-7", - "10662d71-9d91-11ea-bbc2-0242ac110003:4", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-2:4:6-7"}, - - {"extend-interval", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-2:4:7-9", - "10662d71-9d91-11ea-bbc2-0242ac110003:5", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-2:4-5:7-9"}, - - {"extend-interval", - "10662d71-9d91-11ea-bbc2-0242ac110003:6-7", - 
"10662d71-9d91-11ea-bbc2-0242ac110003:4", - "10662d71-9d91-11ea-bbc2-0242ac110003:4:6-7"}, - - {"extend-interval", - "10662d71-9d91-11ea-bbc2-0242ac110003:6-7", - "10662d71-9d91-11ea-bbc2-0242ac110003:9", - "10662d71-9d91-11ea-bbc2-0242ac110003:6-7:9"}, - - {"extend-interval", - "10662d71-9d91-11ea-bbc2-0242ac110003:6-7", - "20662d71-9d91-11ea-bbc2-0242ac110003:9", - "10662d71-9d91-11ea-bbc2-0242ac110003:6-7,20662d71-9d91-11ea-bbc2-0242ac110003:9"}, - - {"shrink-sequence", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-3:4-5:7", - "10662d71-9d91-11ea-bbc2-0242ac110003:6", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-7"}, - - {"shrink-sequence", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-3:4-5:10", - "10662d71-9d91-11ea-bbc2-0242ac110003:8", - "10662d71-9d91-11ea-bbc2-0242ac110003:1-5:8:10" - } - }; - - for (auto & tc : cases) - { - GTIDSets gtid_sets; - gtid_sets.parse(tc.gtid_sets); - ASSERT(tc.gtid_sets == gtid_sets.toString()) - - GTIDSets gtid_sets1; - gtid_sets1.parse(tc.gtid_str); - - GTID gtid; - gtid.uuid = gtid_sets1.sets[0].uuid; - gtid.seq_no = gtid_sets1.sets[0].intervals[0].start; - gtid_sets.update(gtid); - - String want = tc.want; - String got = gtid_sets.toString(); - ASSERT(want == got) - } - } - - { - /// mysql_protocol --host=172.17.0.3 --user=root --password=123 --db=sbtest - try - { - boost::program_options::options_description desc("Allowed options"); - desc.add_options()("host", boost::program_options::value()->required(), "master host")( - "port", boost::program_options::value()->default_value(3306), "master port")( - "user", boost::program_options::value()->default_value("root"), "master user")( - "password", boost::program_options::value()->required(), "master password")( - "gtid", boost::program_options::value()->default_value(""), "executed GTID sets")( - "db", boost::program_options::value()->required(), "replicate do db")( - "binlog_checksum", boost::program_options::value()->default_value("CRC32"), "master binlog_checksum"); - - boost::program_options::variables_map options; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); - if (argc == 0) - { - return 1; - } - - auto host = options.at("host").as(); - auto port = options.at("port").as(); - auto master_user = options.at("user").as(); - auto master_password = options.at("password").as(); - auto gtid_sets = options.at("gtid").as(); - auto replicate_db = options.at("db").as(); - auto binlog_checksum = options.at("binlog_checksum").as(); - - std::cerr << "Master Host: " << host << ", Port: " << port << ", User: " << master_user << ", Password: " << master_password - << ", Replicate DB: " << replicate_db << ", GTID: " << gtid_sets << std::endl; - - UInt32 slave_id = 9004; - MySQLClient slave(host, port, master_user, master_password); - - /// Connect to the master. - slave.connect(); - slave.startBinlogDumpGTID(slave_id, replicate_db, {}, gtid_sets, binlog_checksum); - - WriteBufferFromOStream cerr(std::cerr); - - /// Read one binlog event on by one. 
- while (true) - { - auto event = slave.readOneBinlogEvent(); - switch (event->type()) - { - case MYSQL_QUERY_EVENT: { - auto binlog_event = std::static_pointer_cast(event); - binlog_event->dump(cerr); - - Position pos = slave.getPosition(); - pos.dump(cerr); - break; - } - case MYSQL_WRITE_ROWS_EVENT: { - auto binlog_event = std::static_pointer_cast(event); - binlog_event->dump(cerr); - - Position pos = slave.getPosition(); - pos.dump(cerr); - break; - } - case MYSQL_UPDATE_ROWS_EVENT: { - auto binlog_event = std::static_pointer_cast(event); - binlog_event->dump(cerr); - - Position pos = slave.getPosition(); - pos.dump(cerr); - break; - } - case MYSQL_DELETE_ROWS_EVENT: { - auto binlog_event = std::static_pointer_cast(event); - binlog_event->dump(cerr); - - Position pos = slave.getPosition(); - pos.dump(cerr); - break; - } - default: - if (event->header.type != MySQLReplication::EventType::HEARTBEAT_EVENT) - { - event->dump(cerr); - } - break; - } - } - } - catch (const Exception & ex) - { - std::cerr << "Error: " << ex.message() << std::endl; - return 1; - } - } -} diff --git a/src/Interpreters/examples/CMakeLists.txt b/src/Interpreters/examples/CMakeLists.txt index 11c219ff64e..8bb7f9eeb98 100644 --- a/src/Interpreters/examples/CMakeLists.txt +++ b/src/Interpreters/examples/CMakeLists.txt @@ -1,35 +1,35 @@ clickhouse_add_executable (hash_map hash_map.cpp) -target_link_libraries (hash_map PRIVATE dbms ch_contrib::sparsehash) +target_link_libraries (hash_map PRIVATE dbms clickhouse_functions ch_contrib::sparsehash) clickhouse_add_executable (hash_map_lookup hash_map_lookup.cpp) -target_link_libraries (hash_map_lookup PRIVATE dbms) +target_link_libraries (hash_map_lookup PRIVATE clickhouse_common_io clickhouse_compression) clickhouse_add_executable (hash_map3 hash_map3.cpp) -target_link_libraries (hash_map3 PRIVATE dbms ch_contrib::farmhash ch_contrib::metrohash) +target_link_libraries (hash_map3 PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::farmhash ch_contrib::metrohash) clickhouse_add_executable (hash_map_string hash_map_string.cpp) -target_link_libraries (hash_map_string PRIVATE dbms ch_contrib::sparsehash) +target_link_libraries (hash_map_string PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::sparsehash) clickhouse_add_executable (hash_map_string_2 hash_map_string_2.cpp) -target_link_libraries (hash_map_string_2 PRIVATE dbms) +target_link_libraries (hash_map_string_2 PRIVATE clickhouse_common_io clickhouse_compression) clickhouse_add_executable (hash_map_string_3 hash_map_string_3.cpp) -target_link_libraries (hash_map_string_3 PRIVATE dbms ch_contrib::farmhash ch_contrib::metrohash) +target_link_libraries (hash_map_string_3 PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::farmhash ch_contrib::metrohash) clickhouse_add_executable (hash_map_string_small hash_map_string_small.cpp) -target_link_libraries (hash_map_string_small PRIVATE dbms ch_contrib::sparsehash) +target_link_libraries (hash_map_string_small PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::sparsehash) clickhouse_add_executable (string_hash_map string_hash_map.cpp) -target_link_libraries (string_hash_map PRIVATE dbms ch_contrib::sparsehash) +target_link_libraries (string_hash_map PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::sparsehash) clickhouse_add_executable (string_hash_map_aggregation string_hash_map.cpp) -target_link_libraries (string_hash_map_aggregation PRIVATE dbms) +target_link_libraries (string_hash_map_aggregation PRIVATE 
clickhouse_common_io clickhouse_compression) clickhouse_add_executable (string_hash_set string_hash_set.cpp) -target_link_libraries (string_hash_set PRIVATE dbms) +target_link_libraries (string_hash_set PRIVATE clickhouse_common_io clickhouse_compression) clickhouse_add_executable (two_level_hash_map two_level_hash_map.cpp) -target_link_libraries (two_level_hash_map PRIVATE dbms ch_contrib::sparsehash) +target_link_libraries (two_level_hash_map PRIVATE clickhouse_common_io clickhouse_compression ch_contrib::sparsehash) clickhouse_add_executable (jit_example jit_example.cpp) target_link_libraries (jit_example PRIVATE dbms) diff --git a/src/Interpreters/examples/hash_map.cpp b/src/Interpreters/examples/hash_map.cpp index b55f174678e..0a91d00809f 100644 --- a/src/Interpreters/examples/hash_map.cpp +++ b/src/Interpreters/examples/hash_map.cpp @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Interpreters/examples/hash_map_lookup.cpp b/src/Interpreters/examples/hash_map_lookup.cpp index fd6b231cf73..829a234c537 100644 --- a/src/Interpreters/examples/hash_map_lookup.cpp +++ b/src/Interpreters/examples/hash_map_lookup.cpp @@ -8,10 +8,8 @@ #define DBMS_HASH_MAP_DEBUG_RESIZES #include -#include #include #include -#include #include #include diff --git a/src/Processors/CMakeLists.txt b/src/Processors/CMakeLists.txt index 7e965188b4c..e69de29bb2d 100644 --- a/src/Processors/CMakeLists.txt +++ b/src/Processors/CMakeLists.txt @@ -1,4 +0,0 @@ -if (ENABLE_EXAMPLES) - add_subdirectory(examples) -endif () - diff --git a/src/Processors/examples/CMakeLists.txt b/src/Processors/examples/CMakeLists.txt deleted file mode 100644 index 5d43a0d7d08..00000000000 --- a/src/Processors/examples/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -if (TARGET ch_contrib::hivemetastore) - clickhouse_add_executable (comma_separated_streams comma_separated_streams.cpp) - target_link_libraries (comma_separated_streams PRIVATE dbms) -endif() diff --git a/src/Processors/examples/comma_separated_streams.cpp b/src/Processors/examples/comma_separated_streams.cpp deleted file mode 100644 index 2ec5564f346..00000000000 --- a/src/Processors/examples/comma_separated_streams.cpp +++ /dev/null @@ -1,117 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace DB; - -int main() -try -{ - Block sample; - { - // a - ColumnWithTypeAndName col; - col.name = "a"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - { - // b - ColumnWithTypeAndName col; - col.name = "b"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - { - // c - ColumnWithTypeAndName col; - col.name = "c"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - { - // d - ColumnWithTypeAndName col; - col.name = "d"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - { - // e - ColumnWithTypeAndName col; - col.name = "e"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - { - // f - ColumnWithTypeAndName col; - col.name = "f"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - { - // g - ColumnWithTypeAndName col; - col.name = "g"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - { - // h - ColumnWithTypeAndName col; - col.name = "h"; - col.type = std::make_shared(); - sample.insert(std::move(col)); - } - - - ReadBufferFromFile in_buf("test_in"); - WriteBufferFromFile out_buf("test_out"); - - 
FormatSettings format_settings; - format_settings.with_names_use_header = true; - format_settings.skip_unknown_fields = true; - format_settings.csv.delimiter = '\x01'; - format_settings.hive_text.input_field_names = - { - "d", - "e", - "f", - "a", - "b", - "c", - "g", - "h", - "i", - "j", - }; - - RowInputFormatParams in_params{DEFAULT_INSERT_BLOCK_SIZE}; - InputFormatPtr input_format = std::make_shared(sample, in_buf, in_params, format_settings); - auto pipeline = QueryPipeline(std::move(input_format)); - auto reader = std::make_unique(pipeline); - - OutputFormatPtr output_format = std::make_shared(out_buf, sample, true, true, format_settings); - Block res; - while (reader->pull(res)) - { - output_format->write(res); - } - return 0; -} -catch (...) -{ - std::cerr << getCurrentExceptionMessage(true) << '\n'; - return 1; -} diff --git a/src/Processors/examples/test_in b/src/Processors/examples/test_in deleted file mode 100644 index c7df97a26a6..00000000000 --- a/src/Processors/examples/test_in +++ /dev/null @@ -1,8 +0,0 @@ -2021-09-14JPall20.0200 -2021-09-14CIall20.0100 -2021-09-14JMall40.25411 -2021-09-14MMall310.19354838709677422766 -2021-09-14TZAndroid30.3333333333333333311 -2021-09-14SGall80.25412 -2021-09-14PYall11.0001 -2021-09-14MXall10.0100 diff --git a/src/Storages/MergeTree/CMakeLists.txt b/src/Storages/MergeTree/CMakeLists.txt index 390835f17ae..e69de29bb2d 100644 --- a/src/Storages/MergeTree/CMakeLists.txt +++ b/src/Storages/MergeTree/CMakeLists.txt @@ -1,3 +0,0 @@ -if(ENABLE_EXAMPLES) - add_subdirectory(examples) -endif() diff --git a/src/Storages/MergeTree/examples/CMakeLists.txt b/src/Storages/MergeTree/examples/CMakeLists.txt deleted file mode 100644 index 25bba7ae0b4..00000000000 --- a/src/Storages/MergeTree/examples/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -clickhouse_add_executable (wal_action_metadata wal_action_metadata.cpp) -target_link_libraries (wal_action_metadata PRIVATE dbms) diff --git a/src/Storages/MergeTree/examples/wal_action_metadata.cpp b/src/Storages/MergeTree/examples/wal_action_metadata.cpp deleted file mode 100644 index 03c38c7a186..00000000000 --- a/src/Storages/MergeTree/examples/wal_action_metadata.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include - -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int UNKNOWN_FORMAT_VERSION; -} -} - -int main(int, char **) -{ - try - { - { - std::cout << "test: dummy test" << std::endl; - - DB::MergeTreeWriteAheadLog::ActionMetadata metadata_out; - DB::MemoryWriteBuffer buf{}; - - metadata_out.write(buf); - buf.finalize(); - - metadata_out.read(*buf.tryGetReadBuffer()); - } - - { - std::cout << "test: min compatibility" << std::endl; - - DB::MergeTreeWriteAheadLog::ActionMetadata metadata_out; - metadata_out.min_compatible_version = DB::MergeTreeWriteAheadLog::WAL_VERSION + 1; - DB::MemoryWriteBuffer buf{}; - - metadata_out.write(buf); - buf.finalize(); - - try - { - metadata_out.read(*buf.tryGetReadBuffer()); - } - catch (const DB::Exception & e) - { - if (e.code() != DB::ErrorCodes::UNKNOWN_FORMAT_VERSION) - { - std::cerr << "Expected UNKNOWN_FORMAT_VERSION exception but got: " - << e.what() << ", " << e.displayText() << std::endl; - } - } - } - } - catch (const DB::Exception & e) - { - std::cerr << e.what() << ", " << e.displayText() << std::endl; - return 1; - } - - return 0; -} diff --git a/src/Storages/examples/CMakeLists.txt b/src/Storages/examples/CMakeLists.txt index 225337d8ec8..cddfc9404d4 100644 --- a/src/Storages/examples/CMakeLists.txt +++ 
b/src/Storages/examples/CMakeLists.txt @@ -6,11 +6,3 @@ target_link_libraries (merge_selector2 PRIVATE dbms) clickhouse_add_executable (get_current_inserts_in_replicated get_current_inserts_in_replicated.cpp) target_link_libraries (get_current_inserts_in_replicated PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper string_utils) - -clickhouse_add_executable (get_abandonable_lock_in_all_partitions get_abandonable_lock_in_all_partitions.cpp) -target_link_libraries (get_abandonable_lock_in_all_partitions PRIVATE dbms clickhouse_common_config clickhouse_common_zookeeper) - -if (TARGET ch_contrib::hdfs) - clickhouse_add_executable (async_read_buffer_from_hdfs async_read_buffer_from_hdfs.cpp) - target_link_libraries (async_read_buffer_from_hdfs PRIVATE dbms ch_contrib::hdfs) -endif () diff --git a/src/Storages/examples/active_parts.py b/src/Storages/examples/active_parts.py deleted file mode 100644 index d82c5ca96bf..00000000000 --- a/src/Storages/examples/active_parts.py +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/bin/python -# coding=UTF-8 - -# Displays a list of active parts - parts that are not overlapped by any other part. -# Usage: `ls /var/lib/clickhouse/data/merge/visits | active_parts.py` - -import sys -import re - -parts = {} -for s in sys.stdin.read().split(): - m = re.match( - "^([0-9]{6})[0-9]{2}_([0-9]{6})[0-9]{2}_([0-9]+)_([0-9]+)_([0-9]+)$", s - ) - if m == None: - continue - m1 = m.group(1) - m2 = m.group(2) - i1 = int(m.group(3)) - i2 = int(m.group(4)) - l = int(m.group(5)) - if m1 != m2: - raise Exception("not in single month: " + s) - if m1 not in parts: - parts[m1] = [] - parts[m1].append((i1, i2, l, s)) - -for m, ps in sorted(parts.items()): - ps.sort(key=lambda i1_i2_l_s: (i1_i2_l_s[0], -i1_i2_l_s[1], -i1_i2_l_s[2])) - (x2, y2, l2, s2) = (-1, -1, -1, -1) - for x1, y1, l1, s1 in ps: - if x1 >= x2 and y1 <= y2 and l1 < l2 and (x1, y1) != (x2, y2): # 2 contains 1 - pass - elif x1 > y2: # 1 is to the right of 2 - if x1 != y2 + 1 and y2 != -1: - print() # to see the missing numbers - (x2, y2, l2, s2) = (x1, y1, l1, s1) - print(s1) - else: - raise Exception("invalid parts intersection: " + s1 + " and " + s2) - print() diff --git a/src/Storages/examples/async_read_buffer_from_hdfs.cpp b/src/Storages/examples/async_read_buffer_from_hdfs.cpp deleted file mode 100644 index 4f6aed8ef65..00000000000 --- a/src/Storages/examples/async_read_buffer_from_hdfs.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -int main() -{ - using namespace DB; - namespace fs = std::filesystem; - - String config_path = "/path/to/config/file"; - ConfigProcessor config_processor(config_path, false, true); - config_processor.setConfigPath(fs::path(config_path).parent_path()); - auto loaded_config = config_processor.loadConfig(false); - auto * config = loaded_config.configuration.duplicate(); - - String hdfs_namenode_url = "hdfs://namenode:port/"; - String path = "/path/to/hdfs/file"; - ReadSettings settings = {}; - auto in = std::make_unique(hdfs_namenode_url, path, *config, settings); - auto & reader = getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); - AsynchronousReadBufferFromHDFS buf(reader, {}, std::move(in)); - - String output; - WriteBufferFromString out(output); - copyData(buf, out); - std::cout << "output:" << output << std::endl; - return 0; -} diff --git a/src/Storages/examples/get_abandonable_lock_in_all_partitions.cpp 
b/src/Storages/examples/get_abandonable_lock_in_all_partitions.cpp
deleted file mode 100644
index 4607d68f02d..00000000000
--- a/src/Storages/examples/get_abandonable_lock_in_all_partitions.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-#include
-#include
-#include
-#include
-#include
-
-#include
-
-#include
-
-
-using namespace DB;
-
-/// This test is useful for assessing the performance of acquiring block numbers in all partitions (and there
-/// can be ~1000 of them). This is needed when creating a mutation entry for a ReplicatedMergeTree table.
-int main(int argc, char ** argv)
-try
-{
-    if (argc != 3)
-    {
-        std::cerr << "usage: " << argv[0] << " " << std::endl;
-        return 3;
-    }
-
-    ConfigProcessor processor(argv[1], false, true);
-    auto config = processor.loadConfig().configuration;
-    String root_path = argv[2];
-
-    auto zk = zkutil::ZooKeeper::createWithoutKillingPreviousSessions(*config, zkutil::getZooKeeperConfigName(*config), nullptr);
-
-    String temp_path = root_path + "/temp";
-    String blocks_path = root_path + "/block_numbers";
-
-    Stopwatch total_timer;
-    Stopwatch timer;
-
-    EphemeralLocksInAllPartitions locks(blocks_path, "test_lock-", temp_path, *zk);
-
-    std::cerr << "Locked, elapsed: " << timer.elapsedSeconds() << std::endl;
-    for (const auto & lock : locks.getLocks())
-        std::cout << lock.partition_id << " " << lock.number << std::endl;
-    timer.restart();
-
-    locks.unlock();
-    std::cerr << "Abandoned, elapsed: " << timer.elapsedSeconds() << std::endl;
-
-    std::cerr << "Total elapsed: " << total_timer.elapsedSeconds() << std::endl;
-
-    return 0;
-}
-catch (const Exception & e)
-{
-    std::cerr << e.what() << ", " << e.displayText() << ": " << std::endl
-              << e.getStackTraceString() << std::endl;
-    throw;
-}
-catch (Poco::Exception & e)
-{
-    std::cerr << "Exception: " << e.displayText() << std::endl;
-    throw;
-}
-catch (std::exception & e)
-{
-    std::cerr << "std::exception: " << e.what() << std::endl;
-    throw;
-}
-catch (...)
-{
-    std::cerr << "Some exception" << std::endl;
-    throw;
-}

From adf056b54ad35638245c1a5c30bc57c4cb17fc6c Mon Sep 17 00:00:00 2001
From: Dan Wu
Date: Sun, 10 Mar 2024 20:42:41 +0800
Subject: [PATCH 077/283] Update check-large-objects.sh to be language neutral

The previous implementation uses the `total` keyword to filter out the
line that contains the total size of objects under a directory. But when
the OS uses another language, that line is not filtered out, so the
script fails.

This implementation changes the script to match only lines that contain
an object file, identified by its extension, which makes the check
language-neutral.

---
 utils/check-style/check-large-objects.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/check-style/check-large-objects.sh b/utils/check-style/check-large-objects.sh
index 5c1276e5732..04f4e0c3171 100755
--- a/utils/check-style/check-large-objects.sh
+++ b/utils/check-style/check-large-objects.sh
@@ -12,7 +12,7 @@ TU_EXCLUDES=(
     Aggregator
 )
 
-if find $1 -name '*.o' | xargs wc -c | grep -v total | sort -rn | awk '{ if ($1 > 50000000) print }' \
+if find $1 -name '*.o' | xargs wc -c | grep --regexp='.o$' | sort -rn | awk '{ if ($1 > 50000000) print }' \
     | grep -v -f <(printf "%s\n" "${TU_EXCLUDES[@]}")
 then
     echo "^ It's not allowed to have so large translation units."
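A minimal sketch of the failure mode this patch works around (the German locale, file names, and sizes below are illustrative assumptions, not CI output; the unescaped dot in `.o$` is tightened to `\.o$` in a follow-up patch below):

    # Under a non-English locale, coreutils may translate the summary label,
    # so `grep -v total` no longer removes it:
    $ LC_ALL=de_DE.UTF-8 wc -c a.o b.o
        100 a.o
        200 b.o
        300 insgesamt   # the translated "total" line survives `grep -v total`

    # Matching lines by the object-file suffix works in any locale:
    $ LC_ALL=de_DE.UTF-8 wc -c a.o b.o | grep --regexp='.o$'
        100 a.o
        200 b.o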
From 0b63cb237a5a72c96bbc3d4cf52ab70a5d2ad2aa Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 10 Mar 2024 12:05:32 +0100 Subject: [PATCH 078/283] Fix --- src/Storages/System/StorageSystemDisks.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index f67d4f7acd0..eecc889f86b 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -73,9 +73,9 @@ Pipe StorageSystemDisks::read( col_unreserved->insert(disk_ptr->getUnreservedSpace().value_or(std::numeric_limits::max())); col_keep->insert(disk_ptr->getKeepingFreeSpace()); auto data_source_description = disk_ptr->getDataSourceDescription(); - col_type->insert(data_source_description.type); - col_object_storage_type->insert(data_source_description.object_storage_type); - col_metadata_type->insert(data_source_description.metadata_type); + col_type->insert(magic_enum::enum_name(data_source_description.type)); + col_object_storage_type->insert(magic_enum::enum_name(data_source_description.object_storage_type)); + col_metadata_type->insert(magic_enum::enum_name(data_source_description.metadata_type)); col_is_encrypted->insert(data_source_description.is_encrypted); col_is_read_only->insert(disk_ptr->isReadOnly()); col_is_write_once->insert(disk_ptr->isWriteOnce()); From d2b7fb03b12c5ce3ec5a377f3872483639e8c4fe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 10 Mar 2024 23:36:03 +0100 Subject: [PATCH 079/283] Fix localization in check-large-objects --- utils/check-style/check-large-objects.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/check-large-objects.sh b/utils/check-style/check-large-objects.sh index 4eb9190512f..3e2a385bdd0 100755 --- a/utils/check-style/check-large-objects.sh +++ b/utils/check-style/check-large-objects.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +export LC_ALL=C # The "total" should be printed without localization + # Check that there are no new translation units compiled to an object file larger than a certain size. TU_EXCLUDES=( From fc9efeddf77dc76981ccbad263566911994d33a2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 11 Mar 2024 01:42:44 +0300 Subject: [PATCH 080/283] Update check-large-objects.sh --- utils/check-style/check-large-objects.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/check-style/check-large-objects.sh b/utils/check-style/check-large-objects.sh index 04f4e0c3171..5faacae84b9 100755 --- a/utils/check-style/check-large-objects.sh +++ b/utils/check-style/check-large-objects.sh @@ -12,7 +12,7 @@ TU_EXCLUDES=( Aggregator ) -if find $1 -name '*.o' | xargs wc -c | grep --regexp='.o$' | sort -rn | awk '{ if ($1 > 50000000) print }' \ +if find $1 -name '*.o' | xargs wc -c | grep --regexp='\.o$' | sort -rn | awk '{ if ($1 > 50000000) print }' \ | grep -v -f <(printf "%s\n" "${TU_EXCLUDES[@]}") then echo "^ It's not allowed to have so large translation units." 
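A quick illustration of why the escape in the fix above matters (the input lines are hypothetical): in a regular expression an unescaped `.` matches any character, so `.o$` also keeps lines that merely end in some character followed by `o`, while `\.o$` requires a literal `.o` suffix:

    $ printf '%s\n' '100 a.o' '300 macro' | grep '.o$'
    100 a.o
    300 macro      # unintended match: "ro" satisfies '.o$'
    $ printf '%s\n' '100 a.o' '300 macro' | grep '\.o$'
    100 a.o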
From b354d07b829a94a43cf6f3867585efac389088c1 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 11 Mar 2024 14:59:22 +0800 Subject: [PATCH 081/283] remove break --- src/Processors/Transforms/FilterTransform.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Processors/Transforms/FilterTransform.cpp b/src/Processors/Transforms/FilterTransform.cpp index 0f2509c7510..b3be9246f43 100644 --- a/src/Processors/Transforms/FilterTransform.cpp +++ b/src/Processors/Transforms/FilterTransform.cpp @@ -337,7 +337,6 @@ void FilterTransform::doTransform(Chunk & chunk) min_size_in_memory = size_in_memory; first_non_constant_column = i; } - break; } } (void)min_size_in_memory; /// Suppress error of clang-analyzer-deadcode.DeadStores From 1c82f8707d76821cbe006ed3f9be84822677cd9b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 11 Mar 2024 08:38:03 +0100 Subject: [PATCH 082/283] Fix tidy --- src/Functions/FunctionsConversion.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index d2d72558500..94fd960a99e 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1056,7 +1056,7 @@ struct ConvertImpl, DataTypeNumber, Name, Con } }; -static inline ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) +inline ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) { ColumnUInt8::MutablePtr null_map = nullptr; if (const auto * col_null = checkAndGetColumn(col.get())) @@ -1984,7 +1984,7 @@ struct NameParseDateTimeBestEffortOrZero; struct NameParseDateTimeBestEffortOrNull; template -static inline bool isDateTime64(const ColumnsWithTypeAndName & arguments) +inline bool isDateTime64(const ColumnsWithTypeAndName & arguments) { if constexpr (std::is_same_v) return true; @@ -3391,7 +3391,7 @@ arguments, result_type, input_rows_count); \ case IntervalKind::Kind::INTERVAL_KIND: \ return createFunctionAdaptor(FunctionConvert::create(), from_type); - static WrapperType createIntervalWrapper(const DataTypePtr & from_type, IntervalKind kind) + static WrapperType createIntervalWrapper(const DataTypePtr & from_type, IntervalKind::Kind kind) { switch (kind) { @@ -3994,7 +3994,7 @@ arguments, result_type, input_rows_count); \ { return [is_nullable = to_type->hasNullableSubcolumns()] (ColumnsWithTypeAndName & arguments, const DataTypePtr & , const ColumnNullable * , size_t) -> ColumnPtr { - auto & column_object = assert_cast(*arguments.front().column); + const auto & column_object = assert_cast(*arguments.front().column); auto res = ColumnObject::create(is_nullable); for (size_t i = 0; i < column_object.size(); i++) res->insert(column_object[i]); From 61c3d917ae82bb6ae14cea3a4a7c36c19f6d3b22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 11 Mar 2024 10:33:09 +0000 Subject: [PATCH 083/283] Use `boost::algorithm::join` --- src/Common/FunctionDocumentation.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/Common/FunctionDocumentation.cpp b/src/Common/FunctionDocumentation.cpp index 0dc5b48f9d1..7b554539a4f 100644 --- a/src/Common/FunctionDocumentation.cpp +++ b/src/Common/FunctionDocumentation.cpp @@ -1,5 +1,7 @@ #include +#include + namespace DB { @@ -31,15 +33,7 @@ std::string FunctionDocumentation::examplesAsString() const std::string FunctionDocumentation::categoriesAsString() const { - if (categories.empty()) - return ""; - - auto it = categories.begin(); - std::string res = *it; - 
++it; - for (; it != categories.end(); ++it) - res += ", " + *it; - return res; + return boost::algorithm::join(categories, ", "); } } From ecb11005e34948a365555da1e2271e9da31d5074 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 11 Mar 2024 10:56:26 +0000 Subject: [PATCH 084/283] List clang-tidy checks as list instead of a string The string representation will be deprecated at some point: https://reviews.llvm.org/D147876 --- .clang-tidy | 195 ++++++++++++++++++++++++++-------------------------- 1 file changed, 97 insertions(+), 98 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 0dacf813c7e..4aeb38ca409 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -5,128 +5,127 @@ # a) the new check is not controversial (this includes many checks in readability-* and google-*) or # b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*). -# TODO: Once clang(-tidy) 17 is the minimum, we can convert this list to YAML -# See https://releases.llvm.org/17.0.1/tools/clang/tools/extra/docs/ReleaseNotes.html#improvements-to-clang-tidy - # TODO Let clang-tidy check headers in further directories # --> HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$' HeaderFilterRegex: '^.*/(base|programs|utils)/.*(h|hpp)$' -Checks: '*, - -abseil-*, +Checks: [ + '*', - -altera-*, + '-abseil-*', - -android-*, + '-altera-*', - -bugprone-assignment-in-if-condition, - -bugprone-branch-clone, - -bugprone-easily-swappable-parameters, - -bugprone-exception-escape, - -bugprone-implicit-widening-of-multiplication-result, - -bugprone-narrowing-conversions, - -bugprone-not-null-terminated-result, - -bugprone-reserved-identifier, # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged - -bugprone-unchecked-optional-access, + '-android-*', - -cert-dcl16-c, - -cert-dcl37-c, - -cert-dcl51-cpp, - -cert-err58-cpp, - -cert-msc32-c, - -cert-msc51-cpp, - -cert-oop54-cpp, - -cert-oop57-cpp, + '-bugprone-assignment-in-if-condition', + '-bugprone-branch-clone', + '-bugprone-easily-swappable-parameters', + '-bugprone-exception-escape', + '-bugprone-implicit-widening-of-multiplication-result', + '-bugprone-narrowing-conversions', + '-bugprone-not-null-terminated-result', + '-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged + '-bugprone-unchecked-optional-access', - -clang-analyzer-unix.Malloc, + '-cert-dcl16-c', + '-cert-dcl37-c', + '-cert-dcl51-cpp', + '-cert-err58-cpp', + '-cert-msc32-c', + '-cert-msc51-cpp', + '-cert-oop54-cpp', + '-cert-oop57-cpp', - -cppcoreguidelines-*, # impractical in a codebase as large as ClickHouse, also slow + '-clang-analyzer-unix.Malloc', - -darwin-*, + '-cppcoreguidelines-*', # impractical in a codebase as large as ClickHouse, also slow - -fuchsia-*, + '-darwin-*', - -google-build-using-namespace, - -google-readability-braces-around-statements, - -google-readability-casting, - -google-readability-function-size, - -google-readability-namespace-comments, - -google-readability-todo, + '-fuchsia-*', - -hicpp-avoid-c-arrays, - -hicpp-avoid-goto, - -hicpp-braces-around-statements, - -hicpp-explicit-conversions, - -hicpp-function-size, - -hicpp-member-init, - -hicpp-move-const-arg, - -hicpp-multiway-paths-covered, - -hicpp-named-parameter, - -hicpp-no-array-decay, - -hicpp-no-assembler, - -hicpp-no-malloc, - -hicpp-signed-bitwise, - -hicpp-special-member-functions, - 
-hicpp-uppercase-literal-suffix, - -hicpp-use-auto, - -hicpp-use-emplace, - -hicpp-vararg, + '-google-build-using-namespace', + '-google-readability-braces-around-statements', + '-google-readability-casting', + '-google-readability-function-size', + '-google-readability-namespace-comments', + '-google-readability-todo', - -linuxkernel-*, + '-hicpp-avoid-c-arrays', + '-hicpp-avoid-goto', + '-hicpp-braces-around-statements', + '-hicpp-explicit-conversions', + '-hicpp-function-size', + '-hicpp-member-init', + '-hicpp-move-const-arg', + '-hicpp-multiway-paths-covered', + '-hicpp-named-parameter', + '-hicpp-no-array-decay', + '-hicpp-no-assembler', + '-hicpp-no-malloc', + '-hicpp-signed-bitwise', + '-hicpp-special-member-functions', + '-hicpp-uppercase-literal-suffix', + '-hicpp-use-auto', + '-hicpp-use-emplace', + '-hicpp-vararg', - -llvm-*, + '-linuxkernel-*', - -llvmlibc-*, + '-llvm-*', - -openmp-*, + '-llvmlibc-*', - -misc-const-correctness, - -misc-include-cleaner, # useful but far too many occurrences - -misc-no-recursion, - -misc-non-private-member-variables-in-classes, - -misc-confusable-identifiers, # useful but slooow - -misc-use-anonymous-namespace, + '-openmp-*', - -modernize-avoid-c-arrays, - -modernize-concat-nested-namespaces, - -modernize-macro-to-enum, - -modernize-pass-by-value, - -modernize-return-braced-init-list, - -modernize-use-auto, - -modernize-use-default-member-init, - -modernize-use-emplace, - -modernize-use-nodiscard, - -modernize-use-override, - -modernize-use-trailing-return-type, + '-misc-const-correctness', + '-misc-include-cleaner', # useful but far too many occurrences + '-misc-no-recursion', + '-misc-non-private-member-variables-in-classes', + '-misc-confusable-identifiers', # useful but slooo + '-misc-use-anonymous-namespace', - -performance-inefficient-string-concatenation, - -performance-no-int-to-ptr, - -performance-avoid-endl, - -performance-unnecessary-value-param, + '-modernize-avoid-c-arrays', + '-modernize-concat-nested-namespaces', + '-modernize-macro-to-enum', + '-modernize-pass-by-value', + '-modernize-return-braced-init-list', + '-modernize-use-auto', + '-modernize-use-default-member-init', + '-modernize-use-emplace', + '-modernize-use-nodiscard', + '-modernize-use-override', + '-modernize-use-trailing-return-type', - -portability-simd-intrinsics, + '-performance-inefficient-string-concatenation', + '-performance-no-int-to-ptr', + '-performance-avoid-endl', + '-performance-unnecessary-value-param', - -readability-avoid-unconditional-preprocessor-if, - -readability-braces-around-statements, - -readability-convert-member-functions-to-static, - -readability-else-after-return, - -readability-function-cognitive-complexity, - -readability-function-size, - -readability-identifier-length, - -readability-identifier-naming, # useful but too slow - -readability-implicit-bool-conversion, - -readability-isolate-declaration, - -readability-magic-numbers, - -readability-named-parameter, - -readability-redundant-declaration, - -readability-simplify-boolean-expr, - -readability-static-accessed-through-instance, - -readability-suspicious-call-argument, - -readability-uppercase-literal-suffix, - -readability-use-anyofallof, + '-portability-simd-intrinsics', - -zircon-*, -' + '-readability-avoid-unconditional-preprocessor-if', + '-readability-braces-around-statements', + '-readability-convert-member-functions-to-static', + '-readability-else-after-return', + '-readability-function-cognitive-complexity', + '-readability-function-size', + 
'-readability-identifier-length', + '-readability-identifier-naming', # useful but too slow + '-readability-implicit-bool-conversion', + '-readability-isolate-declaration', + '-readability-magic-numbers', + '-readability-named-parameter', + '-readability-redundant-declaration', + '-readability-simplify-boolean-expr', + '-readability-static-accessed-through-instance', + '-readability-suspicious-call-argument', + '-readability-uppercase-literal-suffix', + '-readability-use-anyofallof', + + '-zircon-*' +] WarningsAsErrors: '*' From e5e84419aff0f559bc545737bfdc0518a732f7ff Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 10 Mar 2024 14:29:18 +0000 Subject: [PATCH 085/283] Fix clang-tidy-s --- contrib/libmetrohash/src/metrohash128.h | 3 +++ src/Access/AccessControl.h | 10 +++++----- src/Access/IAccessStorage.cpp | 2 +- src/Access/IAccessStorage.h | 2 +- src/Common/Arena.h | 4 +--- src/Common/DNSResolver.cpp | 2 +- src/Common/DNSResolver.h | 2 +- src/Common/DateLUTImpl.h | 2 +- src/Common/MultiVersion.h | 4 ++-- src/Common/PODArray.h | 6 +++--- src/Common/SipHash.h | 2 +- src/Common/TransactionID.h | 2 +- src/Common/ZooKeeper/IKeeper.cpp | 8 ++++---- src/Common/ZooKeeper/IKeeper.h | 16 ++++++++-------- src/Common/logger_useful.h | 16 ++++++++-------- src/Core/PostgreSQL/insertPostgreSQLValue.cpp | 4 ++-- src/Core/PostgreSQL/insertPostgreSQLValue.h | 4 ++-- src/Core/Settings.h | 2 ++ src/Dictionaries/CacheDictionary.cpp | 4 ++-- src/Dictionaries/CacheDictionary.h | 2 +- .../GeodataProviders/IHierarchiesProvider.h | 2 +- src/Dictionaries/RegExpTreeDictionary.cpp | 2 +- src/Dictionaries/RegExpTreeDictionary.h | 2 +- src/Functions/IFunction.h | 4 ---- src/IO/ReadSettings.h | 1 + src/Interpreters/AsynchronousInsertQueue.cpp | 2 +- src/Interpreters/AsynchronousInsertQueue.h | 2 +- src/Interpreters/Context.h | 4 ++-- src/Interpreters/IExternalLoadable.h | 2 +- src/Interpreters/ProcessList.h | 2 +- src/Processors/Chunk.h | 2 +- .../Algorithms/AggregatingSortedAlgorithm.cpp | 2 +- src/Processors/Port.h | 2 +- src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp | 4 ++-- src/Processors/TTL/TTLUpdateInfoAlgorithm.h | 4 ++-- src/Storages/StorageInMemoryMetadata.h | 4 ++-- 36 files changed, 69 insertions(+), 69 deletions(-) diff --git a/contrib/libmetrohash/src/metrohash128.h b/contrib/libmetrohash/src/metrohash128.h index 2dbb6ca5a8a..f507c917caf 100644 --- a/contrib/libmetrohash/src/metrohash128.h +++ b/contrib/libmetrohash/src/metrohash128.h @@ -17,6 +17,8 @@ #ifndef METROHASH_METROHASH_128_H #define METROHASH_METROHASH_128_H +// NOLINTBEGIN(readability-avoid-const-params-in-decls) + #include class MetroHash128 @@ -68,5 +70,6 @@ private: void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out); +// NOLINTEND(readability-avoid-const-params-in-decls) #endif // #ifndef METROHASH_METROHASH_128_H diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index 55ea4e4f717..1af74e02fb7 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -133,20 +133,20 @@ public: /// This function also enables custom prefixes to be used. 
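A side note on the pattern this commit applies throughout (the NOLINTBEGIN in metrohash128.h above, and the declarations just below): readability-avoid-const-params-in-decls fires because top-level `const` on a by-value parameter is not part of the function signature, so in a declaration it is pure noise. A minimal sketch of the rule, using an invented function name rather than anything from this patch:

    #include <cstddef>

    void setLimit(std::size_t limit);       // header spelling; const here would be ignored

    void setLimit(const std::size_t limit)  // the definition may still say const:
    {                                       // it only makes the parameter read-only
        (void)limit;                        // inside this body
    }

    int main()
    {
        setLimit(42);
        return 0;
    }
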
void setCustomSettingsPrefixes(const Strings & prefixes); void setCustomSettingsPrefixes(const String & comma_separated_prefixes); - bool isSettingNameAllowed(const std::string_view name) const; - void checkSettingNameIsAllowed(const std::string_view name) const; + bool isSettingNameAllowed(std::string_view name) const; + void checkSettingNameIsAllowed(std::string_view name) const; /// Allows implicit user creation without password (by default it's allowed). /// In other words, allow 'CREATE USER' queries without 'IDENTIFIED WITH' clause. - void setImplicitNoPasswordAllowed(const bool allow_implicit_no_password_); + void setImplicitNoPasswordAllowed(bool allow_implicit_no_password_); bool isImplicitNoPasswordAllowed() const; /// Allows users without password (by default it's allowed). - void setNoPasswordAllowed(const bool allow_no_password_); + void setNoPasswordAllowed(bool allow_no_password_); bool isNoPasswordAllowed() const; /// Allows users with plaintext password (by default it's allowed). - void setPlaintextPasswordAllowed(const bool allow_plaintext_password_); + void setPlaintextPasswordAllowed(bool allow_plaintext_password_); bool isPlaintextPasswordAllowed() const; /// Default password type when the user does not specify it. diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index fbe9e231002..1d6b8d99cd5 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -616,7 +616,7 @@ UUID IAccessStorage::generateRandomID() } -void IAccessStorage::clearConflictsInEntitiesList(std::vector> & entities, const LoggerPtr log_) +void IAccessStorage::clearConflictsInEntitiesList(std::vector> & entities, LoggerPtr log_) { std::unordered_map positions_by_id; std::unordered_map positions_by_type_and_name[static_cast(AccessEntityType::MAX)]; diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index ebb5a39cdf0..ad78bf92e02 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -228,7 +228,7 @@ protected: static UUID generateRandomID(); LoggerPtr getLogger() const; static String formatEntityTypeWithName(AccessEntityType type, const String & name) { return AccessEntityTypeInfo::get(type).formatEntityNameWithType(name); } - static void clearConflictsInEntitiesList(std::vector> & entities, const LoggerPtr log_); + static void clearConflictsInEntitiesList(std::vector> & entities, LoggerPtr log_); [[noreturn]] void throwNotFound(const UUID & id) const; [[noreturn]] void throwNotFound(AccessEntityType type, const String & name) const; [[noreturn]] static void throwBadCast(const UUID & id, AccessEntityType type, const String & name, AccessEntityType required_type); diff --git a/src/Common/Arena.h b/src/Common/Arena.h index cb26397844b..ba5b9ea9205 100644 --- a/src/Common/Arena.h +++ b/src/Common/Arena.h @@ -47,9 +47,7 @@ private: std::unique_ptr prev; - MemoryChunk() - { - } + MemoryChunk() = default; void swap(MemoryChunk & other) { diff --git a/src/Common/DNSResolver.cpp b/src/Common/DNSResolver.cpp index e36e1483da8..4b577a251af 100644 --- a/src/Common/DNSResolver.cpp +++ b/src/Common/DNSResolver.cpp @@ -297,7 +297,7 @@ void DNSResolver::setDisableCacheFlag(bool is_disabled) impl->disable_cache = is_disabled; } -void DNSResolver::setCacheMaxEntries(const UInt64 cache_max_entries) +void DNSResolver::setCacheMaxEntries(UInt64 cache_max_entries) { impl->cache_address.setMaxSizeInBytes(cache_max_entries); impl->cache_host.setMaxSizeInBytes(cache_max_entries); diff --git a/src/Common/DNSResolver.h 
b/src/Common/DNSResolver.h index e3030e51a96..1ddd9d3b991 100644 --- a/src/Common/DNSResolver.h +++ b/src/Common/DNSResolver.h @@ -56,7 +56,7 @@ public: void setDisableCacheFlag(bool is_disabled = true); /// Set a limit of entries in cache - void setCacheMaxEntries(const UInt64 cache_max_entries); + void setCacheMaxEntries(UInt64 cache_max_entries); /// Drops all caches void dropCache(); diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 7bf66c0504a..4087e77d588 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -255,7 +255,7 @@ private: static LUTIndex toLUTIndex(ExtendedDayNum d) { - return normalizeLUTIndex(static_cast(d + daynum_offset_epoch)); + return normalizeLUTIndex(static_cast(d) + daynum_offset_epoch); } LUTIndex toLUTIndex(Time t) const diff --git a/src/Common/MultiVersion.h b/src/Common/MultiVersion.h index 8f488f9fcbc..680e224f869 100644 --- a/src/Common/MultiVersion.h +++ b/src/Common/MultiVersion.h @@ -41,9 +41,9 @@ public: } /// There is no copy constructor because only one MultiVersion should own the same object. - MultiVersion(MultiVersion && src) { *this = std::move(src); } + MultiVersion(MultiVersion && src) { *this = std::move(src); } /// NOLINT - MultiVersion & operator=(MultiVersion && src) + MultiVersion & operator=(MultiVersion && src) /// NOLINT { if (this != &src) { diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h index 1a4047a2588..af863e01fb2 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -25,7 +25,7 @@ */ template constexpr bool memcpy_can_be_used_for_assignment = std::is_same_v - || (std::is_integral_v && std::is_integral_v && sizeof(T) == sizeof(U)); + || (std::is_integral_v && std::is_integral_v && sizeof(T) == sizeof(U)); /// NOLINT(misc-redundant-expression) namespace DB { @@ -558,7 +558,7 @@ public: } template - void swap(PODArray & rhs, TAllocatorParams &&... allocator_params) + void swap(PODArray & rhs, TAllocatorParams &&... allocator_params) /// NOLINT(performance-noexcept-swap) { #ifndef NDEBUG this->unprotect(); @@ -756,7 +756,7 @@ public: }; template -void swap(PODArray & lhs, PODArray & rhs) +void swap(PODArray & lhs, PODArray & rhs) /// NOLINT { lhs.swap(rhs); } diff --git a/src/Common/SipHash.h b/src/Common/SipHash.h index 729fb76a573..c89ee2c9d90 100644 --- a/src/Common/SipHash.h +++ b/src/Common/SipHash.h @@ -149,7 +149,7 @@ public: /// Pad the remainder, which is missing up to an 8-byte word. current_word = 0; - switch (end - data) + switch (end - data) /// NOLINT(bugprone-switch-missing-default-case) { case 7: current_bytes[CURRENT_BYTES_IDX(6)] = data[6]; [[fallthrough]]; case 6: current_bytes[CURRENT_BYTES_IDX(5)] = data[5]; [[fallthrough]]; diff --git a/src/Common/TransactionID.h b/src/Common/TransactionID.h index 3ab86f7589c..97d0072bc14 100644 --- a/src/Common/TransactionID.h +++ b/src/Common/TransactionID.h @@ -16,7 +16,7 @@ class MergeTreeTransaction; /// or transaction object is not needed and not passed intentionally. 
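The NOLINT(bugprone-macro-parentheses) added a few lines below silences the check that wants macro replacement lists parenthesized, since textual expansion ignores operator precedence. What the check guards against, as a self-contained sketch with an invented macro:

    #include <cassert>

    #define SPAN_UNSAFE 10 + 20   // the spelling the check dislikes
    #define SPAN_SAFE (10 + 20)   // the parenthesized form it suggests

    int main()
    {
        assert(2 * SPAN_UNSAFE == 40);  // expands to 2 * 10 + 20
        assert(2 * SPAN_SAFE == 60);    // expands to 2 * (10 + 20)
        return 0;
    }
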
#ifndef NO_TRANSACTION_PTR #define NO_TRANSACTION_PTR std::shared_ptr(nullptr) -#define NO_TRANSACTION_RAW static_cast(nullptr) +#define NO_TRANSACTION_RAW static_cast(nullptr) /// NOLINT(bugprone-macro-parentheses) #endif /// Commit Sequence Number diff --git a/src/Common/ZooKeeper/IKeeper.cpp b/src/Common/ZooKeeper/IKeeper.cpp index 6c47ea68b84..7d2602bde1e 100644 --- a/src/Common/ZooKeeper/IKeeper.cpp +++ b/src/Common/ZooKeeper/IKeeper.cpp @@ -23,7 +23,7 @@ namespace ProfileEvents namespace Coordination { -void Exception::incrementErrorMetrics(const Error code_) +void Exception::incrementErrorMetrics(Error code_) { if (Coordination::isUserError(code_)) ProfileEvents::increment(ProfileEvents::ZooKeeperUserExceptions); @@ -33,14 +33,14 @@ void Exception::incrementErrorMetrics(const Error code_) ProfileEvents::increment(ProfileEvents::ZooKeeperOtherExceptions); } -Exception::Exception(const std::string & msg, const Error code_, int) +Exception::Exception(const std::string & msg, Error code_, int) : DB::Exception(msg, DB::ErrorCodes::KEEPER_EXCEPTION) , code(code_) { incrementErrorMetrics(code); } -Exception::Exception(PreformattedMessage && msg, const Error code_) +Exception::Exception(PreformattedMessage && msg, Error code_) : DB::Exception(std::move(msg), DB::ErrorCodes::KEEPER_EXCEPTION) , code(code_) { @@ -48,7 +48,7 @@ Exception::Exception(PreformattedMessage && msg, const Error code_) incrementErrorMetrics(code); } -Exception::Exception(const Error code_) +Exception::Exception(Error code_) : Exception(code_, "Coordination error: {}", errorMessage(code_)) { } diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index c7b902ea03a..ec49c94808e 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -466,13 +466,13 @@ class Exception : public DB::Exception { private: /// Delegate constructor, used to minimize repetition; last parameter used for overload resolution. - Exception(const std::string & msg, const Error code_, int); /// NOLINT - Exception(PreformattedMessage && msg, const Error code_); + Exception(const std::string & msg, Error code_, int); /// NOLINT + Exception(PreformattedMessage && msg, Error code_); /// Message must be a compile-time constant template requires std::is_convertible_v - Exception(T && message, const Error code_) : DB::Exception(std::forward(message), DB::ErrorCodes::KEEPER_EXCEPTION, /* remote_= */ false), code(code_) + Exception(T && message, Error code_) : DB::Exception(std::forward(message), DB::ErrorCodes::KEEPER_EXCEPTION, /* remote_= */ false), code(code_) { incrementErrorMetrics(code); } @@ -480,23 +480,23 @@ private: static void incrementErrorMetrics(Error code_); public: - explicit Exception(const Error code_); /// NOLINT + explicit Exception(Error code_); /// NOLINT Exception(const Exception & exc); template - Exception(const Error code_, FormatStringHelper fmt, Args &&... args) + Exception(Error code_, FormatStringHelper fmt, Args &&... args) : DB::Exception(DB::ErrorCodes::KEEPER_EXCEPTION, std::move(fmt), std::forward(args)...) 
, code(code_) { incrementErrorMetrics(code); } - inline static Exception createDeprecated(const std::string & msg, const Error code_) + inline static Exception createDeprecated(const std::string & msg, Error code_) { return Exception(msg, code_, 0); } - inline static Exception fromPath(const Error code_, const std::string & path) + inline static Exception fromPath(Error code_, const std::string & path) { return Exception(code_, "Coordination error: {}, path {}", errorMessage(code_), path); } @@ -504,7 +504,7 @@ public: /// Message must be a compile-time constant template requires std::is_convertible_v - inline static Exception fromMessage(const Error code_, T && message) + inline static Exception fromMessage(Error code_, T && message) { return Exception(std::forward(message), code_); } diff --git a/src/Common/logger_useful.h b/src/Common/logger_useful.h index 8e78e93e198..013b35e695e 100644 --- a/src/Common/logger_useful.h +++ b/src/Common/logger_useful.h @@ -19,14 +19,14 @@ namespace Poco { class Logger; } using LogSeriesLimiterPtr = std::shared_ptr; -namespace +namespace impl { - [[maybe_unused]] LoggerPtr getLoggerHelper(const LoggerPtr & logger) { return logger; } - [[maybe_unused]] LoggerPtr getLoggerHelper(const AtomicLogger & logger) { return logger.load(); } - [[maybe_unused]] const ::Poco::Logger * getLoggerHelper(const ::Poco::Logger * logger) { return logger; } - [[maybe_unused]] std::unique_ptr getLoggerHelper(std::unique_ptr && logger) { return logger; } - [[maybe_unused]] std::unique_ptr getLoggerHelper(std::unique_ptr && logger) { return logger; } - [[maybe_unused]] LogSeriesLimiterPtr getLoggerHelper(LogSeriesLimiterPtr & logger) { return logger; } + [[maybe_unused]] inline LoggerPtr getLoggerHelper(const LoggerPtr & logger) { return logger; } + [[maybe_unused]] inline LoggerPtr getLoggerHelper(const AtomicLogger & logger) { return logger.load(); } + [[maybe_unused]] inline const ::Poco::Logger * getLoggerHelper(const ::Poco::Logger * logger) { return logger; } + [[maybe_unused]] inline std::unique_ptr getLoggerHelper(std::unique_ptr && logger) { return logger; } + [[maybe_unused]] inline std::unique_ptr getLoggerHelper(std::unique_ptr && logger) { return logger; } + [[maybe_unused]] inline LogSeriesLimiterPtr getLoggerHelper(LogSeriesLimiterPtr & logger) { return logger; } } #define LOG_IMPL_FIRST_ARG(X, ...) X @@ -65,7 +65,7 @@ namespace #define LOG_IMPL(logger, priority, PRIORITY, ...) 
do \ { \ - auto _logger = ::getLoggerHelper(logger); \ + auto _logger = ::impl::getLoggerHelper(logger); \ const bool _is_clients_log = (DB::CurrentThread::getGroup() != nullptr) && \ (DB::CurrentThread::get().getClientLogsLevel() >= (priority)); \ if (!_is_clients_log && !_logger->is((PRIORITY))) \ diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp index aa60bdee28a..b507b300769 100644 --- a/src/Core/PostgreSQL/insertPostgreSQLValue.cpp +++ b/src/Core/PostgreSQL/insertPostgreSQLValue.cpp @@ -36,7 +36,7 @@ void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_colum void insertPostgreSQLValue( IColumn & column, std::string_view value, - const ExternalResultDescription::ValueType type, const DataTypePtr data_type, + ExternalResultDescription::ValueType type, DataTypePtr data_type, const std::unordered_map & array_info, size_t idx) { switch (type) @@ -170,7 +170,7 @@ void insertPostgreSQLValue( void preparePostgreSQLArrayInfo( - std::unordered_map & array_info, size_t column_idx, const DataTypePtr data_type) + std::unordered_map & array_info, size_t column_idx, DataTypePtr data_type) { const auto * array_type = typeid_cast(data_type.get()); auto nested = array_type->getNestedType(); diff --git a/src/Core/PostgreSQL/insertPostgreSQLValue.h b/src/Core/PostgreSQL/insertPostgreSQLValue.h index 3bc83292b96..bfb85422aa1 100644 --- a/src/Core/PostgreSQL/insertPostgreSQLValue.h +++ b/src/Core/PostgreSQL/insertPostgreSQLValue.h @@ -22,11 +22,11 @@ struct PostgreSQLArrayInfo void insertPostgreSQLValue( IColumn & column, std::string_view value, - const ExternalResultDescription::ValueType type, const DataTypePtr data_type, + ExternalResultDescription::ValueType type, DataTypePtr data_type, const std::unordered_map & array_info, size_t idx); void preparePostgreSQLArrayInfo( - std::unordered_map & array_info, size_t column_idx, const DataTypePtr data_type); + std::unordered_map & array_info, size_t column_idx, DataTypePtr data_type); void insertDefaultPostgreSQLValue(IColumn & column, const IColumn & sample_column); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d70a6cf51c5..c41db9d2141 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1192,6 +1192,7 @@ class IColumn; FORMAT_FACTORY_SETTINGS(M, ALIAS) \ OBSOLETE_FORMAT_SETTINGS(M, ALIAS) \ +/// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding) DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(SettingsTraits, LIST_OF_SETTINGS) @@ -1236,6 +1237,7 @@ private: /* * User-specified file format settings for File and URL engines. 
*/ +/// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding) DECLARE_SETTINGS_TRAITS(FormatFactorySettingsTraits, LIST_OF_ALL_FORMAT_SETTINGS) struct FormatFactorySettings : public BaseSettings diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index 8444042db9e..6e9b09f8919 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -423,7 +423,7 @@ MutableColumns CacheDictionary::aggregateColumnsInOrderOfKe const DictionaryStorageFetchRequest & request, const MutableColumns & fetched_columns, const PaddedPODArray & key_index_to_state, - IColumn::Filter * const default_mask) const + IColumn::Filter * default_mask) const { MutableColumns aggregated_columns = request.makeAttributesResultColumns(); @@ -473,7 +473,7 @@ MutableColumns CacheDictionary::aggregateColumns( const PaddedPODArray & key_index_to_fetched_columns_from_storage_result, const MutableColumns & fetched_columns_during_update, const HashMap & found_keys_to_fetched_columns_during_update_index, - IColumn::Filter * const default_mask) const + IColumn::Filter * default_mask) const { /** * Aggregation of columns fetched from storage and from source during update. diff --git a/src/Dictionaries/CacheDictionary.h b/src/Dictionaries/CacheDictionary.h index 8897fb40fa9..c02fb91c60e 100644 --- a/src/Dictionaries/CacheDictionary.h +++ b/src/Dictionaries/CacheDictionary.h @@ -162,7 +162,7 @@ private: const DictionaryStorageFetchRequest & request, const MutableColumns & fetched_columns, const PaddedPODArray & key_index_to_state, - IColumn::Filter * const default_mask = nullptr) const; + IColumn::Filter * default_mask = nullptr) const; MutableColumns aggregateColumns( const PaddedPODArray & keys, diff --git a/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h b/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h index 68ab0fdca2d..a4b88127786 100644 --- a/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h +++ b/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h @@ -14,7 +14,7 @@ class IRegionsHierarchyReader public: virtual bool readNext(RegionEntry & entry) = 0; - virtual ~IRegionsHierarchyReader() {} + virtual ~IRegionsHierarchyReader() = default; }; using IRegionsHierarchyReaderPtr = std::unique_ptr; diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index 4d82aa9ca0e..1f5c2d6d2c7 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -568,7 +568,7 @@ bool RegExpTreeDictionary::setAttributesShortCircuit( const String & data, std::unordered_set & visited_nodes, const std::unordered_map & attributes, - std::unordered_set * const defaults) const + std::unordered_set * defaults) const { if (visited_nodes.contains(id)) return attributes_to_set.attributesFull() == attributes.size(); diff --git a/src/Dictionaries/RegExpTreeDictionary.h b/src/Dictionaries/RegExpTreeDictionary.h index 9e14abb49d0..d6bc90ef651 100644 --- a/src/Dictionaries/RegExpTreeDictionary.h +++ b/src/Dictionaries/RegExpTreeDictionary.h @@ -210,7 +210,7 @@ private: const String & data, std::unordered_set & visited_nodes, const std::unordered_map & attributes, - std::unordered_set * const defaults) const; + std::unordered_set * defaults) const; struct RegexTreeNode; using RegexTreeNodePtr = std::shared_ptr; diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 05aa08e2ad7..9b7cdf12d57 100644 --- a/src/Functions/IFunction.h +++ 
b/src/Functions/IFunction.h @@ -13,10 +13,6 @@ #include -#if USE_EMBEDDED_COMPILER -# include -#endif - /// This file contains user interface for functions. namespace llvm diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index 38904df4403..5c401c0c8d9 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -63,6 +63,7 @@ enum class RemoteFSReadMethod class MMappedFileCache; class PageCache; +/// NOLINTNEXTLINE(clang-analyzer-optin.performance.Padding) struct ReadSettings { /// Method to use reading from local filesystem. diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 7d56dbabe3c..9327f31b6ff 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -905,7 +905,7 @@ Chunk AsynchronousInsertQueue::processEntriesWithParsing( const InsertDataPtr & data, const Block & header, const ContextPtr & insert_context, - const LoggerPtr logger, + LoggerPtr logger, LogFunc && add_to_async_insert_log) { size_t total_rows = 0; diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index f60b3d343fb..5076701d0b0 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -265,7 +265,7 @@ private: const InsertDataPtr & data, const Block & header, const ContextPtr & insert_context, - const LoggerPtr logger, + LoggerPtr logger, LogFunc && add_to_async_insert_log); template diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index c8aa3604a6f..43df8d6adf2 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -330,7 +330,7 @@ protected: return *this; } - void swap(QueryAccessInfo & rhs) + void swap(QueryAccessInfo & rhs) noexcept { std::swap(databases, rhs.databases); std::swap(tables, rhs.tables); @@ -680,7 +680,7 @@ public: void addSpecialScalar(const String & name, const Block & block); const QueryAccessInfo & getQueryAccessInfo() const { return *getQueryAccessInfoPtr(); } - const QueryAccessInfoPtr getQueryAccessInfoPtr() const { return query_access_info; } + QueryAccessInfoPtr getQueryAccessInfoPtr() const { return query_access_info; } void setQueryAccessInfo(QueryAccessInfoPtr other) { query_access_info = other; } void addQueryAccessInfo( diff --git a/src/Interpreters/IExternalLoadable.h b/src/Interpreters/IExternalLoadable.h index 3c004508b0a..47031778876 100644 --- a/src/Interpreters/IExternalLoadable.h +++ b/src/Interpreters/IExternalLoadable.h @@ -23,7 +23,7 @@ struct ExternalLoadableLifetime UInt64 max_sec = 0; ExternalLoadableLifetime(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); - ExternalLoadableLifetime() {} + ExternalLoadableLifetime() = default; }; /// Get delay before trying to load again after error. 
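One change worth unpacking before the next hunk: here (Context.h) and in ProcessList.h below, `const` is dropped from by-value return types such as `const QueryStatusPtr getQueryStatus() const`. A const-qualified temporary cannot be moved from, so callers pay a copy - for a shared_ptr, an atomic refcount bump - where a move would do. A small self-contained sketch under invented names:

    #include <memory>

    struct Widget { int payload = 0; };
    using WidgetPtr = std::shared_ptr<Widget>;

    const WidgetPtr makeConst() { return std::make_shared<Widget>(); } // pessimizes callers
    WidgetPtr makePlain() { return std::make_shared<Widget>(); }       // result can be moved

    int main()
    {
        WidgetPtr a = makeConst();  // copy: the const temporary is not movable
        WidgetPtr b = makePlain();  // move: a cheap pointer steal
        return a->payload + b->payload;
    }
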
diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 1c253f562e8..ad47041c762 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -318,7 +318,7 @@ public: ~ProcessListEntry(); QueryStatusPtr getQueryStatus() { return *it; } - const QueryStatusPtr getQueryStatus() const { return *it; } + QueryStatusPtr getQueryStatus() const { return *it; } }; diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 9a7d6bc294d..4f753798eaa 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -59,7 +59,7 @@ public: Chunk clone() const; - void swap(Chunk & other) + void swap(Chunk & other) noexcept { columns.swap(other.columns); chunk_info.swap(other.chunk_info); diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index d2d2434c477..3bd0b532d90 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -126,7 +126,7 @@ static void postprocessChunk(Chunk & chunk, const AggregatingSortedAlgorithm::Co AggregatingSortedAlgorithm::SimpleAggregateDescription::SimpleAggregateDescription( - AggregateFunctionPtr function_, const size_t column_number_, + AggregateFunctionPtr function_, size_t column_number_, DataTypePtr nested_type_, DataTypePtr real_type_) : function(std::move(function_)), column_number(column_number_) , nested_type(std::move(nested_type_)), real_type(std::move(real_type_)) diff --git a/src/Processors/Port.h b/src/Processors/Port.h index 67af2f041aa..f3c7bbb5fee 100644 --- a/src/Processors/Port.h +++ b/src/Processors/Port.h @@ -110,7 +110,7 @@ protected: return result; } - uintptr_t ALWAYS_INLINE swap(std::atomic & value, std::uintptr_t flags, std::uintptr_t mask) + uintptr_t ALWAYS_INLINE swap(std::atomic & value, std::uintptr_t flags, std::uintptr_t mask) /// NOLINT { Data * expected = nullptr; Data * desired = getPtr(flags | getUInt(data)); diff --git a/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp b/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp index b7cddf3c165..13d3030bbb8 100644 --- a/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp +++ b/src/Processors/TTL/TTLUpdateInfoAlgorithm.cpp @@ -6,8 +6,8 @@ namespace DB TTLUpdateInfoAlgorithm::TTLUpdateInfoAlgorithm( const TTLExpressions & ttl_expressions_, const TTLDescription & description_, - const TTLUpdateField ttl_update_field_, - const String ttl_update_key_, + TTLUpdateField ttl_update_field_, + String ttl_update_key_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_) diff --git a/src/Processors/TTL/TTLUpdateInfoAlgorithm.h b/src/Processors/TTL/TTLUpdateInfoAlgorithm.h index 0cf31765aef..b6aee6f7cb0 100644 --- a/src/Processors/TTL/TTLUpdateInfoAlgorithm.h +++ b/src/Processors/TTL/TTLUpdateInfoAlgorithm.h @@ -22,8 +22,8 @@ public: TTLUpdateInfoAlgorithm( const TTLExpressions & ttl_expressions_, const TTLDescription & description_, - const TTLUpdateField ttl_update_field_, - const String ttl_update_key_, + TTLUpdateField ttl_update_field_, + String ttl_update_key_, const TTLInfo & old_ttl_info_, time_t current_time_, bool force_ ); diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 2823aba1224..69cd3422a7d 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -72,8 +72,8 @@ struct StorageInMemoryMetadata StorageInMemoryMetadata(const StorageInMemoryMetadata & other); 
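The defaulted move operations a couple of lines below keep their behavior but gain a NOLINT, while earlier hunks (Chunk::swap, QueryAccessInfo::swap) gained `noexcept` outright. The practical reason noexcept matters on moves and swaps: std::vector only moves its elements during reallocation when the element's move constructor is noexcept, and falls back to copying copyable types otherwise. A sketch with an invented element type:

    #include <type_traits>
    #include <vector>

    struct Row
    {
        std::vector<int> cells;
        Row() = default;
        Row(const Row &) = default;
        Row(Row &&) noexcept = default;              // noexcept lets vector move...
        Row & operator=(const Row &) = default;
        Row & operator=(Row &&) noexcept = default;  // ...instead of copy when it grows
    };
    static_assert(std::is_nothrow_move_constructible_v<Row>);

    int main()
    {
        std::vector<Row> rows(4);
        rows.resize(1024);  // elements are moved, not copied, on reallocation
        return 0;
    }
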
StorageInMemoryMetadata & operator=(const StorageInMemoryMetadata & other); - StorageInMemoryMetadata(StorageInMemoryMetadata && other) = default; - StorageInMemoryMetadata & operator=(StorageInMemoryMetadata && other) = default; + StorageInMemoryMetadata(StorageInMemoryMetadata && other) = default; /// NOLINT + StorageInMemoryMetadata & operator=(StorageInMemoryMetadata && other) = default; /// NOLINT /// NOTE: Thread unsafe part. You should not modify same StorageInMemoryMetadata /// structure from different threads. It should be used as MultiVersion From 90b27432a26c0a5204e09ff5ff5f2ae8df3055af Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 11 Mar 2024 12:18:58 +0100 Subject: [PATCH 086/283] Update test.py --- tests/integration/test_backup_restore_s3/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index 452a9143067..f3f4837c317 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -130,7 +130,7 @@ def check_system_tables(backup_query_id=None): if disk ] expected_disks = ( - ("default", "local", "", ""), + ("default", "local", "None", "None"), ("disk_s3", "object_storage", "s3", "local"), ("disk_s3_cache", "object_storage", "s3", "local"), ("disk_s3_other_bucket", "object_storage", "s3", "local"), From 57f6263f67dd91e624003199295c840a228947a0 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 11 Mar 2024 12:31:40 +0100 Subject: [PATCH 087/283] Lock contention fix --- src/Common/ProfileEvents.cpp | 1 + src/Interpreters/Cache/FileCache.cpp | 12 +++++++++--- src/Interpreters/Cache/FileCache.h | 2 +- src/Interpreters/Cache/FileCache_fwd.h | 1 + src/Interpreters/Cache/Guards.h | 15 ++++++++++++--- 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index c1ac3d08245..ab1a16a3edf 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -476,6 +476,7 @@ The server successfully detected this situation and will download merged part fr M(FileSegmentRemoveMicroseconds, "File segment remove() time") \ M(FileSegmentHolderCompleteMicroseconds, "File segments holder complete() time") \ M(FileSegmentFailToIncreasePriority, "Number of times the priority was not increased due to a high contention on the cache lock") \ + M(FilesystemCacheFailToReserveSpaceBecauseOfLockContention, "Number of times space reservation was skipped due to a high contention on the cache lock") \ M(FilesystemCacheHoldFileSegments, "Filesystem cache file segments count, which were hold") \ M(FilesystemCacheUnusedHoldFileSegments, "Filesystem cache file segments count, which were hold, but not used (because of seek or LIMIT n, etc)") \ \ diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 9c705ddc27c..5650b9ce44e 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -27,6 +27,7 @@ namespace ProfileEvents extern const Event FilesystemCacheReserveMicroseconds; extern const Event FilesystemCacheGetOrSetMicroseconds; extern const Event FilesystemCacheGetMicroseconds; + extern const Event FilesystemCacheFailToReserveSpaceBecauseOfLockContention; } namespace DB @@ -188,9 +189,9 @@ CacheGuard::Lock FileCache::lockCache() const return cache_guard.lock(); } -CacheGuard::Lock FileCache::tryLockCache() const 
+CacheGuard::Lock FileCache::tryLockCache(std::optional acquire_timeout) const { - return cache_guard.tryLock(); + return acquire_timeout.has_value() ? cache_guard.tryLockFor(acquire_timeout.value()) : cache_guard.tryLock(); } FileSegments FileCache::getImpl(const LockedKey & locked_key, const FileSegment::Range & range, size_t file_segments_limit) const @@ -781,7 +782,12 @@ bool FileCache::tryReserve( ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheReserveMicroseconds); assertInitialized(); - auto cache_lock = lockCache(); + auto cache_lock = tryLockCache(std::chrono::milliseconds(FILECACHE_TRY_RESERVE_LOCK_TIMEOUT_MILLISECONDS)); + if (!cache_lock) + { + ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfLockContention); + return false; + } LOG_TEST( log, "Trying to reserve space ({} bytes) for {}:{}, current usage {}/{}", diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 5b665ad0271..7434b2ac78a 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -173,7 +173,7 @@ public: void deactivateBackgroundOperations(); CacheGuard::Lock lockCache() const; - CacheGuard::Lock tryLockCache() const; + CacheGuard::Lock tryLockCache(std::optional acquire_timeout = std::nullopt) const; std::vector sync(); diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h index 06261b19db7..eaed279e7fd 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h +++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -12,6 +12,7 @@ static constexpr int FILECACHE_DEFAULT_LOAD_METADATA_THREADS = 16; static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 10000000; static constexpr int FILECACHE_DEFAULT_HITS_THRESHOLD = 0; static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024; +static constexpr size_t FILECACHE_TRY_RESERVE_LOCK_TIMEOUT_MILLISECONDS = 1000; /// 1 sec. 
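Restating the mechanism of this commit: tryReserve() now takes the cache lock with a bounded wait (the 1 second constant just above) instead of blocking, and when the lock is contended it skips the reservation, bumping FilesystemCacheFailToReserveSpaceBecauseOfLockContention; the Guards.h hunk that follows supplies the std::timed_mutex plumbing. The same pattern as a self-contained sketch, assuming nothing from the ClickHouse sources:

    #include <chrono>
    #include <mutex>

    std::timed_mutex cache_mutex;

    bool tryReserveSpace(std::chrono::milliseconds timeout)
    {
        // unique_lock(mutex, duration) calls try_lock_for() under the hood
        std::unique_lock<std::timed_mutex> lock(cache_mutex, timeout);
        if (!lock.owns_lock())
            return false;  // contended: the caller simply proceeds without the cache
        // ... the actual space reservation would happen here, under the lock ...
        return true;
    }

    int main()
    {
        return tryReserveSpace(std::chrono::milliseconds(1000)) ? 0 : 1;
    }
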
class FileCache; using FileCachePtr = std::shared_ptr; diff --git a/src/Interpreters/Cache/Guards.h b/src/Interpreters/Cache/Guards.h index 5729620d82f..0ac7cb80483 100644 --- a/src/Interpreters/Cache/Guards.h +++ b/src/Interpreters/Cache/Guards.h @@ -61,17 +61,26 @@ namespace DB */ struct CacheGuard : private boost::noncopyable { + using Mutex = std::timed_mutex; /// struct is used (not keyword `using`) to make CacheGuard::Lock non-interchangable with other guards locks /// so, we wouldn't be able to pass CacheGuard::Lock to a function which accepts KeyGuard::Lock, for example - struct Lock : public std::unique_lock + struct Lock : public std::unique_lock { - using Base = std::unique_lock; + using Base = std::unique_lock; using Base::Base; }; Lock lock() { return Lock(mutex); } + Lock tryLock() { return Lock(mutex, std::try_to_lock); } - std::mutex mutex; + + Lock tryLockFor(const std::chrono::milliseconds & acquire_timeout) + { + return Lock(mutex, std::chrono::duration(acquire_timeout)); + } + +private: + Mutex mutex; }; /** From a7350299396d5ba3f2322584195554a7d946562f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 11 Mar 2024 12:50:54 +0000 Subject: [PATCH 088/283] Fix tests --- src/Common/DateLUTImpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 4087e77d588..082127e717c 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -255,7 +255,7 @@ private: static LUTIndex toLUTIndex(ExtendedDayNum d) { - return normalizeLUTIndex(static_cast(d) + daynum_offset_epoch); + return normalizeLUTIndex(static_cast(d + daynum_offset_epoch)); /// NOLINT } LUTIndex toLUTIndex(Time t) const From 802bae9661a6f22a8c42a6f88f9816e3497d9355 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 11 Mar 2024 12:54:34 +0000 Subject: [PATCH 089/283] GCC --> clang pragmas --- base/base/coverage.cpp | 2 +- base/base/sort.h | 6 +++--- programs/client/Client.cpp | 4 ++-- programs/copier/ClusterCopierApp.cpp | 4 ++-- programs/extract-from-config/ExtractFromConfig.cpp | 4 ++-- programs/format/Format.cpp | 4 ++-- programs/local/LocalServer.cpp | 4 ++-- programs/obfuscator/Obfuscator.cpp | 4 ++-- src/Common/SSH/Wrappers.cpp | 8 ++++---- src/Functions/GatherUtils/Sources.h | 8 ++++---- 10 files changed, 24 insertions(+), 24 deletions(-) diff --git a/base/base/coverage.cpp b/base/base/coverage.cpp index 99b897c4571..d96b3ea1e9a 100644 --- a/base/base/coverage.cpp +++ b/base/base/coverage.cpp @@ -1,7 +1,7 @@ #include "coverage.h" #include -#pragma GCC diagnostic ignored "-Wreserved-identifier" +#pragma clang diagnostic ignored "-Wreserved-identifier" /// WITH_COVERAGE enables the default implementation of code coverage, diff --git a/base/base/sort.h b/base/base/sort.h index 99bf8a0830e..e46c388d185 100644 --- a/base/base/sort.h +++ b/base/base/sort.h @@ -59,8 +59,8 @@ using ComparatorWrapper = Comparator; #endif -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wold-style-cast" +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wold-style-cast" #include @@ -115,7 +115,7 @@ void partial_sort(RandomIt first, RandomIt middle, RandomIt last) ::partial_sort(first, middle, last, comparator()); } -#pragma GCC diagnostic pop +#pragma clang diagnostic pop template void sort(RandomIt first, RandomIt last, Compare compare) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index a2bd6b6016a..1c7e57dac76 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ 
-1377,8 +1377,8 @@ void Client::readArguments( } -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wmissing-declarations" int mainEntryClickHouseClient(int argc, char ** argv) { diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp index fdf07dec61a..ed748a17a55 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -232,8 +232,8 @@ int ClusterCopierApp::main(const std::vector &) } -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wmissing-declarations" int mainEntryClickHouseClusterCopier(int argc, char ** argv) { diff --git a/programs/extract-from-config/ExtractFromConfig.cpp b/programs/extract-from-config/ExtractFromConfig.cpp index 56041ee382f..61d451664e3 100644 --- a/programs/extract-from-config/ExtractFromConfig.cpp +++ b/programs/extract-from-config/ExtractFromConfig.cpp @@ -109,8 +109,8 @@ static std::vector extractFromConfig( return {configuration->getString(key)}; } -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wmissing-declarations" int mainEntryClickHouseExtractFromConfig(int argc, char ** argv) { diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index a1c51565ae3..c92106e2f90 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -70,8 +70,8 @@ void skipSpacesAndComments(const char*& pos, const char* end, bool print_comment } -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wmissing-declarations" extern const char * auto_time_zones[]; diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 68f0e52ce08..99639d5e604 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -944,8 +944,8 @@ void LocalServer::readArguments(int argc, char ** argv, Arguments & common_argum } -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wmissing-declarations" int mainEntryClickHouseLocal(int argc, char ** argv) { diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 242e995e466..317d93aaf0c 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -1204,8 +1204,8 @@ public: } -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wunused-function" +#clang GCC diagnostic ignored "-Wmissing-declarations" int mainEntryClickHouseObfuscator(int argc, char ** argv) try diff --git a/src/Common/SSH/Wrappers.cpp b/src/Common/SSH/Wrappers.cpp index 463338dbe3f..a9b9f758c6e 100644 --- a/src/Common/SSH/Wrappers.cpp +++ b/src/Common/SSH/Wrappers.cpp @@ -2,13 +2,13 @@ # if USE_SSH # include -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wreserved-macro-identifier" -# pragma GCC diagnostic ignored "-Wreserved-identifier" +# pragma 
clang diagnostic push
+# pragma clang diagnostic ignored "-Wreserved-macro-identifier"
+# pragma clang diagnostic ignored "-Wreserved-identifier"

 # include

-# pragma GCC diagnostic pop
+# pragma clang diagnostic pop

 namespace
 {
diff --git a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h
index 222f9f19168..80fb9ce3900 100644
--- a/src/Functions/GatherUtils/Sources.h
+++ b/src/Functions/GatherUtils/Sources.h
@@ -140,9 +140,9 @@ struct NumericArraySource : public ArraySourceImpl>

 /// The methods can be virtual or not depending on the template parameter. See IStringSource.

-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wsuggest-override"
-#pragma GCC diagnostic ignored "-Wsuggest-destructor-override"
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wsuggest-override"
+#pragma clang diagnostic ignored "-Wsuggest-destructor-override"

 template
 struct ConstSource : public Base
@@ -231,7 +231,7 @@ struct ConstSource : public Base
 }
 };

-#pragma GCC diagnostic pop
+#pragma clang diagnostic pop

 struct StringSource
 {
From 19d8256fa83a4e8353dcad372067085ec8f0828d Mon Sep 17 00:00:00 2001
From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com>
Date: Mon, 11 Mar 2024 14:44:19 +0100
Subject: [PATCH 090/283] Update test.py

---
 tests/integration/test_backup_restore_s3/test.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py
index f3f4837c317..d65fc1f09d6 100644
--- a/tests/integration/test_backup_restore_s3/test.py
+++ b/tests/integration/test_backup_restore_s3/test.py
@@ -130,11 +130,11 @@ def check_system_tables(backup_query_id=None):
         if disk
     ]
     expected_disks = (
-        ("default", "local", "None", "None"),
-        ("disk_s3", "object_storage", "s3", "local"),
-        ("disk_s3_cache", "object_storage", "s3", "local"),
-        ("disk_s3_other_bucket", "object_storage", "s3", "local"),
-        ("disk_s3_plain", "object_storage", "s3", "plain"),
+        ("default", "Local", "None", "None"),
+        ("disk_s3", "ObjectStorage", "S3", "Local"),
+        ("disk_s3_cache", "ObjectStorage", "S3", "Local"),
+        ("disk_s3_other_bucket", "ObjectStorage", "S3", "Local"),
+        ("disk_s3_plain", "ObjectStorage", "S3", "Plain"),
     )
     assert len(expected_disks) == len(disks)
     for expected_disk in expected_disks:
From 91de3825171eefb8f828c2907181b6a5e6b0f017 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Mon, 11 Mar 2024 14:00:01 +0000
Subject: [PATCH 091/283] Split `DictionaryPipelineExecutor` into separate file

---
 src/Dictionaries/CacheDictionary.cpp          |  2 +-
 .../DictionaryPipelineExecutor.cpp            | 42 +++++++++++++++++++
 src/Dictionaries/DictionaryPipelineExecutor.h | 27 ++++++++++++
 src/Dictionaries/DictionarySourceHelpers.cpp  | 29 -------------
 src/Dictionaries/DictionarySourceHelpers.h    | 17 --------
 src/Dictionaries/FlatDictionary.cpp           |  2 +-
 src/Dictionaries/HashedArrayDictionary.cpp    |  1 +
 src/Dictionaries/HashedDictionary.h           |  2 +-
 src/Dictionaries/IPAddressDictionary.cpp      |  1 +
 src/Dictionaries/PolygonDictionary.cpp        |  3 +-
 src/Dictionaries/RangeHashedDictionary.h      |  6 +--
 src/Dictionaries/RegExpTreeDictionary.cpp     |  1 +
 .../registerRangeHashedDictionary.cpp         |  5 ++-
 13 files changed, 82 insertions(+), 56 deletions(-)
 create mode 100644 src/Dictionaries/DictionaryPipelineExecutor.cpp
 create mode 100644 src/Dictionaries/DictionaryPipelineExecutor.h

diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp
index
8444042db9e..b136d5ebd71 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/src/Dictionaries/DictionaryPipelineExecutor.cpp b/src/Dictionaries/DictionaryPipelineExecutor.cpp new file mode 100644 index 00000000000..30d1ab95f53 --- /dev/null +++ b/src/Dictionaries/DictionaryPipelineExecutor.cpp @@ -0,0 +1,42 @@ +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int LOGICAL_ERROR; +} + +DictionaryPipelineExecutor::DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async) + : async_executor(async ? std::make_unique(pipeline_) : nullptr) + , executor(async ? nullptr : std::make_unique(pipeline_)) +{ +} + +bool DictionaryPipelineExecutor::pull(Block & block) +{ + if (async_executor) + { + while (true) + { + bool has_data = async_executor->pull(block); + if (has_data && !block) + continue; + return has_data; + } + } + else if (executor) + return executor->pull(block); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "DictionaryPipelineExecutor is not initialized"); +} + +DictionaryPipelineExecutor::~DictionaryPipelineExecutor() = default; + +} diff --git a/src/Dictionaries/DictionaryPipelineExecutor.h b/src/Dictionaries/DictionaryPipelineExecutor.h new file mode 100644 index 00000000000..601213e5039 --- /dev/null +++ b/src/Dictionaries/DictionaryPipelineExecutor.h @@ -0,0 +1,27 @@ +#pragma once + +#include + +namespace DB +{ + +class Block; +class QueryPipeline; +class PullingAsyncPipelineExecutor; +class PullingPipelineExecutor; + +/// Wrapper for `Pulling(Async)PipelineExecutor` to dynamically dispatch calls to the right executor +class DictionaryPipelineExecutor +{ +public: + DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async); + bool pull(Block & block); + + ~DictionaryPipelineExecutor(); + +private: + std::unique_ptr async_executor; + std::unique_ptr executor; +}; + +} diff --git a/src/Dictionaries/DictionarySourceHelpers.cpp b/src/Dictionaries/DictionarySourceHelpers.cpp index d9a4d9ccbcf..f0e1bc4109a 100644 --- a/src/Dictionaries/DictionarySourceHelpers.cpp +++ b/src/Dictionaries/DictionarySourceHelpers.cpp @@ -9,15 +9,11 @@ #include #include -#include -#include - namespace DB { namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; } @@ -135,29 +131,4 @@ String TransformWithAdditionalColumns::getName() const return "TransformWithAdditionalColumns"; } -DictionaryPipelineExecutor::DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async) - : async_executor(async ? std::make_unique(pipeline_) : nullptr) - , executor(async ? 
nullptr : std::make_unique(pipeline_)) -{} - -bool DictionaryPipelineExecutor::pull(Block & block) -{ - if (async_executor) - { - while (true) - { - bool has_data = async_executor->pull(block); - if (has_data && !block) - continue; - return has_data; - } - } - else if (executor) - return executor->pull(block); - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "DictionaryPipelineExecutor is not initialized"); -} - -DictionaryPipelineExecutor::~DictionaryPipelineExecutor() = default; - } diff --git a/src/Dictionaries/DictionarySourceHelpers.h b/src/Dictionaries/DictionarySourceHelpers.h index a545b5cdac7..39c6e7b3c42 100644 --- a/src/Dictionaries/DictionarySourceHelpers.h +++ b/src/Dictionaries/DictionarySourceHelpers.h @@ -16,10 +16,6 @@ namespace DB struct DictionaryStructure; class SettingsChanges; -class PullingPipelineExecutor; -class PullingAsyncPipelineExecutor; -class QueryPipeline; - /// For simple key Block blockForIds( @@ -55,17 +51,4 @@ private: size_t current_range_index = 0; }; -/// Wrapper for `Pulling(Async)PipelineExecutor` to dynamically dispatch calls to the right executor -class DictionaryPipelineExecutor -{ -public: - DictionaryPipelineExecutor(QueryPipeline & pipeline_, bool async); - bool pull(Block & block); - - ~DictionaryPipelineExecutor(); -private: - std::unique_ptr async_executor; - std::unique_ptr executor; -}; - } diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index efb7d0a176c..fc58ff525bd 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index d09f402143e..2420c07277c 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index b3b8cc56868..46a0af487f5 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Dictionaries/IPAddressDictionary.cpp b/src/Dictionaries/IPAddressDictionary.cpp index e1c9572e607..1bc6d16c932 100644 --- a/src/Dictionaries/IPAddressDictionary.cpp +++ b/src/Dictionaries/IPAddressDictionary.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp index 485b48d9d81..1456a0db750 100644 --- a/src/Dictionaries/PolygonDictionary.cpp +++ b/src/Dictionaries/PolygonDictionary.cpp @@ -1,6 +1,5 @@ #include "PolygonDictionary.h" -#include #include #include @@ -15,7 +14,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 1a6ee7e81d4..509b991b30c 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include @@ -29,10 +31,6 @@ #include #include -#include -#include -#include - namespace DB { diff --git a/src/Dictionaries/RegExpTreeDictionary.cpp b/src/Dictionaries/RegExpTreeDictionary.cpp index 4d82aa9ca0e..8930074bbe0 100644 --- a/src/Dictionaries/RegExpTreeDictionary.cpp +++ 
b/src/Dictionaries/RegExpTreeDictionary.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/src/Dictionaries/registerRangeHashedDictionary.cpp b/src/Dictionaries/registerRangeHashedDictionary.cpp index 4e20abfdb79..8123b811198 100644 --- a/src/Dictionaries/registerRangeHashedDictionary.cpp +++ b/src/Dictionaries/registerRangeHashedDictionary.cpp @@ -1,5 +1,8 @@ -#include "RangeHashedDictionary.h" +#include + #include +#include +#include #include namespace DB From 724cc903afb9283a8369a62a836d04eceae42e57 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 11 Mar 2024 15:56:02 +0100 Subject: [PATCH 092/283] Restart CI --- tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh b/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh index 1bf21dfc53b..6cd5c3b486c 100755 --- a/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh +++ b/tests/queries/0_stateless/03001_parallel_parsing_deadlock.sh @@ -9,4 +9,3 @@ DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.csv $CLICKHOUSE_LOCAL -q "select number > 1000000 ? 'error' : toString(number) from numbers(2000000) format CSV" > $DATA_FILE $CLICKHOUSE_LOCAL -q "select * from file($DATA_FILE, CSV, 'x UInt64') format Null settings input_format_allow_errors_ratio=1" rm $DATA_FILE - From ecc30448baa1c6283f3f0f13c83cfd1bf4428b9b Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 11 Mar 2024 15:26:29 +0000 Subject: [PATCH 093/283] Fix filtering when optimize_use_implicit_projections=1 --- .../optimizeUseAggregateProjection.cpp | 4 ++ src/Storages/VirtualColumnUtils.cpp | 2 +- src/Storages/VirtualColumnUtils.h | 3 + ...ions_non_deterministoc_functions.reference | 55 +++++++++++++++++++ ...rojections_non_deterministoc_functions.sql | 28 ++++++++++ 5 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.reference create mode 100644 tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.sql diff --git a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp index 91f4213ff43..b40fea47b3c 100644 --- a/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp +++ b/src/Processors/QueryPlan/Optimizations/optimizeUseAggregateProjection.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -464,6 +465,9 @@ AggregateProjectionCandidates getAggregateProjectionCandidates( // LOG_TRACE(getLogger("optimizeUseProjections"), "Query DAG: {}", dag.dag->dumpDAG()); candidates.has_filter = dag.filter_node; + /// We can't use minmax projection if filter has non-deterministic functions. 
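A note on the check used at this point: isDeterministicInScopeOfQuery, exposed from VirtualColumnUtils later in this patch, is a plain recursive walk over the filter DAG. Roughly this shape, sketched here with a simplified stand-in node type rather than the real ActionsDAG:

    #include <vector>

    struct Node
    {
        bool deterministic_in_scope_of_query = true;  // false for rand(), now(), ...
        std::vector<const Node *> children;
    };

    bool isDeterministicInScopeOfQuery(const Node * node)
    {
        for (const auto * child : node->children)
            if (!isDeterministicInScopeOfQuery(child))
                return false;  // one non-deterministic call poisons the whole predicate
        return node->deterministic_in_scope_of_query;
    }

    int main()
    {
        Node leaf;
        leaf.deterministic_in_scope_of_query = false;  // stands in for rand()
        Node root;
        root.children.push_back(&leaf);
        return isDeterministicInScopeOfQuery(&root) ? 1 : 0;  // exits 0: not usable
    }
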
+ if (dag.filter_node && !VirtualColumnUtils::isDeterministicInScopeOfQuery(dag.filter_node)) + can_use_minmax_projection = false; if (can_use_minmax_projection) { diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 897090223d6..c3ac27903c9 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -238,7 +238,7 @@ static bool canEvaluateSubtree(const ActionsDAG::Node * node, const Block & allo return true; } -static bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) +bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node) { for (const auto * child : node->children) { diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index b5526fc5c7f..83494872cac 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -25,6 +25,9 @@ void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, /// Just filters block. Block should contain all the required columns. void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context); +/// Recursively checks if all functions used in DAG are deterministic in scope of query. +bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node); + /// Extract a part of predicate that can be evaluated using only columns from input_names. ActionsDAGPtr splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs); diff --git a/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.reference b/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.reference new file mode 100644 index 00000000000..8233925d609 --- /dev/null +++ b/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.reference @@ -0,0 +1,55 @@ +-- count +100000 all_10_10_0 +100000 all_1_1_0 +100000 all_2_2_0 +100000 all_3_3_0 +100000 all_4_4_0 +100000 all_5_5_0 +100000 all_6_6_0 +100000 all_7_7_0 +100000 all_8_8_0 +100000 all_9_9_0 +-- rand()%2=0: +1 all_10_10_0 +1 all_1_1_0 +1 all_2_2_0 +1 all_3_3_0 +1 all_4_4_0 +1 all_5_5_0 +1 all_6_6_0 +1 all_7_7_0 +1 all_8_8_0 +1 all_9_9_0 +-- optimize_use_implicit_projections=0 +1 all_10_10_0 +1 all_1_1_0 +1 all_2_2_0 +1 all_3_3_0 +1 all_4_4_0 +1 all_5_5_0 +1 all_6_6_0 +1 all_7_7_0 +1 all_8_8_0 +1 all_9_9_0 +-- optimize_trivial_count_query=0 +1 all_10_10_0 +1 all_1_1_0 +1 all_2_2_0 +1 all_3_3_0 +1 all_4_4_0 +1 all_5_5_0 +1 all_6_6_0 +1 all_7_7_0 +1 all_8_8_0 +1 all_9_9_0 +-- optimize_trivial_count_query=0, optimize_use_implicit_projections=0 +1 all_10_10_0 +1 all_1_1_0 +1 all_2_2_0 +1 all_3_3_0 +1 all_4_4_0 +1 all_5_5_0 +1 all_6_6_0 +1 all_7_7_0 +1 all_8_8_0 +1 all_9_9_0 diff --git a/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.sql b/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.sql new file mode 100644 index 00000000000..3be9bc3982a --- /dev/null +++ b/tests/queries/0_stateless/03008_filter_projections_non_deterministoc_functions.sql @@ -0,0 +1,28 @@ +create table test (number UInt64) engine=MergeTree order by number; +system stop merges test; +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from 
numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); +INSERT INTO test select number from numbers(100000); + +select '-- count'; +SELECT count(), _part FROM test GROUP BY _part ORDER BY _part; + +select '-- rand()%2=0:'; +SELECT count() > 0 AND count() < 100000, _part FROM test WHERE rand(1)%2=1 GROUP BY _part ORDER BY _part; + +select '-- optimize_use_implicit_projections=0'; +SELECT count() > 0 AND count() < 100000, _part FROM test WHERE rand(2)%2=1 GROUP BY _part ORDER BY _part settings optimize_use_implicit_projections=0; + +select '-- optimize_trivial_count_query=0'; +SELECT count() > 0 AND count() < 100000, _part FROM test WHERE rand(3)%2=1 GROUP BY _part ORDER BY _part settings optimize_trivial_count_query=0; + +select '-- optimize_trivial_count_query=0, optimize_use_implicit_projections=0'; +SELECT count() > 0 AND count() < 100000, _part FROM test WHERE rand(4)%2=1 GROUP BY _part ORDER BY _part settings optimize_trivial_count_query=0,optimize_use_implicit_projections=0; + From 81b2a1f621d9bd64fde2c4e4f6a83c9b3b0c461a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 11 Mar 2024 15:34:02 +0000 Subject: [PATCH 094/283] Fix build --- programs/obfuscator/Obfuscator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 317d93aaf0c..b2bf942af4e 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -1205,7 +1205,7 @@ public: } #pragma clang diagnostic ignored "-Wunused-function" -#clang GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wmissing-declarations" int mainEntryClickHouseObfuscator(int argc, char ** argv) try From 38f41ee311d0a36d194965e5815489a25c60e449 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 11 Mar 2024 16:55:30 +0100 Subject: [PATCH 095/283] Fix integration test --- tests/integration/test_disk_types/test.py | 10 +++++----- .../test_endpoint_macro_substitution/test.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index af482b97be3..86579911b3e 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -3,10 +3,10 @@ from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV disk_types = { - "default": "local", - "disk_s3": "s3", - "disk_hdfs": "hdfs", - "disk_encrypted": "s3", + "default": "Local", + "disk_s3": "S3", + "disk_hdfs": "HDFS", + "disk_encrypted": "S3", } @@ -55,7 +55,7 @@ def test_different_types(cluster): def test_select_by_type(cluster): node = cluster.instances["node"] for name, disk_type in list(disk_types.items()): - if disk_type != "s3": + if disk_type != "S3": assert ( node.query( "SELECT name FROM system.disks WHERE type='" + disk_type + "'" diff --git a/tests/integration/test_endpoint_macro_substitution/test.py b/tests/integration/test_endpoint_macro_substitution/test.py index 42a8ddbda84..bec3d9de0e3 100644 --- a/tests/integration/test_endpoint_macro_substitution/test.py +++ b/tests/integration/test_endpoint_macro_substitution/test.py @@ -4,10 +4,10 @@ from helpers.test_tools import TSV from pyhdfs import HdfsClient disk_types = { - "default": "local", - "disk_s3": "s3", - "disk_hdfs": "hdfs", - "disk_encrypted": "s3", + "default": "Local", + "disk_s3": "S3", + "disk_hdfs": "HDFS", + "disk_encrypted": "S3", } @@ -63,7 
+63,7 @@ def test_select_by_type(cluster): fs = HdfsClient(hosts=cluster.hdfs_ip) for name, disk_type in list(disk_types.items()): - if disk_type != "s3": + if disk_type != "S3": assert ( node.query( "SELECT name FROM system.disks WHERE type='" + disk_type + "'" From 16e01eb93ad449c61417dcaccd570439364b0714 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 11 Mar 2024 18:05:51 +0100 Subject: [PATCH 096/283] Fix style --- src/Core/Settings.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index fb456b46d89..8257b94cd9f 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -15,6 +15,7 @@ namespace ErrorCodes extern const int THERE_IS_NO_PROFILE; extern const int NO_ELEMENTS_IN_CONFIG; extern const int UNKNOWN_ELEMENT_IN_CONFIG; + extern const int BAD_ARGUMENTS; } IMPLEMENT_SETTINGS_TRAITS(SettingsTraits, LIST_OF_SETTINGS) From 55a82047613c607dedb592fed019d04455e8c8e8 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 11 Mar 2024 19:43:30 +0100 Subject: [PATCH 097/283] Fix test --- .../0_stateless/03003_compatibility_setting_bad_value.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql index 9a6f4e7944a..48e98798c51 100644 --- a/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql +++ b/tests/queries/0_stateless/03003_compatibility_setting_bad_value.sql @@ -1,2 +1,2 @@ -select 42 settings compatibility=NULL; -- {clientError BAD_GET} +select 42 settings compatibility=NULL; -- {clientError BAD_ARGUMENTS} From 5a71636411cb358c94e58b7caac18c22104b0e1c Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 11 Mar 2024 19:44:52 +0100 Subject: [PATCH 098/283] Fxi --- tests/integration/test_disk_types/test.py | 30 +++++++++++++++++++ .../test_endpoint_macro_substitution/test.py | 9 +++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index 86579911b3e..5047cdc605e 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -50,6 +50,36 @@ def test_different_types(cluster): assert ( fields[encrypted_col_ix] == "0" ), f"{fields[name_col_ix]} expected to be non-encrypted!" +def test_different_types(cluster): + node = cluster.instances["node"] + response = TSV.toMat(node.query("SELECT * FROM system.disks FORMAT TSVWithNames")) + + assert len(response) > len(disk_types) # at least one extra line for header + + name_col_ix = response[0].index("name") + type_col_ix = response[0].index("type") + encrypted_col_ix = response[0].index("is_encrypted") + + for fields in response[1:]: # skip header + assert len(fields) >= 7 + expected_disk_type = disk_types.get(fields[name_col_ix], "UNKNOWN") + + if expected_disk_type != "Local": + disk_type = fields[response[0].index("object_storage_type")] + else: + disk_type = fields[type_col_ix] + + assert ( + expected_disk_type == disk_type + ), f"Wrong type ({fields[type_col_ix]}) for disk {fields[name_col_ix]}!" + if "encrypted" in fields[name_col_ix]: + assert ( + fields[encrypted_col_ix] == "1" + ), f"{fields[name_col_ix]} expected to be encrypted!" + else: + assert ( + fields[encrypted_col_ix] == "0" + ), f"{fields[name_col_ix]} expected to be non-encrypted!" 
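+
+    # For disks defined via the newer `object_storage` configuration, system.disks reports
+    # the concrete backend (e.g. S3, HDFS) in the `object_storage_type` column, which is why
+    # the checks above compare non-Local disks against that column.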
def test_select_by_type(cluster): diff --git a/tests/integration/test_endpoint_macro_substitution/test.py b/tests/integration/test_endpoint_macro_substitution/test.py index bec3d9de0e3..e161d8e82ff 100644 --- a/tests/integration/test_endpoint_macro_substitution/test.py +++ b/tests/integration/test_endpoint_macro_substitution/test.py @@ -45,8 +45,15 @@ def test_different_types(cluster): for fields in response[1:]: # skip header assert len(fields) >= 7 + expected_disk_type = disk_types.get(fields[name_col_ix], "UNKNOWN") + + if expected_disk_type != "Local": + disk_type = fields[response[0].index("object_storage_type")] + else: + disk_type = fields[type_col_ix] + assert ( - disk_types.get(fields[name_col_ix], "UNKNOWN") == fields[type_col_ix] + expected_disk_type == disk_type ), f"Wrong type ({fields[type_col_ix]}) for disk {fields[name_col_ix]}!" if "encrypted" in fields[name_col_ix]: assert ( From 9b055c3a43039387b42e755efddd83b9a8934ca6 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 11 Mar 2024 20:38:30 +0100 Subject: [PATCH 099/283] Use assert_cast to prevent nullptr dereference on bad column types in FunctionsConversion --- src/Functions/FunctionsConversion.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 1522e76893e..f338af28240 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -4561,7 +4561,7 @@ arguments, result_type, input_rows_count); \ if (from_low_cardinality) { - const auto * col_low_cardinality = typeid_cast(arguments[0].column.get()); + const auto * col_low_cardinality = assert_cast(arguments[0].column.get()); if (skip_not_null_check && col_low_cardinality->containsNull()) throw Exception(ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN, "Cannot convert NULL value to non-Nullable type"); @@ -4586,7 +4586,7 @@ arguments, result_type, input_rows_count); \ if (to_low_cardinality) { auto res_column = to_low_cardinality->createColumn(); - auto * col_low_cardinality = typeid_cast(res_column.get()); + auto * col_low_cardinality = assert_cast(res_column.get()); if (from_low_cardinality && !src_converted_to_full_column) { From 3a26b9c89ee3083884fde341c2af418bcde2f4cf Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 11 Mar 2024 19:42:25 +0000 Subject: [PATCH 100/283] impl --- .../0_stateless/02887_mutations_subcolumns.reference | 6 +++--- tests/queries/0_stateless/02887_mutations_subcolumns.sql | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02887_mutations_subcolumns.reference b/tests/queries/0_stateless/02887_mutations_subcolumns.reference index c2d6cbbd225..1ccc83b48a3 100644 --- a/tests/queries/0_stateless/02887_mutations_subcolumns.reference +++ b/tests/queries/0_stateless/02887_mutations_subcolumns.reference @@ -5,6 +5,6 @@ 4 ttt 5 ttt 6 ttt -{"a":"1","obj":{"k1":1,"k2":null,"k3":null}} -{"a":"3","obj":{"k1":null,"k2":null,"k3":1}} -{"a":"1","obj":{"k1":1,"k2":null,"k3":null}} +1 [('k1',1)] +3 [('k3',1)] +1 [('k1',1)] diff --git a/tests/queries/0_stateless/02887_mutations_subcolumns.sql b/tests/queries/0_stateless/02887_mutations_subcolumns.sql index a01158e1b06..87b3009e929 100644 --- a/tests/queries/0_stateless/02887_mutations_subcolumns.sql +++ b/tests/queries/0_stateless/02887_mutations_subcolumns.sql @@ -40,9 +40,9 @@ INSERT INTO t_mutations_subcolumns VALUES (2, '{"k2": 1}'); INSERT INTO t_mutations_subcolumns VALUES (3, 
'{"k3": 1}'); ALTER TABLE t_mutations_subcolumns DELETE WHERE obj.k2 = 1; -SELECT * FROM t_mutations_subcolumns ORDER BY a FORMAT JSONEachRow; +SELECT a, arrayFilter(x -> not isNull(x.2), tupleToNameValuePairs(obj)) FROM t_mutations_subcolumns ORDER BY a; ALTER TABLE t_mutations_subcolumns DELETE WHERE isNull(obj.k1); -SELECT * FROM t_mutations_subcolumns ORDER BY a FORMAT JSONEachRow; +SELECT a, arrayFilter(x -> not isNull(x.2), tupleToNameValuePairs(obj)) FROM t_mutations_subcolumns ORDER BY a; DROP TABLE t_mutations_subcolumns; From 2e74685ba6ea8a3cc32ff0e21d0ee657517ef5a4 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 11 Mar 2024 19:58:43 +0000 Subject: [PATCH 101/283] Make variant tests a bit faster --- .../02941_variant_type_2.reference | 80 +++++++++---------- .../0_stateless/02941_variant_type_2.sh | 12 +-- ...different_local_and_global_order.reference | 30 +++---- ...e_with_different_local_and_global_order.sh | 8 +- 4 files changed, 65 insertions(+), 65 deletions(-) diff --git a/tests/queries/0_stateless/02941_variant_type_2.reference b/tests/queries/0_stateless/02941_variant_type_2.reference index 4b6d53c52ac..20a5176cb5e 100644 --- a/tests/queries/0_stateless/02941_variant_type_2.reference +++ b/tests/queries/0_stateless/02941_variant_type_2.reference @@ -1,51 +1,51 @@ Memory test4 insert test4 select -1000000 -200000 -200000 -200000 -200000 -200000 -200000 -200000 +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 MergeTree compact test4 insert test4 select -1000000 -200000 -200000 -200000 -200000 -200000 -200000 -200000 +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 test4 select -1000000 -200000 -200000 -200000 -200000 -200000 -200000 -200000 +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 MergeTree wide test4 insert test4 select -1000000 -200000 -200000 -200000 -200000 -200000 -200000 -200000 +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 test4 select -1000000 -200000 -200000 -200000 -200000 -200000 -200000 -200000 +500000 +100000 +100000 +100000 +100000 +100000 +100000 +100000 diff --git a/tests/queries/0_stateless/02941_variant_type_2.sh b/tests/queries/0_stateless/02941_variant_type_2.sh index 509c537e7fc..d1fa0a777c9 100755 --- a/tests/queries/0_stateless/02941_variant_type_2.sh +++ b/tests/queries/0_stateless/02941_variant_type_2.sh @@ -12,12 +12,12 @@ CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspic function test4_insert() { echo "test4 insert" - $CH_CLIENT -nmq "insert into test select number, NULL from numbers(200000); -insert into test select number + 200000, number from numbers(200000); -insert into test select number + 400000, 'str_' || toString(number) from numbers(200000); -insert into test select number + 600000, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(200000); -insert into test select number + 800000, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from numbers(200000); -insert into test select number + 1000000, range(number % 20 + 1)::Array(UInt64) from numbers(200000);" + $CH_CLIENT -nmq "insert into test select number, NULL from numbers(100000); +insert into test select number + 100000, number from numbers(100000); +insert into test select number + 200000, 'str_' || toString(number) from numbers(100000); +insert into test select number + 300000, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(100000); +insert into test select number + 400000, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from 
numbers(100000); +insert into test select number + 500000, range(number % 20 + 1)::Array(UInt64) from numbers(100000);" } function test4_select diff --git a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference index 1736a307c42..4109a88997c 100644 --- a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference +++ b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.reference @@ -44,9 +44,9 @@ str_38 str_38 \N ----------------------------------------------------------------------------------------------------------- test2 insert test2 select -2500000 -750000 -1750000 +500000 +100000 +400000 ----------------------------------------------------------------------------------------------------------- MergeTree compact test1 insert @@ -136,14 +136,14 @@ str_38 str_38 \N ----------------------------------------------------------------------------------------------------------- test2 insert test2 select -2500000 -750000 -1750000 +500000 +100000 +400000 ----------------------------------------------------------------------------------------------------------- test2 select -2500000 -750000 -1750000 +500000 +100000 +400000 ----------------------------------------------------------------------------------------------------------- MergeTree wide test1 insert @@ -233,12 +233,12 @@ str_38 str_38 \N ----------------------------------------------------------------------------------------------------------- test2 insert test2 select -2500000 -750000 -1750000 +500000 +100000 +400000 ----------------------------------------------------------------------------------------------------------- test2 select -2500000 -750000 -1750000 +500000 +100000 +400000 ----------------------------------------------------------------------------------------------------------- diff --git a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh index 3bb37719a3f..1d88757a5d6 100755 --- a/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh +++ b/tests/queries/0_stateless/02943_variant_type_with_different_local_and_global_order.sh @@ -29,10 +29,10 @@ function test1_select() function test2_insert() { echo "test2 insert" - $CH_CLIENT -q "insert into test select number, number::Variant(UInt64)::Variant(UInt64, Array(UInt64)) from numbers(1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" - $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)) as res from numbers(1000000, 1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" - $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64)) as res from numbers(2000000, 1000000) settings max_insert_block_size = 100000, min_insert_block_size_rows=100000" - $CH_CLIENT -q "insert into test select number, if(number < 3500000, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)), if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64))) from numbers(3000000, 1000000) settings max_insert_block_size = 100000, 
min_insert_block_size_rows=100000" + $CH_CLIENT -q "insert into test select number, number::Variant(UInt64)::Variant(UInt64, Array(UInt64)) from numbers(200000) settings max_insert_block_size = 10000, min_insert_block_size_rows=10000" + $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)) as res from numbers(200000, 200000) settings max_insert_block_size = 10000, min_insert_block_size_rows=10000" + $CH_CLIENT -q "insert into test select number, if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64)) as res from numbers(400000, 200000) settings max_insert_block_size = 10000, min_insert_block_size_rows=10000" + $CH_CLIENT -q "insert into test select number, if(number < 3500000, if(number % 2, NULL, number)::Variant(UInt64)::Variant(UInt64, String, Array(UInt64)), if(number % 2, NULL, 'str_' || toString(number))::Variant(String)::Variant(UInt64, String, Array(UInt64))) from numbers(600000, 200000) settings max_insert_block_size = 10000, min_insert_block_size_rows=10000" } function test2_select() From 120a1fdb5f817b442bf659da243407fb7003eaa1 Mon Sep 17 00:00:00 2001 From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com> Date: Mon, 11 Mar 2024 17:24:33 -0300 Subject: [PATCH 102/283] Improves varPop docs. Adds varPopStable. --- .../aggregate-functions/reference/varpop.md | 99 +++++++++++++++++-- 1 file changed, 91 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpop.md b/docs/en/sql-reference/aggregate-functions/reference/varpop.md index 751688b0830..5f18bdc30f6 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varpop.md @@ -1,16 +1,99 @@ --- -slug: /en/sql-reference/aggregate-functions/reference/varpop +title: "varPop" +slug: "/en/sql-reference/aggregate-functions/reference/varpop" sidebar_position: 32 --- -# varPop(x) +This page covers the `varPop` and `varPopStable` functions available in ClickHouse. -Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`. +## varPop -In other words, dispersion for a set of values. Returns `Float64`. +Calculates the population covariance between two data columns. The population covariance measures the degree to which two variables vary together. Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`. -Alias: `VAR_POP`. +**Syntax** -:::note -This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error. -::: \ No newline at end of file +```sql +covarPop(x, y) +``` + +**Parameters** + +- `x`: The first data column. [Numeric](../../../native-protocol/columns.md) +- `y`: The second data column. [Numeric](../../../native-protocol/columns.md) + +**Returned value** + +Returns an integer of type `Float64`. + +**Implementation details** + +This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable` function](#varPopStable). 
+
+**Example**
+
+```sql
+DROP TABLE IF EXISTS test_data;
+CREATE TABLE test_data
+(
+    x Int32
+)
+ENGINE = Memory;
+
+INSERT INTO test_data VALUES (1), (2), (3), (4), (5);
+
+SELECT
+    varPop(x) AS var_pop
+FROM test_data;
+```
+
+```response
+2
+```
+
+## varPopStable
+
+Calculates the population variance of a data set using a stable, numerically accurate method. This function is designed to provide reliable results even with large data sets or values that might cause numerical instability in other implementations.
+
+**Syntax**
+
+```sql
+varPopStable(x)
+```
+
+**Parameters**
+
+- `x`: The column of values for which to compute the population variance. [Expression](../syntax#syntax-expressions)
+
+**Returned value**
+
+Returns a value of type `Float64`.
+
+**Implementation details**
+
+Unlike [`varPop`](#varpop), this function uses a stable, numerically accurate algorithm to calculate the population variance, avoiding issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations.
+
+**Example**
+
+Query:
+
+```sql
+DROP TABLE IF EXISTS test_data;
+CREATE TABLE test_data
+(
+    x Int32
+)
+ENGINE = Memory;
+
+INSERT INTO test_data VALUES (1), (3), (5), (7), (9);
+
+SELECT
+    varPopStable(x) AS var_pop_stable
+FROM test_data;
+```
+
+```response
+8
+```

From 281dc8d29deba2980e6b191edefa3b62114d38a7 Mon Sep 17 00:00:00 2001
From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com>
Date: Mon, 11 Mar 2024 17:48:12 -0300
Subject: [PATCH 103/283] Improves varSamp docs. Adds varSampStable docs.

---
 .../aggregate-functions/reference/varsamp.md  | 126 ++++++++++++++++--
 1 file changed, 118 insertions(+), 8 deletions(-)

diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md
index 9b2b94936ec..e75cb075ff8 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md
@@ -1,18 +1,128 @@
 ---
+title: "varSamp"
 slug: /en/sql-reference/aggregate-functions/reference/varsamp
 sidebar_position: 33
 ---
 
-# varSamp
+This page contains information on the `varSamp` and `varSampStable` ClickHouse functions.
 
-Calculates the amount `Σ((x - x̅)^2) / (n - 1)`, where `n` is the sample size and `x̅`is the average value of `x`.
+## varSamp
 
-It represents an unbiased estimate of the variance of a random variable if passed values from its sample.
+Calculates the sample variance of a data set.
 
-Returns `Float64`. When `n <= 1`, returns `+∞`.
+**Syntax**
 
-Alias: `VAR_SAMP`.
+```sql
+varSamp(expr)
+```
 
-:::note
-This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error.
-:::
+**Parameters**
+
+- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../syntax#syntax-expressions)
+
+**Returned value**
+
+Returns a Float64 value representing the sample variance of the input data set. 
+
+**Implementation details**
+
+The `varSamp()` function calculates the sample variance using the following formula:
+
+```plaintext
+∑(x - mean(x))^2 / (n - 1)
+```
+
+Where:
+
+- `x` is each individual data point in the data set.
+- `mean(x)` is the arithmetic mean of the data set.
+- `n` is the number of data points in the data set.
+
+The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead.
+
+This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable`](#varsampstable) function.
+
+**Example**
+
+Query:
+
+```sql
+CREATE TABLE example_table
+(
+    id UInt64,
+    value Float64
+)
+ENGINE = MergeTree
+ORDER BY id;
+
+INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7);
+
+SELECT varSamp(value) FROM example_table;
+```
+
+Response:
+
+```response
+0.8650000000000091
+```
+
+## varSampStable
+
+Calculates the sample variance of a data set using a numerically stable algorithm.
+
+**Syntax**
+
+```sql
+varSampStable(expr)
+```
+
+**Parameters**
+
+- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../syntax#syntax-expressions)
+
+**Returned value**
+
+The `varSampStable()` function returns a Float64 value representing the sample variance of the input data set.
+
+**Implementation details**
+
+The `varSampStable()` function calculates the sample variance using the same formula as [`varSamp()`](#varsamp):
+
+```plaintext
+∑(x - mean(x))^2 / (n - 1)
+```
+
+Where:
+
+- `x` is each individual data point in the data set.
+- `mean(x)` is the arithmetic mean of the data set.
+- `n` is the number of data points in the data set.
+
+The difference between `varSampStable()` and `varSamp()` is that `varSampStable()` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values.
+
+Like `varSamp()`, the `varSampStable()` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable()` function](./varpop#varpopstable) instead.
+
+**Example**
+
+Query:
+
+```sql
+CREATE TABLE example_table
+(
+    id UInt64,
+    value Float64
+)
+ENGINE = MergeTree
+ORDER BY id;
+
+INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7);
+
+SELECT varSampStable(value) FROM example_table;
+```
+
+Response:
+
+```response
+0.865
+```
+
+This query calculates the sample variance of the `value` column in `example_table` using the `varSampStable()` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp()` due to the more precise handling of floating-point arithmetic.
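+
+As a quick cross-check of the two estimators, the following sketch reuses `example_table` from the examples above; `varPop` divides the sum of squared deviations by `n`, while `varSamp` divides it by `n - 1`:
+
+```sql
+SELECT
+    varPop(value) AS var_pop,    -- 3.46 / 5 = 0.692 for the five example values
+    varSamp(value) AS var_samp,  -- 3.46 / 4 = 0.865
+    varPop(value) * count() / (count() - 1) AS rescaled_pop  -- equals var_samp
+FROM example_table;
+```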
From c628eaca8ba19584fe36067dee8e6ec3e8f5cc4b Mon Sep 17 00:00:00 2001 From: Zhuo Qiu Date: Tue, 26 Dec 2023 14:13:07 +0800 Subject: [PATCH 104/283] Consider deleted rows when selecting parts to merge --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 90 +++++++++++++++++++ src/Storages/MergeTree/IMergeTreeDataPart.h | 13 +++ .../MergeTree/MergeFromLogEntryTask.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 1 + .../MergeTree/MergeTreeDataMergerMutator.cpp | 11 ++- .../MergeTree/MergeTreeDataMergerMutator.h | 2 +- .../MergeTree/MergeTreeDataWriter.cpp | 1 + src/Storages/MergeTree/MergeTreeSettings.h | 2 + .../MergeTree/MergedBlockOutputStream.cpp | 5 ++ .../MergeTree/MutateFromLogEntryTask.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 54 +++++++++++ .../MergeTree/ReplicatedMergeTreeQueue.cpp | 5 +- src/Storages/StorageMergeTree.cpp | 4 +- .../03001_consider_lwd_when_merge.reference | 3 + .../03001_consider_lwd_when_merge.sql | 23 +++++ 15 files changed, 208 insertions(+), 10 deletions(-) create mode 100644 tests/queries/0_stateless/03001_consider_lwd_when_merge.reference create mode 100644 tests/queries/0_stateless/03001_consider_lwd_when_merge.sql diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 3fea6d04944..c099512d636 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -609,6 +609,15 @@ UInt64 IMergeTreeDataPart::getMarksCount() const return index_granularity.getMarksCount(); } +UInt64 IMergeTreeDataPart::getExistingBytesOnDisk() const +{ + if (storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge && supportLightweightDeleteMutate() && hasLightweightDelete() + && existing_rows_count.has_value() && existing_rows_count.value() < rows_count && rows_count > 0) + return bytes_on_disk * existing_rows_count.value() / rows_count; + else + return bytes_on_disk; +} + size_t IMergeTreeDataPart::getFileSizeOrZero(const String & file_name) const { auto checksum = checksums.files.find(file_name); @@ -691,6 +700,7 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks calculateColumnsAndSecondaryIndicesSizesOnDisk(); loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`. + loadExistingRowsCount(); /// Must be called after loadRowsCount() as it uses the value of `rows_count`. 
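+    /// Note: for parts written before this feature existed, loadExistingRowsCount() simply
+    /// falls back to rows_count unless the load_existing_rows_count_for_old_parts setting is on.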
loadPartitionAndMinMaxIndex(); if (!parent_part) { @@ -1313,6 +1323,86 @@ void IMergeTreeDataPart::loadRowsCount() } } +void IMergeTreeDataPart::loadExistingRowsCount() +{ + if (existing_rows_count.has_value()) + return; + + if (!rows_count || !storage.getSettings()->load_existing_rows_count_for_old_parts || !supportLightweightDeleteMutate() + || !hasLightweightDelete()) + existing_rows_count = rows_count; + else + existing_rows_count = readExistingRowsCount(); +} + +UInt64 IMergeTreeDataPart::readExistingRowsCount() +{ + const size_t total_mark = getMarksCount(); + if (!total_mark) + return rows_count; + + NamesAndTypesList cols; + cols.push_back(LightweightDeleteDescription::FILTER_COLUMN); + + StorageMetadataPtr metadata_ptr = storage.getInMemoryMetadataPtr(); + StorageSnapshotPtr storage_snapshot_ptr = std::make_shared(storage, metadata_ptr); + + MergeTreeReaderPtr reader = getReader( + cols, + storage_snapshot_ptr, + MarkRanges{MarkRange(0, total_mark)}, + nullptr, + storage.getContext()->getMarkCache().get(), + std::make_shared(), + MergeTreeReaderSettings{}, + ValueSizeMap{}, + ReadBufferFromFileBase::ProfileCallback{}); + + if (!reader) + { + LOG_WARNING(storage.log, "Create reader failed while reading existing rows count"); + return rows_count; + } + + size_t current_mark = 0; + bool continue_reading = false; + size_t current_row = 0; + size_t existing_count = 0; + + while (current_row < rows_count) + { + size_t rows_to_read = index_granularity.getMarkRows(current_mark); + continue_reading = (current_mark != 0); + + Columns result; + result.resize(1); + + size_t rows_read = reader->readRows(current_mark, total_mark, continue_reading, rows_to_read, result); + if (!rows_read) + { + LOG_WARNING(storage.log, "Part {} has lightweight delete, but _row_exists column not found", name); + return rows_count; + } + + current_row += rows_read; + current_mark += (rows_to_read == rows_read); + + const ColumnUInt8 * row_exists_col = typeid_cast(result[0].get()); + if (!row_exists_col) + { + LOG_WARNING(storage.log, "Part {} _row_exists column type is not UInt8", name); + return rows_count; + } + + for (UInt8 row_exists : row_exists_col->getData()) + if (row_exists) + existing_count++; + } + + LOG_DEBUG(storage.log, "Part {} existing_rows_count = {}", name, existing_count); + return existing_count; +} + void IMergeTreeDataPart::appendFilesOfRowsCount(Strings & files) { files.push_back("count.txt"); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index aaae64a5970..8bd32e777bc 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -231,6 +231,9 @@ public: size_t rows_count = 0; + /// Existing rows count (excluding lightweight deleted rows) + std::optional existing_rows_count; + time_t modification_time = 0; /// When the part is removed from the working set. Changes once. 
mutable std::atomic remove_time { std::numeric_limits::max() }; @@ -373,6 +376,10 @@ public: void setBytesOnDisk(UInt64 bytes_on_disk_) { bytes_on_disk = bytes_on_disk_; } void setBytesUncompressedOnDisk(UInt64 bytes_uncompressed_on_disk_) { bytes_uncompressed_on_disk = bytes_uncompressed_on_disk_; } + /// Returns estimated size of existing rows if setting exclude_deleted_rows_for_part_size_in_merge is true + /// Otherwise returns bytes_on_disk + UInt64 getExistingBytesOnDisk() const; + size_t getFileSizeOrZero(const String & file_name) const; auto getFilesChecksums() const { return checksums.files; } @@ -499,6 +506,9 @@ public: /// True if here is lightweight deleted mask file in part. bool hasLightweightDelete() const; + /// Read existing rows count from _row_exists column + UInt64 readExistingRowsCount(); + void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings); /// Checks the consistency of this data part. @@ -664,6 +674,9 @@ private: /// For the older format version calculates rows count from the size of a column with a fixed size. void loadRowsCount(); + /// Load existing rows count from _row_exists column if load_existing_rows_count_for_old_parts is true. + void loadExistingRowsCount(); + static void appendFilesOfRowsCount(Strings & files); /// Loads ttl infos in json format from file ttl.txt. If file doesn't exists assigns ttl infos with all zeros diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index ae6e398026d..5ef004ec019 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -174,7 +174,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() } /// Start to make the main work - size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts); + size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts, true); /// Can throw an exception while reserving space. 
IMergeTreeDataPart::TTLInfos ttl_infos; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d56cf761cf4..5e05f75c1c5 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8261,6 +8261,7 @@ std::pair MergeTreeData::createE new_data_part->setColumns(columns, {}, metadata_snapshot->getMetadataVersion()); new_data_part->rows_count = block.rows(); + new_data_part->existing_rows_count = block.rows(); new_data_part->partition = partition; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 1bf1d4a3c29..90144a8cc8f 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -405,7 +405,7 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo } IMergeSelector::Part part_info; - part_info.size = part->getBytesOnDisk(); + part_info.size = part->getExistingBytesOnDisk(); part_info.age = res.current_time - part->modification_time; part_info.level = part->info.level; part_info.data = ∂ @@ -611,7 +611,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti return SelectPartsDecision::CANNOT_SELECT; } - sum_bytes += (*it)->getBytesOnDisk(); + sum_bytes += (*it)->getExistingBytesOnDisk(); prev_it = it; ++it; @@ -793,7 +793,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart } -size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts) +size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & is_merge) { size_t res = 0; time_t current_time = std::time(nullptr); @@ -804,7 +804,10 @@ size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData:: if (part_max_ttl && part_max_ttl <= current_time) continue; - res += part->getBytesOnDisk(); + if (is_merge) + res += part->getExistingBytesOnDisk(); + else + res += part->getBytesOnDisk(); } return static_cast(res * DISK_USAGE_COEFFICIENT_TO_RESERVE); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index f3a3f51b6c3..731c5e1d176 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -193,7 +193,7 @@ public: /// The approximate amount of disk space needed for merge or mutation. With a surplus. - static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts); + static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & is_merge); private: /** Select all parts belonging to the same partition. 
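To make the new behavior concrete, here is a minimal SQL sketch of how `exclude_deleted_rows_for_part_size_in_merge` interacts with merge selection; it mirrors the 03001_consider_lwd_when_merge test added at the end of this patch, from which the table shape and threshold are taken:

```sql
CREATE TABLE lwd (id UInt64 CODEC(NONE)) ENGINE = MergeTree ORDER BY id
SETTINGS max_bytes_to_merge_at_max_space_in_pool = 80000,
         exclude_deleted_rows_for_part_size_in_merge = 1;

INSERT INTO lwd SELECT number FROM numbers(10000);
INSERT INTO lwd SELECT number FROM numbers(10000, 10000);

-- A lightweight delete only masks rows, so bytes_on_disk of both parts is unchanged.
DELETE FROM lwd WHERE id % 10 > 0;

-- With the setting enabled, the merge selector sizes each part by its surviving rows
-- (getExistingBytesOnDisk), so the two parts now fit under the 80000-byte cap and can merge.
OPTIMIZE TABLE lwd;
```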
diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index fdac16ae19a..2ba74e44b40 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -537,6 +537,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( new_data_part->setColumns(columns, infos, metadata_snapshot->getMetadataVersion()); new_data_part->rows_count = block.rows(); + new_data_part->existing_rows_count = block.rows(); new_data_part->partition = std::move(partition); new_data_part->minmax_idx = std::move(minmax_idx); new_data_part->is_temp = true; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 925dc973dc3..ea54f61b4b6 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -42,6 +42,7 @@ struct Settings; M(UInt64, compact_parts_max_bytes_to_buffer, 128 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ M(UInt64, compact_parts_max_granules_to_buffer, 128, "Only available in ClickHouse Cloud", 0) \ M(UInt64, compact_parts_merge_max_bytes_to_prefetch_part, 16 * 1024 * 1024, "Only available in ClickHouse Cloud", 0) \ + M(Bool, load_existing_rows_count_for_old_parts, false, "Whether to load existing_rows_count for existing parts. If false, existing_rows_count will be equal to rows_count for existing parts.", 0) \ \ /** Merge settings. */ \ M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \ @@ -79,6 +80,7 @@ struct Settings; M(UInt64, number_of_mutations_to_throw, 1000, "If table has at least that many unfinished mutations, throw 'Too many mutations' exception. Disabled if set to 0", 0) \ M(UInt64, min_delay_to_mutate_ms, 10, "Min delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ M(UInt64, max_delay_to_mutate_ms, 1000, "Max delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \ + M(Bool, exclude_deleted_rows_for_part_size_in_merge, false, "Use an estimated source part size (excluding lightweight deleted rows) when selecting parts to merge", 0) \ \ /** Inserts settings. */ \ M(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. 
Disabled if set to 0", 0) \ diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index f2fe2e0f255..d8555d69788 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -188,6 +188,11 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( new_part->index_granularity = writer->getIndexGranularity(); new_part->calculateColumnsAndSecondaryIndicesSizesOnDisk(); + /// In mutation, existing_rows_count is already calculated in PartMergerWriter + /// In merge situation, lightweight deleted rows was physically deleted, existing_rows_count equals rows_count + if (!new_part->existing_rows_count.has_value()) + new_part->existing_rows_count = rows_count; + if (default_codec != nullptr) new_part->default_codec = default_codec; diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index a9ff687fe4d..620b0e34c6a 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -49,7 +49,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() } /// TODO - some better heuristic? - size_t estimated_space_for_result = MergeTreeDataMergerMutator::estimateNeededDiskSpace({source_part}); + size_t estimated_space_for_result = MergeTreeDataMergerMutator::estimateNeededDiskSpace({source_part}, false); if (entry.create_time + storage_settings_ptr->prefer_fetch_merged_part_time_threshold.totalSeconds() <= time(nullptr) && estimated_space_for_result >= storage_settings_ptr->prefer_fetch_merged_part_size_threshold) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 150cc27c369..3d31d2f05db 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -60,6 +60,26 @@ static bool checkOperationIsNotCanceled(ActionBlocker & merges_blocker, MergeLis return true; } +static UInt64 getExistingRowsCount(const Block & block) +{ + auto column = block.getByName(LightweightDeleteDescription::FILTER_COLUMN.name).column; + const ColumnUInt8 * row_exists_col = typeid_cast(column.get()); + + if (!row_exists_col) + { + LOG_WARNING(&Poco::Logger::get("MutationHelpers::getExistingRowsCount"), "_row_exists column type is not UInt8"); + return block.rows(); + } + + UInt64 existing_count = 0; + + for (UInt8 row_exists : row_exists_col->getData()) + if (row_exists) + existing_count++; + + return existing_count; +} + /** Split mutation commands into two parts: * First part should be executed by mutations interpreter. * Other is just simple drop/renames, so they can be executed without interpreter. @@ -997,6 +1017,9 @@ struct MutationContext bool need_prefix = true; scope_guard temporary_directory_lock; + + /// Whether this mutation contains lightweight delete + bool has_lightweight_delete; }; using MutationContextPtr = std::shared_ptr; @@ -1191,6 +1214,7 @@ public: } case State::SUCCESS: { + finalize(); return false; } } @@ -1226,6 +1250,11 @@ private: const ProjectionsDescription & projections; ExecutableTaskPtr merge_projection_parts_task_ptr; + + /// Existing rows count calculated during part writing. 
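+    /// (i.e. the number of rows whose _row_exists flag is still 1 after the mutation).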
+ /// It is initialized in prepare(), calculated in mutateOriginalPartAndPrepareProjections() + /// and set to new_data_part in finalize() + size_t existing_rows_count; }; @@ -1238,6 +1267,8 @@ void PartMergerWriter::prepare() // We split the materialization into multiple stages similar to the process of INSERT SELECT query. projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } + + existing_rows_count = 0; } @@ -1251,6 +1282,9 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ctx->out->write(cur_block); + if (ctx->has_lightweight_delete) + existing_rows_count += MutationHelpers::getExistingRowsCount(cur_block); + for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { const auto & projection = *ctx->projections_to_build[i]; @@ -1340,6 +1374,12 @@ bool PartMergerWriter::iterateThroughAllProjections() return true; } +void PartMergerWriter::finalize() +{ + if (ctx->has_lightweight_delete) + ctx->new_data_part->existing_rows_count = existing_rows_count; +} + class MutateAllPartColumnsTask : public IExecutableTask { public: @@ -2185,6 +2225,20 @@ bool MutateTask::prepare() if (ctx->mutating_pipeline_builder.initialized()) ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies()); + if (ctx->updated_header.has(LightweightDeleteDescription::FILTER_COLUMN.name)) + { + /// This mutation contains lightweight delete, reset existing_rows_count of new data part to 0 + /// It will be updated while writing _row_exists column + ctx->has_lightweight_delete = true; + } + else + { + ctx->has_lightweight_delete = false; + + /// This mutation does not contains lightweight delete, copy existing_rows_count from source part + ctx->new_data_part->existing_rows_count = ctx->source_part->existing_rows_count.value_or(ctx->source_part->rows_count); + } + /// All columns from part are changed and may be some more that were missing before in part /// TODO We can materialize compact part without copying data if (!isWidePart(ctx->source_part) || !isFullPartStorage(ctx->source_part->getDataPartStorage()) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 858eae4afd9..42f564f40da 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1350,7 +1350,10 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( auto part = data.getPartIfExists(name, {MergeTreeDataPartState::PreActive, MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated}); if (part) { - sum_parts_size_in_bytes += part->getBytesOnDisk(); + if (entry.type == LogEntry::MERGE_PARTS) + sum_parts_size_in_bytes += part->getExistingBytesOnDisk(); + else + sum_parts_size_in_bytes += part->getBytesOnDisk(); if (entry.type == LogEntry::MUTATE_PART && !storage.mutation_backoff_policy.partCanBeMutated(part->name)) { diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 663e7f435b7..c816a6f0dce 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1113,7 +1113,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (isTTLMergeType(future_part->merge_type)) getContext()->getMergeList().bookMergeWithTTL(); - merging_tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part->parts), *this, metadata_snapshot, false); + merging_tagger 
= std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part->parts, true), *this, metadata_snapshot, false); return std::make_shared(future_part, std::move(merging_tagger), std::make_shared()); } @@ -1336,7 +1336,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( future_part->name = part->getNewName(new_part_info); future_part->part_format = part->getFormat(); - tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}), *this, metadata_snapshot, true); + tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}, false), *this, metadata_snapshot, true); return std::make_shared(future_part, std::move(tagger), commands, txn); } } diff --git a/tests/queries/0_stateless/03001_consider_lwd_when_merge.reference b/tests/queries/0_stateless/03001_consider_lwd_when_merge.reference new file mode 100644 index 00000000000..19920de3d3c --- /dev/null +++ b/tests/queries/0_stateless/03001_consider_lwd_when_merge.reference @@ -0,0 +1,3 @@ +2 +2 +1 diff --git a/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql b/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql new file mode 100644 index 00000000000..a65e8877020 --- /dev/null +++ b/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS lwd_merge; + +CREATE TABLE lwd_merge (id UInt64 CODEC(NONE)) + ENGINE = MergeTree ORDER BY id +SETTINGS max_bytes_to_merge_at_max_space_in_pool = 80000, exclude_deleted_rows_for_part_size_in_merge = 0; + +INSERT INTO lwd_merge SELECT number FROM numbers(10000); +INSERT INTO lwd_merge SELECT number FROM numbers(10000, 10000); + +OPTIMIZE TABLE lwd_merge; +SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1; + +DELETE FROM lwd_merge WHERE id % 10 > 0; + +OPTIMIZE TABLE lwd_merge; +SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1; + +ALTER TABLE lwd_merge MODIFY SETTING exclude_deleted_rows_for_part_size_in_merge = 1; + +OPTIMIZE TABLE lwd_merge; +SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1; + +DROP TABLE IF EXISTS lwd_merge; From 4ad8141a162b3b7735e2f08c069e98b9c2ba2382 Mon Sep 17 00:00:00 2001 From: Zhuo Qiu Date: Wed, 28 Feb 2024 19:54:21 -0600 Subject: [PATCH 105/283] Maintain compatibility of estimateNeededDiskSpace() Co-authored-by: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> --- src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeDataMergerMutator.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 90144a8cc8f..53d49b51e8f 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -793,7 +793,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart } -size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & is_merge) +size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & account_for_deleted) { size_t res = 0; time_t current_time = std::time(nullptr); @@ -804,7 +804,7 @@ size_t 
MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData:: if (part_max_ttl && part_max_ttl <= current_time) continue; - if (is_merge) + if (account_for_deleted) res += part->getExistingBytesOnDisk(); else res += part->getBytesOnDisk(); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 731c5e1d176..669ee040af3 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -193,7 +193,7 @@ public: /// The approximate amount of disk space needed for merge or mutation. With a surplus. - static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & is_merge); + static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & account_for_deleted = false); private: /** Select all parts belonging to the same partition. From 05969a39f390445c8d0df43b7077e0eb81db3538 Mon Sep 17 00:00:00 2001 From: Zhuo Qiu Date: Tue, 12 Mar 2024 14:45:25 +0800 Subject: [PATCH 106/283] resolve conflicts --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 5 +++-- src/Storages/MergeTree/MutateTask.cpp | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index c099512d636..5fede923252 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1342,7 +1342,7 @@ UInt64 IMergeTreeDataPart::readExistingRowsCount() return rows_count; NamesAndTypesList cols; - cols.push_back(LightweightDeleteDescription::FILTER_COLUMN); + cols.emplace_back(RowExistsColumn::name, RowExistsColumn::type); StorageMetadataPtr metadata_ptr = storage.getInMemoryMetadataPtr(); StorageSnapshotPtr storage_snapshot_ptr = std::make_shared(storage, metadata_ptr); @@ -1351,7 +1351,8 @@ UInt64 IMergeTreeDataPart::readExistingRowsCount() cols, storage_snapshot_ptr, MarkRanges{MarkRange(0, total_mark)}, - nullptr, + /*virtual_fields=*/ {}, + /*uncompressed_cache=*/{}, storage.getContext()->getMarkCache().get(), std::make_shared(), MergeTreeReaderSettings{}, diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 3d31d2f05db..4d1e60f450e 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -62,7 +62,7 @@ static bool checkOperationIsNotCanceled(ActionBlocker & merges_blocker, MergeLis static UInt64 getExistingRowsCount(const Block & block) { - auto column = block.getByName(LightweightDeleteDescription::FILTER_COLUMN.name).column; + auto column = block.getByName(RowExistsColumn::name).column; const ColumnUInt8 * row_exists_col = typeid_cast(column.get()); if (!row_exists_col) @@ -2225,7 +2225,7 @@ bool MutateTask::prepare() if (ctx->mutating_pipeline_builder.initialized()) ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies()); - if (ctx->updated_header.has(LightweightDeleteDescription::FILTER_COLUMN.name)) + if (ctx->updated_header.has(RowExistsColumn::name)) { /// This mutation contains lightweight delete, reset existing_rows_count of new data part to 0 /// It will be updated while writing _row_exists column From 47a4ce8a4e629f2d6321be1411d93c0bfabbbc95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 12 Mar 2024 10:02:33 +0000 Subject: [PATCH 107/283] Move 
`RangeHashedDictionary::getColumn` into source file --- src/Dictionaries/RangeHashedDictionary.cpp | 448 +++++++++++++++++++++ src/Dictionaries/RangeHashedDictionary.h | 421 ------------------- 2 files changed, 448 insertions(+), 421 deletions(-) create mode 100644 src/Dictionaries/RangeHashedDictionary.cpp diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp new file mode 100644 index 00000000000..203561fc23d --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -0,0 +1,448 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; +} + +template +ColumnPtr RangeHashedDictionary::getColumn( + const std::string & attribute_name, + const DataTypePtr & attribute_type, + const Columns & key_columns, + const DataTypes & key_types, + DefaultOrFilter default_or_filter) const +{ + bool is_short_circuit = std::holds_alternative(default_or_filter); + assert(is_short_circuit || std::holds_alternative(default_or_filter)); + + if (dictionary_key_type == DictionaryKeyType::Complex) + { + auto key_types_copy = key_types; + key_types_copy.pop_back(); + dict_struct.validateKeyTypes(key_types_copy); + } + + ColumnPtr result; + + const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, attribute_type); + const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second; + const auto & attribute = attributes[attribute_index]; + + /// Cast range column to storage type + Columns modified_key_columns = key_columns; + const ColumnPtr & range_storage_column = key_columns.back(); + ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""}; + modified_key_columns.back() = castColumnAccurate(column_to_cast, dict_struct.range_min->type); + + size_t keys_size = key_columns.front()->size(); + bool is_attribute_nullable = attribute.is_value_nullable.has_value(); + + ColumnUInt8::MutablePtr col_null_map_to; + ColumnUInt8::Container * vec_null_map_to = nullptr; + if (is_attribute_nullable) + { + col_null_map_to = ColumnUInt8::create(keys_size, false); + vec_null_map_to = &col_null_map_to->getData(); + } + + auto type_call = [&](const auto & dictionary_attribute_type) + { + using Type = std::decay_t; + using AttributeType = typename Type::AttributeType; + using ValueType = DictionaryValueType; + using ColumnProvider = DictionaryAttributeColumnProvider; + + auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size); + + if (is_short_circuit) + { + IColumn::Filter & default_mask = std::get(default_or_filter).get(); + size_t keys_found = 0; + + if constexpr (std::is_same_v) + { + auto * out = column.get(); + + keys_found = getItemsShortCircuitImpl( + attribute, + modified_key_columns, + [&](size_t, const Array & value, bool) + { + out->insert(value); + }, + default_mask); + } + else if constexpr (std::is_same_v) + { + auto * out = column.get(); + + if (is_attribute_nullable) + keys_found = getItemsShortCircuitImpl( + attribute, + modified_key_columns, + [&](size_t row, StringRef value, bool is_null) + { + (*vec_null_map_to)[row] = is_null; + out->insertData(value.data, value.size); + }, + default_mask); + else + keys_found = getItemsShortCircuitImpl( + attribute, + modified_key_columns, + [&](size_t, StringRef value, bool) + { + out->insertData(value.data, value.size); + }, + default_mask); + } + else + { + auto & out = column->getData(); + + if (is_attribute_nullable) + keys_found = getItemsShortCircuitImpl( + 
attribute, + modified_key_columns, + [&](size_t row, const auto value, bool is_null) + { + (*vec_null_map_to)[row] = is_null; + out[row] = value; + }, + default_mask); + else + keys_found = getItemsShortCircuitImpl( + attribute, + modified_key_columns, + [&](size_t row, const auto value, bool) + { + out[row] = value; + }, + default_mask); + + out.resize(keys_found); + } + + if (is_attribute_nullable) + vec_null_map_to->resize(keys_found); + } + else + { + const ColumnPtr & default_values_column = std::get(default_or_filter).get(); + + DictionaryDefaultValueExtractor default_value_extractor( + dictionary_attribute.null_value, default_values_column); + + if constexpr (std::is_same_v) + { + auto * out = column.get(); + + getItemsImpl( + attribute, + modified_key_columns, + [&](size_t, const Array & value, bool) + { + out->insert(value); + }, + default_value_extractor); + } + else if constexpr (std::is_same_v) + { + auto * out = column.get(); + + if (is_attribute_nullable) + getItemsImpl( + attribute, + modified_key_columns, + [&](size_t row, StringRef value, bool is_null) + { + (*vec_null_map_to)[row] = is_null; + out->insertData(value.data, value.size); + }, + default_value_extractor); + else + getItemsImpl( + attribute, + modified_key_columns, + [&](size_t, StringRef value, bool) + { + out->insertData(value.data, value.size); + }, + default_value_extractor); + } + else + { + auto & out = column->getData(); + + if (is_attribute_nullable) + getItemsImpl( + attribute, + modified_key_columns, + [&](size_t row, const auto value, bool is_null) + { + (*vec_null_map_to)[row] = is_null; + out[row] = value; + }, + default_value_extractor); + else + getItemsImpl( + attribute, + modified_key_columns, + [&](size_t row, const auto value, bool) + { + out[row] = value; + }, + default_value_extractor); + } + } + + result = std::move(column); + }; + + callOnDictionaryAttributeType(attribute.type, type_call); + + if (is_attribute_nullable) + result = ColumnNullable::create(result, std::move(col_null_map_to)); + + return result; +} + +template +template +void RangeHashedDictionary::getItemsImpl( + const Attribute & attribute, + const Columns & key_columns, + ValueSetter && set_value, + DefaultValueExtractor & default_value_extractor) const +{ + const auto & attribute_container = std::get>(attribute.container); + + size_t keys_found = 0; + + const ColumnPtr & range_column = key_columns.back(); + auto key_columns_copy = key_columns; + key_columns_copy.pop_back(); + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); + const size_t keys_size = keys_extractor.getKeysSize(); + + callOnRangeType(dict_struct.range_min->type, [&](const auto & types) + { + using Types = std::decay_t; + using RangeColumnType = typename Types::LeftType; + using RangeStorageType = typename RangeColumnType::ValueType; + using RangeInterval = Interval; + + const auto * range_column_typed = typeid_cast(range_column.get()); + if (!range_column_typed) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Dictionary {} range column type should be equal to {}", + getFullName(), + dict_struct.range_min->type->getName()); + + const auto & range_column_data = range_column_typed->getData(); + + const auto & key_attribute_container = std::get>(key_attribute.container); + + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + auto key = keys_extractor.extractCurrentKey(); + const auto it = key_attribute_container.find(key); + + if (it) + { + const auto date 
= range_column_data[key_index]; + const auto & interval_tree = it->getMapped(); + + size_t value_index = 0; + std::optional range; + + interval_tree.find(date, [&](auto & interval, auto & interval_value_index) + { + if (range) + { + if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range) + { + range = interval; + value_index = interval_value_index; + } + else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > * range) + { + range = interval; + value_index = interval_value_index; + } + } + else + { + range = interval; + value_index = interval_value_index; + } + + return true; + }); + + if (range.has_value()) + { + ++keys_found; + + AttributeType value = attribute_container[value_index]; + + if constexpr (is_nullable) + { + bool is_null = (*attribute.is_value_nullable)[value_index]; + set_value(key_index, value, is_null); + } + else + { + set_value(key_index, value, false); + } + + keys_extractor.rollbackCurrentKey(); + continue; + } + } + + if constexpr (is_nullable) + set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index)); + else + set_value(key_index, default_value_extractor[key_index], false); + + keys_extractor.rollbackCurrentKey(); + } + }); + + query_count.fetch_add(keys_size, std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); +} + +template +template +size_t RangeHashedDictionary::getItemsShortCircuitImpl( + const Attribute & attribute, + const Columns & key_columns, + ValueSetter && set_value, + IColumn::Filter & default_mask) const +{ + const auto & attribute_container = std::get>(attribute.container); + + size_t keys_found = 0; + + const ColumnPtr & range_column = key_columns.back(); + auto key_columns_copy = key_columns; + key_columns_copy.pop_back(); + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); + const size_t keys_size = keys_extractor.getKeysSize(); + default_mask.resize(keys_size); + + callOnRangeType(dict_struct.range_min->type, [&](const auto & types) + { + using Types = std::decay_t; + using RangeColumnType = typename Types::LeftType; + using RangeStorageType = typename RangeColumnType::ValueType; + using RangeInterval = Interval; + + const auto * range_column_typed = typeid_cast(range_column.get()); + if (!range_column_typed) + throw Exception(ErrorCodes::TYPE_MISMATCH, + "Dictionary {} range column type should be equal to {}", + getFullName(), + dict_struct.range_min->type->getName()); + + const auto & range_column_data = range_column_typed->getData(); + + const auto & key_attribute_container = std::get>(key_attribute.container); + + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + auto key = keys_extractor.extractCurrentKey(); + const auto it = key_attribute_container.find(key); + + if (it) + { + const auto date = range_column_data[key_index]; + const auto & interval_tree = it->getMapped(); + + size_t value_index = 0; + std::optional range; + + interval_tree.find(date, [&](auto & interval, auto & interval_value_index) + { + if (range) + { + if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range) + { + range = interval; + value_index = interval_value_index; + } + else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > * range) + { + range = interval; + value_index = interval_value_index; + } 
+ } + else + { + range = interval; + value_index = interval_value_index; + } + + return true; + }); + + if (range.has_value()) + { + default_mask[key_index] = 0; + ++keys_found; + + AttributeType value = attribute_container[value_index]; + + if constexpr (is_nullable) + { + bool is_null = (*attribute.is_value_nullable)[value_index]; + set_value(key_index, value, is_null); + } + else + { + set_value(key_index, value, false); + } + + keys_extractor.rollbackCurrentKey(); + continue; + } + } + + default_mask[key_index] = 1; + + keys_extractor.rollbackCurrentKey(); + } + }); + + query_count.fetch_add(keys_size, std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); + return keys_found; +} + +template +ColumnPtr RangeHashedDictionary::getColumn( + const std::string & attribute_name, + const DataTypePtr & attribute_type, + const Columns & key_columns, + const DataTypes & key_types, + DefaultOrFilter default_or_filter) const; + +template +ColumnPtr RangeHashedDictionary::getColumn( + const std::string & attribute_name, + const DataTypePtr & attribute_type, + const Columns & key_columns, + const DataTypes & key_types, + DefaultOrFilter default_or_filter) const; + +} diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 509b991b30c..4a8008b9051 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -339,209 +339,6 @@ RangeHashedDictionary::RangeHashedDictionary( calculateBytesAllocated(); } -template -ColumnPtr RangeHashedDictionary::getColumn( - const std::string & attribute_name, - const DataTypePtr & attribute_type, - const Columns & key_columns, - const DataTypes & key_types, - DefaultOrFilter default_or_filter) const -{ - bool is_short_circuit = std::holds_alternative(default_or_filter); - assert(is_short_circuit || std::holds_alternative(default_or_filter)); - - if (dictionary_key_type == DictionaryKeyType::Complex) - { - auto key_types_copy = key_types; - key_types_copy.pop_back(); - dict_struct.validateKeyTypes(key_types_copy); - } - - ColumnPtr result; - - const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, attribute_type); - const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second; - const auto & attribute = attributes[attribute_index]; - - /// Cast range column to storage type - Columns modified_key_columns = key_columns; - const ColumnPtr & range_storage_column = key_columns.back(); - ColumnWithTypeAndName column_to_cast = {range_storage_column->convertToFullColumnIfConst(), key_types.back(), ""}; - modified_key_columns.back() = castColumnAccurate(column_to_cast, dict_struct.range_min->type); - - size_t keys_size = key_columns.front()->size(); - bool is_attribute_nullable = attribute.is_value_nullable.has_value(); - - ColumnUInt8::MutablePtr col_null_map_to; - ColumnUInt8::Container * vec_null_map_to = nullptr; - if (is_attribute_nullable) - { - col_null_map_to = ColumnUInt8::create(keys_size, false); - vec_null_map_to = &col_null_map_to->getData(); - } - - auto type_call = [&](const auto & dictionary_attribute_type) - { - using Type = std::decay_t; - using AttributeType = typename Type::AttributeType; - using ValueType = DictionaryValueType; - using ColumnProvider = DictionaryAttributeColumnProvider; - - auto column = ColumnProvider::getColumn(dictionary_attribute, keys_size); - - if (is_short_circuit) - { - IColumn::Filter & default_mask = std::get(default_or_filter).get(); - size_t 
keys_found = 0; - - if constexpr (std::is_same_v) - { - auto * out = column.get(); - - keys_found = getItemsShortCircuitImpl( - attribute, - modified_key_columns, - [&](size_t, const Array & value, bool) - { - out->insert(value); - }, - default_mask); - } - else if constexpr (std::is_same_v) - { - auto * out = column.get(); - - if (is_attribute_nullable) - keys_found = getItemsShortCircuitImpl( - attribute, - modified_key_columns, - [&](size_t row, StringRef value, bool is_null) - { - (*vec_null_map_to)[row] = is_null; - out->insertData(value.data, value.size); - }, - default_mask); - else - keys_found = getItemsShortCircuitImpl( - attribute, - modified_key_columns, - [&](size_t, StringRef value, bool) - { - out->insertData(value.data, value.size); - }, - default_mask); - } - else - { - auto & out = column->getData(); - - if (is_attribute_nullable) - keys_found = getItemsShortCircuitImpl( - attribute, - modified_key_columns, - [&](size_t row, const auto value, bool is_null) - { - (*vec_null_map_to)[row] = is_null; - out[row] = value; - }, - default_mask); - else - keys_found = getItemsShortCircuitImpl( - attribute, - modified_key_columns, - [&](size_t row, const auto value, bool) - { - out[row] = value; - }, - default_mask); - - out.resize(keys_found); - } - - if (is_attribute_nullable) - vec_null_map_to->resize(keys_found); - } - else - { - const ColumnPtr & default_values_column = std::get(default_or_filter).get(); - - DictionaryDefaultValueExtractor default_value_extractor( - dictionary_attribute.null_value, default_values_column); - - if constexpr (std::is_same_v) - { - auto * out = column.get(); - - getItemsImpl( - attribute, - modified_key_columns, - [&](size_t, const Array & value, bool) - { - out->insert(value); - }, - default_value_extractor); - } - else if constexpr (std::is_same_v) - { - auto * out = column.get(); - - if (is_attribute_nullable) - getItemsImpl( - attribute, - modified_key_columns, - [&](size_t row, StringRef value, bool is_null) - { - (*vec_null_map_to)[row] = is_null; - out->insertData(value.data, value.size); - }, - default_value_extractor); - else - getItemsImpl( - attribute, - modified_key_columns, - [&](size_t, StringRef value, bool) - { - out->insertData(value.data, value.size); - }, - default_value_extractor); - } - else - { - auto & out = column->getData(); - - if (is_attribute_nullable) - getItemsImpl( - attribute, - modified_key_columns, - [&](size_t row, const auto value, bool is_null) - { - (*vec_null_map_to)[row] = is_null; - out[row] = value; - }, - default_value_extractor); - else - getItemsImpl( - attribute, - modified_key_columns, - [&](size_t row, const auto value, bool) - { - out[row] = value; - }, - default_value_extractor); - } - } - - result = std::move(column); - }; - - callOnDictionaryAttributeType(attribute.type, type_call); - - if (is_attribute_nullable) - result = ColumnNullable::create(result, std::move(col_null_map_to)); - - return result; -} - template ColumnPtr RangeHashedDictionary::getColumnInternal( const std::string & attribute_name, @@ -840,224 +637,6 @@ typename RangeHashedDictionary::Attribute RangeHashedDictio return attribute; } -template -template -void RangeHashedDictionary::getItemsImpl( - const Attribute & attribute, - const Columns & key_columns, - ValueSetter && set_value, - DefaultValueExtractor & default_value_extractor) const -{ - const auto & attribute_container = std::get>(attribute.container); - - size_t keys_found = 0; - - const ColumnPtr & range_column = key_columns.back(); - auto key_columns_copy = 
key_columns; - key_columns_copy.pop_back(); - - DictionaryKeysArenaHolder arena_holder; - DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); - const size_t keys_size = keys_extractor.getKeysSize(); - - callOnRangeType(dict_struct.range_min->type, [&](const auto & types) - { - using Types = std::decay_t; - using RangeColumnType = typename Types::LeftType; - using RangeStorageType = typename RangeColumnType::ValueType; - using RangeInterval = Interval; - - const auto * range_column_typed = typeid_cast(range_column.get()); - if (!range_column_typed) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Dictionary {} range column type should be equal to {}", - getFullName(), - dict_struct.range_min->type->getName()); - - const auto & range_column_data = range_column_typed->getData(); - - const auto & key_attribute_container = std::get>(key_attribute.container); - - for (size_t key_index = 0; key_index < keys_size; ++key_index) - { - auto key = keys_extractor.extractCurrentKey(); - const auto it = key_attribute_container.find(key); - - if (it) - { - const auto date = range_column_data[key_index]; - const auto & interval_tree = it->getMapped(); - - size_t value_index = 0; - std::optional range; - - interval_tree.find(date, [&](auto & interval, auto & interval_value_index) - { - if (range) - { - if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range) - { - range = interval; - value_index = interval_value_index; - } - else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > * range) - { - range = interval; - value_index = interval_value_index; - } - } - else - { - range = interval; - value_index = interval_value_index; - } - - return true; - }); - - if (range.has_value()) - { - ++keys_found; - - AttributeType value = attribute_container[value_index]; - - if constexpr (is_nullable) - { - bool is_null = (*attribute.is_value_nullable)[value_index]; - set_value(key_index, value, is_null); - } - else - { - set_value(key_index, value, false); - } - - keys_extractor.rollbackCurrentKey(); - continue; - } - } - - if constexpr (is_nullable) - set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index)); - else - set_value(key_index, default_value_extractor[key_index], false); - - keys_extractor.rollbackCurrentKey(); - } - }); - - query_count.fetch_add(keys_size, std::memory_order_relaxed); - found_count.fetch_add(keys_found, std::memory_order_relaxed); -} - -template -template -size_t RangeHashedDictionary::getItemsShortCircuitImpl( - const Attribute & attribute, - const Columns & key_columns, - ValueSetter && set_value, - IColumn::Filter & default_mask) const -{ - const auto & attribute_container = std::get>(attribute.container); - - size_t keys_found = 0; - - const ColumnPtr & range_column = key_columns.back(); - auto key_columns_copy = key_columns; - key_columns_copy.pop_back(); - - DictionaryKeysArenaHolder arena_holder; - DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); - const size_t keys_size = keys_extractor.getKeysSize(); - default_mask.resize(keys_size); - - callOnRangeType(dict_struct.range_min->type, [&](const auto & types) - { - using Types = std::decay_t; - using RangeColumnType = typename Types::LeftType; - using RangeStorageType = typename RangeColumnType::ValueType; - using RangeInterval = Interval; - - const auto * range_column_typed = typeid_cast(range_column.get()); - if 
(!range_column_typed) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Dictionary {} range column type should be equal to {}", - getFullName(), - dict_struct.range_min->type->getName()); - - const auto & range_column_data = range_column_typed->getData(); - - const auto & key_attribute_container = std::get>(key_attribute.container); - - for (size_t key_index = 0; key_index < keys_size; ++key_index) - { - auto key = keys_extractor.extractCurrentKey(); - const auto it = key_attribute_container.find(key); - - if (it) - { - const auto date = range_column_data[key_index]; - const auto & interval_tree = it->getMapped(); - - size_t value_index = 0; - std::optional range; - - interval_tree.find(date, [&](auto & interval, auto & interval_value_index) - { - if (range) - { - if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range) - { - range = interval; - value_index = interval_value_index; - } - else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > * range) - { - range = interval; - value_index = interval_value_index; - } - } - else - { - range = interval; - value_index = interval_value_index; - } - - return true; - }); - - if (range.has_value()) - { - default_mask[key_index] = 0; - ++keys_found; - - AttributeType value = attribute_container[value_index]; - - if constexpr (is_nullable) - { - bool is_null = (*attribute.is_value_nullable)[value_index]; - set_value(key_index, value, is_null); - } - else - { - set_value(key_index, value, false); - } - - keys_extractor.rollbackCurrentKey(); - continue; - } - } - - default_mask[key_index] = 1; - - keys_extractor.rollbackCurrentKey(); - } - }); - - query_count.fetch_add(keys_size, std::memory_order_relaxed); - found_count.fetch_add(keys_found, std::memory_order_relaxed); - return keys_found; -} - template template void RangeHashedDictionary::getItemsInternalImpl( From f1f388d9d26dbaf0775e90fdc26f9085b83b1c26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Mon, 11 Mar 2024 16:33:12 +0000 Subject: [PATCH 108/283] Remove RangeHashedDictionary as exception from large objects check --- utils/check-style/check-large-objects.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/utils/check-style/check-large-objects.sh b/utils/check-style/check-large-objects.sh index 5c1276e5732..5b0e8e88df5 100755 --- a/utils/check-style/check-large-objects.sh +++ b/utils/check-style/check-large-objects.sh @@ -7,8 +7,6 @@ TU_EXCLUDES=( AggregateFunctionUniq FunctionsConversion - RangeHashedDictionary - Aggregator ) From 4ce5245157418217b3f7e41724df3b8cf3dd3272 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 12 Mar 2024 11:03:39 +0000 Subject: [PATCH 109/283] Automatic style fix --- tests/integration/test_disk_types/test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index 5047cdc605e..a53d073d30b 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -50,6 +50,8 @@ def test_different_types(cluster): assert ( fields[encrypted_col_ix] == "0" ), f"{fields[name_col_ix]} expected to be non-encrypted!" 
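# NOTE: a function named test_different_types is defined twice in this
# module; Python keeps only the later definition, so the checks above are
# shadowed and will not be collected by pytest.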
+ + def test_different_types(cluster): node = cluster.instances["node"] response = TSV.toMat(node.query("SELECT * FROM system.disks FORMAT TSVWithNames")) From d431276045f6600af92562abf2b6387bd37d068c Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 12 Mar 2024 12:43:48 +0100 Subject: [PATCH 110/283] Fix usage of session_token in S3 --- src/Storages/StorageS3.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 11da394feec..ff055508aa6 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1451,7 +1451,8 @@ void StorageS3::Configuration::connect(const ContextPtr & context) auth_settings.expiration_window_seconds.value_or( context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), - }); + }, + credentials.GetSessionToken()); } void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection) From 2e803f29f8f89eec0d9eb95b089ecad5dd6e18b7 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 12 Mar 2024 12:45:02 +0100 Subject: [PATCH 111/283] Fix KeeperSnapshotManagerS3 --- src/Coordination/KeeperSnapshotManagerS3.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 80345db2524..796506a07db 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -121,7 +121,8 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo auth_settings.use_insecure_imds_request.value_or(false), auth_settings.expiration_window_seconds.value_or(S3::DEFAULT_EXPIRATION_WINDOW_SECONDS), auth_settings.no_sign_request.value_or(false), - }); + }, + credentials.GetSessionToken()); auto new_client = std::make_shared(std::move(new_uri), std::move(auth_settings), std::move(client)); From c27c1e1035772cff76f318a361153ca21f9adc55 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Mar 2024 14:36:07 +0100 Subject: [PATCH 112/283] Fix build --- src/Functions/FunctionsConversion.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 94fd960a99e..e2d9c4a173e 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -3391,9 +3391,9 @@ arguments, result_type, input_rows_count); \ case IntervalKind::Kind::INTERVAL_KIND: \ return createFunctionAdaptor(FunctionConvert::create(), from_type); - static WrapperType createIntervalWrapper(const DataTypePtr & from_type, IntervalKind::Kind kind) + static WrapperType createIntervalWrapper(const DataTypePtr & from_type, IntervalKind kind) { - switch (kind) + switch (kind.kind) { GENERATE_INTERVAL_CASE(Nanosecond) GENERATE_INTERVAL_CASE(Microsecond) From b5489ac9620311b160b8c5fa23c02afcc7271114 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 12 Mar 2024 14:05:39 +0000 Subject: [PATCH 113/283] Print out all queries to have better insights --- ...materialized_views_ignore_errors.reference | 26 +++++++++++++++++-- ..._logs_materialized_views_ignore_errors.sql | 8 +++--- 2 files changed, 29 insertions(+), 5 deletions(-) diff 
--git a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference index 029f80b46b0..596a047c104 100644 --- a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference +++ b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.reference @@ -1,2 +1,24 @@ -11 queryfinish OK -11 querystart OK +"-- Attach MV to system.query_log and check that writing query_log will not fail\n\nset log_queries=1;","querystart","OK" +"-- Attach MV to system.query_log and check that writing query_log will not fail\n\nset log_queries=1;","queryfinish","OK" +"drop table if exists log_proxy_02572;","querystart","OK" +"drop table if exists log_proxy_02572;","queryfinish","OK" +"drop table if exists push_to_logs_proxy_mv_02572;","querystart","OK" +"drop table if exists push_to_logs_proxy_mv_02572;","queryfinish","OK" +"-- create log tables\nsystem flush logs;","querystart","OK" +"-- create log tables\nsystem flush logs;","queryfinish","OK" +"create table log_proxy_02572 as system.query_log engine=Distributed('test_shard_localhost', currentDatabase(), 'receiver_02572');","querystart","OK" +"create table log_proxy_02572 as system.query_log engine=Distributed('test_shard_localhost', currentDatabase(), 'receiver_02572');","queryfinish","OK" +"create materialized view push_to_logs_proxy_mv_02572 to log_proxy_02572 as select * from system.query_log;","querystart","OK" +"create materialized view push_to_logs_proxy_mv_02572 to log_proxy_02572 as select * from system.query_log;","queryfinish","OK" +"select 1 format Null;","querystart","OK" +"select 1 format Null;","queryfinish","OK" +"system flush logs;","querystart","OK" +"system flush logs;","queryfinish","OK" +"system flush logs;","querystart","OK" +"system flush logs;","queryfinish","OK" +"drop table log_proxy_02572;","querystart","OK" +"drop table log_proxy_02572;","queryfinish","OK" +"drop table push_to_logs_proxy_mv_02572;","querystart","OK" +"drop table push_to_logs_proxy_mv_02572;","queryfinish","OK" +"set log_queries=0;","querystart","OK" +"set log_queries=0;","queryfinish","OK" diff --git a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql index a7a74190821..2381639fba0 100644 --- a/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql +++ b/tests/queries/0_stateless/02572_system_logs_materialized_views_ignore_errors.sql @@ -21,10 +21,12 @@ system flush logs; drop table log_proxy_02572; drop table push_to_logs_proxy_mv_02572; +set log_queries=0; + system flush logs; -- lower() to pass through clickhouse-test "exception" check -select count(), lower(type::String), errorCodeToName(exception_code) +select replaceAll(query, '\n', '\\n'), lower(type::String), errorCodeToName(exception_code) from system.query_log where current_database = currentDatabase() - group by 2, 3 - order by 2; + order by event_time_microseconds + format CSV; From 0b588480f5165af38675911ebba043fe410465db Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 12 Mar 2024 14:34:15 +0000 Subject: [PATCH 114/283] Fix lazy execution in dictGetOrDefault for RangeHashedDictionary --- src/Functions/FunctionsExternalDictionaries.h | 7 ++-- .../03009_range_dict_get_or_default.reference | 1 + .../03009_range_dict_get_or_default.sql | 34 +++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) 
create mode 100644 tests/queries/0_stateless/03009_range_dict_get_or_default.reference create mode 100644 tests/queries/0_stateless/03009_range_dict_get_or_default.sql diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 261c728e9e1..011772baab9 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -324,12 +324,15 @@ public: String getName() const override { return name; } bool isVariadic() const override { return true; } - bool isShortCircuit(ShortCircuitSettings & settings, size_t /*number_of_arguments*/) const override + bool isShortCircuit(ShortCircuitSettings & settings, size_t number_of_arguments) const override { if constexpr (dictionary_get_function_type != DictionaryGetFunctionType::getOrDefault) return false; - settings.arguments_with_disabled_lazy_execution.insert({0, 1, 2}); + /// We execute lazily only last argument with default expression. + for (size_t i = 0; i != number_of_arguments - 1; ++i) + settings.arguments_with_disabled_lazy_execution.insert(i); + settings.enable_lazy_execution_for_common_descendants_of_arguments = false; settings.force_enable_lazy_execution = false; return true; diff --git a/tests/queries/0_stateless/03009_range_dict_get_or_default.reference b/tests/queries/0_stateless/03009_range_dict_get_or_default.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/03009_range_dict_get_or_default.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/03009_range_dict_get_or_default.sql b/tests/queries/0_stateless/03009_range_dict_get_or_default.sql new file mode 100644 index 00000000000..1f4b4073b9f --- /dev/null +++ b/tests/queries/0_stateless/03009_range_dict_get_or_default.sql @@ -0,0 +1,34 @@ +DROP DICTIONARY IF EXISTS range_dictionary; +DROP TABLE IF EXISTS range_dictionary_nullable_source_table; + + +CREATE TABLE range_dictionary_nullable_source_table +( + key UInt64, + start_date Date, + end_date Date, + value Nullable(UInt64) +) +ENGINE = TinyLog; + +INSERT INTO range_dictionary_nullable_source_table VALUES (0, toDate('2019-05-05'), toDate('2019-05-20'), 0), (1, toDate('2019-05-05'), toDate('2019-05-20'), NULL); + +CREATE DICTIONARY range_dictionary +( + key UInt64, + start_date Date, + end_date Date, + value Nullable(UInt64) DEFAULT NULL +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'range_dictionary_nullable_source_table')) +LIFETIME(MIN 1 MAX 1000) +LAYOUT(RANGE_HASHED()) +RANGE(MIN start_date MAX end_date); + +SELECT dictGetOrDefault('range_dictionary', 'value', toUInt64(2), toDate(toLowCardinality(materialize('2019-05-15'))), 2); + + +DROP DICTIONARY IF EXISTS range_dictionary; +DROP TABLE IF EXISTS range_dictionary_nullable_source_table; + From 01d0fba0853930de42d4c122c9f067bd91f9af4b Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 12 Mar 2024 14:48:59 +0000 Subject: [PATCH 115/283] init --- src/Interpreters/InterpreterSelectQuery.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index bcedba7346d..3c84d086d85 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2552,7 +2552,12 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc /// If necessary, we request more sources than the number of threads - to distribute the work evenly 
over the threads. if (max_streams > 1 && !is_sync_remote) - max_streams = static_cast(max_streams * settings.max_streams_to_max_threads_ratio); + { + if (auto streams_with_ratio = max_streams * settings.max_streams_to_max_threads_ratio; streams_with_ratio > sizeof(size_t)) + max_streams = static_cast(streams_with_ratio); + else + throw Exception(ErrorCodes::INCORRECT_DATA, "Exceeded limit for `max_streams_to_max_threads_ratio`. Make sure that `max_streams * max_streams_to_max_threads_ratio` not exceeds {}, current value: {}", sizeof(size_t), streams_with_ratio); + } auto & prewhere_info = analysis_result.prewhere_info; From 07ee777c8d354526cf89c3647863916a0855de49 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 12 Mar 2024 15:39:04 +0000 Subject: [PATCH 116/283] Restart CI From 858ad2d68860d7993280b90c0f17ae3a0e84d712 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 12 Mar 2024 16:39:10 +0100 Subject: [PATCH 117/283] beautify and change max_value --- src/Interpreters/InterpreterSelectQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 3c84d086d85..a314492c5b0 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2553,10 +2553,10 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc /// If necessary, we request more sources than the number of threads - to distribute the work evenly over the threads. if (max_streams > 1 && !is_sync_remote) { - if (auto streams_with_ratio = max_streams * settings.max_streams_to_max_threads_ratio; streams_with_ratio > sizeof(size_t)) + if (auto streams_with_ratio = max_streams * settings.max_streams_to_max_threads_ratio; streams_with_ratio < SIZE_MAX) max_streams = static_cast(streams_with_ratio); else - throw Exception(ErrorCodes::INCORRECT_DATA, "Exceeded limit for `max_streams_to_max_threads_ratio`. Make sure that `max_streams * max_streams_to_max_threads_ratio` not exceeds {}, current value: {}", sizeof(size_t), streams_with_ratio); + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Exceeded limit for `max_streams` with `max_streams_to_max_threads_ratio`. 
Make sure that `max_streams * max_streams_to_max_threads_ratio` not exceeds {}, current value: {}", SIZE_MAX, streams_with_ratio); } auto & prewhere_info = analysis_result.prewhere_info; From ccc6df0e432783e3d128fe1c7b6a22b399f2b510 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 12 Mar 2024 16:43:55 +0000 Subject: [PATCH 118/283] fix aggregate function uniqExact --- .../AggregateFunctionUniq.h | 2 ++ .../Combinators/AggregateFunctionArray.h | 1 + .../Combinators/AggregateFunctionIf.h | 1 + .../Combinators/AggregateFunctionMerge.h | 1 + .../Combinators/AggregateFunctionNull.h | 1 + .../Combinators/AggregateFunctionState.h | 1 + src/AggregateFunctions/IAggregateFunction.h | 4 +++ src/Common/ColumnsHashingImpl.h | 3 +- src/Common/ProfileEvents.cpp | 2 ++ src/Interpreters/Aggregator.cpp | 18 ++++++++-- src/Interpreters/Aggregator.h | 1 + .../03008_optimize_equal_ranges.reference | 8 +++++ .../03008_optimize_equal_ranges.sql | 25 +++++++++++++ .../03008_uniq_exact_equal_ranges.reference | 0 .../03008_uniq_exact_equal_ranges.sql | 36 +++++++++++++++++++ 15 files changed, 100 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/03008_optimize_equal_ranges.reference create mode 100644 tests/queries/0_stateless/03008_optimize_equal_ranges.sql create mode 100644 tests/queries/0_stateless/03008_uniq_exact_equal_ranges.reference create mode 100644 tests/queries/0_stateless/03008_uniq_exact_equal_ranges.sql diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index 8ac75e4451c..891f2ac4284 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -483,6 +483,7 @@ public: } bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; } + bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override { @@ -576,6 +577,7 @@ public: } bool isAbleToParallelizeMerge() const override { return is_able_to_parallelize_merge; } + bool canOptimizeEqualKeysRanges() const override { return !is_able_to_parallelize_merge; } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena *) const override { diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionArray.h b/src/AggregateFunctions/Combinators/AggregateFunctionArray.h index 7f38453f86b..6b918926d0d 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionArray.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionArray.h @@ -142,6 +142,7 @@ public: } bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } + bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override { diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionIf.h b/src/AggregateFunctions/Combinators/AggregateFunctionIf.h index e81f2203e7b..df23398a10d 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionIf.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionIf.h @@ -165,6 +165,7 @@ public: } bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } + bool canOptimizeEqualKeysRanges() const override { return 
nested_func->canOptimizeEqualKeysRanges(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override { diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h b/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h index 5b9e8e606af..53c24bd60c1 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionMerge.h @@ -111,6 +111,7 @@ public: } bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } + bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override { diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionNull.h b/src/AggregateFunctions/Combinators/AggregateFunctionNull.h index 8b614f68540..ba72f960852 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionNull.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionNull.h @@ -152,6 +152,7 @@ public: } bool isAbleToParallelizeMerge() const override { return nested_function->isAbleToParallelizeMerge(); } + bool canOptimizeEqualKeysRanges() const override { return nested_function->canOptimizeEqualKeysRanges(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override { diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionState.h b/src/AggregateFunctions/Combinators/AggregateFunctionState.h index 8335d21cb1e..b0ab6d49604 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionState.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionState.h @@ -92,6 +92,7 @@ public: } bool isAbleToParallelizeMerge() const override { return nested_func->isAbleToParallelizeMerge(); } + bool canOptimizeEqualKeysRanges() const override { return nested_func->canOptimizeEqualKeysRanges(); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, ThreadPool & thread_pool, Arena * arena) const override { diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 94bb121893d..499185320e6 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -162,6 +162,10 @@ public: /// Tells if merge() with thread pool parameter could be used. virtual bool isAbleToParallelizeMerge() const { return false; } + /// Return true if it is allowed to replace call of `addBatch` + /// to `addBatchSinglePlace` for ranges of consecutive equal keys. + virtual bool canOptimizeEqualKeysRanges() const { return true; } + /// Should be used only if isAbleToParallelizeMerge() returned true. 
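    /// Merges the state `rhs` into `place`, splitting the work across the
    /// given thread pool (implemented e.g. by uniqExact when parallel merge
    /// is enabled; note that such functions opt out of the equal-keys-ranges
    /// optimization above).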
virtual void merge(AggregateDataPtr __restrict /*place*/, ConstAggregateDataPtr /*rhs*/, ThreadPool & /*thread_pool*/, Arena * /*arena*/) const diff --git a/src/Common/ColumnsHashingImpl.h b/src/Common/ColumnsHashingImpl.h index 7116160e94c..d68171a6566 100644 --- a/src/Common/ColumnsHashingImpl.h +++ b/src/Common/ColumnsHashingImpl.h @@ -62,7 +62,6 @@ struct LastElementCache bool check(const Key & key) const { return value.first == key; } bool hasOnlyOneValue() const { return found && misses == 1; } - UInt64 getMisses() const { return misses; } }; template @@ -232,7 +231,7 @@ public: ALWAYS_INLINE UInt64 getCacheMissesSinceLastReset() const { if constexpr (consecutive_keys_optimization) - return cache.getMisses(); + return cache.misses; return 0; } diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index c1ac3d08245..8fd1e189977 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -533,6 +533,8 @@ The server successfully detected this situation and will download merged part fr \ M(AggregationPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for aggregation.") \ M(AggregationHashTablesInitializedAsTwoLevel, "How many hash tables were inited as two-level for aggregation.") \ + M(AggregationProcessedBlocks, "How many blocks were processed by Aggregator") \ + M(AggregationOptimizedEqualRangesOfKeys, "For how many blocks optimization of equal ranges of keys was applied") \ \ M(MetadataFromKeeperCacheHit, "Number of times an object storage metadata request was answered from cache without making request to Keeper") \ M(MetadataFromKeeperCacheMiss, "Number of times an object storage metadata request had to be answered from Keeper") \ diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 80a98683867..7c9dac82eff 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -53,6 +53,8 @@ namespace ProfileEvents extern const Event OverflowThrow; extern const Event OverflowBreak; extern const Event OverflowAny; + extern const Event AggregationProcessedBlocks; + extern const Event AggregationOptimizedEqualRangesOfKeys; } namespace CurrentMetrics @@ -985,6 +987,7 @@ void Aggregator::executeOnBlockSmall( { /// `result` will destroy the states of aggregate functions in the destructor result.aggregator = this; + ProfileEvents::increment(ProfileEvents::AggregationProcessedBlocks); /// How to perform the aggregation? 
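    /// (On the first block result.empty() holds and the aggregation method is
    /// chosen and initialized; subsequent blocks reuse the same variants.)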
if (result.empty()) @@ -1342,6 +1345,7 @@ void NO_INLINE Aggregator::executeImplBatch( if constexpr (use_compiled_functions) { std::vector columns_data; + bool can_optimize_equal_keys_ranges = true; for (size_t i = 0; i < aggregate_functions.size(); ++i) { @@ -1350,13 +1354,15 @@ void NO_INLINE Aggregator::executeImplBatch( AggregateFunctionInstruction * inst = aggregate_instructions + i; size_t arguments_size = inst->that->getArgumentTypes().size(); // NOLINT + can_optimize_equal_keys_ranges &= inst->can_optimize_equal_keys_ranges; for (size_t argument_index = 0; argument_index < arguments_size; ++argument_index) columns_data.emplace_back(getColumnData(inst->batch_arguments[argument_index])); } - if (all_keys_are_const || (!no_more_keys && state.hasOnlyOneValueSinceLastReset())) + if (all_keys_are_const || (can_optimize_equal_keys_ranges && state.hasOnlyOneValueSinceLastReset())) { + ProfileEvents::increment(ProfileEvents::AggregationOptimizedEqualRangesOfKeys); auto add_into_aggregate_states_function_single_place = compiled_aggregate_functions_holder->compiled_aggregate_functions.add_into_aggregate_states_function_single_place; add_into_aggregate_states_function_single_place(row_begin, row_end, columns_data.data(), places[key_start]); } @@ -1379,10 +1385,15 @@ void NO_INLINE Aggregator::executeImplBatch( AggregateFunctionInstruction * inst = aggregate_instructions + i; - if (all_keys_are_const || (!no_more_keys && state.hasOnlyOneValueSinceLastReset())) + if (all_keys_are_const || (inst->can_optimize_equal_keys_ranges && state.hasOnlyOneValueSinceLastReset())) + { + ProfileEvents::increment(ProfileEvents::AggregationOptimizedEqualRangesOfKeys); addBatchSinglePlace(row_begin, row_end, inst, places[key_start] + inst->state_offset, aggregates_pool); + } else + { addBatch(row_begin, row_end, inst, places.get(), aggregates_pool); + } } } @@ -1510,6 +1521,7 @@ void NO_INLINE Aggregator::executeOnIntervalWithoutKey( /// `data_variants` will destroy the states of aggregate functions in the destructor data_variants.aggregator = this; data_variants.init(AggregatedDataVariants::Type::without_key); + ProfileEvents::increment(ProfileEvents::AggregationProcessedBlocks); AggregatedDataWithoutKey & res = data_variants.without_key; @@ -1588,6 +1600,7 @@ void Aggregator::prepareAggregateInstructions( } aggregate_functions_instructions[i].has_sparse_arguments = has_sparse_arguments; + aggregate_functions_instructions[i].can_optimize_equal_keys_ranges = aggregate_functions[i]->canOptimizeEqualKeysRanges(); aggregate_functions_instructions[i].arguments = aggregate_columns[i].data(); aggregate_functions_instructions[i].state_offset = offsets_of_aggregate_states[i]; @@ -1640,6 +1653,7 @@ bool Aggregator::executeOnBlock(Columns columns, { /// `result` will destroy the states of aggregate functions in the destructor result.aggregator = this; + ProfileEvents::increment(ProfileEvents::AggregationProcessedBlocks); /// How to perform the aggregation? 
if (result.empty()) diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 375b8986101..2d3f497fec0 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1221,6 +1221,7 @@ public: const IColumn ** batch_arguments{}; const UInt64 * offsets{}; bool has_sparse_arguments = false; + bool can_optimize_equal_keys_ranges = true; }; /// Used for optimize_aggregation_in_order: diff --git a/tests/queries/0_stateless/03008_optimize_equal_ranges.reference b/tests/queries/0_stateless/03008_optimize_equal_ranges.reference new file mode 100644 index 00000000000..08f8008fca6 --- /dev/null +++ b/tests/queries/0_stateless/03008_optimize_equal_ranges.reference @@ -0,0 +1,8 @@ +0 30000 +1 30000 +2 30000 +0 449985000 +1 449985000 +2 449985000 +sum 1 +uniqExact 0 diff --git a/tests/queries/0_stateless/03008_optimize_equal_ranges.sql b/tests/queries/0_stateless/03008_optimize_equal_ranges.sql new file mode 100644 index 00000000000..c6143fb7f51 --- /dev/null +++ b/tests/queries/0_stateless/03008_optimize_equal_ranges.sql @@ -0,0 +1,25 @@ +DROP TABLE IF EXISTS t_optimize_equal_ranges; + +CREATE TABLE t_optimize_equal_ranges (a UInt64, b String, c UInt64) ENGINE = MergeTree ORDER BY a; + +SET max_block_size = 1024; +SET max_bytes_before_external_group_by = 0; +SET optimize_aggregation_in_order = 0; + +INSERT INTO t_optimize_equal_ranges SELECT 0, toString(number), number FROM numbers(30000); +INSERT INTO t_optimize_equal_ranges SELECT 1, toString(number), number FROM numbers(30000); +INSERT INTO t_optimize_equal_ranges SELECT 2, toString(number), number FROM numbers(30000); + +SELECT a, uniqExact(b) FROM t_optimize_equal_ranges GROUP BY a ORDER BY a; +SELECT a, sum(c) FROM t_optimize_equal_ranges GROUP BY a ORDER BY a; + +SYSTEM FLUSH LOGS; + +SELECT + used_aggregate_functions[1] AS func, + ProfileEvents['AggregationOptimizedEqualRangesOfKeys'] > 0 +FROM system.query_log +WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND query LIKE '%SELECT%FROM%t_optimize_equal_ranges%' +ORDER BY func; + +DROP TABLE t_optimize_equal_ranges; diff --git a/tests/queries/0_stateless/03008_uniq_exact_equal_ranges.reference b/tests/queries/0_stateless/03008_uniq_exact_equal_ranges.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03008_uniq_exact_equal_ranges.sql b/tests/queries/0_stateless/03008_uniq_exact_equal_ranges.sql new file mode 100644 index 00000000000..2e708f28cac --- /dev/null +++ b/tests/queries/0_stateless/03008_uniq_exact_equal_ranges.sql @@ -0,0 +1,36 @@ +DROP TABLE IF EXISTS t_uniq_exact; + +CREATE TABLE t_uniq_exact (a UInt64, b String, c UInt64) ENGINE = MergeTree ORDER BY a; + +SET group_by_two_level_threshold_bytes = 1; +SET group_by_two_level_threshold = 1; +SET max_threads = 4; +SET max_bytes_before_external_group_by = 0; +SET optimize_aggregation_in_order = 0; + +INSERT INTO t_uniq_exact SELECT 0, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 1, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 2, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 3, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 4, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 5, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 6, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT 
INTO t_uniq_exact SELECT 7, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 8, randomPrintableASCII(5), rand() FROM numbers(300000); +INSERT INTO t_uniq_exact SELECT 9, randomPrintableASCII(5), rand() FROM numbers(300000); + +OPTIMIZE TABLE t_uniq_exact FINAL; + +SELECT a, uniqExact(b) FROM t_uniq_exact GROUP BY a ORDER BY a +SETTINGS min_hit_rate_to_use_consecutive_keys_optimization = 1.0 +EXCEPT +SELECT a, uniqExact(b) FROM t_uniq_exact GROUP BY a ORDER BY a +SETTINGS min_hit_rate_to_use_consecutive_keys_optimization = 0.5; + +SELECT a, sum(c) FROM t_uniq_exact GROUP BY a ORDER BY a +SETTINGS min_hit_rate_to_use_consecutive_keys_optimization = 1.0 +EXCEPT +SELECT a, sum(c) FROM t_uniq_exact GROUP BY a ORDER BY a +SETTINGS min_hit_rate_to_use_consecutive_keys_optimization = 0.5; + +DROP TABLE t_uniq_exact; From 53442f49140c9c8b391f5b3a76a9bf4ab45dbed0 Mon Sep 17 00:00:00 2001 From: johnnymatthews <9611008+johnnymatthews@users.noreply.github.com> Date: Tue, 12 Mar 2024 13:48:47 -0300 Subject: [PATCH 119/283] Cleans up markdown. --- docs/en/sql-reference/aggregate-functions/reference/varpop.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpop.md b/docs/en/sql-reference/aggregate-functions/reference/varpop.md index 5f18bdc30f6..76472f62789 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varpop.md @@ -27,7 +27,7 @@ Returns an integer of type `Float64`. **Implementation details** -This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable` function](#varPopStable). +This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable` function](#varPopStable). **Example** From 8abb85948c4b401a61cdf6dc6cf33b7ac9df2279 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 12 Mar 2024 17:03:11 +0000 Subject: [PATCH 120/283] Fix flaky test and add separate to show unexpected behaviour --- .../01603_insert_select_too_many_parts.sql | 3 ++- ...t_select_too_many_parts_multithread.reference | 1 + ..._insert_select_too_many_parts_multithread.sql | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.reference create mode 100644 tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.sql diff --git a/tests/queries/0_stateless/01603_insert_select_too_many_parts.sql b/tests/queries/0_stateless/01603_insert_select_too_many_parts.sql index a56b680e212..eea52282cf4 100644 --- a/tests/queries/0_stateless/01603_insert_select_too_many_parts.sql +++ b/tests/queries/0_stateless/01603_insert_select_too_many_parts.sql @@ -5,7 +5,8 @@ SYSTEM STOP MERGES too_many_parts; SET max_block_size = 1, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; -- exception is not thrown if threshold is exceeded when multi-block INSERT is already started. 
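-- (Background: with several insert threads each MergeTreeSink checks the
-- parts threshold independently when it starts, so a parallel INSERT can
-- race past parts_to_throw_insert without throwing; hence the single-thread
-- setting below.)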
-INSERT INTO too_many_parts SELECT * FROM numbers(10); +-- Single thread is used as different threads check it separately https://github.com/ClickHouse/ClickHouse/issues/61158 +INSERT INTO too_many_parts SELECT * FROM numbers(10) SETTINGS max_insert_threads=1; SELECT count() FROM too_many_parts; -- exception is thrown if threshold is exceeded on new INSERT. diff --git a/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.reference b/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.reference new file mode 100644 index 00000000000..29d6383b52c --- /dev/null +++ b/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.reference @@ -0,0 +1 @@ +100 diff --git a/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.sql b/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.sql new file mode 100644 index 00000000000..00cf262add5 --- /dev/null +++ b/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.sql @@ -0,0 +1,16 @@ +# Tags: disabled +# TODO: Fix parts_to_throw_insert logic for parallel MergeTreeSink onStart calls +DROP TABLE IF EXISTS too_many_parts; + +CREATE TABLE too_many_parts (x UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS parts_to_delay_insert = 5, parts_to_throw_insert = 5; + +SYSTEM STOP MERGES too_many_parts; +SET max_block_size = 1, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0, max_threads=100, max_insert_threads=100; + +-- exception is not thrown if threshold is exceeded when multi-block INSERT is already started. +INSERT INTO too_many_parts SELECT * FROM numbers_mt(100); +SELECT count() FROM too_many_parts; + +INSERT INTO too_many_parts SELECT * FROM numbers_mt(10); -- { serverError 252 } + +DROP TABLE too_many_parts; From c947484fe0d788dd46384b90987b59694f0a0b77 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 12 Mar 2024 19:36:10 +0100 Subject: [PATCH 121/283] Fxi again --- tests/integration/test_disk_types/test.py | 15 ++++++++++++--- .../test_endpoint_macro_substitution/test.py | 17 ++++++++++++----- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_disk_types/test.py b/tests/integration/test_disk_types/test.py index a53d073d30b..b9b8ef2010d 100644 --- a/tests/integration/test_disk_types/test.py +++ b/tests/integration/test_disk_types/test.py @@ -87,19 +87,28 @@ def test_different_types(cluster): def test_select_by_type(cluster): node = cluster.instances["node"] for name, disk_type in list(disk_types.items()): - if disk_type != "S3": + if disk_type == "Local": assert ( node.query( "SELECT name FROM system.disks WHERE type='" + disk_type + "'" ) == name + "\n" ) - else: + elif disk_type == "S3": assert ( node.query( - "SELECT name FROM system.disks WHERE type='" + "SELECT name FROM system.disks WHERE object_storage_type='" + disk_type + "' ORDER BY name" ) == "disk_encrypted\ndisk_s3\n" ) + else: + assert ( + node.query( + "SELECT name FROM system.disks WHERE object_storage_type='" + + disk_type + + "'" + ) + == name + "\n" + ) diff --git a/tests/integration/test_endpoint_macro_substitution/test.py b/tests/integration/test_endpoint_macro_substitution/test.py index e161d8e82ff..ee72fb9b492 100644 --- a/tests/integration/test_endpoint_macro_substitution/test.py +++ b/tests/integration/test_endpoint_macro_substitution/test.py @@ -67,22 +67,29 @@ def test_different_types(cluster): def test_select_by_type(cluster): node = cluster.instances["node"] - fs = HdfsClient(hosts=cluster.hdfs_ip) - for 
name, disk_type in list(disk_types.items()): - if disk_type != "S3": + if disk_type == "Local": assert ( node.query( "SELECT name FROM system.disks WHERE type='" + disk_type + "'" ) == name + "\n" ) - else: + elif disk_type == "S3": assert ( node.query( - "SELECT name FROM system.disks WHERE type='" + "SELECT name FROM system.disks WHERE object_storage_type='" + disk_type + "' ORDER BY name" ) == "disk_encrypted\ndisk_s3\n" ) + else: + assert ( + node.query( + "SELECT name FROM system.disks WHERE object_storage_type='" + + disk_type + + "'" + ) + == name + "\n" + ) From ebd934e28696c1d01f6ee225c62b4e76a026c337 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Mar 2024 05:05:23 +0100 Subject: [PATCH 122/283] Continue --- src/Functions/FunctionsConversion.cpp | 390 +++++++++++++------------- 1 file changed, 192 insertions(+), 198 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index e2d9c4a173e..846b6c31062 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -179,6 +179,131 @@ struct ToDateTimeImpl }; +/// Implementation of toDate function. + +template +struct ToDateTransform32Or64 +{ + static constexpr auto name = "toDate"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from > MAX_DATETIME_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); + } + /// if value is smaller (or equal) than maximum day value for Date, than treat it as day num, + /// otherwise treat it as unix timestamp. This is a bit weird, but we leave this behavior. + if (from <= DATE_LUT_MAX_DAY_NUM) + return from; + else + return time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); + } +}; + + +template +struct ToDateTransform32Or64Signed +{ + static constexpr auto name = "toDate"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) + { + // TODO: decide narrow or extended range based on FromType + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from < 0 || from > MAX_DATE_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); + } + else + { + if (from < 0) + return 0; + } + return (from <= DATE_LUT_MAX_DAY_NUM) + ? static_cast(from) + : time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATE_TIMESTAMP))); + } +}; + +template +struct ToDateTransform8Or16Signed +{ + static constexpr auto name = "toDate"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + if (from < 0) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); + else + return 0; + } + return from; + } +}; + +/// Implementation of toDate32 function. 
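+/// Mirrors the toDate transforms above: values below DATE_LUT_MAX_EXTEND_DAY_NUM
+/// are taken as day numbers and larger values as unix timestamps, saturated to
+/// MAX_DATETIME64_TIMESTAMP (or rejected when the overflow behavior is Throw).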
+ +template +struct ToDate32Transform32Or64 +{ + static constexpr auto name = "toDate32"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) + { + if (from < DATE_LUT_MAX_EXTEND_DAY_NUM) + return static_cast(from); + else + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type Date32", from); + } + return time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATETIME64_TIMESTAMP))); + } + } +}; + +template +struct ToDate32Transform32Or64Signed +{ + static constexpr auto name = "toDate32"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) + { + static const Int32 daynum_min_offset = -static_cast(time_zone.getDayNumOffsetEpoch()); + + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from < daynum_min_offset || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type Date32", from); + } + + if (from < daynum_min_offset) + return daynum_min_offset; + + return (from < DATE_LUT_MAX_EXTEND_DAY_NUM) + ? static_cast(from) + : time_zone.toDayNum(std::min(time_t(Int64(from)), time_t(MAX_DATETIME64_TIMESTAMP))); + } +}; + +template +struct ToDate32Transform8Or16Signed +{ + static constexpr auto name = "toDate32"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + return from; + } +}; + + /// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type. struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; }; @@ -241,6 +366,73 @@ struct ConvertImpl return DateTimeTransformImpl, false>::execute( arguments, result_type, input_rows_count); } + /** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience, + * Float32, Float64) to Date. If the + * number is less than 65536, then it is treated as DayNum, and if it's greater or equals to 65536, + * then treated as unix timestamp. If the number exceeds UInt32, saturate to MAX_UINT32 then as DayNum. + * It's a bit illogical, as we actually have two functions in one. + * But allows to support frequent case, + * when user write toDate(UInt32), expecting conversion of unix timestamp to Date. + * (otherwise such usage would be frequent mistake). 
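+     *
+     * An illustrative example of the behavior described above (dates assume a
+     * UTC session time zone):
+     *   SELECT toDate(17000);        -- 17000 < 65536, treated as a day number
+     *   SELECT toDate(1500000000);   -- treated as a unix timestamp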
+ */ + else if constexpr (( + std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } else { using ColVecFrom = typename FromDataType::ColumnType; @@ -490,208 +682,10 @@ struct ConvertImpl }; -/// Implementation of toDate function. - -template -struct ToDateTransform32Or64 -{ - static constexpr auto name = "toDate"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); - } - /// if value is smaller (or equal) than maximum day value for Date, than treat it as day num, - /// otherwise treat it as unix timestamp. This is a bit weird, but we leave this behavior. - if (from <= DATE_LUT_MAX_DAY_NUM) - return from; - else - return time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); - } -}; - - -template -struct ToDateTransform32Or64Signed -{ - static constexpr auto name = "toDate"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - // TODO: decide narrow or extended range based on FromType - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < 0 || from > MAX_DATE_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); - } - else - { - if (from < 0) - return 0; - } - return (from <= DATE_LUT_MAX_DAY_NUM) - ? static_cast(from) - : time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATE_TIMESTAMP))); - } -}; - -template -struct ToDateTransform8Or16Signed -{ - static constexpr auto name = "toDate"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if (from < 0) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); - else - return 0; - } - return from; - } -}; - template struct ConvertImpl : DateTimeTransformImpl, false> {}; -/// Implementation of toDate32 function. 
- -template -struct ToDate32Transform32Or64 -{ - static constexpr auto name = "toDate32"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - if (from < DATE_LUT_MAX_EXTEND_DAY_NUM) - return static_cast(from); - else - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type Date32", from); - } - return time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATETIME64_TIMESTAMP))); - } - } -}; - -template -struct ToDate32Transform32Or64Signed -{ - static constexpr auto name = "toDate32"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - static const Int32 daynum_min_offset = -static_cast(time_zone.getDayNumOffsetEpoch()); - - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < daynum_min_offset || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type Date32", from); - } - - if (from < daynum_min_offset) - return daynum_min_offset; - - return (from < DATE_LUT_MAX_EXTEND_DAY_NUM) - ? static_cast(from) - : time_zone.toDayNum(std::min(time_t(Int64(from)), time_t(MAX_DATETIME64_TIMESTAMP))); - } -}; - -template -struct ToDate32Transform8Or16Signed -{ - static constexpr auto name = "toDate32"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - return from; - } -}; - -/** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience, - * Float32, Float64) to Date. If the - * number is less than 65536, then it is treated as DayNum, and if it's greater or equals to 65536, - * then treated as unix timestamp. If the number exceeds UInt32, saturate to MAX_UINT32 then as DayNum. - * It's a bit illogical, as we actually have two functions in one. - * But allows to support frequent case, - * when user write toDate(UInt32), expecting conversion of unix timestamp to Date. - * (otherwise such usage would be frequent mistake). 
- */ -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - template struct ToDateTimeTransform64 From dbfda047014ab1b3e4cd81e46c5ce623387d61fe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Mar 2024 05:18:51 +0100 Subject: [PATCH 123/283] Continue --- src/Functions/FunctionsConversion.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 846b6c31062..33ef12b9d23 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -366,6 +366,12 @@ struct ConvertImpl return DateTimeTransformImpl, false>::execute( arguments, result_type, input_rows_count); } + else if constexpr (std::is_same_v && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count, TransformDateTime64(assert_cast(*named_from.type).getScale())); + } /** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience, * Float32, Float64) to Date. 
If the * number is less than 65536, then it is treated as DayNum, and if it's greater or equals to 65536, @@ -682,11 +688,6 @@ struct ConvertImpl }; -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - - template struct ToDateTimeTransform64 { From a5383a4619a18f0979594e25701f8f7893d62d56 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Mar 2024 05:27:05 +0100 Subject: [PATCH 124/283] Continue --- src/Functions/FunctionsConversion.cpp | 410 +++++++++++++------------- 1 file changed, 205 insertions(+), 205 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 33ef12b9d23..ffcb0086e27 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -303,6 +303,183 @@ struct ToDate32Transform8Or16Signed } }; +template +struct ToDateTimeTransform64 +{ + static constexpr auto name = "toDateTime"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from > MAX_DATETIME_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); + } + return static_cast(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); + } +}; + +template +struct ToDateTimeTransformSigned +{ + static constexpr auto name = "toDateTime"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + if (from < 0) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); + else + return 0; + } + return from; + } +}; + +template +struct ToDateTimeTransform64Signed +{ + static constexpr auto name = "toDateTime"; + + static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from < 0 || from > MAX_DATETIME_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); + } + + if (from < 0) + return 0; + return static_cast(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); + } +}; + +/** Conversion of numeric to DateTime64 + */ + +template +struct ToDateTime64TransformUnsigned +{ + static constexpr auto name = "toDateTime64"; + + const DateTime64::NativeType scale_multiplier = 1; + + ToDateTime64TransformUnsigned(UInt32 scale = 0) /// NOLINT + : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) + {} + + NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); + else + return DecimalUtils::decimalFromComponentsWithMultiplier(from, 0, scale_multiplier); + } + else + return DecimalUtils::decimalFromComponentsWithMultiplier(std::min(from, MAX_DATETIME64_TIMESTAMP), 0, scale_multiplier); + } +}; +template +struct ToDateTime64TransformSigned +{ + static constexpr auto name = "toDateTime64"; + + const DateTime64::NativeType 
scale_multiplier = 1; + + ToDateTime64TransformSigned(UInt32 scale = 0) /// NOLINT + : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) + {} + + NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from < MIN_DATETIME64_TIMESTAMP || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); + } + from = static_cast(std::max(from, MIN_DATETIME64_TIMESTAMP)); + from = static_cast(std::min(from, MAX_DATETIME64_TIMESTAMP)); + + return DecimalUtils::decimalFromComponentsWithMultiplier(from, 0, scale_multiplier); + } +}; +template +struct ToDateTime64TransformFloat +{ + static constexpr auto name = "toDateTime64"; + + const UInt32 scale = 1; + + ToDateTime64TransformFloat(UInt32 scale_ = 0) /// NOLINT + : scale(scale_) + {} + + NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const + { + if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) + { + if (from < MIN_DATETIME64_TIMESTAMP || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); + } + + from = std::max(from, static_cast(MIN_DATETIME64_TIMESTAMP)); + from = std::min(from, static_cast(MAX_DATETIME64_TIMESTAMP)); + return convertToDecimal(from, scale); + } +}; + +/** Conversion of DateTime64 to Date or DateTime: discards fractional part. + */ +template +struct FromDateTime64Transform +{ + static constexpr auto name = Transform::name; + + const DateTime64::NativeType scale_multiplier = 1; + + FromDateTime64Transform(UInt32 scale) /// NOLINT + : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) + {} + + auto execute(DateTime64::NativeType dt, const DateLUTImpl & time_zone) const + { + const auto c = DecimalUtils::splitWithScaleMultiplier(DateTime64(dt), scale_multiplier); + return Transform::execute(static_cast(c.whole), time_zone); + } +}; + +struct ToDateTime64Transform +{ + static constexpr auto name = "toDateTime64"; + + const DateTime64::NativeType scale_multiplier = 1; + + ToDateTime64Transform(UInt32 scale = 0) /// NOLINT + : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) + {} + + DateTime64::NativeType execute(UInt16 d, const DateLUTImpl & time_zone) const + { + const auto dt = ToDateTimeImpl<>::execute(d, time_zone); + return execute(dt, time_zone); + } + + DateTime64::NativeType execute(Int32 d, const DateLUTImpl & time_zone) const + { + Int64 dt = static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); + return DecimalUtils::decimalFromComponentsWithMultiplier(dt, 0, scale_multiplier); + } + + DateTime64::NativeType execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) const + { + return DecimalUtils::decimalFromComponentsWithMultiplier(dt, 0, scale_multiplier); + } +}; + /// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type. struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; }; @@ -439,6 +616,34 @@ struct ConvertImpl return DateTimeTransformImpl, false>::execute( arguments, result_type, input_rows_count); } + /// Special case of converting Int8, Int16, Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to DateTime. 
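+    /// An illustrative sketch of the behavior (assuming a UTC session time zone
+    /// and the default, saturating overflow behavior):
+    ///     SELECT toDateTime(1500000000);   -- treated as a unix timestamp
+    ///     SELECT toDateTime(-1);           -- negative input saturates to 1970-01-01 00:00:00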
+ else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } else { using ColVecFrom = typename FromDataType::ColumnType; @@ -688,164 +893,6 @@ struct ConvertImpl }; -template -struct ToDateTimeTransform64 -{ - static constexpr auto name = "toDateTime"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); - } - return static_cast(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); - } -}; - -template -struct ToDateTimeTransformSigned -{ - static constexpr auto name = "toDateTime"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if (from < 0) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); - else - return 0; - } - return from; - } -}; - -template -struct ToDateTimeTransform64Signed -{ - static constexpr auto name = "toDateTime"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < 0 || from > MAX_DATETIME_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); - } - - if (from < 0) - return 0; - return static_cast(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); - } -}; - -/// Special case of converting Int8, Int16, Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to DateTime. 
-template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -/** Conversion of numeric to DateTime64 - */ - -template -struct ToDateTime64TransformUnsigned -{ - static constexpr auto name = "toDateTime64"; - - const DateTime64::NativeType scale_multiplier = 1; - - ToDateTime64TransformUnsigned(UInt32 scale = 0) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); - else - return DecimalUtils::decimalFromComponentsWithMultiplier(from, 0, scale_multiplier); - } - else - return DecimalUtils::decimalFromComponentsWithMultiplier(std::min(from, MAX_DATETIME64_TIMESTAMP), 0, scale_multiplier); - } -}; -template -struct ToDateTime64TransformSigned -{ - static constexpr auto name = "toDateTime64"; - - const DateTime64::NativeType scale_multiplier = 1; - - ToDateTime64TransformSigned(UInt32 scale = 0) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < MIN_DATETIME64_TIMESTAMP || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); - } - from = static_cast(std::max(from, MIN_DATETIME64_TIMESTAMP)); - from = static_cast(std::min(from, MAX_DATETIME64_TIMESTAMP)); - - return DecimalUtils::decimalFromComponentsWithMultiplier(from, 0, scale_multiplier); - } -}; -template -struct ToDateTime64TransformFloat -{ - static constexpr auto name = "toDateTime64"; - - const UInt32 scale = 1; - - ToDateTime64TransformFloat(UInt32 scale_ = 0) /// NOLINT - : scale(scale_) - {} - - NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < MIN_DATETIME64_TIMESTAMP || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); - } - - from = std::max(from, static_cast(MIN_DATETIME64_TIMESTAMP)); - from = std::min(from, static_cast(MAX_DATETIME64_TIMESTAMP)); - return convertToDecimal(from, scale); - } -}; - template struct ConvertImpl : DateTimeTransformImpl, false> {}; @@ -875,26 +922,6 @@ struct ConvertImpl, false> {}; -/** Conversion of DateTime64 to Date or DateTime: discards fractional part. 
- */ -template -struct FromDateTime64Transform -{ - static constexpr auto name = Transform::name; - - const DateTime64::NativeType scale_multiplier = 1; - - FromDateTime64Transform(UInt32 scale) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - auto execute(DateTime64::NativeType dt, const DateLUTImpl & time_zone) const - { - const auto c = DecimalUtils::splitWithScaleMultiplier(DateTime64(dt), scale_multiplier); - return Transform::execute(static_cast(c.whole), time_zone); - } -}; - /** Conversion of DateTime64 to Date or DateTime: discards fractional part. */ template @@ -905,33 +932,6 @@ template : DateTimeTransformImpl>, false> {}; -struct ToDateTime64Transform -{ - static constexpr auto name = "toDateTime64"; - - const DateTime64::NativeType scale_multiplier = 1; - - ToDateTime64Transform(UInt32 scale = 0) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - DateTime64::NativeType execute(UInt16 d, const DateLUTImpl & time_zone) const - { - const auto dt = ToDateTimeImpl<>::execute(d, time_zone); - return execute(dt, time_zone); - } - - DateTime64::NativeType execute(Int32 d, const DateLUTImpl & time_zone) const - { - Int64 dt = static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); - return DecimalUtils::decimalFromComponentsWithMultiplier(dt, 0, scale_multiplier); - } - - DateTime64::NativeType execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) const - { - return DecimalUtils::decimalFromComponentsWithMultiplier(dt, 0, scale_multiplier); - } -}; /** Conversion of Date or DateTime to DateTime64: add zero sub-second part. */ From 5336a8911794662d6a207c5e4adda14c93871562 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Mar 2024 05:36:09 +0100 Subject: [PATCH 125/283] Continue --- src/Functions/FunctionsConversion.cpp | 56 +++++++++++++-------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index ffcb0086e27..af240f88560 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -644,6 +644,33 @@ struct ConvertImpl return DateTimeTransformImpl, false>::execute( arguments, result_type, input_rows_count); } + else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } else { using ColVecFrom = typename FromDataType::ColumnType; @@ -893,35 +920,6 @@ struct ConvertImpl }; -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - - /** Conversion of DateTime64 to Date or DateTime: discards fractional part. 
*/ template From b870d9d1cd767eddcbe98cb31abe0167bbdab488 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Mar 2024 05:45:33 +0100 Subject: [PATCH 126/283] Continue --- src/Functions/FunctionsConversion.cpp | 239 +++++++++++++------------- 1 file changed, 119 insertions(+), 120 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index af240f88560..574e99bd6a6 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -433,7 +433,7 @@ struct ToDateTime64TransformFloat }; /** Conversion of DateTime64 to Date or DateTime: discards fractional part. - */ + */ template struct FromDateTime64Transform { @@ -480,6 +480,98 @@ struct ToDateTime64Transform } }; +/** Transformation of numbers, dates, datetimes to strings: through formatting. + */ +template +struct FormatImpl +{ + template + static ReturnType execute(const typename DataType::FieldType x, WriteBuffer & wb, const DataType *, const DateLUTImpl *) + { + writeText(x, wb); + return ReturnType(true); + } +}; + +template <> +struct FormatImpl +{ + template + static ReturnType execute(const DataTypeDate::FieldType x, WriteBuffer & wb, const DataTypeDate *, const DateLUTImpl * time_zone) + { + writeDateText(DayNum(x), wb, *time_zone); + return ReturnType(true); + } +}; + +template <> +struct FormatImpl +{ + template + static ReturnType execute(const DataTypeDate32::FieldType x, WriteBuffer & wb, const DataTypeDate32 *, const DateLUTImpl * time_zone) + { + writeDateText(ExtendedDayNum(x), wb, *time_zone); + return ReturnType(true); + } +}; + +template <> +struct FormatImpl +{ + template + static ReturnType execute(const DataTypeDateTime::FieldType x, WriteBuffer & wb, const DataTypeDateTime *, const DateLUTImpl * time_zone) + { + writeDateTimeText(x, wb, *time_zone); + return ReturnType(true); + } +}; + +template <> +struct FormatImpl +{ + template + static ReturnType execute(const DataTypeDateTime64::FieldType x, WriteBuffer & wb, const DataTypeDateTime64 * type, const DateLUTImpl * time_zone) + { + writeDateTimeText(DateTime64(x), type->getScale(), wb, *time_zone); + return ReturnType(true); + } +}; + + +template +struct FormatImpl> +{ + template + static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeEnum * type, const DateLUTImpl *) + { + static constexpr bool throw_exception = std::is_same_v; + + if constexpr (throw_exception) + { + writeString(type->getNameForValue(x), wb); + } + else + { + StringRef res; + bool is_ok = type->getNameForValue(x, res); + if (is_ok) + writeString(res, wb); + return ReturnType(is_ok); + } + } +}; + +template +struct FormatImpl> +{ + template + static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeDecimal * type, const DateLUTImpl *) + { + writeText(x, type->getScale(), wb, false); + return ReturnType(true); + } +}; + /// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type. struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; }; @@ -671,6 +763,32 @@ struct ConvertImpl return DateTimeTransformImpl, false>::execute( arguments, result_type, input_rows_count); } + /// Conversion of DateTime64 to Date or DateTime: discards fractional part. 
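+    /// e.g. an illustrative CAST from DateTime64 to DateTime: '2024-03-13 10:20:30.456'
+    /// becomes '2024-03-13 10:20:30'; the fractional '.456' part is simply dropped.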
+ else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl>, false>::execute( + arguments, result_type, input_rows_count, TransformDateTime64>(assert_cast(*named_from.type).getScale())); + } + else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl>, false>::execute( + arguments, result_type, input_rows_count, TransformDateTime64>(assert_cast(*named_from.type).getScale())); + } + /// Conversion of Date or DateTime to DateTime64: add zero sub-second part. + else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl::execute( + arguments, result_type, input_rows_count); + } else { using ColVecFrom = typename FromDataType::ColumnType; @@ -920,125 +1038,6 @@ struct ConvertImpl }; -/** Conversion of DateTime64 to Date or DateTime: discards fractional part. - */ -template -struct ConvertImpl - : DateTimeTransformImpl>, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl>, false> {}; - - -/** Conversion of Date or DateTime to DateTime64: add zero sub-second part. - */ -template -struct ConvertImpl - : DateTimeTransformImpl {}; - -template -struct ConvertImpl - : DateTimeTransformImpl {}; - -template -struct ConvertImpl - : DateTimeTransformImpl {}; - - -/** Transformation of numbers, dates, datetimes to strings: through formatting. - */ -template -struct FormatImpl -{ - template - static ReturnType execute(const typename DataType::FieldType x, WriteBuffer & wb, const DataType *, const DateLUTImpl *) - { - writeText(x, wb); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDate::FieldType x, WriteBuffer & wb, const DataTypeDate *, const DateLUTImpl * time_zone) - { - writeDateText(DayNum(x), wb, *time_zone); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDate32::FieldType x, WriteBuffer & wb, const DataTypeDate32 *, const DateLUTImpl * time_zone) - { - writeDateText(ExtendedDayNum(x), wb, *time_zone); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDateTime::FieldType x, WriteBuffer & wb, const DataTypeDateTime *, const DateLUTImpl * time_zone) - { - writeDateTimeText(x, wb, *time_zone); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDateTime64::FieldType x, WriteBuffer & wb, const DataTypeDateTime64 * type, const DateLUTImpl * time_zone) - { - writeDateTimeText(DateTime64(x), type->getScale(), wb, *time_zone); - return ReturnType(true); - } -}; - - -template -struct FormatImpl> -{ - template - static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeEnum * type, const DateLUTImpl *) - { - static constexpr bool throw_exception = std::is_same_v; - - if constexpr (throw_exception) - { - writeString(type->getNameForValue(x), wb); - } - else - { - StringRef res; - bool is_ok = type->getNameForValue(x, res); - if (is_ok) - writeString(res, wb); - return ReturnType(is_ok); - } - } -}; - -template -struct FormatImpl> -{ - template - static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeDecimal * type, const DateLUTImpl *) - { - writeText(x, type->getScale(), wb, false); - return ReturnType(true); - } -}; - - /// DataTypeEnum to DataType free 
conversion template struct ConvertImpl, DataTypeNumber, Name, ConvertDefaultBehaviorTag> From 8d9a8fdfc1d91723513bd66184662a810e01b2fe Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Mar 2024 05:52:01 +0100 Subject: [PATCH 127/283] Continue --- src/Functions/FunctionsConversion.cpp | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 574e99bd6a6..2ce508c4924 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -609,9 +609,12 @@ struct ConvertImpl { const ColumnWithTypeAndName & named_from = arguments[0]; - if constexpr (std::is_same_v && !FromDataType::is_parametric) + if constexpr ((std::is_same_v && !FromDataType::is_parametric) + || (std::is_same_v && std::is_same_v) + || (std::is_same_v && std::is_same_v)) { /// If types are the same, reuse the columns. + /// Conversions between Enum and the underlying type are also free. return named_from.column; } else if constexpr ((std::is_same_v || std::is_same_v) @@ -1038,25 +1041,11 @@ struct ConvertImpl }; -/// DataTypeEnum to DataType free conversion -template -struct ConvertImpl, DataTypeNumber, Name, ConvertDefaultBehaviorTag> -{ - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) - { - return arguments[0].column; - } -}; - inline ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) { - ColumnUInt8::MutablePtr null_map = nullptr; - if (const auto * col_null = checkAndGetColumn(col.get())) - { - null_map = ColumnUInt8::create(); - null_map->insertRangeFrom(col_null->getNullMapColumn(), 0, col_null->size()); - } - return null_map; + if (const auto * col_nullable = checkAndGetColumn(col.get())) + return col_nullable->getNullMapColumn().mutate(); + return nullptr; } template From 17a7696a378c5cfaad18214284e340f1d2306170 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Mar 2024 06:06:07 +0100 Subject: [PATCH 128/283] Continue --- src/Functions/FunctionsConversion.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 2ce508c4924..56a0d74c60c 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1041,11 +1041,15 @@ struct ConvertImpl }; -inline ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) +ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) { + ColumnUInt8::MutablePtr null_map = nullptr; if (const auto * col_nullable = checkAndGetColumn(col.get())) - return col_nullable->getNullMapColumn().mutate(); - return nullptr; + { + null_map = ColumnUInt8::create(); + null_map->insertRangeFrom(col_nullable->getNullMapColumn(), 0, col_nullable->size()); + } + return null_map; } template From ecd6b88831a4251acf3f62d52a43302fa1ef534c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Mar 2024 06:15:17 +0100 Subject: [PATCH 129/283] Continue --- src/Functions/FunctionsConversion.cpp | 1272 ++++++++++++------------- 1 file changed, 635 insertions(+), 637 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 56a0d74c60c..16f547939fa 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -572,475 +572,6 @@ struct FormatImpl> } }; - -/// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type. 
-struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; }; - -struct AccurateConvertStrategyAdditions -{ - UInt32 scale { 0 }; -}; - -struct AccurateOrNullConvertStrategyAdditions -{ - UInt32 scale { 0 }; -}; - - -struct ConvertDefaultBehaviorTag {}; -struct ConvertReturnNullOnErrorTag {}; -struct ConvertReturnZeroOnErrorTag {}; - -/** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment. - * (Date is represented internally as number of days from some day; DateTime - as unix timestamp) - */ -template -struct ConvertImpl -{ - using FromFieldType = typename FromDataType::FieldType; - using ToFieldType = typename ToDataType::FieldType; - - template - static ColumnPtr NO_SANITIZE_UNDEFINED execute( - const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type [[maybe_unused]], size_t input_rows_count, - Additions additions [[maybe_unused]] = Additions()) - { - const ColumnWithTypeAndName & named_from = arguments[0]; - - if constexpr ((std::is_same_v && !FromDataType::is_parametric) - || (std::is_same_v && std::is_same_v) - || (std::is_same_v && std::is_same_v)) - { - /// If types are the same, reuse the columns. - /// Conversions between Enum and the underlying type are also free. - return named_from.column; - } - else if constexpr ((std::is_same_v || std::is_same_v) - && std::is_same_v) - { - /// Conversion of DateTime to Date: throw off time component. - /// Conversion of Date32 to Date. - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); - } - else if constexpr (std::is_same_v && std::is_same_v) - { - /// Conversion of DateTime to Date: throw off time component. - return DateTimeTransformImpl::execute( - arguments, result_type, input_rows_count); - } - else if constexpr ((std::is_same_v || std::is_same_v) - && std::is_same_v) - { - /// Conversion from Date/Date32 to DateTime. - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); - } - else if constexpr (std::is_same_v && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count, TransformDateTime64(assert_cast(*named_from.type).getScale())); - } - /** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience, - * Float32, Float64) to Date. If the - * number is less than 65536, then it is treated as DayNum, and if it's greater or equals to 65536, - * then treated as unix timestamp. If the number exceeds UInt32, saturate to MAX_UINT32 then as DayNum. - * It's a bit illogical, as we actually have two functions in one. - * But allows to support frequent case, - * when user write toDate(UInt32), expecting conversion of unix timestamp to Date. - * (otherwise such usage would be frequent mistake). 
- */ - else if constexpr (( - std::is_same_v - || std::is_same_v) - && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); - } - else if constexpr (( - std::is_same_v - || std::is_same_v) - && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); - } - else if constexpr (( - std::is_same_v - || std::is_same_v - || std::is_same_v - || std::is_same_v) - && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); - } - else if constexpr (( - std::is_same_v - || std::is_same_v) - && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); - } - else if constexpr (( - std::is_same_v - || std::is_same_v) - && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); - } - else if constexpr (( - std::is_same_v - || std::is_same_v - || std::is_same_v - || std::is_same_v) - && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); - } - /// Special case of converting Int8, Int16, Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to DateTime. - else if constexpr (( - std::is_same_v - || std::is_same_v - || std::is_same_v) - && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); - } - else if constexpr (std::is_same_v - && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); - } - else if constexpr (( - std::is_same_v - || std::is_same_v - || std::is_same_v) - && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); - } - else if constexpr (( - std::is_same_v - || std::is_same_v - || std::is_same_v - || std::is_same_v) - && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); - } - else if constexpr (std::is_same_v - && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); - } - else if constexpr (( - std::is_same_v - || std::is_same_v) - && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); - } - /// Conversion of DateTime64 to Date or DateTime: discards fractional part. - else if constexpr (std::is_same_v - && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl>, false>::execute( - arguments, result_type, input_rows_count, TransformDateTime64>(assert_cast(*named_from.type).getScale())); - } - else if constexpr (std::is_same_v - && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl>, false>::execute( - arguments, result_type, input_rows_count, TransformDateTime64>(assert_cast(*named_from.type).getScale())); - } - /// Conversion of Date or DateTime to DateTime64: add zero sub-second part. 
- else if constexpr (( - std::is_same_v - || std::is_same_v - || std::is_same_v) - && std::is_same_v - && std::is_same_v) - { - return DateTimeTransformImpl::execute( - arguments, result_type, input_rows_count); - } - else - { - using ColVecFrom = typename FromDataType::ColumnType; - using ColVecTo = typename ToDataType::ColumnType; - - if constexpr ((IsDataTypeDecimal || IsDataTypeDecimal) - && !(std::is_same_v || std::is_same_v) - && (!IsDataTypeDecimalOrNumber || !IsDataTypeDecimalOrNumber)) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - named_from.column->getName(), Name::name); - } - - const ColVecFrom * col_from = checkAndGetColumn(named_from.column.get()); - if (!col_from) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - named_from.column->getName(), Name::name); - - typename ColVecTo::MutablePtr col_to = nullptr; - - if constexpr (IsDataTypeDecimal) - { - UInt32 scale; - - if constexpr (std::is_same_v - || std::is_same_v) - { - scale = additions.scale; - } - else - { - scale = additions; - } - - col_to = ColVecTo::create(0, scale); - } - else - col_to = ColVecTo::create(); - - const auto & vec_from = col_from->getData(); - auto & vec_to = col_to->getData(); - vec_to.resize(input_rows_count); - - ColumnUInt8::MutablePtr col_null_map_to; - ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr; - if constexpr (std::is_same_v) - { - col_null_map_to = ColumnUInt8::create(input_rows_count, false); - vec_null_map_to = &col_null_map_to->getData(); - } - - bool result_is_bool = isBool(result_type); - for (size_t i = 0; i < input_rows_count; ++i) - { - if constexpr (std::is_same_v) - { - if (result_is_bool) - { - vec_to[i] = vec_from[i] != FromFieldType(0); - continue; - } - } - - if constexpr (std::is_same_v && std::is_same_v) - { - static_assert( - std::is_same_v, - "UInt128 and UUID types must be same"); - - vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; - vec_to[i].items[0] = vec_from[i].toUnderType().items[1]; - } - else if constexpr (std::is_same_v && std::is_same_v) - { - static_assert( - std::is_same_v, - "UInt128 and IPv6 types must be same"); - - vec_to[i].items[1] = std::byteswap(vec_from[i].toUnderType().items[0]); - vec_to[i].items[0] = std::byteswap(vec_from[i].toUnderType().items[1]); - } - else if constexpr (std::is_same_v != std::is_same_v) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Conversion between numeric types and UUID is not supported. " - "Probably the passed UUID is unquoted"); - } - else if constexpr ( - (std::is_same_v != std::is_same_v) - && !(is_any_of - || is_any_of)) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion from {} to {} is not supported", - TypeName, TypeName); - } - else if constexpr (std::is_same_v != std::is_same_v - && !(std::is_same_v || std::is_same_v)) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Conversion between numeric types and IPv6 is not supported. 
" - "Probably the passed IPv6 is unquoted"); - } - else if constexpr (IsDataTypeDecimal || IsDataTypeDecimal) - { - if constexpr (std::is_same_v) - { - ToFieldType result; - bool convert_result = false; - - if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) - convert_result = tryConvertDecimals(vec_from[i], col_from->getScale(), col_to->getScale(), result); - else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) - convert_result = tryConvertFromDecimal(vec_from[i], col_from->getScale(), result); - else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) - convert_result = tryConvertToDecimal(vec_from[i], col_to->getScale(), result); - - if (convert_result) - vec_to[i] = result; - else - { - vec_to[i] = static_cast(0); - (*vec_null_map_to)[i] = true; - } - } - else - { - if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) - vec_to[i] = convertDecimals(vec_from[i], col_from->getScale(), col_to->getScale()); - else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) - vec_to[i] = convertFromDecimal(vec_from[i], col_from->getScale()); - else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) - vec_to[i] = convertToDecimal(vec_from[i], col_to->getScale()); - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unsupported data type in conversion function"); - } - } - else if constexpr (std::is_same_v && std::is_same_v) - { - const uint8_t ip4_cidr[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}; - const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); - if (!matchIPv6Subnet(src, ip4_cidr, 96)) - { - char addr[IPV6_MAX_TEXT_LENGTH + 1] {}; - char * paddr = addr; - formatIPv6(src, paddr); - - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 {} in column {} is not in IPv4 mapping block", addr, named_from.column->getName()); - } - - uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); - if constexpr (std::endian::native == std::endian::little) - { - dst[0] = src[15]; - dst[1] = src[14]; - dst[2] = src[13]; - dst[3] = src[12]; - } - else - { - dst[0] = src[12]; - dst[1] = src[13]; - dst[2] = src[14]; - dst[3] = src[15]; - } - } - else if constexpr (std::is_same_v && std::is_same_v) - { - const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); - uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); - std::memset(dst, '\0', IPV6_BINARY_LENGTH); - dst[10] = dst[11] = 0xff; - - if constexpr (std::endian::native == std::endian::little) - { - dst[12] = src[3]; - dst[13] = src[2]; - dst[14] = src[1]; - dst[15] = src[0]; - } - else - { - dst[12] = src[0]; - dst[13] = src[1]; - dst[14] = src[2]; - dst[15] = src[3]; - } - } - else if constexpr (std::is_same_v && std::is_same_v) - { - vec_to[i] = static_cast(static_cast(vec_from[i])); - } - else if constexpr (std::is_same_v - && (std::is_same_v || std::is_same_v)) - { - vec_to[i] = static_cast(vec_from[i] * DATE_SECONDS_PER_DAY); - } - else - { - /// If From Data is Nan or Inf and we convert to integer type, throw exception - if constexpr (std::is_floating_point_v && !std::is_floating_point_v) - { - if (!isFinite(vec_from[i])) - { - if constexpr (std::is_same_v) - { - vec_to[i] = 0; - (*vec_null_map_to)[i] = true; - continue; - } - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unexpected inf or nan to integer conversion"); - } - } - - if constexpr (std::is_same_v - || std::is_same_v) - { - bool convert_result = accurate::convertNumeric(vec_from[i], vec_to[i]); - - if (!convert_result) - { - if (std::is_same_v) - { - vec_to[i] = 0; - 
(*vec_null_map_to)[i] = true; - } - else - { - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Value in column {} cannot be safely converted into type {}", - named_from.column->getName(), result_type->getName()); - } - } - } - else - { - vec_to[i] = static_cast(vec_from[i]); - } - } - } - - if constexpr (std::is_same_v) - return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); - else - return col_to; - } - } -}; - - ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) { ColumnUInt8::MutablePtr null_map = nullptr; @@ -1052,174 +583,6 @@ ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) return null_map; } -template -requires (!std::is_same_v) -struct ConvertImpl -{ - using FromFieldType = typename FromDataType::FieldType; - using ColVecType = ColumnVectorOrDecimal; - - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) - { - if constexpr (IsDataTypeDateOrDateTime) - { - auto datetime_arg = arguments[0]; - - const DateLUTImpl * time_zone = nullptr; - const ColumnConst * time_zone_column = nullptr; - - if (arguments.size() == 1) - { - auto non_null_args = createBlockWithNestedColumns(arguments); - time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); - } - else /// When we have a column for timezone - { - datetime_arg.column = datetime_arg.column->convertToFullColumnIfConst(); - - if constexpr (std::is_same_v || std::is_same_v) - time_zone = &DateLUT::instance(); - /// For argument of Date or DateTime type, second argument with time zone could be specified. - if constexpr (std::is_same_v || std::is_same_v) - { - if ((time_zone_column = checkAndGetColumnConst(arguments[1].column.get()))) - { - auto non_null_args = createBlockWithNestedColumns(arguments); - time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); - } - } - } - const auto & col_with_type_and_name = columnGetNested(datetime_arg); - - if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) - { - auto col_to = ColumnString::create(); - - const typename ColVecType::Container & vec_from = col_from->getData(); - ColumnString::Chars & data_to = col_to->getChars(); - ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - - if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); - else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); - else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss") + 1)); - else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss.") + col_from->getScale() + 1)); - else - data_to.resize(size * 3); /// Arbitrary - - offsets_to.resize(size); - - WriteBufferFromVector write_buffer(data_to); - const auto & type = static_cast(*col_with_type_and_name.type); - - ColumnUInt8::MutablePtr null_map = copyNullMap(datetime_arg.column); - - if (!null_map && arguments.size() > 1) - null_map = copyNullMap(arguments[1].column->convertToFullColumnIfConst()); - - if (null_map) - { - for (size_t i = 0; i < size; ++i) - { - if (!time_zone_column && arguments.size() > 1) - { - if (!arguments[1].column.get()->getDataAt(i).toString().empty()) - time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); - } - bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); - 
null_map->getData()[i] |= !is_ok; - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - else - { - for (size_t i = 0; i < size; ++i) - { - if (!time_zone_column && arguments.size() > 1) - { - if (!arguments[1].column.get()->getDataAt(i).toString().empty()) - time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); - } - FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - - write_buffer.finalize(); - - if (null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), Name::name); - } - else - { - ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); - - const auto & col_with_type_and_name = columnGetNested(arguments[0]); - const auto & type = static_cast(*col_with_type_and_name.type); - - if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) - { - auto col_to = ColumnString::create(); - - const typename ColVecType::Container & vec_from = col_from->getData(); - ColumnString::Chars & data_to = col_to->getChars(); - ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - - data_to.resize(size * 3); - offsets_to.resize(size); - - WriteBufferFromVector write_buffer(data_to); - - if (null_map) - { - for (size_t i = 0; i < size; ++i) - { - bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, nullptr); - /// We don't use timezones in this branch - null_map->getData()[i] |= !is_ok; - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - else - { - for (size_t i = 0; i < size; ++i) - { - FormatImpl::template execute(vec_from[i], write_buffer, &type, nullptr); - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - - write_buffer.finalize(); - - if (null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), Name::name); - } - } -}; - /// Generic conversion of any type to String or FixedString via serialization to text. template @@ -1774,6 +1137,641 @@ struct ConvertThroughParsing }; +/// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type. +struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; }; + +struct AccurateConvertStrategyAdditions +{ + UInt32 scale { 0 }; +}; + +struct AccurateOrNullConvertStrategyAdditions +{ + UInt32 scale { 0 }; +}; + + +struct ConvertDefaultBehaviorTag {}; +struct ConvertReturnNullOnErrorTag {}; +struct ConvertReturnZeroOnErrorTag {}; + +/** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment. 
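+  * For example, converting Int8 to Int32 is a plain numeric cast (an illustrative
+  * sketch: SELECT toInt32(toInt8(-1)) returns -1).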
+ * (Date is represented internally as number of days from some day; DateTime - as unix timestamp) + */ +template +struct ConvertImpl +{ + using FromFieldType = typename FromDataType::FieldType; + using ToFieldType = typename ToDataType::FieldType; + + template + static ColumnPtr NO_SANITIZE_UNDEFINED execute( + const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type [[maybe_unused]], size_t input_rows_count, + Additions additions [[maybe_unused]] = Additions()) + { + const ColumnWithTypeAndName & named_from = arguments[0]; + + if constexpr ((std::is_same_v && !FromDataType::is_parametric) + || (std::is_same_v && std::is_same_v) + || (std::is_same_v && std::is_same_v)) + { + /// If types are the same, reuse the columns. + /// Conversions between Enum and the underlying type are also free. + return named_from.column; + } + else if constexpr ((std::is_same_v || std::is_same_v) + && std::is_same_v) + { + /// Conversion of DateTime to Date: throw off time component. + /// Conversion of Date32 to Date. + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (std::is_same_v && std::is_same_v) + { + /// Conversion of DateTime to Date: throw off time component. + return DateTimeTransformImpl::execute( + arguments, result_type, input_rows_count); + } + else if constexpr ((std::is_same_v || std::is_same_v) + && std::is_same_v) + { + /// Conversion from Date/Date32 to DateTime. + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (std::is_same_v && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count, TransformDateTime64(assert_cast(*named_from.type).getScale())); + } + /** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience, + * Float32, Float64) to Date. If the + * number is less than 65536, then it is treated as DayNum, and if it's greater or equals to 65536, + * then treated as unix timestamp. If the number exceeds UInt32, saturate to MAX_UINT32 then as DayNum. + * It's a bit illogical, as we actually have two functions in one. + * But allows to support frequent case, + * when user write toDate(UInt32), expecting conversion of unix timestamp to Date. + * (otherwise such usage would be frequent mistake). 
+ */ + else if constexpr (( + std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + /// Special case of converting Int8, Int16, Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to DateTime. + else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (( + std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl, false>::execute( + arguments, result_type, input_rows_count); + } + /// Conversion of DateTime64 to Date or DateTime: discards fractional part. + else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl>, false>::execute( + arguments, result_type, input_rows_count, TransformDateTime64>(assert_cast(*named_from.type).getScale())); + } + else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl>, false>::execute( + arguments, result_type, input_rows_count, TransformDateTime64>(assert_cast(*named_from.type).getScale())); + } + /// Conversion of Date or DateTime to DateTime64: add zero sub-second part. 
+ else if constexpr (( + std::is_same_v + || std::is_same_v + || std::is_same_v) + && std::is_same_v + && std::is_same_v) + { + return DateTimeTransformImpl::execute( + arguments, result_type, input_rows_count); + } + else if constexpr (IsDataTypeDateOrDateTime + && std::is_same_v + && std::is_same_v) + { + /// Date or DateTime to String + + using ColVecType = ColumnVectorOrDecimal; + + auto datetime_arg = arguments[0]; + + const DateLUTImpl * time_zone = nullptr; + const ColumnConst * time_zone_column = nullptr; + + if (arguments.size() == 1) + { + auto non_null_args = createBlockWithNestedColumns(arguments); + time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); + } + else /// When we have a column for timezone + { + datetime_arg.column = datetime_arg.column->convertToFullColumnIfConst(); + + if constexpr (std::is_same_v || std::is_same_v) + time_zone = &DateLUT::instance(); + /// For argument of Date or DateTime type, second argument with time zone could be specified. + if constexpr (std::is_same_v || std::is_same_v) + { + if ((time_zone_column = checkAndGetColumnConst(arguments[1].column.get()))) + { + auto non_null_args = createBlockWithNestedColumns(arguments); + time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); + } + } + } + const auto & col_with_type_and_name = columnGetNested(datetime_arg); + + if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) + { + auto col_to = ColumnString::create(); + + const typename ColVecType::Container & vec_from = col_from->getData(); + ColumnString::Chars & data_to = col_to->getChars(); + ColumnString::Offsets & offsets_to = col_to->getOffsets(); + size_t size = vec_from.size(); + + if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); + else if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); + else if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss") + 1)); + else if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss.") + col_from->getScale() + 1)); + else + data_to.resize(size * 3); /// Arbitrary + + offsets_to.resize(size); + + WriteBufferFromVector write_buffer(data_to); + const auto & type = static_cast(*col_with_type_and_name.type); + + ColumnUInt8::MutablePtr null_map = copyNullMap(datetime_arg.column); + + if (!null_map && arguments.size() > 1) + null_map = copyNullMap(arguments[1].column->convertToFullColumnIfConst()); + + if (null_map) + { + for (size_t i = 0; i < size; ++i) + { + if (!time_zone_column && arguments.size() > 1) + { + if (!arguments[1].column.get()->getDataAt(i).toString().empty()) + time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); + } + bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); + null_map->getData()[i] |= !is_ok; + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + else + { + for (size_t i = 0; i < size; ++i) + { + if (!time_zone_column && arguments.size() > 1) + { + if (!arguments[1].column.get()->getDataAt(i).toString().empty()) + time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); + } + FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); + writeChar(0, write_buffer); + 
offsets_to[i] = write_buffer.count(); + } + } + + write_buffer.finalize(); + + if (null_map) + return ColumnNullable::create(std::move(col_to), std::move(null_map)); + return col_to; + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), Name::name); + } + else if constexpr (std::is_same_v + && std::is_same_v) + { + /// Anything else to String. + + using ColVecType = ColumnVectorOrDecimal; + + ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); + + const auto & col_with_type_and_name = columnGetNested(arguments[0]); + const auto & type = static_cast(*col_with_type_and_name.type); + + if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) + { + auto col_to = ColumnString::create(); + + const typename ColVecType::Container & vec_from = col_from->getData(); + ColumnString::Chars & data_to = col_to->getChars(); + ColumnString::Offsets & offsets_to = col_to->getOffsets(); + size_t size = vec_from.size(); + + data_to.resize(size * 3); + offsets_to.resize(size); + + WriteBufferFromVector write_buffer(data_to); + + if (null_map) + { + for (size_t i = 0; i < size; ++i) + { + bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, nullptr); + /// We don't use timezones in this branch + null_map->getData()[i] |= !is_ok; + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + else + { + for (size_t i = 0; i < size; ++i) + { + FormatImpl::template execute(vec_from[i], write_buffer, &type, nullptr); + writeChar(0, write_buffer); + offsets_to[i] = write_buffer.count(); + } + } + + write_buffer.finalize(); + + if (null_map) + return ColumnNullable::create(std::move(col_to), std::move(null_map)); + return col_to; + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), Name::name); + } + else + { + using ColVecFrom = typename FromDataType::ColumnType; + using ColVecTo = typename ToDataType::ColumnType; + + if constexpr ((IsDataTypeDecimal || IsDataTypeDecimal) + && !(std::is_same_v || std::is_same_v) + && (!IsDataTypeDecimalOrNumber || !IsDataTypeDecimalOrNumber)) + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + named_from.column->getName(), Name::name); + } + + const ColVecFrom * col_from = checkAndGetColumn(named_from.column.get()); + if (!col_from) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + named_from.column->getName(), Name::name); + + typename ColVecTo::MutablePtr col_to = nullptr; + + if constexpr (IsDataTypeDecimal) + { + UInt32 scale; + + if constexpr (std::is_same_v + || std::is_same_v) + { + scale = additions.scale; + } + else + { + scale = additions; + } + + col_to = ColVecTo::create(0, scale); + } + else + col_to = ColVecTo::create(); + + const auto & vec_from = col_from->getData(); + auto & vec_to = col_to->getData(); + vec_to.resize(input_rows_count); + + ColumnUInt8::MutablePtr col_null_map_to; + ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr; + if constexpr (std::is_same_v) + { + col_null_map_to = ColumnUInt8::create(input_rows_count, false); + vec_null_map_to = &col_null_map_to->getData(); + } + + bool result_is_bool = isBool(result_type); + for (size_t i = 0; i < input_rows_count; ++i) + { + if constexpr (std::is_same_v) + { + if (result_is_bool) + { + vec_to[i] = vec_from[i] != 
FromFieldType(0); + continue; + } + } + + if constexpr (std::is_same_v && std::is_same_v) + { + static_assert( + std::is_same_v, + "UInt128 and UUID types must be same"); + + vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; + vec_to[i].items[0] = vec_from[i].toUnderType().items[1]; + } + else if constexpr (std::is_same_v && std::is_same_v) + { + static_assert( + std::is_same_v, + "UInt128 and IPv6 types must be same"); + + vec_to[i].items[1] = std::byteswap(vec_from[i].toUnderType().items[0]); + vec_to[i].items[0] = std::byteswap(vec_from[i].toUnderType().items[1]); + } + else if constexpr (std::is_same_v != std::is_same_v) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Conversion between numeric types and UUID is not supported. " + "Probably the passed UUID is unquoted"); + } + else if constexpr ( + (std::is_same_v != std::is_same_v) + && !(is_any_of + || is_any_of)) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion from {} to {} is not supported", + TypeName, TypeName); + } + else if constexpr (std::is_same_v != std::is_same_v + && !(std::is_same_v || std::is_same_v)) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Conversion between numeric types and IPv6 is not supported. " + "Probably the passed IPv6 is unquoted"); + } + else if constexpr (IsDataTypeDecimal || IsDataTypeDecimal) + { + if constexpr (std::is_same_v) + { + ToFieldType result; + bool convert_result = false; + + if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) + convert_result = tryConvertDecimals(vec_from[i], col_from->getScale(), col_to->getScale(), result); + else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) + convert_result = tryConvertFromDecimal(vec_from[i], col_from->getScale(), result); + else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) + convert_result = tryConvertToDecimal(vec_from[i], col_to->getScale(), result); + + if (convert_result) + vec_to[i] = result; + else + { + vec_to[i] = static_cast(0); + (*vec_null_map_to)[i] = true; + } + } + else + { + if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) + vec_to[i] = convertDecimals(vec_from[i], col_from->getScale(), col_to->getScale()); + else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) + vec_to[i] = convertFromDecimal(vec_from[i], col_from->getScale()); + else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) + vec_to[i] = convertToDecimal(vec_from[i], col_to->getScale()); + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unsupported data type in conversion function"); + } + } + else if constexpr (std::is_same_v && std::is_same_v) + { + const uint8_t ip4_cidr[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}; + const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); + if (!matchIPv6Subnet(src, ip4_cidr, 96)) + { + char addr[IPV6_MAX_TEXT_LENGTH + 1] {}; + char * paddr = addr; + formatIPv6(src, paddr); + + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 {} in column {} is not in IPv4 mapping block", addr, named_from.column->getName()); + } + + uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); + if constexpr (std::endian::native == std::endian::little) + { + dst[0] = src[15]; + dst[1] = src[14]; + dst[2] = src[13]; + dst[3] = src[12]; + } + else + { + dst[0] = src[12]; + dst[1] = src[13]; + dst[2] = src[14]; + dst[3] = src[15]; + } + } + else if constexpr (std::is_same_v && std::is_same_v) + { + const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); + uint8_t * dst = 
reinterpret_cast(&vec_to[i].toUnderType()); + std::memset(dst, '\0', IPV6_BINARY_LENGTH); + dst[10] = dst[11] = 0xff; + + if constexpr (std::endian::native == std::endian::little) + { + dst[12] = src[3]; + dst[13] = src[2]; + dst[14] = src[1]; + dst[15] = src[0]; + } + else + { + dst[12] = src[0]; + dst[13] = src[1]; + dst[14] = src[2]; + dst[15] = src[3]; + } + } + else if constexpr (std::is_same_v && std::is_same_v) + { + vec_to[i] = static_cast(static_cast(vec_from[i])); + } + else if constexpr (std::is_same_v + && (std::is_same_v || std::is_same_v)) + { + vec_to[i] = static_cast(vec_from[i] * DATE_SECONDS_PER_DAY); + } + else + { + /// If From Data is Nan or Inf and we convert to integer type, throw exception + if constexpr (std::is_floating_point_v && !std::is_floating_point_v) + { + if (!isFinite(vec_from[i])) + { + if constexpr (std::is_same_v) + { + vec_to[i] = 0; + (*vec_null_map_to)[i] = true; + continue; + } + else + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unexpected inf or nan to integer conversion"); + } + } + + if constexpr (std::is_same_v + || std::is_same_v) + { + bool convert_result = accurate::convertNumeric(vec_from[i], vec_to[i]); + + if (!convert_result) + { + if (std::is_same_v) + { + vec_to[i] = 0; + (*vec_null_map_to)[i] = true; + } + else + { + throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Value in column {} cannot be safely converted into type {}", + named_from.column->getName(), result_type->getName()); + } + } + } + else + { + vec_to[i] = static_cast(vec_from[i]); + } + } + } + + if constexpr (std::is_same_v) + return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); + else + return col_to; + } + } +}; + + template requires (!std::is_same_v) struct ConvertImpl From f339c88079e2b0173b29e343872a576767b56e7b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Mar 2024 06:32:31 +0100 Subject: [PATCH 130/283] Continue --- src/Functions/FunctionsConversion.cpp | 61 +++++++++++++-------------- 1 file changed, 29 insertions(+), 32 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 16f547939fa..5001abcb5d2 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1163,13 +1163,10 @@ template struct ConvertImpl { - using FromFieldType = typename FromDataType::FieldType; - using ToFieldType = typename ToDataType::FieldType; - template static ColumnPtr NO_SANITIZE_UNDEFINED execute( const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type [[maybe_unused]], size_t input_rows_count, - Additions additions [[maybe_unused]] = Additions()) + Additions additions = Additions()) { const ColumnWithTypeAndName & named_from = arguments[0]; @@ -1206,7 +1203,7 @@ struct ConvertImpl && std::is_same_v) { return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count, TransformDateTime64(assert_cast(*named_from.type).getScale())); + arguments, result_type, input_rows_count, additions); } /** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience, * Float32, Float64) to Date. 
If the @@ -1336,14 +1333,14 @@ struct ConvertImpl && std::is_same_v) { return DateTimeTransformImpl>, false>::execute( - arguments, result_type, input_rows_count, TransformDateTime64>(assert_cast(*named_from.type).getScale())); + arguments, result_type, input_rows_count, additions); } else if constexpr (std::is_same_v && std::is_same_v && std::is_same_v) { return DateTimeTransformImpl>, false>::execute( - arguments, result_type, input_rows_count, TransformDateTime64>(assert_cast(*named_from.type).getScale())); + arguments, result_type, input_rows_count, additions); } /// Conversion of Date or DateTime to DateTime64: add zero sub-second part. else if constexpr (( @@ -1362,6 +1359,7 @@ struct ConvertImpl { /// Date or DateTime to String + using FromFieldType = typename FromDataType::FieldType; using ColVecType = ColumnVectorOrDecimal; auto datetime_arg = arguments[0]; @@ -1471,6 +1469,7 @@ struct ConvertImpl { /// Anything else to String. + using FromFieldType = typename FromDataType::FieldType; using ColVecType = ColumnVectorOrDecimal; ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); @@ -1523,8 +1522,31 @@ struct ConvertImpl throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", arguments[0].column->getName(), Name::name); } + else if constexpr ((std::is_same_v || std::is_same_v) + && !std::is_same_v + && std::is_same_v) + { + return ConvertThroughParsing::execute( + arguments, result_type, input_rows_count, additions); + } + else if constexpr ((std::is_same_v || std::is_same_v) + && !std::is_same_v + && std::is_same_v) + { + return ConvertThroughParsing::execute( + arguments, result_type, input_rows_count, additions); + } + else if constexpr ((std::is_same_v || std::is_same_v) + && is_any_of + && std::is_same_v) + { + return ConvertThroughParsing::execute( + arguments, result_type, input_rows_count, additions); + } else { + using FromFieldType = typename FromDataType::FieldType; + using ToFieldType = typename ToDataType::FieldType; using ColVecFrom = typename FromDataType::ColumnType; using ColVecTo = typename ToDataType::ColumnType; @@ -1772,31 +1794,6 @@ struct ConvertImpl }; -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (is_any_of && is_any_of) -struct ConvertImpl - : ConvertThroughParsing {}; - /// Generic conversion of any type from String. Used for complex types: Array and Tuple or types with custom serialization. 
struct ConvertImplGenericFromString { From 87db039d8952ca0e2ed32c7eda0f49600f9078c4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Mar 2024 07:06:06 +0100 Subject: [PATCH 131/283] Continue --- src/Functions/FunctionsConversion.cpp | 119 ++++++++++++-------------- 1 file changed, 55 insertions(+), 64 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 5001abcb5d2..fd328e8af42 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1354,8 +1354,7 @@ struct ConvertImpl arguments, result_type, input_rows_count); } else if constexpr (IsDataTypeDateOrDateTime - && std::is_same_v - && std::is_same_v) + && std::is_same_v) { /// Date or DateTime to String @@ -1464,6 +1463,54 @@ struct ConvertImpl throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", arguments[0].column->getName(), Name::name); } + /// Conversion from FixedString to String. + /// Cutting sequences of zero bytes from end of strings. + else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); + const auto & nested = columnGetNested(arguments[0]); + if (const ColumnFixedString * col_from = checkAndGetColumn(nested.column.get())) + { + auto col_to = ColumnString::create(); + + const ColumnFixedString::Chars & data_from = col_from->getChars(); + ColumnString::Chars & data_to = col_to->getChars(); + ColumnString::Offsets & offsets_to = col_to->getOffsets(); + size_t size = col_from->size(); + size_t n = col_from->getN(); + data_to.resize(size * (n + 1)); /// + 1 - zero terminator + offsets_to.resize(size); + + size_t offset_from = 0; + size_t offset_to = 0; + for (size_t i = 0; i < size; ++i) + { + if (!null_map || !null_map->getData()[i]) + { + size_t bytes_to_copy = n; + while (bytes_to_copy > 0 && data_from[offset_from + bytes_to_copy - 1] == 0) + --bytes_to_copy; + + memcpy(&data_to[offset_to], &data_from[offset_from], bytes_to_copy); + offset_to += bytes_to_copy; + } + data_to[offset_to] = 0; + ++offset_to; + offsets_to[i] = offset_to; + offset_from += n; + } + + data_to.resize(offset_to); + if (result_type->isNullable() && null_map) + return ColumnNullable::create(std::move(col_to), std::move(null_map)); + return col_to; + } + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), Name::name); + } else if constexpr (std::is_same_v && std::is_same_v) { @@ -1543,6 +1590,12 @@ struct ConvertImpl return ConvertThroughParsing::execute( arguments, result_type, input_rows_count, additions); } + else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v + && std::is_same_v) + { + } else { using FromFieldType = typename FromDataType::FieldType; @@ -1863,68 +1916,6 @@ struct ConvertImplGenericFromString }; -template <> -struct ConvertImpl - : ConvertImpl {}; - -template <> -struct ConvertImpl - : ConvertImpl {}; - - -/** Conversion from FixedString to String. - * Cutting sequences of zero bytes from end of strings. 
- */ -template -struct ConvertImpl -{ - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type, size_t /*input_rows_count*/) - { - ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); - const auto & nested = columnGetNested(arguments[0]); - if (const ColumnFixedString * col_from = checkAndGetColumn(nested.column.get())) - { - auto col_to = ColumnString::create(); - - const ColumnFixedString::Chars & data_from = col_from->getChars(); - ColumnString::Chars & data_to = col_to->getChars(); - ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = col_from->size(); - size_t n = col_from->getN(); - data_to.resize(size * (n + 1)); /// + 1 - zero terminator - offsets_to.resize(size); - - size_t offset_from = 0; - size_t offset_to = 0; - for (size_t i = 0; i < size; ++i) - { - if (!null_map || !null_map->getData()[i]) - { - size_t bytes_to_copy = n; - while (bytes_to_copy > 0 && data_from[offset_from + bytes_to_copy - 1] == 0) - --bytes_to_copy; - - memcpy(&data_to[offset_to], &data_from[offset_from], bytes_to_copy); - offset_to += bytes_to_copy; - } - data_to[offset_to] = 0; - ++offset_to; - offsets_to[i] = offset_to; - offset_from += n; - } - - data_to.resize(offset_to); - if (return_type->isNullable() && null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), Name::name); - } -}; - - /// Declared early because used below. struct NameToDate { static constexpr auto name = "toDate"; }; struct NameToDate32 { static constexpr auto name = "toDate32"; }; From f8dfd8c03a05f45729f0dce588da4c9b0200739e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Mar 2024 07:15:07 +0100 Subject: [PATCH 132/283] Less garbage --- src/Functions/FunctionsConversion.cpp | 30 +++++++++++++-------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index fd328e8af42..5461661386e 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -2389,7 +2389,6 @@ public: static constexpr bool to_datetime64 = std::is_same_v; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - static FunctionPtr create() { return std::make_shared(); } String getName() const override { @@ -2516,28 +2515,29 @@ public: ColumnPtr result_column; if constexpr (to_decimal) + { result_column = executeInternal(arguments, result_type, input_rows_count, assert_cast(*removeNullable(result_type)).getScale()); - else + } + else if (isDateTime64(arguments)) { - if (isDateTime64(arguments)) - { - UInt64 scale = to_datetime64 ? DataTypeDateTime64::default_scale : 0; - if (arguments.size() > 1) - scale = extractToDecimalScale(arguments[1]); + UInt64 scale = to_datetime64 ? 
DataTypeDateTime64::default_scale : 0; + if (arguments.size() > 1) + scale = extractToDecimalScale(arguments[1]); - if (scale == 0) - result_column = executeInternal(arguments, result_type, input_rows_count); - else - { - result_column = executeInternal(arguments, result_type, input_rows_count, static_cast(scale)); - } + if (scale == 0) + { + result_column = executeInternal(arguments, result_type, input_rows_count); } else { - result_column = executeInternal(arguments, result_type, input_rows_count); + result_column = executeInternal(arguments, result_type, input_rows_count, static_cast(scale)); } } + else + { + result_column = executeInternal(arguments, result_type, input_rows_count); + } if (!result_column) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " @@ -3204,7 +3204,7 @@ private: { /// In case when converting to Nullable type, we apply different parsing rule, /// that will not throw an exception but return NULL in case of malformed input. - FunctionPtr function = FunctionConvertFromString::create(); + FunctionPtr function = FunctionConvertFromString::create(context); return createFunctionAdaptor(function, from_type); } else if (!can_apply_accurate_cast) From 65a541fbdbac4ec3a00a1f0271867e68e6cdef87 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Mar 2024 07:48:14 +0100 Subject: [PATCH 133/283] Tighten --- utils/check-style/check-large-objects.sh | 3 --- 1 file changed, 3 deletions(-) diff --git a/utils/check-style/check-large-objects.sh b/utils/check-style/check-large-objects.sh index 3e2a385bdd0..9b78d6196e3 100755 --- a/utils/check-style/check-large-objects.sh +++ b/utils/check-style/check-large-objects.sh @@ -6,10 +6,7 @@ export LC_ALL=C # The "total" should be printed without localization TU_EXCLUDES=( AggregateFunctionUniq - FunctionsConversion - RangeHashedDictionary - Aggregator ) From 486e8537a80abe1b238839f207f35a9f9030011e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Mar 2024 08:02:27 +0100 Subject: [PATCH 134/283] Fix error --- src/Functions/FunctionsConversion.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 5461661386e..67d6e202255 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1309,14 +1309,14 @@ struct ConvertImpl && std::is_same_v) { return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); + arguments, result_type, input_rows_count, additions); } else if constexpr (std::is_same_v && std::is_same_v && std::is_same_v) { return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); + arguments, result_type, input_rows_count, additions); } else if constexpr (( std::is_same_v @@ -1325,7 +1325,7 @@ struct ConvertImpl && std::is_same_v) { return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count); + arguments, result_type, input_rows_count, additions); } /// Conversion of DateTime64 to Date or DateTime: discards fractional part. 
else if constexpr (std::is_same_v @@ -1351,7 +1351,7 @@ struct ConvertImpl && std::is_same_v) { return DateTimeTransformImpl::execute( - arguments, result_type, input_rows_count); + arguments, result_type, input_rows_count, additions); } else if constexpr (IsDataTypeDateOrDateTime && std::is_same_v) From 514f8392f93b9488ab4b52011102891a851b51db Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 13 Mar 2024 10:25:37 +0100 Subject: [PATCH 135/283] Fix error --- src/Functions/FunctionsConversion.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 67d6e202255..70cbf31bcb3 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1569,15 +1569,20 @@ struct ConvertImpl throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", arguments[0].column->getName(), Name::name); } + else if constexpr (std::is_same_v + && std::is_same_v + && std::is_same_v) + { + return ConvertImpl::execute( + arguments, result_type, input_rows_count, additions); + } else if constexpr ((std::is_same_v || std::is_same_v) - && !std::is_same_v && std::is_same_v) { return ConvertThroughParsing::execute( arguments, result_type, input_rows_count, additions); } else if constexpr ((std::is_same_v || std::is_same_v) - && !std::is_same_v && std::is_same_v) { return ConvertThroughParsing::execute( @@ -1590,12 +1595,6 @@ struct ConvertImpl return ConvertThroughParsing::execute( arguments, result_type, input_rows_count, additions); } - else if constexpr (std::is_same_v - && std::is_same_v - && std::is_same_v - && std::is_same_v) - { - } else { using FromFieldType = typename FromDataType::FieldType; From 47dfefd6b3983bad2c5598c19b14df679f605d93 Mon Sep 17 00:00:00 2001 From: Alexander Sapin Date: Wed, 13 Mar 2024 10:37:51 +0100 Subject: [PATCH 136/283] config substitutions --- src/Common/Config/ConfigProcessor.cpp | 21 +++++++- src/Common/Config/ConfigProcessor.h | 11 +++-- src/Common/tests/gtest_config_processor.cpp | 49 +++++++++++++++++++ .../configs/config_zk_include_test.xml | 8 +++ .../test_config_substitutions/test.py | 11 ++++- 5 files changed, 93 insertions(+), 7 deletions(-) create mode 100644 tests/integration/test_config_substitutions/configs/config_zk_include_test.xml diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 641e7ddcdaa..57e9f9efc10 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -173,10 +173,12 @@ static void deleteAttributesRecursive(Node * root) static void mergeAttributes(Element & config_element, Element & with_element) { auto * with_element_attributes = with_element.attributes(); + //std::cerr << "MERGE ATTRIBUTES:" << with_element_attributes->length() << "\n"; for (size_t i = 0; i < with_element_attributes->length(); ++i) { auto * attr = with_element_attributes->item(i); + //std::cerr << "ATTR NAME:" << attr->nodeName() << std::endl; config_element.setAttribute(attr->nodeName(), attr->getNodeValue()); } @@ -276,6 +278,8 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, for (Node * node = config_root->firstChild(); node;) { Node * next_node = node->nextSibling(); + //if (next_node) + // std::cerr << "NEXT NODE:" << next_node->nodeName() << std::endl; /// Remove text from the original config node. 
if (node->nodeType() == Node::TEXT_NODE && !allWhitespace(node->getNodeValue())) { @@ -283,6 +287,7 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, } else if (node->nodeType() == Node::ELEMENT_NODE) { + std::cerr << "NODES IN SOURCE: " << node->nodeName() << std::endl; config_element_by_id.insert(ElementsByIdentifier::value_type(getElementIdentifier(node), node)); } node = next_node; @@ -296,6 +301,7 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, bool remove = false; if (with_node->nodeType() == Node::ELEMENT_NODE) { + //std::cerr << "WITH NODE: " << with_node->nodeName() << std::endl; Element & with_element = dynamic_cast(*with_node); remove = with_element.hasAttribute("remove"); bool replace = with_element.hasAttribute("replace"); @@ -303,11 +309,13 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, if (remove && replace) throw Poco::Exception("both remove and replace attributes set for element <" + with_node->nodeName() + ">"); + ElementsByIdentifier::iterator it = config_element_by_id.find(getElementIdentifier(with_node)); if (it != config_element_by_id.end()) { Node * config_node = it->second; + //std::cerr << "SUBNODE NODE: " << config_node->nodeName() << std::endl; config_element_by_id.erase(it); if (remove) @@ -316,12 +324,14 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, } else if (replace) { + //std::cerr << "REPLACE!!!" << std::endl; with_element.removeAttribute("replace"); NodePtr new_node = config->importNode(with_node, true); config_root->replaceChild(new_node, config_node); } else { + //std::cerr << "SUBNODE NODE HERE: " << config_node->nodeName() << std::endl; Element & config_element = dynamic_cast(*config_node); /// Remove substitution attributes from the merge target node if source node already has a value @@ -333,11 +343,17 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, mergeAttributes(config_element, with_element); mergeRecursive(config, config_node, with_node); } + //std::cerr << "DONE\n"; merged = true; } } + else + { + //std::cerr << "ELEMENT NOT FOUND\n"; + } if (!merged && !remove) { + //std::cerr << "NOTHING hAPPENED\n"; /// Since we didn't find a pair to this node in default config, we will paste it as is. /// But it may have some child nodes which have attributes like "replace" or "remove". /// They are useless in preprocessed configuration. @@ -433,6 +449,7 @@ void ConfigProcessor::doIncludesRecursive( auto process_include = [&](const Node * include_attr, const std::function & get_node, const char * error_msg) { const std::string & name = include_attr->getNodeValue(); + LOG_DEBUG(log, "PROCESS INCLUDE {}", name); const Node * node_to_include = get_node(name); if (!node_to_include) { @@ -453,13 +470,15 @@ void ConfigProcessor::doIncludesRecursive( /// Replace the whole node not just contents. 
if (node->nodeName() == "include") { + LOG_DEBUG(log, "Include here for node {}", name); const NodeListPtr children = node_to_include->childNodes(); Node * next_child = nullptr; for (Node * child = children->item(0); child; child = next_child) { next_child = child->nextSibling(); NodePtr new_node = config->importNode(child, true); - node->parentNode()->insertBefore(new_node, node); + //node->parentNode()->insertBefore(new_node, node); + mergeRecursive(config, node->parentNode(), new_node); } node->parentNode()->removeChild(node); diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index 5712c36d737..9201ec7b77d 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -119,6 +119,11 @@ public: static inline const auto SUBSTITUTION_ATTRS = {"incl", "from_zk", "from_env"}; + /// If config root node name is not 'clickhouse' and merging config's root node names doesn't match, bypasses merging and returns false. + /// For compatibility root node 'yandex' considered equal to 'clickhouse'. + bool merge(XMLDocumentPtr config, XMLDocumentPtr with); + void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); + private: const std::string path; std::string preprocessed_path; @@ -145,11 +150,7 @@ private: void hideRecursive(Poco::XML::Node * config_root); XMLDocumentPtr hideElements(XMLDocumentPtr xml_tree); - void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); - - /// If config root node name is not 'clickhouse' and merging config's root node names doesn't match, bypasses merging and returns false. - /// For compatibility root node 'yandex' considered equal to 'clickhouse'. - bool merge(XMLDocumentPtr config, XMLDocumentPtr with); + //void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); void doIncludesRecursive( XMLDocumentPtr config, diff --git a/src/Common/tests/gtest_config_processor.cpp b/src/Common/tests/gtest_config_processor.cpp index f01460d515b..d5397dd54c2 100644 --- a/src/Common/tests/gtest_config_processor.cpp +++ b/src/Common/tests/gtest_config_processor.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -63,3 +64,51 @@ TEST(Common, ConfigProcessorManyElements) /// More that 5 min is way too slow ASSERT_LE(enumerate_elapsed_ms, 300*1000); } + +TEST(Common, ConfigProcessorMerge) +{ + namespace fs = std::filesystem; + + auto path = fs::path("/tmp/test_config_processor/"); + + fs::remove_all(path); + fs::create_directories(path); + fs::create_directories(path / "config.d"); + //SCOPE_EXIT({ fs::remove_all(path); }); + + auto config_file = std::make_unique(path / "config.xml"); + { + DB::WriteBufferFromFile out(config_file->path()); + writeString("\n", out); + writeString("1\n", out); + writeString("\n", out); + } + DB::XMLDocumentPtr config_xml; + DB::ConfigProcessor processor(config_file->path(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false); + { + bool has_zk_includes; + config_xml = processor.processConfig(&has_zk_includes); + } + + auto small_part_file = std::make_unique(path / "config.d" / "part.xml"); + { + DB::WriteBufferFromFile out(small_part_file->path()); + writeString("33\n", out); + } + DB::XMLDocumentPtr part_xml; + + { + DB::ConfigProcessor tiny(small_part_file->path(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false); + bool has_zk_includes; + part_xml = 
tiny.processConfig(&has_zk_includes); + } + + auto * root_node = DB::XMLUtils::getRootNode(config_xml); + auto * part_node = DB::XMLUtils::getRootNode(part_xml); + auto * new_node = config_xml->importNode(part_node, true); + auto * deep_root = root_node->getNodeByPath("merge_tree"); + processor.mergeRecursive(config_xml, deep_root, new_node); + DB::ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(config_xml)); + + ASSERT_EQ(configuration->getUInt64("merge_tree.min_bytes_for_wide_part"), 33); +} diff --git a/tests/integration/test_config_substitutions/configs/config_zk_include_test.xml b/tests/integration/test_config_substitutions/configs/config_zk_include_test.xml new file mode 100644 index 00000000000..4a0ed623c4e --- /dev/null +++ b/tests/integration/test_config_substitutions/configs/config_zk_include_test.xml @@ -0,0 +1,8 @@ + + + + 1 + + + + diff --git a/tests/integration/test_config_substitutions/test.py b/tests/integration/test_config_substitutions/test.py index 564985b2f50..08834e20423 100644 --- a/tests/integration/test_config_substitutions/test.py +++ b/tests/integration/test_config_substitutions/test.py @@ -13,7 +13,7 @@ node2 = cluster.add_instance( env_variables={"MAX_QUERY_SIZE": "55555"}, ) node3 = cluster.add_instance( - "node3", user_configs=["configs/config_zk.xml"], with_zookeeper=True + "node3", user_configs=["configs/config_zk.xml",], main_configs=["configs/config_zk_include_test.xml"], with_zookeeper=True ) node4 = cluster.add_instance( "node4", @@ -62,6 +62,11 @@ def start_cluster(): value=b"default", makepath=True, ) + zk.create( + path="/min_bytes_for_wide_part", + value=b"33", + makepath=True, + ) cluster.add_zookeeper_startup_command(create_zk_roots) @@ -237,3 +242,7 @@ def test_allow_databases(start_cluster): ).strip() == "" ) + +def test_config_multiple_zk_substitutions(start_cluster): + #print(node3.query("SELECT * FROM system.merge_tree_settings")) + print(node3.query("SELECT * FROM system.merge_tree_settings WHERE changed=1")) From 5ef241cc850fe95240145f86329e259f5609de31 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 13 Mar 2024 11:30:11 +0100 Subject: [PATCH 137/283] WIP on virtual columns in StorageMerge --- src/Storages/StorageMerge.cpp | 42 ++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 8410f0a8df8..ec03545d767 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -56,12 +56,17 @@ #include #include #include +#include "Common/logger_useful.h" #include #include #include #include +#include "Analyzer/QueryNode.h" +#include "Core/QueryProcessingStage.h" +#include "IO/WriteHelpers.h" #include #include +#include namespace DB { @@ -798,10 +803,13 @@ QueryTreeNodePtr replaceTableExpressionAndRemoveJoin( const ContextPtr & context, const Names & required_column_names) { + LOG_DEBUG(&Poco::Logger::get("replaceTableExpressionAndRemoveJoin"), "BEFORE:\n{}", query->dumpTree()); auto * query_node = query->as(); auto join_tree_type = query_node->getJoinTree()->getNodeType(); auto modified_query = query_node->cloneAndReplace(original_table_expression, replacement_table_expression); + LOG_DEBUG(&Poco::Logger::get("replaceTableExpressionAndRemoveJoin"), "AFTER:\n{}", modified_query->dumpTree()); + // For the case when join tree is just a table or a table function we don't need to do anything more. 
if (join_tree_type == QueryTreeNodeType::TABLE || join_tree_type == QueryTreeNodeType::TABLE_FUNCTION) return modified_query; @@ -880,6 +888,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ if (modified_query_info.table_expression) { auto replacement_table_expression = std::make_shared(storage, storage_lock, storage_snapshot_); + replacement_table_expression->setAlias(modified_query_info.table_expression->getAlias()); if (query_info.table_expression_modifiers) replacement_table_expression->setTableExpressionModifiers(*query_info.table_expression_modifiers); @@ -960,6 +969,8 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ column_name_to_node); } + LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "{}", modified_query_info.query_tree->dumpTree()); + modified_query_info.query = queryNodeToSelectQuery(modified_query_info.query_tree); } else @@ -1020,7 +1031,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( const auto & [database_name, storage, _, table_name] = storage_with_lock; bool allow_experimental_analyzer = context->getSettingsRef().allow_experimental_analyzer; auto storage_stage - = storage->getQueryProcessingStage(context, QueryProcessingStage::Complete, storage_snapshot_, modified_query_info); + = storage->getQueryProcessingStage(context, processed_stage, storage_snapshot_, modified_query_info); builder = plan.buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); @@ -1047,10 +1058,23 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( Block pipe_header = builder->getHeader(); - if (has_database_virtual_column && common_header.has("_database") && !pipe_header.has("_database")) + auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); + if (storage_snapshot_->storage.supportsSubcolumns()) + get_column_options.withSubcolumns(); + + LOG_DEBUG(&Poco::Logger::get("createSources"), "Processed:{}\nStorage:{}", toString(processed_stage), toString(storage_stage)); + + String table_alias; + if (allow_experimental_analyzer) + table_alias = modified_query_info.query_tree->as()->getJoinTree()->as()->getAlias(); + + String database_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? "_database" : table_alias + "._database"; + String table_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? 
"_table" : table_alias + "._table"; + + if (has_database_virtual_column && common_header.has(database_column) && (storage_stage == QueryProcessingStage::FetchColumns || dynamic_cast(&storage_snapshot_->storage) != nullptr)) { ColumnWithTypeAndName column; - column.name = "_database"; + column.name = database_column; column.type = std::make_shared(std::make_shared()); column.column = column.type->createColumnConst(0, Field(database_name)); @@ -1062,10 +1086,10 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( { return std::make_shared(stream_header, adding_column_actions); }); } - if (has_table_virtual_column && common_header.has("_table") && !pipe_header.has("_table")) + if (has_table_virtual_column && common_header.has(table_column) && (storage_stage == QueryProcessingStage::FetchColumns || dynamic_cast(&storage_snapshot_->storage) != nullptr)) { ColumnWithTypeAndName column; - column.name = "_table"; + column.name = table_column; column.type = std::make_shared(std::make_shared()); column.column = column.type->createColumnConst(0, Field(table_name)); @@ -1080,7 +1104,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. convertAndFilterSourceStream( - header, modified_query_info, storage_snapshot_, aliases, row_policy_data_opt, context, *builder, processed_stage); + header, modified_query_info, storage_snapshot_, aliases, row_policy_data_opt, context, *builder, storage_stage); } return builder; @@ -1433,7 +1457,7 @@ void ReadFromMerge::convertAndFilterSourceStream( const RowPolicyDataOpt & row_policy_data_opt, ContextPtr local_context, QueryPipelineBuilder & builder, - QueryProcessingStage::Enum processed_stage) + QueryProcessingStage::Enum processed_stage [[maybe_unused]]) { Block before_block_header = builder.getHeader(); @@ -1493,7 +1517,7 @@ void ReadFromMerge::convertAndFilterSourceStream( ActionsDAG::MatchColumnsMode convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Name; if (local_context->getSettingsRef().allow_experimental_analyzer - && (processed_stage != QueryProcessingStage::FetchColumns || dynamic_cast(&snapshot->storage) != nullptr)) + && (processed_stage != QueryProcessingStage::FetchColumns)) convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; if (row_policy_data_opt) @@ -1501,6 +1525,8 @@ void ReadFromMerge::convertAndFilterSourceStream( row_policy_data_opt->addFilterTransform(builder); } + LOG_DEBUG(&Poco::Logger::get("convertAndFilterSourceStream"), "SOURCE:\n{}\nRESULT:\n{}", builder.getHeader().dumpStructure(), header.dumpStructure()); + auto convert_actions_dag = ActionsDAG::makeConvertingActions(builder.getHeader().getColumnsWithTypeAndName(), header.getColumnsWithTypeAndName(), convert_actions_match_columns_mode); From 2e49140de7695095831f52b32fc965a6b07ffd3e Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 13 Mar 2024 11:37:42 +0100 Subject: [PATCH 138/283] Small progress --- src/Storages/StorageMerge.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index ec03545d767..d5fd0f51a62 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1517,7 +1517,7 @@ void ReadFromMerge::convertAndFilterSourceStream( ActionsDAG::MatchColumnsMode convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Name; if 
(local_context->getSettingsRef().allow_experimental_analyzer - && (processed_stage != QueryProcessingStage::FetchColumns)) + && (processed_stage == QueryProcessingStage::FetchColumns && dynamic_cast(&snapshot->storage) != nullptr)) convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; if (row_policy_data_opt) From c0bfafa203091abbe2f5af6d1253249fc432ad64 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 13 Mar 2024 10:48:02 +0000 Subject: [PATCH 139/283] fix tags --- .../02997_insert_select_too_many_parts_multithread.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.sql b/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.sql index 00cf262add5..2dfc8094115 100644 --- a/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.sql +++ b/tests/queries/0_stateless/02997_insert_select_too_many_parts_multithread.sql @@ -1,5 +1,5 @@ -# Tags: disabled -# TODO: Fix parts_to_throw_insert logic for parallel MergeTreeSink onStart calls +-- Tags: disabled +-- TODO: Fix parts_to_throw_insert logic for parallel MergeTreeSink onStart calls DROP TABLE IF EXISTS too_many_parts; CREATE TABLE too_many_parts (x UInt64) ENGINE = MergeTree ORDER BY tuple() SETTINGS parts_to_delay_insert = 5, parts_to_throw_insert = 5; From 0353121dccb87403ff08334137fd9cecbb8953f1 Mon Sep 17 00:00:00 2001 From: peter279k Date: Wed, 13 Mar 2024 19:17:27 +0800 Subject: [PATCH 140/283] Improve related NULL functions usage --- docs/en/sql-reference/functions/functions-for-nulls.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 91c04cfded3..e73d6c899e7 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -14,7 +14,7 @@ Returns whether the argument is [NULL](../../sql-reference/syntax.md#null). isNull(x) ``` -Alias: `ISNULL`. +Alias: `IS NULL`. **Arguments** @@ -58,6 +58,8 @@ Returns whether the argument is not [NULL](../../sql-reference/syntax.md#null-li isNotNull(x) ``` +Alias: `IS NOT NULL`. + **Arguments:** - `x` — A value of non-compound data type. @@ -100,6 +102,8 @@ Returns whether the argument is 0 (zero) or [NULL](../../sql-reference/syntax.md isZeroOrNull(x) ``` +Alias: `x = 0 OR x IS NULL`. + **Arguments:** - `x` — A value of non-compound data type. 
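A minimal usage sketch of the aliases documented in the patch above, relying only on the built-in `values` table function (the query is illustrative and not part of the patch):

``` sql
-- `IS NULL` / `IS NOT NULL` parse to isNull(x) / isNotNull(x);
-- isZeroOrNull(x) matches both 0 and NULL.
SELECT x, x IS NULL, x IS NOT NULL, isZeroOrNull(x)
FROM values('x Nullable(Int32)', 0, 1, NULL);
```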

From 3931351ec4806769048d2638f54323f1ae89e056 Mon Sep 17 00:00:00 2001
From: kssenii 
Date: Wed, 13 Mar 2024 12:36:47 +0100
Subject: [PATCH 141/283] Pass timeout through setting

---
 src/Core/Settings.h | 1 +
 src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp | 5 +++--
 src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp | 10 +++++++---
 src/Disks/IO/CachedOnDiskWriteBufferFromFile.h | 15 +++++++++------
 src/IO/ReadSettings.h | 1 +
 src/IO/WriteSettings.h | 1 +
 src/Interpreters/Cache/FileCache.cpp | 5 +++--
 src/Interpreters/Cache/FileCache.h | 3 ++-
 src/Interpreters/Cache/FileCache_fwd.h | 1 -
 src/Interpreters/Cache/FileSegment.cpp | 4 ++--
 src/Interpreters/Cache/FileSegment.h | 2 +-
 src/Interpreters/Cache/Metadata.cpp | 6 +++++-
 .../Cache/WriteBufferToFileSegment.cpp | 8 +++++++-
 src/Interpreters/Cache/WriteBufferToFileSegment.h | 2 ++
 src/Interpreters/Context.cpp | 2 ++
 15 files changed, 46 insertions(+), 20 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index d70a6cf51c5..7ba335099e6 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -778,6 +778,7 @@ class IColumn;
 M(UInt64, filesystem_cache_max_download_size, (128UL * 1024 * 1024 * 1024), "Max remote filesystem cache size that can be downloaded by a single query", 0) \
 M(Bool, throw_on_error_from_cache_on_write_operations, false, "Ignore error from cache when caching on write operations (INSERT, merges)", 0) \
 M(UInt64, filesystem_cache_segments_batch_size, 20, "Limit on size of a single batch of file segments that a read buffer can request from cache. Too low value will lead to excessive requests to cache, too large may slow down eviction from cache", 0) \
+ M(UInt64, filesystem_cache_reserve_space_wait_lock_timeout_milliseconds, 1000, "Wait time to lock cache for space reservation in filesystem cache", 0) \
 \
 M(Bool, use_page_cache_for_disks_without_file_cache, false, "Use userspace page cache for remote disks that don't have filesystem cache enabled.", 0) \
 M(Bool, read_from_page_cache_if_exists_otherwise_bypass_cache, false, "Use userspace page cache in passive mode, similar to read_from_filesystem_cache_if_exists_otherwise_bypass_cache.", 0) \
diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp
index 47ee5858562..1e108b481ee 100644
--- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp
+++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp
@@ -637,7 +637,8 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment)

 ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceBytes, current_impl_buffer_size);

- bool continue_predownload = file_segment.reserve(current_predownload_size);
+ bool continue_predownload = file_segment.reserve(
+ current_predownload_size, settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds);
 if (continue_predownload)
 {
 LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, current_impl_buffer_size);
@@ -992,7 +993,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep()
 {
 chassert(file_offset_of_buffer_end + size - 1 <= file_segment.range().right);

- bool success = file_segment.reserve(size);
+ bool success = file_segment.reserve(size, settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds);
 if (success)
 {
 chassert(file_segment.getCurrentWriteOffset() == static_cast(implementation_buffer->getPosition()));
diff --git a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp
index 
faed55de713..f4e309f461e 100644 --- a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.cpp @@ -26,16 +26,18 @@ FileSegmentRangeWriter::FileSegmentRangeWriter( FileCache * cache_, const FileSegment::Key & key_, const FileCacheUserInfo & user_, + size_t reserve_space_lock_wait_timeout_milliseconds_, std::shared_ptr cache_log_, const String & query_id_, const String & source_path_) : cache(cache_) , key(key_) + , user(user_) + , reserve_space_lock_wait_timeout_milliseconds(reserve_space_lock_wait_timeout_milliseconds_) , log(getLogger("FileSegmentRangeWriter")) , cache_log(cache_log_) , query_id(query_id_) , source_path(source_path_) - , user(user_) { } @@ -89,7 +91,7 @@ bool FileSegmentRangeWriter::write(const char * data, size_t size, size_t offset size_t size_to_write = std::min(available_size, size); - bool reserved = file_segment->reserve(size_to_write); + bool reserved = file_segment->reserve(size_to_write, reserve_space_lock_wait_timeout_milliseconds); if (!reserved) { appendFilesystemCacheLog(*file_segment); @@ -211,6 +213,7 @@ CachedOnDiskWriteBufferFromFile::CachedOnDiskWriteBufferFromFile( , key(key_) , query_id(query_id_) , user(user_) + , reserve_space_lock_wait_timeout_milliseconds(settings_.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds) , throw_on_error_from_cache(settings_.throw_on_error_from_cache) , cache_log(!query_id_.empty() && settings_.enable_filesystem_cache_log ? cache_log_ : nullptr) { @@ -251,7 +254,8 @@ void CachedOnDiskWriteBufferFromFile::cacheData(char * data, size_t size, bool t if (!cache_writer) { - cache_writer = std::make_unique(cache.get(), key, user, cache_log, query_id, source_path); + cache_writer = std::make_unique( + cache.get(), key, user, reserve_space_lock_wait_timeout_milliseconds, cache_log, query_id, source_path); } Stopwatch watch(CLOCK_MONOTONIC); diff --git a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h index 59e0c76ca3d..ad4f6b5916d 100644 --- a/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h +++ b/src/Disks/IO/CachedOnDiskWriteBufferFromFile.h @@ -30,6 +30,7 @@ public: FileCache * cache_, const FileSegment::Key & key_, const FileCacheUserInfo & user_, + size_t reserve_space_lock_wait_timeout_milliseconds_, std::shared_ptr cache_log_, const String & query_id_, const String & source_path_); @@ -52,13 +53,14 @@ private: void completeFileSegment(); FileCache * cache; - FileSegment::Key key; + const FileSegment::Key key; + const FileCacheUserInfo user; + const size_t reserve_space_lock_wait_timeout_milliseconds; LoggerPtr log; std::shared_ptr cache_log; - String query_id; - String source_path; - FileCacheUserInfo user; + const String query_id; + const String source_path; FileSegmentsHolderPtr file_segments; @@ -99,11 +101,12 @@ private: String source_path; FileCacheKey key; - size_t current_download_offset = 0; const String query_id; const FileCacheUserInfo user; + const size_t reserve_space_lock_wait_timeout_milliseconds; + const bool throw_on_error_from_cache; - bool throw_on_error_from_cache; + size_t current_download_offset = 0; bool cache_in_error_state_or_disabled = false; std::unique_ptr cache_writer; diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index c0a63bf51b1..6a0cac35878 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -100,6 +100,7 @@ struct ReadSettings bool read_from_filesystem_cache_if_exists_otherwise_bypass_cache = false; bool enable_filesystem_cache_log = false; size_t 
filesystem_cache_segments_batch_size = 20; + size_t filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = 1000; bool use_page_cache_for_disks_without_file_cache = false; bool read_from_page_cache_if_exists_otherwise_bypass_cache = false; diff --git a/src/IO/WriteSettings.h b/src/IO/WriteSettings.h index fcadf34f021..7d36677b468 100644 --- a/src/IO/WriteSettings.h +++ b/src/IO/WriteSettings.h @@ -20,6 +20,7 @@ struct WriteSettings bool enable_filesystem_cache_on_write_operations = false; bool enable_filesystem_cache_log = false; bool throw_on_error_from_cache = false; + size_t filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = 1000; bool s3_allow_parallel_part_upload = true; diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 5650b9ce44e..ea40ffcfa3c 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -777,12 +777,13 @@ bool FileCache::tryReserve( FileSegment & file_segment, const size_t size, FileCacheReserveStat & reserve_stat, - const UserInfo & user) + const UserInfo & user, + size_t lock_wait_timeout_milliseconds) { ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheReserveMicroseconds); assertInitialized(); - auto cache_lock = tryLockCache(std::chrono::milliseconds(FILECACHE_TRY_RESERVE_LOCK_TIMEOUT_MILLISECONDS)); + auto cache_lock = tryLockCache(std::chrono::milliseconds(lock_wait_timeout_milliseconds)); if (!cache_lock) { ProfileEvents::increment(ProfileEvents::FilesystemCacheFailToReserveSpaceBecauseOfLockContention); diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 7434b2ac78a..007c4fd9483 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -161,7 +161,8 @@ public: FileSegment & file_segment, size_t size, FileCacheReserveStat & stat, - const UserInfo & user); + const UserInfo & user, + size_t lock_wait_timeout_milliseconds); std::vector getFileSegmentInfos(const UserID & user_id); diff --git a/src/Interpreters/Cache/FileCache_fwd.h b/src/Interpreters/Cache/FileCache_fwd.h index eaed279e7fd..06261b19db7 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h +++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -12,7 +12,6 @@ static constexpr int FILECACHE_DEFAULT_LOAD_METADATA_THREADS = 16; static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 10000000; static constexpr int FILECACHE_DEFAULT_HITS_THRESHOLD = 0; static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024; -static constexpr size_t FILECACHE_TRY_RESERVE_LOCK_TIMEOUT_MILLISECONDS = 1000; /// 1 sec. 
class FileCache; using FileCachePtr = std::shared_ptr; diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 6b2d4a4bec8..9ec2b090dc7 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -497,7 +497,7 @@ LockedKeyPtr FileSegment::lockKeyMetadata(bool assert_exists) const return metadata->tryLock(); } -bool FileSegment::reserve(size_t size_to_reserve, FileCacheReserveStat * reserve_stat) +bool FileSegment::reserve(size_t size_to_reserve, size_t lock_wait_timeout_milliseconds, FileCacheReserveStat * reserve_stat) { if (!size_to_reserve) throw Exception(ErrorCodes::LOGICAL_ERROR, "Zero space reservation is not allowed"); @@ -549,7 +549,7 @@ bool FileSegment::reserve(size_t size_to_reserve, FileCacheReserveStat * reserve if (!reserve_stat) reserve_stat = &dummy_stat; - bool reserved = cache->tryReserve(*this, size_to_reserve, *reserve_stat, getKeyMetadata()->user); + bool reserved = cache->tryReserve(*this, size_to_reserve, *reserve_stat, getKeyMetadata()->user, lock_wait_timeout_milliseconds); if (!reserved) setDownloadFailedUnlocked(lockFileSegment()); diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index ea97a6b0157..c34ee064345 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -199,7 +199,7 @@ public: /// Try to reserve exactly `size` bytes (in addition to the getDownloadedSize() bytes already downloaded). /// Returns true if reservation was successful, false otherwise. - bool reserve(size_t size_to_reserve, FileCacheReserveStat * reserve_stat = nullptr); + bool reserve(size_t size_to_reserve, size_t lock_wait_timeout_milliseconds, FileCacheReserveStat * reserve_stat = nullptr); /// Write data into reserved space. 
void write(const char * from, size_t size, size_t offset); diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 727f2762cca..b79605622b6 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -693,6 +694,9 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment, std::optionalset(memory->data(), memory->size()); } + const auto reserve_space_lock_wait_timeout_milliseconds = + Context::getGlobalContextInstance()->getReadSettings().filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; + size_t offset = file_segment.getCurrentWriteOffset(); if (offset != static_cast(reader->getPosition())) reader->seek(offset, SEEK_SET); @@ -701,7 +705,7 @@ void CacheMetadata::downloadImpl(FileSegment & file_segment, std::optionalavailable(); - if (!file_segment.reserve(size)) + if (!file_segment.reserve(size, reserve_space_lock_wait_timeout_milliseconds)) { LOG_TEST( log, "Failed to reserve space during background download " diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index 7cd4e2d6e8d..759135722dc 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,11 @@ WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegmentsHolderPtr segment , file_segment(&segment_holder_->front()) , segment_holder(std::move(segment_holder_)) { + auto query_context = CurrentThread::getQueryContext(); + if (query_context) + reserve_space_lock_wait_timeout_milliseconds = query_context->getReadSettings().filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; + else + reserve_space_lock_wait_timeout_milliseconds = Context::getGlobalContextInstance()->getReadSettings().filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; } /// If it throws an exception, the file segment will be incomplete, so you should not use it in the future. @@ -49,7 +55,7 @@ void WriteBufferToFileSegment::nextImpl() FileCacheReserveStat reserve_stat; /// In case of an error, we don't need to finalize the file segment /// because it will be deleted soon and completed in the holder's destructor. 
- bool ok = file_segment->reserve(bytes_to_write, &reserve_stat); + bool ok = file_segment->reserve(bytes_to_write, reserve_space_lock_wait_timeout_milliseconds, &reserve_stat); if (!ok) { diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.h b/src/Interpreters/Cache/WriteBufferToFileSegment.h index feb33472513..bff340d79b3 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.h +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.h @@ -28,6 +28,8 @@ private: /// Empty if file_segment is not owned by this WriteBufferToFileSegment FileSegmentsHolderPtr segment_holder; + + size_t reserve_space_lock_wait_timeout_milliseconds; }; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d658fbe9920..6a0657a842c 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -5166,6 +5166,7 @@ ReadSettings Context::getReadSettings() const res.read_from_filesystem_cache_if_exists_otherwise_bypass_cache = settings.read_from_filesystem_cache_if_exists_otherwise_bypass_cache; res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; res.filesystem_cache_segments_batch_size = settings.filesystem_cache_segments_batch_size; + res.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; res.filesystem_cache_max_download_size = settings.filesystem_cache_max_download_size; res.skip_download_if_exceeds_query_cache = settings.skip_download_if_exceeds_query_cache; @@ -5214,6 +5215,7 @@ WriteSettings Context::getWriteSettings() const res.enable_filesystem_cache_on_write_operations = settings.enable_filesystem_cache_on_write_operations; res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; res.throw_on_error_from_cache = settings.throw_on_error_from_cache_on_write_operations; + res.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds = settings.filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; res.s3_allow_parallel_part_upload = settings.s3_allow_parallel_part_upload; From ef796c668d187c80f13223dbf679dcc442907008 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Mar 2024 12:03:37 +0000 Subject: [PATCH 142/283] Fixing test_build_sets_from_multiple_threads/test.py::test_set --- src/Planner/Planner.cpp | 4 +++- tests/analyzer_integration_broken_tests.txt | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index bc1fb30781d..861b12e3da2 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1081,7 +1081,9 @@ void addBuildSubqueriesForSetsStepIfNeeded( for (auto & subquery : subqueries) { auto query_tree = subquery->detachQueryTree(); - auto subquery_options = select_query_options.subquery(); + /// I suppose it should be better to use all flags from select_query_options, + /// But for now it is done in the same way as in old analyzer. 
+ auto subquery_options = SelectQueryOptions(QueryProcessingStage::Complete, select_query_options.subquery_depth).subquery(); Planner subquery_planner( query_tree, subquery_options, diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 31527dc3476..e71a047c215 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -1,4 +1,3 @@ -test_build_sets_from_multiple_threads/test.py::test_set test_concurrent_backups_s3/test.py::test_concurrent_backups test_distributed_type_object/test.py::test_distributed_type_object test_merge_table_over_distributed/test.py::test_global_in From 1a47682c12a45acb888c89974302ec35bee5aaed Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 13 Mar 2024 13:08:12 +0100 Subject: [PATCH 143/283] Analyzer: Fix virtual columns in StorageMerge #ci_set_analyzer --- src/Interpreters/InterpreterSelectQueryAnalyzer.cpp | 1 + src/Storages/StorageMerge.cpp | 8 +++++--- tests/analyzer_tech_debt.txt | 1 - 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index 922f4a99b4a..539d7a59f6f 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -86,6 +86,7 @@ void replaceStorageInQueryTree(QueryTreeNodePtr & query_tree, const ContextPtr & continue; auto replacement_table_expression = std::make_shared(storage, context); + replacement_table_expression->setAlias(node->getAlias()); if (auto table_expression_modifiers = table_node.getTableExpressionModifiers()) replacement_table_expression->setTableExpressionModifiers(*table_expression_modifiers); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index d5fd0f51a62..fab4b2e5146 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1135,13 +1135,15 @@ QueryPlan ReadFromMerge::createPlanForTable( bool allow_experimental_analyzer = modified_context->getSettingsRef().allow_experimental_analyzer; auto storage_stage = storage->getQueryProcessingStage(modified_context, - QueryProcessingStage::Complete, + processed_stage, storage_snapshot_, modified_query_info); + LOG_DEBUG(&Poco::Logger::get("createPlanForTable"), "Storage: {}", toString(storage_stage)); + QueryPlan plan; - if (processed_stage <= storage_stage || (allow_experimental_analyzer && processed_stage == QueryProcessingStage::FetchColumns)) + if (processed_stage <= storage_stage) { /// If there are only virtual columns in query, you must request at least one other column. 
if (real_column_names.empty()) @@ -1186,7 +1188,7 @@ QueryPlan ReadFromMerge::createPlanForTable( row_policy_data_opt->addStorageFilter(source_step_with_filter); } } - else if (processed_stage > storage_stage || (allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns)) + else if (processed_stage > storage_stage || allow_experimental_analyzer) { /// Maximum permissible parallelism is streams_num modified_context->setSetting("max_threads", streams_num); diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index dc6284d20c5..cee3cff8cd5 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -1,5 +1,4 @@ 00223_shard_distributed_aggregation_memory_efficient -00717_merge_and_distributed 00725_memory_tracking 01062_pm_all_join_with_block_continuation 01083_expressions_in_engine_arguments From 6203d45b966c91a18b2aee6b6da6e3dd3d36db62 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Mar 2024 13:12:48 +0100 Subject: [PATCH 144/283] working version with comments --- programs/server/Server.cpp | 6 +- src/Common/Config/ConfigProcessor.cpp | 71 ++++++++++++++----- src/Core/BaseSettings.cpp | 4 +- .../configs/config_zk_include_test.xml | 6 +- .../test_config_substitutions/test.py | 40 ++++++++++- 5 files changed, 104 insertions(+), 23 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 7636f221ab5..9d7dd02a235 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -733,8 +733,6 @@ try LOG_INFO(log, "Available CPU instruction sets: {}", cpu_info); #endif - sanityChecks(*this); - // Initialize global thread pool. Do it before we fetch configs from zookeeper // nodes (`from_zk`), because ZooKeeper interface uses the pool. We will // ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well. @@ -904,6 +902,7 @@ try config_processor.savePreprocessedConfig(loaded_config, config().getString("path", DBMS_DEFAULT_PATH)); config().removeConfiguration(old_configuration.get()); config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); + global_context->setConfig(loaded_config.configuration); } Settings::checkNoSettingNamesAtTopLevel(config(), config_path); @@ -911,6 +910,9 @@ try /// We need to reload server settings because config could be updated via zookeeper. 
server_settings.loadSettingsFromConfig(config()); + /// NOTE: Do sanity checks after we loaded all possible substitutions from ZK + sanityChecks(*this); + #if defined(OS_LINUX) std::string executable_path = getExecutablePath(); diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 57e9f9efc10..75a973f582f 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -272,6 +272,7 @@ void ConfigProcessor::hideRecursive(Poco::XML::Node * config_root) void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, const Node * with_root) { + //LOG_DEBUG(log, "WITH ROOT {}", with_root->nodeName()); const NodeListPtr with_nodes = with_root->childNodes(); using ElementsByIdentifier = std::multimap; ElementsByIdentifier config_element_by_id; @@ -287,7 +288,7 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, } else if (node->nodeType() == Node::ELEMENT_NODE) { - std::cerr << "NODES IN SOURCE: " << node->nodeName() << std::endl; + //LOG_DEBUG(log, "NODES IN SOURCE: {}", node->nodeName()); config_element_by_id.insert(ElementsByIdentifier::value_type(getElementIdentifier(node), node)); } node = next_node; @@ -301,6 +302,7 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, bool remove = false; if (with_node->nodeType() == Node::ELEMENT_NODE) { + //LOG_DEBUG(log, "WITH NODE: {}", with_node->nodeName()); //std::cerr << "WITH NODE: " << with_node->nodeName() << std::endl; Element & with_element = dynamic_cast(*with_node); remove = with_element.hasAttribute("remove"); @@ -315,6 +317,7 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, if (it != config_element_by_id.end()) { Node * config_node = it->second; + //LOG_DEBUG(log, "SUBNODE NODE: {}", config_node->nodeName()); //std::cerr << "SUBNODE NODE: " << config_node->nodeName() << std::endl; config_element_by_id.erase(it); @@ -324,6 +327,7 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, } else if (replace) { + //LOG_DEBUG(log, "REPLACE: {}", config_node->nodeName()); //std::cerr << "REPLACE!!!" << std::endl; with_element.removeAttribute("replace"); NodePtr new_node = config->importNode(with_node, true); @@ -331,6 +335,7 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, } else { + //LOG_DEBUG(log, "SUBNODE NODE HERE: {}", config_node->nodeName()); //std::cerr << "SUBNODE NODE HERE: " << config_node->nodeName() << std::endl; Element & config_element = dynamic_cast(*config_node); @@ -346,13 +351,15 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, //std::cerr << "DONE\n"; merged = true; } - } - else - { - //std::cerr << "ELEMENT NOT FOUND\n"; + //else + //{ + // LOG_DEBUG(log, "ELEMENT NOT FOUND"); + // //std::cerr << "ELEMENT NOT FOUND\n"; + //} } if (!merged && !remove) { + //LOG_DEBUG(log, "NOTHING HAPPENED"); //std::cerr << "NOTHING hAPPENED\n"; /// Since we didn't find a pair to this node in default config, we will paste it as is. /// But it may have some child nodes which have attributes like "replace" or "remove". @@ -443,13 +450,14 @@ void ConfigProcessor::doIncludesRecursive( /// Replace the original contents, not add to it.
bool replace = attributes->getNamedItem("replace"); + bool merge = attributes->getNamedItem("merge"); bool included_something = false; auto process_include = [&](const Node * include_attr, const std::function & get_node, const char * error_msg) { const std::string & name = include_attr->getNodeValue(); - LOG_DEBUG(log, "PROCESS INCLUDE {}", name); + //LOG_DEBUG(log, "PROCESS INCLUDE {}", name); const Node * node_to_include = get_node(name); if (!node_to_include) { @@ -467,26 +475,38 @@ void ConfigProcessor::doIncludesRecursive( } else { + + Element & element = dynamic_cast(*node); /// Replace the whole node not just contents. if (node->nodeName() == "include") { - LOG_DEBUG(log, "Include here for node {}", name); + //LOG_DEBUG(log, "Include here for node {}", name); const NodeListPtr children = node_to_include->childNodes(); Node * next_child = nullptr; + for (Node * child = children->item(0); child; child = next_child) { next_child = child->nextSibling(); - NodePtr new_node = config->importNode(child, true); - //node->parentNode()->insertBefore(new_node, node); - mergeRecursive(config, node->parentNode(), new_node); + + //LOG_DEBUG(log, "MERGEEEE {} PARENT NODE {} NODE {}", merge, node->parentNode()->nodeName(), node->nodeName()); + if (merge) + { + //LOG_DEBUG(log, "MERGEEEE"); + NodePtr new_node = config->importNode(child->parentNode(), true); + //LOG_DEBUG(log, "CHILD {} NEW NODE NAME {}", child->nodeName(), new_node->nodeName()); + mergeRecursive(config, node->parentNode(), new_node); + } + else + { + NodePtr new_node = config->importNode(child, true); + node->parentNode()->insertBefore(new_node, node); + } } node->parentNode()->removeChild(node); } else { - Element & element = dynamic_cast(*node); - for (const auto & attr_name : SUBSTITUTION_ATTRS) element.removeAttribute(attr_name); @@ -792,13 +812,19 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfig(bool allow_zk_includes ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(config_xml)); + //LOG_DEBUG(log, "MIN BYTES FOR WIDE PART {}", configuration->getUInt64("merge_tree.min_bytes_for_wide_part")); + //Poco::Util::AbstractConfiguration::Keys config_keys; + //configuration->keys("merge_tree", config_keys); + //for (const String & key : config_keys) + //{ + // LOG_DEBUG(log, "CONFIG KEY {} LEFT in merge_tree before ZK", key); + //} + return LoadedConfig{configuration, has_zk_includes, /* loaded_from_preprocessed = */ false, config_xml, path}; } ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes( - zkutil::ZooKeeperNodeCache & zk_node_cache, - const zkutil::EventPtr & zk_changed_event, - bool fallback_to_preprocessed) + zkutil::ZooKeeperNodeCache & zk_node_cache, const zkutil::EventPtr & zk_changed_event, bool fallback_to_preprocessed) { XMLDocumentPtr config_xml; bool has_zk_includes; @@ -817,13 +843,26 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes( if (!zk_exception) throw; - LOG_WARNING(log, "Error while processing from_zk config includes: {}. Config will be loaded from preprocessed file: {}", zk_exception->message(), preprocessed_path); + LOG_WARNING( + log, + "Error while processing from_zk config includes: {}. 
Config will be loaded from preprocessed file: {}", + zk_exception->message(), + preprocessed_path); config_xml = dom_parser.parse(preprocessed_path); } ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(config_xml)); + //LOG_DEBUG(log, "MIN BYTES FOR WIDE PART {} WITH ZK INCLUDE", configuration->getUInt64("merge_tree.min_bytes_for_wide_part")); + + //Poco::Util::AbstractConfiguration::Keys config_keys; + //configuration->keys("merge_tree", config_keys); + //for (const String & key : config_keys) + //{ + // LOG_DEBUG(log, "CONFIG KEY {} LEFT in merge_tree after ZK", key); + //} + return LoadedConfig{configuration, has_zk_includes, !processed_successfully, config_xml, path}; } diff --git a/src/Core/BaseSettings.cpp b/src/Core/BaseSettings.cpp index a7e1ab99af7..c535b9ce65e 100644 --- a/src/Core/BaseSettings.cpp +++ b/src/Core/BaseSettings.cpp @@ -41,13 +41,13 @@ BaseSettingsHelpers::Flags BaseSettingsHelpers::readFlags(ReadBuffer & in) void BaseSettingsHelpers::throwSettingNotFound(std::string_view name) { - throw Exception(ErrorCodes::UNKNOWN_SETTING, "Unknown setting {}", String{name}); + throw Exception(ErrorCodes::UNKNOWN_SETTING, "Unknown setting '{}'", String{name}); } void BaseSettingsHelpers::warningSettingNotFound(std::string_view name) { - LOG_WARNING(getLogger("Settings"), "Unknown setting {}, skipping", name); + LOG_WARNING(getLogger("Settings"), "Unknown setting '{}', skipping", name); } } diff --git a/tests/integration/test_config_substitutions/configs/config_zk_include_test.xml b/tests/integration/test_config_substitutions/configs/config_zk_include_test.xml index 4a0ed623c4e..743770c3024 100644 --- a/tests/integration/test_config_substitutions/configs/config_zk_include_test.xml +++ b/tests/integration/test_config_substitutions/configs/config_zk_include_test.xml @@ -1,8 +1,12 @@ + 44 + + 99 1 + 1111 - + diff --git a/tests/integration/test_config_substitutions/test.py b/tests/integration/test_config_substitutions/test.py index 08834e20423..304506148b0 100644 --- a/tests/integration/test_config_substitutions/test.py +++ b/tests/integration/test_config_substitutions/test.py @@ -67,6 +67,12 @@ def start_cluster(): value=b"33", makepath=True, ) + zk.create( + path="/merge_max_block_size", + value=b"8888", + makepath=True, + ) + cluster.add_zookeeper_startup_command(create_zk_roots) @@ -244,5 +250,35 @@ def test_allow_databases(start_cluster): ) def test_config_multiple_zk_substitutions(start_cluster): - #print(node3.query("SELECT * FROM system.merge_tree_settings")) - print(node3.query("SELECT * FROM system.merge_tree_settings WHERE changed=1")) + assert node3.query("SELECT value FROM system.merge_tree_settings WHERE name='min_bytes_for_wide_part'") == "33\n" + assert node3.query("SELECT value FROM system.merge_tree_settings WHERE name='min_rows_for_wide_part'") == "1111\n" + assert node3.query("SELECT value FROM system.merge_tree_settings WHERE name='merge_max_block_size'") == "8888\n" + assert node3.query("SELECT value FROM system.server_settings WHERE name='background_pool_size'") == "44\n" + + zk = cluster.get_kazoo_client("zoo1") + zk.create( + path="/background_pool_size", + value=b"72", + makepath=True, + ) + + node3.replace_config( + "/etc/clickhouse-server/config.d/config_zk_include_test.xml", + """ + + + 44 + + + 1 + 1111 + + + + +""", + ) + + node3.query("SYSTEM RELOAD CONFIG") + + assert node3.query("SELECT value FROM system.server_settings WHERE name='background_pool_size'") == "72\n" From eacf33b445f6b93abda804ef2a9aed2928c82e56 Mon Sep 17 00:00:00 2001 
From: alesapin Date: Wed, 13 Mar 2024 13:13:29 +0100 Subject: [PATCH 145/283] Remove comments --- src/Common/Config/ConfigProcessor.cpp | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 75a973f582f..1aa41558923 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -173,12 +173,10 @@ static void deleteAttributesRecursive(Node * root) static void mergeAttributes(Element & config_element, Element & with_element) { auto * with_element_attributes = with_element.attributes(); - //std::cerr << "MERGE ATTRIBUTES:" << with_element_attributes->length() << "\n"; for (size_t i = 0; i < with_element_attributes->length(); ++i) { auto * attr = with_element_attributes->item(i); - //std::cerr << "ATTR NAME:" << attr->nodeName() << std::endl; config_element.setAttribute(attr->nodeName(), attr->getNodeValue()); } @@ -279,8 +277,6 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, for (Node * node = config_root->firstChild(); node;) { Node * next_node = node->nextSibling(); - //if (next_node) - // std::cerr << "NEXT NODE:" << next_node->nodeName() << std::endl; /// Remove text from the original config node. if (node->nodeType() == Node::TEXT_NODE && !allWhitespace(node->getNodeValue())) { @@ -288,7 +284,6 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, } else if (node->nodeType() == Node::ELEMENT_NODE) { - //LOG_DEBUG(log, "NODES IN SOURCE: {}", node->nodeName()); config_element_by_id.insert(ElementsByIdentifier::value_type(getElementIdentifier(node), node)); } node = next_node; @@ -302,8 +297,6 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, bool remove = false; if (with_node->nodeType() == Node::ELEMENT_NODE) { - //LOG_DEBUG(log, "WITH NODE: {}", with_node->nodeName()); - //std::cerr << "WITH NODE: " << with_node->nodeName() << std::endl; Element & with_element = dynamic_cast(*with_node); remove = with_element.hasAttribute("remove"); bool replace = with_element.hasAttribute("replace"); @@ -317,8 +310,6 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, if (it != config_element_by_id.end()) { Node * config_node = it->second; - //LOG_DEBUG(log, "SUBNODE NODE: {}", config_node->nodeName()); - //std::cerr << "SUBNODE NODE: " << config_node->nodeName() << std::endl; config_element_by_id.erase(it); if (remove) @@ -327,16 +318,12 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, } else if (replace) { - //LOG_DEBUG(log, "REPLACE: {}", config_node->nodeName()); - //std::cerr << "REPLACE!!!" 
<< std::endl; with_element.removeAttribute("replace"); NodePtr new_node = config->importNode(with_node, true); config_root->replaceChild(new_node, config_node); } else { - //LOG_DEBUG(log, "SUBNODE NODE HERE: {}", config_node->nodeName()); - //std::cerr << "SUBNODE NODE HERE: " << config_node->nodeName() << std::endl; Element & config_element = dynamic_cast(*config_node); /// Remove substitution attributes from the merge target node if source node already has a value @@ -348,19 +335,11 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, mergeAttributes(config_element, with_element); mergeRecursive(config, config_node, with_node); } - //std::cerr << "DONE\n"; merged = true; } - //else - //{ - // LOG_DEBUG(log, "ELEMENT NOT FOUND"); - // //std::cerr << "ELEMENT NOT FOUND\n"; - //} } if (!merged && !remove) { - //LOG_DEBUG(log, "NOTHING HAPPENED"); - //std::cerr << "NOTHING hAPPENED\n"; /// Since we didn't find a pair to this node in default config, we will paste it as is. /// But it may have some child nodes which have attributes like "replace" or "remove". /// They are useless in preprocessed configuration. From 21ccabd603cde420b761736966a172a6312de6f9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Mar 2024 13:19:49 +0100 Subject: [PATCH 146/283] Remove redundant diff --- src/Common/Config/ConfigProcessor.cpp | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 1aa41558923..d639c243639 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -270,7 +270,6 @@ void ConfigProcessor::hideRecursive(Poco::XML::Node * config_root) void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, const Node * with_root) { - //LOG_DEBUG(log, "WITH ROOT {}", with_root->nodeName()); const NodeListPtr with_nodes = with_root->childNodes(); using ElementsByIdentifier = std::multimap; ElementsByIdentifier config_element_by_id; @@ -436,7 +435,6 @@ void ConfigProcessor::doIncludesRecursive( auto process_include = [&](const Node * include_attr, const std::function & get_node, const char * error_msg) { const std::string & name = include_attr->getNodeValue(); - //LOG_DEBUG(log, "PROCESS INCLUDE {}", name); const Node * node_to_include = get_node(name); if (!node_to_include) { @@ -459,7 +457,6 @@ void ConfigProcessor::doIncludesRecursive( /// Replace the whole node not just contents. 
if (node->nodeName() == "include") { - //LOG_DEBUG(log, "Include here for node {}", name); const NodeListPtr children = node_to_include->childNodes(); Node * next_child = nullptr; @@ -467,12 +464,9 @@ void ConfigProcessor::doIncludesRecursive( { next_child = child->nextSibling(); - //LOG_DEBUG(log, "MERGEEEE {} PARENT NODE {} NODE {}", merge, node->parentNode()->nodeName(), node->nodeName()); if (merge) { - //LOG_DEBUG(log, "MERGEEEE"); NodePtr new_node = config->importNode(child->parentNode(), true); - //LOG_DEBUG(log, "CHILD {} NEW NODE NAME {}", child->nodeName(), new_node->nodeName()); mergeRecursive(config, node->parentNode(), new_node); } else @@ -791,14 +785,6 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfig(bool allow_zk_includes ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(config_xml)); - //LOG_DEBUG(log, "MIN BYTES FOR WIDE PART {}", configuration->getUInt64("merge_tree.min_bytes_for_wide_part")); - //Poco::Util::AbstractConfiguration::Keys config_keys; - //configuration->keys("merge_tree", config_keys); - //for (const String & key : config_keys) - //{ - // LOG_DEBUG(log, "CONFIG KEY {} LEFT in merge_tree before ZK", key); - //} - return LoadedConfig{configuration, has_zk_includes, /* loaded_from_preprocessed = */ false, config_xml, path}; } @@ -833,15 +819,6 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes( ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(config_xml)); - //LOG_DEBUG(log, "MIN BYTES FOR WIDE PART {} WITH ZK INCLUDE", configuration->getUInt64("merge_tree.min_bytes_for_wide_part")); - - //Poco::Util::AbstractConfiguration::Keys config_keys; - //configuration->keys("merge_tree", config_keys); - //for (const String & key : config_keys) - //{ - // LOG_DEBUG(log, "CONFIG KEY {} LEFT in merge_tree after ZK", key); - //} - return LoadedConfig{configuration, has_zk_includes, !processed_successfully, config_xml, path}; } From 62681225173b5c2044e33b83b691ca9ffb9245e5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Mar 2024 13:21:06 +0100 Subject: [PATCH 147/283] Revert unit test --- src/Common/tests/gtest_config_processor.cpp | 49 --------------------- 1 file changed, 49 deletions(-) diff --git a/src/Common/tests/gtest_config_processor.cpp b/src/Common/tests/gtest_config_processor.cpp index d5397dd54c2..f01460d515b 100644 --- a/src/Common/tests/gtest_config_processor.cpp +++ b/src/Common/tests/gtest_config_processor.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -64,51 +63,3 @@ TEST(Common, ConfigProcessorManyElements) /// More that 5 min is way too slow ASSERT_LE(enumerate_elapsed_ms, 300*1000); } - -TEST(Common, ConfigProcessorMerge) -{ - namespace fs = std::filesystem; - - auto path = fs::path("/tmp/test_config_processor/"); - - fs::remove_all(path); - fs::create_directories(path); - fs::create_directories(path / "config.d"); - //SCOPE_EXIT({ fs::remove_all(path); }); - - auto config_file = std::make_unique(path / "config.xml"); - { - DB::WriteBufferFromFile out(config_file->path()); - writeString("\n", out); - writeString("1\n", out); - writeString("\n", out); - } - DB::XMLDocumentPtr config_xml; - DB::ConfigProcessor processor(config_file->path(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false); - { - bool has_zk_includes; - config_xml = processor.processConfig(&has_zk_includes); - } - - auto small_part_file = std::make_unique(path / "config.d" / "part.xml"); - { - DB::WriteBufferFromFile out(small_part_file->path()); - 
writeString("33\n", out); - } - DB::XMLDocumentPtr part_xml; - - { - DB::ConfigProcessor tiny(small_part_file->path(), /* throw_on_bad_incl = */ false, /* log_to_console = */ false); - bool has_zk_includes; - part_xml = tiny.processConfig(&has_zk_includes); - } - - auto * root_node = DB::XMLUtils::getRootNode(config_xml); - auto * part_node = DB::XMLUtils::getRootNode(part_xml); - auto * new_node = config_xml->importNode(part_node, true); - auto * deep_root = root_node->getNodeByPath("merge_tree"); - processor.mergeRecursive(config_xml, deep_root, new_node); - DB::ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(config_xml)); - - ASSERT_EQ(configuration->getUInt64("merge_tree.min_bytes_for_wide_part"), 33); -} From ef1f3c1af8c09ea94004857c0582de8e0137a1b4 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Mar 2024 13:21:51 +0100 Subject: [PATCH 148/283] Revert config processor.h --- src/Common/Config/ConfigProcessor.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.h b/src/Common/Config/ConfigProcessor.h index 9201ec7b77d..5712c36d737 100644 --- a/src/Common/Config/ConfigProcessor.h +++ b/src/Common/Config/ConfigProcessor.h @@ -119,11 +119,6 @@ public: static inline const auto SUBSTITUTION_ATTRS = {"incl", "from_zk", "from_env"}; - /// If config root node name is not 'clickhouse' and merging config's root node names doesn't match, bypasses merging and returns false. - /// For compatibility root node 'yandex' considered equal to 'clickhouse'. - bool merge(XMLDocumentPtr config, XMLDocumentPtr with); - void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); - private: const std::string path; std::string preprocessed_path; @@ -150,7 +145,11 @@ private: void hideRecursive(Poco::XML::Node * config_root); XMLDocumentPtr hideElements(XMLDocumentPtr xml_tree); - //void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); + void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root); + + /// If config root node name is not 'clickhouse' and merging config's root node names doesn't match, bypasses merging and returns false. + /// For compatibility root node 'yandex' considered equal to 'clickhouse'. + bool merge(XMLDocumentPtr config, XMLDocumentPtr with); void doIncludesRecursive( XMLDocumentPtr config, From 8af9262e43435528bebd528eaed39ae103fa229c Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Mar 2024 13:26:25 +0100 Subject: [PATCH 149/283] Add docs --- docs/en/operations/configuration-files.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 9f17f4af1e8..fbac2407640 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -116,6 +116,8 @@ XML substitution example: Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node, and it will be fully inserted into the source element. +If you want to merge substitution content with existing configuration instead of append you can use attribute `merge="true`, for example: ``. 
In this case the existing configuration will be merged with the content from the substitution, and the existing configuration settings will be replaced with values from the substitution. + ## Encrypting and Hiding Configuration {#encryption} You can use symmetric encryption to encrypt a configuration element, for example, a plaintext password or private key. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add attribute `encrypted_by` with the name of the encryption codec as value to the element to encrypt. From 479b58ec70ec3c2563a5cdc358bfd2674f58325d Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Mar 2024 13:28:10 +0100 Subject: [PATCH 150/283] Remove redundant changes --- src/Common/Config/ConfigProcessor.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index d639c243639..333d4d45a8f 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -452,9 +452,6 @@ void ConfigProcessor::doIncludesRecursive( } else { - - Element & element = dynamic_cast(*node); /// Replace the whole node not just contents. if (node->nodeName() == "include") { const NodeListPtr children = node_to_include->childNodes(); @@ -789,7 +786,9 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfig(bool allow_zk_includes } ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes( - zkutil::ZooKeeperNodeCache & zk_node_cache, const zkutil::EventPtr & zk_changed_event, bool fallback_to_preprocessed) + zkutil::ZooKeeperNodeCache & zk_node_cache, + const zkutil::EventPtr & zk_changed_event, + bool fallback_to_preprocessed) { XMLDocumentPtr config_xml; bool has_zk_includes; @@ -808,11 +807,7 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes( if (!zk_exception) throw; - LOG_WARNING( - log, - "Error while processing from_zk config includes: {}. Config will be loaded from preprocessed file: {}", - zk_exception->message(), - preprocessed_path); + LOG_WARNING(log, "Error while processing from_zk config includes: {}.
Config will be loaded from preprocessed file: {}", zk_exception->message(), preprocessed_path); config_xml = dom_parser.parse(preprocessed_path); } From ab8276f23c5c09b63e3537ad29043b5a4a9f13a2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Mar 2024 13:29:04 +0100 Subject: [PATCH 151/283] Buildable code --- src/Common/Config/ConfigProcessor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index 333d4d45a8f..deac1039372 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -477,6 +477,7 @@ void ConfigProcessor::doIncludesRecursive( } else { + Element & element = dynamic_cast(*node); for (const auto & attr_name : SUBSTITUTION_ATTRS) element.removeAttribute(attr_name); From 24626d431cac2e5c76ea14c5dd40afe5617ae1d0 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 13 Mar 2024 12:35:27 +0000 Subject: [PATCH 152/283] Automatic style fix --- .../test_config_substitutions/test.py | 44 ++++++++++++++++--- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/tests/integration/test_config_substitutions/test.py b/tests/integration/test_config_substitutions/test.py index 304506148b0..ac75771cb9c 100644 --- a/tests/integration/test_config_substitutions/test.py +++ b/tests/integration/test_config_substitutions/test.py @@ -13,7 +13,12 @@ node2 = cluster.add_instance( env_variables={"MAX_QUERY_SIZE": "55555"}, ) node3 = cluster.add_instance( - "node3", user_configs=["configs/config_zk.xml",], main_configs=["configs/config_zk_include_test.xml"], with_zookeeper=True + "node3", + user_configs=[ + "configs/config_zk.xml", + ], + main_configs=["configs/config_zk_include_test.xml"], + with_zookeeper=True, ) node4 = cluster.add_instance( "node4", @@ -73,7 +78,6 @@ def start_cluster(): makepath=True, ) - cluster.add_zookeeper_startup_command(create_zk_roots) cluster.start() @@ -249,11 +253,32 @@ def test_allow_databases(start_cluster): == "" ) + def test_config_multiple_zk_substitutions(start_cluster): - assert node3.query("SELECT value FROM system.merge_tree_settings WHERE name='min_bytes_for_wide_part'") == "33\n" - assert node3.query("SELECT value FROM system.merge_tree_settings WHERE name='min_rows_for_wide_part'") == "1111\n" - assert node3.query("SELECT value FROM system.merge_tree_settings WHERE name='merge_max_block_size'") == "8888\n" - assert node3.query("SELECT value FROM system.server_settings WHERE name='background_pool_size'") == "44\n" + assert ( + node3.query( + "SELECT value FROM system.merge_tree_settings WHERE name='min_bytes_for_wide_part'" + ) + == "33\n" + ) + assert ( + node3.query( + "SELECT value FROM system.merge_tree_settings WHERE name='min_rows_for_wide_part'" + ) + == "1111\n" + ) + assert ( + node3.query( + "SELECT value FROM system.merge_tree_settings WHERE name='merge_max_block_size'" + ) + == "8888\n" + ) + assert ( + node3.query( + "SELECT value FROM system.server_settings WHERE name='background_pool_size'" + ) + == "44\n" + ) zk = cluster.get_kazoo_client("zoo1") zk.create( @@ -281,4 +306,9 @@ def test_config_multiple_zk_substitutions(start_cluster): node3.query("SYSTEM RELOAD CONFIG") - assert node3.query("SELECT value FROM system.server_settings WHERE name='background_pool_size'") == "72\n" + assert ( + node3.query( + "SELECT value FROM system.server_settings WHERE name='background_pool_size'" + ) + == "72\n" + ) From c234e1bca623106f625b4789513cc6ef20d7986f Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Mar 2024 
13:35:37 +0100 Subject: [PATCH 153/283] Less diff --- src/Common/Config/ConfigProcessor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index deac1039372..d524db883eb 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -303,7 +303,6 @@ void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, if (remove && replace) throw Poco::Exception("both remove and replace attributes set for element <" + with_node->nodeName() + ">"); - ElementsByIdentifier::iterator it = config_element_by_id.find(getElementIdentifier(with_node)); if (it != config_element_by_id.end()) @@ -456,17 +455,17 @@ void ConfigProcessor::doIncludesRecursive( { const NodeListPtr children = node_to_include->childNodes(); Node * next_child = nullptr; - for (Node * child = children->item(0); child; child = next_child) { next_child = child->nextSibling(); + /// Recursively replace existing nodes in merge mode if (merge) { NodePtr new_node = config->importNode(child->parentNode(), true); mergeRecursive(config, node->parentNode(), new_node); } - else + else /// Append to existing node by default { NodePtr new_node = config->importNode(child, true); node->parentNode()->insertBefore(new_node, node); @@ -478,6 +477,7 @@ void ConfigProcessor::doIncludesRecursive( else { Element & element = dynamic_cast(*node); + for (const auto & attr_name : SUBSTITUTION_ATTRS) element.removeAttribute(attr_name); From 457578627103b0eb0028f51b739c3911de278bf4 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Mar 2024 12:38:17 +0000 Subject: [PATCH 154/283] Fix 01952_optimize_distributed_group_by_sharding_key with analyzer. --- ...istributed_group_by_sharding_key.reference | 78 +++++++++++++++++++ ...mize_distributed_group_by_sharding_key.sql | 14 ++++ 2 files changed, 92 insertions(+) diff --git a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference index ddfa6929d69..212dd348edb 100644 --- a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference +++ b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.reference @@ -71,3 +71,81 @@ Expression (Projection) Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY))))) ReadFromSystemNumbers ReadFromRemote (Read from remote replica) +set allow_experimental_analyzer = 1; +explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- not optimized +Expression (Project names) + Distinct (DISTINCT) + Union + Distinct (Preliminary DISTINCT) + Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromSystemNumbers + ReadFromRemote (Read from remote replica) +explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- optimized +Union + Expression (Project names) + Distinct (DISTINCT) + Distinct (Preliminary DISTINCT) + Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table 
structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromSystemNumbers + ReadFromRemote (Read from remote replica) +explain select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- not optimized +Expression (Project names) + LimitBy + Union + Expression (Before LIMIT BY) + LimitBy + Expression ((Before LIMIT BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers)))))))) + ReadFromSystemNumbers + Expression + ReadFromRemote (Read from remote replica) +explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- optimized +Union + Expression (Project names) + LimitBy + Expression ((Before LIMIT BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers)))))))) + ReadFromSystemNumbers + ReadFromRemote (Read from remote replica) +explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- not optimized +Expression (Project names) + Distinct (DISTINCT) + Sorting (Merge sorted streams for ORDER BY, without aggregation) + Union + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Distinct (Preliminary DISTINCT) + Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromSystemNumbers + ReadFromRemote (Read from remote replica) +explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized +Expression (Project names) + Sorting (Merge sorted streams after aggregation stage for ORDER BY) + Union + Distinct (DISTINCT) + Sorting (Sorting for ORDER BY) + Expression (Before ORDER BY) + Distinct (Preliminary DISTINCT) + Expression ((Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) + ReadFromSystemNumbers + ReadFromRemote (Read from remote replica) +explain select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- not optimized +Expression (Project names) + LimitBy + Expression (Before LIMIT BY) + Sorting (Merge sorted streams for ORDER BY, without aggregation) + Union + LimitBy + Expression ((Before LIMIT BY + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) [lifted up part])) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW 
subquery + (Project names + (Projection + Change column names to column identifiers)))))))) + ReadFromSystemNumbers + ReadFromRemote (Read from remote replica) +explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized +Expression (Project names) + Sorting (Merge sorted streams after aggregation stage for ORDER BY) + Union + LimitBy + Expression ((Before LIMIT BY + (Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers))))))) [lifted up part])) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + (Change column names to column identifiers + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Project names + (Projection + Change column names to column identifiers)))))))) + ReadFromSystemNumbers + ReadFromRemote (Read from remote replica) diff --git a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql index 74b55b95315..adf55a9dd7f 100644 --- a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql +++ b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql @@ -4,6 +4,8 @@ set optimize_skip_unused_shards=1; set optimize_distributed_group_by_sharding_key=1; set prefer_localhost_replica=1; +set allow_experimental_analyzer = 0; + -- { echo } explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- not optimized explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- optimized @@ -14,3 +16,15 @@ explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized explain select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- not optimized explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized + +set allow_experimental_analyzer = 1; + +explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- not optimized +explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- optimized +explain select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- not optimized +explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- optimized + +explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- not optimized +explain select distinct k1, k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized +explain select distinct on (k1) k2 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- 
not optimized +explain select distinct on (k1, k2) v from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)) order by v; -- optimized From ffdb84b8df7120292543da077227ef049552dccf Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 13 Mar 2024 13:38:23 +0100 Subject: [PATCH 155/283] Fix style --- src/Storages/StorageMerge.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index fab4b2e5146..e695594873b 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1063,7 +1063,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( get_column_options.withSubcolumns(); LOG_DEBUG(&Poco::Logger::get("createSources"), "Processed:{}\nStorage:{}", toString(processed_stage), toString(storage_stage)); - + String table_alias; if (allow_experimental_analyzer) table_alias = modified_query_info.query_tree->as()->getJoinTree()->as()->getAlias(); From 4954bde599dd1bdcdf56957e17b0b9a661aa17f6 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 13 Mar 2024 13:38:35 +0100 Subject: [PATCH 156/283] Update docs/en/sql-reference/functions/string-functions.md Co-authored-by: Johnny <9611008+johnnymatthews@users.noreply.github.com> --- docs/en/sql-reference/functions/string-functions.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index f9c3f91a12b..a9b7cc9566d 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -615,7 +615,9 @@ Assumes that the string contains valid UTF-8 encoded text. If this assumption is **Example** ```sql -SELECT 'database' AS string, substringUTF8(string, 5), substringUTF8(string, 5, 1) +SELECT 'Täglich grüßt das Murmeltier.' AS str, + substringUTF8(str, 9), + substringUTF8(str, 9, 5) ``` ```response From b4953f35b4a8ca3eca816557c080ff612062b482 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 13 Mar 2024 13:39:03 +0100 Subject: [PATCH 157/283] Update docs/en/sql-reference/functions/string-functions.md Co-authored-by: Johnny <9611008+johnnymatthews@users.noreply.github.com> --- docs/en/sql-reference/functions/string-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index a9b7cc9566d..25a0c7e38d8 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -686,7 +686,7 @@ Assumes that the string contains valid UTF-8 encoded text. 
If this assumption is **Example** ```sql -SELECT substringIndexUTF8('www.clickhouse.com', '.', 2) +SELECT substringIndexUTF8('www.straßen-in-europa.de', '.', 2) ``` ```response From 6ca4fc26f4bca8f787b3a575d5496ffd75ee0c55 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 13 Mar 2024 13:39:14 +0100 Subject: [PATCH 158/283] Update docs/en/sql-reference/functions/string-functions.md Co-authored-by: Johnny <9611008+johnnymatthews@users.noreply.github.com> --- docs/en/sql-reference/functions/string-functions.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 25a0c7e38d8..01a583e1713 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -621,9 +621,7 @@ SELECT 'Täglich grüßt das Murmeltier.' AS str, ``` ```response -┌─string───┬─substringUTF8('database', 5)─┬─substringUTF8('database', 5, 1)─┐ -│ database │ base │ b │ -└──────────┴──────────────────────────────┴─────────────────────────────────┘ +Täglich grüßt das Murmeltier. grüßt das Murmeltier. grüßt ``` ## substringIndex From 1e536251a20a0fdbac08b0a99e420a8e74886bcd Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 13 Mar 2024 13:41:56 +0100 Subject: [PATCH 159/283] Update string-functions.md --- docs/en/sql-reference/functions/string-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 01a583e1713..b4e2adbed3c 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -688,7 +688,7 @@ SELECT substringIndexUTF8('www.straßen-in-europa.de', '.', 2) ``` ```response -www.clickhouse +www.straßen-in-europa ``` ## appendTrailingCharIfAbsent From e5e632ec3362d2106adca2e02ae2a4ea1862ee3c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Mar 2024 12:43:10 +0000 Subject: [PATCH 160/283] Update analyzer_tech_debt.txt --- tests/analyzer_tech_debt.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index dbd216ea7be..42aa579658e 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -9,7 +9,6 @@ 01747_join_view_filter_dictionary 01761_cast_to_enum_nullable 01925_join_materialized_columns -01952_optimize_distributed_group_by_sharding_key 02354_annoy # Check after constants refactoring 02901_parallel_replicas_rollup From cca96e05cf7be69f53a479db13414824552b7ca0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 11 Mar 2024 17:57:24 +0100 Subject: [PATCH 161/283] Bring clickhouse-test changes from private --- tests/clickhouse-test | 376 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 345 insertions(+), 31 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index ce0feadf050..057502379ed 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -4,9 +4,11 @@ # pylint: disable=global-variable-not-assigned # pylint: disable=too-many-lines # pylint: disable=anomalous-backslash-in-string +# pylint: disable=protected-access import copy import enum +import tempfile import glob # Not requests, to avoid requiring extra dependency. 
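The corrected examples in the string-function docs above all hinge on one point: the `...UTF8` variants count Unicode code points rather than bytes, which only becomes visible with non-ASCII input. The expected outputs can be cross-checked with plain Python slicing (a side note for the reader, not part of any patch):

```python
s = "Täglich grüßt das Murmeltier."

# substringUTF8(str, 9): offsets are 1-based and count code points, not bytes
assert s[8:] == "grüßt das Murmeltier."

# substringUTF8(str, 9, 5): five code points starting at offset 9
assert s[8:8 + 5] == "grüßt"

# substringIndexUTF8('www.straßen-in-europa.de', '.', 2)
url = "www.straßen-in-europa.de"
assert ".".join(url.split(".")[:2]) == "www.straßen-in-europa"
```

A byte-counting `substring` would cut inside the two-byte UTF-8 encodings of `ü` and `ß`, which is exactly why the examples were switched to non-ASCII strings.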
@@ -68,6 +70,144 @@ TEST_FILE_EXTENSIONS = [".sql", ".sql.j2", ".sh", ".py", ".expect"] VERSION_PATTERN = r"^((\d+\.)?(\d+\.)?(\d+\.)?\d+)$" +class SharedEngineReplacer: + ENGINES_NON_REPLICATED_REGEXP = r"[ =]((Collapsing|VersionedCollapsing|Summing|Replacing|Aggregating|)MergeTree\(?\)?)" + ENGINES_MAPPING_REPLICATED = [ + ("ReplicatedMergeTree", "SharedMergeTree"), + ("ReplicatedCollapsingMergeTree", "SharedCollapsingMergeTree"), + ( + "ReplicatedVersionedCollapsingMergeTree", + "SharedVersionedCollapsingMergeTree", + ), + ("ReplicatedSummingMergeTree", "SharedSummingMergeTree"), + ("ReplicatedReplacingMergeTree", "SharedReplacingMergeTree"), + ("ReplicatedAggregatingMergeTree", "SharedAggregatingMergeTree"), + ] + NEW_SYNTAX_REPLICATED_MERGE_TREE_RE = ( + r"Replicated[a-zA-Z]*MergeTree\((\\?'.*\\?')?,?(\\?'.*\\?')?[a-zA-Z, _}{]*\)" + ) + OLD_SYNTAX_OR_ARGUMENTS_RE = r"Tree\(.*[0-9]+.*\)" + + def _check_replicad_new_syntax(self, line): + return re.search(self.NEW_SYNTAX_REPLICATED_MERGE_TREE_RE, line) is not None + + def _check_old_syntax_or_arguments(self, line): + return re.search(self.OLD_SYNTAX_OR_ARGUMENTS_RE, line) is not None + + @staticmethod + def _is_comment_line(line): + return line.startswith("SELECT") or line.startswith("select") + + @staticmethod + def _is_create_query(line): + return ( + line.startswith("CREATE") + or line.startswith("create") + or line.startswith("ENGINE") + or line.startswith("engine") + ) + + def _replace_non_replicated(self, line, escape_quotes, use_random_path): + groups = re.search(self.ENGINES_NON_REPLICATED_REGEXP, line) + if groups is not None and not self._check_old_syntax_or_arguments(line): + non_replicated_engine = groups.groups()[0] + basename_no_ext = os.path.splitext(os.path.basename(self.file_name))[0] + if use_random_path: + shared_path = "/" + os.path.join( + basename_no_ext.replace("_", "/"), + str(os.getpid()), + str(random.randint(1, 1000)), + ) + else: + shared_path = "/" + os.path.join( + basename_no_ext.replace("_", "/"), str(os.getpid()) + ) + + if escape_quotes: + shared_engine = ( + "Shared" + + non_replicated_engine.replace("()", "") + + f"(\\'{shared_path}\\', \\'1\\')" + ) + else: + shared_engine = ( + "Shared" + + non_replicated_engine.replace("()", "") + + f"('{shared_path}', '1')" + ) + return line.replace(non_replicated_engine, shared_engine) + + return line + + def _need_to_replace_something(self): + return ( + self.replace_replicated or self.replace_non_replicated + ) and "shared_merge_tree" not in self.file_name + + def _has_show_create_table(self): + with open(self.file_name, "r", encoding="utf-8") as f: + return re.search("show create table", f.read(), re.IGNORECASE) + + def __init__( + self, file_name, replace_replicated, replace_non_replicated, reference_file + ): + self.file_name = file_name + self.temp_file_path = get_temp_file_path() + self.replace_replicated = replace_replicated + self.replace_non_replicated = replace_non_replicated + + use_random_path = not reference_file and not self._has_show_create_table() + + if not self._need_to_replace_something(): + return + + shutil.copyfile(self.file_name, self.temp_file_path) + shutil.copymode(self.file_name, self.temp_file_path) + + with open(self.file_name, "w", newline="", encoding="utf-8") as modified: + with open(self.temp_file_path, "r", newline="", encoding="utf-8") as source: + for line in source: + if self._is_comment_line(line) or ( + reference_file and not self._is_create_query(line) + ): + modified.write(line) + continue + + if self.replace_replicated: + 
for ( + engine_from, + engine_to, + ) in SharedEngineReplacer.ENGINES_MAPPING_REPLICATED: + if engine_from in line and ( + self._check_replicad_new_syntax(line) + or engine_from + " " in line + or engine_from + ";" in line + ): + line = line.replace(engine_from, engine_to) + break + + if self.replace_non_replicated: + line = self._replace_non_replicated( + line, reference_file, use_random_path + ) + + modified.write(line) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, exc_tb): + if not self._need_to_replace_something(): + return + shutil.move(self.temp_file_path, self.file_name) + + +def get_temp_file_path(): + return os.path.join( + tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()) + ) + + def stringhash(s: str) -> int: # default hash() function consistent # only during process invocation https://stackoverflow.com/a/42089311 @@ -92,6 +232,16 @@ def trim_for_log(s): return "\n".join(lines) +def is_valid_utf_8(fname): + try: + with open(fname, "rb") as f: + contents = f.read() + contents.decode("utf-8") + return True + except UnicodeDecodeError: + return False + + class TestException(Exception): pass @@ -536,6 +686,8 @@ class FailureReason(enum.Enum): INTERNAL_QUERY_FAIL = "Internal query (CREATE/DROP DATABASE) failed:" # SKIPPED reasons + NOT_SUPPORTED_IN_CLOUD = "not supported in cloud environment" + NOT_SUPPORTED_IN_PRIVATE = "not supported in private build" DISABLED = "disabled" SKIP = "skip" NO_JINJA = "no jinja" @@ -548,6 +700,7 @@ class FailureReason(enum.Enum): S3_STORAGE = "s3-storage" BUILD = "not running for current build" NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas" + SHARED_MERGE_TREE = "no-shared-merge-tree" # UNKNOWN reasons NO_REFERENCE = "no reference file" @@ -606,8 +759,6 @@ class SettingsRandomizer: "read_in_order_two_level_merge_threshold": lambda: random.randint(0, 100), "optimize_aggregation_in_order": lambda: random.randint(0, 1), "aggregation_in_order_max_block_bytes": lambda: random.randint(0, 50000000), - "min_compress_block_size": lambda: random.randint(1, 1048576 * 3), - "max_compress_block_size": lambda: random.randint(1, 1048576 * 3), "use_uncompressed_cache": lambda: random.randint(0, 1), "min_bytes_to_use_direct_io": threshold_generator( 0.2, 0.5, 1, 10 * 1024 * 1024 * 1024 @@ -659,6 +810,11 @@ class SettingsRandomizer: 0.3, 0.5, 1, 10 * 1024 * 1024 * 1024 ), "max_bytes_before_remerge_sort": lambda: random.randint(1, 3000000000), + "min_compress_block_size": lambda: random.randint(1, 1048576 * 3), + "max_compress_block_size": lambda: random.randint(1, 1048576 * 3), + "merge_tree_compact_parts_min_granules_to_multibuffer_read": lambda: random.randint( + 1, 128 + ), "optimize_sorting_by_input_stream_properties": lambda: random.randint(0, 1), "http_response_buffer_size": lambda: random.randint(0, 10 * 1048576), "http_wait_end_of_query": lambda: random.random() > 0.5, @@ -684,6 +840,7 @@ class SettingsRandomizer: get_localzone(), ] ), + "prefer_warmed_unmerged_parts_seconds": lambda: random.randint(0, 10), "use_page_cache_for_disks_without_file_cache": lambda: random.random() < 0.7, "page_cache_inject_eviction": lambda: random.random() < 0.5, } @@ -733,6 +890,17 @@ class MergeTreeSettingsRandomizer: "primary_key_compress_block_size": lambda: random.randint(8000, 100000), "replace_long_file_name_to_hash": lambda: random.randint(0, 1), "max_file_name_length": threshold_generator(0.3, 0.3, 0, 128), + "min_bytes_for_full_part_storage": threshold_generator( + 0.3, 0.3, 0, 512 * 1024 * 
1024 + ), + "compact_parts_max_bytes_to_buffer": lambda: random.randint( + 1024, 512 * 1024 * 1024 + ), + "compact_parts_max_granules_to_buffer": threshold_generator(0.15, 0.15, 1, 256), + "compact_parts_merge_max_bytes_to_prefetch_part": lambda: random.randint( + 1, 32 * 1024 * 1024 + ), + "cache_populated_by_fetch": lambda: random.randint(0, 1), } @staticmethod @@ -744,6 +912,10 @@ class MergeTreeSettingsRandomizer: return random_settings +def replace_in_file(filename, what, with_what): + os.system(f"LC_ALL=C sed -i -e 's|{what}|{with_what}|g' {filename}") + + class TestResult: def __init__( self, @@ -972,6 +1144,15 @@ class TestCase: if tags and ("disabled" in tags) and not args.disabled: return FailureReason.DISABLED + elif args.private and self.name in suite.private_skip_list: + return FailureReason.NOT_SUPPORTED_IN_PRIVATE + + elif args.cloud and ("no-replicated-database" in tags): + return FailureReason.REPLICATED_DB + + elif args.cloud and self.name in suite.cloud_skip_list: + return FailureReason.NOT_SUPPORTED_IN_CLOUD + elif ( os.path.exists(os.path.join(suite.suite_path, self.name) + ".disabled") and not args.disabled @@ -1022,6 +1203,13 @@ class TestCase: ): return FailureReason.NON_ATOMIC_DB + elif ( + tags + and ("no-shared-merge-tree" in tags) + and args.replace_replicated_with_shared + ): + return FailureReason.SHARED_MERGE_TREE + elif tags and ("no-s3-storage" in tags) and args.s3_storage: return FailureReason.S3_STORAGE elif ( @@ -1051,7 +1239,8 @@ class TestCase: ): description = "" - debug_log = trim_for_log(debug_log) + if debug_log: + debug_log = "\n".join(debug_log.splitlines()[:100]) if proc: if proc.returncode is None: @@ -1136,6 +1325,7 @@ class TestCase: description += "\nstdout:\n" description += trim_for_log(stdout) description += "\n" + if debug_log: description += "\n" description += debug_log @@ -1148,9 +1338,7 @@ class TestCase: ) if "Exception" in stdout: - description += "\n" - description += trim_for_log(stdout) - description += "\n" + description += "\n{}\n".format("\n".join(stdout.splitlines()[:100])) if debug_log: description += "\n" description += debug_log @@ -1358,7 +1546,13 @@ class TestCase: # because there are also output of per test database creation pattern = "{test} > {stdout} 2> {stderr}" - if self.ext == ".sql": + if self.ext == ".sql" and args.cloud: + # Get at least some logs, because we don't have access to system.text_log and pods... + pattern = ( + "{client} --send_logs_level={logs_level} {secure} --multiquery {options}" + " --send_logs_level=trace < {test} > {stdout} 2>> /test_output/some_logs_from_server.log" + ) + elif self.ext == ".sql" and not args.cloud: pattern = ( "{client} --send_logs_level={logs_level} {secure} --multiquery {options} < " + pattern @@ -1396,17 +1590,15 @@ class TestCase: total_time = (datetime.now() - start_time).total_seconds() # Normalize randomized database names in stdout, stderr files. - os.system(f"LC_ALL=C sed -i -e 's/{database}/default/g' {self.stdout_file}") + replace_in_file(self.stdout_file, database, "default") if args.hide_db_name: - os.system(f"LC_ALL=C sed -i -e 's/{database}/default/g' {self.stderr_file}") + replace_in_file(self.stderr_file, database, "default") if args.replicated_database: - os.system(f"LC_ALL=C sed -i -e 's|/auto_{{shard}}||g' {self.stdout_file}") - os.system(f"LC_ALL=C sed -i -e 's|auto_{{replica}}||g' {self.stdout_file}") + replace_in_file(self.stdout_file, "/auto_{shard}", "") + replace_in_file(self.stdout_file, "auto_{replica}", "") # Normalize hostname in stdout file. 
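To make the `SharedEngineReplacer` hunk above concrete: for non-replicated engines it rewrites the `ENGINE` clause of a test file in place, deriving a ZooKeeper-style path from the test file name and pid. A minimal reproduction using the regexp the class defines (the path below is a made-up stand-in for what `_replace_non_replicated` computes):

```python
import re

ENGINES_NON_REPLICATED_REGEXP = (
    r"[ =]((Collapsing|VersionedCollapsing|Summing|Replacing|Aggregating|)MergeTree\(?\)?)"
)

line = "CREATE TABLE t (a Int32) ENGINE = MergeTree() ORDER BY a;"
engine = re.search(ENGINES_NON_REPLICATED_REGEXP, line).groups()[0]  # 'MergeTree()'
shared = "Shared" + engine.replace("()", "") + "('/02000/some/test/123', '1')"
print(line.replace(engine, shared))
# CREATE TABLE t (a Int32) ENGINE = SharedMergeTree('/02000/some/test/123', '1') ORDER BY a;
```

Replicated engines take the simpler name mapping in `ENGINES_MAPPING_REPLICATED` (`ReplicatedMergeTree` becomes `SharedMergeTree`, and so on), and old-syntax engines with numeric arguments are deliberately left untouched by the `OLD_SYNTAX_OR_ARGUMENTS_RE` guard.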
- os.system( - f"LC_ALL=C sed -i -e 's/{socket.gethostname()}/localhost/g' {self.stdout_file}" - ) + replace_in_file(self.stdout_file, socket.gethostname(), "localhost") stdout = "" if os.path.exists(self.stdout_file): @@ -1444,18 +1636,51 @@ class TestCase: self.testcase_args = self.configure_testcase_args( args, self.case_file, suite.suite_tmp_path ) + client_options = self.add_random_settings(client_options) - proc, stdout, stderr, debug_log, total_time = self.run_single_test( - server_logs_level, client_options - ) - result = self.process_result_impl( - proc, stdout, stderr, debug_log, total_time - ) - result.check_if_need_retry(args, stdout, stderr, self.runs_count) - # to avoid breaking CSV parser - result.description = result.description.replace("\0", "") + if not is_valid_utf_8(self.case_file) or not is_valid_utf_8( + self.reference_file + ): + proc, stdout, stderr, debug_log, total_time = self.run_single_test( + server_logs_level, client_options + ) + result = self.process_result_impl( + proc, stdout, stderr, debug_log, total_time + ) + result.check_if_need_retry(args, stdout, stderr, self.runs_count) + # to avoid breaking CSV parser + result.description = result.description.replace("\0", "") + else: + with SharedEngineReplacer( + self.case_file, + args.replace_replicated_with_shared, + args.replace_non_replicated_with_shared, + False, + ): + with SharedEngineReplacer( + self.reference_file, + args.replace_replicated_with_shared, + args.replace_non_replicated_with_shared, + True, + ): + ( + proc, + stdout, + stderr, + debug_log, + total_time, + ) = self.run_single_test(server_logs_level, client_options) + + result = self.process_result_impl( + proc, stdout, stderr, debug_log, total_time + ) + result.check_if_need_retry( + args, stdout, stderr, self.runs_count + ) + # to avoid breaking CSV parser + result.description = result.description.replace("\0", "") if result.status == TestStatus.FAIL: result.description = self.add_info_about_settings(result.description) @@ -1688,6 +1913,8 @@ class TestSuite: self.suite_path: str = suite_path self.suite_tmp_path: str = suite_tmp_path self.suite: str = suite + self.cloud_skip_list: List[str] = [] + self.private_skip_list: List[str] = [] if args.run_by_hash_num is not None and args.run_by_hash_total is not None: if args.run_by_hash_num > args.run_by_hash_total: @@ -1987,10 +2214,16 @@ def check_server_started(args): sys.stdout.flush() retry_count = args.server_check_retries + query = "SELECT version(), arrayStringConcat(groupArray(value), ' ') FROM system.build_options WHERE name IN ('GIT_HASH', 'GIT_BRANCH')" while retry_count > 0: try: - clickhouse_execute(args, "SELECT 1", max_http_retries=1) + res = ( + str(clickhouse_execute(args, query).decode()) + .strip() + .replace("\t", " @ ") + ) print(" OK") + print(f"Connected to server {res}") sys.stdout.flush() return True except (ConnectionError, http.client.ImproperConnectionState) as e: @@ -2412,6 +2645,23 @@ def reportLogStats(args): print("\n") +def try_get_skip_list(base_dir, name): + test_names_to_skip = [] + skip_list_path = os.path.join(base_dir, name) + if not os.path.exists(skip_list_path): + return test_names_to_skip + + with open(skip_list_path, "r", encoding="utf-8") as fd: + for line in fd.read().split("\n"): + if line == "" or line[0] == " ": + continue + test_name = line.split()[0].strip() + if test_name != "": + test_names_to_skip.append(test_name) + + return test_names_to_skip + + def main(args): global server_died global stop_time @@ -2430,18 +2680,18 @@ def main(args): 
args.build_flags = collect_build_flags(args) args.changed_merge_tree_settings = collect_changed_merge_tree_settings(args) - args.suppport_system_processes_is_all_data_sent = check_table_column( - args, "system", "processes", "is_all_data_sent" - ) - if args.s3_storage and ( - BuildFlags.THREAD in args.build_flags or BuildFlags.DEBUG in args.build_flags - ): + if args.s3_storage and (BuildFlags.RELEASE not in args.build_flags): args.no_random_settings = True if args.skip: args.skip = set(args.skip) + if args.replace_replicated_with_shared: + if not args.skip: + args.skip = set([]) + args.skip = set(args.skip) + base_dir = os.path.abspath(args.queries) # Keep same default values as in queries/shell_config.sh @@ -2516,6 +2766,8 @@ def main(args): ) total_tests_run = 0 + cloud_skip_list = try_get_skip_list(base_dir, "../queries-no-cloud-tests.txt") + private_skip_list = try_get_skip_list(base_dir, "../queries-no-private-tests.txt") for suite in sorted(os.listdir(base_dir), key=suite_key_func): if server_died.is_set(): @@ -2525,6 +2777,8 @@ def main(args): if test_suite is None: continue + test_suite.cloud_skip_list = cloud_skip_list + test_suite.private_skip_list = private_skip_list total_tests_run += do_run_tests(args.jobs, test_suite, args.parallel) if server_died.is_set(): @@ -2644,7 +2898,14 @@ def find_clickhouse_command(binary, command): def get_additional_client_options(args): if args.client_option: - return " ".join("--" + option for option in args.client_option) + client_options = " ".join("--" + option for option in args.client_option) + if "CLICKHOUSE_CLIENT_OPT" in os.environ: + return os.environ["CLICKHOUSE_CLIENT_OPT"] + client_options + else: + return client_options + else: + if "CLICKHOUSE_CLIENT_OPT" in os.environ: + return os.environ["CLICKHOUSE_CLIENT_OPT"] return "" @@ -2839,6 +3100,43 @@ def parse_args(): help="Display $ characters after line with trailing whitespaces in diff output", ) + group = parser.add_mutually_exclusive_group(required=False) + group.add_argument( + "--cloud", + action="store_true", + default=None, + dest="cloud", + help="Run only tests that are supported in ClickHouse Cloud environment", + ) + + group.add_argument( + "--no-cloud", + action="store_false", + default=None, + dest="cloud", + help="Run all the tests, including the ones not supported in ClickHouse Cloud environment", + ) + parser.set_defaults(cloud=False) + + group = parser.add_mutually_exclusive_group(required=False) + group.add_argument( + "--private", + action="store_true", + default=None, + dest="private", + help="Run only tests that are supported in the private build", + ) + + group.add_argument( + "--no-private", + action="store_false", + default=None, + dest="private", + help="Run all the tests, including the ones not supported in the private build", + ) + # Only used to skip tests via "../queries-no-private-tests.txt", so it's fine to keep it enabled by default + parser.set_defaults(private=True) + group = parser.add_mutually_exclusive_group(required=False) group.add_argument( "--zookeeper", @@ -2920,6 +3218,18 @@ def parse_args(): default=False, help="Do not include tests that are not supported with parallel replicas feature", ) + parser.add_argument( + "--replace-replicated-with-shared", + action="store_true", + default=os.environ.get("USE_META_IN_KEEPER_FOR_MERGE_TREE", False), + help="Replace ReplicatedMergeTree engine with SharedMergeTree", + ) + parser.add_argument( + "--replace-non-replicated-with-shared", + action="store_true", + default=False, + help="Replace ordinary 
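The `--cloud`/`--private` switches above pair with `try_get_skip_list` earlier in this patch: test names listed in `../queries-no-cloud-tests.txt` or `../queries-no-private-tests.txt` are reported as skipped in the corresponding mode. A sketch of the file format that parser accepts, with hypothetical entries: one test name per line, optionally followed by a free-form comment, while empty and indented lines are ignored.

```python
sample = """02995_forget_partition  hypothetical reason: relies on local timing
00001_select_one
  indented lines and empty lines are ignored
"""

names = []
for line in sample.split("\n"):
    if line == "" or line[0] == " ":
        continue
    name = line.split()[0].strip()
    if name != "":
        names.append(name)

assert names == ["02995_forget_partition", "00001_select_one"]
```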
MergeTree engine with SharedMergeTree", + ) return parser.parse_args() @@ -3062,6 +3372,7 @@ if __name__ == "__main__": client_options_query_str = get_additional_client_options_url(args) args.client_options_query_str = client_options_query_str + "&" + args.client_options_query_str += os.environ["CLICKHOUSE_URL_PARAMS"] os.environ["CLICKHOUSE_URL_PARAMS"] += client_options_query_str else: args.client_options_query_str = "" @@ -3072,4 +3383,7 @@ if __name__ == "__main__": if args.db_engine and args.db_engine == "Ordinary": MESSAGES_TO_RETRY.append(" locking attempt on ") + if args.replace_replicated_with_shared: + args.s3_storage = True + main(args) From 80723134d6957f72606a419040c8101ec60c05e9 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 13 Mar 2024 14:00:57 +0100 Subject: [PATCH 162/283] Fix fast test #ci_set_analyzer --- src/Storages/StorageMerge.cpp | 89 +++++++++++++++++++++++------------ 1 file changed, 58 insertions(+), 31 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index e695594873b..f0b9d58f3dd 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1058,47 +1058,74 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( Block pipe_header = builder->getHeader(); - auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); - if (storage_snapshot_->storage.supportsSubcolumns()) - get_column_options.withSubcolumns(); - LOG_DEBUG(&Poco::Logger::get("createSources"), "Processed:{}\nStorage:{}", toString(processed_stage), toString(storage_stage)); - String table_alias; if (allow_experimental_analyzer) - table_alias = modified_query_info.query_tree->as()->getJoinTree()->as()->getAlias(); - - String database_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? "_database" : table_alias + "._database"; - String table_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? "_table" : table_alias + "._table"; - - if (has_database_virtual_column && common_header.has(database_column) && (storage_stage == QueryProcessingStage::FetchColumns || dynamic_cast(&storage_snapshot_->storage) != nullptr)) { - ColumnWithTypeAndName column; - column.name = database_column; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(database_name)); + String table_alias = modified_query_info.query_tree->as()->getJoinTree()->as()->getAlias(); - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); + String database_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? "_database" : table_alias + "._database"; + String table_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? 
"_table" : table_alias + "._table"; - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); + if (has_database_virtual_column && common_header.has(database_column) && (storage_stage == QueryProcessingStage::FetchColumns || dynamic_cast(&storage_snapshot_->storage) != nullptr)) + { + ColumnWithTypeAndName column; + column.name = database_column; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(database_name)); + + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto adding_column_actions = std::make_shared( + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); + + builder->addSimpleTransform([&](const Block & stream_header) + { return std::make_shared(stream_header, adding_column_actions); }); + } + + if (has_table_virtual_column && common_header.has(table_column) && (storage_stage == QueryProcessingStage::FetchColumns || dynamic_cast(&storage_snapshot_->storage) != nullptr)) + { + ColumnWithTypeAndName column; + column.name = table_column; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(table_name)); + + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto adding_column_actions = std::make_shared( + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); + + builder->addSimpleTransform([&](const Block & stream_header) + { return std::make_shared(stream_header, adding_column_actions); }); + } } - - if (has_table_virtual_column && common_header.has(table_column) && (storage_stage == QueryProcessingStage::FetchColumns || dynamic_cast(&storage_snapshot_->storage) != nullptr)) + else { - ColumnWithTypeAndName column; - column.name = table_column; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(table_name)); + if (has_database_virtual_column && common_header.has("_database") && !pipe_header.has("_database")) + { + ColumnWithTypeAndName column; + column.name = "_database"; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(database_name)); - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto adding_column_actions = std::make_shared( + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); + builder->addSimpleTransform([&](const Block & stream_header) + { return std::make_shared(stream_header, adding_column_actions); }); + } - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); + if (has_table_virtual_column && common_header.has("_table") && !pipe_header.has("_table")) + { + ColumnWithTypeAndName column; + column.name = "_table"; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(table_name)); + + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto adding_column_actions = std::make_shared( + 
std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); + builder->addSimpleTransform([&](const Block & stream_header) + { return std::make_shared(stream_header, adding_column_actions); }); + } } /// Subordinary tables could have different but convertible types, like numeric types of different width. From d8c5008280aaf19bd481d436099afd89019a81c4 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 13 Mar 2024 13:07:14 +0000 Subject: [PATCH 163/283] Follow up to #61258 --- .../functions/other-functions.md | 6 ++--- src/Functions/sleep.h | 25 ++++--------------- 2 files changed, 8 insertions(+), 23 deletions(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 288905c83da..e7fca31483a 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -298,7 +298,7 @@ Full columns and constants are represented differently in memory. Functions usua Accepts any arguments, including `NULL` and does nothing. Always returns 0. The argument is internally still evaluated. Useful e.g. for benchmarks. -## sleep(seconds) +## sleep Used to introduce a delay or pause in the execution of a query. It is primarily used for testing and debugging purposes. @@ -310,7 +310,7 @@ sleep(seconds) **Arguments** -- `seconds`: [Int](../../sql-reference/data-types/int-uint.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. +- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. **Returned value** @@ -360,7 +360,7 @@ sleepEachRow(seconds) **Arguments** -- `seconds`: [Int](../../sql-reference/data-types/int-uint.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. +- `seconds`: [UInt*](../../sql-reference/data-types/int-uint.md) or [Float*](../../sql-reference/data-types/float.md) The number of seconds to pause the query execution for each row in the result set to a maximum of 3 seconds. It can be a floating-point value to specify fractional seconds. **Returned value** diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index 73d58ca6b5b..84f08dd5440 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -62,32 +62,17 @@ public: { } - /// Get the name of the function. - String getName() const override - { - return name; - } - - /// Do not sleep during query analysis. - bool isSuitableForConstantFolding() const override - { - return false; - } - - size_t getNumberOfArguments() const override - { - return 1; - } - + String getName() const override { return name; } + bool isSuitableForConstantFolding() const override { return false; } /// Do not sleep during query analysis. 
+ size_t getNumberOfArguments() const override { return 1; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { WhichDataType which(arguments[0]); - if (!which.isFloat() - && !which.isNativeUInt()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}, expected Float64", + if (!which.isFloat() && !which.isNativeUInt()) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}, expected UInt* or Float*", arguments[0]->getName(), getName()); return std::make_shared(); From e6af636a549f808730c87ab69a6b76531d3dbc95 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 13 Mar 2024 14:07:49 +0100 Subject: [PATCH 164/283] fix data race in poco tcp server --- base/poco/Net/src/TCPServerDispatcher.cpp | 4 +++- src/Common/tests/gtest_connection_pool.cpp | 11 +++-------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/base/poco/Net/src/TCPServerDispatcher.cpp b/base/poco/Net/src/TCPServerDispatcher.cpp index 20a1ffe1b4f..7f9f9a20ee7 100644 --- a/base/poco/Net/src/TCPServerDispatcher.cpp +++ b/base/poco/Net/src/TCPServerDispatcher.cpp @@ -93,7 +93,7 @@ void TCPServerDispatcher::release() void TCPServerDispatcher::run() { - AutoPtr guard(this, true); // ensure object stays alive + AutoPtr guard(this); // ensure object stays alive int idleTime = (int) _pParams->getThreadIdleTime().totalMilliseconds(); @@ -149,11 +149,13 @@ void TCPServerDispatcher::enqueue(const StreamSocket& socket) { try { + this->duplicate(); _threadPool.startWithPriority(_pParams->getThreadPriority(), *this, threadName); ++_currentThreads; } catch (Poco::Exception& exc) { + this->release(); ++_refusedConnections; std::cerr << "Got exception while starting thread for connection. 
Error code: " << exc.code() << ", message: '" << exc.displayText() << "'" << std::endl; diff --git a/src/Common/tests/gtest_connection_pool.cpp b/src/Common/tests/gtest_connection_pool.cpp index c271cc0e2ec..dcc3c11fd52 100644 --- a/src/Common/tests/gtest_connection_pool.cpp +++ b/src/Common/tests/gtest_connection_pool.cpp @@ -123,17 +123,15 @@ protected: std::string getServerUrl() const { - return "http://" + server_data.socket->address().toString(); + return "http://" + server_data.server->socket().address().toString(); } void startServer() { server_data.reset(); - server_data.params = new Poco::Net::HTTPServerParams(); - server_data.socket = std::make_unique(server_data.port); server_data.handler_factory = new HTTPRequestHandlerFactory(slowdown_receive); server_data.server = std::make_unique( - server_data.handler_factory, *server_data.socket, server_data.params); + server_data.handler_factory, server_data.port); server_data.server->start(); } @@ -155,8 +153,7 @@ protected: { // just some port to avoid collisions with others tests UInt16 port = 9871; - Poco::Net::HTTPServerParams::Ptr params; - std::unique_ptr socket; + HTTPRequestHandlerFactory::Ptr handler_factory; std::unique_ptr server; @@ -171,8 +168,6 @@ protected: server = nullptr; handler_factory = nullptr; - socket = nullptr; - params = nullptr; } ~ServerData() { From 9b1d8a2f52f2dd67a242eef0451b127c86066be5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Mar 2024 14:15:34 +0100 Subject: [PATCH 165/283] fix style --- docs/en/operations/configuration-files.md | 2 +- programs/server/Server.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index fbac2407640..6b2dd309623 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -116,7 +116,7 @@ XML substitution example: Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node, and it will be fully inserted into the source element. -If you want to merge substitution content with existing configuration instead of append you can use attribute `merge="true`, for example: ``. In this case existing configuration will be merged with content from substituion and existing configuration settings will be replaced with values from substituion. +If you want to merge substitution content with existing configuration instead of append you can use attribute `merge="true`, for example: ``. In this case existing configuration will be merged with content from substitution and existing configuration settings will be replaced with values from substituion. ## Encrypting and Hiding Configuration {#encryption} diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 9d7dd02a235..3be34341173 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -910,7 +910,7 @@ try /// We need to reload server settings because config could be updated via zookeeper. 
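Stepping back to the `TCPServerDispatcher` fix above: the race was one of reference-counting ownership. `enqueue()` now takes a reference *before* handing the dispatcher to a worker thread, and gives it back if the thread fails to start, while the guard in `run()` adopts that reference instead of taking a second one. A schematic analogy of the ownership flow, sketched in Python rather than the actual Poco code:

```python
import threading

class Dispatcher:
    def __init__(self):
        self._refs = 1
        self._lock = threading.Lock()

    def duplicate(self):
        with self._lock:
            self._refs += 1

    def release(self):
        with self._lock:
            self._refs -= 1
            last = self._refs == 0
        if last:
            print("last reference gone; safe to destroy")

    def enqueue(self, start_worker):
        self.duplicate()      # the reference that run() will own
        try:
            start_worker(self.run)
        except RuntimeError:
            self.release()    # run() will never execute; give it back
            raise

    def run(self):
        try:
            pass              # ... serve connections ...
        finally:
            self.release()    # what the adopted AutoPtr guard does in run()
```

With the old `AutoPtr guard(this, true)`, the worker took its own extra reference only after it had started running, leaving a window between `startWithPriority` and the first instruction of `run()` in which the dispatcher could already be destroyed.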
server_settings.loadSettingsFromConfig(config()); - /// NOTE: Do sanity checks after we loaded all possible substituions from ZK + /// NOTE: Do sanity checks after we loaded all possible substitutions from ZK sanityChecks(*this); #if defined(OS_LINUX) From fefee44540bd029eb2d354706f61a1d96ed0e272 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 13 Mar 2024 14:27:47 +0100 Subject: [PATCH 166/283] Update settings changes history --- src/Core/SettingsChangesHistory.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index e680c02671a..d7b0669f64f 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -93,6 +93,7 @@ static std::map sett {"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"}, {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication is dependent materialized view cannot work together with async inserts."}, {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, + {"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"}, }}, {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, From 9bb71291d50d29dd0c401580402adc12290224bb Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 13 Mar 2024 14:42:57 +0100 Subject: [PATCH 167/283] Fix unit test --- src/Interpreters/tests/gtest_filecache.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/tests/gtest_filecache.cpp b/src/Interpreters/tests/gtest_filecache.cpp index b596ccb0285..2679d1b8d18 100644 --- a/src/Interpreters/tests/gtest_filecache.cpp +++ b/src/Interpreters/tests/gtest_filecache.cpp @@ -245,7 +245,7 @@ void download(FileSegment & file_segment) ASSERT_EQ(file_segment.state(), State::DOWNLOADING); ASSERT_EQ(file_segment.getDownloadedSize(), 0); - ASSERT_TRUE(file_segment.reserve(file_segment.range().size())); + ASSERT_TRUE(file_segment.reserve(file_segment.range().size(), 1000)); download(cache_base_path, file_segment); ASSERT_EQ(file_segment.state(), State::DOWNLOADING); @@ -257,7 +257,7 @@ void assertDownloadFails(FileSegment & file_segment) { ASSERT_EQ(file_segment.getOrSetDownloader(), FileSegment::getCallerId()); ASSERT_EQ(file_segment.getDownloadedSize(), 0); - ASSERT_FALSE(file_segment.reserve(file_segment.range().size())); + ASSERT_FALSE(file_segment.reserve(file_segment.range().size(), 1000)); file_segment.complete(); } @@ -956,7 +956,7 @@ TEST_F(FileCacheTest, temporaryData) for (auto & segment : *some_data_holder) { ASSERT_TRUE(segment->getOrSetDownloader() == DB::FileSegment::getCallerId()); - ASSERT_TRUE(segment->reserve(segment->range().size())); + ASSERT_TRUE(segment->reserve(segment->range().size(), 1000)); download(*segment); segment->complete(); } From 4f2be003521b00c9a9087e17fcffdf08cabcd5f1 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 13 Mar 2024 13:55:32 +0000 Subject: [PATCH 168/283] Some fixups --- docs/en/sql-reference/functions/functions-for-nulls.md | 10 
+++++----- docs/en/sql-reference/operators/index.md | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index e73d6c899e7..61da9a191a1 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -10,11 +10,13 @@ sidebar_label: Nullable Returns whether the argument is [NULL](../../sql-reference/syntax.md#null). +See also operator [`IS NULL`](../operators/index.md#is_null). + ``` sql isNull(x) ``` -Alias: `IS NULL`. +Alias: `ISNULL` **Arguments** @@ -54,12 +56,12 @@ Result: Returns whether the argument is not [NULL](../../sql-reference/syntax.md#null-literal). +See also operator [`IS NOT NULL`](../operators/index.md#is_not_null). + ``` sql isNotNull(x) ``` -Alias: `IS NOT NULL`. - **Arguments:** - `x` — A value of non-compound data type. @@ -102,8 +104,6 @@ Returns whether the argument is 0 (zero) or [NULL](../../sql-reference/syntax.md isZeroOrNull(x) ``` -Alias: `x = 0 OR x IS NULL`. - **Arguments:** - `x` — A value of non-compound data type. diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index 120e464e009..31bf43e8b35 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -353,7 +353,7 @@ For efficiency, the `and` and `or` functions accept any number of arguments. The ClickHouse supports the `IS NULL` and `IS NOT NULL` operators. -### IS NULL +### IS NULL {#is_null} - For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NULL` operator returns: - `1`, if the value is `NULL`. @@ -374,7 +374,7 @@ SELECT x+100 FROM t_null WHERE y IS NULL └──────────────┘ ``` -### IS NOT NULL +### IS NOT NULL {#is_not_null} - For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NOT NULL` operator returns: - `0`, if the value is `NULL`. From 6bbf9eb5400206c326a4e453a38612c8beb6ef89 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 13 Mar 2024 13:57:55 +0000 Subject: [PATCH 169/283] Fixup fixups --- docs/en/sql-reference/functions/functions-for-nulls.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 61da9a191a1..4dfbf4262ed 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -16,7 +16,7 @@ See also operator [`IS NULL`](../operators/index.md#is_null). isNull(x) ``` -Alias: `ISNULL` +Alias: `ISNULL`. **Arguments** From 9338b681efa7ea67e09e2ee37636ca0d551590a6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Mar 2024 15:12:36 +0100 Subject: [PATCH 170/283] I will bypass style check --- docs/en/operations/configuration-files.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 6b2dd309623..30f65261322 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -116,7 +116,7 @@ XML substitution example: Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. 
You can also put an entire XML subtree on the ZooKeeper node, and it will be fully inserted into the source element. -If you want to merge substitution content with existing configuration instead of append you can use attribute `merge="true`, for example: ``. In this case existing configuration will be merged with content from substitution and existing configuration settings will be replaced with values from substituion. +If you want to merge substitution content with existing configuration instead of append you can use attribute `merge="true`, for example: ``. In this case existing configuration will be merged with content from substitution and existing configuration settings will be replaced with values from substitution. ## Encrypting and Hiding Configuration {#encryption} From ce665eae3012e330d0fb5186712c0e9c7086a4fa Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 13 Mar 2024 15:13:17 +0000 Subject: [PATCH 171/283] Doc fixups --- docs/en/operations/configuration-files.md | 10 +++++----- programs/server/Server.cpp | 2 +- src/Common/Config/ConfigProcessor.cpp | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 30f65261322..089704705d0 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -95,9 +95,11 @@ which is equal to ## Substituting Configuration {#substitution} -The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/clickhouse/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)). +The config can define substitutions. There are two types of substitutions: -If you want to replace an entire element with a substitution use `include` as the element name. +- If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/clickhouse/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)). + +- If you want to replace an entire element with a substitution, use `include` as the element name. Substitutions can also be performed from ZooKeeper by specifying attribute `from_zk = "/path/to/node"`. In this case, the element value is replaced with the contents of the Zookeeper node at `/path/to/node`. 
This also works with you store an entire XML subtree as a Zookeeper node, it will be fully inserted into the source element. XML substitution example: @@ -114,9 +116,7 @@ XML substitution example: ``` -Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node, and it will be fully inserted into the source element. - -If you want to merge substitution content with existing configuration instead of append you can use attribute `merge="true`, for example: ``. In this case existing configuration will be merged with content from substitution and existing configuration settings will be replaced with values from substitution. +If you want to merge the substituting content with the existing configuration instead of appending you can use attribute `merge="true"`, for example: ``. In this case, the existing configuration will be merged with the content from the substitution and the existing configuration settings will be replaced with values from substitution. ## Encrypting and Hiding Configuration {#encryption} diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 3be34341173..d7030e3b0aa 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -910,7 +910,7 @@ try /// We need to reload server settings because config could be updated via zookeeper. server_settings.loadSettingsFromConfig(config()); - /// NOTE: Do sanity checks after we loaded all possible substitutions from ZK + /// NOTE: Do sanity checks after we loaded all possible substitutions (for the configuration) from ZK sanityChecks(*this); #if defined(OS_LINUX) diff --git a/src/Common/Config/ConfigProcessor.cpp b/src/Common/Config/ConfigProcessor.cpp index d524db883eb..60407c6a174 100644 --- a/src/Common/Config/ConfigProcessor.cpp +++ b/src/Common/Config/ConfigProcessor.cpp @@ -427,6 +427,7 @@ void ConfigProcessor::doIncludesRecursive( /// Replace the original contents, not add to it. bool replace = attributes->getNamedItem("replace"); + /// Merge with the original contents bool merge = attributes->getNamedItem("merge"); bool included_something = false; From b8abd57b0f1e4f7b059cb119ccc0a7ad1d723d2c Mon Sep 17 00:00:00 2001 From: qaziqarta <96023488+qaziqarta@users.noreply.github.com> Date: Wed, 13 Mar 2024 21:18:05 +0600 Subject: [PATCH 172/283] Updated references to format settings in datetime.md Updated references to date_time_input_format and date_time_output_format. --- docs/en/sql-reference/data-types/datetime.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index 1adff18f598..a465106c2ff 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -36,9 +36,9 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn’t explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter. -ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. 
Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function. +ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings-formats.md#date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function. -When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting. +When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format) setting. ## Examples @@ -147,8 +147,8 @@ Time shifts for multiple days. Some pacific islands changed their timezone offse - [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) - [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) - [Functions for working with arrays](../../sql-reference/functions/array-functions.md) -- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format) -- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format) +- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format) +- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format) - [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone) - [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) From cb28c84a93709ab12fc32171bf880c0e911ec0d5 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Wed, 13 Mar 2024 16:25:58 +0100 Subject: [PATCH 173/283] Fix `forget_partition` test (#61237) --- ...et_partition.sql => 02995_forget_partition.sh} | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) rename tests/queries/0_stateless/{02995_forget_partition.sql => 02995_forget_partition.sh} (63%) mode change 100644 => 100755 diff --git a/tests/queries/0_stateless/02995_forget_partition.sql b/tests/queries/0_stateless/02995_forget_partition.sh old mode 100644 new mode 100755 similarity index 63% rename from tests/queries/0_stateless/02995_forget_partition.sql rename to tests/queries/0_stateless/02995_forget_partition.sh index 269f7932ea4..8ece8d3ddb3 --- a/tests/queries/0_stateless/02995_forget_partition.sql +++ b/tests/queries/0_stateless/02995_forget_partition.sh @@ -1,5 +1,12 @@ --- Tags: zookeeper, no-replicated-database +#!/usr/bin/env bash +# Tags: zookeeper, no-replicated-database +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + + +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ drop table if exists forget_partition; create table forget_partition @@ -16,7 +23,12 @@ insert into forget_partition select number, '2024-01-01' + interval number day, alter table forget_partition drop partition '20240101'; alter table forget_partition drop partition '20240102'; +""" +# DROP PARTITION do not wait for a part to be removed from memory due to possible concurrent SELECTs, so we have to do wait manually here +while [[ $(${CLICKHOUSE_CLIENT} -q "select count() from system.parts where database=currentDatabase() and table='forget_partition' and partition='20240101'") != 0 ]]; do sleep 0.1; done + +${CLICKHOUSE_CLIENT} --multiline --multiquery -q """ set allow_unrestricted_reads_from_keeper=1; select '---before---'; @@ -31,3 +43,4 @@ select '---after---'; select name from system.zookeeper where path = '/test/02995/' || currentDatabase() || '/rmt/block_numbers' order by name; drop table forget_partition; +""" From a366acf59c91fca4df1262c812ac8c58ee7643e2 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 13 Mar 2024 15:36:53 +0000 Subject: [PATCH 174/283] Don't use default cluster in test test_distibuted_settings --- .../test_distributed_config/configs/clusters.xml | 12 ++++++++++++ tests/integration/test_distributed_config/test.py | 6 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 tests/integration/test_distributed_config/configs/clusters.xml diff --git a/tests/integration/test_distributed_config/configs/clusters.xml b/tests/integration/test_distributed_config/configs/clusters.xml new file mode 100644 index 00000000000..754d765f23f --- /dev/null +++ b/tests/integration/test_distributed_config/configs/clusters.xml @@ -0,0 +1,12 @@ + + + + + + localhost + 9000 + + + + + diff --git a/tests/integration/test_distributed_config/test.py b/tests/integration/test_distributed_config/test.py index 500e9ecdeed..c08334985b1 100644 --- a/tests/integration/test_distributed_config/test.py +++ b/tests/integration/test_distributed_config/test.py @@ -3,7 +3,7 @@ from helpers.cluster import ClickHouseCluster import logging cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node", main_configs=["configs/overrides.xml"]) +node = cluster.add_instance("node", main_configs=["configs/overrides.xml", "configs/clusters.xml"]) @pytest.fixture(scope="module") @@ -23,7 +23,7 @@ def test_distibuted_settings(start_cluster): node.query( """ CREATE TABLE data_1 (key Int) ENGINE Memory(); - CREATE TABLE dist_1 as data_1 ENGINE Distributed(default, default, data_1) SETTINGS flush_on_detach = true; + CREATE TABLE dist_1 as data_1 ENGINE Distributed(localhost_cluster, default, data_1) SETTINGS flush_on_detach = true; SYSTEM STOP DISTRIBUTED SENDS dist_1; INSERT INTO dist_1 SETTINGS prefer_localhost_replica=0 VALUES (1); DETACH TABLE dist_1; @@ -36,7 +36,7 @@ def test_distibuted_settings(start_cluster): node.query( """ CREATE TABLE data_2 (key Int) ENGINE Memory(); - CREATE TABLE dist_2 as data_2 ENGINE Distributed(default, default, data_2); + CREATE TABLE dist_2 as data_2 ENGINE Distributed(localhost_cluster, default, data_2); SYSTEM STOP DISTRIBUTED SENDS dist_2; INSERT INTO dist_2 SETTINGS prefer_localhost_replica=0 VALUES (2); DETACH TABLE dist_2; From a4a859ba31c731d8d5163fe5759a166c8c95601c Mon Sep 17 00:00:00 2001 From: qaziqarta <96023488+qaziqarta@users.noreply.github.com> Date: Wed, 13 Mar 2024 21:37:50 +0600 Subject: [PATCH 175/283] Updated ru portion of previous commit --- 
docs/ru/sql-reference/data-types/datetime.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 57f24786bb7..25e87794147 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -27,9 +27,9 @@ DateTime([timezone]) Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`. -ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/index.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). +ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings-formats.md#date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). -При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/index.md#settings-date_time_input_format). +При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format). 
## Примеры {#primery} @@ -119,8 +119,8 @@ FROM dt - [Функции преобразования типов](../../sql-reference/functions/type-conversion-functions.md) - [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) - [Функции для работы с массивами](../../sql-reference/functions/array-functions.md) -- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_input_format) -- [Настройка `date_time_output_format`](../../operations/settings/index.md) +- [Настройка `date_time_input_format`](../../operations/settings/settings-formats.md#date_time_input_format) +- [Настройка `date_time_output_format`](../../operations/settings/settings-formats.md#date_time_output_format) - [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone) - [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime) From 868d24415b295da1c0d325e837823bfb7cde9253 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 13 Mar 2024 15:48:29 +0000 Subject: [PATCH 176/283] Fix style --- tests/integration/test_distributed_config/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_distributed_config/test.py b/tests/integration/test_distributed_config/test.py index c08334985b1..bf4bb5a4335 100644 --- a/tests/integration/test_distributed_config/test.py +++ b/tests/integration/test_distributed_config/test.py @@ -3,7 +3,9 @@ from helpers.cluster import ClickHouseCluster import logging cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node", main_configs=["configs/overrides.xml", "configs/clusters.xml"]) +node = cluster.add_instance( + "node", main_configs=["configs/overrides.xml", "configs/clusters.xml"] +) @pytest.fixture(scope="module") From 51ccb520fb5747a00b7c56adae197467284c98d5 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Mar 2024 15:54:06 +0000 Subject: [PATCH 177/283] Change only ignore_limits setting --- src/Planner/Planner.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 861b12e3da2..fde9f110d09 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1081,9 +1081,10 @@ void addBuildSubqueriesForSetsStepIfNeeded( for (auto & subquery : subqueries) { auto query_tree = subquery->detachQueryTree(); - /// I suppose it should be better to use all flags from select_query_options, + auto subquery_options = select_query_options.subquery(); + /// I don't know if this is a good decision, /// But for now it is done in the same way as in old analyzer. 
-    auto subquery_options = SelectQueryOptions(QueryProcessingStage::Complete, select_query_options.subquery_depth).subquery();
+    subquery_options.ignore_limits = false;
     Planner subquery_planner(
         query_tree,
         subquery_options,
From 617138cf482f9a55487d0a6c2e63dacdbbdafe8d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?=
Date: Wed, 13 Mar 2024 16:54:28 +0100
Subject: [PATCH 178/283] Teach the fuzzer to use other numeric types

---
 src/Client/QueryFuzzer.cpp | 46 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp
index 7f1dce4b29a..38e78157096 100644
--- a/src/Client/QueryFuzzer.cpp
+++ b/src/Client/QueryFuzzer.cpp
@@ -914,6 +914,38 @@ ASTPtr QueryFuzzer::fuzzLiteralUnderExpressionList(ASTPtr child)
         child = makeASTFunction(
             "toFixedString", std::make_shared(value), std::make_shared(static_cast(value.size())));
     }
+    else if (type == Field::Types::Which::UInt64 && fuzz_rand() % 7 == 0)
+    {
+        child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toUInt128" : "toUInt256", std::make_shared(l->value.get()));
+    }
+    else if (type == Field::Types::Which::Int64 && fuzz_rand() % 7 == 0)
+    {
+        child = makeASTFunction(fuzz_rand() % 2 == 0 ? "toInt128" : "toInt256", std::make_shared(l->value.get()));
+    }
+    else if (type == Field::Types::Which::Float64 && fuzz_rand() % 7 == 0)
+    {
+        int decimal = fuzz_rand() % 4;
+        if (decimal == 0)
+            child = makeASTFunction(
+                "toDecimal32",
+                std::make_shared(l->value.get()),
+                std::make_shared(static_cast(fuzz_rand() % 9)));
+        else if (decimal == 1)
+            child = makeASTFunction(
+                "toDecimal64",
+                std::make_shared(l->value.get()),
+                std::make_shared(static_cast(fuzz_rand() % 18)));
+        else if (decimal == 2)
+            child = makeASTFunction(
+                "toDecimal128",
+                std::make_shared(l->value.get()),
+                std::make_shared(static_cast(fuzz_rand() % 38)));
+        else
+            child = makeASTFunction(
+                "toDecimal256",
+                std::make_shared(l->value.get()),
+                std::make_shared(static_cast(fuzz_rand() % 76)));
+    }
 
     if (fuzz_rand() % 7 == 0)
         child = makeASTFunction("toNullable", child);
@@ -933,7 +965,19 @@ ASTPtr QueryFuzzer::reverseLiteralFuzzing(ASTPtr child)
 {
     if (auto * function = child.get()->as())
     {
-        std::unordered_set can_be_reverted{"toNullable", "toLowCardinality", "materialize"};
+        const std::unordered_set can_be_reverted{
+            "materialize",
+            "toDecimal32", /// Keeping the first parameter only should be ok (valid query most of the time)
+            "toDecimal64",
+            "toDecimal128",
+            "toDecimal256",
+            "toFixedString", /// Same as toDecimal
+            "toInt128",
+            "toInt256",
+            "toLowCardinality",
+            "toNullable",
+            "toUInt128",
+            "toUInt256"};
         if (can_be_reverted.contains(function->name) && function->children.size() == 1)
         {
             if (fuzz_rand() % 7 == 0)
From 8fdf3ae747877b1aa0cbab3a74290f5a94edea63 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?=
Date: Wed, 13 Mar 2024 17:03:18 +0100
Subject: [PATCH 179/283] Add more details about how NULLs are processed in aggregations

---
 docs/en/sql-reference/aggregate-functions/index.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md
index 5d2229fbcce..e97db436271 100644
--- a/docs/en/sql-reference/aggregate-functions/index.md
+++ b/docs/en/sql-reference/aggregate-functions/index.md
@@ -18,6 +18,8 @@ ClickHouse also supports:
 
 During aggregation, all `NULL`s are skipped. 
If the aggregation has several parameters it will ignore any row in which one or more of the parameters are NULL.
+There are a few exceptions to this rule:
+ - Both [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md) and [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) support modifiers that respect NULLs (`first_value(b) ignore nulls`).
+ - [`count`](../../sql-reference/aggregate-functions/reference/count.md) without parameters (`count()`) or with constant ones (`count(1)`) will count NULL rows too. With a column as parameter, it will count only not null values.
+
 **Examples:**
 
 Consider this table:
From 5b15ec6ae19fe1caa3800dfa333b61570cfc92b4 Mon Sep 17 00:00:00 2001
From: kssenii 
Date: Wed, 13 Mar 2024 17:10:55 +0100
Subject: [PATCH 184/283] Move test from stateless to integration

---
 .../config.d/storage_conf.xml                 |  4 +
 .../integration/test_filesystem_cache/test.py | 75 +++++++++++++++++++
 ...810_system_sync_filesystem_cache.reference |  3 -
 .../02810_system_sync_filesystem_cache.sh     | 69 -----------------
 4 files changed, 79 insertions(+), 72 deletions(-)
 delete mode 100644 tests/queries/0_stateless/02810_system_sync_filesystem_cache.reference
 delete mode 100755 tests/queries/0_stateless/02810_system_sync_filesystem_cache.sh

diff --git a/tests/integration/test_filesystem_cache/config.d/storage_conf.xml b/tests/integration/test_filesystem_cache/config.d/storage_conf.xml
index b614815b34f..a8e4f9f8a99 100644
--- a/tests/integration/test_filesystem_cache/config.d/storage_conf.xml
+++ b/tests/integration/test_filesystem_cache/config.d/storage_conf.xml
@@ -7,4 +7,8 @@
         </hdd_blob>
     </disks>
+    <filesystem_cache_log>
+        <database>system</database>
+        <table>filesystem_cache_log</table>
+    </filesystem_cache_log>
diff --git a/tests/integration/test_filesystem_cache/test.py b/tests/integration/test_filesystem_cache/test.py index c44d817c57c..dfab462732a 100644 --- a/tests/integration/test_filesystem_cache/test.py +++ b/tests/integration/test_filesystem_cache/test.py @@ -426,3 +426,78 @@ def test_force_filesystem_cache_on_merges(cluster): test(node, True) node = cluster.instances["node"] test(node, False) + + +def test_system_sync_filesystem_cache(cluster): + node = cluster.instances["node"] + node.query( + """ +DROP TABLE IF EXISTS test; + +CREATE TABLE test (a Int32, b String) +ENGINE = MergeTree() ORDER BY tuple() +SETTINGS disk = disk(type = cache, + max_size = '100Ki', + path = "test_system_sync_filesystem_cache", + delayed_cleanup_interval_ms = 10000000, disk = hdd_blob), + min_bytes_for_wide_part = 10485760; + +INSERT INTO test SELECT 1, 'test'; + """ + ) + + query_id = "system_sync_filesystem_cache_1" + node.query( + "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1", + query_id=query_id, + ) + + key, offset = ( + node.query( + f""" + SYSTEM FLUSH LOGS; + SELECT key, offset FROM system.filesystem_cache_log WHERE query_id = '{query_id}' ORDER BY size DESC LIMIT 1; + """ + ) + .strip() + .split("\t") + ) + + cache_path = node.query( + f"SELECT cache_path FROM system.filesystem_cache WHERE key = '{key}' and file_segment_range_begin = {offset}" + ) + + node.exec_in_container(["bash", "-c", f"rm {cache_path}"]) + + assert key in node.query("SYSTEM SYNC FILESYSTEM CACHE") + + node.query("SELECT * FROM test FORMAT Null") + assert key not in node.query("SYSTEM SYNC FILESYSTEM CACHE") + + query_id = "system_sync_filesystem_cache_2" + node.query( + "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1", + query_id=query_id, + ) + + key, offset = ( + node.query( + f""" + SYSTEM FLUSH LOGS; + SELECT key, offset FROM system.filesystem_cache_log WHERE query_id = '{query_id}' ORDER BY size DESC LIMIT 1; + """ + ) + .strip() + .split("\t") + ) + cache_path = node.query( + f"SELECT cache_path FROM system.filesystem_cache WHERE key = '{key}' and file_segment_range_begin = {offset}" + ) + + node.exec_in_container(["bash", "-c", f"echo -n 'fff' > {cache_path}"]) + + assert key in node.query("SYSTEM SYNC FILESYSTEM CACHE") + + node.query("SELECT * FROM test FORMAT Null") + + assert key not in node.query("SYSTEM SYNC FILESYSTEM CACHE") diff --git a/tests/queries/0_stateless/02810_system_sync_filesystem_cache.reference b/tests/queries/0_stateless/02810_system_sync_filesystem_cache.reference deleted file mode 100644 index 7614df8ec46..00000000000 --- a/tests/queries/0_stateless/02810_system_sync_filesystem_cache.reference +++ /dev/null @@ -1,3 +0,0 @@ -ok -ok -ok diff --git a/tests/queries/0_stateless/02810_system_sync_filesystem_cache.sh b/tests/queries/0_stateless/02810_system_sync_filesystem_cache.sh deleted file mode 100755 index c88ba4d5a74..00000000000 --- a/tests/queries/0_stateless/02810_system_sync_filesystem_cache.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings - -# set -x - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CUR_DIR"/../shell_config.sh - - -$CLICKHOUSE_CLIENT -nm --query """ -DROP TABLE IF EXISTS test; - -CREATE TABLE test (a Int32, b String) -ENGINE = MergeTree() ORDER BY tuple() -SETTINGS disk = disk(type = cache, max_size = '100Ki', path = ${CLICKHOUSE_TEST_UNIQUE_NAME}, delayed_cleanup_interval_ms = 10000000, disk = s3_disk), min_bytes_for_wide_part = 10485760; - -INSERT INTO test SELECT 1, 'test'; -""" - -query_id=$RANDOM - -$CLICKHOUSE_CLIENT --query_id "$query_id" --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" - -${CLICKHOUSE_CLIENT} -q "system flush logs" - -key=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -offset=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT offset FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -path=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT cache_path FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; -""") - -rm $path - -$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null SETTINGS enable_filesystem_cache_log = 1" 2>&1 | grep -F -e "No such file or directory" > /dev/null && echo "ok" || echo "fail" - -CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=fatal/g') - -$CLICKHOUSE_CLIENT --query "SYSTEM SYNC FILESYSTEM CACHE" 2>&1 | grep -q "$key" && echo 'ok' || echo 'fail' - -$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" - -key=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT key FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -offset=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT offset FROM system.filesystem_cache_log WHERE query_id = '$query_id' ORDER BY size DESC LIMIT 1; -""") - -path=$($CLICKHOUSE_CLIENT -nm --query """ -SELECT cache_path FROM system.filesystem_cache WHERE key = '$key' AND file_segment_range_begin = $offset; -""") - -echo -n 'fff' > $path - -#cat $path - -$CLICKHOUSE_CLIENT --query "SYSTEM SYNC FILESYSTEM CACHE" 2>&1 | grep -q "$key" && echo 'ok' || echo 'fail' - -$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" - -$CLICKHOUSE_CLIENT --query "SYSTEM SYNC FILESYSTEM CACHE" From b2e067b3daee284c4a97289dc1b4dac1f920c3e6 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 13 Mar 2024 16:41:35 +0000 Subject: [PATCH 181/283] Fix logical error in RabbitMQ storage with MATERIALIZED columns --- .../table-engines/integrations/rabbitmq.md | 8 ++- src/Interpreters/InterpreterInsertQuery.cpp | 23 +++++-- src/Interpreters/InterpreterInsertQuery.h | 2 +- src/Storages/RabbitMQ/RabbitMQSource.cpp | 14 +++- src/Storages/RabbitMQ/StorageRabbitMQ.cpp | 35 ++++++---- .../integration/test_storage_rabbitmq/test.py | 64 ++++++++++++++----- .../test_rabbitmq_json.reference | 50 --------------- 7 files changed, 106 insertions(+), 90 deletions(-) delete mode 100644 tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index 0f3fef3d6fb..a4d0cf78066 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -18,8 +18,8 @@ This engine allows integrating ClickHouse with [RabbitMQ](https://www.rabbitmq.c ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( - name1 
[type1] [DEFAULT|MATERIALIZED|ALIAS expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + name1 [type1], + name2 [type2], ... ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'host:port' [or rabbitmq_address = 'amqp(s)://guest:guest@localhost/vhost'], @@ -198,6 +198,10 @@ Additional virtual columns when `kafka_handle_error_mode='stream'`: Note: `_raw_message` and `_error` virtual columns are filled only in case of exception during parsing, they are always `NULL` when message was parsed successfully. +## Caveats {#caveats} + +Even though you may specify [default column expressions](/docs/en/sql-reference/statements/create/table.md/#default_values) (such as `DEFAULT`, `MATERIALIZED`, `ALIAS`) in the table definition, these will be ignored. Instead, the columns will be filled with their respective default values for their types. + ## Data formats support {#data-formats-support} RabbitMQ engine supports all [formats](../../../interfaces/formats.md) supported in ClickHouse. diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 70f9e0c51da..3e8bb268fe7 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -151,7 +151,7 @@ Block InterpreterInsertQuery::getSampleBlock( names.emplace_back(std::move(current_name)); } - return getSampleBlock(names, table, metadata_snapshot, allow_materialized); + return getSampleBlockImpl(names, table, metadata_snapshot, no_destination, allow_materialized); } std::optional InterpreterInsertQuery::getInsertColumnNames() const @@ -173,13 +173,18 @@ std::optional InterpreterInsertQuery::getInsertColumnNames() const return names; } -Block InterpreterInsertQuery::getSampleBlock( +Block InterpreterInsertQuery::getSampleBlockImpl( const Names & names, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, + bool no_destination, bool allow_materialized) { Block table_sample_physical = metadata_snapshot->getSampleBlock(); + Block table_sample_virtuals; + if (no_destination) + table_sample_virtuals = table->getVirtualsHeader(); + Block table_sample_insertable = metadata_snapshot->getSampleBlockInsertable(); Block res; for (const auto & current_name : names) @@ -194,13 +199,19 @@ Block InterpreterInsertQuery::getSampleBlock( if (table_sample_physical.has(current_name)) { if (!allow_materialized) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", - current_name); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column", current_name); res.insert(ColumnWithTypeAndName(table_sample_physical.getByName(current_name).type, current_name)); } - else /// The table does not have a column with that name + else if (table_sample_virtuals.has(current_name)) + { + res.insert(ColumnWithTypeAndName(table_sample_virtuals.getByName(current_name).type, current_name)); + } + else + { + /// The table does not have a column with that name throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "No such column {} in table {}", current_name, table->getStorageID().getNameForLogs()); + } } else res.insert(ColumnWithTypeAndName(table_sample_insertable.getByName(current_name).type, current_name)); @@ -276,7 +287,7 @@ Chain InterpreterInsertQuery::buildChain( if (!running_group) running_group = std::make_shared(getContext()); - auto sample = getSampleBlock(columns, table, metadata_snapshot, allow_materialized); + auto sample = getSampleBlockImpl(columns, table, 
metadata_snapshot, no_destination, allow_materialized); if (check_access) getContext()->checkAccess(AccessType::INSERT, table->getStorageID(), sample.getNames()); diff --git a/src/Interpreters/InterpreterInsertQuery.h b/src/Interpreters/InterpreterInsertQuery.h index 3647126afb9..bf73fb2a319 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -69,7 +69,7 @@ public: bool shouldAddSquashingFroStorage(const StoragePtr & table) const; private: - static Block getSampleBlock(const Names & names, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, bool allow_materialized); + static Block getSampleBlockImpl(const Names & names, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot, bool no_destination, bool allow_materialized); ASTPtr query_ptr; const bool allow_materialized; diff --git a/src/Storages/RabbitMQ/RabbitMQSource.cpp b/src/Storages/RabbitMQ/RabbitMQSource.cpp index 4dc257074f3..09c1bf1b2e7 100644 --- a/src/Storages/RabbitMQ/RabbitMQSource.cpp +++ b/src/Storages/RabbitMQ/RabbitMQSource.cpp @@ -11,11 +11,21 @@ namespace DB { -static std::pair getHeaders(const StorageSnapshotPtr & storage_snapshot) +static std::pair getHeaders(const StorageSnapshotPtr & storage_snapshot, const Names & column_names) { + auto all_columns_header = storage_snapshot->metadata->getSampleBlock(); + auto non_virtual_header = storage_snapshot->metadata->getSampleBlockNonMaterialized(); auto virtual_header = storage_snapshot->virtual_columns->getSampleBlock(); + for (const auto & column_name : column_names) + { + if (non_virtual_header.has(column_name) || virtual_header.has(column_name)) + continue; + const auto & column = all_columns_header.getByName(column_name); + non_virtual_header.insert(column); + } + return {non_virtual_header, virtual_header}; } @@ -40,7 +50,7 @@ RabbitMQSource::RabbitMQSource( : RabbitMQSource( storage_, storage_snapshot_, - getHeaders(storage_snapshot_), + getHeaders(storage_snapshot_, columns), context_, columns, max_block_size_, diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 980fccd307e..b882fd2728c 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -133,6 +134,9 @@ StorageRabbitMQ::StorageRabbitMQ( if (configuration.secure) SSL_library_init(); + if (!columns_.getMaterialized().empty() || !columns_.getAliases().empty() || !columns_.getDefaults().empty() || !columns_.getEphemeral().empty()) + context_->addWarningMessage("RabbitMQ table engine doesn't support ALIAS, DEFAULT or MATERIALIZED columns. 
They will be ignored and filled with default values"); + StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); setInMemoryMetadata(storage_metadata); @@ -1055,18 +1059,7 @@ bool StorageRabbitMQ::tryStreamToViews() if (!table) throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs()); - // Create an INSERT query for streaming data - auto insert = std::make_shared(); - insert->table_id = table_id; - - // Only insert into dependent views and expect that input blocks contain virtual columns - InterpreterInsertQuery interpreter(insert, rabbitmq_context, false, true, true); - auto block_io = interpreter.execute(); - auto storage_snapshot = getStorageSnapshot(getInMemoryMetadataPtr(), getContext()); - auto column_names = block_io.pipeline.getHeader().getNames(); - auto sample_block = storage_snapshot->getSampleBlockForColumns(column_names); - auto block_size = getMaxBlockSize(); // Create a stream for each consumer and join them in a union stream @@ -1082,13 +1075,29 @@ bool StorageRabbitMQ::tryStreamToViews() for (size_t i = 0; i < num_created_consumers; ++i) { auto source = std::make_shared( - *this, storage_snapshot, rabbitmq_context, column_names, block_size, - max_execution_time_ms, rabbitmq_settings->rabbitmq_handle_error_mode, false); + *this, storage_snapshot, rabbitmq_context, Names{}, block_size, + max_execution_time_ms, rabbitmq_settings->rabbitmq_handle_error_mode); sources.emplace_back(source); pipes.emplace_back(source); } + // Create an INSERT query for streaming data + auto insert = std::make_shared(); + insert->table_id = table_id; + if (!sources.empty()) + { + auto column_list = std::make_shared(); + const auto & header = sources[0]->getPort().getHeader(); + for (const auto & column : header) + column_list->children.emplace_back(std::make_shared(column.name)); + insert->columns = std::move(column_list); + } + + // Only insert into dependent views and expect that input blocks contain virtual columns + InterpreterInsertQuery interpreter(insert, rabbitmq_context, /* allow_materialized_ */ false, /* no_squash_ */ true, /* no_destination_ */ true); + auto block_io = interpreter.execute(); + block_io.pipeline.complete(Pipe::unitePipes(std::move(pipes))); std::atomic_size_t rows = 0; diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 280ce230921..0f1c5eb17dd 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -53,13 +53,13 @@ instance3 = cluster.add_instance( # Helpers -def rabbitmq_check_result(result, check=False, ref_file="test_rabbitmq_json.reference"): - fpath = p.join(p.dirname(__file__), ref_file) - with open(fpath) as reference: - if check: - assert TSV(result) == TSV(reference) - else: - return TSV(result) == TSV(reference) +def rabbitmq_check_result(result, check=False, reference=None): + if reference is None: + reference = "\n".join([f"{i}\t{i}" for i in range(50)]) + if check: + assert TSV(result) == TSV(reference) + else: + return TSV(result) == TSV(reference) def wait_rabbitmq_to_start(rabbitmq_docker_id, cookie, timeout=180): @@ -133,9 +133,10 @@ def test_rabbitmq_select(rabbitmq_cluster, secure): if secure: port = cluster.rabbitmq_secure_port + # MATERIALIZED and ALIAS columns are not supported in RabbitMQ engine, but we can test that it does not fail instance.query( """ - CREATE TABLE test.rabbitmq (key UInt64, value UInt64) + CREATE TABLE test.rabbitmq (key UInt64, 
value UInt64, value2 ALIAS value + 1, value3 MATERIALIZED value + 1)
         ENGINE = RabbitMQ
             SETTINGS rabbitmq_host_port = '{}:{}',
                      rabbitmq_exchange_name = 'select',
@@ -148,6 +149,11 @@ def test_rabbitmq_select(rabbitmq_cluster, secure):
         )
     )
 
+    assert (
+        "RabbitMQ table engine doesn\\'t support ALIAS, DEFAULT or MATERIALIZED columns"
+        in instance.query("SELECT * FROM system.warnings")
+    )
+
     credentials = pika.PlainCredentials("root", "clickhouse")
     parameters = pika.ConnectionParameters(
         rabbitmq_cluster.rabbitmq_ip, rabbitmq_cluster.rabbitmq_port, "/", credentials
@@ -379,7 +385,7 @@ def test_rabbitmq_macros(rabbitmq_cluster):
 def test_rabbitmq_materialized_view(rabbitmq_cluster):
     instance.query(
         """
-        CREATE TABLE test.rabbitmq (key UInt64, value UInt64)
+        CREATE TABLE test.rabbitmq (key UInt64, value UInt64, dt1 DateTime MATERIALIZED now(), value2 ALIAS value + 1)
         ENGINE = RabbitMQ
         SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
                  rabbitmq_exchange_name = 'mv',
@@ -484,9 +490,11 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster):
     """
         DROP TABLE IF EXISTS test.view1;
         DROP TABLE IF EXISTS test.view2;
+        DROP TABLE IF EXISTS test.view3;
        DROP TABLE IF EXISTS test.consumer1;
        DROP TABLE IF EXISTS test.consumer2;
-    CREATE TABLE test.rabbitmq (key UInt64, value UInt64)
+    DROP TABLE IF EXISTS test.consumer3;
+    CREATE TABLE test.rabbitmq (key UInt64, value UInt64, value2 ALIAS value + 1, value3 MATERIALIZED value + 1, value4 DEFAULT 1)
        ENGINE = RabbitMQ
        SETTINGS rabbitmq_host_port = 'rabbitmq1:5672',
            rabbitmq_exchange_name = 'mmv',
@@ -497,13 +505,18 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster):
    CREATE TABLE test.view1 (key UInt64, value UInt64)
        ENGINE = MergeTree()
        ORDER BY key;
-    CREATE TABLE test.view2 (key UInt64, value UInt64)
+    CREATE TABLE test.view2 (key UInt64, value UInt64, value2 UInt64, value3 UInt64, value4 UInt64)
+        ENGINE = MergeTree()
+        ORDER BY key;
+    CREATE TABLE test.view3 (key UInt64)
        ENGINE = MergeTree()
        ORDER BY key;
    CREATE MATERIALIZED VIEW test.consumer1 TO test.view1 AS
        SELECT * FROM test.rabbitmq;
    CREATE MATERIALIZED VIEW test.consumer2 TO test.view2 AS
        SELECT * FROM test.rabbitmq;
+    CREATE MATERIALIZED VIEW test.consumer3 TO test.view3 AS
+        SELECT * FROM test.rabbitmq;
    """
    )
 
@@ -514,7 +527,7 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster):
     connection = pika.BlockingConnection(parameters)
     channel = connection.channel()
 
-    instance.wait_for_log_line("Started streaming to 2 attached views")
+    instance.wait_for_log_line("Started streaming to 3 attached views")
 
     messages = []
     for i in range(50):
@@ -522,24 +535,43 @@ def test_rabbitmq_many_materialized_views(rabbitmq_cluster):
     for message in messages:
         channel.basic_publish(exchange="mmv", routing_key="", body=message)
 
-    while True:
+    is_check_passed = False
+    deadline = time.monotonic() + 60
+    while time.monotonic() < deadline:
         result1 = instance.query("SELECT * FROM test.view1 ORDER BY key")
         result2 = instance.query("SELECT * FROM test.view2 ORDER BY key")
-        if rabbitmq_check_result(result1) and rabbitmq_check_result(result2):
+        result3 = instance.query("SELECT * FROM test.view3 ORDER BY key")
+        # Note that for view2 the result is `i i 0 0 0`, not `i i i+1 i+1 1` as expected, because ALIAS/MATERIALIZED/DEFAULT columns are not supported in RabbitMQ engine
+        # We only check that at least it does not fail
+        if (
+            rabbitmq_check_result(result1)
+            and rabbitmq_check_result(
+                result2, reference="\n".join([f"{i}\t{i}\t0\t0\t0" for i in range(50)])
+            )
+            and rabbitmq_check_result(
+                result3, 
reference="\n".join([str(i) for i in range(50)]) + ) + ): + is_check_passed = True break + time.sleep(0.1) + + assert ( + is_check_passed + ), f"References are not equal to results, result1: {result1}, result2: {result2}, result3: {result3}" instance.query( """ DROP TABLE test.consumer1; DROP TABLE test.consumer2; + DROP TABLE test.consumer3; DROP TABLE test.view1; DROP TABLE test.view2; + DROP TABLE test.view3; """ ) connection.close() - rabbitmq_check_result(result1, True) - rabbitmq_check_result(result2, True) def test_rabbitmq_big_message(rabbitmq_cluster): diff --git a/tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference b/tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference deleted file mode 100644 index 959bb2aad74..00000000000 --- a/tests/integration/test_storage_rabbitmq/test_rabbitmq_json.reference +++ /dev/null @@ -1,50 +0,0 @@ -0 0 -1 1 -2 2 -3 3 -4 4 -5 5 -6 6 -7 7 -8 8 -9 9 -10 10 -11 11 -12 12 -13 13 -14 14 -15 15 -16 16 -17 17 -18 18 -19 19 -20 20 -21 21 -22 22 -23 23 -24 24 -25 25 -26 26 -27 27 -28 28 -29 29 -30 30 -31 31 -32 32 -33 33 -34 34 -35 35 -36 36 -37 37 -38 38 -39 39 -40 40 -41 41 -42 42 -43 43 -44 44 -45 45 -46 46 -47 47 -48 48 -49 49 From 4670f055649f5f8f216acd42947881038dedbdbd Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 13 Mar 2024 17:18:13 +0000 Subject: [PATCH 182/283] Fix test test_input_format_parallel_parsing_memory_tracking --- .../test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_input_format_parallel_parsing_memory_tracking/test.py b/tests/integration/test_input_format_parallel_parsing_memory_tracking/test.py index c95bbfda708..a89cb619350 100644 --- a/tests/integration/test_input_format_parallel_parsing_memory_tracking/test.py +++ b/tests/integration/test_input_format_parallel_parsing_memory_tracking/test.py @@ -41,7 +41,7 @@ def test_memory_tracking_total(): [ "bash", "-c", - "clickhouse local -q \"SELECT arrayStringConcat(arrayMap(x->toString(cityHash64(x)), range(1000)), ' ') from numbers(10000)\" > data.json", + "clickhouse local -q \"SELECT arrayStringConcat(arrayMap(x->toString(cityHash64(x)), range(1000)), ' ') from numbers(10000)\" > data.jsonl", ] ) @@ -56,7 +56,7 @@ def test_memory_tracking_total(): "--show-error", "--data-binary", "@data.json", - "http://127.1:8123/?query=INSERT%20INTO%20null%20FORMAT%20TSV", + "http://127.1:8123/?query=INSERT%20INTO%20null%20FORMAT%20JSONEachRow", ] ) == "" From 42a1cc47e6e24c3e854346bf9dec522677bb0d09 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 13 Mar 2024 18:05:08 +0000 Subject: [PATCH 183/283] Fix 01761_cast_to_enum_nullable with analyzer. 
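With the analyzer, a constant NULL cast to a Nullable type has to materialize as a
ColumnConst wrapping a ColumnNullable, which is what the new
DataTypeNullable::createColumnConst override below produces. A hypothetical query of
the kind the test exercises (the test's exact statements are not reproduced here):

```sql
-- x should be NULL of type Nullable(Enum8('a' = 1, 'b' = 2))
SELECT CAST(NULL AS Nullable(Enum8('a' = 1, 'b' = 2))) AS x, toTypeName(x);
```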
--- src/DataTypes/DataTypeNullable.cpp | 25 +++++++++++++++++++++++++ src/DataTypes/DataTypeNullable.h | 1 + tests/analyzer_tech_debt.txt | 1 - 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp index 16d5d41e5e5..db252659d41 100644 --- a/src/DataTypes/DataTypeNullable.cpp +++ b/src/DataTypes/DataTypeNullable.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -56,6 +57,30 @@ bool DataTypeNullable::equals(const IDataType & rhs) const return rhs.isNullable() && nested_data_type->equals(*static_cast(rhs).nested_data_type); } +ColumnPtr DataTypeNullable::createColumnConst(size_t size, const Field & field) const +{ + if (onlyNull()) + { + auto column = createColumn(); + column->insert(field); + return ColumnConst::create(std::move(column), size); + } + + auto column = nested_data_type->createColumn(); + bool is_null = field.isNull(); + + if (is_null) + nested_data_type->insertDefaultInto(*column); + else + column->insert(field); + + auto null_mask = ColumnUInt8::create(); + null_mask->getData().push_back(is_null ? 1 : 0); + + auto res = ColumnNullable::create(std::move(column), std::move(null_mask)); + return ColumnConst::create(std::move(res), size); +} + SerializationPtr DataTypeNullable::doGetDefaultSerialization() const { return std::make_shared(nested_data_type->getDefaultSerialization()); diff --git a/src/DataTypes/DataTypeNullable.h b/src/DataTypes/DataTypeNullable.h index b102c767993..71abe48c151 100644 --- a/src/DataTypes/DataTypeNullable.h +++ b/src/DataTypes/DataTypeNullable.h @@ -41,6 +41,7 @@ public: bool onlyNull() const override; bool canBeInsideLowCardinality() const override { return nested_data_type->canBeInsideLowCardinality(); } bool canBePromoted() const override { return nested_data_type->canBePromoted(); } + ColumnPtr createColumnConst(size_t size, const Field & field) const override; const DataTypePtr & getNestedType() const { return nested_data_type; } diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index dbd216ea7be..b4cf3cf288b 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -7,7 +7,6 @@ 01584_distributed_buffer_cannot_find_column 01624_soft_constraints 01747_join_view_filter_dictionary -01761_cast_to_enum_nullable 01925_join_materialized_columns 01952_optimize_distributed_group_by_sharding_key 02354_annoy From 6e6a67a2fb37ef22807d22aac1a8958bc618dc4d Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 13 Mar 2024 19:13:39 +0100 Subject: [PATCH 184/283] Fix unit test --- .../Cache/WriteBufferToFileSegment.cpp | 19 ++++++++++++++----- .../Cache/WriteBufferToFileSegment.h | 2 +- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index 759135722dc..51914c0a14e 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -19,9 +19,22 @@ namespace ErrorCodes extern const int NOT_ENOUGH_SPACE; } +namespace +{ + size_t getCacheLockWaitTimeout() + { + auto query_context = CurrentThread::getQueryContext(); + if (query_context) + return query_context->getReadSettings().filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; + else + return Context::getGlobalContextInstance()->getReadSettings().filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; + } +} + 
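+/// The helper above prefers the read settings of the query attached to the current
+/// thread and falls back to the global context otherwise, so both constructors
+/// below can share one lookup of the cache lock wait timeout.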
WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegment * file_segment_) : WriteBufferFromFileDecorator(std::make_unique(file_segment_->getPath())) , file_segment(file_segment_) + , reserve_space_lock_wait_timeout_milliseconds(getCacheLockWaitTimeout()) { } @@ -32,12 +45,8 @@ WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegmentsHolderPtr segment : throw Exception(ErrorCodes::LOGICAL_ERROR, "WriteBufferToFileSegment can be created only from single segment")) , file_segment(&segment_holder_->front()) , segment_holder(std::move(segment_holder_)) + , reserve_space_lock_wait_timeout_milliseconds(getCacheLockWaitTimeout()) { - auto query_context = CurrentThread::getQueryContext(); - if (query_context) - reserve_space_lock_wait_timeout_milliseconds = query_context->getReadSettings().filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; - else - reserve_space_lock_wait_timeout_milliseconds = Context::getGlobalContextInstance()->getReadSettings().filesystem_cache_reserve_space_wait_lock_timeout_milliseconds; } /// If it throws an exception, the file segment will be incomplete, so you should not use it in the future. diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.h b/src/Interpreters/Cache/WriteBufferToFileSegment.h index bff340d79b3..822488ceb48 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.h +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.h @@ -29,7 +29,7 @@ private: /// Empty if file_segment is not owned by this WriteBufferToFileSegment FileSegmentsHolderPtr segment_holder; - size_t reserve_space_lock_wait_timeout_milliseconds; + const size_t reserve_space_lock_wait_timeout_milliseconds; }; From 2cb1fa79d9741100063dd29a8033b0888c863dad Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 13 Mar 2024 20:01:54 +0100 Subject: [PATCH 185/283] Bump CI From 89de338676a0233dac4563d9f4ba7e4a16b54135 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 13 Mar 2024 17:33:39 +0100 Subject: [PATCH 186/283] Better --- .../aggregate-functions/index.md | 53 +++++++++++++++++-- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md index e97db436271..96bf0c5d93b 100644 --- a/docs/en/sql-reference/aggregate-functions/index.md +++ b/docs/en/sql-reference/aggregate-functions/index.md @@ -16,11 +16,9 @@ ClickHouse also supports: ## NULL Processing -During aggregation, all `NULL`s are skipped. If the aggregation has several parameters it will ignore any row in which one or more of the parameters are NULL. +During aggregation, all `NULL` arguments are skipped. If the aggregation has several arguments it will ignore any row in which one or more of them are NULL. -There are a few exceptions to this rule: - - Both [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md) and [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) support modifiers that respect NULLs (`first_value(b) ignore nulls`). - - [`count`](../../sql-reference/aggregate-functions/reference/count.md) without parameters (`count()`) or with constant ones (`count(1)`) will count NULL rows too. With a column as parameter, it will count only not null values. 
+There is an exception to this rule: the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases when followed by the modifier `RESPECT NULLS`: `FIRST_VALUE(b) RESPECT NULLS`.
 
 **Examples:**
 
@@ -89,3 +87,50 @@ FROM t_null_big;
 │ [2,2,3]       │ [2,NULL,2,3,NULL]                     │
 └───────────────┴───────────────────────────────────────┘
 ```
+
+Note that NULL values are skipped only when the columns are used as arguments to an aggregate function. For example [`count`](../../sql-reference/aggregate-functions/reference/count.md) without parameters (`count()`) or with constant ones (`count(1)`) will count all rows in the block (independently of the value of the GROUP BY column as it's not an argument), while `count(column)` will only return the number of rows where column is not NULL.
+
+```sql
+SELECT
+    v,
+    count(1),
+    count(v)
+FROM
+(
+    SELECT if(number < 10, NULL, number % 3) AS v
+    FROM numbers(15)
+)
+GROUP BY v
+
+┌────v─┬─count()─┬─count(v)─┐
+│ ᴺᵁᴸᴸ │      10 │        0 │
+│    0 │       1 │        1 │
+│    1 │       2 │        2 │
+│    2 │       2 │        2 │
+└──────┴─────────┴──────────┘
+```
+
+And here is an example of `first_value` with `RESPECT NULLS` where we can see that NULL inputs are respected and it will return the first value read, whether it's NULL or not:
+
+```sql
+SELECT
+    col || '_' || ((col + 1) * 5 - 1) as range,
+    first_value(odd_or_null) as first,
+    first_value(odd_or_null) IGNORE NULLS as first_ignore_null,
+    first_value(odd_or_null) RESPECT NULLS as first_respect_nulls
+FROM
+(
+    SELECT
+        intDiv(number, 5) AS col,
+        if(number % 2 == 0, NULL, number) as odd_or_null
+    FROM numbers(15)
+)
+GROUP BY col
+ORDER BY col
+
+┌─range─┬─first─┬─first_ignore_null─┬─first_respect_nulls─┐
+│ 0_4   │     1 │                 1 │                ᴺᵁᴸᴸ │
+│ 1_9   │     5 │                 5 │                   5 │
+│ 2_14  │    11 │                11 │                ᴺᵁᴸᴸ │
+└───────┴───────┴───────────────────┴─────────────────────┘
+```
From 31ebee41d040f3af8d4266623ba25a37d7df1fbd Mon Sep 17 00:00:00 2001
From: alesapin 
Date: Wed, 13 Mar 2024 20:06:15 +0100
Subject: [PATCH 187/283] Fix

---
 .../0_stateless/02151_invalid_setting_with_hints_in_query.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/02151_invalid_setting_with_hints_in_query.sh b/tests/queries/0_stateless/02151_invalid_setting_with_hints_in_query.sh
index 2faaa3bb1b6..367d0163497 100755
--- a/tests/queries/0_stateless/02151_invalid_setting_with_hints_in_query.sh
+++ b/tests/queries/0_stateless/02151_invalid_setting_with_hints_in_query.sh
@@ -4,4 +4,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
 
-$CLICKHOUSE_LOCAL --query="SET input_format_with_names_use_headers = 1" 2>&1 | grep -qF "Code: 115. DB::Exception: Unknown setting input_format_with_names_use_headers: Maybe you meant ['input_format_with_names_use_header','input_format_with_types_use_header']. (UNKNOWN_SETTING)" && echo 'OK' || echo 'FAIL' ||:
+$CLICKHOUSE_LOCAL --query="SET input_format_with_names_use_headers = 1" 2>&1 | grep -qF "Code: 115. DB::Exception: Unknown setting 'input_format_with_names_use_headers': Maybe you meant ['input_format_with_names_use_header','input_format_with_types_use_header']. 
(UNKNOWN_SETTING)" && echo 'OK' || echo 'FAIL' ||:
From bd6bed161cc3beddd3966842385df66a0fb71e1a Mon Sep 17 00:00:00 2001
From: Robert Schulze 
Date: Wed, 13 Mar 2024 20:38:55 +0000
Subject: [PATCH 188/283] Try to fix flaky test_undrop_query

---
 .../settings.md                             |  4 +-
 docs/en/sql-reference/statements/undrop.md  | 48 +++++++-------------
 src/Interpreters/DatabaseCatalog.cpp        |  4 +-
 src/Interpreters/InterpreterUndropQuery.cpp |  6 ++-
 tests/integration/test_undrop_query/test.py | 25 ++++++----
 .../0_stateless/02681_undrop_query.sql      |  2 +-
 6 files changed, 43 insertions(+), 46 deletions(-)

diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 07c9a2b88ab..f20dcb9025e 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -933,9 +933,9 @@ Hard limit is configured via system tools
 
 ## database_atomic_delay_before_drop_table_sec {#database_atomic_delay_before_drop_table_sec}
 
-Sets the delay before remove table data in seconds. If the query has `SYNC` modifier, this setting is ignored.
+The delay, in seconds, before table data is dropped. If the `DROP TABLE` query has a `SYNC` modifier, this setting is ignored.
 
-Default value: `480` (8 minute).
+Default value: `480` (8 minutes).
 
 ## database_catalog_unused_dir_hide_timeout_sec {#database_catalog_unused_dir_hide_timeout_sec}
 
diff --git a/docs/en/sql-reference/statements/undrop.md b/docs/en/sql-reference/statements/undrop.md
index 40ac1ab4f99..4b138bfe679 100644
--- a/docs/en/sql-reference/statements/undrop.md
+++ b/docs/en/sql-reference/statements/undrop.md
@@ -13,13 +13,6 @@ a system table called `system.dropped_tables`.
 
 If you have a materialized view without a `TO` clause associated with the dropped table, then you will also have to UNDROP the inner table of that view.
 
-:::note
-UNDROP TABLE is experimental. To use it add this setting:
-```sql
-set allow_experimental_undrop_table_query = 1;
-```
-:::
-
 :::tip
 Also see [DROP TABLE](/docs/en/sql-reference/statements/drop.md)
 :::
@@ -32,60 +25,53 @@ UNDROP TABLE [db.]name [UUID ''] [ON CLUSTER cluster]
 
 **Example**
 
-``` sql
-set allow_experimental_undrop_table_query = 1;
-```
-
 ```sql
-CREATE TABLE undropMe
+CREATE TABLE tab
 (
     `id` UInt8
 )
 ENGINE = MergeTree
-ORDER BY id
-```
+ORDER BY id;
+
+DROP TABLE tab;
 
-```sql
-DROP TABLE undropMe
-```
-```sql
 SELECT *
 FROM system.dropped_tables
-FORMAT Vertical
+FORMAT Vertical;
 ```
+
 ```response
 Row 1:
──────
index:                 0
database:              default
-table:                 undropMe
+table:                 tab
uuid:                  aa696a1a-1d70-4e60-a841-4c80827706cc
engine:                MergeTree
-metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.undropMe.aa696a1a-1d70-4e60-a841-4c80827706cc.sql
+metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.tab.aa696a1a-1d70-4e60-a841-4c80827706cc.sql
table_dropped_time:    2023-04-05 14:12:12
 
 1 row in set. Elapsed: 0.001 sec. 
 ```
+
 ```sql
-UNDROP TABLE undropMe
-```
-```response
-Ok.
-```
-```sql
+UNDROP TABLE tab;
+
 SELECT *
 FROM system.dropped_tables
-FORMAT Vertical
-```
+FORMAT Vertical;
+```
+
 ```response
 Ok.
 
 0 rows in set. Elapsed: 0.001 sec. 
``` + ```sql -DESCRIBE TABLE undropMe -FORMAT Vertical +DESCRIBE TABLE tab +FORMAT Vertical; ``` + ```response Row 1: ────── diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index a9fd5c852ba..9d9f418934f 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1143,7 +1143,7 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) TableMarkedAsDropped dropped_table; { std::lock_guard lock(tables_marked_dropped_mutex); - time_t latest_drop_time = std::numeric_limits::min(); + auto latest_drop_time = std::numeric_limits::min(); auto it_dropped_table = tables_marked_dropped.end(); for (auto it = tables_marked_dropped.begin(); it != tables_marked_dropped.end(); ++it) { @@ -1168,7 +1168,7 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id) } if (it_dropped_table == tables_marked_dropped.end()) throw Exception(ErrorCodes::UNKNOWN_TABLE, - "The drop task of table {} is in progress, has been dropped or the database engine doesn't support it", + "Table {} is being dropped, has been dropped, or the database engine does not support UNDROP", table_id.getNameForLogs()); latest_metadata_dropped_path = it_dropped_table->metadata_path; String table_metadata_path = getPathForMetadata(it_dropped_table->table_id); diff --git a/src/Interpreters/InterpreterUndropQuery.cpp b/src/Interpreters/InterpreterUndropQuery.cpp index 8401c47df6b..f628a656947 100644 --- a/src/Interpreters/InterpreterUndropQuery.cpp +++ b/src/Interpreters/InterpreterUndropQuery.cpp @@ -17,14 +17,16 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; } -InterpreterUndropQuery::InterpreterUndropQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) : WithMutableContext(context_), query_ptr(query_ptr_) +InterpreterUndropQuery::InterpreterUndropQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) + : WithMutableContext(context_) + , query_ptr(query_ptr_) { } - BlockIO InterpreterUndropQuery::execute() { getContext()->checkAccess(AccessType::UNDROP_TABLE); + auto & undrop = query_ptr->as(); if (!undrop.cluster.empty() && !maybeRemoveOnCluster(query_ptr, getContext())) { diff --git a/tests/integration/test_undrop_query/test.py b/tests/integration/test_undrop_query/test.py index 590a5690e55..d57aa8c2dc7 100644 --- a/tests/integration/test_undrop_query/test.py +++ b/tests/integration/test_undrop_query/test.py @@ -29,30 +29,39 @@ def test_undrop_drop_and_undrop_loop(started_cluster): logging.info( "random_sec: " + random_sec.__str__() + ", table_uuid: " + table_uuid ) + node.query( - "create table test_undrop_loop" + "CREATE TABLE test_undrop_loop" + count.__str__() + " UUID '" + table_uuid - + "' (id Int32) Engine=MergeTree() order by id;" + + "' (id Int32) ENGINE = MergeTree() ORDER BY id;" ) - node.query("drop table test_undrop_loop" + count.__str__() + ";") + + node.query("DROP TABLE test_undrop_loop" + count.__str__() + ";") + time.sleep(random_sec) + if random_sec >= 5: error = node.query_and_get_error( - "undrop table test_undrop_loop" + "UNDROP TABLE test_undrop_loop" + count.__str__() - + " uuid '" + + " UUID '" + table_uuid + "';" ) assert "UNKNOWN_TABLE" in error - else: + elif random_sec <= 3: + # (*) node.query( - "undrop table test_undrop_loop" + "UNDROP TABLE test_undrop_loop" + count.__str__() - + " uuid '" + + " UUID '" + table_uuid + "';" ) count = count + 1 + else: + pass + # ignore random_sec = 4 to account for communication delay with the database. 
+ # if we don't do that, then the second case (*) may find the table already dropped and receive an unexpected exception from the database (Bug #55167) diff --git a/tests/queries/0_stateless/02681_undrop_query.sql b/tests/queries/0_stateless/02681_undrop_query.sql index 66447fc6c44..d038a383690 100644 --- a/tests/queries/0_stateless/02681_undrop_query.sql +++ b/tests/queries/0_stateless/02681_undrop_query.sql @@ -85,5 +85,5 @@ drop table 02681_undrop_multiple; select table from system.dropped_tables where table = '02681_undrop_multiple' limit 1; undrop table 02681_undrop_multiple; select * from 02681_undrop_multiple order by id; -undrop table 02681_undrop_multiple; -- { serverError 57 } +undrop table 02681_undrop_multiple; -- { serverError TABLE_ALREADY_EXISTS } drop table 02681_undrop_multiple sync; From be9a77b6ca7aa35b78857bc812ccacbe1d22bfa3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 14 Mar 2024 03:54:07 +0300 Subject: [PATCH 189/283] Update SettingsChangesHistory.h --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 129a89bfa23..04b6add0e56 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -88,6 +88,7 @@ static std::map sett {"24.3", {{"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, + {"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"}, }}, {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, @@ -136,7 +137,6 @@ static std::map sett {"function_visible_width_behavior", 0, 1, "We changed the default behavior of `visibleWidth` to be more precise"}, {"max_estimated_execution_time", 0, 0, "Separate max_execution_time and max_estimated_execution_time"}, {"iceberg_engine_ignore_schema_evolution", false, false, "Allow to ignore schema evolution in Iceberg table engine"}, - {"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"}, {"optimize_injective_functions_in_group_by", false, true, "Replace injective functions by it's arguments in GROUP BY section in analyzer"}, {"update_insert_deduplication_token_in_dependent_materialized_views", false, false, "Allow to update insert deduplication token with table identifier during insert in dependent materialized views"}, {"azure_max_unexpected_write_error_retries", 4, 4, "The maximum number of retries in case of unexpected errors during Azure blob storage write"}, From 0b522f5fb34a774ce6cf6092dc97d5360fde7ea6 Mon Sep 17 00:00:00 2001 From: peter279k Date: Thu, 14 Mar 2024 13:53:26 +0800 Subject: [PATCH 190/283] Fix issue #61351 --- docs/en/getting-started/example-datasets/laion.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/en/getting-started/example-datasets/laion.md b/docs/en/getting-started/example-datasets/laion.md index 0dbaceffc13..327c1796d11 100644 --- a/docs/en/getting-started/example-datasets/laion.md +++ b/docs/en/getting-started/example-datasets/laion.md @@ -10,10 
+10,14 @@ The embeddings and the metadata are stored in separate files in the raw data. A converts them to CSV and imports them into ClickHouse. You can use the following `download.sh` script for that: ```bash -wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy # download image embedding -wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy # download text embedding -wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet # download metadata -python3 process.py ${1} # merge files and convert to CSV +number=${1} +if [[ $number == '' ]]; then + number=1 +fi; +wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${number}.npy # download image embedding +wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${number}.npy # download text embedding +wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${number}.parquet # download metadata +python3 process.py $number # merge files and convert to CSV ``` Script `process.py` is defined as follows: From ff976520389c277cfcb034466aed514fe96acc6d Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 14 Mar 2024 09:53:18 +0000 Subject: [PATCH 191/283] fix spell check --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index c7eb213bff2..1706d44bc8a 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2687,7 +2687,9 @@ userver utils uuid varPop +varPopStable varSamp +varSampStable variadic variantElement variantType From c6f0b2a5a458648c179456a6843aae8c9c3fb53a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 14 Mar 2024 11:57:03 +0100 Subject: [PATCH 192/283] Revert "Fix usage of session_token in S3 engine" --- src/Coordination/KeeperSnapshotManagerS3.cpp | 3 +-- src/Storages/StorageS3.cpp | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 796506a07db..80345db2524 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -121,8 +121,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo auth_settings.use_insecure_imds_request.value_or(false), auth_settings.expiration_window_seconds.value_or(S3::DEFAULT_EXPIRATION_WINDOW_SECONDS), auth_settings.no_sign_request.value_or(false), - }, - credentials.GetSessionToken()); + }); auto new_client = std::make_shared(std::move(new_uri), std::move(auth_settings), std::move(client)); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index ff055508aa6..11da394feec 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1451,8 +1451,7 @@ void StorageS3::Configuration::connect(const ContextPtr & context) auth_settings.expiration_window_seconds.value_or( context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), - }, - credentials.GetSessionToken()); + }); } void 
StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection) From 56220b5105005acef0ab4b09d4f7b48c6aac8d66 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 14 Mar 2024 12:23:32 +0100 Subject: [PATCH 193/283] Revert "Revert "Fix usage of session_token in S3 engine"" --- src/Coordination/KeeperSnapshotManagerS3.cpp | 3 ++- src/Storages/StorageS3.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index 80345db2524..796506a07db 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -121,7 +121,8 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo auth_settings.use_insecure_imds_request.value_or(false), auth_settings.expiration_window_seconds.value_or(S3::DEFAULT_EXPIRATION_WINDOW_SECONDS), auth_settings.no_sign_request.value_or(false), - }); + }, + credentials.GetSessionToken()); auto new_client = std::make_shared(std::move(new_uri), std::move(auth_settings), std::move(client)); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 11da394feec..ff055508aa6 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1451,7 +1451,8 @@ void StorageS3::Configuration::connect(const ContextPtr & context) auth_settings.expiration_window_seconds.value_or( context->getConfigRef().getUInt64("s3.expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS)), auth_settings.no_sign_request.value_or(context->getConfigRef().getBool("s3.no_sign_request", false)), - }); + }, + credentials.GetSessionToken()); } void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configuration, const NamedCollection & collection) From 8fdf3ae747877b1aa0cbab3a74290f5a94edea63 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 14 Mar 2024 12:31:30 +0000 Subject: [PATCH 194/283] Add a comment. --- src/Planner/Planner.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index fde9f110d09..5624a911210 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1084,6 +1084,8 @@ void addBuildSubqueriesForSetsStepIfNeeded( auto subquery_options = select_query_options.subquery(); /// I don't know if this is a good decision, /// But for now it is done in the same way as in old analyzer. + /// This would not ignore limits for subqueries (affects mutations only). + /// See test_build_sets_from_multiple_threads-analyzer. 
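+        /// Concretely: leaving ignore_limits false keeps the size limits from the
+        /// user's settings (e.g. max_rows_to_read) in force while the sets for IN
+        /// subqueries are built.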
subquery_options.ignore_limits = false; Planner subquery_planner( query_tree, From b59680911c036eca84776247b9644b9f5475e25c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Thu, 14 Mar 2024 12:32:13 +0000 Subject: [PATCH 195/283] Move `getItemsShortCircuitImpl` and `getItemsImpl` into separate files --- src/Dictionaries/DictionaryStructure.h | 27 ++ src/Dictionaries/RangeHashedDictionary.cpp | 218 ---------------- src/Dictionaries/RangeHashedDictionary.h | 13 +- src/Dictionaries/RangeHashedDictionary_2.cpp | 250 +++++++++++++++++++ src/Dictionaries/RangeHashedDictionary_3.cpp | 135 ++++++++++ utils/check-style/check-large-objects.sh | 1 + 6 files changed, 422 insertions(+), 222 deletions(-) create mode 100644 src/Dictionaries/RangeHashedDictionary_2.cpp create mode 100644 src/Dictionaries/RangeHashedDictionary_3.cpp diff --git a/src/Dictionaries/DictionaryStructure.h b/src/Dictionaries/DictionaryStructure.h index 55060b1592f..56d11be9837 100644 --- a/src/Dictionaries/DictionaryStructure.h +++ b/src/Dictionaries/DictionaryStructure.h @@ -41,6 +41,33 @@ enum class AttributeUnderlyingType : TypeIndexUnderlying #undef map_item + +#define CALL_FOR_ALL_DICTIONARY_ATTRIBUTE_TYPES(M) \ + M(UInt8) \ + M(UInt16) \ + M(UInt32) \ + M(UInt64) \ + M(UInt128) \ + M(UInt256) \ + M(Int8) \ + M(Int16) \ + M(Int32) \ + M(Int64) \ + M(Int128) \ + M(Int256) \ + M(Decimal32) \ + M(Decimal64) \ + M(Decimal128) \ + M(Decimal256) \ + M(DateTime64) \ + M(Float32) \ + M(Float64) \ + M(UUID) \ + M(IPv4) \ + M(IPv6) \ + M(String) \ + M(Array) + /// Min and max lifetimes for a dictionary or its entry using DictionaryLifetime = ExternalLoadableLifetime; diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index 203561fc23d..8299c3ad93a 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -211,224 +211,6 @@ ColumnPtr RangeHashedDictionary::getColumn( return result; } -template -template -void RangeHashedDictionary::getItemsImpl( - const Attribute & attribute, - const Columns & key_columns, - ValueSetter && set_value, - DefaultValueExtractor & default_value_extractor) const -{ - const auto & attribute_container = std::get>(attribute.container); - - size_t keys_found = 0; - - const ColumnPtr & range_column = key_columns.back(); - auto key_columns_copy = key_columns; - key_columns_copy.pop_back(); - - DictionaryKeysArenaHolder arena_holder; - DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); - const size_t keys_size = keys_extractor.getKeysSize(); - - callOnRangeType(dict_struct.range_min->type, [&](const auto & types) - { - using Types = std::decay_t; - using RangeColumnType = typename Types::LeftType; - using RangeStorageType = typename RangeColumnType::ValueType; - using RangeInterval = Interval; - - const auto * range_column_typed = typeid_cast(range_column.get()); - if (!range_column_typed) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Dictionary {} range column type should be equal to {}", - getFullName(), - dict_struct.range_min->type->getName()); - - const auto & range_column_data = range_column_typed->getData(); - - const auto & key_attribute_container = std::get>(key_attribute.container); - - for (size_t key_index = 0; key_index < keys_size; ++key_index) - { - auto key = keys_extractor.extractCurrentKey(); - const auto it = key_attribute_container.find(key); - - if (it) - { - const auto date = range_column_data[key_index]; - const auto & 
interval_tree = it->getMapped(); - - size_t value_index = 0; - std::optional range; - - interval_tree.find(date, [&](auto & interval, auto & interval_value_index) - { - if (range) - { - if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range) - { - range = interval; - value_index = interval_value_index; - } - else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > * range) - { - range = interval; - value_index = interval_value_index; - } - } - else - { - range = interval; - value_index = interval_value_index; - } - - return true; - }); - - if (range.has_value()) - { - ++keys_found; - - AttributeType value = attribute_container[value_index]; - - if constexpr (is_nullable) - { - bool is_null = (*attribute.is_value_nullable)[value_index]; - set_value(key_index, value, is_null); - } - else - { - set_value(key_index, value, false); - } - - keys_extractor.rollbackCurrentKey(); - continue; - } - } - - if constexpr (is_nullable) - set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index)); - else - set_value(key_index, default_value_extractor[key_index], false); - - keys_extractor.rollbackCurrentKey(); - } - }); - - query_count.fetch_add(keys_size, std::memory_order_relaxed); - found_count.fetch_add(keys_found, std::memory_order_relaxed); -} - -template -template -size_t RangeHashedDictionary::getItemsShortCircuitImpl( - const Attribute & attribute, - const Columns & key_columns, - ValueSetter && set_value, - IColumn::Filter & default_mask) const -{ - const auto & attribute_container = std::get>(attribute.container); - - size_t keys_found = 0; - - const ColumnPtr & range_column = key_columns.back(); - auto key_columns_copy = key_columns; - key_columns_copy.pop_back(); - - DictionaryKeysArenaHolder arena_holder; - DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); - const size_t keys_size = keys_extractor.getKeysSize(); - default_mask.resize(keys_size); - - callOnRangeType(dict_struct.range_min->type, [&](const auto & types) - { - using Types = std::decay_t; - using RangeColumnType = typename Types::LeftType; - using RangeStorageType = typename RangeColumnType::ValueType; - using RangeInterval = Interval; - - const auto * range_column_typed = typeid_cast(range_column.get()); - if (!range_column_typed) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Dictionary {} range column type should be equal to {}", - getFullName(), - dict_struct.range_min->type->getName()); - - const auto & range_column_data = range_column_typed->getData(); - - const auto & key_attribute_container = std::get>(key_attribute.container); - - for (size_t key_index = 0; key_index < keys_size; ++key_index) - { - auto key = keys_extractor.extractCurrentKey(); - const auto it = key_attribute_container.find(key); - - if (it) - { - const auto date = range_column_data[key_index]; - const auto & interval_tree = it->getMapped(); - - size_t value_index = 0; - std::optional range; - - interval_tree.find(date, [&](auto & interval, auto & interval_value_index) - { - if (range) - { - if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range) - { - range = interval; - value_index = interval_value_index; - } - else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > * range) - { - range = interval; - value_index = interval_value_index; - } - } - else - { - range = interval; - 
value_index = interval_value_index; - } - - return true; - }); - - if (range.has_value()) - { - default_mask[key_index] = 0; - ++keys_found; - - AttributeType value = attribute_container[value_index]; - - if constexpr (is_nullable) - { - bool is_null = (*attribute.is_value_nullable)[value_index]; - set_value(key_index, value, is_null); - } - else - { - set_value(key_index, value, false); - } - - keys_extractor.rollbackCurrentKey(); - continue; - } - } - - default_mask[key_index] = 1; - - keys_extractor.rollbackCurrentKey(); - } - }); - - query_count.fetch_add(keys_size, std::memory_order_relaxed); - found_count.fetch_add(keys_found, std::memory_order_relaxed); - return keys_found; -} - template ColumnPtr RangeHashedDictionary::getColumn( const std::string & attribute_name, diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 4a8008b9051..a5dedae97c4 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -236,18 +236,23 @@ private: static Attribute createAttribute(const DictionaryAttribute & dictionary_attribute); - template + + + template + using ValueSetterFunc = std::function; + + template void getItemsImpl( const Attribute & attribute, const Columns & key_columns, - ValueSetter && set_value, + ValueSetterFunc && set_value, DefaultValueExtractor & default_value_extractor) const; - template + template size_t getItemsShortCircuitImpl( const Attribute & attribute, const Columns & key_columns, - ValueSetter && set_value, + ValueSetterFunc && set_value, IColumn::Filter & default_mask) const; ColumnPtr getColumnInternal( diff --git a/src/Dictionaries/RangeHashedDictionary_2.cpp b/src/Dictionaries/RangeHashedDictionary_2.cpp new file mode 100644 index 00000000000..2329d621da4 --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionary_2.cpp @@ -0,0 +1,250 @@ +#include + +namespace DB +{ + + +template +template +size_t RangeHashedDictionary::getItemsShortCircuitImpl( + const Attribute & attribute, + const Columns & key_columns, + typename RangeHashedDictionary::ValueSetterFunc && set_value, + IColumn::Filter & default_mask) const +{ + const auto & attribute_container = std::get>(attribute.container); + + size_t keys_found = 0; + + const ColumnPtr & range_column = key_columns.back(); + auto key_columns_copy = key_columns; + key_columns_copy.pop_back(); + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); + const size_t keys_size = keys_extractor.getKeysSize(); + default_mask.resize(keys_size); + + callOnRangeType( + dict_struct.range_min->type, + [&](const auto & types) + { + using Types = std::decay_t; + using RangeColumnType = typename Types::LeftType; + using RangeStorageType = typename RangeColumnType::ValueType; + using RangeInterval = Interval; + + const auto * range_column_typed = typeid_cast(range_column.get()); + if (!range_column_typed) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Dictionary {} range column type should be equal to {}", + getFullName(), + dict_struct.range_min->type->getName()); + + const auto & range_column_data = range_column_typed->getData(); + + const auto & key_attribute_container = std::get>(key_attribute.container); + + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + auto key = keys_extractor.extractCurrentKey(); + const auto it = key_attribute_container.find(key); + + if (it) + { + const auto date = range_column_data[key_index]; + const auto & 
interval_tree = it->getMapped(); + + size_t value_index = 0; + std::optional range; + + interval_tree.find( + date, + [&](auto & interval, auto & interval_value_index) + { + if (range) + { + if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range) + { + range = interval; + value_index = interval_value_index; + } + else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > *range) + { + range = interval; + value_index = interval_value_index; + } + } + else + { + range = interval; + value_index = interval_value_index; + } + + return true; + }); + + if (range.has_value()) + { + default_mask[key_index] = 0; + ++keys_found; + + ValueType value = attribute_container[value_index]; + + if constexpr (is_nullable) + { + bool is_null = (*attribute.is_value_nullable)[value_index]; + set_value(key_index, value, is_null); + } + else + { + set_value(key_index, value, false); + } + + keys_extractor.rollbackCurrentKey(); + continue; + } + } + + default_mask[key_index] = 1; + + keys_extractor.rollbackCurrentKey(); + } + }); + + query_count.fetch_add(keys_size, std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); + return keys_found; +} + +template +template +void RangeHashedDictionary::getItemsImpl( + const Attribute & attribute, + const Columns & key_columns, + typename RangeHashedDictionary::ValueSetterFunc && set_value, + DefaultValueExtractor & default_value_extractor) const +{ + const auto & attribute_container = std::get>(attribute.container); + + size_t keys_found = 0; + + const ColumnPtr & range_column = key_columns.back(); + auto key_columns_copy = key_columns; + key_columns_copy.pop_back(); + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); + const size_t keys_size = keys_extractor.getKeysSize(); + + callOnRangeType( + dict_struct.range_min->type, + [&](const auto & types) + { + using Types = std::decay_t; + using RangeColumnType = typename Types::LeftType; + using RangeStorageType = typename RangeColumnType::ValueType; + using RangeInterval = Interval; + + const auto * range_column_typed = typeid_cast(range_column.get()); + if (!range_column_typed) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Dictionary {} range column type should be equal to {}", + getFullName(), + dict_struct.range_min->type->getName()); + + const auto & range_column_data = range_column_typed->getData(); + + const auto & key_attribute_container = std::get>(key_attribute.container); + + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + auto key = keys_extractor.extractCurrentKey(); + const auto it = key_attribute_container.find(key); + + if (it) + { + const auto date = range_column_data[key_index]; + const auto & interval_tree = it->getMapped(); + + size_t value_index = 0; + std::optional range; + + interval_tree.find( + date, + [&](auto & interval, auto & interval_value_index) + { + if (range) + { + if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range) + { + range = interval; + value_index = interval_value_index; + } + else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > *range) + { + range = interval; + value_index = interval_value_index; + } + } + else + { + range = interval; + value_index = interval_value_index; + } + + return true; + }); + + if (range.has_value()) + { + ++keys_found; + + 
ValueType value = attribute_container[value_index]; + + if constexpr (is_nullable) + { + bool is_null = (*attribute.is_value_nullable)[value_index]; + set_value(key_index, value, is_null); + } + else + { + set_value(key_index, value, false); + } + + keys_extractor.rollbackCurrentKey(); + continue; + } + } + + if constexpr (is_nullable) + set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index)); + else + set_value(key_index, default_value_extractor[key_index], false); + + keys_extractor.rollbackCurrentKey(); + } + }); + + query_count.fetch_add(keys_size, std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); +} + +#define INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType, IsNullable, ValueType) \ + template size_t RangeHashedDictionary::getItemsShortCircuitImpl( \ + const Attribute & attribute, \ + const Columns & key_columns, \ + typename RangeHashedDictionary::ValueSetterFunc && set_value, \ + IColumn::Filter & default_mask) const; + +#define INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(AttributeType) \ + INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Simple, true, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Simple, false, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Complex, true, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Complex, false, DictionaryValueType) + +CALL_FOR_ALL_DICTIONARY_ATTRIBUTE_TYPES(INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE) + +} diff --git a/src/Dictionaries/RangeHashedDictionary_3.cpp b/src/Dictionaries/RangeHashedDictionary_3.cpp new file mode 100644 index 00000000000..a3136d6f63d --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionary_3.cpp @@ -0,0 +1,135 @@ +#include + +namespace DB +{ + +template +template +void RangeHashedDictionary::getItemsImpl( + const Attribute & attribute, + const Columns & key_columns, + typename RangeHashedDictionary::ValueSetterFunc && set_value, + DefaultValueExtractor & default_value_extractor) const +{ + const auto & attribute_container = std::get>(attribute.container); + + size_t keys_found = 0; + + const ColumnPtr & range_column = key_columns.back(); + auto key_columns_copy = key_columns; + key_columns_copy.pop_back(); + + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); + const size_t keys_size = keys_extractor.getKeysSize(); + + callOnRangeType( + dict_struct.range_min->type, + [&](const auto & types) + { + using Types = std::decay_t; + using RangeColumnType = typename Types::LeftType; + using RangeStorageType = typename RangeColumnType::ValueType; + using RangeInterval = Interval; + + const auto * range_column_typed = typeid_cast(range_column.get()); + if (!range_column_typed) + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Dictionary {} range column type should be equal to {}", + getFullName(), + dict_struct.range_min->type->getName()); + + const auto & range_column_data = range_column_typed->getData(); + + const auto & key_attribute_container = std::get>(key_attribute.container); + + for (size_t key_index = 0; key_index < keys_size; ++key_index) + { + auto key = keys_extractor.extractCurrentKey(); + const auto it = key_attribute_container.find(key); + + if (it) + { + const auto date = range_column_data[key_index]; + const auto & interval_tree = it->getMapped(); + + size_t 
value_index = 0; + std::optional range; + + interval_tree.find( + date, + [&](auto & interval, auto & interval_value_index) + { + if (range) + { + if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range) + { + range = interval; + value_index = interval_value_index; + } + else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > *range) + { + range = interval; + value_index = interval_value_index; + } + } + else + { + range = interval; + value_index = interval_value_index; + } + + return true; + }); + + if (range.has_value()) + { + ++keys_found; + + ValueType value = attribute_container[value_index]; + + if constexpr (is_nullable) + { + bool is_null = (*attribute.is_value_nullable)[value_index]; + set_value(key_index, value, is_null); + } + else + { + set_value(key_index, value, false); + } + + keys_extractor.rollbackCurrentKey(); + continue; + } + } + + if constexpr (is_nullable) + set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index)); + else + set_value(key_index, default_value_extractor[key_index], false); + + keys_extractor.rollbackCurrentKey(); + } + }); + + query_count.fetch_add(keys_size, std::memory_order_relaxed); + found_count.fetch_add(keys_found, std::memory_order_relaxed); +} + +#define INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType, IsNullable, AttributeType, ValueType) \ +template void RangeHashedDictionary::getItemsImpl>( \ + const Attribute & attribute,\ + const Columns & key_columns,\ + typename RangeHashedDictionary::ValueSetterFunc && set_value,\ + DictionaryDefaultValueExtractor & default_value_extractor) const; + +#define INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(AttributeType) \ + INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Simple, true, AttributeType, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Simple, false, AttributeType, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Complex, true, AttributeType, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Complex, false, AttributeType, DictionaryValueType) + +CALL_FOR_ALL_DICTIONARY_ATTRIBUTE_TYPES(INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE) + +} diff --git a/utils/check-style/check-large-objects.sh b/utils/check-style/check-large-objects.sh index 5b0e8e88df5..5ef57ea4f6c 100755 --- a/utils/check-style/check-large-objects.sh +++ b/utils/check-style/check-large-objects.sh @@ -6,6 +6,7 @@ TU_EXCLUDES=( CastOverloadResolver AggregateFunctionUniq FunctionsConversion + RangeHashedDictionary_ Aggregator ) From 85a79bc1cc8b9a374bfa2584dc4af87b24267cfd Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 14 Mar 2024 12:39:23 +0000 Subject: [PATCH 196/283] Fix the longest test unnecessary 3 minute wait --- .../01599_multiline_input_and_singleline_comments.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh b/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh index 7f77f8bb403..a537dce2d92 100755 --- a/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh +++ b/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh @@ -1,9 +1,10 @@ #!/usr/bin/expect -f -# Tags: no-fasttest -# Tag no-fasttest: 180 seconds running log_user 0 + +# In some places `-timeout 1`` is used to avoid expect to always wait for the whole timeout set timeout 60 + match_max 100000 if ![info 
exists env(CLICKHOUSE_PORT_TCP)] {set env(CLICKHOUSE_PORT_TCP) 9000} @@ -13,11 +14,11 @@ expect ":) " # Make a query send -- "SELECT 1\r" -expect ":-] " +expect -timeout 1 ":-] " send -- "-- xxx\r" -expect ":-] " +expect -timeout 1 ":-] " send -- ", 2\r" -expect ":-] " +expect -timeout 1 ":-] " send -- ";\r" expect "│ 1 │ 2 │" From 37913d94a3c9704775010cfc2f1d1fb9c705f7a9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 14 Mar 2024 13:39:50 +0100 Subject: [PATCH 197/283] Merge with master --- src/Functions/FunctionsConversion.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 70cbf31bcb3..42056067f00 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -4471,20 +4471,20 @@ arguments, result_type, input_rows_count); \ if (from_low_cardinality) { - const auto * col_low_cardinality = typeid_cast(arguments[0].column.get()); + const auto & col_low_cardinality = typeid_cast(*arguments[0].column); - if (skip_not_null_check && col_low_cardinality->containsNull()) + if (skip_not_null_check && col_low_cardinality.containsNull()) throw Exception(ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN, "Cannot convert NULL value to non-Nullable type"); - arg.column = col_low_cardinality->getDictionary().getNestedColumn(); + arg.column = col_low_cardinality.getDictionary().getNestedColumn(); arg.type = from_low_cardinality->getDictionaryType(); /// TODO: Make map with defaults conversion. src_converted_to_full_column = !removeNullable(arg.type)->equals(*removeNullable(res_type)); if (src_converted_to_full_column) - arg.column = arg.column->index(col_low_cardinality->getIndexes(), 0); + arg.column = arg.column->index(col_low_cardinality.getIndexes(), 0); else - res_indexes = col_low_cardinality->getIndexesPtr(); + res_indexes = col_low_cardinality.getIndexesPtr(); tmp_rows_count = arg.column->size(); } @@ -4496,14 +4496,12 @@ arguments, result_type, input_rows_count); \ if (to_low_cardinality) { auto res_column = to_low_cardinality->createColumn(); - auto * col_low_cardinality = typeid_cast(res_column.get()); + auto & col_low_cardinality = typeid_cast(*res_column); if (from_low_cardinality && !src_converted_to_full_column) - { - col_low_cardinality->insertRangeFromDictionaryEncodedColumn(*converted_column, *res_indexes); - } + col_low_cardinality.insertRangeFromDictionaryEncodedColumn(*converted_column, *res_indexes); else - col_low_cardinality->insertRangeFromFullColumn(*converted_column, 0, converted_column->size()); + col_low_cardinality.insertRangeFromFullColumn(*converted_column, 0, converted_column->size()); return res_column; } From 107acf54c609147acc6bf84407f168e274079505 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 14 Mar 2024 13:42:07 +0100 Subject: [PATCH 198/283] Fix tests --- tests/integration/test_storage_s3/test.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index dbbe670e8ca..6d5b84a8143 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1414,10 +1414,10 @@ def test_signatures(started_cluster): ) assert int(result) == 1 - result = instance.query( + error = instance.query_and_get_error( f"select * from 
s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}')"
     )
-    assert int(result) == 1
+    assert "S3_ERROR" in error
 
     result = instance.query(
         f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'Arrow', 'x UInt64', 'auto')"
@@ -1429,20 +1429,20 @@ def test_signatures(started_cluster):
     )
     assert int(result) == 1
 
-    result = instance.query(
+    error = instance.query_and_get_error(
         f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}', 'Arrow')"
     )
-    assert int(result) == 1
+    assert "S3_ERROR" in error
 
-    lt = instance.query(
+    error = instance.query_and_get_error(
         f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}', 'Arrow', 'x UInt64')"
     )
-    assert int(result) == 1
+    assert "S3_ERROR" in error
 
-    lt = instance.query(
+    error = instance.query_and_get_error(
         f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', '{session_token}', 'Arrow', 'x UInt64', 'auto')"
     )
-    assert int(result) == 1
+    assert "S3_ERROR" in error
 
 
 def test_select_columns(started_cluster):

From 047fb87a9ae20dd57ba370a277cfd4a43411876d Mon Sep 17 00:00:00 2001
From: serxa
Date: Thu, 14 Mar 2024 12:46:45 +0000
Subject: [PATCH 199/283] typo

---
 .../01599_multiline_input_and_singleline_comments.sh          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh b/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh
index a537dce2d92..f1acd39136f 100755
--- a/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh
+++ b/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh
@@ -2,7 +2,7 @@
 
 log_user 0
 
-# In some places `-timeout 1`` is used to avoid expect to always wait for the whole timeout
+# In some places `-timeout 1` is used so that expect does not always wait for the whole timeout
 set timeout 60

From bf9da768bfb090dec9b366d97240a2949b0e150e Mon Sep 17 00:00:00 2001
From: kssenii
Date: Thu, 14 Mar 2024 13:53:40 +0100
Subject: [PATCH 200/283] More detailed explanation

---
 docs/en/operations/storing-data.md | 42 +++++++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md
index 4f676904375..fd81bc197d1 100644
--- a/docs/en/operations/storing-data.md
+++ b/docs/en/operations/storing-data.md
@@ -10,7 +10,11 @@ Data, processed in ClickHouse, is usually stored in the local file system — on
 2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html))
 3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs).
 
-Note: to work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine, and to work with data stored in Azure Blob Storage use [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine. 
They are different from external storage described on this page as they allow to read data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` family or `Log` family tables.
+:::note ClickHouse also has support for external table engines, which differ from the external storage options described on this page: they allow reading data stored in some general file format (like Parquet), while this page describes the storage configuration for ClickHouse `MergeTree` family or `Log` family tables.
+1. To work with data stored on `Amazon S3` disks, use the [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine.
+2. To work with data in the Hadoop Distributed File System, use the [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
+3. To work with data stored in Azure Blob Storage, use the [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine.
+:::
 
 ## Configuring external storage {#configuring-external-storage}
 
@@ -23,8 +27,9 @@ Disk configuration requires:
 Starting from 24.1 clickhouse version, it is possible to use a new configuration option. It requires to specify:
 1. `type` equal to `object_storage`
-2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`.
+2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs`, `local_blob_storage` (or just `local` from `24.3`), `web`.
 Optionally, `metadata_type` can be specified (it is equal to `local` by default), but it can also be set to `plain`, `web`.
+Usage of the `plain` metadata type is described in the [plain storage section](/docs/en/operations/storing-data.md/#plain-storage); the `web` metadata type can be used only with the `web` object storage type; the `local` metadata type stores metadata files locally (each metadata file contains a mapping to files in object storage and some additional meta information about them).
 
 E.g. configuration option
 ``` xml
@@ -143,7 +148,7 @@
 
 ## Dynamic Configuration {#dynamic-configuration}
 
-There is also a possibility to specify storage configuration without a predefined disk in configuration in a configuration file, but can be configured in the CREATE/ATTACH query settings.
+There is also a possibility to specify a storage configuration without a predefined disk in the configuration file; instead, it can be configured in the `CREATE`/`ATTACH` query settings.
 
 The following example query builds on the above dynamic disk configuration and shows how to use a local disk to cache data from a table stored at a URL.
 
@@ -306,10 +311,35 @@ Optional parameters:
 Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
 :::
 
-### Using Plain Storage {#s3-storage}
+### Using Plain Storage {#plain-storage}
 
-There is a disk type `s3_plain`, which provides a write-once storage. Unlike `s3` disk type, it stores data as is, e.g. instead of randomly-generated blob names, it uses normal file names as clickhouse stores files on local disk. So this disk type allows to keeper a static version of the table and can also be used to create backups on it.
-Configuration parameters are the same as for `s3` disk type.
+In `22.10` a new disk type `s3_plain` was introduced, which provides write-once storage. Its configuration parameters are the same as for the `s3` disk type.
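+
+As a minimal sketch (the table and disk names here are hypothetical, and the endpoint is the same placeholder bucket used in the examples above), such a disk can also be defined dynamically in query settings, for example to attach a table whose data already resides on plain storage:
+
+``` sql
+ATTACH TABLE data_plain (a Int32, b String)
+ENGINE = MergeTree() ORDER BY a
+SETTINGS disk = disk(name = 's3_plain_disk', type = 's3_plain', endpoint = 'https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/', use_environment_credentials = 1);
+```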
+Unlike the `s3` disk type, `s3_plain` stores data as is: instead of randomly generated blob names, it uses normal file names (the same way ClickHouse stores files on a local disk) and does not store any metadata locally; the metadata is instead derived from the data on `s3`.
+
+This disk type allows keeping a static version of the table, as it does not allow executing merges on the existing data or inserting new data.
+A use case for this disk type is to create backups on it, which can be done via `BACKUP TABLE data TO Disk('plain_disk_name', 'backup_name')`. Afterwards you can run `RESTORE TABLE data AS data_restored FROM Disk('plain_disk_name', 'backup_name')` or use `ATTACH TABLE data (...) ENGINE = MergeTree() SETTINGS disk = 'plain_disk_name'`.
+
+Configuration:
+``` xml
+
+    s3_plain
+    https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/
+    1
+
+```
+
+Starting from `24.1`, it is possible to configure any object storage disk (`s3`, `azure`, `hdfs`, `local`) using the `plain` metadata type.
+
+Configuration:
+``` xml
+
+    object_storage
+    azure
+    plain
+    https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/
+    1
+
+```
 
 ### Using Azure Blob Storage {#azure-blob-storage}

From c3aefb28e5eb11c171700dfe439c2371c6a3825e Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Thu, 14 Mar 2024 14:13:15 +0100
Subject: [PATCH 201/283] Remove unnecessary layers from clickhouse/cctools

---
 docker/packager/cctools/Dockerfile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docker/packager/cctools/Dockerfile b/docker/packager/cctools/Dockerfile
index 1b8c675a5c5..d986c6a3c86 100644
--- a/docker/packager/cctools/Dockerfile
+++ b/docker/packager/cctools/Dockerfile
@@ -2,7 +2,7 @@
 # It's based on the assumption that we don't care of the cctools version so much
 # It event does not depend on the clickhouse/fasttest in the `docker/images.json`
 ARG FROM_TAG=latest
-FROM clickhouse/fasttest:$FROM_TAG
+FROM clickhouse/fasttest:$FROM_TAG as builder
 
 ENV CC=clang-${LLVM_VERSION}
 ENV CXX=clang++-${LLVM_VERSION}
@@ -29,3 +29,6 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
     && make install -j$(nproc) \
     && cd ../.. 
\ && rm -rf cctools-port + +FROM scratch +COPY --from=builder /cctools /cctools From 9067c1ab9292de5064a3cb8547557798ace4ac99 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 14 Mar 2024 14:14:24 +0100 Subject: [PATCH 202/283] Merge with master --- src/Functions/FunctionsConversion.h | 4990 --------------------------- 1 file changed, 4990 deletions(-) delete mode 100644 src/Functions/FunctionsConversion.h diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h deleted file mode 100644 index f338af28240..00000000000 --- a/src/Functions/FunctionsConversion.h +++ /dev/null @@ -1,4990 +0,0 @@ -#pragma once - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int ATTEMPT_TO_READ_AFTER_EOF; - extern const int CANNOT_PARSE_NUMBER; - extern const int CANNOT_READ_ARRAY_FROM_TEXT; - extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED; - extern const int CANNOT_PARSE_QUOTED_STRING; - extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; - extern const int CANNOT_PARSE_DATE; - extern const int CANNOT_PARSE_DATETIME; - extern const int CANNOT_PARSE_TEXT; - extern const int CANNOT_PARSE_UUID; - extern const int CANNOT_PARSE_IPV4; - extern const int CANNOT_PARSE_IPV6; - extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; - extern const int LOGICAL_ERROR; - extern const int TYPE_MISMATCH; - extern const int CANNOT_CONVERT_TYPE; - extern const int ILLEGAL_COLUMN; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int NOT_IMPLEMENTED; - extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN; - extern const int CANNOT_PARSE_BOOL; - extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; -} - -/** Type conversion functions. - * toType - conversion in "natural way"; - */ - -inline UInt32 extractToDecimalScale(const ColumnWithTypeAndName & named_column) -{ - const auto * arg_type = named_column.type.get(); - bool ok = checkAndGetDataType(arg_type) - || checkAndGetDataType(arg_type) - || checkAndGetDataType(arg_type) - || checkAndGetDataType(arg_type); - if (!ok) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of toDecimal() scale {}", named_column.type->getName()); - - Field field; - named_column.column->get(0, field); - return static_cast(field.get()); -} - -/// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type. -struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; }; - -struct AccurateConvertStrategyAdditions -{ - UInt32 scale { 0 }; -}; - -struct AccurateOrNullConvertStrategyAdditions -{ - UInt32 scale { 0 }; -}; - - -struct ConvertDefaultBehaviorTag {}; -struct ConvertReturnNullOnErrorTag {}; -struct ConvertReturnZeroOnErrorTag {}; - -/** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment. 
- * (Date is represented internally as number of days from some day; DateTime - as unix timestamp) - */ -template -struct ConvertImpl -{ - using FromFieldType = typename FromDataType::FieldType; - using ToFieldType = typename ToDataType::FieldType; - - template - static ColumnPtr NO_SANITIZE_UNDEFINED execute( - const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type [[maybe_unused]], size_t input_rows_count, - Additions additions [[maybe_unused]] = Additions()) - { - const ColumnWithTypeAndName & named_from = arguments[0]; - - using ColVecFrom = typename FromDataType::ColumnType; - using ColVecTo = typename ToDataType::ColumnType; - - if constexpr ((IsDataTypeDecimal || IsDataTypeDecimal) - && !(std::is_same_v || std::is_same_v)) - { - if constexpr (!IsDataTypeDecimalOrNumber || !IsDataTypeDecimalOrNumber) - { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - named_from.column->getName(), Name::name); - } - } - - if (const ColVecFrom * col_from = checkAndGetColumn(named_from.column.get())) - { - typename ColVecTo::MutablePtr col_to = nullptr; - - if constexpr (IsDataTypeDecimal) - { - UInt32 scale; - - if constexpr (std::is_same_v - || std::is_same_v) - { - scale = additions.scale; - } - else - { - scale = additions; - } - - col_to = ColVecTo::create(0, scale); - } - else - col_to = ColVecTo::create(); - - const auto & vec_from = col_from->getData(); - auto & vec_to = col_to->getData(); - vec_to.resize(input_rows_count); - - ColumnUInt8::MutablePtr col_null_map_to; - ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr; - if constexpr (std::is_same_v) - { - col_null_map_to = ColumnUInt8::create(input_rows_count, false); - vec_null_map_to = &col_null_map_to->getData(); - } - - bool result_is_bool = isBool(result_type); - for (size_t i = 0; i < input_rows_count; ++i) - { - if constexpr (std::is_same_v) - { - if (result_is_bool) - { - vec_to[i] = vec_from[i] != FromFieldType(0); - continue; - } - } - - if constexpr (std::is_same_v && std::is_same_v) - { - static_assert( - std::is_same_v, - "UInt128 and UUID types must be same"); - - vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; - vec_to[i].items[0] = vec_from[i].toUnderType().items[1]; - - continue; - } - - if constexpr (std::is_same_v && std::is_same_v) - { - static_assert( - std::is_same_v, - "UInt128 and IPv6 types must be same"); - - vec_to[i].items[1] = std::byteswap(vec_from[i].toUnderType().items[0]); - vec_to[i].items[0] = std::byteswap(vec_from[i].toUnderType().items[1]); - - continue; - } - - if constexpr (std::is_same_v != std::is_same_v) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Conversion between numeric types and UUID is not supported. " - "Probably the passed UUID is unquoted"); - } - else if constexpr ( - (std::is_same_v != std::is_same_v) - && !(is_any_of || is_any_of) - ) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conversion from {} to {} is not supported", - TypeName, TypeName); - } - else if constexpr (std::is_same_v != std::is_same_v && !(std::is_same_v || std::is_same_v)) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, - "Conversion between numeric types and IPv6 is not supported. 
" - "Probably the passed IPv6 is unquoted"); - } - else - { - if constexpr (IsDataTypeDecimal || IsDataTypeDecimal) - { - if constexpr (std::is_same_v) - { - ToFieldType result; - bool convert_result = false; - - if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) - convert_result = tryConvertDecimals(vec_from[i], col_from->getScale(), col_to->getScale(), result); - else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) - convert_result = tryConvertFromDecimal(vec_from[i], col_from->getScale(), result); - else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) - convert_result = tryConvertToDecimal(vec_from[i], col_to->getScale(), result); - - if (convert_result) - vec_to[i] = result; - else - { - vec_to[i] = static_cast(0); - (*vec_null_map_to)[i] = true; - } - } - else - { - if constexpr (IsDataTypeDecimal && IsDataTypeDecimal) - vec_to[i] = convertDecimals(vec_from[i], col_from->getScale(), col_to->getScale()); - else if constexpr (IsDataTypeDecimal && IsDataTypeNumber) - vec_to[i] = convertFromDecimal(vec_from[i], col_from->getScale()); - else if constexpr (IsDataTypeNumber && IsDataTypeDecimal) - vec_to[i] = convertToDecimal(vec_from[i], col_to->getScale()); - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unsupported data type in conversion function"); - } - } - else - { - /// If From Data is Nan or Inf and we convert to integer type, throw exception - if constexpr (std::is_floating_point_v && !std::is_floating_point_v) - { - if (!isFinite(vec_from[i])) - { - if constexpr (std::is_same_v) - { - vec_to[i] = 0; - (*vec_null_map_to)[i] = true; - continue; - } - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Unexpected inf or nan to integer conversion"); - } - } - - if constexpr (std::is_same_v - || std::is_same_v) - { - bool convert_result = accurate::convertNumeric(vec_from[i], vec_to[i]); - - if (!convert_result) - { - if (std::is_same_v) - { - vec_to[i] = 0; - (*vec_null_map_to)[i] = true; - } - else - { - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Value in column {} cannot be safely converted into type {}", - named_from.column->getName(), result_type->getName()); - } - } - } - else - { - if constexpr (std::is_same_v && std::is_same_v) - { - const uint8_t ip4_cidr[] {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}; - const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); - if (!matchIPv6Subnet(src, ip4_cidr, 96)) - { - char addr[IPV6_MAX_TEXT_LENGTH + 1] {}; - char * paddr = addr; - formatIPv6(src, paddr); - - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "IPv6 {} in column {} is not in IPv4 mapping block", addr, named_from.column->getName()); - } - - uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); - if constexpr (std::endian::native == std::endian::little) - { - dst[0] = src[15]; - dst[1] = src[14]; - dst[2] = src[13]; - dst[3] = src[12]; - } - else - { - dst[0] = src[12]; - dst[1] = src[13]; - dst[2] = src[14]; - dst[3] = src[15]; - } - } - else if constexpr (std::is_same_v && std::is_same_v) - { - const uint8_t * src = reinterpret_cast(&vec_from[i].toUnderType()); - uint8_t * dst = reinterpret_cast(&vec_to[i].toUnderType()); - std::memset(dst, '\0', IPV6_BINARY_LENGTH); - dst[10] = dst[11] = 0xff; - - if constexpr (std::endian::native == std::endian::little) - { - dst[12] = src[3]; - dst[13] = src[2]; - dst[14] = src[1]; - dst[15] = src[0]; - } - else - { - dst[12] = src[0]; - dst[13] = src[1]; - dst[14] = src[2]; - dst[15] = src[3]; - } - } - else if constexpr 
(std::is_same_v && std::is_same_v) - vec_to[i] = static_cast(static_cast(vec_from[i])); - else if constexpr (std::is_same_v && (std::is_same_v || std::is_same_v)) - vec_to[i] = static_cast(vec_from[i] * DATE_SECONDS_PER_DAY); - else - vec_to[i] = static_cast(vec_from[i]); - } - } - } - } - - if constexpr (std::is_same_v) - return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); - else - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - named_from.column->getName(), Name::name); - } -}; - -/** Conversion of DateTime to Date: throw off time component. - */ -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -/** Conversion of DateTime to Date32: throw off time component. - */ -template -struct ConvertImpl - : DateTimeTransformImpl {}; - -/** Conversion of Date to DateTime: adding 00:00:00 time component. - */ -template -struct ToDateTimeImpl -{ - static constexpr auto name = "toDateTime"; - - static UInt32 execute(UInt16 d, const DateLUTImpl & time_zone) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (d > MAX_DATETIME_DAY_NUM) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Day number {} is out of bounds of type DateTime", d); - } - else if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) - { - if (d > MAX_DATETIME_DAY_NUM) - d = MAX_DATETIME_DAY_NUM; - } - return static_cast(time_zone.fromDayNum(DayNum(d))); - } - - static UInt32 execute(Int32 d, const DateLUTImpl & time_zone) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) - { - if (d < 0) - return 0; - else if (d > MAX_DATETIME_DAY_NUM) - d = MAX_DATETIME_DAY_NUM; - } - else if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (d < 0 || d > MAX_DATETIME_DAY_NUM) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type DateTime", d); - } - return static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); - } - - static UInt32 execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) - { - return dt; - } - - static UInt32 execute(Int64 dt64, const DateLUTImpl & /*time_zone*/) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Ignore) - return static_cast(dt64); - else - { - if (dt64 < 0 || dt64 >= MAX_DATETIME_TIMESTAMP) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Saturate) - return dt64 < 0 ? 0 : std::numeric_limits::max(); - else - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type DateTime", dt64); - } - else - return static_cast(dt64); - } - } -}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -/// Implementation of toDate function. 
- -template -struct ToDateTransform32Or64 -{ - static constexpr auto name = "toDate"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); - } - /// if value is smaller (or equal) than maximum day value for Date, than treat it as day num, - /// otherwise treat it as unix timestamp. This is a bit weird, but we leave this behavior. - if (from <= DATE_LUT_MAX_DAY_NUM) - return from; - else - return time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); - } -}; - -/** Conversion of Date32 to Date. - */ -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ToDateTransform32Or64Signed -{ - static constexpr auto name = "toDate"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - // TODO: decide narrow or extended range based on FromType - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < 0 || from > MAX_DATE_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); - } - else - { - if (from < 0) - return 0; - } - return (from <= DATE_LUT_MAX_DAY_NUM) - ? static_cast(from) - : time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATE_TIMESTAMP))); - } -}; - -template -struct ToDateTransform8Or16Signed -{ - static constexpr auto name = "toDate"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if (from < 0) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Value {} is out of bounds of type Date", from); - else - return 0; - } - return from; - } -}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -/// Implementation of toDate32 function. 
- -template -struct ToDate32Transform32Or64 -{ - static constexpr auto name = "toDate32"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - if (from < DATE_LUT_MAX_EXTEND_DAY_NUM) - return static_cast(from); - else - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type Date32", from); - } - return time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATETIME64_TIMESTAMP))); - } - } -}; - -template -struct ToDate32Transform32Or64Signed -{ - static constexpr auto name = "toDate32"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) - { - static const Int32 daynum_min_offset = -static_cast(time_zone.getDayNumOffsetEpoch()); - - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < daynum_min_offset || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type Date32", from); - } - - if (from < daynum_min_offset) - return daynum_min_offset; - - return (from < DATE_LUT_MAX_EXTEND_DAY_NUM) - ? static_cast(from) - : time_zone.toDayNum(std::min(time_t(Int64(from)), time_t(MAX_DATETIME64_TIMESTAMP))); - } -}; - -template -struct ToDate32Transform8Or16Signed -{ - static constexpr auto name = "toDate32"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - return from; - } -}; - -/** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience, - * Float32, Float64) to Date. If the - * number is less than 65536, then it is treated as DayNum, and if it's greater or equals to 65536, - * then treated as unix timestamp. If the number exceeds UInt32, saturate to MAX_UINT32 then as DayNum. - * It's a bit illogical, as we actually have two functions in one. - * But allows to support frequent case, - * when user write toDate(UInt32), expecting conversion of unix timestamp to Date. - * (otherwise such usage would be frequent mistake). 
- */ -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - - -template -struct ToDateTimeTransform64 -{ - static constexpr auto name = "toDateTime"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); - } - return static_cast(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); - } -}; - -template -struct ToDateTimeTransformSigned -{ - static constexpr auto name = "toDateTime"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if (from < 0) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); - else - return 0; - } - return from; - } -}; - -template -struct ToDateTimeTransform64Signed -{ - static constexpr auto name = "toDateTime"; - - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < 0 || from > MAX_DATETIME_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime", from); - } - - if (from < 0) - return 0; - return static_cast(std::min(time_t(from), time_t(MAX_DATETIME_TIMESTAMP))); - } -}; - -/// Special case of converting Int8, Int16, Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to DateTime. 
-template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -/** Conversion of numeric to DateTime64 - */ - -template -struct ToDateTime64TransformUnsigned -{ - static constexpr auto name = "toDateTime64"; - - const DateTime64::NativeType scale_multiplier = 1; - - ToDateTime64TransformUnsigned(UInt32 scale = 0) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); - else - return DecimalUtils::decimalFromComponentsWithMultiplier(from, 0, scale_multiplier); - } - else - return DecimalUtils::decimalFromComponentsWithMultiplier(std::min(from, MAX_DATETIME64_TIMESTAMP), 0, scale_multiplier); - } -}; -template -struct ToDateTime64TransformSigned -{ - static constexpr auto name = "toDateTime64"; - - const DateTime64::NativeType scale_multiplier = 1; - - ToDateTime64TransformSigned(UInt32 scale = 0) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < MIN_DATETIME64_TIMESTAMP || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); - } - from = static_cast(std::max(from, MIN_DATETIME64_TIMESTAMP)); - from = static_cast(std::min(from, MAX_DATETIME64_TIMESTAMP)); - - return DecimalUtils::decimalFromComponentsWithMultiplier(from, 0, scale_multiplier); - } -}; -template -struct ToDateTime64TransformFloat -{ - static constexpr auto name = "toDateTime64"; - - const UInt32 scale = 1; - - ToDateTime64TransformFloat(UInt32 scale_ = 0) /// NOLINT - : scale(scale_) - {} - - NO_SANITIZE_UNDEFINED DateTime64::NativeType execute(FromType from, const DateLUTImpl &) const - { - if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) - { - if (from < MIN_DATETIME64_TIMESTAMP || from > MAX_DATETIME64_TIMESTAMP) [[unlikely]] - throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Timestamp value {} is out of bounds of type DateTime64", from); - } - - from = std::max(from, static_cast(MIN_DATETIME64_TIMESTAMP)); - from = std::min(from, static_cast(MAX_DATETIME64_TIMESTAMP)); - return convertToDecimal(from, scale); - } -}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct 
ConvertImpl - : DateTimeTransformImpl, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl, false> {}; - - -/** Conversion of DateTime64 to Date or DateTime: discards fractional part. - */ -template -struct FromDateTime64Transform -{ - static constexpr auto name = Transform::name; - - const DateTime64::NativeType scale_multiplier = 1; - - FromDateTime64Transform(UInt32 scale) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - auto execute(DateTime64::NativeType dt, const DateLUTImpl & time_zone) const - { - const auto c = DecimalUtils::splitWithScaleMultiplier(DateTime64(dt), scale_multiplier); - return Transform::execute(static_cast(c.whole), time_zone); - } -}; - -/** Conversion of DateTime64 to Date or DateTime: discards fractional part. - */ -template -struct ConvertImpl - : DateTimeTransformImpl>, false> {}; - -template -struct ConvertImpl - : DateTimeTransformImpl>, false> {}; - -struct ToDateTime64Transform -{ - static constexpr auto name = "toDateTime64"; - - const DateTime64::NativeType scale_multiplier = 1; - - ToDateTime64Transform(UInt32 scale = 0) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - DateTime64::NativeType execute(UInt16 d, const DateLUTImpl & time_zone) const - { - const auto dt = ToDateTimeImpl<>::execute(d, time_zone); - return execute(dt, time_zone); - } - - DateTime64::NativeType execute(Int32 d, const DateLUTImpl & time_zone) const - { - Int64 dt = static_cast(time_zone.fromDayNum(ExtendedDayNum(d))); - return DecimalUtils::decimalFromComponentsWithMultiplier(dt, 0, scale_multiplier); - } - - DateTime64::NativeType execute(UInt32 dt, const DateLUTImpl & /*time_zone*/) const - { - return DecimalUtils::decimalFromComponentsWithMultiplier(dt, 0, scale_multiplier); - } -}; - -/** Conversion of Date or DateTime to DateTime64: add zero sub-second part. - */ -template -struct ConvertImpl - : DateTimeTransformImpl {}; - -template -struct ConvertImpl - : DateTimeTransformImpl {}; - -template -struct ConvertImpl - : DateTimeTransformImpl {}; - - -/** Transformation of numbers, dates, datetimes to strings: through formatting. 
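// [Editor's note] The section opened above serializes values through per-type FormatImpl
// specializations. A standalone sketch of the Date case (days since epoch -> "YYYY-MM-DD"),
// using the C library instead of ClickHouse's DateLUT; writeDateTextSketch is an illustrative
// stand-in, not the real writeDateText signature.
#include <cassert>
#include <cstddef>
#include <ctime>
#include <string_view>

void writeDateTextSketch(unsigned day_num, char * out, size_t out_size)
{
    const time_t seconds = static_cast<time_t>(day_num) * 86400; // a DayNum counts whole days
    const std::tm * tm_value = std::gmtime(&seconds);
    std::strftime(out, out_size, "%Y-%m-%d", tm_value);
}

int main()
{
    char buf[16];
    writeDateTextSketch(19768, buf, sizeof(buf)); // 19768 days after 1970-01-01
    assert(std::string_view(buf) == "2024-02-15");
}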
- */ -template -struct FormatImpl -{ - template - static ReturnType execute(const typename DataType::FieldType x, WriteBuffer & wb, const DataType *, const DateLUTImpl *) - { - writeText(x, wb); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDate::FieldType x, WriteBuffer & wb, const DataTypeDate *, const DateLUTImpl * time_zone) - { - writeDateText(DayNum(x), wb, *time_zone); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDate32::FieldType x, WriteBuffer & wb, const DataTypeDate32 *, const DateLUTImpl * time_zone) - { - writeDateText(ExtendedDayNum(x), wb, *time_zone); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDateTime::FieldType x, WriteBuffer & wb, const DataTypeDateTime *, const DateLUTImpl * time_zone) - { - writeDateTimeText(x, wb, *time_zone); - return ReturnType(true); - } -}; - -template <> -struct FormatImpl -{ - template - static ReturnType execute(const DataTypeDateTime64::FieldType x, WriteBuffer & wb, const DataTypeDateTime64 * type, const DateLUTImpl * time_zone) - { - writeDateTimeText(DateTime64(x), type->getScale(), wb, *time_zone); - return ReturnType(true); - } -}; - - -template -struct FormatImpl> -{ - template - static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeEnum * type, const DateLUTImpl *) - { - static constexpr bool throw_exception = std::is_same_v; - - if constexpr (throw_exception) - { - writeString(type->getNameForValue(x), wb); - } - else - { - StringRef res; - bool is_ok = type->getNameForValue(x, res); - if (is_ok) - writeString(res, wb); - return ReturnType(is_ok); - } - } -}; - -template -struct FormatImpl> -{ - template - static ReturnType execute(const FieldType x, WriteBuffer & wb, const DataTypeDecimal * type, const DateLUTImpl *) - { - writeText(x, type->getScale(), wb, false); - return ReturnType(true); - } -}; - - -/// DataTypeEnum to DataType free conversion -template -struct ConvertImpl, DataTypeNumber, Name, ConvertDefaultBehaviorTag> -{ - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) - { - return arguments[0].column; - } -}; - -static inline ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col) -{ - ColumnUInt8::MutablePtr null_map = nullptr; - if (const auto * col_null = checkAndGetColumn(col.get())) - { - null_map = ColumnUInt8::create(); - null_map->insertRangeFrom(col_null->getNullMapColumn(), 0, col_null->size()); - } - return null_map; -} - -template -requires (!std::is_same_v) -struct ConvertImpl -{ - using FromFieldType = typename FromDataType::FieldType; - using ColVecType = ColumnVectorOrDecimal; - - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) - { - if constexpr (IsDataTypeDateOrDateTime) - { - auto datetime_arg = arguments[0]; - - const DateLUTImpl * time_zone = nullptr; - const ColumnConst * time_zone_column = nullptr; - - if (arguments.size() == 1) - { - auto non_null_args = createBlockWithNestedColumns(arguments); - time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); - } - else /// When we have a column for timezone - { - datetime_arg.column = datetime_arg.column->convertToFullColumnIfConst(); - - if constexpr (std::is_same_v || std::is_same_v) - time_zone = &DateLUT::instance(); - /// For argument of Date or 
DateTime type, second argument with time zone could be specified. - if constexpr (std::is_same_v || std::is_same_v) - { - if ((time_zone_column = checkAndGetColumnConst(arguments[1].column.get()))) - { - auto non_null_args = createBlockWithNestedColumns(arguments); - time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0); - } - } - } - const auto & col_with_type_and_name = columnGetNested(datetime_arg); - - if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) - { - auto col_to = ColumnString::create(); - - const typename ColVecType::Container & vec_from = col_from->getData(); - ColumnString::Chars & data_to = col_to->getChars(); - ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - - if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); - else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); - else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss") + 1)); - else if constexpr (std::is_same_v) - data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss.") + col_from->getScale() + 1)); - else - data_to.resize(size * 3); /// Arbitrary - - offsets_to.resize(size); - - WriteBufferFromVector write_buffer(data_to); - const auto & type = static_cast(*col_with_type_and_name.type); - - ColumnUInt8::MutablePtr null_map = copyNullMap(datetime_arg.column); - - if (!null_map && arguments.size() > 1) - null_map = copyNullMap(arguments[1].column->convertToFullColumnIfConst()); - - if (null_map) - { - for (size_t i = 0; i < size; ++i) - { - if (!time_zone_column && arguments.size() > 1) - { - if (!arguments[1].column.get()->getDataAt(i).toString().empty()) - time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); - } - bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); - null_map->getData()[i] |= !is_ok; - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - else - { - for (size_t i = 0; i < size; ++i) - { - if (!time_zone_column && arguments.size() > 1) - { - if (!arguments[1].column.get()->getDataAt(i).toString().empty()) - time_zone = &DateLUT::instance(arguments[1].column.get()->getDataAt(i).toString()); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty"); - } - FormatImpl::template execute(vec_from[i], write_buffer, &type, time_zone); - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - - write_buffer.finalize(); - - if (null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), Name::name); - } - else - { - ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); - - const auto & col_with_type_and_name = columnGetNested(arguments[0]); - const auto & type = static_cast(*col_with_type_and_name.type); - - if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) - { - auto col_to = ColumnString::create(); - - const typename ColVecType::Container & vec_from = col_from->getData(); - ColumnString::Chars & data_to = col_to->getChars(); - ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = vec_from.size(); - - 
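// [Editor's note] Standalone sketch of the ColumnString memory layout that the loops here fill
// in: all rows are concatenated into one char buffer, each terminated by a zero byte, and
// offsets[i] records the position one past row i's terminator (what offsets_to[i] =
// write_buffer.count() stores). Illustrative only; the real column classes differ.
#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

int main()
{
    std::vector<char> chars;
    std::vector<uint64_t> offsets;

    for (std::string row : {"2024-02-15", "2024-02-16"})
    {
        chars.insert(chars.end(), row.begin(), row.end());
        chars.push_back('\0');           // zero terminator after every row
        offsets.push_back(chars.size()); // one past the terminator
    }

    // Row i spans [offsets[i - 1], offsets[i] - 1), hence string_size = next_offset - current_offset - 1.
    assert(offsets[0] == 11 && offsets[1] == 22);
}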
data_to.resize(size * 3); - offsets_to.resize(size); - - WriteBufferFromVector write_buffer(data_to); - - if (null_map) - { - for (size_t i = 0; i < size; ++i) - { - bool is_ok = FormatImpl::template execute(vec_from[i], write_buffer, &type, nullptr); - /// We don't use timezones in this branch - null_map->getData()[i] |= !is_ok; - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - else - { - for (size_t i = 0; i < size; ++i) - { - FormatImpl::template execute(vec_from[i], write_buffer, &type, nullptr); - writeChar(0, write_buffer); - offsets_to[i] = write_buffer.count(); - } - } - - write_buffer.finalize(); - - if (null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), Name::name); - } - } -}; - - -/// Generic conversion of any type to String or FixedString via serialization to text. -template -struct ConvertImplGenericToString -{ - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) - { - static_assert(std::is_same_v || std::is_same_v, - "Can be used only to serialize to ColumnString or ColumnFixedString"); - - ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); - - const auto & col_with_type_and_name = columnGetNested(arguments[0]); - const IDataType & type = *col_with_type_and_name.type; - const IColumn & col_from = *col_with_type_and_name.column; - - size_t size = col_from.size(); - auto col_to = removeNullable(result_type)->createColumn(); - - { - ColumnStringHelpers::WriteHelper write_helper( - assert_cast(*col_to), - size); - - auto & write_buffer = write_helper.getWriteBuffer(); - - FormatSettings format_settings; - auto serialization = type.getDefaultSerialization(); - for (size_t row = 0; row < size; ++row) - { - serialization->serializeText(col_from, row, write_buffer, format_settings); - write_helper.rowWritten(); - } - - write_helper.finalize(); - } - - if (result_type->isNullable() && null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; - } -}; - -/** Conversion of time_t to UInt16, Int32, UInt32 - */ -template -void convertFromTime(typename DataType::FieldType & x, time_t & time) -{ - x = time; -} - -template <> -inline void convertFromTime(DataTypeDate::FieldType & x, time_t & time) -{ - if (unlikely(time < 0)) - x = 0; - else if (unlikely(time > 0xFFFF)) - x = 0xFFFF; - else - x = time; -} - -template <> -inline void convertFromTime(DataTypeDate32::FieldType & x, time_t & time) -{ - x = static_cast(time); -} - -template <> -inline void convertFromTime(DataTypeDateTime::FieldType & x, time_t & time) -{ - if (unlikely(time < 0)) - x = 0; - else if (unlikely(time > MAX_DATETIME_TIMESTAMP)) - x = MAX_DATETIME_TIMESTAMP; - else - x = static_cast(time); -} - -/** Conversion of strings to numbers, dates, datetimes: through parsing. 
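// [Editor's note] The section that follows defines paired parseImpl/tryParseImpl overloads: one
// that throws on malformed input and one that reports failure through its return value. A
// standalone sketch of that dual API, using std::optional in place of the out-parameter style:
#include <cassert>
#include <charconv>
#include <cstdint>
#include <optional>
#include <stdexcept>
#include <string_view>

std::optional<int64_t> tryParseInt(std::string_view s)
{
    int64_t value = 0;
    auto [ptr, ec] = std::from_chars(s.data(), s.data() + s.size(), value);
    if (ec != std::errc() || ptr != s.data() + s.size())
        return std::nullopt;  // failure is a value, mirroring tryParseImpl
    return value;
}

int64_t parseInt(std::string_view s)
{
    if (auto v = tryParseInt(s))
        return *v;
    throw std::invalid_argument("cannot parse integer"); // mirroring parseImpl
}

int main()
{
    assert(parseInt("42") == 42);
    assert(!tryParseInt("4x").has_value());
}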
- */ -template -void parseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool precise_float_parsing) -{ - if constexpr (std::is_floating_point_v) - { - if (precise_float_parsing) - readFloatTextPrecise(x, rb); - else - readFloatTextFast(x, rb); - } - else - readText(x, rb); -} - -template <> -inline void parseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - DayNum tmp(0); - readDateText(tmp, rb, *time_zone); - x = tmp; -} - -template <> -inline void parseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - ExtendedDayNum tmp(0); - readDateText(tmp, rb, *time_zone); - x = tmp; -} - - -// NOTE: no need of extra overload of DateTime64, since readDateTimeText64 has different signature and that case is explicitly handled in the calling code. -template <> -inline void parseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - time_t time = 0; - readDateTimeText(time, rb, *time_zone); - convertFromTime(x, time); -} - -template <> -inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - UUID tmp; - readUUIDText(tmp, rb); - x = tmp.toUnderType(); -} - -template <> -inline void parseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - IPv4 tmp; - readIPv4Text(tmp, rb); - x = tmp.toUnderType(); -} - -template <> -inline void parseImpl(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - IPv6 tmp; - readIPv6Text(tmp, rb); - x = tmp; -} - -template -bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool precise_float_parsing) -{ - if constexpr (std::is_floating_point_v) - { - if (precise_float_parsing) - return tryReadFloatTextPrecise(x, rb); - else - return tryReadFloatTextFast(x, rb); - } - else /*if constexpr (is_integer_v)*/ - return tryReadIntText(x, rb); -} - -template <> -inline bool tryParseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - DayNum tmp(0); - if (!tryReadDateText(tmp, rb, *time_zone)) - return false; - x = tmp; - return true; -} - -template <> -inline bool tryParseImpl(DataTypeDate32::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - ExtendedDayNum tmp(0); - if (!tryReadDateText(tmp, rb, *time_zone)) - return false; - x = tmp; - return true; -} - -template <> -inline bool tryParseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone, bool) -{ - time_t time = 0; - if (!tryReadDateTimeText(time, rb, *time_zone)) - return false; - convertFromTime(x, time); - return true; -} - -template <> -inline bool tryParseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - UUID tmp; - if (!tryReadUUIDText(tmp, rb)) - return false; - - x = tmp.toUnderType(); - return true; -} - -template <> -inline bool tryParseImpl(DataTypeIPv4::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - IPv4 tmp; - if (!tryReadIPv4Text(tmp, rb)) - return false; - - x = tmp.toUnderType(); - return true; -} - -template <> -inline bool tryParseImpl(DataTypeIPv6::FieldType & x, ReadBuffer & rb, const DateLUTImpl *, bool) -{ - IPv6 tmp; - if (!tryReadIPv6Text(tmp, rb)) - return false; - - x = tmp; - return true; -} - - -/** Throw exception with verbose message when string value is not parsed completely. 
- */
-[[noreturn]] inline void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, const IDataType & result_type)
-{
-    WriteBufferFromOwnString message_buf;
-    message_buf << "Cannot parse string " << quote << String(read_buffer.buffer().begin(), read_buffer.buffer().size())
-                << " as " << result_type.getName()
-                << ": syntax error";
-
-    if (read_buffer.offset())
-        message_buf << " at position " << read_buffer.offset()
-                    << " (parsed just " << quote << String(read_buffer.buffer().begin(), read_buffer.offset()) << ")";
-    else
-        message_buf << " at the beginning of the string";
-
-    // Currently there are no functions toIPv{4,6}Or{Null,Zero}
-    if (isNativeNumber(result_type) && !(result_type.getName() == "IPv4" || result_type.getName() == "IPv6"))
-        message_buf << ". Note: there are to" << result_type.getName() << "OrZero and to" << result_type.getName() << "OrNull functions, which return zero/NULL instead of throwing an exception.";
-
-    throw Exception(PreformattedMessage{message_buf.str(), "Cannot parse string {} as {}: syntax error {}"}, ErrorCodes::CANNOT_PARSE_TEXT);
-}
-
-
-enum class ConvertFromStringExceptionMode
-{
-    Throw, /// Throw exception if value cannot be parsed.
-    Zero,  /// Fill with zero or default if value cannot be parsed.
-    Null   /// Return ColumnNullable with NULLs when value cannot be parsed.
-};
-
-enum class ConvertFromStringParsingMode
-{
-    Normal,
-    BestEffort, /// Only applicable for DateTime. Will use a sophisticated method that is slower.
-    BestEffortUS
-};
-
-template <typename FromDataType, typename ToDataType, typename Name,
-    ConvertFromStringExceptionMode exception_mode, ConvertFromStringParsingMode parsing_mode>
-struct ConvertThroughParsing
-{
-    static_assert(std::is_same_v<FromDataType, DataTypeString> || std::is_same_v<FromDataType, DataTypeFixedString>,
-        "ConvertThroughParsing is only applicable for String or FixedString data types");
-
-    static constexpr bool to_datetime64 = std::is_same_v<ToDataType, DataTypeDateTime64>;
-
-    static bool isAllRead(ReadBuffer & in)
-    {
-        /// In case of FixedString, skip zero bytes at end.
-        if constexpr (std::is_same_v<FromDataType, DataTypeFixedString>)
-            while (!in.eof() && *in.position() == 0)
-                ++in.position();
-
-        if (in.eof())
-            return true;
-
-        /// Special case that allows parsing a string with DateTime or DateTime64 as Date or Date32.
-        if constexpr (std::is_same_v<ToDataType, DataTypeDate> || std::is_same_v<ToDataType, DataTypeDate32>)
-        {
-            if (!in.eof() && (*in.position() == ' ' || *in.position() == 'T'))
-            {
-                if (in.buffer().size() == strlen("YYYY-MM-DD hh:mm:ss"))
-                    return true;
-
-                if (in.buffer().size() >= strlen("YYYY-MM-DD hh:mm:ss.x")
-                    && in.buffer().begin()[19] == '.')
-                {
-                    in.position() = in.buffer().begin() + 20;
-
-                    while (!in.eof() && isNumericASCII(*in.position()))
-                        ++in.position();
-
-                    if (in.eof())
-                        return true;
-                }
-            }
-        }
-
-        return false;
-    }
-
-    template <typename Additions = void *>
-    static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, size_t input_rows_count,
-                             Additions additions [[maybe_unused]] = Additions())
-    {
-        using ColVecTo = typename ToDataType::ColumnType;
-
-        const DateLUTImpl * local_time_zone [[maybe_unused]] = nullptr;
-        const DateLUTImpl * utc_time_zone [[maybe_unused]] = nullptr;
-
-        /// For conversion to Date or DateTime type, second argument with time zone could be specified.
-        if constexpr (std::is_same_v<ToDataType, DataTypeDateTime> || to_datetime64)
-        {
-            const auto result_type = removeNullable(res_type);
-            // Time zone is already figured out during result type resolution, no need to do it here.
- if (const auto dt_col = checkAndGetDataType(result_type.get())) - local_time_zone = &dt_col->getTimeZone(); - else - local_time_zone = &extractTimeZoneFromFunctionArguments(arguments, 1, 0); - - if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort || parsing_mode == ConvertFromStringParsingMode::BestEffortUS) - utc_time_zone = &DateLUT::instance("UTC"); - } - else if constexpr (std::is_same_v || std::is_same_v) - { - // Timezone is more or less dummy when parsing Date/Date32 from string. - local_time_zone = &DateLUT::instance(); - utc_time_zone = &DateLUT::instance("UTC"); - } - - const IColumn * col_from = arguments[0].column.get(); - const ColumnString * col_from_string = checkAndGetColumn(col_from); - const ColumnFixedString * col_from_fixed_string = checkAndGetColumn(col_from); - - if (std::is_same_v && !col_from_string) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - col_from->getName(), Name::name); - - if (std::is_same_v && !col_from_fixed_string) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - col_from->getName(), Name::name); - - size_t size = input_rows_count; - typename ColVecTo::MutablePtr col_to = nullptr; - - if constexpr (IsDataTypeDecimal) - { - UInt32 scale = additions; - if constexpr (to_datetime64) - { - ToDataType check_bounds_in_ctor(scale, local_time_zone ? local_time_zone->getTimeZone() : String{}); - } - else - { - ToDataType check_bounds_in_ctor(ToDataType::maxPrecision(), scale); - } - col_to = ColVecTo::create(size, scale); - } - else - col_to = ColVecTo::create(size); - - typename ColVecTo::Container & vec_to = col_to->getData(); - - ColumnUInt8::MutablePtr col_null_map_to; - ColumnUInt8::Container * vec_null_map_to [[maybe_unused]] = nullptr; - if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) - { - col_null_map_to = ColumnUInt8::create(size); - vec_null_map_to = &col_null_map_to->getData(); - } - - const ColumnString::Chars * chars = nullptr; - const IColumn::Offsets * offsets = nullptr; - size_t fixed_string_size = 0; - - if constexpr (std::is_same_v) - { - chars = &col_from_string->getChars(); - offsets = &col_from_string->getOffsets(); - } - else - { - chars = &col_from_fixed_string->getChars(); - fixed_string_size = col_from_fixed_string->getN(); - } - - size_t current_offset = 0; - - bool precise_float_parsing = false; - - if (DB::CurrentThread::isInitialized()) - { - const DB::ContextPtr query_context = DB::CurrentThread::get().getQueryContext(); - - if (query_context) - precise_float_parsing = query_context->getSettingsRef().precise_float_parsing; - } - - for (size_t i = 0; i < size; ++i) - { - size_t next_offset = std::is_same_v ? (*offsets)[i] : (current_offset + fixed_string_size); - size_t string_size = std::is_same_v ? 
next_offset - current_offset - 1 : fixed_string_size; - - ReadBufferFromMemory read_buffer(&(*chars)[current_offset], string_size); - - if constexpr (exception_mode == ConvertFromStringExceptionMode::Throw) - { - if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort) - { - if constexpr (to_datetime64) - { - DateTime64 res = 0; - parseDateTime64BestEffort(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; - } - else - { - time_t res; - parseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone); - convertFromTime(vec_to[i], res); - } - } - else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS) - { - if constexpr (to_datetime64) - { - DateTime64 res = 0; - parseDateTime64BestEffortUS(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; - } - else - { - time_t res; - parseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone); - convertFromTime(vec_to[i], res); - } - } - else - { - if constexpr (to_datetime64) - { - DateTime64 value = 0; - readDateTime64Text(value, col_to->getScale(), read_buffer, *local_time_zone); - vec_to[i] = value; - } - else if constexpr (IsDataTypeDecimal) - { - SerializationDecimal::readText( - vec_to[i], read_buffer, ToDataType::maxPrecision(), col_to->getScale()); - } - else - { - /// we want to utilize constexpr condition here, which is not mixable with value comparison - do - { - if constexpr (std::is_same_v && std::is_same_v) - { - if (fixed_string_size == IPV6_BINARY_LENGTH) - { - readBinary(vec_to[i], read_buffer); - break; - } - } - parseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing); - } while (false); - } - } - - if (!isAllRead(read_buffer)) - throwExceptionForIncompletelyParsedValue(read_buffer, *res_type); - } - else - { - bool parsed; - - if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort) - { - if constexpr (to_datetime64) - { - DateTime64 res = 0; - parsed = tryParseDateTime64BestEffort(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; - } - else - { - time_t res; - parsed = tryParseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone); - convertFromTime(vec_to[i],res); - } - } - else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS) - { - if constexpr (to_datetime64) - { - DateTime64 res = 0; - parsed = tryParseDateTime64BestEffortUS(res, col_to->getScale(), read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; - } - else - { - time_t res; - parsed = tryParseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone); - convertFromTime(vec_to[i],res); - } - } - else - { - if constexpr (to_datetime64) - { - DateTime64 value = 0; - parsed = tryReadDateTime64Text(value, col_to->getScale(), read_buffer, *local_time_zone); - vec_to[i] = value; - } - else if constexpr (IsDataTypeDecimal) - { - parsed = SerializationDecimal::tryReadText( - vec_to[i], read_buffer, ToDataType::maxPrecision(), col_to->getScale()); - } - else - { - /// we want to utilize constexpr condition here, which is not mixable with value comparison - do - { - if constexpr (std::is_same_v && std::is_same_v) - { - if (fixed_string_size == IPV6_BINARY_LENGTH) - { - readBinary(vec_to[i], read_buffer); - parsed = true; - break; - } - } - - parsed = tryParseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing); - } while (false); - } - } - - if (!isAllRead(read_buffer)) - parsed = 
false; - - if (!parsed) - { - if constexpr (std::is_same_v) - { - vec_to[i] = -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); - } - else - { - vec_to[i] = static_cast(0); - } - } - - if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) - (*vec_null_map_to)[i] = !parsed; - } - - current_offset = next_offset; - } - - if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) - return ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); - else - return col_to; - } -}; - - -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (!std::is_same_v) -struct ConvertImpl - : ConvertThroughParsing {}; - -template -requires (is_any_of && is_any_of) -struct ConvertImpl - : ConvertThroughParsing {}; - -/// Generic conversion of any type from String. Used for complex types: Array and Tuple or types with custom serialization. -template -struct ConvertImplGenericFromString -{ - static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) - { - static_assert(std::is_same_v || std::is_same_v, - "Can be used only to parse from ColumnString or ColumnFixedString"); - - const IColumn & column_from = *arguments[0].column; - const IDataType & data_type_to = *result_type; - auto res = data_type_to.createColumn(); - auto serialization = data_type_to.getDefaultSerialization(); - const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; - - executeImpl(column_from, *res, *serialization, input_rows_count, null_map, result_type.get()); - return res; - } - - static void executeImpl( - const IColumn & column_from, - IColumn & column_to, - const ISerialization & serialization_from, - size_t input_rows_count, - const PaddedPODArray * null_map = nullptr, - const IDataType * result_type = nullptr) - { - static_assert(std::is_same_v || std::is_same_v, - "Can be used only to parse from ColumnString or ColumnFixedString"); - - if (const StringColumnType * col_from_string = checkAndGetColumn(&column_from)) - { - column_to.reserve(input_rows_count); - - FormatSettings format_settings; - for (size_t i = 0; i < input_rows_count; ++i) - { - if (null_map && (*null_map)[i]) - { - column_to.insertDefault(); - continue; - } - - const auto & val = col_from_string->getDataAt(i); - ReadBufferFromMemory read_buffer(val.data, val.size); - try - { - serialization_from.deserializeWholeText(column_to, read_buffer, format_settings); - } - catch (const Exception & e) - { - auto * nullable_column = typeid_cast(&column_to); - if (e.code() == ErrorCodes::CANNOT_PARSE_BOOL && nullable_column) - { - auto & col_nullmap = nullable_column->getNullMapData(); - if (col_nullmap.size() != nullable_column->size()) - col_nullmap.resize_fill(nullable_column->size()); - if (nullable_column->size() == (i + 1)) - nullable_column->popBack(1); - nullable_column->insertDefault(); - continue; - } - throw; - } - - if (!read_buffer.eof()) - { - if (result_type) - throwExceptionForIncompletelyParsedValue(read_buffer, *result_type); - else - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, - "Cannot parse string to column {}. 
Expected eof", column_to.getName()); - } - } - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column {} of first argument of conversion function from string", - column_from.getName()); - } - -}; - - -template <> -struct ConvertImpl - : ConvertImpl {}; - -template <> -struct ConvertImpl - : ConvertImpl {}; - -/** If types are identical, just take reference to column. - */ -template -requires (!T::is_parametric) -struct ConvertImpl -{ - template - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, - Additions additions [[maybe_unused]] = Additions()) - { - return arguments[0].column; - } -}; - -template -struct ConvertImpl -{ - template - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/, - Additions additions [[maybe_unused]] = Additions()) - { - - return arguments[0].column; - } -}; - - -/** Conversion from FixedString to String. - * Cutting sequences of zero bytes from end of strings. - */ -template -struct ConvertImpl -{ - static ColumnPtr execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type, size_t /*input_rows_count*/) - { - ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column); - const auto & nested = columnGetNested(arguments[0]); - if (const ColumnFixedString * col_from = checkAndGetColumn(nested.column.get())) - { - auto col_to = ColumnString::create(); - - const ColumnFixedString::Chars & data_from = col_from->getChars(); - ColumnString::Chars & data_to = col_to->getChars(); - ColumnString::Offsets & offsets_to = col_to->getOffsets(); - size_t size = col_from->size(); - size_t n = col_from->getN(); - data_to.resize(size * (n + 1)); /// + 1 - zero terminator - offsets_to.resize(size); - - size_t offset_from = 0; - size_t offset_to = 0; - for (size_t i = 0; i < size; ++i) - { - if (!null_map || !null_map->getData()[i]) - { - size_t bytes_to_copy = n; - while (bytes_to_copy > 0 && data_from[offset_from + bytes_to_copy - 1] == 0) - --bytes_to_copy; - - memcpy(&data_to[offset_to], &data_from[offset_from], bytes_to_copy); - offset_to += bytes_to_copy; - } - data_to[offset_to] = 0; - ++offset_to; - offsets_to[i] = offset_to; - offset_from += n; - } - - data_to.resize(offset_to); - if (return_type->isNullable() && null_map) - return ColumnNullable::create(std::move(col_to), std::move(null_map)); - return col_to; - } - else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", - arguments[0].column->getName(), Name::name); - } -}; - - -/// Declared early because used below. 
-struct NameToDate { static constexpr auto name = "toDate"; }; -struct NameToDate32 { static constexpr auto name = "toDate32"; }; -struct NameToDateTime { static constexpr auto name = "toDateTime"; }; -struct NameToDateTime32 { static constexpr auto name = "toDateTime32"; }; -struct NameToDateTime64 { static constexpr auto name = "toDateTime64"; }; -struct NameToString { static constexpr auto name = "toString"; }; -struct NameToDecimal32 { static constexpr auto name = "toDecimal32"; }; -struct NameToDecimal64 { static constexpr auto name = "toDecimal64"; }; -struct NameToDecimal128 { static constexpr auto name = "toDecimal128"; }; -struct NameToDecimal256 { static constexpr auto name = "toDecimal256"; }; - - -#define DEFINE_NAME_TO_INTERVAL(INTERVAL_KIND) \ - struct NameToInterval ## INTERVAL_KIND \ - { \ - static constexpr auto name = "toInterval" #INTERVAL_KIND; \ - static constexpr auto kind = IntervalKind::Kind::INTERVAL_KIND; \ - }; - -DEFINE_NAME_TO_INTERVAL(Nanosecond) -DEFINE_NAME_TO_INTERVAL(Microsecond) -DEFINE_NAME_TO_INTERVAL(Millisecond) -DEFINE_NAME_TO_INTERVAL(Second) -DEFINE_NAME_TO_INTERVAL(Minute) -DEFINE_NAME_TO_INTERVAL(Hour) -DEFINE_NAME_TO_INTERVAL(Day) -DEFINE_NAME_TO_INTERVAL(Week) -DEFINE_NAME_TO_INTERVAL(Month) -DEFINE_NAME_TO_INTERVAL(Quarter) -DEFINE_NAME_TO_INTERVAL(Year) - -#undef DEFINE_NAME_TO_INTERVAL - -struct NameParseDateTimeBestEffort; -struct NameParseDateTimeBestEffortOrZero; -struct NameParseDateTimeBestEffortOrNull; - -template -static inline bool isDateTime64(const ColumnsWithTypeAndName & arguments) -{ - if constexpr (std::is_same_v) - return true; - else if constexpr (std::is_same_v || std::is_same_v - || std::is_same_v || std::is_same_v) - { - return (arguments.size() == 2 && isUInt(arguments[1].type)) || arguments.size() == 3; - } - - return false; -} - -template -class FunctionConvert : public IFunction -{ -public: - using Monotonic = MonotonicityImpl; - - static constexpr auto name = Name::name; - static constexpr bool to_decimal = - std::is_same_v || std::is_same_v - || std::is_same_v || std::is_same_v; - - static constexpr bool to_datetime64 = std::is_same_v; - - static constexpr bool to_string_or_fixed_string = std::is_same_v || - std::is_same_v; - - static constexpr bool to_date_or_datetime = std::is_same_v || - std::is_same_v || - std::is_same_v; - - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - static FunctionPtr create() { return std::make_shared(); } - - FunctionConvert() = default; - explicit FunctionConvert(ContextPtr context_) : context(context_) {} - - String getName() const override - { - return name; - } - - bool isVariadic() const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - bool isInjective(const ColumnsWithTypeAndName &) const override { return std::is_same_v; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & arguments) const override - { - /// TODO: We can make more optimizations here. 
- return !(to_date_or_datetime && isNumber(*arguments[0].type)); - } - - using DefaultReturnTypeGetter = std::function; - static DataTypePtr getReturnTypeDefaultImplementationForNulls(const ColumnsWithTypeAndName & arguments, const DefaultReturnTypeGetter & getter) - { - NullPresence null_presence = getNullPresense(arguments); - - if (null_presence.has_null_constant) - { - return makeNullable(std::make_shared()); - } - if (null_presence.has_nullable) - { - auto nested_columns = Block(createBlockWithNestedColumns(arguments)); - auto return_type = getter(ColumnsWithTypeAndName(nested_columns.begin(), nested_columns.end())); - return makeNullable(return_type); - } - - return getter(arguments); - } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - auto getter = [&] (const auto & args) { return getReturnTypeImplRemovedNullable(args); }; - auto res = getReturnTypeDefaultImplementationForNulls(arguments, getter); - to_nullable = res->isNullable(); - checked_return_type = true; - return res; - } - - DataTypePtr getReturnTypeImplRemovedNullable(const ColumnsWithTypeAndName & arguments) const - { - FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; - FunctionArgumentDescriptors optional_args; - - if constexpr (to_decimal) - { - mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); - } - - if (!to_decimal && isDateTime64(arguments)) - { - mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); - } - - // toString(DateTime or DateTime64, [timezone: String]) - if ((std::is_same_v && !arguments.empty() && (isDateTime64(arguments[0].type) || isDateTime(arguments[0].type))) - // toUnixTimestamp(value[, timezone : String]) - || std::is_same_v - // toDate(value[, timezone : String]) - || std::is_same_v // TODO: shall we allow timestamp argument for toDate? DateTime knows nothing about timezones and this argument is ignored below. - // toDate32(value[, timezone : String]) - || std::is_same_v - // toDateTime(value[, timezone: String]) - || std::is_same_v - // toDateTime64(value, scale : Integer[, timezone: String]) - || std::is_same_v) - { - optional_args.push_back({"timezone", static_cast(&isString), nullptr, "String"}); - } - - validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args); - - if constexpr (std::is_same_v) - { - return std::make_shared(Name::kind); - } - else if constexpr (to_decimal) - { - UInt64 scale = extractToDecimalScale(arguments[1]); - - if constexpr (std::is_same_v) - return createDecimalMaxPrecision(scale); - else if constexpr (std::is_same_v) - return createDecimalMaxPrecision(scale); - else if constexpr (std::is_same_v) - return createDecimalMaxPrecision(scale); - else if constexpr (std::is_same_v) - return createDecimalMaxPrecision(scale); - - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected branch in code of conversion function: it is a bug."); - } - else - { - // Optional second argument with time zone for DateTime. - UInt8 timezone_arg_position = 1; - UInt32 scale [[maybe_unused]] = DataTypeDateTime64::default_scale; - - // DateTime64 requires more arguments: scale and timezone. Since timezone is optional, scale should be first. 
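// [Editor's note] A small sketch of the argument layout resolved just below: for DateTime64 the
// scale sits at index 1 and pushes the optional timezone to index 2, while for DateTime the
// timezone stays at index 1. Positions only; the real code reads typed columns, not flags.
#include <cassert>
#include <cstddef>

struct ArgPositions { size_t timezone_arg_position; bool has_scale; };

ArgPositions resolvePositions(bool is_datetime64)
{
    ArgPositions positions{1, false};
    if (is_datetime64)
    {
        positions.has_scale = true;
        positions.timezone_arg_position += 1; // toDateTime64(value, scale[, timezone])
    }
    return positions;                         // toDateTime(value[, timezone])
}

int main()
{
    assert(resolvePositions(true).timezone_arg_position == 2);
    assert(resolvePositions(false).timezone_arg_position == 1);
}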
- if (isDateTime64(arguments)) - { - timezone_arg_position += 1; - scale = static_cast(arguments[1].column->get64(0)); - - if (to_datetime64 || scale != 0) /// toDateTime('xxxx-xx-xx xx:xx:xx', 0) return DateTime - return std::make_shared(scale, - extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); - - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); - } - - if constexpr (std::is_same_v) - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, timezone_arg_position, 0, false)); - else if constexpr (std::is_same_v) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected branch in code of conversion function: it is a bug."); - else - return std::make_shared(); - } - } - - /// Function actually uses default implementation for nulls, - /// but we need to know if return type is Nullable or not, - /// so we use checked_return_type only to intercept the first call to getReturnTypeImpl(...). - bool useDefaultImplementationForNulls() const override - { - bool to_nullable_string = to_nullable && std::is_same_v; - return checked_return_type && !to_nullable_string; - } - - bool useDefaultImplementationForConstants() const override { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override - { - if constexpr (std::is_same_v) - return {}; - else if constexpr (std::is_same_v) - return {2}; - return {1}; - } - bool canBeExecutedOnDefaultArguments() const override { return false; } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - try - { - return executeInternal(arguments, result_type, input_rows_count); - } - catch (Exception & e) - { - /// More convenient error message. 
- if (e.code() == ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF) - { - e.addMessage("Cannot parse " - + result_type->getName() + " from " - + arguments[0].type->getName() - + ", because value is too short"); - } - else if (e.code() == ErrorCodes::CANNOT_PARSE_NUMBER - || e.code() == ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT - || e.code() == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED - || e.code() == ErrorCodes::CANNOT_PARSE_QUOTED_STRING - || e.code() == ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE - || e.code() == ErrorCodes::CANNOT_PARSE_DATE - || e.code() == ErrorCodes::CANNOT_PARSE_DATETIME - || e.code() == ErrorCodes::CANNOT_PARSE_UUID - || e.code() == ErrorCodes::CANNOT_PARSE_IPV4 - || e.code() == ErrorCodes::CANNOT_PARSE_IPV6) - { - e.addMessage("Cannot parse " - + result_type->getName() + " from " - + arguments[0].type->getName()); - } - - throw; - } - } - - bool hasInformationAboutMonotonicity() const override - { - return Monotonic::has(); - } - - Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override - { - return Monotonic::get(type, left, right); - } - -private: - ContextPtr context; - mutable bool checked_return_type = false; - mutable bool to_nullable = false; - - ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const - { - if (arguments.empty()) - throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least 1 argument", getName()); - - if (result_type->onlyNull()) - return result_type->createColumnConstWithDefaultValue(input_rows_count); - - const DataTypePtr from_type = removeNullable(arguments[0].type); - ColumnPtr result_column; - - [[maybe_unused]] FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; - - if (context) - date_time_overflow_behavior = context->getSettingsRef().date_time_overflow_behavior.value; - - auto call = [&](const auto & types, const auto & tag) -> bool - { - using Types = std::decay_t; - using LeftDataType = typename Types::LeftType; - using RightDataType = typename Types::RightType; - using SpecialTag = std::decay_t; - - if constexpr (IsDataTypeDecimal) - { - if constexpr (std::is_same_v) - { - /// Account for optional timezone argument. 
- if (arguments.size() != 2 && arguments.size() != 3) - throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects 2 or 3 arguments for DataTypeDateTime64.", getName()); - } - else if (arguments.size() != 2) - { - throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects 2 arguments for Decimal.", getName()); - } - - const ColumnWithTypeAndName & scale_column = arguments[1]; - UInt32 scale = extractToDecimalScale(scale_column); - - switch (date_time_overflow_behavior) - { - case FormatSettings::DateTimeOverflowBehavior::Throw: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); - break; - case FormatSettings::DateTimeOverflowBehavior::Ignore: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); - break; - case FormatSettings::DateTimeOverflowBehavior::Saturate: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); - break; - } - - } - else if constexpr (IsDataTypeDateOrDateTime && std::is_same_v) - { - const auto * dt64 = assert_cast(arguments[0].type.get()); - switch (date_time_overflow_behavior) - { - case FormatSettings::DateTimeOverflowBehavior::Throw: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, dt64->getScale()); - break; - case FormatSettings::DateTimeOverflowBehavior::Ignore: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, dt64->getScale()); - break; - case FormatSettings::DateTimeOverflowBehavior::Saturate: - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, dt64->getScale()); - break; - } - } -#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE) \ - case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \ - result_column = ConvertImpl::execute( \ - arguments, result_type, input_rows_count); \ - break; - - else if constexpr (IsDataTypeDecimalOrNumber && IsDataTypeDecimalOrNumber) - { - using LeftT = typename LeftDataType::FieldType; - using RightT = typename RightDataType::FieldType; - - static constexpr bool bad_left = - is_decimal || std::is_floating_point_v || is_big_int_v || is_signed_v; - static constexpr bool bad_right = - is_decimal || std::is_floating_point_v || is_big_int_v || is_signed_v; - - /// Disallow int vs UUID conversion (but support int vs UInt128 conversion) - if constexpr ((bad_left && std::is_same_v) || - (bad_right && std::is_same_v)) - { - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Wrong UUID conversion"); - } - else - { - switch (date_time_overflow_behavior) - { - GENERATE_OVERFLOW_MODE_CASE(Throw) - GENERATE_OVERFLOW_MODE_CASE(Ignore) - GENERATE_OVERFLOW_MODE_CASE(Saturate) - } - } - } - else if constexpr ((IsDataTypeNumber || IsDataTypeDateOrDateTime) - && IsDataTypeDateOrDateTime) - { - switch (date_time_overflow_behavior) - { - GENERATE_OVERFLOW_MODE_CASE(Throw) - GENERATE_OVERFLOW_MODE_CASE(Ignore) - GENERATE_OVERFLOW_MODE_CASE(Saturate) - } - } -#undef GENERATE_OVERFLOW_MODE_CASE - else - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count); - - return true; - }; - - if (isDateTime64(arguments)) - { - /// For toDateTime('xxxx-xx-xx xx:xx:xx.00', 2[, 'timezone']) we need to it convert to DateTime64 - const ColumnWithTypeAndName & scale_column = arguments[1]; - UInt32 scale = extractToDecimalScale(scale_column); - - if (to_datetime64 || scale != 0) /// When scale = 0, the data type is DateTime otherwise the data type is DateTime64 - { - if 
(!callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{})) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", - arguments[0].type->getName(), getName()); - - return result_column; - } - } - - if constexpr (std::is_same_v) - { - if (from_type->getCustomSerialization()) - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); - } - - bool done = false; - if constexpr (to_string_or_fixed_string) - { - done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{}); - } - else - { - bool cast_ipv4_ipv6_default_on_conversion_error = false; - if constexpr (is_any_of) - if (context && (cast_ipv4_ipv6_default_on_conversion_error = context->getSettingsRef().cast_ipv4_ipv6_default_on_conversion_error)) - done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertReturnZeroOnErrorTag{}); - - if (!cast_ipv4_ipv6_default_on_conversion_error) - { - /// We should use ConvertFromStringExceptionMode::Null mode when converting from String (or FixedString) - /// to Nullable type, to avoid 'value is too short' error on attempt to parse empty string from NULL values. - if (to_nullable && WhichDataType(from_type).isStringOrFixedString()) - done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertReturnNullOnErrorTag{}); - else - done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{}); - } - } - - if (!done) - { - /// Generic conversion of any type to String. - if (std::is_same_v) - { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); - } - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", - arguments[0].type->getName(), getName()); - } - - return result_column; - } -}; - - -/** Function toTOrZero (where T is number of date or datetime type): - * try to convert from String to type T through parsing, - * if cannot parse, return default value instead of throwing exception. - * Function toTOrNull will return Nullable type with NULL when cannot parse. - * NOTE Also need to implement tryToUnixTimestamp with timezone. 
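// [Editor's note] Standalone sketch of the three ConvertFromStringExceptionMode behaviors that
// distinguish toT, toTOrZero and toTOrNull (see the enum earlier in this file). std::optional
// stands in for the null map of the real Nullable column; not the ClickHouse implementation.
#include <cassert>
#include <charconv>
#include <cstdint>
#include <optional>
#include <stdexcept>
#include <string_view>

enum class ExceptionMode { Throw, Zero, Null };

template <ExceptionMode mode>
std::optional<int32_t> convertFromString(std::string_view s)
{
    int32_t value = 0;
    auto [ptr, ec] = std::from_chars(s.data(), s.data() + s.size(), value);
    const bool parsed = (ec == std::errc() && ptr == s.data() + s.size());

    if (parsed)
        return value;
    if constexpr (mode == ExceptionMode::Throw)
        throw std::invalid_argument("cannot parse value"); // toInt32('abc') -> exception
    if constexpr (mode == ExceptionMode::Zero)
        return 0;                                          // toInt32OrZero('abc') -> 0
    return std::nullopt;                                   // toInt32OrNull('abc') -> NULL
}

int main()
{
    assert(convertFromString<ExceptionMode::Zero>("abc") == 0);
    assert(!convertFromString<ExceptionMode::Null>("abc").has_value());
}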
- */ -template -class FunctionConvertFromString : public IFunction -{ -public: - static constexpr auto name = Name::name; - static constexpr bool to_decimal = - std::is_same_v> || - std::is_same_v> || - std::is_same_v> || - std::is_same_v>; - - static constexpr bool to_datetime64 = std::is_same_v; - - static FunctionPtr create(ContextPtr) { return std::make_shared(); } - static FunctionPtr create() { return std::make_shared(); } - - String getName() const override - { - return name; - } - - bool isVariadic() const override { return true; } - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - size_t getNumberOfArguments() const override { return 0; } - - bool useDefaultImplementationForConstants() const override { return true; } - bool canBeExecutedOnDefaultArguments() const override { return false; } - - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - - DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override - { - DataTypePtr res; - - if (isDateTime64(arguments)) - { - validateFunctionArgumentTypes(*this, arguments, - FunctionArgumentDescriptors{{"string", static_cast(&isStringOrFixedString), nullptr, "String or FixedString"}}, - // optional - FunctionArgumentDescriptors{ - {"precision", static_cast(&isUInt8), isColumnConst, "const UInt8"}, - {"timezone", static_cast(&isStringOrFixedString), isColumnConst, "const String or FixedString"}, - }); - - UInt64 scale = to_datetime64 ? DataTypeDateTime64::default_scale : 0; - if (arguments.size() > 1) - scale = extractToDecimalScale(arguments[1]); - const auto timezone = extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, false); - - res = scale == 0 ? res = std::make_shared(timezone) : std::make_shared(scale, timezone); - } - else - { - if ((arguments.size() != 1 && arguments.size() != 2) || (to_decimal && arguments.size() != 2)) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2. " - "Second argument only make sense for DateTime (time zone, optional) and Decimal (scale).", - getName(), arguments.size()); - - if (!isStringOrFixedString(arguments[0].type)) - { - if (this->getName().find("OrZero") != std::string::npos || - this->getName().find("OrNull") != std::string::npos) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. 
" - "Conversion functions with postfix 'OrZero' or 'OrNull' should take String argument", - arguments[0].type->getName(), getName()); - else - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", - arguments[0].type->getName(), getName()); - } - - if (arguments.size() == 2) - { - if constexpr (std::is_same_v) - { - if (!isString(arguments[1].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}", - arguments[1].type->getName(), getName()); - } - else if constexpr (to_decimal) - { - if (!isInteger(arguments[1].type)) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}", - arguments[1].type->getName(), getName()); - if (!arguments[1].column) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be constant", getName()); - } - else - { - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Number of arguments for function {} doesn't match: passed {}, should be 1. " - "Second argument makes sense only for DateTime and Decimal.", - getName(), arguments.size()); - } - } - - if constexpr (std::is_same_v) - res = std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0, false)); - else if constexpr (std::is_same_v) - throw Exception(ErrorCodes::LOGICAL_ERROR, "MaterializedMySQL is a bug."); - else if constexpr (to_decimal) - { - UInt64 scale = extractToDecimalScale(arguments[1]); - res = createDecimalMaxPrecision(scale); - if (!res) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Something wrong with toDecimalNNOrZero() or toDecimalNNOrNull()"); - } - else - res = std::make_shared(); - } - - if constexpr (exception_mode == ConvertFromStringExceptionMode::Null) - res = std::make_shared(res); - - return res; - } - - template - ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, UInt32 scale = 0) const - { - const IDataType * from_type = arguments[0].type.get(); - - if (checkAndGetDataType(from_type)) - { - return ConvertThroughParsing::execute( - arguments, result_type, input_rows_count, scale); - } - else if (checkAndGetDataType(from_type)) - { - return ConvertThroughParsing::execute( - arguments, result_type, input_rows_count, scale); - } - - return nullptr; - } - - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - ColumnPtr result_column; - - if constexpr (to_decimal) - result_column = executeInternal(arguments, result_type, input_rows_count, - assert_cast(*removeNullable(result_type)).getScale()); - else - { - if (isDateTime64(arguments)) - { - UInt64 scale = to_datetime64 ? DataTypeDateTime64::default_scale : 0; - if (arguments.size() > 1) - scale = extractToDecimalScale(arguments[1]); - - if (scale == 0) - result_column = executeInternal(arguments, result_type, input_rows_count); - else - { - result_column = executeInternal(arguments, result_type, input_rows_count, static_cast(scale)); - } - } - else - { - result_column = executeInternal(arguments, result_type, input_rows_count); - } - } - - if (!result_column) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " - "Only String or FixedString argument is accepted for try-conversion function. 
For other arguments, " - "use function without 'orZero' or 'orNull'.", arguments[0].type->getName(), getName()); - - return result_column; - } -}; - - -/// Monotonicity. - -struct PositiveMonotonicity -{ - static bool has() { return true; } - static IFunction::Monotonicity get(const IDataType &, const Field &, const Field &) - { - return { .is_monotonic = true }; - } -}; - -struct UnknownMonotonicity -{ - static bool has() { return false; } - static IFunction::Monotonicity get(const IDataType &, const Field &, const Field &) - { - return { }; - } -}; - -template -struct ToNumberMonotonicity -{ - static bool has() { return true; } - - static UInt64 divideByRangeOfType(UInt64 x) - { - if constexpr (sizeof(T) < sizeof(UInt64)) - return x >> (sizeof(T) * 8); - else - return 0; - } - - static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) - { - if (!type.isValueRepresentedByNumber()) - return {}; - - /// If type is same, the conversion is always monotonic. - /// (Enum has separate case, because it is different data type) - if (checkAndGetDataType>(&type) || - checkAndGetDataType>(&type)) - return { .is_monotonic = true, .is_always_monotonic = true }; - - /// Float cases. - - /// When converting to Float, the conversion is always monotonic. - if constexpr (std::is_floating_point_v) - return { .is_monotonic = true, .is_always_monotonic = true }; - - const auto * low_cardinality = typeid_cast(&type); - const IDataType * low_cardinality_dictionary_type = nullptr; - if (low_cardinality) - low_cardinality_dictionary_type = low_cardinality->getDictionaryType().get(); - - WhichDataType which_type(type); - WhichDataType which_inner_type = low_cardinality - ? WhichDataType(low_cardinality_dictionary_type) - : WhichDataType(type); - - /// If converting from Float, for monotonicity, arguments must fit in range of result type. - if (which_inner_type.isFloat()) - { - if (left.isNull() || right.isNull()) - return {}; - - Float64 left_float = left.get(); - Float64 right_float = right.get(); - - if (left_float >= static_cast(std::numeric_limits::min()) - && left_float <= static_cast(std::numeric_limits::max()) - && right_float >= static_cast(std::numeric_limits::min()) - && right_float <= static_cast(std::numeric_limits::max())) - return { .is_monotonic = true }; - - return {}; - } - - /// Integer cases. - - /// Only support types represented by native integers. - /// It can be extended to big integers, decimals and DateTime64 later. - /// By the way, NULLs are representing unbounded ranges. - if (!((left.isNull() || left.getType() == Field::Types::UInt64 || left.getType() == Field::Types::Int64) - && (right.isNull() || right.getType() == Field::Types::UInt64 || right.getType() == Field::Types::Int64))) - return {}; - - const bool from_is_unsigned = type.isValueRepresentedByUnsignedInteger(); - const bool to_is_unsigned = is_unsigned_v; - - const size_t size_of_from = type.getSizeOfValueInMemory(); - const size_t size_of_to = sizeof(T); - - const bool left_in_first_half = left.isNull() - ? from_is_unsigned - : (left.get() >= 0); - - const bool right_in_first_half = right.isNull() - ? !from_is_unsigned - : (right.get() >= 0); - - /// Size of type is the same. - if (size_of_from == size_of_to) - { - if (from_is_unsigned == to_is_unsigned) - return { .is_monotonic = true, .is_always_monotonic = true }; - - if (left_in_first_half == right_in_first_half) - return { .is_monotonic = true }; - - return {}; - } - - /// Size of type is expanded. 
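// [Editor's note] Standalone sketch of the divideByRangeOfType test used by the shrinking
// branch further below: a narrowing cast is monotonic on a range only if both endpoints fall
// into the same "period" of the narrower type, i.e. the discarded high bits are equal. (The
// real helper returns 0 when the target is already 64 bits wide, avoiding an over-shift.)
#include <cassert>
#include <cstdint>

template <typename Narrow>
uint64_t divideByRangeOf(uint64_t x)
{
    return x >> (sizeof(Narrow) * 8); // which block of 2^bits(Narrow) the value falls into
}

int main()
{
    // 300 -> 44 and 301 -> 45 as UInt8: same period (block 1), order preserved.
    assert(divideByRangeOf<uint8_t>(300) == divideByRangeOf<uint8_t>(301));
    // 255 -> 255 but 300 -> 44: different periods, so UInt64 -> UInt8 is not monotonic here.
    assert(divideByRangeOf<uint8_t>(255) != divideByRangeOf<uint8_t>(300));
}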
- if (size_of_from < size_of_to) - { - if (from_is_unsigned == to_is_unsigned) - return { .is_monotonic = true, .is_always_monotonic = true }; - - if (!to_is_unsigned) - return { .is_monotonic = true, .is_always_monotonic = true }; - - /// signed -> unsigned. If arguments from the same half, then function is monotonic. - if (left_in_first_half == right_in_first_half) - return { .is_monotonic = true }; - - return {}; - } - - /// Size of type is shrunk. - if (size_of_from > size_of_to) - { - /// Function cannot be monotonic on unbounded ranges. - if (left.isNull() || right.isNull()) - return {}; - - /// Function cannot be monotonic when left and right are not on the same ranges. - if (divideByRangeOfType(left.get()) != divideByRangeOfType(right.get())) - return {}; - - if (to_is_unsigned) - return { .is_monotonic = true }; - else - { - // If To is signed, it's possible that the signedness is different after conversion. So we check it explicitly. - const bool is_monotonic = (T(left.get()) >= 0) == (T(right.get()) >= 0); - - return { .is_monotonic = is_monotonic }; - } - } - - UNREACHABLE(); - } -}; - -struct ToDateMonotonicity -{ - static bool has() { return true; } - - static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) - { - auto which = WhichDataType(type); - if (which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() || which.isInt8() || which.isInt16() || which.isUInt8() - || which.isUInt16()) - { - return {.is_monotonic = true, .is_always_monotonic = true}; - } - else if ( - ((left.getType() == Field::Types::UInt64 || left.isNull()) && (right.getType() == Field::Types::UInt64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) - || ((left.getType() == Field::Types::Int64 || left.isNull()) && (right.getType() == Field::Types::Int64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF))) - || (( - (left.getType() == Field::Types::Float64 || left.isNull()) - && (right.getType() == Field::Types::Float64 || right.isNull()) - && ((left.isNull() || left.get() < 0xFFFF) && (right.isNull() || right.get() >= 0xFFFF)))) - || !isNativeNumber(type)) - { - return {}; - } - else - { - return {.is_monotonic = true, .is_always_monotonic = true}; - } - } -}; - -struct ToDateTimeMonotonicity -{ - static bool has() { return true; } - - static IFunction::Monotonicity get(const IDataType & type, const Field &, const Field &) - { - if (type.isValueRepresentedByNumber()) - return {.is_monotonic = true, .is_always_monotonic = true}; - else - return {}; - } -}; - -/** The monotonicity for the `toString` function is mainly determined for test purposes. - * It is doubtful that anyone is looking to optimize queries with conditions `toString(CounterID) = 34`. - */ -struct ToStringMonotonicity -{ - static bool has() { return true; } - - static IFunction::Monotonicity get(const IDataType & type, const Field & left, const Field & right) - { - IFunction::Monotonicity positive{ .is_monotonic = true }; - IFunction::Monotonicity not_monotonic; - - const auto * type_ptr = &type; - if (const auto * low_cardinality_type = checkAndGetDataType(type_ptr)) - type_ptr = low_cardinality_type->getDictionaryType().get(); - - /// Order on enum values (which is the order on integers) is completely arbitrary in respect to the order on strings. 
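-        /// For example, in Enum8('z' = 1, 'a' = 2) the values are ordered as 'z' < 'a' (by underlying integer),
-        /// while their string representations are ordered as 'a' < 'z'.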
-        if (WhichDataType(type).isEnum())
-            return not_monotonic;
-
-        /// `toString` function is monotonous if the argument is Date or Date32 or DateTime or String, or non-negative numbers with the same number of symbols.
-        if (checkDataTypes<DataTypeDate, DataTypeDate32, DataTypeDateTime, DataTypeString>(type_ptr))
-            return positive;
-
-        if (left.isNull() || right.isNull())
-            return {};
-
-        if (left.getType() == Field::Types::UInt64
-            && right.getType() == Field::Types::UInt64)
-        {
-            return (left.get<UInt64>() == 0 && right.get<UInt64>() == 0)
-                || (floor(log10(left.get<UInt64>())) == floor(log10(right.get<UInt64>())))
-                ? positive : not_monotonic;
-        }
-
-        if (left.getType() == Field::Types::Int64
-            && right.getType() == Field::Types::Int64)
-        {
-            return (left.get<Int64>() == 0 && right.get<Int64>() == 0)
-                || (left.get<Int64>() > 0 && right.get<Int64>() > 0 && floor(log10(left.get<Int64>())) == floor(log10(right.get<Int64>())))
-                ? positive : not_monotonic;
-        }
-
-        return not_monotonic;
-    }
-};
-
-
-struct NameToUInt8 { static constexpr auto name = "toUInt8"; };
-struct NameToUInt16 { static constexpr auto name = "toUInt16"; };
-struct NameToUInt32 { static constexpr auto name = "toUInt32"; };
-struct NameToUInt64 { static constexpr auto name = "toUInt64"; };
-struct NameToUInt128 { static constexpr auto name = "toUInt128"; };
-struct NameToUInt256 { static constexpr auto name = "toUInt256"; };
-struct NameToInt8 { static constexpr auto name = "toInt8"; };
-struct NameToInt16 { static constexpr auto name = "toInt16"; };
-struct NameToInt32 { static constexpr auto name = "toInt32"; };
-struct NameToInt64 { static constexpr auto name = "toInt64"; };
-struct NameToInt128 { static constexpr auto name = "toInt128"; };
-struct NameToInt256 { static constexpr auto name = "toInt256"; };
-struct NameToFloat32 { static constexpr auto name = "toFloat32"; };
-struct NameToFloat64 { static constexpr auto name = "toFloat64"; };
-struct NameToUUID { static constexpr auto name = "toUUID"; };
-struct NameToIPv4 { static constexpr auto name = "toIPv4"; };
-struct NameToIPv6 { static constexpr auto name = "toIPv6"; };
-
-using FunctionToUInt8 = FunctionConvert<DataTypeUInt8, NameToUInt8, ToNumberMonotonicity<UInt8>>;
-using FunctionToUInt16 = FunctionConvert<DataTypeUInt16, NameToUInt16, ToNumberMonotonicity<UInt16>>;
-using FunctionToUInt32 = FunctionConvert<DataTypeUInt32, NameToUInt32, ToNumberMonotonicity<UInt32>>;
-using FunctionToUInt64 = FunctionConvert<DataTypeUInt64, NameToUInt64, ToNumberMonotonicity<UInt64>>;
-using FunctionToUInt128 = FunctionConvert<DataTypeUInt128, NameToUInt128, ToNumberMonotonicity<UInt128>>;
-using FunctionToUInt256 = FunctionConvert<DataTypeUInt256, NameToUInt256, ToNumberMonotonicity<UInt256>>;
-using FunctionToInt8 = FunctionConvert<DataTypeInt8, NameToInt8, ToNumberMonotonicity<Int8>>;
-using FunctionToInt16 = FunctionConvert<DataTypeInt16, NameToInt16, ToNumberMonotonicity<Int16>>;
-using FunctionToInt32 = FunctionConvert<DataTypeInt32, NameToInt32, ToNumberMonotonicity<Int32>>;
-using FunctionToInt64 = FunctionConvert<DataTypeInt64, NameToInt64, ToNumberMonotonicity<Int64>>;
-using FunctionToInt128 = FunctionConvert<DataTypeInt128, NameToInt128, ToNumberMonotonicity<Int128>>;
-using FunctionToInt256 = FunctionConvert<DataTypeInt256, NameToInt256, ToNumberMonotonicity<Int256>>;
-using FunctionToFloat32 = FunctionConvert<DataTypeFloat32, NameToFloat32, ToNumberMonotonicity<Float32>>;
-using FunctionToFloat64 = FunctionConvert<DataTypeFloat64, NameToFloat64, ToNumberMonotonicity<Float64>>;
-
-using FunctionToDate = FunctionConvert<DataTypeDate, NameToDate, ToDateMonotonicity>;
-
-using FunctionToDate32 = FunctionConvert<DataTypeDate32, NameToDate32, ToDateMonotonicity>;
-
-using FunctionToDateTime = FunctionConvert<DataTypeDateTime, NameToDateTime, ToDateTimeMonotonicity>;
-
-using FunctionToDateTime32 = FunctionConvert<DataTypeDateTime, NameToDateTime32, ToDateTimeMonotonicity>;
-
-using FunctionToDateTime64 = FunctionConvert<DataTypeDateTime64, NameToDateTime64, ToDateTimeMonotonicity>;
-
-using FunctionToUUID = FunctionConvert<DataTypeUUID, NameToUUID, ToNumberMonotonicity<UInt128>>;
-using FunctionToIPv4 = FunctionConvert<DataTypeIPv4, NameToIPv4, ToNumberMonotonicity<UInt32>>;
-using FunctionToIPv6 = FunctionConvert<DataTypeIPv6, NameToIPv6, ToNumberMonotonicity<UInt128>>;
-using FunctionToString = FunctionConvert<DataTypeString, NameToString, ToStringMonotonicity>;
-using FunctionToUnixTimestamp = FunctionConvert<DataTypeUInt32, NameToUnixTimestamp, ToNumberMonotonicity<UInt32>>;
-using FunctionToDecimal32 = FunctionConvert<DataTypeDecimal<Decimal32>, NameToDecimal32, UnknownMonotonicity>;
-using FunctionToDecimal64 = FunctionConvert<DataTypeDecimal<Decimal64>, NameToDecimal64, UnknownMonotonicity>;
-using FunctionToDecimal128 = FunctionConvert<DataTypeDecimal<Decimal128>, NameToDecimal128, UnknownMonotonicity>;
-using FunctionToDecimal256 = FunctionConvert<DataTypeDecimal<Decimal256>, NameToDecimal256, UnknownMonotonicity>;
-
-template <typename DataType, FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior> struct FunctionTo;
-
-template <> struct FunctionTo<DataTypeUInt8> { using Type = FunctionToUInt8; };
-template <> struct FunctionTo<DataTypeUInt16> { using Type = FunctionToUInt16; };
-template <> struct FunctionTo<DataTypeUInt32> { using Type = FunctionToUInt32; };
-template <> struct FunctionTo<DataTypeUInt64> { using Type = FunctionToUInt64; };
-template <> struct FunctionTo<DataTypeUInt128> { using Type = FunctionToUInt128; };
-template <> struct FunctionTo<DataTypeUInt256> { using Type = FunctionToUInt256; };
-template <> struct FunctionTo<DataTypeInt8> { using Type = FunctionToInt8; };
-template <> struct FunctionTo<DataTypeInt16> { using Type = FunctionToInt16; };
-template <> struct FunctionTo<DataTypeInt32> { using Type = FunctionToInt32; };
-template <> struct FunctionTo<DataTypeInt64> { using Type = FunctionToInt64; };
-template <> struct FunctionTo<DataTypeInt128> { using Type = FunctionToInt128; };
-template <> struct FunctionTo<DataTypeInt256> { using Type = FunctionToInt256; };
-template <> struct FunctionTo<DataTypeFloat32> { using Type = FunctionToFloat32; };
-template <> struct FunctionTo<DataTypeFloat64> { using Type = FunctionToFloat64; };
-
-template <FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior>
-struct FunctionTo<DataTypeDate, date_time_overflow_behavior> { using Type = FunctionToDate; };
-
-template <FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior>
-struct FunctionTo<DataTypeDate32, date_time_overflow_behavior> { using Type = FunctionToDate32; };
-
-template <FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior>
-struct FunctionTo<DataTypeDateTime, date_time_overflow_behavior> { using Type = FunctionToDateTime; };
-
-template <FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior>
-struct FunctionTo<DataTypeDateTime64, date_time_overflow_behavior> { using Type = FunctionToDateTime64; };
-
-template <> struct FunctionTo<DataTypeUUID> { using Type = FunctionToUUID; };
-template <> struct FunctionTo<DataTypeIPv4> { using Type = FunctionToIPv4; };
-template <> struct FunctionTo<DataTypeIPv6> { using Type = FunctionToIPv6; };
-template <> struct FunctionTo<DataTypeString> { using Type = FunctionToString; };
-template <> struct FunctionTo<DataTypeFixedString> { using Type = FunctionToFixedString; };
-template <> struct FunctionTo<DataTypeDecimal<Decimal32>> { using Type = FunctionToDecimal32; };
-template <> struct FunctionTo<DataTypeDecimal<Decimal64>> { using Type = FunctionToDecimal64; };
-template <> struct FunctionTo<DataTypeDecimal<Decimal128>> { using Type = FunctionToDecimal128; };
-template <> struct FunctionTo<DataTypeDecimal<Decimal256>> { using Type = FunctionToDecimal256; };
-
-template <typename FieldType> struct FunctionTo<DataTypeEnum<FieldType>>
-    : FunctionTo<DataTypeNumber<FieldType>>
-{
-};
-
-struct NameToUInt8OrZero { static constexpr auto name = "toUInt8OrZero"; };
-struct NameToUInt16OrZero { static constexpr auto name = "toUInt16OrZero"; };
-struct NameToUInt32OrZero { static constexpr auto name = "toUInt32OrZero"; };
-struct NameToUInt64OrZero { static constexpr auto name = "toUInt64OrZero"; };
-struct NameToUInt128OrZero { static constexpr auto name = "toUInt128OrZero"; };
-struct NameToUInt256OrZero { static constexpr auto name = "toUInt256OrZero"; };
-struct NameToInt8OrZero { static constexpr auto name = "toInt8OrZero"; };
-struct NameToInt16OrZero { static constexpr auto name = "toInt16OrZero"; };
-struct NameToInt32OrZero { static constexpr auto name = "toInt32OrZero"; };
-struct NameToInt64OrZero { static constexpr auto name = "toInt64OrZero"; };
-struct NameToInt128OrZero { static constexpr auto name = "toInt128OrZero"; };
-struct NameToInt256OrZero { static constexpr auto name = "toInt256OrZero"; };
-struct NameToFloat32OrZero { static constexpr auto name = "toFloat32OrZero"; };
-struct NameToFloat64OrZero { static constexpr auto name = "toFloat64OrZero"; };
-struct NameToDateOrZero { static constexpr auto name = "toDateOrZero"; };
-struct NameToDate32OrZero { static constexpr auto name = "toDate32OrZero"; };
-struct NameToDateTimeOrZero { static constexpr auto name = "toDateTimeOrZero"; };
-struct NameToDateTime64OrZero { static constexpr auto name = "toDateTime64OrZero"; };
-struct NameToDecimal32OrZero { static constexpr auto name = "toDecimal32OrZero"; };
-struct NameToDecimal64OrZero { static constexpr auto name = "toDecimal64OrZero"; };
-struct NameToDecimal128OrZero { static constexpr auto name = "toDecimal128OrZero"; };
-struct NameToDecimal256OrZero { static constexpr auto name = "toDecimal256OrZero"; };
-struct NameToUUIDOrZero { static constexpr auto name = "toUUIDOrZero"; };
-struct NameToIPv4OrZero { static constexpr auto name = "toIPv4OrZero"; };
-struct NameToIPv6OrZero { static constexpr auto name = "toIPv6OrZero"; };
-
-using FunctionToUInt8OrZero = FunctionConvertFromString<DataTypeUInt8, NameToUInt8OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToUInt16OrZero = FunctionConvertFromString<DataTypeUInt16, NameToUInt16OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToUInt32OrZero = FunctionConvertFromString<DataTypeUInt32, NameToUInt32OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToUInt64OrZero = FunctionConvertFromString<DataTypeUInt64, NameToUInt64OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToUInt128OrZero = FunctionConvertFromString<DataTypeUInt128, NameToUInt128OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToUInt256OrZero = FunctionConvertFromString<DataTypeUInt256, NameToUInt256OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToInt8OrZero = FunctionConvertFromString<DataTypeInt8, NameToInt8OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToInt16OrZero = FunctionConvertFromString<DataTypeInt16, NameToInt16OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToInt32OrZero = FunctionConvertFromString<DataTypeInt32, NameToInt32OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToInt64OrZero = FunctionConvertFromString<DataTypeInt64, NameToInt64OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToInt128OrZero = FunctionConvertFromString<DataTypeInt128, NameToInt128OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToInt256OrZero = FunctionConvertFromString<DataTypeInt256, NameToInt256OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToFloat32OrZero = FunctionConvertFromString<DataTypeFloat32, NameToFloat32OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToFloat64OrZero = FunctionConvertFromString<DataTypeFloat64, NameToFloat64OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDateOrZero = FunctionConvertFromString<DataTypeDate, NameToDateOrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDate32OrZero = FunctionConvertFromString<DataTypeDate32, NameToDate32OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDateTimeOrZero = FunctionConvertFromString<DataTypeDateTime, NameToDateTimeOrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDateTime64OrZero = FunctionConvertFromString<DataTypeDateTime64, NameToDateTime64OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDecimal32OrZero = FunctionConvertFromString<DataTypeDecimal<Decimal32>, NameToDecimal32OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDecimal64OrZero = FunctionConvertFromString<DataTypeDecimal<Decimal64>, NameToDecimal64OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDecimal128OrZero = FunctionConvertFromString<DataTypeDecimal<Decimal128>, NameToDecimal128OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToDecimal256OrZero = FunctionConvertFromString<DataTypeDecimal<Decimal256>, NameToDecimal256OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToUUIDOrZero = FunctionConvertFromString<DataTypeUUID, NameToUUIDOrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToIPv4OrZero = FunctionConvertFromString<DataTypeIPv4, NameToIPv4OrZero, ConvertFromStringExceptionMode::Zero>;
-using FunctionToIPv6OrZero = FunctionConvertFromString<DataTypeIPv6, NameToIPv6OrZero, ConvertFromStringExceptionMode::Zero>;
-
-struct NameToUInt8OrNull { static constexpr auto name = "toUInt8OrNull"; };
-struct NameToUInt16OrNull { static constexpr auto name = "toUInt16OrNull"; };
-struct NameToUInt32OrNull { static constexpr auto name = "toUInt32OrNull"; };
-struct NameToUInt64OrNull { static constexpr auto name = "toUInt64OrNull"; };
-struct NameToUInt128OrNull { static constexpr auto name = "toUInt128OrNull"; };
-struct NameToUInt256OrNull { static constexpr auto name = "toUInt256OrNull"; };
-struct NameToInt8OrNull { static constexpr auto name = "toInt8OrNull"; };
-struct NameToInt16OrNull { static constexpr auto name = "toInt16OrNull"; };
-struct NameToInt32OrNull { static constexpr auto name = "toInt32OrNull"; };
-struct NameToInt64OrNull { static constexpr auto name = "toInt64OrNull"; };
-struct NameToInt128OrNull { static constexpr auto name = "toInt128OrNull"; };
-struct NameToInt256OrNull { static constexpr auto name = "toInt256OrNull"; };
-struct NameToFloat32OrNull { static constexpr auto name = "toFloat32OrNull"; };
-struct NameToFloat64OrNull { static constexpr auto name = "toFloat64OrNull"; };
-struct NameToDateOrNull { static constexpr auto name = "toDateOrNull"; };
-struct NameToDate32OrNull { static constexpr auto name = "toDate32OrNull"; };
-struct NameToDateTimeOrNull { static constexpr auto name = "toDateTimeOrNull"; };
-struct NameToDateTime64OrNull { static constexpr auto name = "toDateTime64OrNull"; };
-struct NameToDecimal32OrNull { static constexpr auto name = "toDecimal32OrNull"; };
-struct NameToDecimal64OrNull { static constexpr auto name = "toDecimal64OrNull"; };
-struct NameToDecimal128OrNull { static constexpr auto name = "toDecimal128OrNull"; };
-struct NameToDecimal256OrNull { static constexpr auto name = "toDecimal256OrNull"; };
-struct NameToUUIDOrNull { static constexpr auto name = "toUUIDOrNull"; };
-struct NameToIPv4OrNull { static constexpr auto name = "toIPv4OrNull"; };
-struct NameToIPv6OrNull { static constexpr auto name = "toIPv6OrNull"; };
-
-using FunctionToUInt8OrNull = FunctionConvertFromString<DataTypeUInt8, NameToUInt8OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToUInt16OrNull = FunctionConvertFromString<DataTypeUInt16, NameToUInt16OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToUInt32OrNull = FunctionConvertFromString<DataTypeUInt32, NameToUInt32OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToUInt64OrNull = FunctionConvertFromString<DataTypeUInt64, NameToUInt64OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToUInt128OrNull = FunctionConvertFromString<DataTypeUInt128, NameToUInt128OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToUInt256OrNull = FunctionConvertFromString<DataTypeUInt256, NameToUInt256OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToInt8OrNull = FunctionConvertFromString<DataTypeInt8, NameToInt8OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToInt16OrNull = FunctionConvertFromString<DataTypeInt16, NameToInt16OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToInt32OrNull = FunctionConvertFromString<DataTypeInt32, NameToInt32OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToInt64OrNull = FunctionConvertFromString<DataTypeInt64, NameToInt64OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToInt128OrNull = FunctionConvertFromString<DataTypeInt128, NameToInt128OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToInt256OrNull = FunctionConvertFromString<DataTypeInt256, NameToInt256OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToFloat32OrNull = FunctionConvertFromString<DataTypeFloat32, NameToFloat32OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToFloat64OrNull = FunctionConvertFromString<DataTypeFloat64, NameToFloat64OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDateOrNull = FunctionConvertFromString<DataTypeDate, NameToDateOrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDate32OrNull = FunctionConvertFromString<DataTypeDate32, NameToDate32OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDateTimeOrNull = FunctionConvertFromString<DataTypeDateTime, NameToDateTimeOrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDateTime64OrNull = FunctionConvertFromString<DataTypeDateTime64, NameToDateTime64OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDecimal32OrNull = FunctionConvertFromString<DataTypeDecimal<Decimal32>, NameToDecimal32OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDecimal64OrNull = FunctionConvertFromString<DataTypeDecimal<Decimal64>, NameToDecimal64OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDecimal128OrNull = FunctionConvertFromString<DataTypeDecimal<Decimal128>, NameToDecimal128OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToDecimal256OrNull = FunctionConvertFromString<DataTypeDecimal<Decimal256>, NameToDecimal256OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToUUIDOrNull = FunctionConvertFromString<DataTypeUUID, NameToUUIDOrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToIPv4OrNull = FunctionConvertFromString<DataTypeIPv4, NameToIPv4OrNull, ConvertFromStringExceptionMode::Null>;
-using FunctionToIPv6OrNull = FunctionConvertFromString<DataTypeIPv6, NameToIPv6OrNull, ConvertFromStringExceptionMode::Null>;
-
-struct NameParseDateTimeBestEffort { static constexpr auto name = "parseDateTimeBestEffort"; };
-struct NameParseDateTimeBestEffortOrZero { static constexpr auto name = "parseDateTimeBestEffortOrZero"; };
-struct NameParseDateTimeBestEffortOrNull { static constexpr auto name = "parseDateTimeBestEffortOrNull"; };
-struct NameParseDateTimeBestEffortUS { static constexpr auto name = "parseDateTimeBestEffortUS"; };
-struct NameParseDateTimeBestEffortUSOrZero { static constexpr auto name = "parseDateTimeBestEffortUSOrZero"; };
-struct NameParseDateTimeBestEffortUSOrNull { static constexpr auto name = "parseDateTimeBestEffortUSOrNull"; };
-struct NameParseDateTime32BestEffort { static constexpr auto name = "parseDateTime32BestEffort"; };
-struct NameParseDateTime32BestEffortOrZero { static constexpr auto name = "parseDateTime32BestEffortOrZero"; };
-struct NameParseDateTime32BestEffortOrNull { static constexpr auto name = "parseDateTime32BestEffortOrNull"; };
-struct NameParseDateTime64BestEffort { static constexpr auto name = "parseDateTime64BestEffort"; };
-struct NameParseDateTime64BestEffortOrZero { static constexpr auto name = "parseDateTime64BestEffortOrZero"; };
-struct NameParseDateTime64BestEffortOrNull { static constexpr auto name = "parseDateTime64BestEffortOrNull"; };
-struct NameParseDateTime64BestEffortUS { static constexpr auto name = "parseDateTime64BestEffortUS"; };
-struct
NameParseDateTime64BestEffortUSOrZero { static constexpr auto name = "parseDateTime64BestEffortUSOrZero"; }; -struct NameParseDateTime64BestEffortUSOrNull { static constexpr auto name = "parseDateTime64BestEffortUSOrNull"; }; - - -using FunctionParseDateTimeBestEffort = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTimeBestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>; -using FunctionParseDateTimeBestEffortOrZero = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTimeBestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>; -using FunctionParseDateTimeBestEffortOrNull = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTimeBestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>; - -using FunctionParseDateTimeBestEffortUS = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTimeBestEffortUS, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffortUS>; -using FunctionParseDateTimeBestEffortUSOrZero = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTimeBestEffortUSOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffortUS>; -using FunctionParseDateTimeBestEffortUSOrNull = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTimeBestEffortUSOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffortUS>; - -using FunctionParseDateTime32BestEffort = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTime32BestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>; -using FunctionParseDateTime32BestEffortOrZero = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTime32BestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>; -using FunctionParseDateTime32BestEffortOrNull = FunctionConvertFromString< - DataTypeDateTime, NameParseDateTime32BestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>; - -using FunctionParseDateTime64BestEffort = FunctionConvertFromString< - DataTypeDateTime64, NameParseDateTime64BestEffort, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffort>; -using FunctionParseDateTime64BestEffortOrZero = FunctionConvertFromString< - DataTypeDateTime64, NameParseDateTime64BestEffortOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffort>; -using FunctionParseDateTime64BestEffortOrNull = FunctionConvertFromString< - DataTypeDateTime64, NameParseDateTime64BestEffortOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffort>; - -using FunctionParseDateTime64BestEffortUS = FunctionConvertFromString< - DataTypeDateTime64, NameParseDateTime64BestEffortUS, ConvertFromStringExceptionMode::Throw, ConvertFromStringParsingMode::BestEffortUS>; -using FunctionParseDateTime64BestEffortUSOrZero = FunctionConvertFromString< - DataTypeDateTime64, NameParseDateTime64BestEffortUSOrZero, ConvertFromStringExceptionMode::Zero, ConvertFromStringParsingMode::BestEffortUS>; -using FunctionParseDateTime64BestEffortUSOrNull = FunctionConvertFromString< - DataTypeDateTime64, NameParseDateTime64BestEffortUSOrNull, ConvertFromStringExceptionMode::Null, ConvertFromStringParsingMode::BestEffortUS>; - - -class ExecutableFunctionCast : public IExecutableFunction -{ -public: - using WrapperType = std::function; - - explicit 
ExecutableFunctionCast( - WrapperType && wrapper_function_, const char * name_, std::optional diagnostic_) - : wrapper_function(std::move(wrapper_function_)), name(name_), diagnostic(std::move(diagnostic_)) {} - - String getName() const override { return name; } - -protected: - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override - { - /// drop second argument, pass others - ColumnsWithTypeAndName new_arguments{arguments.front()}; - if (arguments.size() > 2) - new_arguments.insert(std::end(new_arguments), std::next(std::begin(arguments), 2), std::end(arguments)); - - try - { - return wrapper_function(new_arguments, result_type, nullptr, input_rows_count); - } - catch (Exception & e) - { - if (diagnostic) - e.addMessage("while converting source column " + backQuoteIfNeed(diagnostic->column_from) + - " to destination column " + backQuoteIfNeed(diagnostic->column_to)); - throw; - } - } - - bool useDefaultImplementationForNulls() const override { return false; } - /// CAST(Nothing, T) -> T - bool useDefaultImplementationForNothing() const override { return false; } - bool useDefaultImplementationForConstants() const override { return true; } - bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } - -private: - WrapperType wrapper_function; - const char * name; - std::optional diagnostic; -}; - -struct CastName { static constexpr auto name = "CAST"; }; -struct CastInternalName { static constexpr auto name = "_CAST"; }; - -class FunctionCastBase : public IFunctionBase -{ -public: - using MonotonicityForRange = std::function; -}; - -template -class FunctionCast final : public FunctionCastBase -{ -public: - using WrapperType = std::function; - - FunctionCast(ContextPtr context_ - , const char * cast_name_ - , MonotonicityForRange && monotonicity_for_range_ - , const DataTypes & argument_types_ - , const DataTypePtr & return_type_ - , std::optional diagnostic_ - , CastType cast_type_) - : cast_name(cast_name_), monotonicity_for_range(std::move(monotonicity_for_range_)) - , argument_types(argument_types_), return_type(return_type_), diagnostic(std::move(diagnostic_)) - , cast_type(cast_type_) - , context(context_) - { - } - - const DataTypes & getArgumentTypes() const override { return argument_types; } - const DataTypePtr & getResultType() const override { return return_type; } - - ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName & /*sample_columns*/) const override - { - try - { - return std::make_unique( - prepareUnpackDictionaries(getArgumentTypes()[0], getResultType()), cast_name, diagnostic); - } - catch (Exception & e) - { - if (diagnostic) - e.addMessage("while converting source column " + backQuoteIfNeed(diagnostic->column_from) + - " to destination column " + backQuoteIfNeed(diagnostic->column_to)); - throw; - } - } - - String getName() const override { return cast_name; } - - bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - bool hasInformationAboutMonotonicity() const override - { - return static_cast(monotonicity_for_range); - } - - Monotonicity getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const override - { - return monotonicity_for_range(type, left, right); - } - -private: - - const char * cast_name; - MonotonicityForRange monotonicity_for_range; - - 
DataTypes argument_types; - DataTypePtr return_type; - - std::optional diagnostic; - CastType cast_type; - ContextPtr context; - - static WrapperType createFunctionAdaptor(FunctionPtr function, const DataTypePtr & from_type) - { - auto function_adaptor = std::make_unique(function)->build({ColumnWithTypeAndName{nullptr, from_type, ""}}); - - return [function_adaptor] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) - { - return function_adaptor->execute(arguments, result_type, input_rows_count); - }; - } - - static WrapperType createToNullableColumnWrapper() - { - return [] (ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) - { - ColumnPtr res = result_type->createColumn(); - ColumnUInt8::Ptr col_null_map_to = ColumnUInt8::create(input_rows_count, true); - return ColumnNullable::create(res->cloneResized(input_rows_count), std::move(col_null_map_to)); - }; - } - - template - WrapperType createWrapper(const DataTypePtr & from_type, const ToDataType * const to_type, bool requested_result_is_nullable) const - { - TypeIndex from_type_index = from_type->getTypeId(); - WhichDataType which(from_type_index); - bool can_apply_accurate_cast = (cast_type == CastType::accurate || cast_type == CastType::accurateOrNull) - && (which.isInt() || which.isUInt() || which.isFloat()); - - FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior; - if (context) - date_time_overflow_behavior = context->getSettingsRef().date_time_overflow_behavior; - - if (requested_result_is_nullable && checkAndGetDataType(from_type.get())) - { - /// In case when converting to Nullable type, we apply different parsing rule, - /// that will not throw an exception but return NULL in case of malformed input. 
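- /// For example, CAST('abc' AS Nullable(UInt8)) returns NULL, while CAST('abc' AS UInt8) throws a parsing error.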
- FunctionPtr function = FunctionConvertFromString::create(); - return createFunctionAdaptor(function, from_type); - } - else if (!can_apply_accurate_cast) - { - FunctionPtr function = FunctionTo::Type::create(context); - return createFunctionAdaptor(function, from_type); - } - - auto wrapper_cast_type = cast_type; - - return [wrapper_cast_type, from_type_index, to_type, date_time_overflow_behavior] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *column_nullable, size_t input_rows_count) - { - ColumnPtr result_column; - auto res = callOnIndexAndDataType(from_type_index, [&](const auto & types) -> bool { - using Types = std::decay_t; - using LeftDataType = typename Types::LeftType; - using RightDataType = typename Types::RightType; - - if constexpr (IsDataTypeNumber) - { - if constexpr (IsDataTypeNumber) - { -#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE, ADDITIONS) \ - case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \ - result_column = ConvertImpl::execute( \ - arguments, result_type, input_rows_count, ADDITIONS()); \ - break; - if (wrapper_cast_type == CastType::accurate) - { - switch (date_time_overflow_behavior) - { - GENERATE_OVERFLOW_MODE_CASE(Throw, AccurateConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Ignore, AccurateConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Saturate, AccurateConvertStrategyAdditions) - } - } - else - { - switch (date_time_overflow_behavior) - { - GENERATE_OVERFLOW_MODE_CASE(Throw, AccurateOrNullConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Ignore, AccurateOrNullConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Saturate, AccurateOrNullConvertStrategyAdditions) - } - } -#undef GENERATE_OVERFLOW_MODE_CASE - - return true; - } - - if constexpr (std::is_same_v || std::is_same_v) - { -#define GENERATE_OVERFLOW_MODE_CASE(OVERFLOW_MODE, ADDITIONS) \ - case FormatSettings::DateTimeOverflowBehavior::OVERFLOW_MODE: \ - result_column = ConvertImpl::template execute( \ -arguments, result_type, input_rows_count); \ - break; - if (wrapper_cast_type == CastType::accurate) - { - switch (date_time_overflow_behavior) - { - GENERATE_OVERFLOW_MODE_CASE(Throw, DateTimeAccurateConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Ignore, DateTimeAccurateConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Saturate, DateTimeAccurateConvertStrategyAdditions) - } - } - else - { - switch (date_time_overflow_behavior) - { - GENERATE_OVERFLOW_MODE_CASE(Throw, DateTimeAccurateOrNullConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Ignore, DateTimeAccurateOrNullConvertStrategyAdditions) - GENERATE_OVERFLOW_MODE_CASE(Saturate, DateTimeAccurateOrNullConvertStrategyAdditions) - } - } -#undef GENERATE_OVERFLOW_MODE_CASE - return true; - } - } - - return false; - }); - - /// Additionally check if callOnIndexAndDataType wasn't called at all. 
- if (!res) - { - if (wrapper_cast_type == CastType::accurateOrNull) - { - auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper(); - return nullable_column_wrapper(arguments, result_type, column_nullable, input_rows_count); - } - else - { - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, - "Conversion from {} to {} is not supported", - from_type_index, to_type->getName()); - } - } - - return result_column; - }; - } - - template - WrapperType createBoolWrapper(const DataTypePtr & from_type, const ToDataType * const to_type, bool requested_result_is_nullable) const - { - if (checkAndGetDataType(from_type.get())) - { - return &ConvertImplGenericFromString::execute; - } - - return createWrapper(from_type, to_type, requested_result_is_nullable); - } - - WrapperType createUInt8ToBoolWrapper(const DataTypePtr from_type, const DataTypePtr to_type) const - { - return [from_type, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) -> ColumnPtr - { - /// Special case when we convert UInt8 column to Bool column. - /// both columns have type UInt8, but we shouldn't use identity wrapper, - /// because Bool column can contain only 0 and 1. - auto res_column = to_type->createColumn(); - const auto & data_from = checkAndGetColumn(arguments[0].column.get())->getData(); - auto & data_to = assert_cast(res_column.get())->getData(); - data_to.resize(data_from.size()); - for (size_t i = 0; i != data_from.size(); ++i) - data_to[i] = static_cast(data_from[i]); - return res_column; - }; - } - - static WrapperType createStringWrapper(const DataTypePtr & from_type) - { - FunctionPtr function = FunctionToString::create(); - return createFunctionAdaptor(function, from_type); - } - - WrapperType createFixedStringWrapper(const DataTypePtr & from_type, const size_t N) const - { - if (!isStringOrFixedString(from_type)) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CAST AS FixedString is only implemented for types String and FixedString"); - - bool exception_mode_null = cast_type == CastType::accurateOrNull; - return [exception_mode_null, N] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) - { - if (exception_mode_null) - return FunctionToFixedString::executeForN(arguments, N); - else - return FunctionToFixedString::executeForN(arguments, N); - }; - } - -#define GENERATE_INTERVAL_CASE(INTERVAL_KIND) \ - case IntervalKind::Kind::INTERVAL_KIND: \ - return createFunctionAdaptor(FunctionConvert::create(), from_type); - - static WrapperType createIntervalWrapper(const DataTypePtr & from_type, IntervalKind kind) - { - switch (kind) - { - GENERATE_INTERVAL_CASE(Nanosecond) - GENERATE_INTERVAL_CASE(Microsecond) - GENERATE_INTERVAL_CASE(Millisecond) - GENERATE_INTERVAL_CASE(Second) - GENERATE_INTERVAL_CASE(Minute) - GENERATE_INTERVAL_CASE(Hour) - GENERATE_INTERVAL_CASE(Day) - GENERATE_INTERVAL_CASE(Week) - GENERATE_INTERVAL_CASE(Month) - GENERATE_INTERVAL_CASE(Quarter) - GENERATE_INTERVAL_CASE(Year) - } - throw Exception{ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion to unexpected IntervalKind: {}", kind.toString()}; - } - -#undef GENERATE_INTERVAL_CASE - - template - requires IsDataTypeDecimal - WrapperType createDecimalWrapper(const DataTypePtr & from_type, const ToDataType * to_type, bool requested_result_is_nullable) const - { - TypeIndex type_index = from_type->getTypeId(); - UInt32 scale = to_type->getScale(); - - WhichDataType which(type_index); - bool ok = 
which.isNativeInt() || which.isNativeUInt() || which.isDecimal() || which.isFloat() || which.isDateOrDate32() || which.isDateTime() || which.isDateTime64() - || which.isStringOrFixedString(); - if (!ok) - { - if (cast_type == CastType::accurateOrNull) - return createToNullableColumnWrapper(); - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", - from_type->getName(), to_type->getName()); - } - - auto wrapper_cast_type = cast_type; - - return [wrapper_cast_type, type_index, scale, to_type, requested_result_is_nullable] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *column_nullable, size_t input_rows_count) - { - ColumnPtr result_column; - auto res = callOnIndexAndDataType(type_index, [&](const auto & types) -> bool - { - using Types = std::decay_t; - using LeftDataType = typename Types::LeftType; - using RightDataType = typename Types::RightType; - - if constexpr (IsDataTypeDecimalOrNumber && IsDataTypeDecimalOrNumber && !std::is_same_v) - { - if (wrapper_cast_type == CastType::accurate) - { - AccurateConvertStrategyAdditions additions; - additions.scale = scale; - result_column = ConvertImpl::execute( - arguments, result_type, input_rows_count, additions); - - return true; - } - else if (wrapper_cast_type == CastType::accurateOrNull) - { - AccurateOrNullConvertStrategyAdditions additions; - additions.scale = scale; - result_column = ConvertImpl::execute( - arguments, result_type, input_rows_count, additions); - - return true; - } - } - else if constexpr (std::is_same_v) - { - if (requested_result_is_nullable) - { - /// Consistent with CAST(Nullable(String) AS Nullable(Numbers)) - /// In case when converting to Nullable type, we apply different parsing rule, - /// that will not throw an exception but return NULL in case of malformed input. - result_column = ConvertImpl::execute( - arguments, result_type, input_rows_count, scale); - - return true; - } - } - - result_column = ConvertImpl::execute(arguments, result_type, input_rows_count, scale); - - return true; - }); - - /// Additionally check if callOnIndexAndDataType wasn't called at all. - if (!res) - { - if (wrapper_cast_type == CastType::accurateOrNull) - { - auto nullable_column_wrapper = FunctionCast::createToNullableColumnWrapper(); - return nullable_column_wrapper(arguments, result_type, column_nullable, input_rows_count); - } - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, - "Conversion from {} to {} is not supported", - type_index, to_type->getName()); - } - - return result_column; - }; - } - - WrapperType createAggregateFunctionWrapper(const DataTypePtr & from_type_untyped, const DataTypeAggregateFunction * to_type) const - { - /// Conversion from String through parsing. 
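- /// The string must contain a serialized state of the target aggregate function,
- /// e.g. a value previously produced by an -State combinator.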
- if (checkAndGetDataType(from_type_untyped.get())) - { - return &ConvertImplGenericFromString::execute; - } - else if (const auto * agg_type = checkAndGetDataType(from_type_untyped.get())) - { - if (agg_type->getFunction()->haveSameStateRepresentation(*to_type->getFunction())) - { - return [function = to_type->getFunction()]( - ColumnsWithTypeAndName & arguments, - const DataTypePtr & /* result_type */, - const ColumnNullable * /* nullable_source */, - size_t /*input_rows_count*/) -> ColumnPtr - { - const auto & argument_column = arguments.front(); - const auto * col_agg = checkAndGetColumn(argument_column.column.get()); - if (col_agg) - { - auto new_col_agg = ColumnAggregateFunction::create(*col_agg); - new_col_agg->set(function); - return new_col_agg; - } - else - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Illegal column {} for function CAST AS AggregateFunction", - argument_column.column->getName()); - } - }; - } - } - - if (cast_type == CastType::accurateOrNull) - return createToNullableColumnWrapper(); - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", - from_type_untyped->getName(), to_type->getName()); - } - - WrapperType createArrayWrapper(const DataTypePtr & from_type_untyped, const DataTypeArray & to_type) const - { - /// Conversion from String through parsing. - if (checkAndGetDataType(from_type_untyped.get())) - { - return &ConvertImplGenericFromString::execute; - } - - DataTypePtr from_type_holder; - const auto * from_type = checkAndGetDataType(from_type_untyped.get()); - const auto * from_type_map = checkAndGetDataType(from_type_untyped.get()); - - /// Convert from Map - if (from_type_map) - { - /// Recreate array of unnamed tuples because otherwise it may work - /// unexpectedly while converting to array of named tuples. 
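- /// For example, Map(String, UInt64) is stored as Array(Tuple(keys String, values UInt64));
- /// dropping the element names prevents the name-based tuple conversion below from filling defaults.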
- from_type_holder = from_type_map->getNestedTypeWithUnnamedTuple(); - from_type = assert_cast(from_type_holder.get()); - } - - if (!from_type) - { - throw Exception(ErrorCodes::TYPE_MISMATCH, - "CAST AS Array can only be performed between same-dimensional Array, Map or String types"); - } - - DataTypePtr from_nested_type = from_type->getNestedType(); - - /// In query SELECT CAST([] AS Array(Array(String))) from type is Array(Nothing) - bool from_empty_array = isNothing(from_nested_type); - - if (from_type->getNumberOfDimensions() != to_type.getNumberOfDimensions() && !from_empty_array) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "CAST AS Array can only be performed between same-dimensional array types"); - - const DataTypePtr & to_nested_type = to_type.getNestedType(); - - /// Prepare nested type conversion - const auto nested_function = prepareUnpackDictionaries(from_nested_type, to_nested_type); - - return [nested_function, from_nested_type, to_nested_type]( - ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr - { - const auto & argument_column = arguments.front(); - - const ColumnArray * col_array = nullptr; - - if (const ColumnMap * col_map = checkAndGetColumn(argument_column.column.get())) - col_array = &col_map->getNestedColumn(); - else - col_array = checkAndGetColumn(argument_column.column.get()); - - if (col_array) - { - /// create columns for converting nested column containing original and result columns - ColumnsWithTypeAndName nested_columns{{ col_array->getDataPtr(), from_nested_type, "" }}; - - /// convert nested column - auto result_column = nested_function(nested_columns, to_nested_type, nullable_source, nested_columns.front().column->size()); - - /// set converted nested column to result - return ColumnArray::create(result_column, col_array->getOffsetsPtr()); - } - else - { - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Illegal column {} for function CAST AS Array", - argument_column.column->getName()); - } - }; - } - - using ElementWrappers = std::vector; - - ElementWrappers getElementWrappers(const DataTypes & from_element_types, const DataTypes & to_element_types) const - { - ElementWrappers element_wrappers; - element_wrappers.reserve(from_element_types.size()); - - /// Create conversion wrapper for each element in tuple - for (size_t i = 0; i < from_element_types.size(); ++i) - { - const DataTypePtr & from_element_type = from_element_types[i]; - const DataTypePtr & to_element_type = to_element_types[i]; - element_wrappers.push_back(prepareUnpackDictionaries(from_element_type, to_element_type)); - } - - return element_wrappers; - } - - WrapperType createTupleWrapper(const DataTypePtr & from_type_untyped, const DataTypeTuple * to_type) const - { - /// Conversion from String through parsing. - if (checkAndGetDataType(from_type_untyped.get())) - { - return &ConvertImplGenericFromString::execute; - } - - const auto * from_type = checkAndGetDataType(from_type_untyped.get()); - if (!from_type) - throw Exception(ErrorCodes::TYPE_MISMATCH, "CAST AS Tuple can only be performed between tuple types or from String.\n" - "Left type: {}, right type: {}", from_type_untyped->getName(), to_type->getName()); - - const auto & from_element_types = from_type->getElements(); - const auto & to_element_types = to_type->getElements(); - - std::vector element_wrappers; - std::vector> to_reverse_index; - - /// For named tuples allow conversions for tuples with - /// different sets of elements. 
If element exists in @to_type - /// and doesn't exist in @to_type it will be filled by default values. - if (from_type->haveExplicitNames() && to_type->haveExplicitNames()) - { - const auto & from_names = from_type->getElementNames(); - std::unordered_map from_positions; - from_positions.reserve(from_names.size()); - for (size_t i = 0; i < from_names.size(); ++i) - from_positions[from_names[i]] = i; - - const auto & to_names = to_type->getElementNames(); - element_wrappers.reserve(to_names.size()); - to_reverse_index.reserve(from_names.size()); - - for (size_t i = 0; i < to_names.size(); ++i) - { - auto it = from_positions.find(to_names[i]); - if (it != from_positions.end()) - { - element_wrappers.emplace_back(prepareUnpackDictionaries(from_element_types[it->second], to_element_types[i])); - to_reverse_index.emplace_back(it->second); - } - else - { - element_wrappers.emplace_back(); - to_reverse_index.emplace_back(); - } - } - } - else - { - if (from_element_types.size() != to_element_types.size()) - throw Exception(ErrorCodes::TYPE_MISMATCH, "CAST AS Tuple can only be performed between tuple types " - "with the same number of elements or from String.\nLeft type: {}, right type: {}", - from_type->getName(), to_type->getName()); - - element_wrappers = getElementWrappers(from_element_types, to_element_types); - to_reverse_index.reserve(to_element_types.size()); - for (size_t i = 0; i < to_element_types.size(); ++i) - to_reverse_index.emplace_back(i); - } - - return [element_wrappers, from_element_types, to_element_types, to_reverse_index] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count) -> ColumnPtr - { - const auto * col = arguments.front().column.get(); - - size_t tuple_size = to_element_types.size(); - const ColumnTuple & column_tuple = typeid_cast(*col); - - Columns converted_columns(tuple_size); - - /// invoke conversion for each element - for (size_t i = 0; i < tuple_size; ++i) - { - if (to_reverse_index[i]) - { - size_t from_idx = *to_reverse_index[i]; - ColumnsWithTypeAndName element = {{column_tuple.getColumns()[from_idx], from_element_types[from_idx], "" }}; - converted_columns[i] = element_wrappers[i](element, to_element_types[i], nullable_source, input_rows_count); - } - else - { - converted_columns[i] = to_element_types[i]->createColumn()->cloneResized(input_rows_count); - } - } - - return ColumnTuple::create(converted_columns); - }; - } - - /// The case of: tuple([key1, key2, ..., key_n], [value1, value2, ..., value_n]) - WrapperType createTupleToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const - { - return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr - { - const auto * col = arguments.front().column.get(); - const auto & column_tuple = assert_cast(*col); - - Columns offsets(2); - Columns converted_columns(2); - for (size_t i = 0; i < 2; ++i) - { - const auto & column_array = assert_cast(column_tuple.getColumn(i)); - ColumnsWithTypeAndName element = {{column_array.getDataPtr(), from_kv_types[i], ""}}; - converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size()); - offsets[i] = column_array.getOffsetsPtr(); - } - - const auto & keys_offsets = assert_cast(*offsets[0]).getData(); - const auto & values_offsets = 
assert_cast(*offsets[1]).getData(); - if (keys_offsets != values_offsets) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "CAST AS Map can only be performed from tuple of arrays with equal sizes."); - - return ColumnMap::create(converted_columns[0], converted_columns[1], offsets[0]); - }; - } - - WrapperType createMapToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const - { - return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr - { - const auto * col = arguments.front().column.get(); - const auto & column_map = typeid_cast(*col); - const auto & nested_data = column_map.getNestedData(); - - Columns converted_columns(2); - for (size_t i = 0; i < 2; ++i) - { - ColumnsWithTypeAndName element = {{nested_data.getColumnPtr(i), from_kv_types[i], ""}}; - converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size()); - } - - return ColumnMap::create(converted_columns[0], converted_columns[1], column_map.getNestedColumn().getOffsetsPtr()); - }; - } - - /// The case of: [(key1, value1), (key2, value2), ...] - WrapperType createArrayToMapWrapper(const DataTypes & from_kv_types, const DataTypes & to_kv_types) const - { - return [element_wrappers = getElementWrappers(from_kv_types, to_kv_types), from_kv_types, to_kv_types] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t /*input_rows_count*/) -> ColumnPtr - { - const auto * col = arguments.front().column.get(); - const auto & column_array = typeid_cast(*col); - const auto & nested_data = typeid_cast(column_array.getData()); - - Columns converted_columns(2); - for (size_t i = 0; i < 2; ++i) - { - ColumnsWithTypeAndName element = {{nested_data.getColumnPtr(i), from_kv_types[i], ""}}; - converted_columns[i] = element_wrappers[i](element, to_kv_types[i], nullable_source, (element[0].column)->size()); - } - - return ColumnMap::create(converted_columns[0], converted_columns[1], column_array.getOffsetsPtr()); - }; - } - - - WrapperType createMapWrapper(const DataTypePtr & from_type_untyped, const DataTypeMap * to_type) const - { - if (const auto * from_tuple = checkAndGetDataType(from_type_untyped.get())) - { - if (from_tuple->getElements().size() != 2) - throw Exception( - ErrorCodes::TYPE_MISMATCH, - "CAST AS Map from tuple requires 2 elements. " - "Left type: {}, right type: {}", - from_tuple->getName(), - to_type->getName()); - - DataTypes from_kv_types; - const auto & to_kv_types = to_type->getKeyValueTypes(); - - for (const auto & elem : from_tuple->getElements()) - { - const auto * type_array = checkAndGetDataType(elem.get()); - if (!type_array) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "CAST AS Map can only be performed from tuples of array. Got: {}", from_tuple->getName()); - - from_kv_types.push_back(type_array->getNestedType()); - } - - return createTupleToMapWrapper(from_kv_types, to_kv_types); - } - else if (const auto * from_array = typeid_cast(from_type_untyped.get())) - { - const auto * nested_tuple = typeid_cast(from_array->getNestedType().get()); - if (!nested_tuple || nested_tuple->getElements().size() != 2) - throw Exception( - ErrorCodes::TYPE_MISMATCH, - "CAST AS Map from array requires nested tuple of 2 elements. 
" - "Left type: {}, right type: {}", - from_array->getName(), - to_type->getName()); - - return createArrayToMapWrapper(nested_tuple->getElements(), to_type->getKeyValueTypes()); - } - else if (const auto * from_type = checkAndGetDataType(from_type_untyped.get())) - { - return createMapToMapWrapper(from_type->getKeyValueTypes(), to_type->getKeyValueTypes()); - } - else - { - throw Exception(ErrorCodes::TYPE_MISMATCH, "Unsupported types to CAST AS Map. " - "Left type: {}, right type: {}", from_type_untyped->getName(), to_type->getName()); - } - } - - WrapperType createTupleToObjectWrapper(const DataTypeTuple & from_tuple, bool has_nullable_subcolumns) const - { - if (!from_tuple.haveExplicitNames()) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Cast to Object can be performed only from flatten Named Tuple. Got: {}", from_tuple.getName()); - - PathsInData paths; - DataTypes from_types; - - std::tie(paths, from_types) = flattenTuple(from_tuple.getPtr()); - auto to_types = from_types; - - for (auto & type : to_types) - { - if (isTuple(type) || isNested(type)) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Cast to Object can be performed only from flatten Named Tuple. Got: {}", - from_tuple.getName()); - - type = recursiveRemoveLowCardinality(type); - } - - return [element_wrappers = getElementWrappers(from_types, to_types), - has_nullable_subcolumns, from_types, to_types, paths] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count) - { - size_t tuple_size = to_types.size(); - auto flattened_column = flattenTuple(arguments.front().column); - const auto & column_tuple = assert_cast(*flattened_column); - - if (tuple_size != column_tuple.getColumns().size()) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Expected tuple with {} subcolumn, but got {} subcolumns", - tuple_size, column_tuple.getColumns().size()); - - auto res = ColumnObject::create(has_nullable_subcolumns); - for (size_t i = 0; i < tuple_size; ++i) - { - ColumnsWithTypeAndName element = {{column_tuple.getColumns()[i], from_types[i], "" }}; - auto converted_column = element_wrappers[i](element, to_types[i], nullable_source, input_rows_count); - res->addSubcolumn(paths[i], converted_column->assumeMutable()); - } - - return res; - }; - } - - WrapperType createMapToObjectWrapper(const DataTypeMap & from_map, bool has_nullable_subcolumns) const - { - auto key_value_types = from_map.getKeyValueTypes(); - - if (!isStringOrFixedString(key_value_types[0])) - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Cast to Object from Map can be performed only from Map " - "with String or FixedString key. 
Got: {}", from_map.getName()); - - const auto & value_type = key_value_types[1]; - auto to_value_type = value_type; - - if (!has_nullable_subcolumns && value_type->isNullable()) - to_value_type = removeNullable(value_type); - - if (has_nullable_subcolumns && !value_type->isNullable()) - to_value_type = makeNullable(value_type); - - DataTypes to_key_value_types{std::make_shared(), std::move(to_value_type)}; - auto element_wrappers = getElementWrappers(key_value_types, to_key_value_types); - - return [has_nullable_subcolumns, element_wrappers, key_value_types, to_key_value_types] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t) -> ColumnPtr - { - const auto & column_map = assert_cast(*arguments.front().column); - const auto & offsets = column_map.getNestedColumn().getOffsets(); - auto key_value_columns = column_map.getNestedData().getColumnsCopy(); - - for (size_t i = 0; i < 2; ++i) - { - ColumnsWithTypeAndName element{{key_value_columns[i], key_value_types[i], ""}}; - key_value_columns[i] = element_wrappers[i](element, to_key_value_types[i], nullable_source, key_value_columns[i]->size()); - } - - const auto & key_column_str = assert_cast(*key_value_columns[0]); - const auto & value_column = *key_value_columns[1]; - - using SubcolumnsMap = HashMap; - SubcolumnsMap subcolumns; - - for (size_t row = 0; row < offsets.size(); ++row) - { - for (size_t i = offsets[static_cast(row) - 1]; i < offsets[row]; ++i) - { - auto ref = key_column_str.getDataAt(i); - - bool inserted; - SubcolumnsMap::LookupResult it; - subcolumns.emplace(ref, it, inserted); - auto & subcolumn = it->getMapped(); - - if (inserted) - subcolumn = value_column.cloneEmpty()->cloneResized(row); - - /// Map can have duplicated keys. We insert only first one. - if (subcolumn->size() == row) - subcolumn->insertFrom(value_column, i); - } - - /// Insert default values for keys missed in current row. 
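- /// (a subcolumn not touched in this row still has size == row, so exactly one default is appended)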
- for (const auto & [_, subcolumn] : subcolumns) - if (subcolumn->size() == row) - subcolumn->insertDefault(); - } - - auto column_object = ColumnObject::create(has_nullable_subcolumns); - for (auto && [key, subcolumn] : subcolumns) - { - PathInData path(key.toView()); - column_object->addSubcolumn(path, std::move(subcolumn)); - } - - return column_object; - }; - } - - WrapperType createObjectWrapper(const DataTypePtr & from_type, const DataTypeObject * to_type) const - { - if (const auto * from_tuple = checkAndGetDataType(from_type.get())) - { - return createTupleToObjectWrapper(*from_tuple, to_type->hasNullableSubcolumns()); - } - else if (const auto * from_map = checkAndGetDataType(from_type.get())) - { - return createMapToObjectWrapper(*from_map, to_type->hasNullableSubcolumns()); - } - else if (checkAndGetDataType(from_type.get())) - { - return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) - { - auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); - res->finalize(); - return res; - }; - } - else if (checkAndGetDataType(from_type.get())) - { - return [is_nullable = to_type->hasNullableSubcolumns()] (ColumnsWithTypeAndName & arguments, const DataTypePtr & , const ColumnNullable * , size_t) -> ColumnPtr - { - auto & column_object = assert_cast(*arguments.front().column); - auto res = ColumnObject::create(is_nullable); - for (size_t i = 0; i < column_object.size(); i++) - res->insert(column_object[i]); - - res->finalize(); - return res; - }; - } - - throw Exception(ErrorCodes::TYPE_MISMATCH, - "Cast to Object can be performed only from flatten named Tuple, Map or String. Got: {}", from_type->getName()); - } - - WrapperType createVariantToVariantWrapper(const DataTypeVariant & from_variant, const DataTypeVariant & to_variant) const - { - /// We support only extension of variant type, so, only new types can be added. - /// For example: Variant(T1, T2) -> Variant(T1, T2, T3) is supported, but Variant(T1, T2) -> Variant(T1, T3) is not supported. - /// We want to extend Variant type for free without rewriting the data, but we sort data types inside Variant during type creation - /// (we do it because we want Variant(T1, T2) to be the same as Variant(T2, T1)), but after extension the order of variant types - /// (and so their discriminators) can be different. For example: Variant(T1, T3) -> Variant(T1, T2, T3). - /// To avoid full rewrite of discriminators column, ColumnVariant supports it's local order of variant columns (and so local - /// discriminators) and stores mapping global order -> local order. - /// So, to extend Variant with new types for free, we should keep old local order for old variants, append new variants and change - /// mapping global order -> local order according to the new global order. - - /// Create map (new variant type) -> (it's global discriminator in new order). - const auto & new_variants = to_variant.getVariants(); - std::unordered_map new_variant_types_to_new_global_discriminator; - new_variant_types_to_new_global_discriminator.reserve(new_variants.size()); - for (size_t i = 0; i != new_variants.size(); ++i) - new_variant_types_to_new_global_discriminator[new_variants[i]->getName()] = i; - - /// Create set of old variant types. 
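- /// (stored as a map from type name to its old global discriminator, symmetric to the map above)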
- const auto & old_variants = from_variant.getVariants(); - std::unordered_map old_variant_types_to_old_global_discriminator; - old_variant_types_to_old_global_discriminator.reserve(old_variants.size()); - for (size_t i = 0; i != old_variants.size(); ++i) - old_variant_types_to_old_global_discriminator[old_variants[i]->getName()] = i; - - /// Check that the set of old variants types is a subset of new variant types and collect new global discriminator for each old global discriminator. - std::unordered_map old_global_discriminator_to_new; - old_global_discriminator_to_new.reserve(old_variants.size()); - for (const auto & [old_variant_type, old_discriminator] : old_variant_types_to_old_global_discriminator) - { - auto it = new_variant_types_to_new_global_discriminator.find(old_variant_type); - if (it == new_variant_types_to_new_global_discriminator.end()) - throw Exception( - ErrorCodes::CANNOT_CONVERT_TYPE, - "Cannot convert type {} to {}. Conversion between Variant types is allowed only when new Variant type is an extension " - "of an initial one", from_variant.getName(), to_variant.getName()); - old_global_discriminator_to_new[old_discriminator] = it->second; - } - - /// Collect variant types and their global discriminators that should be added to the old Variant to get the new Variant. - std::vector> variant_types_and_discriminators_to_add; - variant_types_and_discriminators_to_add.reserve(new_variants.size() - old_variants.size()); - for (size_t i = 0; i != new_variants.size(); ++i) - { - if (!old_variant_types_to_old_global_discriminator.contains(new_variants[i]->getName())) - variant_types_and_discriminators_to_add.emplace_back(new_variants[i], i); - } - - return [old_global_discriminator_to_new, variant_types_and_discriminators_to_add] - (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr - { - const auto & column_variant = assert_cast(*arguments.front().column.get()); - size_t num_old_variants = column_variant.getNumVariants(); - Columns new_variant_columns; - new_variant_columns.reserve(num_old_variants + variant_types_and_discriminators_to_add.size()); - std::vector new_local_to_global_discriminators; - new_local_to_global_discriminators.reserve(num_old_variants + variant_types_and_discriminators_to_add.size()); - for (size_t i = 0; i != num_old_variants; ++i) - { - new_variant_columns.push_back(column_variant.getVariantPtrByLocalDiscriminator(i)); - new_local_to_global_discriminators.push_back(old_global_discriminator_to_new.at(column_variant.globalDiscriminatorByLocal(i))); - } - - for (const auto & [new_variant_type, new_global_discriminator] : variant_types_and_discriminators_to_add) - { - new_variant_columns.push_back(new_variant_type->createColumn()); - new_local_to_global_discriminators.push_back(new_global_discriminator); - } - - return ColumnVariant::create(column_variant.getLocalDiscriminatorsPtr(), column_variant.getOffsetsPtr(), new_variant_columns, new_local_to_global_discriminators); - }; - } - - WrapperType createVariantToColumnWrapper(const DataTypeVariant & from_variant, const DataTypePtr & to_type) const - { - const auto & variant_types = from_variant.getVariants(); - std::vector variant_wrappers; - variant_wrappers.reserve(variant_types.size()); - - /// Create conversion wrapper for each variant. 
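- /// For example, CAST from Variant(String, UInt64) to UInt64 builds two wrappers:
- /// String -> UInt64 (through parsing) and UInt64 -> UInt64 (identity).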
- for (const auto & variant_type : variant_types) - variant_wrappers.push_back(prepareUnpackDictionaries(variant_type, to_type)); - - return [variant_wrappers, variant_types, to_type] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr - { - const auto & column_variant = assert_cast(*arguments.front().column.get()); - - /// First, cast each variant to the result type. - std::vector casted_variant_columns; - casted_variant_columns.reserve(variant_types.size()); - for (size_t i = 0; i != variant_types.size(); ++i) - { - auto variant_col = column_variant.getVariantPtrByLocalDiscriminator(i); - ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], "" }}; - const auto & variant_wrapper = variant_wrappers[column_variant.globalDiscriminatorByLocal(i)]; - casted_variant_columns.push_back(variant_wrapper(variant, result_type, nullptr, variant_col->size())); - } - - /// Second, construct resulting column from casted variant columns according to discriminators. - const auto & local_discriminators = column_variant.getLocalDiscriminators(); - auto res = result_type->createColumn(); - res->reserve(input_rows_count); - for (size_t i = 0; i != input_rows_count; ++i) - { - auto local_discr = local_discriminators[i]; - if (local_discr == ColumnVariant::NULL_DISCRIMINATOR) - res->insertDefault(); - else - res->insertFrom(*casted_variant_columns[local_discr], column_variant.offsetAt(i)); - } - - return res; - }; - } - - static ColumnPtr createVariantFromDescriptorsAndOneNonEmptyVariant(const DataTypes & variant_types, const ColumnPtr & discriminators, const ColumnPtr & variant, ColumnVariant::Discriminator variant_discr) - { - Columns variants; - variants.reserve(variant_types.size()); - for (size_t i = 0; i != variant_types.size(); ++i) - { - if (i == variant_discr) - variants.emplace_back(variant); - else - variants.push_back(variant_types[i]->createColumn()); - } - - return ColumnVariant::create(discriminators, variants); - } - - WrapperType createColumnToVariantWrapper(const DataTypePtr & from_type, const DataTypeVariant & to_variant) const - { - /// We allow converting NULL to Variant(...) as Variant can store NULLs. - if (from_type->onlyNull()) - { - return [](ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr - { - auto result_column = result_type->createColumn(); - result_column->insertManyDefaults(input_rows_count); - return result_column; - }; - } - - auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(*removeNullableOrLowCardinalityNullable(from_type)); - if (!variant_discr_opt) - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert type {} to {}. 
Conversion to Variant allowed only for types from this Variant", from_type->getName(), to_variant.getName()); - - return [variant_discr = *variant_discr_opt] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t) -> ColumnPtr - { - const auto & result_variant_type = assert_cast(*result_type); - const auto & variant_types = result_variant_type.getVariants(); - if (const ColumnNullable * col_nullable = typeid_cast(arguments.front().column.get())) - { - const auto & column = col_nullable->getNestedColumnPtr(); - const auto & null_map = col_nullable->getNullMapData(); - IColumn::Filter filter; - filter.reserve(column->size()); - auto discriminators = ColumnVariant::ColumnDiscriminators::create(); - auto & discriminators_data = discriminators->getData(); - discriminators_data.reserve(column->size()); - size_t variant_size_hint = 0; - for (size_t i = 0; i != column->size(); ++i) - { - if (null_map[i]) - { - discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR); - filter.push_back(0); - } - else - { - discriminators_data.push_back(variant_discr); - filter.push_back(1); - ++variant_size_hint; - } - } - - ColumnPtr variant_column; - /// If there were no NULLs, just use the column. - if (variant_size_hint == column->size()) - variant_column = column; - /// Otherwise we should use filtered column. - else - variant_column = column->filter(filter, variant_size_hint); - return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), variant_column, variant_discr); - } - else if (isColumnLowCardinalityNullable(*arguments.front().column)) - { - const auto & column = arguments.front().column; - - /// Variant column cannot have LowCardinality(Nullable(...)) variant, as Variant column stores NULLs itself. - /// We should create a null-map, insert NULL_DISCRIMINATOR on NULL values and filter initial column. - const auto & col_lc = assert_cast(*column); - const auto & indexes = col_lc.getIndexes(); - auto null_index = col_lc.getDictionary().getNullValueIndex(); - IColumn::Filter filter; - filter.reserve(col_lc.size()); - auto discriminators = ColumnVariant::ColumnDiscriminators::create(); - auto & discriminators_data = discriminators->getData(); - discriminators_data.reserve(col_lc.size()); - size_t variant_size_hint = 0; - for (size_t i = 0; i != col_lc.size(); ++i) - { - if (indexes.getUInt(i) == null_index) - { - discriminators_data.push_back(ColumnVariant::NULL_DISCRIMINATOR); - filter.push_back(0); - } - else - { - discriminators_data.push_back(variant_discr); - filter.push_back(1); - ++variant_size_hint; - } - } - - MutableColumnPtr variant_column; - /// If there were no NULLs, we can just clone the column. - if (variant_size_hint == col_lc.size()) - variant_column = IColumn::mutate(column); - /// Otherwise we should filter column. 
- else - variant_column = column->filter(filter, variant_size_hint)->assumeMutable(); - - assert_cast(*variant_column).nestedRemoveNullable(); - return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), std::move(variant_column), variant_discr); - } - else - { - const auto & column = arguments.front().column; - auto discriminators = ColumnVariant::ColumnDiscriminators::create(); - discriminators->getData().resize_fill(column->size(), variant_discr); - return createVariantFromDescriptorsAndOneNonEmptyVariant(variant_types, std::move(discriminators), column, variant_discr); - } - }; - } - - /// Wrapper for conversion to/from Variant type - WrapperType createVariantWrapper(const DataTypePtr & from_type, const DataTypePtr & to_type) const - { - if (const auto * from_variant = checkAndGetDataType(from_type.get())) - { - if (const auto * to_variant = checkAndGetDataType(to_type.get())) - return createVariantToVariantWrapper(*from_variant, *to_variant); - - return createVariantToColumnWrapper(*from_variant, to_type); - } - - return createColumnToVariantWrapper(from_type, assert_cast(*to_type)); - } - - template - WrapperType createEnumWrapper(const DataTypePtr & from_type, const DataTypeEnum * to_type) const - { - using EnumType = DataTypeEnum; - using Function = typename FunctionTo::Type; - - if (const auto * from_enum8 = checkAndGetDataType(from_type.get())) - checkEnumToEnumConversion(from_enum8, to_type); - else if (const auto * from_enum16 = checkAndGetDataType(from_type.get())) - checkEnumToEnumConversion(from_enum16, to_type); - - if (checkAndGetDataType(from_type.get())) - return createStringToEnumWrapper(); - else if (checkAndGetDataType(from_type.get())) - return createStringToEnumWrapper(); - else if (isNativeNumber(from_type) || isEnum(from_type)) - { - auto function = Function::create(); - return createFunctionAdaptor(function, from_type); - } - else - { - if (cast_type == CastType::accurateOrNull) - return createToNullableColumnWrapper(); - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", - from_type->getName(), to_type->getName()); - } - } - - template - void checkEnumToEnumConversion(const EnumTypeFrom * from_type, const EnumTypeTo * to_type) const - { - const auto & from_values = from_type->getValues(); - const auto & to_values = to_type->getValues(); - - using ValueType = std::common_type_t; - using NameValuePair = std::pair; - using EnumValues = std::vector; - - EnumValues name_intersection; - std::set_intersection(std::begin(from_values), std::end(from_values), - std::begin(to_values), std::end(to_values), std::back_inserter(name_intersection), - [] (auto && from, auto && to) { return from.first < to.first; }); - - for (const auto & name_value : name_intersection) - { - const auto & old_value = name_value.second; - const auto & new_value = to_type->getValue(name_value.first); - if (old_value != new_value) - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Enum conversion changes value for element '{}' from {} to {}", - name_value.first, toString(old_value), toString(new_value)); - } - } - - template - WrapperType createStringToEnumWrapper() const - { - const char * function_name = cast_name; - return [function_name] ( - ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, const ColumnNullable * nullable_col, size_t /*input_rows_count*/) - { - const auto & first_col = arguments.front().column.get(); - const auto & result_type = typeid_cast(*res_type); - - const 
ColumnStringType * col = typeid_cast(first_col); - - if (col && nullable_col && nullable_col->size() != col->size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnNullable is not compatible with original"); - - if (col) - { - const auto size = col->size(); - - auto res = result_type.createColumn(); - auto & out_data = static_cast(*res).getData(); - out_data.resize(size); - - auto default_enum_value = result_type.getValues().front().second; - - if (nullable_col) - { - for (size_t i = 0; i < size; ++i) - { - if (!nullable_col->isNullAt(i)) - out_data[i] = result_type.getValue(col->getDataAt(i)); - else - out_data[i] = default_enum_value; - } - } - else - { - for (size_t i = 0; i < size; ++i) - out_data[i] = result_type.getValue(col->getDataAt(i)); - } - - return res; - } - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected column {} as first argument of function {}", - first_col->getName(), function_name); - }; - } - - template - WrapperType createEnumToStringWrapper() const - { - const char * function_name = cast_name; - return [function_name] ( - ColumnsWithTypeAndName & arguments, const DataTypePtr & res_type, const ColumnNullable * nullable_col, size_t /*input_rows_count*/) - { - using ColumnEnumType = EnumType::ColumnType; - - const auto & first_col = arguments.front().column.get(); - const auto & first_type = arguments.front().type.get(); - - const ColumnEnumType * enum_col = typeid_cast(first_col); - const EnumType * enum_type = typeid_cast(first_type); - - if (enum_col && nullable_col && nullable_col->size() != enum_col->size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "ColumnNullable is not compatible with original"); - - if (enum_col && enum_type) - { - const auto size = enum_col->size(); - const auto & enum_data = enum_col->getData(); - - auto res = res_type->createColumn(); - - if (nullable_col) - { - for (size_t i = 0; i < size; ++i) - { - if (!nullable_col->isNullAt(i)) - { - const auto & value = enum_type->getNameForValue(enum_data[i]); - res->insertData(value.data, value.size); - } - else - res->insertDefault(); - } - } - else - { - for (size_t i = 0; i < size; ++i) - { - const auto & value = enum_type->getNameForValue(enum_data[i]); - res->insertData(value.data, value.size); - } - } - - return res; - } - else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected column {} as first argument of function {}", - first_col->getName(), function_name); - }; - } - - static WrapperType createIdentityWrapper(const DataTypePtr &) - { - return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) - { - return arguments.front().column; - }; - } - - static WrapperType createNothingWrapper(const IDataType * to_type) - { - ColumnPtr res = to_type->createColumnConstWithDefaultValue(1); - return [res] (ColumnsWithTypeAndName &, const DataTypePtr &, const ColumnNullable *, size_t input_rows_count) - { - /// Column of Nothing type is trivially convertible to any other column - return res->cloneResized(input_rows_count)->convertToFullColumnIfConst(); - }; - } - - WrapperType prepareUnpackDictionaries(const DataTypePtr & from_type, const DataTypePtr & to_type) const - { - /// Conversion from/to Variant data type is processed in a special way. - /// We don't need to remove LowCardinality/Nullable. 
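-        /// Illustrative example: CAST(LowCardinality(Nullable(String)) AS Variant(String, UInt64)) is handled entirely
-        /// by the Variant wrapper, which unwraps LowCardinality(Nullable(...)) itself and maps NULL values to
-        /// NULL_DISCRIMINATOR, so stripping those wrappers here first would be wrong.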
- if (isVariant(to_type) || isVariant(from_type)) - return createVariantWrapper(from_type, to_type); - - const auto * from_low_cardinality = typeid_cast(from_type.get()); - const auto * to_low_cardinality = typeid_cast(to_type.get()); - const auto & from_nested = from_low_cardinality ? from_low_cardinality->getDictionaryType() : from_type; - const auto & to_nested = to_low_cardinality ? to_low_cardinality->getDictionaryType() : to_type; - - if (from_type->onlyNull()) - { - if (!to_nested->isNullable() && !isVariant(to_type)) - { - if (cast_type == CastType::accurateOrNull) - { - return createToNullableColumnWrapper(); - } - else - { - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert NULL to a non-nullable type"); - } - } - - return [](ColumnsWithTypeAndName &, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) - { - return result_type->createColumnConstWithDefaultValue(input_rows_count)->convertToFullColumnIfConst(); - }; - } - - bool skip_not_null_check = false; - - if (from_low_cardinality && from_nested->isNullable() && !to_nested->isNullable()) - /// Disable check for dictionary. Will check that column doesn't contain NULL in wrapper below. - skip_not_null_check = true; - - auto wrapper = prepareRemoveNullable(from_nested, to_nested, skip_not_null_check); - if (!from_low_cardinality && !to_low_cardinality) - return wrapper; - - return [wrapper, from_low_cardinality, to_low_cardinality, skip_not_null_check] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) -> ColumnPtr - { - ColumnsWithTypeAndName args = {arguments[0]}; - auto & arg = args.front(); - auto res_type = result_type; - - ColumnPtr converted_column; - - ColumnPtr res_indexes; - /// For some types default can't be casted (for example, String to Int). In that case convert column to full. - bool src_converted_to_full_column = false; - - { - auto tmp_rows_count = input_rows_count; - - if (to_low_cardinality) - res_type = to_low_cardinality->getDictionaryType(); - - if (from_low_cardinality) - { - const auto * col_low_cardinality = assert_cast(arguments[0].column.get()); - - if (skip_not_null_check && col_low_cardinality->containsNull()) - throw Exception(ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN, "Cannot convert NULL value to non-Nullable type"); - - arg.column = col_low_cardinality->getDictionary().getNestedColumn(); - arg.type = from_low_cardinality->getDictionaryType(); - - /// TODO: Make map with defaults conversion. - src_converted_to_full_column = !removeNullable(arg.type)->equals(*removeNullable(res_type)); - if (src_converted_to_full_column) - arg.column = arg.column->index(col_low_cardinality->getIndexes(), 0); - else - res_indexes = col_low_cardinality->getIndexesPtr(); - - tmp_rows_count = arg.column->size(); - } - - /// Perform the requested conversion. 
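-            /// Illustrative example: for CAST(LowCardinality(String) AS LowCardinality(UInt64)) only the dictionary,
-            /// which is typically much smaller than the full column, is converted here; the original indexes are
-            /// reused below whenever the dictionary conversion preserved them.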
- converted_column = wrapper(args, res_type, nullable_source, tmp_rows_count); - } - - if (to_low_cardinality) - { - auto res_column = to_low_cardinality->createColumn(); - auto * col_low_cardinality = assert_cast(res_column.get()); - - if (from_low_cardinality && !src_converted_to_full_column) - { - col_low_cardinality->insertRangeFromDictionaryEncodedColumn(*converted_column, *res_indexes); - } - else - col_low_cardinality->insertRangeFromFullColumn(*converted_column, 0, converted_column->size()); - - return res_column; - } - else if (!src_converted_to_full_column) - return converted_column->index(*res_indexes, 0); - else - return converted_column; - }; - } - - WrapperType prepareRemoveNullable(const DataTypePtr & from_type, const DataTypePtr & to_type, bool skip_not_null_check) const - { - /// Determine whether pre-processing and/or post-processing must take place during conversion. - - bool source_is_nullable = from_type->isNullable(); - bool result_is_nullable = to_type->isNullable(); - - auto wrapper = prepareImpl(removeNullable(from_type), removeNullable(to_type), result_is_nullable); - - if (result_is_nullable) - { - return [wrapper, source_is_nullable] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr - { - /// Create a temporary columns on which to perform the operation. - const auto & nullable_type = static_cast(*result_type); - const auto & nested_type = nullable_type.getNestedType(); - - ColumnsWithTypeAndName tmp_args; - if (source_is_nullable) - tmp_args = createBlockWithNestedColumns(arguments); - else - tmp_args = arguments; - - const ColumnNullable * nullable_source = nullptr; - - /// Add original ColumnNullable for createStringToEnumWrapper() - if (source_is_nullable) - { - if (arguments.size() != 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid number of arguments"); - nullable_source = typeid_cast(arguments.front().column.get()); - } - - /// Perform the requested conversion. - auto tmp_res = wrapper(tmp_args, nested_type, nullable_source, input_rows_count); - - /// May happen in fuzzy tests. For debug purpose. - if (!tmp_res) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Couldn't convert {} to {} in prepareRemoveNullable wrapper.", - arguments[0].type->getName(), nested_type->getName()); - - return wrapInNullable(tmp_res, arguments, nested_type, input_rows_count); - }; - } - else if (source_is_nullable) - { - /// Conversion from Nullable to non-Nullable. - - return [wrapper, skip_not_null_check] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr - { - auto tmp_args = createBlockWithNestedColumns(arguments); - auto nested_type = removeNullable(result_type); - - /// Check that all values are not-NULL. - /// Check can be skipped in case if LowCardinality dictionary is transformed. - /// In that case, correctness will be checked beforehand. 
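-                /// Illustrative example: CAST(Nullable(UInt8) AS UInt8) over a column that contains at least one NULL
-                /// fails here with CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN instead of silently producing a default value.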
- if (!skip_not_null_check) - { - const auto & col = arguments[0].column; - const auto & nullable_col = assert_cast(*col); - const auto & null_map = nullable_col.getNullMapData(); - - if (!memoryIsZero(null_map.data(), 0, null_map.size())) - throw Exception(ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN, "Cannot convert NULL value to non-Nullable type"); - } - const ColumnNullable * nullable_source = typeid_cast(arguments.front().column.get()); - return wrapper(tmp_args, nested_type, nullable_source, input_rows_count); - }; - } - else - return wrapper; - } - - /// 'from_type' and 'to_type' are nested types in case of Nullable. - /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested. - WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const - { - if (isUInt8(from_type) && isBool(to_type)) - return createUInt8ToBoolWrapper(from_type, to_type); - - /// We can cast IPv6 into IPv6, IPv4 into IPv4, but we should not allow to cast FixedString(16) into IPv6 as part of identity cast - bool safe_convert_custom_types = true; - - if (const auto * to_type_custom_name = to_type->getCustomName()) - safe_convert_custom_types = from_type->getCustomName() && from_type->getCustomName()->getName() == to_type_custom_name->getName(); - else if (const auto * from_type_custom_name = from_type->getCustomName()) - safe_convert_custom_types = to_type->getCustomName() && from_type_custom_name->getName() == to_type->getCustomName()->getName(); - - if (from_type->equals(*to_type) && safe_convert_custom_types) - { - /// We can only use identity conversion for DataTypeAggregateFunction when they are strictly equivalent. - if (typeid_cast(from_type.get())) - { - if (DataTypeAggregateFunction::strictEquals(from_type, to_type)) - return createIdentityWrapper(from_type); - } - else - return createIdentityWrapper(from_type); - } - else if (WhichDataType(from_type).isNothing()) - return createNothingWrapper(to_type.get()); - - WrapperType ret; - - auto make_default_wrapper = [&](const auto & types) -> bool - { - using Types = std::decay_t; - using ToDataType = typename Types::LeftType; - - if constexpr ( - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) - { - ret = createWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); - return true; - } - if constexpr (std::is_same_v) - { - if (isBool(to_type)) - ret = createBoolWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); - else - ret = createWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); - return true; - } - if constexpr ( - std::is_same_v || - std::is_same_v) - { - ret = createEnumWrapper(from_type, checkAndGetDataType(to_type.get())); - return true; - } - if constexpr ( - std::is_same_v> || - std::is_same_v> || - std::is_same_v> || - std::is_same_v> || - std::is_same_v) - { - ret = createDecimalWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); - return true; - } - - return false; - }; - - bool cast_ipv4_ipv6_default_on_conversion_error_value = context && 
context->getSettingsRef().cast_ipv4_ipv6_default_on_conversion_error; - bool input_format_ipv4_default_on_conversion_error_value = context && context->getSettingsRef().input_format_ipv4_default_on_conversion_error; - bool input_format_ipv6_default_on_conversion_error_value = context && context->getSettingsRef().input_format_ipv6_default_on_conversion_error; - - auto make_custom_serialization_wrapper = [&, cast_ipv4_ipv6_default_on_conversion_error_value, input_format_ipv4_default_on_conversion_error_value, input_format_ipv6_default_on_conversion_error_value](const auto & types) -> bool - { - using Types = std::decay_t; - using ToDataType = typename Types::RightType; - using FromDataType = typename Types::LeftType; - - if constexpr (WhichDataType(FromDataType::type_id).isStringOrFixedString()) - { - if constexpr (std::is_same_v) - { - ret = [cast_ipv4_ipv6_default_on_conversion_error_value, - input_format_ipv4_default_on_conversion_error_value, - requested_result_is_nullable]( - ColumnsWithTypeAndName & arguments, - const DataTypePtr & result_type, - const ColumnNullable * column_nullable, - size_t) -> ColumnPtr - { - if (!WhichDataType(result_type).isIPv4()) - throw Exception(ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName()); - - const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; - if (requested_result_is_nullable) - return convertToIPv4(arguments[0].column, null_map); - else if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv4_default_on_conversion_error_value) - return convertToIPv4(arguments[0].column, null_map); - else - return convertToIPv4(arguments[0].column, null_map); - }; - - return true; - } - - if constexpr (std::is_same_v) - { - ret = [cast_ipv4_ipv6_default_on_conversion_error_value, - input_format_ipv6_default_on_conversion_error_value, - requested_result_is_nullable]( - ColumnsWithTypeAndName & arguments, - const DataTypePtr & result_type, - const ColumnNullable * column_nullable, - size_t) -> ColumnPtr - { - if (!WhichDataType(result_type).isIPv6()) - throw Exception( - ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv6", result_type->getName()); - - const auto * null_map = column_nullable ? 
&column_nullable->getNullMapData() : nullptr; - if (requested_result_is_nullable) - return convertToIPv6(arguments[0].column, null_map); - else if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv6_default_on_conversion_error_value) - return convertToIPv6(arguments[0].column, null_map); - else - return convertToIPv6(arguments[0].column, null_map); - }; - - return true; - } - - if (to_type->getCustomSerialization() && to_type->getCustomName()) - { - ret = [requested_result_is_nullable]( - ColumnsWithTypeAndName & arguments, - const DataTypePtr & result_type, - const ColumnNullable * column_nullable, - size_t input_rows_count) -> ColumnPtr - { - auto wrapped_result_type = result_type; - if (requested_result_is_nullable) - wrapped_result_type = makeNullable(result_type); - return ConvertImplGenericFromString::execute( - arguments, wrapped_result_type, column_nullable, input_rows_count); - }; - return true; - } - } - else if constexpr (WhichDataType(FromDataType::type_id).isIPv6() && WhichDataType(ToDataType::type_id).isIPv4()) - { - ret = [cast_ipv4_ipv6_default_on_conversion_error_value, requested_result_is_nullable]( - ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t) - -> ColumnPtr - { - if (!WhichDataType(result_type).isIPv4()) - throw Exception( - ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName()); - - const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr; - if (requested_result_is_nullable) - return convertIPv6ToIPv4(arguments[0].column, null_map); - else if (cast_ipv4_ipv6_default_on_conversion_error_value) - return convertIPv6ToIPv4(arguments[0].column, null_map); - else - return convertIPv6ToIPv4(arguments[0].column, null_map); - }; - - return true; - } - - if constexpr (WhichDataType(ToDataType::type_id).isStringOrFixedString()) - { - if constexpr (WhichDataType(FromDataType::type_id).isEnum()) - { - ret = createEnumToStringWrapper(); - return true; - } - else if (from_type->getCustomSerialization()) - { - ret = [](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr - { - return ConvertImplGenericToString::execute(arguments, result_type, input_rows_count); - }; - return true; - } - } - - return false; - }; - - if (callOnTwoTypeIndexes(from_type->getTypeId(), to_type->getTypeId(), make_custom_serialization_wrapper)) - return ret; - - if (callOnIndexAndDataType(to_type->getTypeId(), make_default_wrapper)) - return ret; - - switch (to_type->getTypeId()) - { - case TypeIndex::String: - return createStringWrapper(from_type); - case TypeIndex::FixedString: - return createFixedStringWrapper(from_type, checkAndGetDataType(to_type.get())->getN()); - case TypeIndex::Array: - return createArrayWrapper(from_type, static_cast(*to_type)); - case TypeIndex::Tuple: - return createTupleWrapper(from_type, checkAndGetDataType(to_type.get())); - case TypeIndex::Map: - return createMapWrapper(from_type, checkAndGetDataType(to_type.get())); - case TypeIndex::Object: - return createObjectWrapper(from_type, checkAndGetDataType(to_type.get())); - case TypeIndex::AggregateFunction: - return createAggregateFunctionWrapper(from_type, checkAndGetDataType(to_type.get())); - case TypeIndex::Interval: - return createIntervalWrapper(from_type, checkAndGetDataType(to_type.get())->getKind()); - default: - break; - } - - if (cast_type == CastType::accurateOrNull) - return 
createToNullableColumnWrapper(); - else - throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported", - from_type->getName(), to_type->getName()); - } -}; - -class MonotonicityHelper -{ -public: - using MonotonicityForRange = FunctionCastBase::MonotonicityForRange; - - template - static auto monotonicityForType(const DataType * const) - { - return FunctionTo::Type::Monotonic::get; - } - - static MonotonicityForRange getMonotonicityInformation(const DataTypePtr & from_type, const IDataType * to_type) - { - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (isEnum(from_type)) - { - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType(to_type)) - return monotonicityForType(type); - } - /// other types like Null, FixedString, Array and Tuple have no monotonicity defined - return {}; - } -}; - -} From 277032e444652cfc18f62032a35af918bac39f4d Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 14 Mar 2024 14:16:33 +0100 Subject: [PATCH 203/283] Move the least updating cctools into beginning, update it --- docker/packager/binary-builder/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/packager/binary-builder/Dockerfile b/docker/packager/binary-builder/Dockerfile index 96c90403187..c9442accd7e 100644 --- a/docker/packager/binary-builder/Dockerfile +++ b/docker/packager/binary-builder/Dockerfile @@ -4,6 +4,9 @@ FROM clickhouse/fasttest:$FROM_TAG ENV CC=clang-${LLVM_VERSION} ENV CXX=clang++-${LLVM_VERSION} +# If the cctools is updated, then first build it in the CI, then update here in a different commit +COPY --from=clickhouse/cctools:d9e3596e706b /cctools /cctools + # Rust toolchain and libraries ENV RUSTUP_HOME=/rust/rustup ENV CARGO_HOME=/rust/cargo @@ -73,9 +76,6 @@ RUN curl -Lo /usr/bin/clang-tidy-cache \ "https://raw.githubusercontent.com/matus-chochlik/ctcache/$CLANG_TIDY_SHA1/clang-tidy-cache" \ && chmod +x /usr/bin/clang-tidy-cache -# If the cctools is updated, then first build it in the CI, then update here in a different commit -COPY --from=clickhouse/cctools:5a908f73878a /cctools /cctools - RUN mkdir /workdir && chmod 777 /workdir WORKDIR /workdir From d465835306c013b62e35c1330326505095b8b658 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 14 Mar 2024 14:08:43 +0100 Subject: [PATCH 204/283] Reorder hidden and shown checks in comment, change url of Mergeable check --- tests/ci/commit_status_helper.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 1c2d8b2ade8..bda2db13991 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -18,8 +18,10 @@ from github.GithubObject import NotSet from github.IssueComment import IssueComment from github.Repository import Repository -from ci_config import REQUIRED_CHECKS, CHECK_DESCRIPTIONS, CheckDescription -from env_helper import GITHUB_JOB_URL, GITHUB_REPOSITORY, TEMP_PATH +# isort: on + +from ci_config import CHECK_DESCRIPTIONS, REQUIRED_CHECKS, CheckDescription +from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL, TEMP_PATH from pr_info import SKIP_MERGEABLE_CHECK_LABEL, PRInfo from report import ( ERROR, @@ -259,6 +261,12 @@ def generate_status_comment(pr_info: PRInfo, statuses: CommitStatuses) -> str: result = [comment_body] + if visible_table_rows: + visible_table_rows.sort() + result.append(table_header) + result.extend(visible_table_rows) + result.append(table_footer) + if hidden_table_rows: hidden_table_rows.sort() result.append(details_header) @@ -267,12 +275,6 @@ def generate_status_comment(pr_info: PRInfo, statuses: CommitStatuses) -> str: result.append(table_footer) result.append(details_footer) - if visible_table_rows: - visible_table_rows.sort() - result.append(table_header) - result.extend(visible_table_rows) - result.append(table_footer) - return "".join(result) @@ -427,7 +429,7 @@ def set_mergeable_check( context=MERGEABLE_NAME, description=format_description(description), state=state, - target_url=GITHUB_JOB_URL(), + target_url=GITHUB_RUN_URL, ) From 03b9bca8a4dc7f27c20d8cca22cbbc234ed0468c Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 14 Mar 2024 14:31:55 +0100 Subject: [PATCH 205/283] Terminate EC2 on spot event if runner isn't running --- tests/ci/worker/init_runner.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/ci/worker/init_runner.sh b/tests/ci/worker/init_runner.sh index b211128cf10..de1d128dc87 100644 --- a/tests/ci/worker/init_runner.sh +++ b/tests/ci/worker/init_runner.sh @@ -138,13 +138,15 @@ check_spot_instance_is_old() { check_proceed_spot_termination() { # The function checks and proceeds spot instance termination if exists # The event for spot instance termination + local FORCE + FORCE=${1:-} if TERMINATION_DATA=$(curl -s --fail http://169.254.169.254/latest/meta-data/spot/instance-action); then # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-instance-termination-notices.html#instance-action-metadata _action=$(jq '.action' -r <<< "$TERMINATION_DATA") _time=$(jq '.time | fromdate' <<< "$TERMINATION_DATA") _until_action=$((_time - $(date +%s))) echo "Received the '$_action' event that will be effective in $_until_action seconds" - if (( _until_action <= 30 )); then + if (( _until_action <= 30 )) || [ "$FORCE" == "force" ]; then echo "The action $_action will be done in $_until_action, killing the runner and exit" local runner_pid runner_pid=$(pgrep Runner.Listener) @@ -309,7 +311,7 @@ while true; do echo "Checking if the instance suppose to terminate" no_terminating_metadata || terminate_on_event check_spot_instance_is_old && terminate_and_exit - check_proceed_spot_termination + check_proceed_spot_termination force echo "Going to configure runner" sudo -u ubuntu ./config.sh --url $RUNNER_URL --token "$(get_runner_token)" \ @@ -319,7 +321,7 @@ while true; do echo "Another one check to avoid race between runner and infrastructure" no_terminating_metadata || terminate_on_event check_spot_instance_is_old && terminate_and_exit - check_proceed_spot_termination + check_proceed_spot_termination force echo "Run" sudo -u ubuntu \ From 82b089f4e95073e7048254bf5d4dc03a515f71a2 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 14 Mar 2024 13:59:29 +0000 Subject: [PATCH 206/283] remove profile event --- src/Common/ProfileEvents.cpp | 1 - src/Interpreters/Aggregator.cpp | 4 ---- 2 files changed, 5 deletions(-) diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 8fd1e189977..e43e8910089 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -533,7 +533,6 @@ The server successfully detected this situation and will download merged part fr \ M(AggregationPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for aggregation.") \ M(AggregationHashTablesInitializedAsTwoLevel, "How many hash tables were inited as two-level for aggregation.") \ - M(AggregationProcessedBlocks, "How many blocks were processed by Aggregator") \ M(AggregationOptimizedEqualRangesOfKeys, "For how many blocks optimization of equal ranges of keys was applied") \ \ M(MetadataFromKeeperCacheHit, "Number of times an object storage metadata request was answered from cache without making request to Keeper") \ diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 7c9dac82eff..40b1c09a32e 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -53,7 +53,6 @@ namespace ProfileEvents extern const Event OverflowThrow; extern const Event OverflowBreak; extern const Event OverflowAny; - extern const Event AggregationProcessedBlocks; extern const Event 
AggregationOptimizedEqualRangesOfKeys; } @@ -987,7 +986,6 @@ void Aggregator::executeOnBlockSmall( { /// `result` will destroy the states of aggregate functions in the destructor result.aggregator = this; - ProfileEvents::increment(ProfileEvents::AggregationProcessedBlocks); /// How to perform the aggregation? if (result.empty()) @@ -1521,7 +1519,6 @@ void NO_INLINE Aggregator::executeOnIntervalWithoutKey( /// `data_variants` will destroy the states of aggregate functions in the destructor data_variants.aggregator = this; data_variants.init(AggregatedDataVariants::Type::without_key); - ProfileEvents::increment(ProfileEvents::AggregationProcessedBlocks); AggregatedDataWithoutKey & res = data_variants.without_key; @@ -1653,7 +1650,6 @@ bool Aggregator::executeOnBlock(Columns columns, { /// `result` will destroy the states of aggregate functions in the destructor result.aggregator = this; - ProfileEvents::increment(ProfileEvents::AggregationProcessedBlocks); /// How to perform the aggregation? if (result.empty()) From 200823c31108015b16ead859a346a59f83cc6e74 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 14 Mar 2024 15:04:19 +0100 Subject: [PATCH 207/283] Try fix docs check --- .../sql-reference/aggregate-functions/reference/varpop.md | 4 ++-- .../sql-reference/aggregate-functions/reference/varsamp.md | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpop.md b/docs/en/sql-reference/aggregate-functions/reference/varpop.md index 76472f62789..2044b7e690b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varpop.md @@ -63,8 +63,8 @@ covarPopStable(x, y) **Parameters** -- `x`: The first data column. [String literal](../syntax#syntax-string-literal) -- `y`: The second data column. [Expression](../syntax#syntax-expressions) +- `x`: The first data column. [String literal](../../syntax#syntax-string-literal) +- `y`: The second data column. [Expression](../../syntax#syntax-expressions) **Returned value** diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md index e75cb075ff8..be669a16ae8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md @@ -8,7 +8,7 @@ This page contains information on the `varSamp` and `varSampStable` ClickHouse f ## varSamp -Calculate the sample variance of a data set. +Calculate the sample variance of a data set. **Syntax** @@ -18,7 +18,7 @@ varSamp(expr) **Parameters** -- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../syntax#syntax-expressions) +- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions) **Returned value** @@ -78,7 +78,7 @@ varSampStable(expr) **Parameters** -- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../syntax#syntax-expressions) +- `expr`: An expression representing the data set for which you want to calculate the sample variance. 
[Expression](../../syntax#syntax-expressions) **Returned value** From 02e81329794ed326853f1e039bca49e960ed469d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 14 Mar 2024 14:05:19 +0000 Subject: [PATCH 208/283] fix test --- .../03008_optimize_equal_ranges.reference | 12 ++++++++++-- .../0_stateless/03008_optimize_equal_ranges.sql | 9 ++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/03008_optimize_equal_ranges.reference b/tests/queries/0_stateless/03008_optimize_equal_ranges.reference index 08f8008fca6..fc7a4f3c118 100644 --- a/tests/queries/0_stateless/03008_optimize_equal_ranges.reference +++ b/tests/queries/0_stateless/03008_optimize_equal_ranges.reference @@ -1,8 +1,16 @@ 0 30000 1 30000 2 30000 +0 30000 +1 30000 +2 30000 0 449985000 1 449985000 2 449985000 -sum 1 -uniqExact 0 +0 449985000 +1 449985000 +2 449985000 +sum 1 1 +sum 16 1 +uniqExact 1 1 +uniqExact 16 0 diff --git a/tests/queries/0_stateless/03008_optimize_equal_ranges.sql b/tests/queries/0_stateless/03008_optimize_equal_ranges.sql index c6143fb7f51..4d521420741 100644 --- a/tests/queries/0_stateless/03008_optimize_equal_ranges.sql +++ b/tests/queries/0_stateless/03008_optimize_equal_ranges.sql @@ -10,16 +10,19 @@ INSERT INTO t_optimize_equal_ranges SELECT 0, toString(number), number FROM numb INSERT INTO t_optimize_equal_ranges SELECT 1, toString(number), number FROM numbers(30000); INSERT INTO t_optimize_equal_ranges SELECT 2, toString(number), number FROM numbers(30000); -SELECT a, uniqExact(b) FROM t_optimize_equal_ranges GROUP BY a ORDER BY a; -SELECT a, sum(c) FROM t_optimize_equal_ranges GROUP BY a ORDER BY a; +SELECT a, uniqExact(b) FROM t_optimize_equal_ranges GROUP BY a ORDER BY a SETTINGS max_threads = 16; +SELECT a, uniqExact(b) FROM t_optimize_equal_ranges GROUP BY a ORDER BY a SETTINGS max_threads = 1; +SELECT a, sum(c) FROM t_optimize_equal_ranges GROUP BY a ORDER BY a SETTINGS max_threads = 16; +SELECT a, sum(c) FROM t_optimize_equal_ranges GROUP BY a ORDER BY a SETTINGS max_threads = 1; SYSTEM FLUSH LOGS; SELECT used_aggregate_functions[1] AS func, + Settings['max_threads'] AS threads, ProfileEvents['AggregationOptimizedEqualRangesOfKeys'] > 0 FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND query LIKE '%SELECT%FROM%t_optimize_equal_ranges%' -ORDER BY func; +ORDER BY func, threads; DROP TABLE t_optimize_equal_ranges; From b21a5fec7b99bf223482491759f72357ec447480 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 14 Mar 2024 15:06:08 +0100 Subject: [PATCH 209/283] Fix broken list and thank clang tidy --- src/Client/QueryFuzzer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index 38e78157096..0a7cb1b36db 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -974,7 +974,7 @@ ASTPtr QueryFuzzer::reverseLiteralFuzzing(ASTPtr child) "toFixedString", /// Same as toDecimal "toInt128", "toInt256", - "toLowCardinality" + "toLowCardinality", "toNullable", "toUInt128", "toUInt256"}; From 026ac4deb14963077722ba527cabf76abd2ff0c0 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 14 Mar 2024 14:20:22 +0000 Subject: [PATCH 210/283] Fix heap-use-after-free for Merge table with alias --- src/Storages/StorageMerge.cpp | 3 +- ...t_storage_merge_aliases_analyzer.reference | 10 ++++ ...25_test_storage_merge_aliases_analyzer.sql | 60 +++++++++++++++++++ 3 files changed, 72 insertions(+), 1 
deletion(-) create mode 100644 tests/queries/0_stateless/01925_test_storage_merge_aliases_analyzer.reference create mode 100644 tests/queries/0_stateless/01925_test_storage_merge_aliases_analyzer.sql diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 8410f0a8df8..e07bcf339c3 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -681,8 +681,9 @@ public: { if (column->hasExpression()) { + auto column_name = column->getColumnName(); node = column->getExpressionOrThrow(); - node->setAlias(column->getColumnName()); + node->setAlias(column_name); } else column->setColumnSource(replacement_table_expression); diff --git a/tests/queries/0_stateless/01925_test_storage_merge_aliases_analyzer.reference b/tests/queries/0_stateless/01925_test_storage_merge_aliases_analyzer.reference new file mode 100644 index 00000000000..b0fea25ed4b --- /dev/null +++ b/tests/queries/0_stateless/01925_test_storage_merge_aliases_analyzer.reference @@ -0,0 +1,10 @@ +alias1 +1 4 16 23 +23 16 4 1 +2020-02-02 1 4 2 16 3 23 +alias2 +1 3 4 4 +4 4 3 1 +23 16 4 1 +2020-02-01 1 3 2 4 3 4 +2020-02-02 1 4 2 16 3 23 diff --git a/tests/queries/0_stateless/01925_test_storage_merge_aliases_analyzer.sql b/tests/queries/0_stateless/01925_test_storage_merge_aliases_analyzer.sql new file mode 100644 index 00000000000..31035aa80cd --- /dev/null +++ b/tests/queries/0_stateless/01925_test_storage_merge_aliases_analyzer.sql @@ -0,0 +1,60 @@ +-- Tags: no-parallel + +drop table if exists merge; +set allow_experimental_analyzer = 1; +create table merge +( + dt Date, + colAlias0 Int32, + colAlias1 Int32, + col2 Int32, + colAlias2 UInt32, + col3 Int32, + colAlias3 UInt32 +) +engine = Merge(currentDatabase(), '^alias_'); + +drop table if exists alias_1; +drop table if exists alias_2; + +create table alias_1 +( + dt Date, + col Int32, + colAlias0 UInt32 alias col, + colAlias1 UInt32 alias col3 + colAlias0, + col2 Int32, + colAlias2 Int32 alias colAlias1 + col2 + 10, + col3 Int32, + colAlias3 Int32 alias colAlias2 + colAlias1 + col3 +) +engine = MergeTree() +order by (dt); + +insert into alias_1 (dt, col, col2, col3) values ('2020-02-02', 1, 2, 3); + +select 'alias1'; +select colAlias0, colAlias1, colAlias2, colAlias3 from alias_1; +select colAlias3, colAlias2, colAlias1, colAlias0 from merge; +select * from merge; + +create table alias_2 +( + dt Date, + col Int32, + col2 Int32, + colAlias0 UInt32 alias col, + colAlias3 Int32 alias col3 + colAlias0, + colAlias1 UInt32 alias colAlias0 + col2, + colAlias2 Int32 alias colAlias0 + colAlias1, + col3 Int32 +) +engine = MergeTree() +order by (dt); + +insert into alias_2 (dt, col, col2, col3) values ('2020-02-01', 1, 2, 3); + +select 'alias2'; +select colAlias0, colAlias1, colAlias2, colAlias3 from alias_2; +select colAlias3, colAlias2, colAlias1, colAlias0 from merge order by dt; +select * from merge order by dt; From e145115ef1a1cdb8f480cbbd51a7eab3f86a43d3 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 14 Mar 2024 16:15:06 +0100 Subject: [PATCH 211/283] Fixup --- src/Storages/StorageMerge.cpp | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index f0b9d58f3dd..7124cd7393e 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -56,17 +56,12 @@ #include #include #include -#include "Common/logger_useful.h" #include #include #include #include -#include "Analyzer/QueryNode.h" -#include "Core/QueryProcessingStage.h" 
-#include "IO/WriteHelpers.h" #include #include -#include namespace DB { @@ -803,13 +798,10 @@ QueryTreeNodePtr replaceTableExpressionAndRemoveJoin( const ContextPtr & context, const Names & required_column_names) { - LOG_DEBUG(&Poco::Logger::get("replaceTableExpressionAndRemoveJoin"), "BEFORE:\n{}", query->dumpTree()); auto * query_node = query->as(); auto join_tree_type = query_node->getJoinTree()->getNodeType(); auto modified_query = query_node->cloneAndReplace(original_table_expression, replacement_table_expression); - LOG_DEBUG(&Poco::Logger::get("replaceTableExpressionAndRemoveJoin"), "AFTER:\n{}", modified_query->dumpTree()); - // For the case when join tree is just a table or a table function we don't need to do anything more. if (join_tree_type == QueryTreeNodeType::TABLE || join_tree_type == QueryTreeNodeType::TABLE_FUNCTION) return modified_query; @@ -969,8 +961,6 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ column_name_to_node); } - LOG_DEBUG(&Poco::Logger::get("getModifiedQueryInfo"), "{}", modified_query_info.query_tree->dumpTree()); - modified_query_info.query = queryNodeToSelectQuery(modified_query_info.query_tree); } else @@ -1058,8 +1048,6 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( Block pipe_header = builder->getHeader(); - LOG_DEBUG(&Poco::Logger::get("createSources"), "Processed:{}\nStorage:{}", toString(processed_stage), toString(storage_stage)); - if (allow_experimental_analyzer) { String table_alias = modified_query_info.query_tree->as()->getJoinTree()->as()->getAlias(); @@ -1067,7 +1055,8 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( String database_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? "_database" : table_alias + "._database"; String table_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? 
"_table" : table_alias + "._table"; - if (has_database_virtual_column && common_header.has(database_column) && (storage_stage == QueryProcessingStage::FetchColumns || dynamic_cast(&storage_snapshot_->storage) != nullptr)) + if (has_database_virtual_column && common_header.has(database_column) + && (storage_stage == QueryProcessingStage::FetchColumns || (dynamic_cast(&storage_snapshot_->storage) != nullptr && !pipe_header.has("'" + database_name + "'_String")))) { ColumnWithTypeAndName column; column.name = database_column; @@ -1082,7 +1071,8 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( { return std::make_shared(stream_header, adding_column_actions); }); } - if (has_table_virtual_column && common_header.has(table_column) && (storage_stage == QueryProcessingStage::FetchColumns || dynamic_cast(&storage_snapshot_->storage) != nullptr)) + if (has_table_virtual_column && common_header.has(table_column) + && (storage_stage == QueryProcessingStage::FetchColumns || (dynamic_cast(&storage_snapshot_->storage) != nullptr && !pipe_header.has("'" + table_name + "'_String")))) { ColumnWithTypeAndName column; column.name = table_column; @@ -1166,8 +1156,6 @@ QueryPlan ReadFromMerge::createPlanForTable( storage_snapshot_, modified_query_info); - LOG_DEBUG(&Poco::Logger::get("createPlanForTable"), "Storage: {}", toString(storage_stage)); - QueryPlan plan; if (processed_stage <= storage_stage) @@ -1545,8 +1533,7 @@ void ReadFromMerge::convertAndFilterSourceStream( ActionsDAG::MatchColumnsMode convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Name; - if (local_context->getSettingsRef().allow_experimental_analyzer - && (processed_stage == QueryProcessingStage::FetchColumns && dynamic_cast(&snapshot->storage) != nullptr)) + if (local_context->getSettingsRef().allow_experimental_analyzer && dynamic_cast(&snapshot->storage) != nullptr) convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position; if (row_policy_data_opt) @@ -1554,8 +1541,6 @@ void ReadFromMerge::convertAndFilterSourceStream( row_policy_data_opt->addFilterTransform(builder); } - LOG_DEBUG(&Poco::Logger::get("convertAndFilterSourceStream"), "SOURCE:\n{}\nRESULT:\n{}", builder.getHeader().dumpStructure(), header.dumpStructure()); - auto convert_actions_dag = ActionsDAG::makeConvertingActions(builder.getHeader().getColumnsWithTypeAndName(), header.getColumnsWithTypeAndName(), convert_actions_match_columns_mode); From d558c4dcb7ed2d80ca63d1bf2f9e09d580c843c0 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 14 Mar 2024 16:17:58 +0100 Subject: [PATCH 212/283] Cleanup --- src/Storages/StorageMerge.cpp | 5 ++--- src/Storages/StorageMerge.h | 3 +-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 7124cd7393e..f95464f4bff 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1121,7 +1121,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. 
convertAndFilterSourceStream( - header, modified_query_info, storage_snapshot_, aliases, row_policy_data_opt, context, *builder, storage_stage); + header, modified_query_info, storage_snapshot_, aliases, row_policy_data_opt, context, *builder); } return builder; @@ -1473,8 +1473,7 @@ void ReadFromMerge::convertAndFilterSourceStream( const Aliases & aliases, const RowPolicyDataOpt & row_policy_data_opt, ContextPtr local_context, - QueryPipelineBuilder & builder, - QueryProcessingStage::Enum processed_stage [[maybe_unused]]) + QueryPipelineBuilder & builder) { Block before_block_header = builder.getHeader(); diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 556649f622d..3aabd7e26e3 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -277,8 +277,7 @@ private: const Aliases & aliases, const RowPolicyDataOpt & row_policy_data_opt, ContextPtr context, - QueryPipelineBuilder & builder, - QueryProcessingStage::Enum processed_stage); + QueryPipelineBuilder & builder); StorageMerge::StorageListWithLocks getSelectedTables( ContextPtr query_context, From c89010f80336384252e230158c85ae4f7ae1dae1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Thu, 14 Mar 2024 15:22:01 +0000 Subject: [PATCH 213/283] Remove unused function template --- src/Dictionaries/RangeHashedDictionary_2.cpp | 114 ------------------- 1 file changed, 114 deletions(-) diff --git a/src/Dictionaries/RangeHashedDictionary_2.cpp b/src/Dictionaries/RangeHashedDictionary_2.cpp index 2329d621da4..d400fd1d830 100644 --- a/src/Dictionaries/RangeHashedDictionary_2.cpp +++ b/src/Dictionaries/RangeHashedDictionary_2.cpp @@ -118,120 +118,6 @@ size_t RangeHashedDictionary::getItemsShortCircuitImpl( return keys_found; } -template -template -void RangeHashedDictionary::getItemsImpl( - const Attribute & attribute, - const Columns & key_columns, - typename RangeHashedDictionary::ValueSetterFunc && set_value, - DefaultValueExtractor & default_value_extractor) const -{ - const auto & attribute_container = std::get>(attribute.container); - - size_t keys_found = 0; - - const ColumnPtr & range_column = key_columns.back(); - auto key_columns_copy = key_columns; - key_columns_copy.pop_back(); - - DictionaryKeysArenaHolder arena_holder; - DictionaryKeysExtractor keys_extractor(key_columns_copy, arena_holder.getComplexKeyArena()); - const size_t keys_size = keys_extractor.getKeysSize(); - - callOnRangeType( - dict_struct.range_min->type, - [&](const auto & types) - { - using Types = std::decay_t; - using RangeColumnType = typename Types::LeftType; - using RangeStorageType = typename RangeColumnType::ValueType; - using RangeInterval = Interval; - - const auto * range_column_typed = typeid_cast(range_column.get()); - if (!range_column_typed) - throw Exception( - ErrorCodes::TYPE_MISMATCH, - "Dictionary {} range column type should be equal to {}", - getFullName(), - dict_struct.range_min->type->getName()); - - const auto & range_column_data = range_column_typed->getData(); - - const auto & key_attribute_container = std::get>(key_attribute.container); - - for (size_t key_index = 0; key_index < keys_size; ++key_index) - { - auto key = keys_extractor.extractCurrentKey(); - const auto it = key_attribute_container.find(key); - - if (it) - { - const auto date = range_column_data[key_index]; - const auto & interval_tree = it->getMapped(); - - size_t value_index = 0; - std::optional range; - - interval_tree.find( - date, - [&](auto & interval, auto & interval_value_index) - { - if (range) 
- { - if (likely(configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::min) && interval < *range) - { - range = interval; - value_index = interval_value_index; - } - else if (configuration.lookup_strategy == RangeHashedDictionaryLookupStrategy::max && interval > *range) - { - range = interval; - value_index = interval_value_index; - } - } - else - { - range = interval; - value_index = interval_value_index; - } - - return true; - }); - - if (range.has_value()) - { - ++keys_found; - - ValueType value = attribute_container[value_index]; - - if constexpr (is_nullable) - { - bool is_null = (*attribute.is_value_nullable)[value_index]; - set_value(key_index, value, is_null); - } - else - { - set_value(key_index, value, false); - } - - keys_extractor.rollbackCurrentKey(); - continue; - } - } - - if constexpr (is_nullable) - set_value(key_index, default_value_extractor[key_index], default_value_extractor.isNullAt(key_index)); - else - set_value(key_index, default_value_extractor[key_index], false); - - keys_extractor.rollbackCurrentKey(); - } - }); - - query_count.fetch_add(keys_size, std::memory_order_relaxed); - found_count.fetch_add(keys_found, std::memory_order_relaxed); -} - #define INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType, IsNullable, ValueType) \ template size_t RangeHashedDictionary::getItemsShortCircuitImpl( \ const Attribute & attribute, \ From 22ca96cf8d44aa907c0c3c463e4b0a5628312aa0 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 14 Mar 2024 16:05:01 +0000 Subject: [PATCH 214/283] Disable optimize_rewrite_sum_if_to_count_if if return is nullable --- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 2 +- .../0_stateless/03010_sum_to_to_count_if_nullable.reference | 0 .../queries/0_stateless/03010_sum_to_to_count_if_nullable.sql | 3 +++ 3 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.reference create mode 100644 tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.sql diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 1a6ee9215a9..d374d92c1fb 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -32,7 +32,7 @@ public: return; auto * function_node = node->as(); - if (!function_node || !function_node->isAggregateFunction()) + if (!function_node || !function_node->isAggregateFunction() || function_node->getResultType()->isNullable()) return; auto function_name = function_node->getFunctionName(); diff --git a/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.reference b/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.sql b/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.sql new file mode 100644 index 00000000000..394cd4f1ea5 --- /dev/null +++ b/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.sql @@ -0,0 +1,3 @@ +SET optimize_rewrite_sum_if_to_count_if = 1; +SELECT (sumIf(toNullable(1), (number % 2) = 0), NULL) FROM numbers(10) SETTINGS allow_experimental_analyzer=0; +SELECT (sumIf(toNullable(1), (number % 2) = 0), NULL) FROM numbers(10) SETTINGS allow_experimental_analyzer=1; \ No newline at end of file From 7c5ef07c7b9f20db16811eb60dbe27a36b821575 Mon Sep 17 00:00:00 2001 From: Peter Date: Fri, 15 Mar 2024 00:18:37 +0800 Subject: [PATCH 215/283] Add checksum validating before 
extracting archive --- docs/en/getting-started/example-datasets/nyc-taxi.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index cac75fdc45a..516a6d54248 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -248,6 +248,9 @@ Some of the files might not download fully. Check the file sizes and re-download ``` bash $ curl -O https://datasets.clickhouse.com/trips_mergetree/partitions/trips_mergetree.tar +# Validate the checksum +$ md5sum trips_mergetree.tar +# Checksum should be equal to: f3b8d469b41d9a82da064ded7245d12c $ tar xvf trips_mergetree.tar -C /var/lib/clickhouse # path to ClickHouse data directory $ # check permissions of unpacked data, fix if required $ sudo service clickhouse-server restart From 0b3b734c9b268e65c758e1f2092d176dfc1d9489 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 14 Mar 2024 17:37:49 +0100 Subject: [PATCH 216/283] Analyzer: Fix planner context for subquery in StorageMerge --- src/Planner/PlannerContext.cpp | 6 ++++++ src/Planner/PlannerContext.h | 8 ++++++-- src/Storages/StorageMerge.cpp | 22 ++++++++++++--------- src/Storages/StorageMerge.h | 2 +- tests/analyzer_integration_broken_tests.txt | 1 - 5 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/Planner/PlannerContext.cpp b/src/Planner/PlannerContext.cpp index f33255f0a44..f939b959ce7 100644 --- a/src/Planner/PlannerContext.cpp +++ b/src/Planner/PlannerContext.cpp @@ -48,6 +48,12 @@ PlannerContext::PlannerContext(ContextMutablePtr query_context_, GlobalPlannerCo , is_ast_level_optimization_allowed(!(query_context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY || select_query_options_.ignore_ast_optimizations)) {} +PlannerContext::PlannerContext(ContextMutablePtr query_context_, PlannerContextPtr planner_context_) + : query_context(std::move(query_context_)) + , global_planner_context(planner_context_->global_planner_context) + , is_ast_level_optimization_allowed(planner_context_->is_ast_level_optimization_allowed) +{} + TableExpressionData & PlannerContext::getOrCreateTableExpressionData(const QueryTreeNodePtr & table_expression_node) { auto [it, _] = table_expression_node_to_data.emplace(table_expression_node, TableExpressionData()); diff --git a/src/Planner/PlannerContext.h b/src/Planner/PlannerContext.h index 4d9ba037cac..418240fa34e 100644 --- a/src/Planner/PlannerContext.h +++ b/src/Planner/PlannerContext.h @@ -75,12 +75,18 @@ private: using GlobalPlannerContextPtr = std::shared_ptr; +class PlannerContext; +using PlannerContextPtr = std::shared_ptr; + class PlannerContext { public: /// Create planner context with query context and global planner context PlannerContext(ContextMutablePtr query_context_, GlobalPlannerContextPtr global_planner_context_, const SelectQueryOptions & select_query_options_); + /// Create planner with modified query_context + PlannerContext(ContextMutablePtr query_context_, PlannerContextPtr planner_context_); + /// Get planner context query context ContextPtr getQueryContext() const { @@ -191,6 +197,4 @@ private: PreparedSets prepared_sets; }; -using PlannerContextPtr = std::shared_ptr; - } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 8410f0a8df8..4c53f67c76b 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -422,6 +422,7 @@ void 
ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu std::vector> pipelines; auto table_it = selected_tables.begin(); + auto modified_context = Context::createCopy(context); for (size_t i = 0; i < selected_tables.size(); ++i, ++table_it) { auto & child_plan = child_plans->at(i); @@ -438,7 +439,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu if (child_plan.row_policy_data_opt) child_plan.row_policy_data_opt->extendNames(real_column_names); - auto modified_query_info = getModifiedQueryInfo(context, table, nested_storage_snaphsot, real_column_names, column_names_as_aliases, aliases); + auto modified_query_info = getModifiedQueryInfo(modified_context, table, nested_storage_snaphsot, real_column_names, column_names_as_aliases, aliases); auto source_pipeline = createSources( child_plan.plan, @@ -547,9 +548,10 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ } /// Settings will be modified when planning children tables. - auto modified_context = Context::createCopy(context); for (const auto & table : selected_tables) { + auto modified_context = Context::createCopy(context); + size_t current_need_streams = tables_count >= num_streams ? 1 : (num_streams / tables_count); size_t current_streams = std::min(current_need_streams, remaining_streams); remaining_streams -= current_streams; @@ -570,25 +572,25 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ auto & aliases = res.back().table_aliases; auto & row_policy_data_opt = res.back().row_policy_data_opt; auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); - auto nested_storage_snaphsot = storage->getStorageSnapshot(storage_metadata_snapshot, context); + auto nested_storage_snaphsot = storage->getStorageSnapshot(storage_metadata_snapshot, modified_context); Names column_names_as_aliases; Names real_column_names = column_names; const auto & database_name = std::get<0>(table); const auto & table_name = std::get<3>(table); - auto row_policy_filter_ptr = context->getRowPolicyFilter( + auto row_policy_filter_ptr = modified_context->getRowPolicyFilter( database_name, table_name, RowPolicyFilterType::SELECT_FILTER); if (row_policy_filter_ptr) { - row_policy_data_opt = RowPolicyData(row_policy_filter_ptr, storage, context); + row_policy_data_opt = RowPolicyData(row_policy_filter_ptr, storage, modified_context); row_policy_data_opt->extendNames(real_column_names); } auto modified_query_info - = getModifiedQueryInfo(context, table, nested_storage_snaphsot, real_column_names, column_names_as_aliases, aliases); + = getModifiedQueryInfo(modified_context, table, nested_storage_snaphsot, real_column_names, column_names_as_aliases, aliases); if (!context->getSettingsRef().allow_experimental_analyzer) { @@ -657,10 +659,9 @@ std::vector ReadFromMerge::createChildrenPlans(SelectQ row_policy_data_opt, modified_context, current_streams); + res.back().plan.addInterpreterContext(modified_context); } - if (!res.empty()) - res[0].plan.addInterpreterContext(modified_context); return res; } @@ -863,7 +864,7 @@ QueryTreeNodePtr replaceTableExpressionAndRemoveJoin( } -SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_context, +SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & modified_context, const StorageWithLockAndName & storage_with_lock_and_name, const StorageSnapshotPtr & storage_snapshot_, Names required_column_names, @@ -877,6 +878,9 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextPtr & modified_ if 
(modified_query_info.optimized_prewhere_info && !modified_query_info.prewhere_info)
         modified_query_info.prewhere_info = modified_query_info.optimized_prewhere_info;

+    if (modified_query_info.planner_context)
+        modified_query_info.planner_context = std::make_shared(modified_context, modified_query_info.planner_context);
+
     if (modified_query_info.table_expression)
     {
         auto replacement_table_expression = std::make_shared(storage, storage_lock, storage_snapshot_);
diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h
index 556649f622d..c049d50f3b4 100644
--- a/src/Storages/StorageMerge.h
+++ b/src/Storages/StorageMerge.h
@@ -192,7 +192,7 @@ private:

     using Aliases = std::vector;

-    SelectQueryInfo getModifiedQueryInfo(const ContextPtr & modified_context,
+    SelectQueryInfo getModifiedQueryInfo(const ContextMutablePtr & modified_context,
         const StorageWithLockAndName & storage_with_lock_and_name,
         const StorageSnapshotPtr & storage_snapshot,
         Names required_column_names,
diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt
index 31527dc3476..d2ef983f26d 100644
--- a/tests/analyzer_integration_broken_tests.txt
+++ b/tests/analyzer_integration_broken_tests.txt
@@ -1,4 +1,3 @@
 test_build_sets_from_multiple_threads/test.py::test_set
 test_concurrent_backups_s3/test.py::test_concurrent_backups
 test_distributed_type_object/test.py::test_distributed_type_object
-test_merge_table_over_distributed/test.py::test_global_in

From a6f1e09e69a583ab2f235f918e4bc0d92949d478 Mon Sep 17 00:00:00 2001
From: avogar
Date: Thu, 14 Mar 2024 17:35:10 +0000
Subject: [PATCH 217/283] Test test_system_clusters_actual_information
 flakiness

---
 .../test_system_clusters_actual_information/test.py | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/tests/integration/test_system_clusters_actual_information/test.py b/tests/integration/test_system_clusters_actual_information/test.py
index e90a6cdeb3f..c6e3262fd62 100644
--- a/tests/integration/test_system_clusters_actual_information/test.py
+++ b/tests/integration/test_system_clusters_actual_information/test.py
@@ -12,20 +12,11 @@ cluster = ClickHouseCluster(__file__)
 node = cluster.add_instance(
     "node", with_zookeeper=True, main_configs=["configs/remote_servers.xml"]
 )
-node_1 = cluster.add_instance("node_1", with_zookeeper=True)
-

 @pytest.fixture(scope="module")
 def started_cluster():
     try:
         cluster.start()
-        node_1.query_with_retry("DROP TABLE IF EXISTS replicated")
-
-        node_1.query_with_retry(
-            """CREATE TABLE replicated (id UInt32, date Date) ENGINE =
-            ReplicatedMergeTree('/clickhouse/tables/replicated', 'node_1') ORDER BY id PARTITION BY toYYYYMM(date)"""
-        )
-
         node.query_with_retry(
             "CREATE TABLE distributed (id UInt32, date Date) ENGINE = Distributed('test_cluster', 'default', 'replicated')"
         )
@@ -37,8 +28,6 @@ def started_cluster():


 def test(started_cluster):
-    cluster.pause_container("node_1")
-
     node.query("SYSTEM RELOAD CONFIG")
     error = node.query_and_get_error(
         "SELECT count() FROM distributed SETTINGS receive_timeout=1, handshake_timeout_ms=1"
     )
@@ -67,5 +56,3 @@ def test(started_cluster):

     assert recovery_time == 0
     assert errors_count == 0
-
-    cluster.unpause_container("node_1")

From 5c8f2bbda0124704fffc414e329b37d8245c42e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Thu, 14 Mar 2024 17:45:42 +0000
Subject: [PATCH 218/283] Split template instantiations into separate files

---
 src/Dictionaries/RangeHashedDictionary.h      |  1 +
 ...
=> RangeHashedDictionaryGetItemsImpl.txx} | 30 +++++++++---------- ...ngeHashedDictionaryGetItemsImplDecimal.cpp | 10 +++++++ ...RangeHashedDictionaryGetItemsImplFloat.cpp | 7 +++++ .../RangeHashedDictionaryGetItemsImplInt.cpp | 11 +++++++ ...angeHashedDictionaryGetItemsImplOthers.cpp | 10 +++++++ .../RangeHashedDictionaryGetItemsImplUInt.cpp | 11 +++++++ ...hedDictionaryGetItemsShortCircuitImpl.txx} | 30 ++++++++----------- ...tionaryGetItemsShortCircuitImplDecimal.cpp | 10 +++++++ ...ictionaryGetItemsShortCircuitImplFloat.cpp | 7 +++++ ...dDictionaryGetItemsShortCircuitImplInt.cpp | 11 +++++++ ...ctionaryGetItemsShortCircuitImplOthers.cpp | 10 +++++++ ...DictionaryGetItemsShortCircuitImplUInt.cpp | 11 +++++++ 13 files changed, 126 insertions(+), 33 deletions(-) rename src/Dictionaries/{RangeHashedDictionary_3.cpp => RangeHashedDictionaryGetItemsImpl.txx} (98%) create mode 100644 src/Dictionaries/RangeHashedDictionaryGetItemsImplDecimal.cpp create mode 100644 src/Dictionaries/RangeHashedDictionaryGetItemsImplFloat.cpp create mode 100644 src/Dictionaries/RangeHashedDictionaryGetItemsImplInt.cpp create mode 100644 src/Dictionaries/RangeHashedDictionaryGetItemsImplOthers.cpp create mode 100644 src/Dictionaries/RangeHashedDictionaryGetItemsImplUInt.cpp rename src/Dictionaries/{RangeHashedDictionary_2.cpp => RangeHashedDictionaryGetItemsShortCircuitImpl.txx} (98%) create mode 100644 src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplDecimal.cpp create mode 100644 src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplFloat.cpp create mode 100644 src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplInt.cpp create mode 100644 src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplOthers.cpp create mode 100644 src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplUInt.cpp diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index a5dedae97c4..23f7df8133e 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ b/src/Dictionaries/RangeHashedDictionary.h @@ -292,6 +292,7 @@ private: extern template class RangeHashedDictionary; extern template class RangeHashedDictionary; + namespace { template diff --git a/src/Dictionaries/RangeHashedDictionary_3.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsImpl.txx similarity index 98% rename from src/Dictionaries/RangeHashedDictionary_3.cpp rename to src/Dictionaries/RangeHashedDictionaryGetItemsImpl.txx index a3136d6f63d..9da2b0faf4a 100644 --- a/src/Dictionaries/RangeHashedDictionary_3.cpp +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsImpl.txx @@ -1,5 +1,18 @@ #include +#define INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType, IsNullable, AttributeType, ValueType) \ +template void RangeHashedDictionary::getItemsImpl>( \ + const Attribute & attribute,\ + const Columns & key_columns,\ + typename RangeHashedDictionary::ValueSetterFunc && set_value,\ + DictionaryDefaultValueExtractor & default_value_extractor) const; + +#define INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(AttributeType) \ + INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Simple, true, AttributeType, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Simple, false, AttributeType, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Complex, true, AttributeType, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Complex, false, AttributeType, DictionaryValueType) + namespace DB { @@ -13,6 +26,7 @@ void RangeHashedDictionary::getItemsImpl( { const 
auto & attribute_container = std::get>(attribute.container); + size_t keys_found = 0; const ColumnPtr & range_column = key_columns.back(); @@ -116,20 +130,4 @@ void RangeHashedDictionary::getItemsImpl( query_count.fetch_add(keys_size, std::memory_order_relaxed); found_count.fetch_add(keys_found, std::memory_order_relaxed); } - -#define INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType, IsNullable, AttributeType, ValueType) \ -template void RangeHashedDictionary::getItemsImpl>( \ - const Attribute & attribute,\ - const Columns & key_columns,\ - typename RangeHashedDictionary::ValueSetterFunc && set_value,\ - DictionaryDefaultValueExtractor & default_value_extractor) const; - -#define INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(AttributeType) \ - INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Simple, true, AttributeType, DictionaryValueType) \ - INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Simple, false, AttributeType, DictionaryValueType) \ - INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Complex, true, AttributeType, DictionaryValueType) \ - INSTANTIATE_GET_ITEMS_IMPL(DictionaryKeyType::Complex, false, AttributeType, DictionaryValueType) - -CALL_FOR_ALL_DICTIONARY_ATTRIBUTE_TYPES(INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE) - } diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsImplDecimal.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsImplDecimal.cpp new file mode 100644 index 00000000000..f1ee4dd58e1 --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsImplDecimal.cpp @@ -0,0 +1,10 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Decimal32); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Decimal64); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Decimal128); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Decimal256); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(DateTime64); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsImplFloat.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsImplFloat.cpp new file mode 100644 index 00000000000..291a55a76db --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsImplFloat.cpp @@ -0,0 +1,7 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Float32); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Float64); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsImplInt.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsImplInt.cpp new file mode 100644 index 00000000000..a0748a9f486 --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsImplInt.cpp @@ -0,0 +1,11 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int8); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int16); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int32); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int64); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int128); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Int256); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsImplOthers.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsImplOthers.cpp new file mode 100644 index 00000000000..96e5bb54d0b --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsImplOthers.cpp @@ -0,0 +1,10 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UUID); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(IPv4); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(IPv6); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(String); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(Array); +} diff --git 
a/src/Dictionaries/RangeHashedDictionaryGetItemsImplUInt.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsImplUInt.cpp new file mode 100644 index 00000000000..e60a7189a2d --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsImplUInt.cpp @@ -0,0 +1,11 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt8); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt16); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt32); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt64); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt128); +INSTANTIATE_GET_ITEMS_IMPL_FOR_ATTRIBUTE_TYPE(UInt256); +} diff --git a/src/Dictionaries/RangeHashedDictionary_2.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImpl.txx similarity index 98% rename from src/Dictionaries/RangeHashedDictionary_2.cpp rename to src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImpl.txx index d400fd1d830..5807af519f9 100644 --- a/src/Dictionaries/RangeHashedDictionary_2.cpp +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImpl.txx @@ -1,9 +1,21 @@ #include +#define INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType, IsNullable, ValueType) \ + template size_t RangeHashedDictionary::getItemsShortCircuitImpl( \ + const Attribute & attribute, \ + const Columns & key_columns, \ + typename RangeHashedDictionary::ValueSetterFunc && set_value, \ + IColumn::Filter & default_mask) const; + +#define INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(AttributeType) \ + INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Simple, true, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Simple, false, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Complex, true, DictionaryValueType) \ + INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Complex, false, DictionaryValueType) + namespace DB { - template template size_t RangeHashedDictionary::getItemsShortCircuitImpl( @@ -117,20 +129,4 @@ size_t RangeHashedDictionary::getItemsShortCircuitImpl( found_count.fetch_add(keys_found, std::memory_order_relaxed); return keys_found; } - -#define INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType, IsNullable, ValueType) \ - template size_t RangeHashedDictionary::getItemsShortCircuitImpl( \ - const Attribute & attribute, \ - const Columns & key_columns, \ - typename RangeHashedDictionary::ValueSetterFunc && set_value, \ - IColumn::Filter & default_mask) const; - -#define INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(AttributeType) \ - INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Simple, true, DictionaryValueType) \ - INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Simple, false, DictionaryValueType) \ - INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Complex, true, DictionaryValueType) \ - INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL(DictionaryKeyType::Complex, false, DictionaryValueType) - -CALL_FOR_ALL_DICTIONARY_ATTRIBUTE_TYPES(INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE) - } diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplDecimal.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplDecimal.cpp new file mode 100644 index 00000000000..298369e4735 --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplDecimal.cpp @@ -0,0 +1,10 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Decimal32); 
+INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Decimal64); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Decimal128); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Decimal256); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(DateTime64); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplFloat.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplFloat.cpp new file mode 100644 index 00000000000..e8e8da6c75e --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplFloat.cpp @@ -0,0 +1,7 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Float32); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Float64); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplInt.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplInt.cpp new file mode 100644 index 00000000000..c685b9b5331 --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplInt.cpp @@ -0,0 +1,11 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int8); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int16); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int32); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int64); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int128); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Int256); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplOthers.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplOthers.cpp new file mode 100644 index 00000000000..46ea141b59b --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplOthers.cpp @@ -0,0 +1,10 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UUID); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(IPv4); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(IPv6); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(String); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(Array); +} diff --git a/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplUInt.cpp b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplUInt.cpp new file mode 100644 index 00000000000..18421fd7e2d --- /dev/null +++ b/src/Dictionaries/RangeHashedDictionaryGetItemsShortCircuitImplUInt.cpp @@ -0,0 +1,11 @@ +#include + +namespace DB +{ +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt8); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt16); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt32); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt64); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt128); +INSTANTIATE_GET_ITEMS_SHORT_CIRCUIT_IMPL_FOR_ATTRIBUTE_TYPE(UInt256); +} From 7b79d92bbe4c4070135da9747be02edb8499c6a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Thu, 14 Mar 2024 17:49:23 +0000 Subject: [PATCH 219/283] Adjust large object check --- src/Dictionaries/RangeHashedDictionary.h | 1 - utils/check-style/check-large-objects.sh | 1 - 2 files changed, 2 deletions(-) diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h index 23f7df8133e..a5dedae97c4 100644 --- a/src/Dictionaries/RangeHashedDictionary.h +++ 
b/src/Dictionaries/RangeHashedDictionary.h
@@ -292,7 +292,6 @@ private:
 extern template class RangeHashedDictionary;
 extern template class RangeHashedDictionary;

-
 namespace
 {
     template
diff --git a/utils/check-style/check-large-objects.sh b/utils/check-style/check-large-objects.sh
index 5ef57ea4f6c..5b0e8e88df5 100755
--- a/utils/check-style/check-large-objects.sh
+++ b/utils/check-style/check-large-objects.sh
@@ -6,7 +6,6 @@ TU_EXCLUDES=(
     CastOverloadResolver
     AggregateFunctionUniq
     FunctionsConversion
-    RangeHashedDictionary_
     Aggregator
 )

From c6826145dbc6d21c90b3346a26c6c1c53323d138 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Thu, 14 Mar 2024 17:59:58 +0000
Subject: [PATCH 220/283] Remove unnecessary empty lines

---
 src/Dictionaries/RangeHashedDictionary.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/Dictionaries/RangeHashedDictionary.h b/src/Dictionaries/RangeHashedDictionary.h
index a5dedae97c4..0469e82d7be 100644
--- a/src/Dictionaries/RangeHashedDictionary.h
+++ b/src/Dictionaries/RangeHashedDictionary.h
@@ -31,7 +31,6 @@
 #include
 #include

-
 namespace DB
 {

@@ -44,7 +43,6 @@ namespace ErrorCodes
     extern const int TYPE_MISMATCH;
 }

-
 enum class RangeHashedDictionaryLookupStrategy : uint8_t
 {
     min,
@@ -236,8 +234,6 @@ private:

     static Attribute createAttribute(const DictionaryAttribute & dictionary_attribute);

-
-
     template
     using ValueSetterFunc = std::function;

From e8da3bb2eb4ba0c54e948c1756c2d30e548d4432 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Thu, 14 Mar 2024 19:06:02 +0100
Subject: [PATCH 221/283] Fix style

---
 .../integration/test_system_clusters_actual_information/test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/integration/test_system_clusters_actual_information/test.py b/tests/integration/test_system_clusters_actual_information/test.py
index c6e3262fd62..8b6436aeb5c 100644
--- a/tests/integration/test_system_clusters_actual_information/test.py
+++ b/tests/integration/test_system_clusters_actual_information/test.py
@@ -13,6 +13,7 @@ node = cluster.add_instance(
     "node", with_zookeeper=True, main_configs=["configs/remote_servers.xml"]
 )

+
 @pytest.fixture(scope="module")
 def started_cluster():
     try:

From 7e9d863c22e65ce34539670519de096a9f2261e6 Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Thu, 14 Mar 2024 18:08:55 +0000
Subject: [PATCH 222/283] Update reference of the test

---
 .../0_stateless/03010_sum_to_to_count_if_nullable.reference | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.reference b/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.reference
index e69de29bb2d..8627f639a03 100644
--- a/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.reference
+++ b/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.reference
@@ -0,0 +1,2 @@
+(5,NULL)
+(5,NULL)

From b65beba1fd439a1e98b028b85548873e8339335c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?=
Date: Thu, 14 Mar 2024 18:13:06 +0000
Subject: [PATCH 223/283] Remove unused error code declaration

---
 src/Dictionaries/RangeHashedDictionary.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp
index 8299c3ad93a..30a0123ade6 100644
--- a/src/Dictionaries/RangeHashedDictionary.cpp
+++ b/src/Dictionaries/RangeHashedDictionary.cpp
@@ -3,11 +3,6 @@
 namespace DB
 {

-namespace ErrorCodes
-{
-    extern const int TYPE_MISMATCH;
TYPE_MISMATCH; -} - template ColumnPtr RangeHashedDictionary::getColumn( const std::string & attribute_name, From b0008495294761e3394de0060dda61ee66808476 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 14 Mar 2024 18:15:12 +0000 Subject: [PATCH 224/283] Better --- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index d374d92c1fb..2c41b6dc467 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -32,7 +32,7 @@ public: return; auto * function_node = node->as(); - if (!function_node || !function_node->isAggregateFunction() || function_node->getResultType()->isNullable()) + if (!function_node || !function_node->isAggregateFunction()) return; auto function_name = function_node->getFunctionName(); @@ -54,10 +54,10 @@ public: if (!constant_node) return; - const auto & constant_value_literal = constant_node->getValue(); - if (!isInt64OrUInt64FieldType(constant_value_literal.getType())) + if (auto constant_type = constant_node->getResultType(); !isUInt64(constant_type) && !isInt64(constant_type)) return; + const auto & constant_value_literal = constant_node->getValue(); if (getSettings().aggregate_functions_null_for_empty) return; From e9f81170873bd06bf8c9875fa90654ec6fb0abc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 14 Mar 2024 20:16:25 +0100 Subject: [PATCH 225/283] Increase memory limit for coverage builds --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ffb4789dc9..eff6dd3ff6a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,13 +56,13 @@ option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile t if (ENABLE_CHECK_HEAVY_BUILDS) # set DATA (since RSS does not work since 2.6.x+) to 5G set (RLIMIT_DATA 5000000000) - # set VIRT (RLIMIT_AS) to 10G (DATA*10) + # set VIRT (RLIMIT_AS) to 10G (DATA*2) set (RLIMIT_AS 10000000000) # set CPU time limit to 1000 seconds set (RLIMIT_CPU 1000) # -fsanitize=memory and address are too heavy - if (SANITIZE) + if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE) set (RLIMIT_DATA 10000000000) # 10G endif() From e8b3cc28518a409fbefe5206f676c2623d881484 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Thu, 14 Mar 2024 20:01:12 +0000 Subject: [PATCH 226/283] CI: skip hdfs tests for arm #do_not_test #batch_0 #no_merge_commit #ci_set_arm --- .../integration/test_allowed_url_from_config/test.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_allowed_url_from_config/test.py b/tests/integration/test_allowed_url_from_config/test.py index 3106cf12702..fb7564ae9d3 100644 --- a/tests/integration/test_allowed_url_from_config/test.py +++ b/tests/integration/test_allowed_url_from_config/test.py @@ -1,3 +1,4 @@ +import platform import pytest from helpers.cluster import ClickHouseCluster @@ -16,9 +17,11 @@ node5 = cluster.add_instance( "node5", main_configs=["configs/config_without_allowed_hosts.xml"] ) node6 = cluster.add_instance("node6", main_configs=["configs/config_for_remote.xml"]) -node7 = cluster.add_instance( - "node7", main_configs=["configs/config_for_redirect.xml"], with_hdfs=True -) + +if platform.processor() != "arm": + node7 = cluster.add_instance( + "node7", main_configs=["configs/config_for_redirect.xml"], with_hdfs=True + ) @pytest.fixture(scope="module") @@ 
-270,6 +273,7 @@ def test_table_function_remote(start_cluster): ) +@pytest.mark.skipif(platform.processor() == "arm", reason="skip for ARM") def test_redirect(start_cluster): hdfs_api = start_cluster.hdfs_api @@ -284,6 +288,7 @@ def test_redirect(start_cluster): node7.query("DROP TABLE table_test_7_1") +@pytest.mark.skipif(platform.processor() == "arm", reason="skip for ARM") def test_HDFS(start_cluster): assert "not allowed" in node7.query_and_get_error( "CREATE TABLE table_test_7_2 (word String) ENGINE=HDFS('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'CSV')" @@ -293,6 +298,7 @@ def test_HDFS(start_cluster): ) +@pytest.mark.skipif(platform.processor() == "arm", reason="skip for ARM") def test_schema_inference(start_cluster): error = node7.query_and_get_error("desc url('http://test.com`, 'TSVRaw'')") assert error.find("ReadWriteBufferFromHTTPBase") == -1 From e08eaebc9946d39d19b4e995cfdc962719c55c44 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 14 Mar 2024 20:36:18 +0000 Subject: [PATCH 227/283] Add sanity check for poll_max_batch_size FileLog setting to avoid big untracked allocations --- src/Storages/FileLog/FileLogSettings.cpp | 6 ++++++ .../03010_file_log_large_poll_batch_size.reference | 0 .../0_stateless/03010_file_log_large_poll_batch_size.sql | 2 ++ 3 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/03010_file_log_large_poll_batch_size.reference create mode 100644 tests/queries/0_stateless/03010_file_log_large_poll_batch_size.sql diff --git a/src/Storages/FileLog/FileLogSettings.cpp b/src/Storages/FileLog/FileLogSettings.cpp index 2cd42c35870..8e245285b9a 100644 --- a/src/Storages/FileLog/FileLogSettings.cpp +++ b/src/Storages/FileLog/FileLogSettings.cpp @@ -11,6 +11,7 @@ namespace DB namespace ErrorCodes { extern const int UNKNOWN_SETTING; + extern const int INVALID_SETTING_VALUE; } IMPLEMENT_SETTINGS_TRAITS(FileLogSettingsTraits, LIST_OF_FILELOG_SETTINGS) @@ -36,6 +37,11 @@ void FileLogSettings::loadFromQuery(ASTStorage & storage_def) settings_ast->is_standalone = false; storage_def.set(storage_def.settings, settings_ast); } + + /// Check that batch size is not too high (the same as we check setting max_block_size). 
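+    /// (Assumed rationale, inferred from the commit intent: the rows of a poll batch are buffered
+    /// in memory before being flushed as a block, so an absurdly large poll_max_batch_size could
+    /// cause big allocations that are not tracked by the memory accounting; 2^32 rows mirrors the
+    /// sanity bound applied to max_block_size.)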
+    constexpr UInt64 max_sane_block_rows_size = 4294967296; // 2^32
+    if (poll_max_batch_size > max_sane_block_rows_size)
+        throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Sanity check: 'poll_max_batch_size' value is too high ({})", poll_max_batch_size);
 }

 }

diff --git a/tests/queries/0_stateless/03010_file_log_large_poll_batch_size.reference b/tests/queries/0_stateless/03010_file_log_large_poll_batch_size.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/03010_file_log_large_poll_batch_size.sql b/tests/queries/0_stateless/03010_file_log_large_poll_batch_size.sql
new file mode 100644
index 00000000000..2663011f2ec
--- /dev/null
+++ b/tests/queries/0_stateless/03010_file_log_large_poll_batch_size.sql
@@ -0,0 +1,2 @@
+create table test (number UInt64) engine=FileLog('./user_files/data.jsonl', 'JSONEachRow') settings poll_max_batch_size=18446744073709; -- {serverError INVALID_SETTING_VALUE}
+

From 1cae77997a648414d2e16e806c322fae2f2da301 Mon Sep 17 00:00:00 2001
From: Nikolai Kochetov
Date: Thu, 14 Mar 2024 21:34:25 +0000
Subject: [PATCH 228/283] Try to revert stage back

---
 src/Storages/StorageMerge.cpp | 8 +++++---
 src/Storages/StorageMerge.h   | 3 ++-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index f95464f4bff..52362eb5cb8 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -1121,7 +1121,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources(
         /// Subordinary tables could have different but convertible types, like numeric types of different width.
         /// We must return streams with structure equals to structure of Merge table.
         convertAndFilterSourceStream(
-            header, modified_query_info, storage_snapshot_, aliases, row_policy_data_opt, context, *builder);
+            header, modified_query_info, storage_snapshot_, aliases, row_policy_data_opt, context, *builder, processed_stage);
     }

     return builder;
@@ -1473,8 +1473,7 @@ void ReadFromMerge::convertAndFilterSourceStream(
     const Aliases & aliases,
     const RowPolicyDataOpt & row_policy_data_opt,
     ContextPtr local_context,
-    QueryPipelineBuilder & builder)
+    QueryPipelineBuilder & builder,
+    QueryProcessingStage::Enum processed_stage)
 {
     Block before_block_header = builder.getHeader();

@@ -1532,7 +1533,8 @@ void ReadFromMerge::convertAndFilterSourceStream(

     ActionsDAG::MatchColumnsMode convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Name;

-    if (local_context->getSettingsRef().allow_experimental_analyzer && dynamic_cast(&snapshot->storage) != nullptr)
+    if (local_context->getSettingsRef().allow_experimental_analyzer
+        && (processed_stage != QueryProcessingStage::FetchColumns || dynamic_cast(&snapshot->storage) != nullptr))
         convert_actions_match_columns_mode = ActionsDAG::MatchColumnsMode::Position;

     if (row_policy_data_opt)
diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h
index 3aabd7e26e3..556649f622d 100644
--- a/src/Storages/StorageMerge.h
+++ b/src/Storages/StorageMerge.h
@@ -277,8 +277,7 @@ private:
         const Aliases & aliases,
         const RowPolicyDataOpt & row_policy_data_opt,
         ContextPtr context,
-        QueryPipelineBuilder & builder);
+        QueryPipelineBuilder & builder,
+        QueryProcessingStage::Enum processed_stage);

     StorageMerge::StorageListWithLocks getSelectedTables(
         ContextPtr query_context,

From e418f249b8a08484b10bbd5a60de5718686de2f3 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Fri, 15 Mar 2024 00:40:28 +0000
Subject: [PATCH 229/283] fix test

---
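Note: the one-line change below pins `optimize_use_projections = 0` for this test, presumably so
that the aggregation cannot be served from a projection part, which would bypass the equal-ranges
code path the test is meant to exercise (assumed rationale; not stated in the commit message).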
tests/queries/0_stateless/03008_optimize_equal_ranges.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03008_optimize_equal_ranges.sql b/tests/queries/0_stateless/03008_optimize_equal_ranges.sql index 4d521420741..6d769c7382a 100644 --- a/tests/queries/0_stateless/03008_optimize_equal_ranges.sql +++ b/tests/queries/0_stateless/03008_optimize_equal_ranges.sql @@ -5,6 +5,7 @@ CREATE TABLE t_optimize_equal_ranges (a UInt64, b String, c UInt64) ENGINE = Mer SET max_block_size = 1024; SET max_bytes_before_external_group_by = 0; SET optimize_aggregation_in_order = 0; +SET optimize_use_projections = 0; INSERT INTO t_optimize_equal_ranges SELECT 0, toString(number), number FROM numbers(30000); INSERT INTO t_optimize_equal_ranges SELECT 1, toString(number), number FROM numbers(30000); From 978fb783517639f495fbfbc67fe721cb8c8b2f5d Mon Sep 17 00:00:00 2001 From: peter279k Date: Fri, 15 Mar 2024 11:18:21 +0800 Subject: [PATCH 230/283] Correct Criteo example dataset instruction section --- docs/en/getting-started/example-datasets/criteo.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/getting-started/example-datasets/criteo.md b/docs/en/getting-started/example-datasets/criteo.md index a2e0fda0cb0..4becdb50731 100644 --- a/docs/en/getting-started/example-datasets/criteo.md +++ b/docs/en/getting-started/example-datasets/criteo.md @@ -55,7 +55,7 @@ CREATE TABLE criteo_log ( ) ENGINE = Log; ``` -Download the data: +Insert the data: ``` bash $ for i in {00..23}; do echo $i; zcat datasets/criteo/day_${i#0}.gz | sed -r 's/^/2000-01-'${i/00/24}'\t/' | clickhouse-client --host=example-perftest01j --query="INSERT INTO criteo_log FORMAT TabSeparated"; done From a3da67ba9ae17440d4809e76ad19d2e308469e44 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 15 Mar 2024 06:47:10 +0000 Subject: [PATCH 231/283] Update version_date.tsv and changelogs after v23.8.11.28-lts --- docs/changelogs/v23.8.11.28-lts.md | 30 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 4 ++++ 2 files changed, 34 insertions(+) create mode 100644 docs/changelogs/v23.8.11.28-lts.md diff --git a/docs/changelogs/v23.8.11.28-lts.md b/docs/changelogs/v23.8.11.28-lts.md new file mode 100644 index 00000000000..acc284caa72 --- /dev/null +++ b/docs/changelogs/v23.8.11.28-lts.md @@ -0,0 +1,30 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.8.11.28-lts (31879d2ab4c) FIXME as compared to v23.8.10.43-lts (a278225bba9) + +#### Improvement +* Backported in [#60828](https://github.com/ClickHouse/ClickHouse/issues/60828): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Use the current branch test-utils to build cctools'. [#61276](https://github.com/ClickHouse/ClickHouse/pull/61276) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)). +* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)). +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index e372e407ce1..ad7c92d85d5 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,9 +1,11 @@ v24.2.1.2248-stable 2024-02-29 +v24.1.6.52-stable 2024-03-07 v24.1.5.6-stable 2024-02-14 v24.1.4.20-stable 2024-02-14 v24.1.3.31-stable 2024-02-09 v24.1.2.5-stable 2024-02-02 v24.1.1.2048-stable 2024-01-30 +v23.12.5.81-stable 2024-03-15 v23.12.4.15-stable 2024-02-09 v23.12.3.40-stable 2024-02-02 v23.12.2.59-stable 2024-01-05 @@ -25,6 +27,7 @@ v23.9.4.11-stable 2023-11-08 v23.9.3.12-stable 2023-10-31 v23.9.2.56-stable 2023-10-19 v23.9.1.1854-stable 2023-09-29 +v23.8.11.28-lts 2024-03-15 v23.8.10.43-lts 2024-03-05 v23.8.9.54-lts 2024-01-05 v23.8.8.20-lts 2023-11-25 @@ -55,6 +58,7 @@ v23.4.4.16-stable 2023-06-17 v23.4.3.48-stable 2023-06-12 v23.4.2.11-stable 2023-05-02 v23.4.1.1943-stable 2023-04-27 +v23.3.21.26-lts 2024-03-15 v23.3.20.27-lts 2024-03-05 v23.3.19.32-lts 2024-01-05 v23.3.18.15-lts 2023-11-25 From e6f2cd080e2bc8b0fb47287c6749fe6bc45a14e7 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 15 Mar 2024 06:47:48 +0000 Subject: [PATCH 232/283] Update version_date.tsv and changelogs after v23.3.21.26-lts --- docs/changelogs/v23.3.21.26-lts.md | 24 ++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 2 ++ 2 files changed, 26 insertions(+) create mode 100644 docs/changelogs/v23.3.21.26-lts.md diff --git a/docs/changelogs/v23.3.21.26-lts.md b/docs/changelogs/v23.3.21.26-lts.md new file mode 100644 index 00000000000..b0f059c4907 --- /dev/null +++ b/docs/changelogs/v23.3.21.26-lts.md @@ -0,0 +1,24 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v23.3.21.26-lts (d9672a3731f) FIXME as compared to v23.3.20.27-lts (cc974ba4f81) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix reading from sparse columns after restart [#49660](https://github.com/ClickHouse/ClickHouse/pull/49660) ([Anton Popov](https://github.com/CurtizJ)). +* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). 
+* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)). +* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)). +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index e372e407ce1..4ed577e0b61 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,4 +1,5 @@ v24.2.1.2248-stable 2024-02-29 +v24.1.6.52-stable 2024-03-07 v24.1.5.6-stable 2024-02-14 v24.1.4.20-stable 2024-02-14 v24.1.3.31-stable 2024-02-09 @@ -55,6 +56,7 @@ v23.4.4.16-stable 2023-06-17 v23.4.3.48-stable 2023-06-12 v23.4.2.11-stable 2023-05-02 v23.4.1.1943-stable 2023-04-27 +v23.3.21.26-lts 2024-03-15 v23.3.20.27-lts 2024-03-05 v23.3.19.32-lts 2024-01-05 v23.3.18.15-lts 2023-11-25 From 4e34887ab4d1bfe2355327da452d66d376dc6dae Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 15 Mar 2024 06:48:11 +0000 Subject: [PATCH 233/283] Update version_date.tsv and changelogs after v24.1.7.18-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v24.1.7.18-stable.md | 26 ++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 6 ++++++ 5 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v24.1.7.18-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 2f42854a972..17eee6d4287 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.2.1.2248" +ARG VERSION="24.2.2.71" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 7bd777de5b9..bd5fa313adc 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.2.1.2248" +ARG VERSION="24.2.2.71" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 03d01cfd5d7..256dcdc029f 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.2.1.2248" +ARG VERSION="24.2.2.71" ARG 
PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v24.1.7.18-stable.md b/docs/changelogs/v24.1.7.18-stable.md new file mode 100644 index 00000000000..603a83a67be --- /dev/null +++ b/docs/changelogs/v24.1.7.18-stable.md @@ -0,0 +1,26 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.1.7.18-stable (90925babd78) FIXME as compared to v24.1.6.52-stable (fa09f677bc9) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)). +* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)). +* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)). + +#### CI Fix or Improvement (changelog entry is not required) + +* Backported in [#61043](https://github.com/ClickHouse/ClickHouse/issues/61043): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#61168](https://github.com/ClickHouse/ClickHouse/issues/61168): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)). +* Backported in [#61192](https://github.com/ClickHouse/ClickHouse/issues/61192): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)). 
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index e372e407ce1..32fbfee8274 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,9 +1,13 @@ +v24.2.2.71-stable 2024-03-15 v24.2.1.2248-stable 2024-02-29 +v24.1.7.18-stable 2024-03-15 +v24.1.6.52-stable 2024-03-07 v24.1.5.6-stable 2024-02-14 v24.1.4.20-stable 2024-02-14 v24.1.3.31-stable 2024-02-09 v24.1.2.5-stable 2024-02-02 v24.1.1.2048-stable 2024-01-30 +v23.12.5.81-stable 2024-03-15 v23.12.4.15-stable 2024-02-09 v23.12.3.40-stable 2024-02-02 v23.12.2.59-stable 2024-01-05 @@ -25,6 +29,7 @@ v23.9.4.11-stable 2023-11-08 v23.9.3.12-stable 2023-10-31 v23.9.2.56-stable 2023-10-19 v23.9.1.1854-stable 2023-09-29 +v23.8.11.28-lts 2024-03-15 v23.8.10.43-lts 2024-03-05 v23.8.9.54-lts 2024-01-05 v23.8.8.20-lts 2023-11-25 @@ -55,6 +60,7 @@ v23.4.4.16-stable 2023-06-17 v23.4.3.48-stable 2023-06-12 v23.4.2.11-stable 2023-05-02 v23.4.1.1943-stable 2023-04-27 +v23.3.21.26-lts 2024-03-15 v23.3.20.27-lts 2024-03-05 v23.3.19.32-lts 2024-01-05 v23.3.18.15-lts 2023-11-25 From 5a56c439395c3864d0f95d7b3b8ef63ed069fab6 Mon Sep 17 00:00:00 2001 From: Pham Anh Tuan Date: Wed, 13 Mar 2024 15:06:11 +0000 Subject: [PATCH 234/283] add async metrics for virtual memory mappings --- src/Common/AsynchronousMetrics.cpp | 53 +++++++++++++++++++ src/Common/AsynchronousMetrics.h | 3 ++ ...ry_mappings_asynchronous_metrics.reference | 2 + ...l_memory_mappings_asynchronous_metrics.sql | 4 ++ 4 files changed, 62 insertions(+) create mode 100644 tests/queries/0_stateless/03010_virtual_memory_mappings_asynchronous_metrics.reference create mode 100644 tests/queries/0_stateless/03010_virtual_memory_mappings_asynchronous_metrics.sql diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index b24d9bcc301..0b9be18c84e 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -90,6 +91,9 @@ AsynchronousMetrics::AsynchronousMetrics( openFileIfExists("/sys/fs/cgroup/cpu/cpu.cfs_quota_us", cgroupcpu_cfs_quota); } + openFileIfExists("/proc/sys/vm/max_map_count", vm_max_map_count); + openFileIfExists("/proc/self/maps", vm_maps); + openSensors(); openBlockDevices(); openEDAC(); @@ -1423,6 +1427,55 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) } } + if (vm_max_map_count) + { + try + { + vm_max_map_count->rewind(); + + uint64_t max_map_count = 0; + readText(max_map_count, *vm_max_map_count); + new_values["VMMaxMapCount"] = { max_map_count, "The maximum number of memory mappings a process may have (/proc/sys/vm/max_map_count)."}; + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + openFileIfExists("/proc/sys/vm/max_map_count", vm_max_map_count); + } + } + + if (vm_maps) + { + try + { + vm_maps->rewind(); + + uint64_t num_maps = 0; + while (!vm_maps->eof()) + { + char * next_pos = find_first_symbols<'\n'>(vm_maps->position(), vm_maps->buffer().end()); + vm_maps->position() = next_pos; + + if (!vm_maps->hasPendingData()) + continue; + + if (*vm_maps->position() == '\n') + { + ++num_maps; + ++vm_maps->position(); + } + } + new_values["VMNumMaps"] = { num_maps, + "The current number of memory mappings of the process (/proc/self/maps)." + " If it is close to the maximum (VMMaxMapCount), you should increase the limit for vm.max_map_count in /etc/sysctl.conf"}; + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + openFileIfExists("/proc/self/maps", vm_maps); + } + } + try { for (size_t i = 0, size = thermal.size(); i < size; ++i) diff --git a/src/Common/AsynchronousMetrics.h b/src/Common/AsynchronousMetrics.h index 305e8136b8a..4b3d28e80c5 100644 --- a/src/Common/AsynchronousMetrics.h +++ b/src/Common/AsynchronousMetrics.h @@ -123,6 +123,9 @@ private: std::optional cgroupcpu_cfs_quota TSA_GUARDED_BY(data_mutex); std::optional cgroupcpu_max TSA_GUARDED_BY(data_mutex); + std::optional vm_max_map_count TSA_GUARDED_BY(data_mutex); + std::optional vm_maps TSA_GUARDED_BY(data_mutex); + std::vector> thermal TSA_GUARDED_BY(data_mutex); std::unordered_map Date: Fri, 15 Mar 2024 06:50:44 +0000 Subject: [PATCH 235/283] Update version_date.tsv and changelogs after v24.2.2.71-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v24.2.2.71-stable.md | 55 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 6 +++ 5 files changed, 64 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v24.2.2.71-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 2f42854a972..17eee6d4287 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.2.1.2248" +ARG VERSION="24.2.2.71" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 7bd777de5b9..bd5fa313adc 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.2.1.2248" +ARG VERSION="24.2.2.71" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 03d01cfd5d7..256dcdc029f 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.2.1.2248" +ARG VERSION="24.2.2.71" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v24.2.2.71-stable.md b/docs/changelogs/v24.2.2.71-stable.md new file mode 100644 index 00000000000..b9aa5be626b --- /dev/null +++ b/docs/changelogs/v24.2.2.71-stable.md @@ -0,0 +1,55 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.2.2.71-stable (9293d361e72) FIXME as compared to v24.2.1.2248-stable (891689a4150) + +#### Improvement +* Backported in [#60834](https://github.com/ClickHouse/ClickHouse/issues/60834): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)). 
+ +#### Bug Fix (user-visible misbehavior in an official stable release) + +* PartsSplitter invalid ranges for the same part [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)). +* Try to avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)). +* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)). +* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)). +* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)). +* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)). +* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)). +* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)). + +#### CI Fix or Improvement (changelog entry is not required) + +* Backported in [#60758](https://github.com/ClickHouse/ClickHouse/issues/60758): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#60706](https://github.com/ClickHouse/ClickHouse/issues/60706): Eliminates the need to provide input args to docker server jobs to clean yml files. [#60602](https://github.com/ClickHouse/ClickHouse/pull/60602) ([Max K.](https://github.com/maxknv)). +* Backported in [#61045](https://github.com/ClickHouse/ClickHouse/issues/61045): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Backported in [#60721](https://github.com/ClickHouse/ClickHouse/issues/60721): Fix build_report job so that it's defined by ci_config only (not yml file). [#60613](https://github.com/ClickHouse/ClickHouse/pull/60613) ([Max K.](https://github.com/maxknv)). +* Backported in [#60668](https://github.com/ClickHouse/ClickHouse/issues/60668): Do not await ci pending jobs on release branches decrease wait timeout to fit into gh job timeout. [#60652](https://github.com/ClickHouse/ClickHouse/pull/60652) ([Max K.](https://github.com/maxknv)). +* Backported in [#60863](https://github.com/ClickHouse/ClickHouse/issues/60863): Set limited number of builds for "special build check" report in backports. [#60850](https://github.com/ClickHouse/ClickHouse/pull/60850) ([Max K.](https://github.com/maxknv)). +* Backported in [#60946](https://github.com/ClickHouse/ClickHouse/issues/60946): ... [#60935](https://github.com/ClickHouse/ClickHouse/pull/60935) ([Max K.](https://github.com/maxknv)). +* Backported in [#60972](https://github.com/ClickHouse/ClickHouse/issues/60972): ... [#60952](https://github.com/ClickHouse/ClickHouse/pull/60952) ([Max K.](https://github.com/maxknv)). +* Backported in [#60980](https://github.com/ClickHouse/ClickHouse/issues/60980): ... [#60958](https://github.com/ClickHouse/ClickHouse/pull/60958) ([Max K.](https://github.com/maxknv)). +* Backported in [#61170](https://github.com/ClickHouse/ClickHouse/issues/61170): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)). +* Backported in [#61181](https://github.com/ClickHouse/ClickHouse/issues/61181): ... [#61172](https://github.com/ClickHouse/ClickHouse/pull/61172) ([Max K.](https://github.com/maxknv)). +* Backported in [#61228](https://github.com/ClickHouse/ClickHouse/issues/61228): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)). +* Backported in [#61194](https://github.com/ClickHouse/ClickHouse/issues/61194): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)). +* Backported in [#61244](https://github.com/ClickHouse/ClickHouse/issues/61244): ... [#61214](https://github.com/ClickHouse/ClickHouse/pull/61214) ([Max K.](https://github.com/maxknv)). +* Backported in [#61388](https://github.com/ClickHouse/ClickHouse/issues/61388):. [#61373](https://github.com/ClickHouse/ClickHouse/pull/61373) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* CI: make workflow yml abstract [#60421](https://github.com/ClickHouse/ClickHouse/pull/60421) ([Max K.](https://github.com/maxknv)). +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)). +* General sanity in function `seriesOutliersDetectTukey` [#60535](https://github.com/ClickHouse/ClickHouse/pull/60535) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
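An editorial aside on the `VMMaxMapCount` and `VMNumMaps` asynchronous metrics introduced in PATCH 234 above: they surface through `system.asynchronous_metrics` like every other value produced by `AsynchronousMetrics`. Assuming only the metric names taken from that patch, a quick sanity check could look like this:

``` sql
SELECT metric, value
FROM system.asynchronous_metrics
WHERE metric IN ('VMMaxMapCount', 'VMNumMaps');
```

If `VMNumMaps` gets close to `VMMaxMapCount`, the hint embedded in the metric description applies: raise `vm.max_map_count` in `/etc/sysctl.conf`.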
+ diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index e372e407ce1..32fbfee8274 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,9 +1,13 @@ +v24.2.2.71-stable 2024-03-15 v24.2.1.2248-stable 2024-02-29 +v24.1.7.18-stable 2024-03-15 +v24.1.6.52-stable 2024-03-07 v24.1.5.6-stable 2024-02-14 v24.1.4.20-stable 2024-02-14 v24.1.3.31-stable 2024-02-09 v24.1.2.5-stable 2024-02-02 v24.1.1.2048-stable 2024-01-30 +v23.12.5.81-stable 2024-03-15 v23.12.4.15-stable 2024-02-09 v23.12.3.40-stable 2024-02-02 v23.12.2.59-stable 2024-01-05 @@ -25,6 +29,7 @@ v23.9.4.11-stable 2023-11-08 v23.9.3.12-stable 2023-10-31 v23.9.2.56-stable 2023-10-19 v23.9.1.1854-stable 2023-09-29 +v23.8.11.28-lts 2024-03-15 v23.8.10.43-lts 2024-03-05 v23.8.9.54-lts 2024-01-05 v23.8.8.20-lts 2023-11-25 @@ -55,6 +60,7 @@ v23.4.4.16-stable 2023-06-17 v23.4.3.48-stable 2023-06-12 v23.4.2.11-stable 2023-05-02 v23.4.1.1943-stable 2023-04-27 +v23.3.21.26-lts 2024-03-15 v23.3.20.27-lts 2024-03-05 v23.3.19.32-lts 2024-01-05 v23.3.18.15-lts 2023-11-25 From 51f0930fe276da5d32a7a4df302f337ebfead00e Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 15 Mar 2024 06:53:49 +0000 Subject: [PATCH 236/283] Update version_date.tsv and changelogs after v23.12.5.81-stable --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v23.12.5.81-stable.md | 64 +++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 6 +++ 5 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v23.12.5.81-stable.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 2f42854a972..17eee6d4287 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.2.1.2248" +ARG VERSION="24.2.2.71" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 7bd777de5b9..bd5fa313adc 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.2.1.2248" +ARG VERSION="24.2.2.71" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 03d01cfd5d7..256dcdc029f 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.2.1.2248" +ARG VERSION="24.2.2.71" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v23.12.5.81-stable.md b/docs/changelogs/v23.12.5.81-stable.md new file mode 100644 index 00000000000..0a0acd97d58 --- /dev/null +++ b/docs/changelogs/v23.12.5.81-stable.md @@ -0,0 +1,64 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 
+--- + +# 2024 Changelog + +### ClickHouse release v23.12.5.81-stable (a0fbe3ae813) FIXME as compared to v23.12.4.15-stable (4233d111d20) + +#### Improvement +* Backported in [#60290](https://github.com/ClickHouse/ClickHouse/issues/60290): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#60830](https://github.com/ClickHouse/ClickHouse/issues/60830): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)). + +#### Build/Testing/Packaging Improvement +* Backported in [#59883](https://github.com/ClickHouse/ClickHouse/issues/59883): If you want to run initdb scripts every time the ClickHouse container starts, you should initialize the environment variable CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix_kql_issue_found_by_wingfuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)). +* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix query start time on non-initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)). +* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)). +* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)). +* Hide sensitive info for s3queue [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)). +* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)). +* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
+* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)). +* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)). +* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)). + +#### CI Fix or Improvement (changelog entry is not required) + +* Backported in [#60767](https://github.com/ClickHouse/ClickHouse/issues/60767): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#60582](https://github.com/ClickHouse/ClickHouse/issues/60582): Arm and amd docker build jobs use similar job names and thus overwrite job reports - aarch64 and amd64 suffixes added to fix this. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)). +* Backported in [#61041](https://github.com/ClickHouse/ClickHouse/issues/61041): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#61030](https://github.com/ClickHouse/ClickHouse/issues/61030): ... [#61022](https://github.com/ClickHouse/ClickHouse/pull/61022) ([Max K.](https://github.com/maxknv)). +* Backported in [#61224](https://github.com/ClickHouse/ClickHouse/issues/61224): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)). +* Backported in [#61190](https://github.com/ClickHouse/ClickHouse/issues/61190): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#59798](https://github.com/ClickHouse/ClickHouse/issues/59798) to 23.12: CI: do not reuse builds on release branches"'. [#59979](https://github.com/ClickHouse/ClickHouse/pull/59979) ([Max K.](https://github.com/maxknv)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* CI: move ci-specifics from job scripts to ci.py [#58516](https://github.com/ClickHouse/ClickHouse/pull/58516) ([Max K.](https://github.com/maxknv)). +* Make ZooKeeper actually sequentially consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix special build reports in release branches [#59797](https://github.com/ClickHouse/ClickHouse/pull/59797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: do not reuse builds on release branches [#59798](https://github.com/ClickHouse/ClickHouse/pull/59798) ([Max K.](https://github.com/maxknv)). +* Fix mark release ready [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Ability to detect undead ZooKeeper sessions [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)). +* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)). +* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index e372e407ce1..32fbfee8274 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,9 +1,13 @@ +v24.2.2.71-stable 2024-03-15 v24.2.1.2248-stable 2024-02-29 +v24.1.7.18-stable 2024-03-15 +v24.1.6.52-stable 2024-03-07 v24.1.5.6-stable 2024-02-14 v24.1.4.20-stable 2024-02-14 v24.1.3.31-stable 2024-02-09 v24.1.2.5-stable 2024-02-02 v24.1.1.2048-stable 2024-01-30 +v23.12.5.81-stable 2024-03-15 v23.12.4.15-stable 2024-02-09 v23.12.3.40-stable 2024-02-02 v23.12.2.59-stable 2024-01-05 @@ -25,6 +29,7 @@ v23.9.4.11-stable 2023-11-08 v23.9.3.12-stable 2023-10-31 v23.9.2.56-stable 2023-10-19 v23.9.1.1854-stable 2023-09-29 +v23.8.11.28-lts 2024-03-15 v23.8.10.43-lts 2024-03-05 v23.8.9.54-lts 2024-01-05 v23.8.8.20-lts 2023-11-25 @@ -55,6 +60,7 @@ v23.4.4.16-stable 2023-06-17 v23.4.3.48-stable 2023-06-12 v23.4.2.11-stable 2023-05-02 v23.4.1.1943-stable 2023-04-27 +v23.3.21.26-lts 2024-03-15 v23.3.20.27-lts 2024-03-05 v23.3.19.32-lts 2024-01-05 v23.3.18.15-lts 2023-11-25 From 7f97b11ce6095ae77d0baf1cb3f9517714c6bdc5 Mon Sep 17 00:00:00 2001 From: Zhuo Qiu Date: Fri, 15 Mar 2024 15:46:09 +0800 Subject: [PATCH 237/283] only read _row_exists column when necessary --- src/Storages/MergeTree/MutateTask.cpp | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 4d1e60f450e..bfdc109a89d 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1018,8 +1018,8 @@ struct MutationContext scope_guard temporary_directory_lock; - /// Whether this mutation contains lightweight delete - bool has_lightweight_delete; + /// Whether we need to count lightweight delete rows in this mutation + bool count_lightweight_deleted_rows; }; using MutationContextPtr = std::shared_ptr; @@ -1282,7 +1282,8 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ctx->out->write(cur_block); - if (ctx->has_lightweight_delete) + /// TODO: move this calculation to DELETE FROM mutation + if (ctx->count_lightweight_deleted_rows) existing_rows_count += MutationHelpers::getExistingRowsCount(cur_block); for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) @@ -1376,7 +1377,7 @@ bool PartMergerWriter::iterateThroughAllProjections() void PartMergerWriter::finalize() { - if (ctx->has_lightweight_delete) + if (ctx->count_lightweight_deleted_rows) ctx->new_data_part->existing_rows_count = existing_rows_count; } @@ -2225,17 +2226,17 @@ bool MutateTask::prepare() if (ctx->mutating_pipeline_builder.initialized()) ctx->execute_ttl_type = MutationHelpers::shouldExecuteTTL(ctx->metadata_snapshot, ctx->interpreter->getColumnDependencies()); - if (ctx->updated_header.has(RowExistsColumn::name)) + if 
(ctx->data->getSettings()->exclude_deleted_rows_for_part_size_in_merge && ctx->updated_header.has(RowExistsColumn::name)) { - /// This mutation contains lightweight delete, reset existing_rows_count of new data part to 0 - /// It will be updated while writing _row_exists column - ctx->has_lightweight_delete = true; + /// This mutation contains lightweight delete and we need to count the deleted rows, + /// Reset existing_rows_count of new data part to 0 and it will be updated while writing _row_exists column + ctx->count_lightweight_deleted_rows = true; } else { - ctx->has_lightweight_delete = false; + ctx->count_lightweight_deleted_rows = false; - /// This mutation does not contains lightweight delete, copy existing_rows_count from source part + /// No need to count deleted rows, copy existing_rows_count from source part ctx->new_data_part->existing_rows_count = ctx->source_part->existing_rows_count.value_or(ctx->source_part->rows_count); } From be4554ba431c2c496c139e6b4869a68ca3ba58dc Mon Sep 17 00:00:00 2001 From: Zhuo Qiu Date: Fri, 15 Mar 2024 16:31:50 +0800 Subject: [PATCH 238/283] fix test --- tests/queries/0_stateless/03001_consider_lwd_when_merge.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql b/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql index a65e8877020..988d7058f21 100644 --- a/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql +++ b/tests/queries/0_stateless/03001_consider_lwd_when_merge.sql @@ -17,6 +17,9 @@ SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = ALTER TABLE lwd_merge MODIFY SETTING exclude_deleted_rows_for_part_size_in_merge = 1; +-- delete again because deleted rows will be counted in mutation +DELETE FROM lwd_merge WHERE id % 100 == 0; + OPTIMIZE TABLE lwd_merge; SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1; From 4eda78440d696db68d654a5883cc20fb1b145365 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Mar 2024 10:34:32 +0100 Subject: [PATCH 239/283] Remove C++ templates --- src/DataTypes/IDataType.h | 2 + src/Functions/castOrDefault.cpp | 181 ++++++++++++++------------------ 2 files changed, 79 insertions(+), 104 deletions(-) diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 55f584ef1e0..4403e3d9bd4 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -533,6 +533,8 @@ class DataTypeDateTime; class DataTypeDateTime64; template constexpr bool IsDataTypeDecimal> = true; + +/// TODO: this is garbage, remove it. 
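/// (Editorial gloss; not part of the patch.) The "garbage" is the specialization
/// immediately below: it makes IsDataTypeDecimal report true for DateTime64,
/// which is not a decimal type but reuses the decimal code paths because it
/// stores a scaled integer internally.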
template <> inline constexpr bool IsDataTypeDecimal = true; template constexpr bool IsDataTypeNumber> = true; diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 970e6fd6f75..18bdea28029 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -173,25 +173,22 @@ private: bool keep_nullable; }; -template class FunctionCastOrDefaultTyped final : public IFunction { public: - static constexpr auto name = Name::name; - - static FunctionPtr create(ContextPtr context) - { - return std::make_shared(context); - } - - explicit FunctionCastOrDefaultTyped(ContextPtr context_) - : impl(context_) + explicit FunctionCastOrDefaultTyped(ContextPtr context_, String name_, DataTypePtr type_) + : impl(context_), name(std::move(name_)), type(std::move(type_)), which(type) { } String getName() const override { return name; } private: + FunctionCastOrDefault impl; + String name; + DataTypePtr type; + WhichDataType which; + size_t getNumberOfArguments() const override { return 0; } bool isVariadic() const override { return true; } @@ -209,10 +206,10 @@ private: FunctionArgumentDescriptors mandatory_args = {{"Value", nullptr, nullptr, nullptr}}; FunctionArgumentDescriptors optional_args; - if constexpr (IsDataTypeDecimal) + if (isDecimal(type) || isDateTime64(type)) mandatory_args.push_back({"scale", static_cast(&isNativeInteger), &isColumnConst, "const Integer"}); - if (std::is_same_v || std::is_same_v) + if (isDateTimeOrDateTime64(type)) optional_args.push_back({"timezone", static_cast(&isString), isColumnConst, "const String"}); optional_args.push_back({"default_value", nullptr, nullptr, nullptr}); @@ -224,7 +221,7 @@ private: size_t scale = 0; std::string time_zone; - if constexpr (IsDataTypeDecimal) + if (isDecimal(type)) { const auto & scale_argument = arguments[additional_argument_index]; @@ -241,7 +238,7 @@ private: ++additional_argument_index; } - if constexpr (std::is_same_v || std::is_same_v) + if (isDateTimeOrDateTime64(type)) { if (additional_argument_index < arguments.size()) { @@ -251,16 +248,22 @@ private: } } - std::shared_ptr cast_type; + DataTypePtr cast_type; - if constexpr (std::is_same_v) - cast_type = std::make_shared(scale, time_zone); - else if constexpr (IsDataTypeDecimal) - cast_type = std::make_shared(Type::maxPrecision(), scale); - else if constexpr (std::is_same_v || std::is_same_v) - cast_type = std::make_shared(time_zone); + if (which.isDateTime64()) + cast_type = std::make_shared(scale, time_zone); + else if (which.isDateTime()) + cast_type = std::make_shared(time_zone); + else if (which.isDecimal32()) + cast_type = createDecimalMaxPrecision(scale); + else if (which.isDecimal64()) + cast_type = createDecimalMaxPrecision(scale); + else if (which.isDecimal128()) + cast_type = createDecimalMaxPrecision(scale); + else if (which.isDecimal256()) + cast_type = createDecimalMaxPrecision(scale); else - cast_type = std::make_shared(); + cast_type = type; ColumnWithTypeAndName type_argument = { @@ -289,7 +292,8 @@ private: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_size) const override { - size_t additional_arguments_size = IsDataTypeDecimal + (std::is_same_v || std::is_same_v); + /// Scale and time zone + size_t additional_arguments_size = (which.isDecimal() || which.isDateTime64()) + which.isDateTimeOrDateTime64(); ColumnWithTypeAndName second_argument = { @@ -310,98 +314,67 @@ private: return impl.executeImpl(arguments_with_cast_type, result_type, 
input_rows_size); } - - FunctionCastOrDefault impl; }; -struct NameToUInt8OrDefault { static constexpr auto name = "toUInt8OrDefault"; }; -struct NameToUInt16OrDefault { static constexpr auto name = "toUInt16OrDefault"; }; -struct NameToUInt32OrDefault { static constexpr auto name = "toUInt32OrDefault"; }; -struct NameToUInt64OrDefault { static constexpr auto name = "toUInt64OrDefault"; }; -struct NameToUInt256OrDefault { static constexpr auto name = "toUInt256OrDefault"; }; -struct NameToInt8OrDefault { static constexpr auto name = "toInt8OrDefault"; }; -struct NameToInt16OrDefault { static constexpr auto name = "toInt16OrDefault"; }; -struct NameToInt32OrDefault { static constexpr auto name = "toInt32OrDefault"; }; -struct NameToInt64OrDefault { static constexpr auto name = "toInt64OrDefault"; }; -struct NameToInt128OrDefault { static constexpr auto name = "toInt128OrDefault"; }; -struct NameToInt256OrDefault { static constexpr auto name = "toInt256OrDefault"; }; -struct NameToFloat32OrDefault { static constexpr auto name = "toFloat32OrDefault"; }; -struct NameToFloat64OrDefault { static constexpr auto name = "toFloat64OrDefault"; }; -struct NameToDateOrDefault { static constexpr auto name = "toDateOrDefault"; }; -struct NameToDate32OrDefault { static constexpr auto name = "toDate32OrDefault"; }; -struct NameToDateTimeOrDefault { static constexpr auto name = "toDateTimeOrDefault"; }; -struct NameToDateTime64OrDefault { static constexpr auto name = "toDateTime64OrDefault"; }; -struct NameToDecimal32OrDefault { static constexpr auto name = "toDecimal32OrDefault"; }; -struct NameToDecimal64OrDefault { static constexpr auto name = "toDecimal64OrDefault"; }; -struct NameToDecimal128OrDefault { static constexpr auto name = "toDecimal128OrDefault"; }; -struct NameToDecimal256OrDefault { static constexpr auto name = "toDecimal256OrDefault"; }; -struct NameToUUIDOrDefault { static constexpr auto name = "toUUIDOrDefault"; }; -struct NameToIPv4OrDefault { static constexpr auto name = "toIPv4OrDefault"; }; -struct NameToIPv6OrDefault { static constexpr auto name = "toIPv6OrDefault"; }; - -using FunctionToUInt8OrDefault = FunctionCastOrDefaultTyped; -using FunctionToUInt16OrDefault = FunctionCastOrDefaultTyped; -using FunctionToUInt32OrDefault = FunctionCastOrDefaultTyped; -using FunctionToUInt64OrDefault = FunctionCastOrDefaultTyped; -using FunctionToUInt256OrDefault = FunctionCastOrDefaultTyped; - -using FunctionToInt8OrDefault = FunctionCastOrDefaultTyped; -using FunctionToInt16OrDefault = FunctionCastOrDefaultTyped; -using FunctionToInt32OrDefault = FunctionCastOrDefaultTyped; -using FunctionToInt64OrDefault = FunctionCastOrDefaultTyped; -using FunctionToInt128OrDefault = FunctionCastOrDefaultTyped; -using FunctionToInt256OrDefault = FunctionCastOrDefaultTyped; - -using FunctionToFloat32OrDefault = FunctionCastOrDefaultTyped; -using FunctionToFloat64OrDefault = FunctionCastOrDefaultTyped; - -using FunctionToDateOrDefault = FunctionCastOrDefaultTyped; -using FunctionToDate32OrDefault = FunctionCastOrDefaultTyped; -using FunctionToDateTimeOrDefault = FunctionCastOrDefaultTyped; -using FunctionToDateTime64OrDefault = FunctionCastOrDefaultTyped; - -using FunctionToDecimal32OrDefault = FunctionCastOrDefaultTyped, NameToDecimal32OrDefault>; -using FunctionToDecimal64OrDefault = FunctionCastOrDefaultTyped, NameToDecimal64OrDefault>; -using FunctionToDecimal128OrDefault = FunctionCastOrDefaultTyped, NameToDecimal128OrDefault>; -using FunctionToDecimal256OrDefault = FunctionCastOrDefaultTyped, 
NameToDecimal256OrDefault>; - -using FunctionToUUIDOrDefault = FunctionCastOrDefaultTyped; -using FunctionToIPv4OrDefault = FunctionCastOrDefaultTyped; -using FunctionToIPv6OrDefault = FunctionCastOrDefaultTyped; - REGISTER_FUNCTION(CastOrDefault) { factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction("toUInt8OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toUInt8OrDefault", std::make_shared())); }); + factory.registerFunction("toUInt16OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toUInt16OrDefault", std::make_shared())); }); + factory.registerFunction("toUInt32OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toUInt32OrDefault", std::make_shared())); }); + factory.registerFunction("toUInt64OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toUInt64OrDefault", std::make_shared())); }); + factory.registerFunction("toUInt128OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toUInt128OrDefault", std::make_shared())); }); + factory.registerFunction("toUInt256OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toUInt256OrDefault", std::make_shared())); }); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction("toInt8OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toInt8OrDefault", std::make_shared())); }); + factory.registerFunction("toInt16OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toInt16OrDefault", std::make_shared())); }); + factory.registerFunction("toInt32OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toInt32OrDefault", std::make_shared())); }); + factory.registerFunction("toInt64OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toInt64OrDefault", std::make_shared())); }); + factory.registerFunction("toInt128OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toInt128OrDefault", std::make_shared())); }); + factory.registerFunction("toInt256OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toInt256OrDefault", std::make_shared())); }); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction("toFloat32OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toFloat32OrDefault", std::make_shared())); }); + factory.registerFunction("toFloat64OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toFloat64OrDefault", std::make_shared())); }); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction("toDateOrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toDateOrDefault", std::make_shared())); }); + factory.registerFunction("toDate32OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toDate32OrDefault", std::make_shared())); }); + 
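    /// (Editorial illustration; not part of the patch.) The de-templating above
    /// keeps the SQL-level behaviour of these functions unchanged, e.g.:
    ///     SELECT toUInt8OrDefault('42');                      -- 42
    ///     SELECT toUInt8OrDefault('foo', CAST('7', 'UInt8')); -- 7, the supplied default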
factory.registerFunction("toDateTimeOrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toDateTimeOrDefault", std::make_shared())); }); + factory.registerFunction("toDateTime64OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toDateTime64OrDefault", std::make_shared(3 /* default scale */))); }); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction("toDecimal32OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toDecimal32OrDefault", createDecimalMaxPrecision(0))); }); + factory.registerFunction("toDecimal64OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toDecimal64OrDefault", createDecimalMaxPrecision(0))); }); + factory.registerFunction("toDecimal128OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toDecimal128OrDefault", createDecimalMaxPrecision(0))); }); + factory.registerFunction("toDecimal256OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toDecimal256OrDefault", createDecimalMaxPrecision(0))); }); - factory.registerFunction(); - factory.registerFunction(); - factory.registerFunction(); + factory.registerFunction("toUUIDOrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toUUIDOrDefault", std::make_shared())); }); + factory.registerFunction("toIPv4OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toIPv4OrDefault", std::make_shared())); }); + factory.registerFunction("toIPv6OrDefault", [](ContextPtr context){ return std::make_unique( + std::make_shared(context, "toIPv6OrDefault", std::make_shared())); }); } } From 4290b1fe2cfe6eace74c08601db884f0c1738bf8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Mar 2024 11:32:21 +0100 Subject: [PATCH 240/283] Garbage --- src/Functions/FunctionsConversion.cpp | 49 +++++++-------------------- src/Functions/castOrDefault.cpp | 2 +- 2 files changed, 13 insertions(+), 38 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 42056067f00..f69d3e9146b 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1980,13 +1980,6 @@ public: static constexpr bool to_datetime64 = std::is_same_v; - static constexpr bool to_string_or_fixed_string = std::is_same_v || - std::is_same_v; - - static constexpr bool to_date_or_datetime = std::is_same_v || - std::is_same_v || - std::is_same_v; - static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } static FunctionPtr create() { return std::make_shared(); } @@ -2003,8 +1996,7 @@ public: bool isInjective(const ColumnsWithTypeAndName &) const override { return std::is_same_v; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & arguments) const override { - /// TODO: We can make more optimizations here. 
- return !(to_date_or_datetime && isNumber(*arguments[0].type)); + return !(IsDataTypeDateOrDateTime && isNumber(*arguments[0].type)); } using DefaultReturnTypeGetter = std::function; @@ -2327,7 +2319,7 @@ private: } bool done = false; - if constexpr (to_string_or_fixed_string) + if constexpr (std::is_same_v || std::is_same_v) { done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{}); } @@ -3155,7 +3147,6 @@ public: } private: - const char * cast_name; MonotonicityForRange monotonicity_for_range; @@ -4623,26 +4614,12 @@ arguments, result_type, input_rows_count); \ using Types = std::decay_t; using ToDataType = typename Types::LeftType; - if constexpr ( - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) + if constexpr (is_any_of) { ret = createWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); return true; @@ -4662,12 +4639,10 @@ arguments, result_type, input_rows_count); \ ret = createEnumWrapper(from_type, checkAndGetDataType(to_type.get())); return true; } - if constexpr ( - std::is_same_v> || - std::is_same_v> || - std::is_same_v> || - std::is_same_v> || - std::is_same_v) + if constexpr (is_any_of, DataTypeDecimal, + DataTypeDecimal, DataTypeDecimal, + DataTypeDateTime64>) { ret = createDecimalWrapper(from_type, checkAndGetDataType(to_type.get()), requested_result_is_nullable); return true; diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 18bdea28029..57cb03e0349 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -303,7 +303,7 @@ private: }; ColumnsWithTypeAndName arguments_with_cast_type; - arguments_with_cast_type.reserve(arguments.size()); + arguments_with_cast_type.reserve(arguments.size() + 1); arguments_with_cast_type.emplace_back(arguments[0]); arguments_with_cast_type.emplace_back(second_argument); From 060f79862d1c13f9fb723b592e384db009a45823 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 15 Mar 2024 10:49:36 +0000 Subject: [PATCH 241/283] Fix --- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 2 +- ...3010_sum_to_to_count_if_nullable.reference | 66 +++++++++++++++++++ .../03010_sum_to_to_count_if_nullable.sql | 12 +++- 3 files changed, 77 insertions(+), 3 deletions(-) diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 2c41b6dc467..1c2097e7be9 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -54,7 +54,7 @@ public: if (!constant_node) return; - if (auto constant_type = constant_node->getResultType(); !isUInt64(constant_type) && !isInt64(constant_type)) + if (auto constant_type = constant_node->getResultType(); !isNativeInteger(constant_type)) return; const auto & constant_value_literal = constant_node->getValue(); diff --git a/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.reference b/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.reference index 8627f639a03..89e5f639c66 100644 --- a/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.reference +++ b/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.reference @@ -1,2 +1,68 @@ (5,NULL) (5,NULL) +(5,NULL) 
+QUERY id: 0 + PROJECTION COLUMNS + (sumIf(toNullable(1), equals(modulo(number, 2), 0)), NULL) Tuple(Nullable(UInt64), Nullable(Nothing)) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: tuple, function_type: ordinary, result_type: Tuple(Nullable(UInt64), Nullable(Nothing)) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sumIf, function_type: aggregate, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: Nullable(UInt8) + EXPRESSION + FUNCTION id: 7, function_name: toNullable, function_type: ordinary, result_type: Nullable(UInt8) + ARGUMENTS + LIST id: 8, nodes: 1 + CONSTANT id: 9, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 10, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: number, result_type: UInt64, source_id: 15 + CONSTANT id: 16, constant_value: UInt64_2, constant_value_type: UInt8 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: NULL, constant_value_type: Nullable(Nothing) + JOIN TREE + TABLE_FUNCTION id: 15, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 19, nodes: 1 + CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt8 +(5,NULL) +QUERY id: 0 + PROJECTION COLUMNS + (sum(if(equals(modulo(number, 2), 0), toNullable(1), 0)), NULL) Tuple(Nullable(UInt64), Nullable(Nothing)) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: tuple, function_type: ordinary, result_type: Tuple(Nullable(UInt64), Nullable(Nothing)) + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: sumOrNullIf, function_type: aggregate, result_type: Nullable(UInt64) + ARGUMENTS + LIST id: 5, nodes: 2 + CONSTANT id: 6, constant_value: UInt64_1, constant_value_type: Nullable(UInt8) + EXPRESSION + FUNCTION id: 7, function_name: toNullable, function_type: ordinary, result_type: Nullable(UInt8) + ARGUMENTS + LIST id: 8, nodes: 1 + CONSTANT id: 9, constant_value: UInt64_1, constant_value_type: UInt8 + FUNCTION id: 10, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + FUNCTION id: 12, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 13, nodes: 2 + COLUMN id: 14, column_name: number, result_type: UInt64, source_id: 15 + CONSTANT id: 16, constant_value: UInt64_2, constant_value_type: UInt8 + CONSTANT id: 17, constant_value: UInt64_0, constant_value_type: UInt8 + CONSTANT id: 18, constant_value: NULL, constant_value_type: Nullable(Nothing) + JOIN TREE + TABLE_FUNCTION id: 15, alias: __table1, table_function_name: numbers + ARGUMENTS + LIST id: 19, nodes: 1 + CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt8 diff --git a/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.sql b/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.sql index 394cd4f1ea5..b283a69a020 100644 --- a/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.sql +++ b/tests/queries/0_stateless/03010_sum_to_to_count_if_nullable.sql @@ -1,3 +1,11 @@ SET optimize_rewrite_sum_if_to_count_if = 1; -SELECT (sumIf(toNullable(1), (number % 2) = 0), NULL) FROM numbers(10) SETTINGS allow_experimental_analyzer=0; -SELECT (sumIf(toNullable(1), (number % 2) = 0), 
NULL) FROM numbers(10) SETTINGS allow_experimental_analyzer=1; \ No newline at end of file + +SET allow_experimental_analyzer = 0; +SELECT (sumIf(toNullable(1), (number % 2) = 0), NULL) FROM numbers(10); +SELECT (sum(if((number % 2) = 0, toNullable(1), 0)), NULL) FROM numbers(10); + +SET allow_experimental_analyzer = 1; +SELECT (sumIf(toNullable(1), (number % 2) = 0), NULL) FROM numbers(10); +EXPLAIN QUERY TREE SELECT (sumIf(toNullable(1), (number % 2) = 0), NULL) FROM numbers(10); +SELECT (sum(if((number % 2) = 0, toNullable(1), 0)), NULL) FROM numbers(10); +EXPLAIN QUERY TREE SELECT (sum(if((number % 2) = 0, toNullable(1), 0)), NULL) FROM numbers(10); \ No newline at end of file From ad4a25906842e89c526d2651954d1ac3e64fbfb7 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 15 Mar 2024 12:13:35 +0100 Subject: [PATCH 242/283] Restore automerge for approved PRs --- .github/workflows/pull_request.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index c065219f980..2dddde9aa14 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -172,6 +172,7 @@ jobs: run: | cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py + python3 merge_pr.py --check-approved ############################################################################################# From a5bd24205947aea5a93490bb698fa99856e00d89 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Mar 2024 12:36:33 +0100 Subject: [PATCH 243/283] Even more garbage --- src/Functions/FunctionsConversion.cpp | 73 ++++++++++++--------------- 1 file changed, 32 insertions(+), 41 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index f69d3e9146b..7f130b0cc86 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1183,26 +1183,26 @@ struct ConvertImpl { /// Conversion of DateTime to Date: throw off time component. /// Conversion of Date32 to Date. - return DateTimeTransformImpl, false>::execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (std::is_same_v && std::is_same_v) { /// Conversion of DateTime to Date: throw off time component. - return DateTimeTransformImpl::execute( + return DateTimeTransformImpl::template execute( arguments, result_type, input_rows_count); } else if constexpr ((std::is_same_v || std::is_same_v) && std::is_same_v) { /// Conversion from Date/Date32 to DateTime. 
- return DateTimeTransformImpl, false>::execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (std::is_same_v && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count, additions); } /** Special case of converting Int8, Int16, (U)Int32 or (U)Int64 (and also, for convenience, @@ -1220,7 +1220,7 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (( @@ -1229,7 +1229,7 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (( @@ -1240,7 +1240,7 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (( @@ -1249,7 +1249,7 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (( @@ -1258,7 +1258,7 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (( @@ -1269,7 +1269,7 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } /// Special case of converting Int8, Int16, Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to DateTime. 
@@ -1280,14 +1280,14 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (std::is_same_v && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (( @@ -1297,7 +1297,7 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (( @@ -1308,15 +1308,15 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count, additions); + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count); } else if constexpr (std::is_same_v && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count, additions); + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count); } else if constexpr (( std::is_same_v @@ -1324,22 +1324,22 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::execute( - arguments, result_type, input_rows_count, additions); + return DateTimeTransformImpl, false>::template execute( + arguments, result_type, input_rows_count); } /// Conversion of DateTime64 to Date or DateTime: discards fractional part. else if constexpr (std::is_same_v && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl>, false>::execute( + return DateTimeTransformImpl>, false>::template execute( arguments, result_type, input_rows_count, additions); } else if constexpr (std::is_same_v && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl>, false>::execute( + return DateTimeTransformImpl>, false>::template execute( arguments, result_type, input_rows_count, additions); } /// Conversion of Date or DateTime to DateTime64: add zero sub-second part. 
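/// (Editorial note; not part of the patch.) The recurring change in these hunks
/// is mechanical: `::execute(...)` becomes `::template execute(...)` because the
/// `template` keyword is required when naming a member template through a
/// dependent type, and several calls stop forwarding `additions` where the
/// transform takes no additional state.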
@@ -1350,8 +1350,8 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl::execute( - arguments, result_type, input_rows_count, additions); + return DateTimeTransformImpl::template execute( + arguments, result_type, input_rows_count); } else if constexpr (IsDataTypeDateOrDateTime && std::is_same_v) @@ -1573,8 +1573,8 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return ConvertImpl::execute( - arguments, result_type, input_rows_count, additions); + return ConvertImpl::template execute( + arguments, result_type, input_rows_count); } else if constexpr ((std::is_same_v || std::is_same_v) && std::is_same_v) @@ -1974,11 +1974,8 @@ public: using Monotonic = MonotonicityImpl; static constexpr auto name = Name::name; - static constexpr bool to_decimal = - std::is_same_v || std::is_same_v - || std::is_same_v || std::is_same_v; - static constexpr bool to_datetime64 = std::is_same_v; + static constexpr bool to_decimal = IsDataTypeDecimal && !to_datetime64; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } static FunctionPtr create() { return std::make_shared(); } @@ -2319,7 +2316,7 @@ private: } bool done = false; - if constexpr (std::is_same_v || std::is_same_v) + if constexpr (is_any_of) { done = callOnIndexAndDataType(from_type->getTypeId(), call, ConvertDefaultBehaviorTag{}); } @@ -2371,13 +2368,8 @@ class FunctionConvertFromString : public IFunction { public: static constexpr auto name = Name::name; - static constexpr bool to_decimal = - std::is_same_v> || - std::is_same_v> || - std::is_same_v> || - std::is_same_v>; - static constexpr bool to_datetime64 = std::is_same_v; + static constexpr bool to_decimal = IsDataTypeDecimal && !to_datetime64; static FunctionPtr create(ContextPtr) { return std::make_shared(); } @@ -3203,13 +3195,12 @@ private: return createFunctionAdaptor(function, from_type); } - auto wrapper_cast_type = cast_type; - - return [wrapper_cast_type, from_type_index, to_type, date_time_overflow_behavior] - (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *column_nullable, size_t input_rows_count) + return [wrapper_cast_type = cast_type, from_type_index, to_type, date_time_overflow_behavior] + (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) { ColumnPtr result_column; - auto res = callOnIndexAndDataType(from_type_index, [&](const auto & types) -> bool { + auto res = callOnIndexAndDataType(from_type_index, [&](const auto & types) -> bool + { using Types = std::decay_t; using LeftDataType = typename Types::LeftType; using RightDataType = typename Types::RightType; From 61870fa4b5621d2c9d55a397ee463e285675eee2 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Fri, 15 Mar 2024 12:43:11 +0100 Subject: [PATCH 244/283] Disable broken SonarCloud --- .github/workflows/nightly.yml | 103 +++++++++++++++++----------------- 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 93ac2be19b4..0ab02db8d7a 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -45,6 +45,7 @@ jobs: with: data: "${{ needs.RunConfig.outputs.data }}" set_latest: true + SonarCloud: runs-on: [self-hosted, builder] env: @@ -54,53 +55,55 @@ jobs: CC: clang-17 CXX: clang++-17 steps: - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis - filter: tree:0 - submodules: true - - name: Set up JDK 11 - uses: actions/setup-java@v1 - with: - java-version: 11 - - name: Download and set up sonar-scanner - env: - SONAR_SCANNER_DOWNLOAD_URL: https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${{ env.SONAR_SCANNER_VERSION }}-linux.zip - run: | - mkdir -p "$HOME/.sonar" - curl -sSLo "$HOME/.sonar/sonar-scanner.zip" "${{ env.SONAR_SCANNER_DOWNLOAD_URL }}" - unzip -o "$HOME/.sonar/sonar-scanner.zip" -d "$HOME/.sonar/" - echo "$HOME/.sonar/sonar-scanner-${{ env.SONAR_SCANNER_VERSION }}-linux/bin" >> "$GITHUB_PATH" - - name: Download and set up build-wrapper - env: - BUILD_WRAPPER_DOWNLOAD_URL: ${{ env.SONAR_SERVER_URL }}/static/cpp/build-wrapper-linux-x86.zip - run: | - curl -sSLo "$HOME/.sonar/build-wrapper-linux-x86.zip" "${{ env.BUILD_WRAPPER_DOWNLOAD_URL }}" - unzip -o "$HOME/.sonar/build-wrapper-linux-x86.zip" -d "$HOME/.sonar/" - echo "$HOME/.sonar/build-wrapper-linux-x86" >> "$GITHUB_PATH" - - name: Set Up Build Tools - run: | - sudo apt-get update - sudo apt-get install -yq git cmake ccache ninja-build python3 yasm nasm - sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" - - name: Run build-wrapper - run: | - mkdir build - cd build - cmake .. - cd .. 
- build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ - - name: Run sonar-scanner - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} - run: | - sonar-scanner \ - --define sonar.host.url="${{ env.SONAR_SERVER_URL }}" \ - --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \ - --define sonar.projectKey="ClickHouse_ClickHouse" \ - --define sonar.organization="clickhouse-java" \ - --define sonar.cfamily.cpp23.enabled=true \ - --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql" + - name: Disabled + run: echo "The job is disabled since permanently broken" + # - name: Check out repository code + # uses: ClickHouse/checkout@v1 + # with: + # clear-repository: true + # fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis + # filter: tree:0 + # submodules: true + # - name: Set up JDK 11 + # uses: actions/setup-java@v1 + # with: + # java-version: 11 + # - name: Download and set up sonar-scanner + # env: + # SONAR_SCANNER_DOWNLOAD_URL: https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${{ env.SONAR_SCANNER_VERSION }}-linux.zip + # run: | + # mkdir -p "$HOME/.sonar" + # curl -sSLo "$HOME/.sonar/sonar-scanner.zip" "${{ env.SONAR_SCANNER_DOWNLOAD_URL }}" + # unzip -o "$HOME/.sonar/sonar-scanner.zip" -d "$HOME/.sonar/" + # echo "$HOME/.sonar/sonar-scanner-${{ env.SONAR_SCANNER_VERSION }}-linux/bin" >> "$GITHUB_PATH" + # - name: Download and set up build-wrapper + # env: + # BUILD_WRAPPER_DOWNLOAD_URL: ${{ env.SONAR_SERVER_URL }}/static/cpp/build-wrapper-linux-x86.zip + # run: | + # curl -sSLo "$HOME/.sonar/build-wrapper-linux-x86.zip" "${{ env.BUILD_WRAPPER_DOWNLOAD_URL }}" + # unzip -o "$HOME/.sonar/build-wrapper-linux-x86.zip" -d "$HOME/.sonar/" + # echo "$HOME/.sonar/build-wrapper-linux-x86" >> "$GITHUB_PATH" + # - name: Set Up Build Tools + # run: | + # sudo apt-get update + # sudo apt-get install -yq git cmake ccache ninja-build python3 yasm nasm + # sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" + # - name: Run build-wrapper + # run: | + # mkdir build + # cd build + # cmake .. + # cd .. 
+ # build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ + # - name: Run sonar-scanner + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} + # run: | + # sonar-scanner \ + # --define sonar.host.url="${{ env.SONAR_SERVER_URL }}" \ + # --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \ + # --define sonar.projectKey="ClickHouse_ClickHouse" \ + # --define sonar.organization="clickhouse-java" \ + # --define sonar.cfamily.cpp23.enabled=true \ + # --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql" From 5787b7f7c8a38fd407d8c532b99f143685e55a78 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 15 Mar 2024 14:46:26 +0300 Subject: [PATCH 245/283] Update nightly.yml --- .github/workflows/nightly.yml | 62 ----------------------------------- 1 file changed, 62 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 0ab02db8d7a..515236bb826 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -45,65 +45,3 @@ jobs: with: data: "${{ needs.RunConfig.outputs.data }}" set_latest: true - - SonarCloud: - runs-on: [self-hosted, builder] - env: - SONAR_SCANNER_VERSION: 4.8.0.2856 - SONAR_SERVER_URL: "https://sonarcloud.io" - BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed - CC: clang-17 - CXX: clang++-17 - steps: - - name: Disabled - run: echo "The job is disabled since permanently broken" - # - name: Check out repository code - # uses: ClickHouse/checkout@v1 - # with: - # clear-repository: true - # fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis - # filter: tree:0 - # submodules: true - # - name: Set up JDK 11 - # uses: actions/setup-java@v1 - # with: - # java-version: 11 - # - name: Download and set up sonar-scanner - # env: - # SONAR_SCANNER_DOWNLOAD_URL: https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${{ env.SONAR_SCANNER_VERSION }}-linux.zip - # run: | - # mkdir -p "$HOME/.sonar" - # curl -sSLo "$HOME/.sonar/sonar-scanner.zip" "${{ env.SONAR_SCANNER_DOWNLOAD_URL }}" - # unzip -o "$HOME/.sonar/sonar-scanner.zip" -d "$HOME/.sonar/" - # echo "$HOME/.sonar/sonar-scanner-${{ env.SONAR_SCANNER_VERSION }}-linux/bin" >> "$GITHUB_PATH" - # - name: Download and set up build-wrapper - # env: - # BUILD_WRAPPER_DOWNLOAD_URL: ${{ env.SONAR_SERVER_URL }}/static/cpp/build-wrapper-linux-x86.zip - # run: | - # curl -sSLo "$HOME/.sonar/build-wrapper-linux-x86.zip" "${{ env.BUILD_WRAPPER_DOWNLOAD_URL }}" - # unzip -o "$HOME/.sonar/build-wrapper-linux-x86.zip" -d "$HOME/.sonar/" - # echo "$HOME/.sonar/build-wrapper-linux-x86" >> "$GITHUB_PATH" - # - name: Set Up Build Tools - # run: | - # sudo apt-get update - # sudo apt-get install -yq git cmake ccache ninja-build python3 yasm nasm - # sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" - # - name: Run build-wrapper - # run: | - # mkdir build - # cd build - # cmake .. - # cd .. 
- # build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/ - # - name: Run sonar-scanner - # env: - # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }} - # run: | - # sonar-scanner \ - # --define sonar.host.url="${{ env.SONAR_SERVER_URL }}" \ - # --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \ - # --define sonar.projectKey="ClickHouse_ClickHouse" \ - # --define sonar.organization="clickhouse-java" \ - # --define sonar.cfamily.cpp23.enabled=true \ - # --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql" From 16abbcd095c4034a49921288ff0d79835addbf16 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 15 Mar 2024 12:54:13 +0100 Subject: [PATCH 246/283] Revert "Updated format settings references in the docs (datetime.md)" --- docs/en/sql-reference/data-types/datetime.md | 8 ++++---- docs/ru/sql-reference/data-types/datetime.md | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index a465106c2ff..1adff18f598 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -36,9 +36,9 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn’t explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter. -ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings-formats.md#date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function. +ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function. -When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format) setting. +When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting. ## Examples @@ -147,8 +147,8 @@ Time shifts for multiple days. 
Some pacific islands changed their timezone offse - [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) - [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) - [Functions for working with arrays](../../sql-reference/functions/array-functions.md) -- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format) -- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format) +- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format) +- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format) - [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone) - [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 25e87794147..57f24786bb7 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -27,9 +27,9 @@ DateTime([timezone]) Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`. -ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings-formats.md#date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). +ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/index.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). -При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format). +При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/index.md#settings-date_time_input_format). 
## Примеры {#primery} @@ -119,8 +119,8 @@ FROM dt - [Функции преобразования типов](../../sql-reference/functions/type-conversion-functions.md) - [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) - [Функции для работы с массивами](../../sql-reference/functions/array-functions.md) -- [Настройка `date_time_input_format`](../../operations/settings/settings-formats.md#date_time_input_format) -- [Настройка `date_time_output_format`](../../operations/settings/settings-formats.md#date_time_output_format) +- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_input_format) +- [Настройка `date_time_output_format`](../../operations/settings/index.md) - [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone) - [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime) From f91c45f562c0cdef32c9403e90eb1e52e818064c Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 15 Mar 2024 12:03:49 +0000 Subject: [PATCH 247/283] fix `01599_multiline_input_and_singleline_comments` properly --- ...multiline_input_and_singleline_comments.sh | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh b/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh index f1acd39136f..07c2e345009 100755 --- a/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh +++ b/tests/queries/0_stateless/01599_multiline_input_and_singleline_comments.sh @@ -2,7 +2,6 @@ log_user 0 -# In some places `-timeout 1` is used to avoid expect to always wait for the whole timeout set timeout 60 match_max 100000 @@ -14,15 +13,23 @@ expect ":) " # Make a query send -- "SELECT 1\r" -expect -timeout 1 ":-] " send -- "-- xxx\r" -expect -timeout 1 ":-] " send -- ", 2\r" -expect -timeout 1 ":-] " -send -- ";\r" +send -- ";" + +# For some reason this sleep is required for this test to work properly +sleep 1 +send -- "\r" + +expect { + "│ 1 │ 2 │" { } + timeout { exit 1 } +} -expect "│ 1 │ 2 │" expect ":) " -send -- "\4" -expect eof +send -- "" +expect { + eof { exit 0 } + timeout { exit 1 } +} From 20bad992a41a37b65a477dcd40bb1c70b5c4344c Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 15 Mar 2024 12:46:15 +0000 Subject: [PATCH 248/283] CI: fix checkout action version #do_not_test --- .github/workflows/reusable_build.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/reusable_build.yml b/.github/workflows/reusable_build.yml index d2fe6f5dbe7..80d78d93e1b 100644 --- a/.github/workflows/reusable_build.yml +++ b/.github/workflows/reusable_build.yml @@ -43,8 +43,7 @@ jobs: runs-on: [self-hosted, '${{inputs.runner_type}}'] steps: - name: Check out repository code - # WIP: temporary try commit with limited perallelization of checkout - uses: ClickHouse/checkout@0be3f7b3098bae494d3ef5d29d2e0676fb606232 + uses: ClickHouse/checkout@v1 with: clear-repository: true ref: ${{ fromJson(inputs.data).git_ref }} From bf8ae84cb6dba368966cb220b4ca5b3c6deef106 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Fri, 15 Mar 2024 13:43:03 +0000 Subject: [PATCH 249/283] fixup! 
CI: skip hdfs tests for arm #do_not_test #batch_0 #no_merge_commit --- tests/integration/helpers/cluster.py | 7 +++++++ .../integration/test_allowed_url_from_config/test.py | 11 +++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 52c0d8a8ee5..194c66fa5c3 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -1,8 +1,10 @@ import base64 import errno +from functools import cached_property import http.client import logging import os +import platform import stat import os.path as p import pprint @@ -4746,3 +4748,8 @@ class ClickHouseKiller(object): def __exit__(self, exc_type, exc_val, exc_tb): self.clickhouse_node.start_clickhouse() + + +@cached_property +def is_arm(): + return any(arch in platform.processor().lower() for arch in ("arm, aarch")) diff --git a/tests/integration/test_allowed_url_from_config/test.py b/tests/integration/test_allowed_url_from_config/test.py index fb7564ae9d3..df8934aa69b 100644 --- a/tests/integration/test_allowed_url_from_config/test.py +++ b/tests/integration/test_allowed_url_from_config/test.py @@ -1,6 +1,5 @@ -import platform import pytest -from helpers.cluster import ClickHouseCluster +from helpers.cluster import ClickHouseCluster, is_arm cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance("node1", main_configs=["configs/config_with_hosts.xml"]) @@ -18,7 +17,7 @@ node5 = cluster.add_instance( ) node6 = cluster.add_instance("node6", main_configs=["configs/config_for_remote.xml"]) -if platform.processor() != "arm": +if not is_arm(): node7 = cluster.add_instance( "node7", main_configs=["configs/config_for_redirect.xml"], with_hdfs=True ) @@ -273,7 +272,7 @@ def test_table_function_remote(start_cluster): ) -@pytest.mark.skipif(platform.processor() == "arm", reason="skip for ARM") +@pytest.mark.skipif(is_arm(), reason="skip for ARM") def test_redirect(start_cluster): hdfs_api = start_cluster.hdfs_api @@ -288,7 +287,7 @@ def test_redirect(start_cluster): node7.query("DROP TABLE table_test_7_1") -@pytest.mark.skipif(platform.processor() == "arm", reason="skip for ARM") +@pytest.mark.skipif(is_arm(), reason="skip for ARM") def test_HDFS(start_cluster): assert "not allowed" in node7.query_and_get_error( "CREATE TABLE table_test_7_2 (word String) ENGINE=HDFS('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'CSV')" @@ -298,7 +297,7 @@ def test_HDFS(start_cluster): ) -@pytest.mark.skipif(platform.processor() == "arm", reason="skip for ARM") +@pytest.mark.skipif(is_arm(), reason="skip for ARM") def test_schema_inference(start_cluster): error = node7.query_and_get_error("desc url('http://test.com`, 'TSVRaw'')") assert error.find("ReadWriteBufferFromHTTPBase") == -1 From ff456ffb3399a024657930623b377f91def8ad61 Mon Sep 17 00:00:00 2001 From: Max K Date: Fri, 15 Mar 2024 15:08:03 +0100 Subject: [PATCH 250/283] Revert "CI: ARM integration tests: disable tests with HDFS " --- tests/integration/helpers/cluster.py | 7 ------- .../test_allowed_url_from_config/test.py | 13 ++++--------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 194c66fa5c3..52c0d8a8ee5 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -1,10 +1,8 @@ import base64 import errno -from functools import cached_property import http.client import logging import os -import 
platform import stat import os.path as p import pprint @@ -4748,8 +4746,3 @@ class ClickHouseKiller(object): def __exit__(self, exc_type, exc_val, exc_tb): self.clickhouse_node.start_clickhouse() - - -@cached_property -def is_arm(): - return any(arch in platform.processor().lower() for arch in ("arm, aarch")) diff --git a/tests/integration/test_allowed_url_from_config/test.py b/tests/integration/test_allowed_url_from_config/test.py index df8934aa69b..3106cf12702 100644 --- a/tests/integration/test_allowed_url_from_config/test.py +++ b/tests/integration/test_allowed_url_from_config/test.py @@ -1,5 +1,5 @@ import pytest -from helpers.cluster import ClickHouseCluster, is_arm +from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance("node1", main_configs=["configs/config_with_hosts.xml"]) @@ -16,11 +16,9 @@ node5 = cluster.add_instance( "node5", main_configs=["configs/config_without_allowed_hosts.xml"] ) node6 = cluster.add_instance("node6", main_configs=["configs/config_for_remote.xml"]) - -if not is_arm(): - node7 = cluster.add_instance( - "node7", main_configs=["configs/config_for_redirect.xml"], with_hdfs=True - ) +node7 = cluster.add_instance( + "node7", main_configs=["configs/config_for_redirect.xml"], with_hdfs=True +) @pytest.fixture(scope="module") @@ -272,7 +270,6 @@ def test_table_function_remote(start_cluster): ) -@pytest.mark.skipif(is_arm(), reason="skip for ARM") def test_redirect(start_cluster): hdfs_api = start_cluster.hdfs_api @@ -287,7 +284,6 @@ def test_redirect(start_cluster): node7.query("DROP TABLE table_test_7_1") -@pytest.mark.skipif(is_arm(), reason="skip for ARM") def test_HDFS(start_cluster): assert "not allowed" in node7.query_and_get_error( "CREATE TABLE table_test_7_2 (word String) ENGINE=HDFS('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'CSV')" @@ -297,7 +293,6 @@ def test_HDFS(start_cluster): ) -@pytest.mark.skipif(is_arm(), reason="skip for ARM") def test_schema_inference(start_cluster): error = node7.query_and_get_error("desc url('http://test.com`, 'TSVRaw'')") assert error.find("ReadWriteBufferFromHTTPBase") == -1 From 061cd5a53d141136adac7606a68042ce8355afe7 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 15 Mar 2024 14:22:46 +0000 Subject: [PATCH 251/283] Fixup #ci_set_analyzer --- src/Storages/StorageMerge.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 52362eb5cb8..4a3035dffce 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -1056,7 +1056,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( String table_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? 
"_table" : table_alias + "._table"; if (has_database_virtual_column && common_header.has(database_column) - && (storage_stage == QueryProcessingStage::FetchColumns || (dynamic_cast(&storage_snapshot_->storage) != nullptr && !pipe_header.has("'" + database_name + "'_String")))) + && (storage_stage == QueryProcessingStage::FetchColumns || !pipe_header.has("'" + database_name + "'_String"))) { ColumnWithTypeAndName column; column.name = database_column; @@ -1072,7 +1072,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( } if (has_table_virtual_column && common_header.has(table_column) - && (storage_stage == QueryProcessingStage::FetchColumns || (dynamic_cast(&storage_snapshot_->storage) != nullptr && !pipe_header.has("'" + table_name + "'_String")))) + && (storage_stage == QueryProcessingStage::FetchColumns || !pipe_header.has("'" + table_name + "'_String"))) { ColumnWithTypeAndName column; column.name = table_column; @@ -1121,7 +1121,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. convertAndFilterSourceStream( - header, modified_query_info, storage_snapshot_, aliases, row_policy_data_opt, context, *builder, processed_stage); + header, modified_query_info, storage_snapshot_, aliases, row_policy_data_opt, context, *builder, storage_stage); } return builder; From 7a8399317398cc32faa42b24c0df9227eef530fd Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 15 Mar 2024 13:09:13 +0000 Subject: [PATCH 252/283] Convert test 02998_system_dns_cache_table to smoke --- .../02998_system_dns_cache_table.reference | 3 ++- .../02998_system_dns_cache_table.sh | 26 ------------------- .../02998_system_dns_cache_table.sql | 3 +++ 3 files changed, 5 insertions(+), 27 deletions(-) delete mode 100755 tests/queries/0_stateless/02998_system_dns_cache_table.sh create mode 100644 tests/queries/0_stateless/02998_system_dns_cache_table.sql diff --git a/tests/queries/0_stateless/02998_system_dns_cache_table.reference b/tests/queries/0_stateless/02998_system_dns_cache_table.reference index ed6cb000142..600d0bc0b39 100644 --- a/tests/queries/0_stateless/02998_system_dns_cache_table.reference +++ b/tests/queries/0_stateless/02998_system_dns_cache_table.reference @@ -1 +1,2 @@ -localhost 127.0.0.1 IPv4 1 +hostname ip_address ip_family cached_at +String String Enum8(\'IPv4\' = 0, \'IPv6\' = 1, \'UNIX_LOCAL\' = 2) DateTime diff --git a/tests/queries/0_stateless/02998_system_dns_cache_table.sh b/tests/queries/0_stateless/02998_system_dns_cache_table.sh deleted file mode 100755 index b74fc00ab3b..00000000000 --- a/tests/queries/0_stateless/02998_system_dns_cache_table.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -# Retries are necessary because the DNS cache may be flushed before second statement is executed -i=0 -retries=5 -while [[ $i -lt $retries ]]; do - ${CLICKHOUSE_CURL} -sS --fail --data "SELECT * FROM url('http://localhost:8123/ping', CSV, 'auto', headers())" "${CLICKHOUSE_URL}" | grep -oP -q 'Ok.' 
|| continue - - RECORDS=$(${CLICKHOUSE_CURL} -sS --fail --data "SELECT hostname, ip_address, ip_family, (isNotNull(cached_at) AND cached_at > '1970-01-01 00:00:00') FROM system.dns_cache WHERE hostname = 'localhost' and ip_family = 'IPv4';" "${CLICKHOUSE_URL}") - - if [[ -n "${RECORDS}" ]]; then - echo "${RECORDS}" - exit 0 - fi - - ((++i)) - sleep 0.2 -done - -echo "All tries to fetch entries for localhost failed, no rows returned. -Probably the DNS cache is disabled or the ClickHouse instance not responds to ping." -exit 1 diff --git a/tests/queries/0_stateless/02998_system_dns_cache_table.sql b/tests/queries/0_stateless/02998_system_dns_cache_table.sql new file mode 100644 index 00000000000..0ceb3d8a95a --- /dev/null +++ b/tests/queries/0_stateless/02998_system_dns_cache_table.sql @@ -0,0 +1,3 @@ +SELECT hostname, ip_address, ip_family, cached_at FROM system.dns_cache +LIMIT 0 +FORMAT TSVWithNamesAndTypes; From 9bb697eb171f3e4d2baed381591fae5493beb80e Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 15 Mar 2024 17:43:48 +0100 Subject: [PATCH 253/283] Fix finishing a failed RESTORE. --- src/Backups/RestorerFromBackup.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index bae2f9aaa25..87c143f0fe2 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -101,10 +101,12 @@ RestorerFromBackup::RestorerFromBackup( RestorerFromBackup::~RestorerFromBackup() { - if (!futures.empty()) + /// If an exception occurs we can come here to the destructor having some tasks still unfinished. + /// We have to wait until they finish. + if (getNumFutures() > 0) { - LOG_ERROR(log, "RestorerFromBackup must not be destroyed while {} tasks are still running", futures.size()); - chassert(false && "RestorerFromBackup must not be destroyed while some tasks are still running"); + LOG_INFO(log, "Waiting for {} tasks to finish", getNumFutures()); + waitFutures(); } } From c97731fb8c7ac9cab37445de41a4bf04838c4d77 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 15 Mar 2024 18:46:23 +0100 Subject: [PATCH 254/283] Correctly process last stacktrace --- .../postprocess-traces/postprocess-traces.pl | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/utils/postprocess-traces/postprocess-traces.pl b/utils/postprocess-traces/postprocess-traces.pl index 476fb46418f..3e50f64d864 100755 --- a/utils/postprocess-traces/postprocess-traces.pl +++ b/utils/postprocess-traces/postprocess-traces.pl @@ -8,6 +8,19 @@ use Data::Dumper; my @current_stack = (); my $grouped_stacks = {}; +sub process_stacktrace +{ + my $group = \$grouped_stacks; + for my $frame (reverse @current_stack) + { + $$group->{count} ||= 0; + ++$$group->{count}; + $group = \$$group->{children}{$frame}; + } + + @current_stack = (); +} + while (my $line = <>) { chomp $line; @@ -21,18 +34,12 @@ while (my $line = <>) if ($line eq '') { - my $group = \$grouped_stacks; - for my $frame (reverse @current_stack) - { - $$group->{count} ||= 0; - ++$$group->{count}; - $group = \$$group->{children}{$frame}; - } - - @current_stack = (); + process_stacktrace(); } } +process_stacktrace(); + sub print_group { my $group = shift; From bc6cd6e769c165a2aaffd06a2960bbce3cb616f0 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 15 Mar 2024 22:30:49 +0000 Subject: [PATCH 255/283] fix test_polymorphic_parts --- tests/integration/test_polymorphic_parts/test.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 
deletion(-) diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index 01bc4804c9f..b91a72c5534 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -332,7 +332,13 @@ def test_different_part_types_on_replicas(start_cluster, table, part_type): for _ in range(3): insert_random_data(table, leader, 100) - leader.query("OPTIMIZE TABLE {} FINAL".format(table)) + exec_query_with_retry( + leader, + "OPTIMIZE TABLE {} FINAL".format(table), + settings={"optimize_throw_if_noop": 1}, + silent=True, + ) + follower.query("SYSTEM SYNC REPLICA {}".format(table), timeout=20) expected = "{}\t1\n".format(part_type) From 3a8974e7e1bbad9d6ae49c3624609b69eedf4b1c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Mar 2024 16:23:12 +0100 Subject: [PATCH 256/283] A definitive guide to CAST --- src/Functions/formatDateTime.cpp | 4 +- src/Functions/parseDateTime.cpp | 2 +- .../03011_definitive_guide_to_cast.reference | 56 ++++ .../03011_definitive_guide_to_cast.sql | 252 ++++++++++++++++++ 4 files changed, 311 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03011_definitive_guide_to_cast.reference create mode 100644 tests/queries/0_stateless/03011_definitive_guide_to_cast.sql diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 01ef2a733c8..87438365901 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -1832,10 +1832,10 @@ using FunctionFromUnixTimestampInJodaSyntax = FunctionFormatDateTimeImpl(); - factory.registerAlias("DATE_FORMAT", FunctionFormatDateTime::name); + factory.registerAlias("DATE_FORMAT", FunctionFormatDateTime::name, FunctionFactory::CaseInsensitive); factory.registerFunction(); - factory.registerAlias("FROM_UNIXTIME", FunctionFromUnixTimestamp::name); + factory.registerAlias("FROM_UNIXTIME", FunctionFromUnixTimestamp::name, FunctionFactory::CaseInsensitive); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp index 18882177c90..7a0d7c75774 100644 --- a/src/Functions/parseDateTime.cpp +++ b/src/Functions/parseDateTime.cpp @@ -1942,7 +1942,7 @@ namespace REGISTER_FUNCTION(ParseDateTime) { factory.registerFunction(); - factory.registerAlias("TO_UNIXTIME", FunctionParseDateTime::name); + factory.registerAlias("TO_UNIXTIME", FunctionParseDateTime::name, FunctionFactory::CaseInsensitive); factory.registerFunction(); factory.registerFunction(); factory.registerAlias("str_to_date", FunctionParseDateTimeOrNull::name, FunctionFactory::CaseInsensitive); diff --git a/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference b/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference new file mode 100644 index 00000000000..f8f37fa7807 --- /dev/null +++ b/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference @@ -0,0 +1,56 @@ +123 +2009-02-14 00:31:30 +[1,2,3] +123 Nullable(UInt8) +\N Nullable(UInt8) +1 +255 +['Hello','wo\'rld\\'] +Hello wo\\\'rld\\\\ +wo\'rld\\ wo\\\'rld\\\\ +123 +123 +123 +1 -1 +[] [] Array(Nothing) Array(Array(Array(Tuple(UInt64, String)))) +123 +123 +123 +123 +123 +123 +123 +String 123 +123 UInt8 +200 UInt8 +123 +123 +1.1 +1.10000000000000016387 +18446744073709551615 +[1.1,2.3] +[1.10000000000000016387,2.29999999999999967236] +Row 1: +────── +CAST('1.1', 'Decimal(30, 20)'): 1.1 +CAST('1.1', 'Decimal(30, 20)'): 1.1 +CAST(plus(1, 1), 'UInt8'): 2 +-1 
+\N
+0
+255
+123
+Hello\0\0\0\0\0
+Hello\0\0\0\0\0
+123.45
+2024-04-25 01:02:03
+2024-04-25 01:02:03.000000
+2024-04-25 01:02:03
+2024-04-25 01:02:03.000000
+2024-03-16 16:22:33
+2024-03-16 16:22:33
+2024-04-25 2024-01-01 02:03:04 1 12
+2024-04-25 2024-01-01 02:03:04.000000 2009-02-14 00:31:30
+2024-04-25 2024-01-01 02:03:04.000000 2009-02-14 00:31:30
+1986-04-25 13:00:00
+14
diff --git a/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql
new file mode 100644
index 00000000000..771123b153a
--- /dev/null
+++ b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql
@@ -0,0 +1,252 @@
+SET session_timezone = 'Europe/Amsterdam';
+
+-- Type conversion functions and operators.
+
+
+-- 1. SQL standard CAST operator: `CAST(value AS Type)`.
+
+SELECT CAST(123 AS String);
+
+-- It converts between various data types, including parameterized data types
+
+SELECT CAST(1234567890 AS DateTime('Europe/Amsterdam'));
+
+-- and composite data types:
+
+SELECT CAST('[1, 2, 3]' AS Array(UInt8));
+
+-- Its return type depends on the setting `cast_keep_nullable`. If it is enabled and the source argument type is Nullable, the resulting data type will also be Nullable, even if it is not written explicitly:
+
+SET cast_keep_nullable = 1;
+SELECT CAST(x AS UInt8) AS y, toTypeName(y) FROM VALUES('x Nullable(String)', ('123'), ('NULL'));
+
+SET cast_keep_nullable = 0;
+SELECT CAST(x AS UInt8) AS y, toTypeName(y) FROM VALUES('x Nullable(String)', ('123'), ('NULL')); -- { serverError CANNOT_PARSE_TEXT }
+
+-- There are various type conversion rules, some worth noting.
+
+-- Conversion between numeric types can involve implementation-defined overflow:
+
+SELECT CAST(257 AS UInt8);
+SELECT CAST(-1 AS UInt8);
+
+-- Conversion from a string acts like parsing, and for composite data types like Array, Tuple, it works in the same way as from the `Values` data format:
+
+SELECT CAST($$['Hello', 'wo\'rld\\']$$ AS Array(String));
+
+-- '
+-- While for simple data types it does not interpret escape sequences:
+
+SELECT arrayJoin(CAST($$['Hello', 'wo\'rld\\']$$ AS Array(String))) AS x, CAST($$wo\'rld\\$$ AS FixedString(9)) AS y;
+
+-- The operator is case-insensitive:
+
+SELECT CAST(123 AS String);
+SELECT cast(123 AS String);
+SELECT Cast(123 AS String);
+
+-- Conversion from a floating-point value to an integer will involve truncation towards zero:
+
+SELECT CAST(1.9, 'Int64'), CAST(-1.9, 'Int64');
+
+-- Conversion from NULL into a non-Nullable type will throw an exception, as well as conversions from non-finite floating-point values (NaN, inf, -inf) to an integer, or conversion between arrays of different dimensions.
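+
+-- A quick sketch of those failure modes (kept commented out so they do not change this test's
+-- reference output; the error descriptions are paraphrased and the exact codes may differ between versions):
+
+-- SELECT CAST(NULL AS UInt8);          -- throws: NULL is not representable in a non-Nullable type
+-- SELECT CAST(nan AS UInt64);          -- throws: NaN cannot be converted to an integer
+-- SELECT CAST([[1]] AS Array(UInt8));  -- throws: the array dimensions do not match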
+
+-- However, you might find it amusing that an empty array of the Nothing data type can be converted to arrays of any dimensions:
+
+SELECT [] AS x, CAST(x AS Array(Array(Array(Tuple(UInt64, String))))) AS y, toTypeName(x), toTypeName(y);
+
+
+-- 2. The functional form of this operator: `CAST(value, 'Type')`:
+
+SELECT CAST(123, 'String');
+
+-- This form is equivalent to the operator form. Keep in mind that the type has to be a constant expression:
+
+SELECT CAST(123, 'Str'||'ing'); -- this works.
+
+-- This does not work: SELECT materialize('String') AS type, CAST(123, type);
+
+-- It is also case-insensitive:
+
+SELECT CasT(123, 'String');
+
+-- The functional form exists for the consistency of implementation (as every operator also exists in the functional form and the functional form is represented in the query's Abstract Syntax Tree). Anyway, the functional form also makes sense for users when they need to construct a data type name from a constant expression, or when they want to generate a query programmatically.
+
+-- It's worth noting that the operator form does not allow specifying the type name as a string literal:
+
+-- This does not work: SELECT CAST(123 AS 'String');
+
+-- It only allows the type as an identifier, either as a bare word:
+
+SELECT CAST(123 AS String);
+
+-- Or as MySQL- or PostgreSQL-style quoted identifiers:
+
+SELECT CAST(123 AS `String`);
+SELECT CAST(123 AS "String");
+
+-- While the functional form only allows the type name as a string literal:
+
+SELECT CAST(123, 'String'); -- works
+SELECT CAST(123, String); -- { serverError UNKNOWN_IDENTIFIER }
+
+-- However, you can cheat:
+
+SELECT 'String' AS String, CAST(123, String);
+
+
+-- 3. The internal function `_CAST`, which differs from `CAST` only in that it does not depend on the value of the `cast_keep_nullable` setting and other settings.
+
+-- This is needed when ClickHouse has to persist an expression for future use, such as in table definitions, including the primary and partition keys and other indices.
+
+-- The function is not intended to be used directly. When a user uses a regular `CAST` operator or function in a table definition, it is transparently converted to `_CAST` to persist its behavior. However, the user can still use the internal version directly:
+
+SELECT _CAST(x, 'UInt8') AS y, toTypeName(y) FROM VALUES('x Nullable(String)', ('123'), ('456'));
+
+-- There is no operator form of this function:
+
+-- does not work, here UInt8 is interpreted as an alias for the value:
+SELECT _CAST(123 AS UInt8); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH }
+
+SELECT CAST(123 AS UInt8); -- works
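+
+-- A sketch of where that substitution becomes visible (commented out to keep this test's
+-- reference output unchanged; the table name is made up, and how the persisted expression is
+-- rendered may vary between versions):
+
+-- CREATE TABLE persisted_cast_demo (x String, y UInt8 DEFAULT CAST(x, 'UInt8')) ENGINE = Memory;
+-- SHOW CREATE TABLE persisted_cast_demo; -- the DEFAULT expression may be shown with `_CAST`
+-- DROP TABLE persisted_cast_demo;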
+
+
+-- 4. PostgreSQL-style cast syntax `::`
+
+SELECT 123::String;
+
+-- It has a difference from the `CAST` operator: if it is applied to a simple literal value, instead of performing a type conversion, it invokes the SQL parser directly on the corresponding text fragment of the query. The most important cases are the floating-point and decimal types.
+
+-- In this example, we parse `1.1` as Decimal without involving any type conversion:
+
+SELECT 1.1::Decimal(30, 20);
+
+-- In this example, `1.1` is first parsed as usual, yielding a Float64 value, and then converted to Decimal, producing a wrong result:
+
+SELECT CAST(1.1 AS Decimal(30, 20));
+
+-- We can change this behavior in the future.
+
+-- Another example:
+
+SELECT -1::UInt64; -- { serverError CANNOT_PARSE_NUMBER }
+
+SELECT CAST(-1 AS UInt64); -- conversion with overflow
+
+-- For composite data types, if a value is a literal, it is parsed directly:
+
+SELECT [1.1, 2.3]::Array(Decimal(30, 20));
+
+-- But if the value contains expressions, the usage of the `::` operator will be equivalent to invoking the CAST operator on the expression:
+
+SELECT [1.1, 2.3 + 0]::Array(Decimal(30, 20));
+
+-- The automatic column name for the result of an application of the `::` operator may be the same as for the result of an application of the CAST operator to a string containing the corresponding fragment of the query or to a corresponding expression:
+
+SELECT 1.1::Decimal(30, 20), CAST('1.1' AS Decimal(30, 20)), (1+1)::UInt8 FORMAT Vertical;
+
+-- The operator has the highest precedence among operators:
+
+SELECT 1-1::String; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+
+-- But one interesting example is the unary minus. Here the minus is not an operator, but part of the numeric literal:
+
+SELECT -1::String;
+
+-- Here it is an operator:
+
+SELECT 1 AS x, -x::String; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
+
+
+-- 5. Accurate casting functions: `accurateCast`, `accurateCastOrNull`, `accurateCastOrDefault`.
+
+-- These functions check if the value is exactly representable in the target data type.
+
+-- The function `accurateCast` performs the conversion or throws an exception if the value is not exactly representable:
+
+SELECT accurateCast(1.123456789, 'Float32'); -- { serverError CANNOT_CONVERT_TYPE }
+
+-- The function `accurateCastOrNull` always wraps the target type into Nullable, and returns NULL if the value is not exactly representable:
+
+SELECT accurateCastOrNull(1.123456789, 'Float32');
+
+-- The function `accurateCastOrDefault` takes an additional parameter, which must be of the target type, and returns it if the value is not exactly representable:
+
+SELECT accurateCastOrDefault(-1, 'UInt64', 0::UInt64);
+
+-- These functions are case-sensitive and there are no corresponding operators:
+
+SELECT ACCURATECAST(1, 'String'); -- { serverError UNKNOWN_FUNCTION }.
+
+
+-- 6. Explicit conversion functions:
+
+-- `toString`, `toFixedString`,
+-- `toUInt8`, `toUInt16`, `toUInt32`, `toUInt64`, `toUInt128`, `toUInt256`,
+-- `toInt8`, `toInt16`, `toInt32`, `toInt64`, `toInt128`, `toInt256`,
+-- `toFloat32`, `toFloat64`,
+-- `toDecimal32`, `toDecimal64`, `toDecimal128`, `toDecimal256`,
+-- `toDate`, `toDate32`, `toDateTime`, `toDateTime64`,
+-- `toUUID`, `toIPv4`, `toIPv6`,
+-- `toIntervalNanosecond`, `toIntervalMicrosecond`, `toIntervalMillisecond`,
+-- `toIntervalSecond`, `toIntervalMinute`, `toIntervalHour`,
+-- `toIntervalDay`, `toIntervalWeek`, `toIntervalMonth`, `toIntervalQuarter`, `toIntervalYear`
+
+-- These functions work under the same rules as the CAST operator, and can be thought of as elementary implementation parts of that operator. They allow implementation-defined overflow while converting between numeric types.
+
+SELECT toUInt8(-1);
+
+-- These are ClickHouse-native conversion functions. They take an argument with the input value, and for some data types (`FixedString`, `DateTime`, `DateTime64`, `Decimal`s) the subsequent arguments are constant expressions, defining the parameters of these data types, or the rules to interpret the source value.
+
+SELECT toFloat64(123); -- no arguments
+SELECT toFixedString('Hello', 10) FORMAT TSV; -- the parameter of the FixedString data type, the function returns FixedString(10)
+SELECT toFixedString('Hello', 5 + 5) FORMAT TSV; -- it can be a constant expression
+
+SELECT toDecimal32('123.456', 2); -- the scale of the Decimal data type
+
+SELECT toDateTime('2024-04-25 01:02:03', 'Europe/Amsterdam'); -- the time zone of DateTime
+SELECT toDateTime64('2024-04-25 01:02:03', 6, 'Europe/Amsterdam'); -- the scale of DateTime64 and its time zone
+
+-- The length of FixedString and the scale of Decimal and DateTime64 types are mandatory arguments, while the time zone of the DateTime data type is optional.
+
+-- If the time zone is not specified, the time zone of the argument's data type is used, and if the argument is not a date/time value, the session time zone is used.
+
+SELECT toDateTime('2024-04-25 01:02:03');
+SELECT toDateTime64('2024-04-25 01:02:03', 6);
+
+-- Here the time zone can be specified as the rule of interpretation of the value during conversion:
+
+SELECT toString(now(), 'Europe/Amsterdam');
+SELECT toString(now());
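+
+-- For comparison, a sketch of how these functions line up with the CAST operator (commented out
+-- so this test's reference output stays unchanged; the pairs are expected to behave identically):
+
+-- SELECT toUInt8(257), CAST(257 AS UInt8);       -- both overflow to 1
+-- SELECT toDate('2024-04-25'), CAST('2024-04-25' AS Date);
+-- SELECT toString(now(), 'UTC'), CAST(now() AS String); -- only the explicit function accepts a time zone argument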
+
+
+-- 7. SQL-compatibility type-defining operators:
+
+SELECT DATE '2024-04-25', TIMESTAMP '2024-01-01 02:03:04', INTERVAL 1 MINUTE, INTERVAL '12 hour';
+
+-- These operators are interpreted as the corresponding explicit conversion functions.
+
+
+-- 8. SQL-compatibility aliases for explicit conversion functions:
+
+SELECT DATE('2024-04-25'), TIMESTAMP('2024-01-01 02:03:04'), FROM_UNIXTIME(1234567890);
+
+-- These functions exist for compatibility with MySQL. They are case-insensitive.
+
+SELECT date '2024-04-25', timeSTAMP('2024-01-01 02:03:04'), From_Unixtime(1234567890);
+
+
+-- 9. Specialized conversion functions:
+
+-- `parseDateTimeBestEffort`, `parseDateTimeBestEffortUS`, `parseDateTime64BestEffort`, `parseDateTime64BestEffortUS`, `toUnixTimestamp`
+
+-- These functions are similar to explicit conversion functions, but provide special rules on how the conversion is performed.
+
+SELECT parseDateTimeBestEffort('25 Apr 1986 1pm');
+
+
+-- 10. Functions for converting date and time data types to their components, or for rounding them.
+
+SELECT toDayOfMonth(toDateTime(1234567890));
+
+-- These functions are covered in a separate topic.
From f5a13a023bc1d19b688ec98902411252796f96d2 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Sat, 16 Mar 2024 18:58:16 +0100
Subject: [PATCH 257/283] Rename CacheGuard -> CachePriorityGuard (because it
 is more correct)
---
 src/Interpreters/Cache/EvictionCandidates.cpp |  2 +-
 src/Interpreters/Cache/EvictionCandidates.h   |  2 +-
 src/Interpreters/Cache/FileCache.cpp          |  6 ++--
 src/Interpreters/Cache/FileCache.h            |  8 ++---
 src/Interpreters/Cache/Guards.h               | 22 ++++++-------
 src/Interpreters/Cache/IFileCachePriority.h   | 26 +++++++--------
 .../Cache/LRUFileCachePriority.cpp            | 26 +++++++--------
 src/Interpreters/Cache/LRUFileCachePriority.h | 32 +++++++++----------
 src/Interpreters/Cache/QueryLimit.cpp         | 12 +++----
 src/Interpreters/Cache/QueryLimit.h           | 12 +++----
 .../Cache/SLRUFileCachePriority.cpp           | 24 +++++++-------
 .../Cache/SLRUFileCachePriority.h             | 22 ++++++-------
 12 files changed, 97 insertions(+), 97 deletions(-)

diff --git a/src/Interpreters/Cache/EvictionCandidates.cpp b/src/Interpreters/Cache/EvictionCandidates.cpp
index 7dceab4f95f..f1ae2baa347 100644
--- a/src/Interpreters/Cache/EvictionCandidates.cpp
+++ b/src/Interpreters/Cache/EvictionCandidates.cpp
@@ -32,7 +32,7 @@ void EvictionCandidates::add(LockedKey & locked_key, const FileSegmentMetadataPt
     ++candidates_size;
 }

-void EvictionCandidates::evict(FileCacheQueryLimit::QueryContext * query_context, const CacheGuard::Lock & lock)
+void EvictionCandidates::evict(FileCacheQueryLimit::QueryContext * query_context, const CachePriorityGuard::Lock & lock)
 {
     if (candidates.empty())
         return;
diff --git a/src/Interpreters/Cache/EvictionCandidates.h b/src/Interpreters/Cache/EvictionCandidates.h
index 0557962d97f..e817d33d5fe 100644
--- a/src/Interpreters/Cache/EvictionCandidates.h
+++ b/src/Interpreters/Cache/EvictionCandidates.h
@@ -11,7 +11,7 @@ public:

     void add(LockedKey & locked_key, const FileSegmentMetadataPtr & candidate);

-    void evict(FileCacheQueryLimit::QueryContext * query_context, const CacheGuard::Lock &);
+    void evict(FileCacheQueryLimit::QueryContext * query_context, const CachePriorityGuard::Lock &);

     size_t size() const { return candidates_size; }

diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp
index ea40ffcfa3c..65b6a3a172d 100644
--- a/src/Interpreters/Cache/FileCache.cpp
+++ b/src/Interpreters/Cache/FileCache.cpp
@@ -183,13 +183,13 @@ void FileCache::initialize()
is_initialized = true; } -CacheGuard::Lock FileCache::lockCache() const +CachePriorityGuard::Lock FileCache::lockCache() const { ProfileEventTimeIncrement watch(ProfileEvents::FilesystemCacheLockCacheMicroseconds); return cache_guard.lock(); } -CacheGuard::Lock FileCache::tryLockCache(std::optional acquire_timeout) const +CachePriorityGuard::Lock FileCache::tryLockCache(std::optional acquire_timeout) const { return acquire_timeout.has_value() ? cache_guard.tryLockFor(acquire_timeout.value()) : cache_guard.tryLock(); } @@ -706,7 +706,7 @@ KeyMetadata::iterator FileCache::addFileSegment( size_t size, FileSegment::State state, const CreateFileSegmentSettings & create_settings, - const CacheGuard::Lock * lock) + const CachePriorityGuard::Lock * lock) { /// Create a file_segment_metadata and put it in `files` map by [key][offset]. diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 007c4fd9483..8ea5f4dab40 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -173,8 +173,8 @@ public: void deactivateBackgroundOperations(); - CacheGuard::Lock lockCache() const; - CacheGuard::Lock tryLockCache(std::optional acquire_timeout = std::nullopt) const; + CachePriorityGuard::Lock lockCache() const; + CachePriorityGuard::Lock tryLockCache(std::optional acquire_timeout = std::nullopt) const; std::vector sync(); @@ -208,7 +208,7 @@ private: CacheMetadata metadata; FileCachePriorityPtr main_priority; - mutable CacheGuard cache_guard; + mutable CachePriorityGuard cache_guard; struct HitsCountStash { @@ -280,7 +280,7 @@ private: size_t size, FileSegment::State state, const CreateFileSegmentSettings & create_settings, - const CacheGuard::Lock *); + const CachePriorityGuard::Lock *); }; } diff --git a/src/Interpreters/Cache/Guards.h b/src/Interpreters/Cache/Guards.h index 0ac7cb80483..6193ee38755 100644 --- a/src/Interpreters/Cache/Guards.h +++ b/src/Interpreters/Cache/Guards.h @@ -10,17 +10,17 @@ namespace DB * 2. KeyGuard::Lock (hold till the end of the method) * * FileCache::tryReserve - * 1. CacheGuard::Lock + * 1. CachePriorityGuard::Lock * 2. KeyGuard::Lock (taken without metadata lock) * 3. any number of KeyGuard::Lock's for files which are going to be evicted (taken via metadata lock) * * FileCache::removeIfExists - * 1. CacheGuard::Lock + * 1. CachePriorityGuard::Lock * 2. KeyGuard::Lock (taken via metadata lock) * 3. FileSegmentGuard::Lock * * FileCache::removeAllReleasable - * 1. CacheGuard::Lock + * 1. CachePriorityGuard::Lock * 2. any number of KeyGuard::Lock's locks (takken via metadata lock), but at a moment of time only one key lock can be hold * 3. FileSegmentGuard::Lock * @@ -34,23 +34,23 @@ namespace DB * 2. FileSegmentGuard::Lock * * FileSegment::complete - * 1. CacheGuard::Lock + * 1. CachePriorityGuard::Lock * 2. KeyGuard::Lock (taken without metadata lock) * 3. FileSegmentGuard::Lock * * Rules: - * 1. Priority of locking: CacheGuard::Lock > CacheMetadataGuard::Lock > KeyGuard::Lock > FileSegmentGuard::Lock - * 2. If we take more than one key lock at a moment of time, we need to take CacheGuard::Lock (example: tryReserve()) + * 1. Priority of locking: CachePriorityGuard::Lock > CacheMetadataGuard::Lock > KeyGuard::Lock > FileSegmentGuard::Lock + * 2. If we take more than one key lock at a moment of time, we need to take CachePriorityGuard::Lock (example: tryReserve()) * * - * _CacheGuard_ + * _CachePriorityGuard_ * 1. FileCache::tryReserve * 2. FileCache::removeIfExists(key) * 3. FileCache::removeAllReleasable * 4. 
FileSegment::complete * * _KeyGuard_ _CacheMetadataGuard_ - * 1. all from CacheGuard 1. getOrSet/get/set + * 1. all from CachePriorityGuard 1. getOrSet/get/set * 2. getOrSet/get/Set * * *This table does not include locks taken for introspection and system tables. @@ -59,11 +59,11 @@ namespace DB /** * Cache priority queue guard. */ -struct CacheGuard : private boost::noncopyable +struct CachePriorityGuard : private boost::noncopyable { using Mutex = std::timed_mutex; - /// struct is used (not keyword `using`) to make CacheGuard::Lock non-interchangable with other guards locks - /// so, we wouldn't be able to pass CacheGuard::Lock to a function which accepts KeyGuard::Lock, for example + /// struct is used (not keyword `using`) to make CachePriorityGuard::Lock non-interchangable with other guards locks + /// so, we wouldn't be able to pass CachePriorityGuard::Lock to a function which accepts KeyGuard::Lock, for example struct Lock : public std::unique_lock { using Base = std::unique_lock; diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h index bc036166940..58011780323 100644 --- a/src/Interpreters/Cache/IFileCachePriority.h +++ b/src/Interpreters/Cache/IFileCachePriority.h @@ -43,11 +43,11 @@ public: virtual EntryPtr getEntry() const = 0; - virtual size_t increasePriority(const CacheGuard::Lock &) = 0; + virtual size_t increasePriority(const CachePriorityGuard::Lock &) = 0; virtual void updateSize(int64_t size) = 0; - virtual void remove(const CacheGuard::Lock &) = 0; + virtual void remove(const CachePriorityGuard::Lock &) = 0; virtual void invalidate() = 0; @@ -57,13 +57,13 @@ public: virtual ~IFileCachePriority() = default; - size_t getElementsLimit(const CacheGuard::Lock &) const { return max_elements; } + size_t getElementsLimit(const CachePriorityGuard::Lock &) const { return max_elements; } - size_t getSizeLimit(const CacheGuard::Lock &) const { return max_size; } + size_t getSizeLimit(const CachePriorityGuard::Lock &) const { return max_size; } - virtual size_t getSize(const CacheGuard::Lock &) const = 0; + virtual size_t getSize(const CachePriorityGuard::Lock &) const = 0; - virtual size_t getElementsCount(const CacheGuard::Lock &) const = 0; + virtual size_t getElementsCount(const CachePriorityGuard::Lock &) const = 0; /// Throws exception if there is not enough size to fit it. virtual IteratorPtr add( /// NOLINT @@ -71,7 +71,7 @@ public: size_t offset, size_t size, const UserInfo & user, - const CacheGuard::Lock &, + const CachePriorityGuard::Lock &, bool best_effort = false) = 0; /// `reservee` is the entry for which are reserving now. @@ -79,11 +79,11 @@ public: /// for the corresponding file segment. 
virtual bool canFit( /// NOLINT size_t size, - const CacheGuard::Lock &, + const CachePriorityGuard::Lock &, IteratorPtr reservee = nullptr, bool best_effort = false) const = 0; - virtual void shuffle(const CacheGuard::Lock &) = 0; + virtual void shuffle(const CachePriorityGuard::Lock &) = 0; struct IPriorityDump { @@ -91,9 +91,9 @@ public: }; using PriorityDumpPtr = std::shared_ptr; - virtual PriorityDumpPtr dump(const CacheGuard::Lock &) = 0; + virtual PriorityDumpPtr dump(const CachePriorityGuard::Lock &) = 0; - using FinalizeEvictionFunc = std::function; + using FinalizeEvictionFunc = std::function; virtual bool collectCandidatesForEviction( size_t size, FileCacheReserveStat & stat, @@ -101,9 +101,9 @@ public: IFileCachePriority::IteratorPtr reservee, FinalizeEvictionFunc & finalize_eviction_func, const UserID & user_id, - const CacheGuard::Lock &) = 0; + const CachePriorityGuard::Lock &) = 0; - virtual bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock &) = 0; + virtual bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CachePriorityGuard::Lock &) = 0; protected: IFileCachePriority(size_t max_size_, size_t max_elements_); diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index bce03b60024..08e65b577ca 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -43,13 +43,13 @@ IFileCachePriority::IteratorPtr LRUFileCachePriority::add( /// NOLINT size_t offset, size_t size, const UserInfo &, - const CacheGuard::Lock & lock, + const CachePriorityGuard::Lock & lock, bool) { return std::make_shared(add(std::make_shared(key_metadata->key, offset, size, key_metadata), lock)); } -LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(EntryPtr entry, const CacheGuard::Lock & lock) +LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(EntryPtr entry, const CachePriorityGuard::Lock & lock) { if (entry->size == 0) { @@ -93,7 +93,7 @@ LRUFileCachePriority::LRUIterator LRUFileCachePriority::add(EntryPtr entry, cons return LRUIterator(this, iterator); } -LRUFileCachePriority::LRUQueue::iterator LRUFileCachePriority::remove(LRUQueue::iterator it, const CacheGuard::Lock &) +LRUFileCachePriority::LRUQueue::iterator LRUFileCachePriority::remove(LRUQueue::iterator it, const CachePriorityGuard::Lock &) { /// If size is 0, entry is invalidated, current_elements_num was already updated. 
const auto & entry = **it; @@ -150,7 +150,7 @@ bool LRUFileCachePriority::LRUIterator::operator ==(const LRUIterator & other) c return cache_priority == other.cache_priority && iterator == other.iterator; } -void LRUFileCachePriority::iterate(IterateFunc && func, const CacheGuard::Lock & lock) +void LRUFileCachePriority::iterate(IterateFunc && func, const CachePriorityGuard::Lock & lock) { for (auto it = queue.begin(); it != queue.end();) { @@ -201,7 +201,7 @@ void LRUFileCachePriority::iterate(IterateFunc && func, const CacheGuard::Lock & bool LRUFileCachePriority::canFit( /// NOLINT size_t size, - const CacheGuard::Lock & lock, + const CachePriorityGuard::Lock & lock, IteratorPtr, bool) const { @@ -212,7 +212,7 @@ bool LRUFileCachePriority::canFit( size_t size, size_t released_size_assumption, size_t released_elements_assumption, - const CacheGuard::Lock &) const + const CachePriorityGuard::Lock &) const { return (max_size == 0 || (state->current_size + size - released_size_assumption <= max_size)) && (max_elements == 0 || state->current_elements_num + 1 - released_elements_assumption <= max_elements); @@ -225,7 +225,7 @@ bool LRUFileCachePriority::collectCandidatesForEviction( IFileCachePriority::IteratorPtr, FinalizeEvictionFunc &, const UserID &, - const CacheGuard::Lock & lock) + const CachePriorityGuard::Lock & lock) { if (canFit(size, lock)) return true; @@ -264,7 +264,7 @@ bool LRUFileCachePriority::collectCandidatesForEviction( return can_fit(); } -LRUFileCachePriority::LRUIterator LRUFileCachePriority::move(LRUIterator & it, LRUFileCachePriority & other, const CacheGuard::Lock &) +LRUFileCachePriority::LRUIterator LRUFileCachePriority::move(LRUIterator & it, LRUFileCachePriority & other, const CachePriorityGuard::Lock &) { const auto & entry = *it.getEntry(); if (entry.size == 0) @@ -297,7 +297,7 @@ LRUFileCachePriority::LRUIterator LRUFileCachePriority::move(LRUIterator & it, L return LRUIterator(this, it.iterator); } -IFileCachePriority::PriorityDumpPtr LRUFileCachePriority::dump(const CacheGuard::Lock & lock) +IFileCachePriority::PriorityDumpPtr LRUFileCachePriority::dump(const CachePriorityGuard::Lock & lock) { std::vector res; iterate([&](LockedKey &, const FileSegmentMetadataPtr & segment_metadata) @@ -309,7 +309,7 @@ IFileCachePriority::PriorityDumpPtr LRUFileCachePriority::dump(const CacheGuard: } bool LRUFileCachePriority::modifySizeLimits( - size_t max_size_, size_t max_elements_, double /* size_ratio_ */, const CacheGuard::Lock & lock) + size_t max_size_, size_t max_elements_, double /* size_ratio_ */, const CachePriorityGuard::Lock & lock) { if (max_size == max_size_ && max_elements == max_elements_) return false; /// Nothing to change. 
@@ -353,7 +353,7 @@ bool LRUFileCachePriority::modifySizeLimits( return true; } -void LRUFileCachePriority::LRUIterator::remove(const CacheGuard::Lock & lock) +void LRUFileCachePriority::LRUIterator::remove(const CachePriorityGuard::Lock & lock) { assertValid(); cache_priority->remove(iterator, lock); @@ -389,7 +389,7 @@ void LRUFileCachePriority::LRUIterator::updateSize(int64_t size) entry->size += size; } -size_t LRUFileCachePriority::LRUIterator::increasePriority(const CacheGuard::Lock &) +size_t LRUFileCachePriority::LRUIterator::increasePriority(const CachePriorityGuard::Lock &) { assertValid(); cache_priority->queue.splice(cache_priority->queue.end(), cache_priority->queue, iterator); @@ -402,7 +402,7 @@ void LRUFileCachePriority::LRUIterator::assertValid() const throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to use invalid iterator"); } -void LRUFileCachePriority::shuffle(const CacheGuard::Lock &) +void LRUFileCachePriority::shuffle(const CachePriorityGuard::Lock &) { std::vector its; its.reserve(queue.size()); diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index a74a4b8b621..dcd4ee0a24c 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -24,13 +24,13 @@ protected: public: LRUFileCachePriority(size_t max_size_, size_t max_elements_, StatePtr state_ = nullptr); - size_t getSize(const CacheGuard::Lock &) const override { return state->current_size; } + size_t getSize(const CachePriorityGuard::Lock &) const override { return state->current_size; } - size_t getElementsCount(const CacheGuard::Lock &) const override { return state->current_elements_num; } + size_t getElementsCount(const CachePriorityGuard::Lock &) const override { return state->current_elements_num; } bool canFit( /// NOLINT size_t size, - const CacheGuard::Lock &, + const CachePriorityGuard::Lock &, IteratorPtr reservee = nullptr, bool best_effort = false) const override; @@ -39,7 +39,7 @@ public: size_t offset, size_t size, const UserInfo & user, - const CacheGuard::Lock &, + const CachePriorityGuard::Lock &, bool best_effort = false) override; bool collectCandidatesForEviction( @@ -49,9 +49,9 @@ public: IFileCachePriority::IteratorPtr reservee, FinalizeEvictionFunc & finalize_eviction_func, const UserID & user_id, - const CacheGuard::Lock &) override; + const CachePriorityGuard::Lock &) override; - void shuffle(const CacheGuard::Lock &) override; + void shuffle(const CachePriorityGuard::Lock &) override; struct LRUPriorityDump : public IPriorityDump { @@ -59,11 +59,11 @@ public: explicit LRUPriorityDump(const std::vector & infos_) : infos(infos_) {} void merge(const LRUPriorityDump & other) { infos.insert(infos.end(), other.infos.begin(), other.infos.end()); } }; - PriorityDumpPtr dump(const CacheGuard::Lock &) override; + PriorityDumpPtr dump(const CachePriorityGuard::Lock &) override; - void pop(const CacheGuard::Lock & lock) { remove(queue.begin(), lock); } + void pop(const CachePriorityGuard::Lock & lock) { remove(queue.begin(), lock); } - bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock &) override; + bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CachePriorityGuard::Lock &) override; private: class LRUIterator; @@ -77,9 +77,9 @@ private: void updateElementsCount(int64_t num); void updateSize(int64_t size); - bool canFit(size_t size, size_t released_size_assumption, size_t 
released_elements_assumption, const CacheGuard::Lock &) const; + bool canFit(size_t size, size_t released_size_assumption, size_t released_elements_assumption, const CachePriorityGuard::Lock &) const; - LRUQueue::iterator remove(LRUQueue::iterator it, const CacheGuard::Lock &); + LRUQueue::iterator remove(LRUQueue::iterator it, const CachePriorityGuard::Lock &); enum class IterationResult { @@ -88,10 +88,10 @@ private: REMOVE_AND_CONTINUE, }; using IterateFunc = std::function<IterationResult(LockedKey &, const FileSegmentMetadataPtr &)>; - void iterate(IterateFunc && func, const CacheGuard::Lock &); + void iterate(IterateFunc && func, const CachePriorityGuard::Lock &); - LRUIterator move(LRUIterator & it, LRUFileCachePriority & other, const CacheGuard::Lock &); - LRUIterator add(EntryPtr entry, const CacheGuard::Lock &); + LRUIterator move(LRUIterator & it, LRUFileCachePriority & other, const CachePriorityGuard::Lock &); + LRUIterator add(EntryPtr entry, const CachePriorityGuard::Lock &); }; class LRUFileCachePriority::LRUIterator : public IFileCachePriority::Iterator @@ -108,9 +108,9 @@ public: EntryPtr getEntry() const override { return *iterator; } - size_t increasePriority(const CacheGuard::Lock &) override; + size_t increasePriority(const CachePriorityGuard::Lock &) override; - void remove(const CacheGuard::Lock &) override; + void remove(const CachePriorityGuard::Lock &) override; void invalidate() override; diff --git a/src/Interpreters/Cache/QueryLimit.cpp b/src/Interpreters/Cache/QueryLimit.cpp index ba4f5017772..9421005dc92 100644 --- a/src/Interpreters/Cache/QueryLimit.cpp +++ b/src/Interpreters/Cache/QueryLimit.cpp @@ -16,7 +16,7 @@ static bool isQueryInitialized() && !CurrentThread::getQueryId().empty(); } -FileCacheQueryLimit::QueryContextPtr FileCacheQueryLimit::tryGetQueryContext(const CacheGuard::Lock &) +FileCacheQueryLimit::QueryContextPtr FileCacheQueryLimit::tryGetQueryContext(const CachePriorityGuard::Lock &) { if (!isQueryInitialized()) return nullptr; @@ -25,7 +25,7 @@ FileCacheQueryLimit::QueryContextPtr FileCacheQueryLimit::tryGetQueryContext(con return (query_iter == query_map.end()) ?
nullptr : query_iter->second; } -void FileCacheQueryLimit::removeQueryContext(const std::string & query_id, const CacheGuard::Lock &) +void FileCacheQueryLimit::removeQueryContext(const std::string & query_id, const CachePriorityGuard::Lock &) { auto query_iter = query_map.find(query_id); if (query_iter == query_map.end()) @@ -41,7 +41,7 @@ void FileCacheQueryLimit::removeQueryContext(const std::string & query_id, const FileCacheQueryLimit::QueryContextPtr FileCacheQueryLimit::getOrSetQueryContext( const std::string & query_id, const ReadSettings & settings, - const CacheGuard::Lock &) + const CachePriorityGuard::Lock &) { if (query_id.empty()) return nullptr; @@ -70,7 +70,7 @@ void FileCacheQueryLimit::QueryContext::add( size_t offset, size_t size, const FileCache::UserInfo & user, - const CacheGuard::Lock & lock) + const CachePriorityGuard::Lock & lock) { auto it = getPriority().add(key_metadata, offset, size, user, lock); auto [_, inserted] = records.emplace(FileCacheKeyAndOffset{key_metadata->key, offset}, it); @@ -87,7 +87,7 @@ void FileCacheQueryLimit::QueryContext::add( void FileCacheQueryLimit::QueryContext::remove( const Key & key, size_t offset, - const CacheGuard::Lock & lock) + const CachePriorityGuard::Lock & lock) { auto record = records.find({key, offset}); if (record == records.end()) @@ -100,7 +100,7 @@ void FileCacheQueryLimit::QueryContext::remove( IFileCachePriority::IteratorPtr FileCacheQueryLimit::QueryContext::tryGet( const Key & key, size_t offset, - const CacheGuard::Lock &) + const CachePriorityGuard::Lock &) { auto it = records.find({key, offset}); if (it == records.end()) diff --git a/src/Interpreters/Cache/QueryLimit.h b/src/Interpreters/Cache/QueryLimit.h index 419126601f0..7553eff82ba 100644 --- a/src/Interpreters/Cache/QueryLimit.h +++ b/src/Interpreters/Cache/QueryLimit.h @@ -13,14 +13,14 @@ public: class QueryContext; using QueryContextPtr = std::shared_ptr<QueryContext>; - QueryContextPtr tryGetQueryContext(const CacheGuard::Lock & lock); + QueryContextPtr tryGetQueryContext(const CachePriorityGuard::Lock & lock); QueryContextPtr getOrSetQueryContext( const std::string & query_id, const ReadSettings & settings, - const CacheGuard::Lock &); + const CachePriorityGuard::Lock &); - void removeQueryContext(const std::string & query_id, const CacheGuard::Lock &); + void removeQueryContext(const std::string & query_id, const CachePriorityGuard::Lock &); class QueryContext { @@ -38,19 +38,19 @@ public: Priority::IteratorPtr tryGet( const Key & key, size_t offset, - const CacheGuard::Lock &); + const CachePriorityGuard::Lock &); void add( KeyMetadataPtr key_metadata, size_t offset, size_t size, const FileCacheUserInfo & user, - const CacheGuard::Lock &); + const CachePriorityGuard::Lock &); void remove( const Key & key, size_t offset, - const CacheGuard::Lock &); + const CachePriorityGuard::Lock &); private: using Records = std::unordered_map<FileCacheKeyAndOffset, Priority::IteratorPtr, FileCacheKeyAndOffsetHash>; diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp index 43f1c1012ba..1767cb94be7 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp @@ -34,19 +34,19 @@ SLRUFileCachePriority::SLRUFileCachePriority( probationary_queue.max_size, protected_queue.max_elements); } -size_t SLRUFileCachePriority::getSize(const CacheGuard::Lock & lock) const +size_t SLRUFileCachePriority::getSize(const CachePriorityGuard::Lock & lock) const { return protected_queue.getSize(lock) + probationary_queue.getSize(lock); } -size_t
SLRUFileCachePriority::getElementsCount(const CacheGuard::Lock & lock) const +size_t SLRUFileCachePriority::getElementsCount(const CachePriorityGuard::Lock & lock) const { return protected_queue.getElementsCount(lock) + probationary_queue.getElementsCount(lock); } bool SLRUFileCachePriority::canFit( /// NOLINT size_t size, - const CacheGuard::Lock & lock, + const CachePriorityGuard::Lock & lock, IteratorPtr reservee, bool best_effort) const { @@ -70,7 +70,7 @@ IFileCachePriority::IteratorPtr SLRUFileCachePriority::add( /// NOLINT size_t offset, size_t size, const UserInfo &, - const CacheGuard::Lock & lock, + const CachePriorityGuard::Lock & lock, bool is_startup) { if (is_startup) @@ -103,7 +103,7 @@ bool SLRUFileCachePriority::collectCandidatesForEviction( IFileCachePriority::IteratorPtr reservee, FinalizeEvictionFunc & finalize_eviction_func, const UserID & user_id, - const CacheGuard::Lock & lock) + const CachePriorityGuard::Lock & lock) { /// If `it` is nullptr, then it is the first space reservation attempt /// for a corresponding file segment, so it will be directly put into probationary queue. @@ -143,7 +143,7 @@ bool SLRUFileCachePriority::collectCandidatesForEviction( && !probationary_queue.collectCandidatesForEviction(size_to_downgrade, stat, res, reservee, noop, user_id, lock)) return false; - finalize_eviction_func = [=, this](const CacheGuard::Lock & lk) mutable + finalize_eviction_func = [=, this](const CachePriorityGuard::Lock & lk) mutable { for (const auto & [key, key_candidates] : *downgrade_candidates) { @@ -159,7 +159,7 @@ bool SLRUFileCachePriority::collectCandidatesForEviction( return true; } -void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const CacheGuard::Lock & lock) +void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const CachePriorityGuard::Lock & lock) { /// If entry is already in protected queue, /// we only need to increase its priority within the protected queue. @@ -242,7 +242,7 @@ void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const Cach iterator.is_protected = true; } -IFileCachePriority::PriorityDumpPtr SLRUFileCachePriority::dump(const CacheGuard::Lock & lock) +IFileCachePriority::PriorityDumpPtr SLRUFileCachePriority::dump(const CachePriorityGuard::Lock & lock) { auto res = dynamic_pointer_cast<LRUFileCachePriority::LRUPriorityDump>(probationary_queue.dump(lock)); auto part_res = dynamic_pointer_cast<LRUFileCachePriority::LRUPriorityDump>(protected_queue.dump(lock)); @@ -250,14 +250,14 @@ IFileCachePriority::PriorityDumpPtr SLRUFileCachePriority::dump(const CacheGuard return res; } -void SLRUFileCachePriority::shuffle(const CacheGuard::Lock & lock) +void SLRUFileCachePriority::shuffle(const CachePriorityGuard::Lock & lock) { protected_queue.shuffle(lock); probationary_queue.shuffle(lock); } bool SLRUFileCachePriority::modifySizeLimits( - size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock & lock) + size_t max_size_, size_t max_elements_, double size_ratio_, const CachePriorityGuard::Lock & lock) { if (max_size == max_size_ && max_elements == max_elements_ && size_ratio == size_ratio_) return false; /// Nothing to change.
@@ -287,7 +287,7 @@ SLRUFileCachePriority::EntryPtr SLRUFileCachePriority::SLRUIterator::getEntry() return entry; } -size_t SLRUFileCachePriority::SLRUIterator::increasePriority(const CacheGuard::Lock & lock) +size_t SLRUFileCachePriority::SLRUIterator::increasePriority(const CachePriorityGuard::Lock & lock) { assertValid(); cache_priority->increasePriority(*this, lock); @@ -306,7 +306,7 @@ void SLRUFileCachePriority::SLRUIterator::invalidate() lru_iterator.invalidate(); } -void SLRUFileCachePriority::SLRUIterator::remove(const CacheGuard::Lock & lock) +void SLRUFileCachePriority::SLRUIterator::remove(const CachePriorityGuard::Lock & lock) { assertValid(); lru_iterator.remove(lock); diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h index d97fa80a6c7..d81ce1bc480 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.h +++ b/src/Interpreters/Cache/SLRUFileCachePriority.h @@ -21,13 +21,13 @@ public: LRUFileCachePriority::StatePtr probationary_state_ = nullptr, LRUFileCachePriority::StatePtr protected_state_ = nullptr); - size_t getSize(const CacheGuard::Lock & lock) const override; + size_t getSize(const CachePriorityGuard::Lock & lock) const override; - size_t getElementsCount(const CacheGuard::Lock &) const override; + size_t getElementsCount(const CachePriorityGuard::Lock &) const override; bool canFit( /// NOLINT size_t size, - const CacheGuard::Lock &, + const CachePriorityGuard::Lock &, IteratorPtr reservee = nullptr, bool best_effort = false) const override; @@ -36,7 +36,7 @@ public: size_t offset, size_t size, const UserInfo & user, - const CacheGuard::Lock &, + const CachePriorityGuard::Lock &, bool is_startup = false) override; bool collectCandidatesForEviction( @@ -46,13 +46,13 @@ public: IFileCachePriority::IteratorPtr reservee, FinalizeEvictionFunc & finalize_eviction_func, const UserID & user_id, - const CacheGuard::Lock &) override; + const CachePriorityGuard::Lock &) override; - void shuffle(const CacheGuard::Lock &) override; + void shuffle(const CachePriorityGuard::Lock &) override; - PriorityDumpPtr dump(const CacheGuard::Lock &) override; + PriorityDumpPtr dump(const CachePriorityGuard::Lock &) override; - bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CacheGuard::Lock &) override; + bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CachePriorityGuard::Lock &) override; private: double size_ratio; @@ -60,7 +60,7 @@ private: LRUFileCachePriority probationary_queue; LoggerPtr log = getLogger("SLRUFileCachePriority"); - void increasePriority(SLRUIterator & iterator, const CacheGuard::Lock & lock); + void increasePriority(SLRUIterator & iterator, const CachePriorityGuard::Lock & lock); }; class SLRUFileCachePriority::SLRUIterator : public IFileCachePriority::Iterator @@ -74,9 +74,9 @@ public: EntryPtr getEntry() const override; - size_t increasePriority(const CacheGuard::Lock &) override; + size_t increasePriority(const CachePriorityGuard::Lock &) override; - void remove(const CacheGuard::Lock &) override; + void remove(const CachePriorityGuard::Lock &) override; void invalidate() override; From 3fcede709e39db1525c8ed58bc6deb39814edd83 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Mar 2024 19:03:42 +0100 Subject: [PATCH 258/283] Fix error --- .../0_stateless/03011_definitive_guide_to_cast.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql index 771123b153a..7cc662eee29 100644 --- a/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql +++ b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql @@ -212,12 +212,12 @@ SELECT toDateTime64('2024-04-25 01:02:03', 6, 'Europe/Amsterdam'); -- the scale -- If the time zone is not specified, the time zone of the argument's data type is used, and if the argument is not a date time, the session time zone is used. SELECT toDateTime('2024-04-25 01:02:03'); -SELECT toDateTime64('2024-04-25 01:02:03', 6); +SELECT toDateTime64('2024-04-25 01:02:03.456789', 6); --- Here the time zone can be specified as the rule of interpreration of the value during conversion: +-- Here the time zone can be specified as the rule of interpretation of the value during conversion: -SELECT toString(now(), 'Europe/Amsterdam'); -SELECT toString(now()); +SELECT toString(1710612085::DateTime, 'America/Los_Angeles'); +SELECT toString(1710612085::DateTime); -- 7. SQL-compatibility type-defining operators: From acd4e49dc2cbc61d03cc1177af40c5c0ce11243d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Mar 2024 19:35:56 +0100 Subject: [PATCH 259/283] More guides --- .../03011_definitive_guide_to_cast.reference | 31 ++++-- .../03011_definitive_guide_to_cast.sql | 97 ++++++++++++++++++- 2 files changed, 118 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference b/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference index f8f37fa7807..f20fe0b7425 100644 --- a/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference +++ b/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference @@ -8,11 +8,23 @@ ['Hello','wo\'rld\\'] Hello wo\\\'rld\\\\ wo\'rld\\ wo\\\'rld\\\\ -123 -123 -123 +133 210 +210 +[123,456] 1 -1 [] [] Array(Nothing) Array(Array(Array(Tuple(UInt64, String)))) +1970-01-01 01:00:00 +2009-02-14 00:31:30.123456 +1970-01-01 00:59:59.888889 +2009-02-14 00:31:30 +1970-01-01 01:00:00 +2299-12-31 23:59:59.000000 +2009-02-14 +2009-02-14 +123\0\0 +123 +123 +123 123 123 123 @@ -46,9 +58,16 @@ Hello\0\0\0\0\0 2024-04-25 01:02:03 2024-04-25 01:02:03.000000 2024-04-25 01:02:03 -2024-04-25 01:02:03.000000 -2024-03-16 16:22:33 -2024-03-16 16:22:33 +2024-04-25 01:02:03.456789 +2024-03-16 11:01:25 +2024-03-16 19:01:25 +123 \N \N \N +123 0 0 0 +Nullable(UInt8) UInt8 +123 +123 +123 +\N 2024-04-25 2024-01-01 02:03:04 1 12 2024-04-25 2024-01-01 02:03:04.000000 2009-02-14 00:31:30 2024-04-25 2024-01-01 02:03:04.000000 2009-02-14 00:31:30 diff --git a/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql index 7cc662eee29..91487666a54 100644 --- a/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql +++ b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql @@ -39,11 +39,25 @@ SELECT CAST($$['Hello', 'wo\'rld\\']$$ AS Array(String)); SELECT arrayJoin(CAST($$['Hello', 'wo\'rld\\']$$ AS Array(String))) AS x, CAST($$wo\'rld\\$$ AS FixedString(9)) AS y; --- The operator is case-insensitive: +-- As conversion from String is similar to direct parsing rather than conversion from other types, +-- it can be stricter for numbers by not tolerating overflows in some cases: -SELECT CAST(123 AS String); -SELECT cast(123 AS String); -SELECT Cast(123 AS String); +SELECT CAST(-123 AS UInt8), CAST(1234 AS UInt8); + +SELECT CAST('-123' AS 
UInt8); -- { serverError CANNOT_PARSE_NUMBER } + +-- In some cases it still allows overflows, but it is implementation defined: + +SELECT CAST('1234' AS UInt8); + +-- Parsing from a string does not tolerate extra whitespace characters: + +SELECT CAST(' 123' AS UInt8); -- { serverError CANNOT_PARSE_TEXT } +SELECT CAST('123 ' AS UInt8); -- { serverError CANNOT_PARSE_TEXT } + +-- But for composite data types, it involves a more featured parser, that take care of whitespace inside the data structures: + +SELECT CAST('[ 123 ,456, ]' AS Array(UInt16)); -- Conversion from a floating point value to an integer will involve truncation towards zero: @@ -55,6 +69,54 @@ SELECT CAST(1.9, 'Int64'), CAST(-1.9, 'Int64'); SELECT [] AS x, CAST(x AS Array(Array(Array(Tuple(UInt64, String))))) AS y, toTypeName(x), toTypeName(y); +-- Conversion between numbers and DateTime/Date data types interprets the number as the number of seconds/days from the Unix epoch, +-- where Unix epoch starts from 1970-01-01T00:00:00Z (the midnight of Gregorian year 1970 in UTC), +-- and the number of seconds don't count the coordination seconds, as in Unix. + +-- For example, it is 1 AM in Amsterdam: + +SELECT CAST(0 AS DateTime('Europe/Amsterdam')); + +-- The numbers can be fractional and negative (for DateTime64): + +SELECT CAST(1234567890.123456 AS DateTime64(6, 'Europe/Amsterdam')); +SELECT CAST(-0.111111 AS DateTime64(6, 'Europe/Amsterdam')); + +-- If the result does not fit in the range of the corresponding time data types, it is truncated and saturated to the boundaries: + +SELECT CAST(1234567890.123456 AS DateTime('Europe/Amsterdam')); +SELECT CAST(-1 AS DateTime('Europe/Amsterdam')); + +SELECT CAST(1e20 AS DateTime64(6, 'Europe/Amsterdam')); + +-- A special case is DateTime64(9) - the maximum resolution, where it does not cover the usual range, +-- and in this case, it throws an exception on overflow (I don't mind if we change this behavior in the future): + + SELECT CAST(1e20 AS DateTime64(9, 'Europe/Amsterdam')); -- { serverError DECIMAL_OVERFLOW } + +-- If a number is converted to a Date data type, the value is interpreted as the number of days since the Unix epoch, +-- but if the number is larger than the range of the data type, it is interpreted as a unix timestamp +-- (the number of seconds since the Unix epoch), similarly how it is done for the DateTime data type, +-- for convenience (while the internal representation of Date is the number of days, +-- often people want the unix timestamp to be also parsed into the Date data type): + +SELECT CAST(14289 AS Date); +SELECT CAST(1234567890 AS Date); + +-- When converting to a FixedString, if the length of the result data type is larger than the value, the result is padded with zero bytes: + +SELECT CAST('123' AS FixedString(5)) FORMAT TSV; + +-- But if it does not fit, an exception is thrown: + +SELECT CAST('12345' AS FixedString(3)) FORMAT TSV; -- { serverError TOO_LARGE_STRING_SIZE } + +-- The operator is case-insensitive: + +SELECT CAST(123 AS String); +SELECT cast(123 AS String); +SELECT Cast(123 AS String); + -- 2. The functional form of this operator: `CAST(value, 'Type')`: @@ -219,6 +281,33 @@ SELECT toDateTime64('2024-04-25 01:02:03.456789', 6); SELECT toString(1710612085::DateTime, 'America/Los_Angeles'); SELECT toString(1710612085::DateTime); +-- Functions converting to numeric types, date and datetime, IP and UUID, also have versions with -OrNull and -OrZero fallbacks, +-- that don't throw exceptions on parsing errors.
+-- They use the same rules as the accurateCast operator: + +SELECT toUInt8OrNull('123'), toUInt8OrNull('-123'), toUInt8OrNull('1234'), toUInt8OrNull(' 123'); +SELECT toUInt8OrZero('123'), toUInt8OrZero('-123'), toUInt8OrZero('1234'), toUInt8OrZero(' 123'); + +SELECT toTypeName(toUInt8OrNull('123')), toTypeName(toUInt8OrZero('123')); + +-- These functions are only applicable to string data types. +-- Although there is room for extension: + +SELECT toUInt8OrNull(123); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +-- String and FixedString work: + +SELECT toUInt8OrNull(123::FixedString(3)); + +-- For the FixedString data type trailing zero bytes are allowed, because they are the padding for FixedString: + +SELECT toUInt8OrNull('123'::FixedString(4)); +SELECT toUInt8OrNull('123\0'::FixedString(4)); + +-- While for String, they don't: + +SELECT toUInt8OrNull('123\0'); + -- 7. SQL-compatibility type-defining operators: From 2d8676d61a58925ace94e4e71893c93fee9551c9 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Mar 2024 19:38:32 +0100 Subject: [PATCH 260/283] Update 03011_definitive_guide_to_cast.sql --- .../03011_definitive_guide_to_cast.sql | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql index 91487666a54..335b97c9db6 100644 --- a/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql +++ b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql @@ -7,7 +7,7 @@ SET session_timezone = 'Europe/Amsterdam'; SELECT CAST(123 AS String); --- It convert between various data types, including parameterized data types +-- It converts between various data types, including parameterized data types SELECT CAST(1234567890 AS DateTime('Europe/Amsterdam')); @@ -15,7 +15,7 @@ SELECT CAST(1234567890 AS DateTime('Europe/Amsterdam')); SELECT CAST('[1, 2, 3]' AS Array(UInt8)); --- It's return type depends on the setting `cast_keep_nullable`. If it is enabled, if the source argument type is Nullable, the resulting data type will be also Nullable, even if it is not written explicitly: +-- Its return type depends on the setting `cast_keep_nullable`. If it is enabled, if the source argument type is Nullable, the resulting data type will be also Nullable, even if it is not written explicitly: SET cast_keep_nullable = 1; SELECT CAST(x AS UInt8) AS y, toTypeName(y) FROM VALUES('x Nullable(String)', ('123'), ('NULL')); @@ -25,7 +25,7 @@ SELECT CAST(x AS UInt8) AS y, toTypeName(y) FROM VALUES('x Nullable(String)', (' -- There are various type conversion rules, some worth noting. --- Conversion between numeric types can involve implementation defined overflow: +-- Conversion between numeric types can involve implementation-defined overflow: SELECT CAST(257 AS UInt8); SELECT CAST(-1 AS UInt8); @@ -35,7 +35,7 @@ SELECT CAST(-1 AS UInt8); SELECT CAST($$['Hello', 'wo\'rld\\']$$ AS Array(String)); -- ' --- While for simple data types it does not interpret escape sequences: +-- While for simple data types, it does not interpret escape sequences: SELECT arrayJoin(CAST($$['Hello', 'wo\'rld\\']$$ AS Array(String))) AS x, CAST($$wo\'rld\\$$ AS FixedString(9)) AS y; @@ -159,9 +159,9 @@ SELECT 'String' AS String, CAST(123, String); -- 3. The internal function `_CAST` which is different from `CAST` only by being not dependent on the value of `cast_keep_nullable` setting and other settings.
--- This is needed when ClickHouse has to persist an expression for future use, like in table definitions, including primary and partition key and other indices. +-- This is needed when ClickHouse has to persist an expression for future use, like in table definitions, including primary and partition keys and other indices. --- The function is not intended for being used directly. When a user uses a regular `CAST` operator or function in a table definition, it is transparently converted to `_CAST` to persist its behavior. However, the user can still use the internal version directly: +-- The function is not intended to be used directly. When a user uses a regular `CAST` operator or function in a table definition, it is transparently converted to `_CAST` to persist its behavior. However, the user can still use the internal version directly: SELECT _CAST(x, 'UInt8') AS y, toTypeName(y) FROM VALUES('x Nullable(String)', ('123'), ('456')); @@ -179,7 +179,7 @@ SELECT 123::String; -- It has a difference from the `CAST` operator: if it is applied to a simple literal value, instead of performing a type conversion, it invokes the SQL parser directly on the corresponding text fragment of the query. The most important case will be the floating-point and decimal types. --- In this example, we parse `1.1` as Decimal and not involving any type conversion: +-- In this example, we parse `1.1` as Decimal and do not involve any type conversion: SELECT 1.1::Decimal(30, 20); @@ -211,7 +211,7 @@ SELECT 1.1::Decimal(30, 20), CAST('1.1' AS Decimal(30, 20)), (1+1)::UInt8 FORMAT SELECT 1-1::String; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } --- But one interesting example is the unary minus. Here the minus is not an operator, but part of the numeric literal: +-- But one interesting example is the unary minus. Here the minus is not an operator but part of the numeric literal: SELECT -1::String; @@ -236,7 +236,7 @@ SELECT accurateCastOrNull(1.123456789, 'Float32'); SELECT accurateCastOrDefault(-1, 'UInt64', 0::UInt64); --- These functions are case-sensitive and there are no corresponding operators: +-- These functions are case-sensitive, and there are no corresponding operators: SELECT ACCURATECAST(1, 'String'); -- { serverError UNKNOWN_FUNCTION }. @@ -254,11 +254,11 @@ SELECT ACCURATECAST(1, 'String'); -- { serverError UNKNOWN_FUNCTION }. -- `toIntervalSecond`, `toIntervalMinute`, `toIntervalHour`, -- `toIntervalDay`, `toIntervalWeek`, `toIntervalMonth`, `toIntervalQuarter`, `toIntervalYear` --- These functions work under the same rules as the CAST operator, and can be thought as a elementary implementation parts of that operator. They allow implementation defined overflow while converting between numeric types. +-- These functions work under the same rules as the CAST operator and can be thought as elementary implementation parts of that operator. They allow implementation-defined overflow while converting between numeric types. SELECT toUInt8(-1); --- These are ClickHouse-native conversion functions. They take an argument with the input value, and for some of data types (`FixedString`, `DateTime`, `DateTime64`, `Decimal`s) the subsequent arguments are constant expressions, defining the parameters of these data types, or the rules to interpret the source value. +-- These are ClickHouse-native conversion functions. 
They take an argument with the input value, and for some of the data types (`FixedString`, `DateTime`, `DateTime64`, `Decimal`s), the subsequent arguments are constant expressions, defining the parameters of these data types, or the rules to interpret the source value. SELECT toFloat64(123); -- no arguments SELECT toFixedString('Hello', 10) FORMAT TSV; -- the parameter of the FixedString data type, the function returns FixedString(10) @@ -276,7 +276,7 @@ SELECT toDateTime64('2024-04-25 01:02:03', 6, 'Europe/Amsterdam'); -- the scale SELECT toDateTime('2024-04-25 01:02:03'); SELECT toDateTime64('2024-04-25 01:02:03.456789', 6); --- Here the time zone can be specified as the rule of interpretation of the value during conversion: +-- Here, the time zone can be specified as the rule of interpretation of the value during conversion: SELECT toString(1710612085::DateTime, 'America/Los_Angeles'); SELECT toString(1710612085::DateTime); @@ -320,7 +320,7 @@ SELECT DATE '2024-04-25', TIMESTAMP '2024-01-01 02:03:04', INTERVAL 1 MINUTE, IN SELECT DATE('2024-04-25'), TIMESTAMP('2024-01-01 02:03:04'), FROM_UNIXTIME(1234567890); --- These functions exist for compatibility with MySQL. They are case-insensive. +-- These functions exist for compatibility with MySQL. They are case-insensitive. SELECT date '2024-04-25', timeSTAMP('2024-01-01 02:03:04'), From_Unixtime(1234567890); @@ -329,7 +329,7 @@ SELECT date '2024-04-25', timeSTAMP('2024-01-01 02:03:04'), From_Unixtime(123456 -- `parseDateTimeBestEffort`, `parseDateTimeBestEffortUS`, `parseDateTime64BestEffort`, `parseDateTime64BestEffortUS`, `toUnixTimestamp` --- These functions are similar to explicit conversion functions, but provide special rules on how the conversion is performed. +-- These functions are similar to explicit conversion functions but provide special rules on how the conversion is performed. SELECT parseDateTimeBestEffort('25 Apr 1986 1pm'); @@ -338,4 +338,4 @@ SELECT parseDateTimeBestEffort('25 Apr 1986 1pm'); SELECT toDayOfMonth(toDateTime(1234567890)); --- These functions are coverted in a separate topic. +-- These functions are covered in a separate topic. 
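Before the next patch in the series, a compact recap of the distinction that the guide above keeps returning to, written in the same style as the test file. This is an illustrative sketch, not part of the series; the CANNOT_PARSE_NUMBER behavior is quoted from the guide itself, while the CANNOT_CONVERT_TYPE error code for accurateCast is an assumption based on its current overflow behavior:

-- CAST between numeric types allows implementation-defined overflow:
SELECT CAST(257 AS UInt8); -- 1
-- accurateCast refuses to lose information and throws instead:
SELECT accurateCast(257, 'UInt8'); -- { serverError CANNOT_CONVERT_TYPE }
-- accurateCastOrNull falls back to NULL, so its result type is Nullable:
SELECT accurateCastOrNull(257, 'UInt8'); -- \N

The same three-way contrast (throwing, NULL-producing, and overflow-tolerant forms) is what the -OrNull/-OrZero function family generalizes to parsing from strings.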
From 175efee0891223c6242f34e6bf08301a625ef407 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Mar 2024 19:50:11 +0100 Subject: [PATCH 261/283] More guides --- .../0_stateless/03011_definitive_guide_to_cast.reference | 2 ++ tests/queries/0_stateless/03011_definitive_guide_to_cast.sql | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference b/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference index f20fe0b7425..b71011d75ff 100644 --- a/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference +++ b/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference @@ -61,6 +61,8 @@ Hello\0\0\0\0\0 2024-04-25 01:02:03.456789 2024-03-16 11:01:25 2024-03-16 19:01:25 +2024-03-16 19:01:25 +2024-03-16 11:01:25 123 \N \N \N 123 0 0 0 Nullable(UInt8) UInt8 diff --git a/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql index 335b97c9db6..e79572e1ddb 100644 --- a/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql +++ b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql @@ -281,6 +281,11 @@ SELECT toDateTime64('2024-04-25 01:02:03.456789', 6); SELECT toString(1710612085::DateTime, 'America/Los_Angeles'); SELECT toString(1710612085::DateTime); +-- In the case when the time zone is not the part of the resulting data type, but a rule of interpretation of the source value, +-- it can be non-constant. Let's clarify: in this example, the resulting data type is a String; it does not have a time zone parameter: + +SELECT toString(1710612085::DateTime, tz) FROM Values('tz String', 'Europe/Amsterdam', 'America/Los_Angeles'); + -- Functions converting to numeric types, date and datetime, IP and UUID, also have versions with -OrNull and -OrZero fallbacks, -- that don't throw exceptions on parsing errors. 
-- They use the same rules as the accurateCast operator: From 65eb6f135c19459ace827035dd518b371c64ee49 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Mar 2024 19:52:10 +0100 Subject: [PATCH 262/283] More guides --- tests/queries/0_stateless/03011_definitive_guide_to_cast.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql index e79572e1ddb..76819ad8313 100644 --- a/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql +++ b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql @@ -55,7 +55,7 @@ SELECT CAST('1234' AS UInt8); SELECT CAST(' 123' AS UInt8); -- { serverError CANNOT_PARSE_TEXT } SELECT CAST('123 ' AS UInt8); -- { serverError CANNOT_PARSE_TEXT } --- But for composite data types, it involves a more featured parser, that take care of whitespace inside the data structures: +-- But for composite data types, it involves a more featured parser, that takes care of whitespace inside the data structures: SELECT CAST('[ 123 ,456, ]' AS Array(UInt16)); From e5f15b6ac4894f32c7dd82091a3ee8260ef05f86 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 16 Mar 2024 23:00:57 +0100 Subject: [PATCH 263/283] Fix errors --- src/Functions/FunctionsConversion.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 7f130b0cc86..0deb2960d34 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -364,9 +364,9 @@ struct ToDateTime64TransformUnsigned { static constexpr auto name = "toDateTime64"; - const DateTime64::NativeType scale_multiplier = 1; + const DateTime64::NativeType scale_multiplier; - ToDateTime64TransformUnsigned(UInt32 scale = 0) /// NOLINT + ToDateTime64TransformUnsigned(UInt32 scale) /// NOLINT : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) {} @@ -388,9 +388,9 @@ struct ToDateTime64TransformSigned { static constexpr auto name = "toDateTime64"; - const DateTime64::NativeType scale_multiplier = 1; + const DateTime64::NativeType scale_multiplier; - ToDateTime64TransformSigned(UInt32 scale = 0) /// NOLINT + ToDateTime64TransformSigned(UInt32 scale) /// NOLINT : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) {} @@ -412,9 +412,9 @@ struct ToDateTime64TransformFloat { static constexpr auto name = "toDateTime64"; - const UInt32 scale = 1; + const UInt32 scale; - ToDateTime64TransformFloat(UInt32 scale_ = 0) /// NOLINT + ToDateTime64TransformFloat(UInt32 scale_) /// NOLINT : scale(scale_) {} @@ -439,7 +439,7 @@ struct FromDateTime64Transform { static constexpr auto name = Transform::name; - const DateTime64::NativeType scale_multiplier = 1; + const DateTime64::NativeType scale_multiplier; FromDateTime64Transform(UInt32 scale) /// NOLINT : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) @@ -456,9 +456,9 @@ struct ToDateTime64Transform { static constexpr auto name = "toDateTime64"; - const DateTime64::NativeType scale_multiplier = 1; + const DateTime64::NativeType scale_multiplier; - ToDateTime64Transform(UInt32 scale = 0) /// NOLINT + ToDateTime64Transform(UInt32 scale) /// NOLINT : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) {} @@ -1309,14 +1309,14 @@ struct ConvertImpl && std::is_same_v) { return DateTimeTransformImpl, false>::template execute( - arguments, result_type, input_rows_count); + arguments, result_type, 
input_rows_count, additions); } else if constexpr (std::is_same_v && std::is_same_v && std::is_same_v) { return DateTimeTransformImpl, false>::template execute( - arguments, result_type, input_rows_count); + arguments, result_type, input_rows_count, additions); } else if constexpr (( std::is_same_v @@ -1325,7 +1325,7 @@ struct ConvertImpl && std::is_same_v) { return DateTimeTransformImpl, false>::template execute( - arguments, result_type, input_rows_count); + arguments, result_type, input_rows_count, additions); } /// Conversion of DateTime64 to Date or DateTime: discards fractional part. else if constexpr (std::is_same_v @@ -1351,7 +1351,7 @@ struct ConvertImpl && std::is_same_v) { return DateTimeTransformImpl::template execute( - arguments, result_type, input_rows_count); + arguments, result_type, input_rows_count, additions); } else if constexpr (IsDataTypeDateOrDateTime && std::is_same_v) From f1be9e67070a25362e4e67ce78540ce95cc4e952 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 00:17:59 +0100 Subject: [PATCH 264/283] Trash --- src/Functions/FunctionsConversion.cpp | 94 ++++++++++----------------- 1 file changed, 34 insertions(+), 60 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 0deb2960d34..088076eeeb4 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -181,12 +181,12 @@ struct ToDateTimeImpl /// Implementation of toDate function. -template +template struct ToDateTransform32Or64 { static constexpr auto name = "toDate"; - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) + static NO_SANITIZE_UNDEFINED UInt16 execute(const FromType & from, const DateLUTImpl & time_zone) { if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) { @@ -203,12 +203,12 @@ struct ToDateTransform32Or64 }; -template +template struct ToDateTransform32Or64Signed { static constexpr auto name = "toDate"; - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) + static NO_SANITIZE_UNDEFINED Int32 execute(const FromType & from, const DateLUTImpl & time_zone) { // TODO: decide narrow or extended range based on FromType if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) @@ -222,17 +222,17 @@ struct ToDateTransform32Or64Signed return 0; } return (from <= DATE_LUT_MAX_DAY_NUM) - ? static_cast(from) + ? static_cast(from) : time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATE_TIMESTAMP))); } }; -template +template struct ToDateTransform8Or16Signed { static constexpr auto name = "toDate"; - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + static NO_SANITIZE_UNDEFINED UInt16 execute(const FromType & from, const DateLUTImpl &) { if (from < 0) { @@ -247,15 +247,15 @@ struct ToDateTransform8Or16Signed /// Implementation of toDate32 function. 
-template +template struct ToDate32Transform32Or64 { static constexpr auto name = "toDate32"; - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) + static NO_SANITIZE_UNDEFINED Int32 execute(const FromType & from, const DateLUTImpl & time_zone) { if (from < DATE_LUT_MAX_EXTEND_DAY_NUM) - return static_cast(from); + return static_cast(from); else { if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) @@ -268,12 +268,12 @@ struct ToDate32Transform32Or64 } }; -template +template struct ToDate32Transform32Or64Signed { static constexpr auto name = "toDate32"; - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl & time_zone) + static NO_SANITIZE_UNDEFINED Int32 execute(const FromType & from, const DateLUTImpl & time_zone) { static const Int32 daynum_min_offset = -static_cast(time_zone.getDayNumOffsetEpoch()); @@ -287,17 +287,17 @@ struct ToDate32Transform32Or64Signed return daynum_min_offset; return (from < DATE_LUT_MAX_EXTEND_DAY_NUM) - ? static_cast(from) + ? static_cast(from) : time_zone.toDayNum(std::min(time_t(Int64(from)), time_t(MAX_DATETIME64_TIMESTAMP))); } }; -template +template struct ToDate32Transform8Or16Signed { static constexpr auto name = "toDate32"; - static NO_SANITIZE_UNDEFINED ToType execute(const FromType & from, const DateLUTImpl &) + static NO_SANITIZE_UNDEFINED Int32 execute(const FromType & from, const DateLUTImpl &) { return from; } @@ -383,6 +383,7 @@ struct ToDateTime64TransformUnsigned return DecimalUtils::decimalFromComponentsWithMultiplier(std::min(from, MAX_DATETIME64_TIMESTAMP), 0, scale_multiplier); } }; + template struct ToDateTime64TransformSigned { @@ -407,6 +408,7 @@ struct ToDateTime64TransformSigned return DecimalUtils::decimalFromComponentsWithMultiplier(from, 0, scale_multiplier); } }; + template struct ToDateTime64TransformFloat { @@ -432,26 +434,6 @@ struct ToDateTime64TransformFloat } }; -/** Conversion of DateTime64 to Date or DateTime: discards fractional part. - */ -template -struct FromDateTime64Transform -{ - static constexpr auto name = Transform::name; - - const DateTime64::NativeType scale_multiplier; - - FromDateTime64Transform(UInt32 scale) /// NOLINT - : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) - {} - - auto execute(DateTime64::NativeType dt, const DateLUTImpl & time_zone) const - { - const auto c = DecimalUtils::splitWithScaleMultiplier(DateTime64(dt), scale_multiplier); - return Transform::execute(static_cast(c.whole), time_zone); - } -}; - struct ToDateTime64Transform { static constexpr auto name = "toDateTime64"; @@ -972,7 +954,7 @@ struct ConvertThroughParsing size_t next_offset = std::is_same_v ? (*offsets)[i] : (current_offset + fixed_string_size); size_t string_size = std::is_same_v ? 
next_offset - current_offset - 1 : fixed_string_size; - ReadBufferFromMemory read_buffer(&(*chars)[current_offset], string_size); + ReadBufferFromMemory read_buffer(chars->data() + current_offset, string_size); if constexpr (exception_mode == ConvertFromStringExceptionMode::Throw) { @@ -1087,23 +1069,15 @@ struct ConvertThroughParsing parsed = SerializationDecimal::tryReadText( vec_to[i], read_buffer, ToDataType::maxPrecision(), col_to->getScale()); } + else if (std::is_same_v && std::is_same_v + && fixed_string_size == IPV6_BINARY_LENGTH) + { + readBinary(vec_to[i], read_buffer); + parsed = true; + } else { - /// we want to utilize constexpr condition here, which is not mixable with value comparison - do - { - if constexpr (std::is_same_v && std::is_same_v) - { - if (fixed_string_size == IPV6_BINARY_LENGTH) - { - readBinary(vec_to[i], read_buffer); - parsed = true; - break; - } - } - - parsed = tryParseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing); - } while (false); + parsed = tryParseImpl(vec_to[i], read_buffer, local_time_zone, precise_float_parsing); } } @@ -1220,7 +1194,7 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::template execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (( @@ -1229,7 +1203,7 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::template execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (( @@ -1240,7 +1214,7 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::template execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (( @@ -1249,7 +1223,7 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::template execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (( @@ -1258,7 +1232,7 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::template execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (( @@ -1269,7 +1243,7 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::template execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } /// Special case of converting Int8, Int16, Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to DateTime. 
@@ -2475,7 +2449,7 @@ public: } template - ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, UInt32 scale = 0) const + ColumnPtr executeInternal(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, UInt32 scale) const { const IDataType * from_type = arguments[0].type.get(); @@ -2510,7 +2484,7 @@ public: if (scale == 0) { - result_column = executeInternal(arguments, result_type, input_rows_count); + result_column = executeInternal(arguments, result_type, input_rows_count, 0); } else { @@ -2519,7 +2493,7 @@ public: } else { - result_column = executeInternal(arguments, result_type, input_rows_count); + result_column = executeInternal(arguments, result_type, input_rows_count, 0); } if (!result_column) From ced06cb163bdf9280cde5ee49f9ab3eb506228e3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 00:18:38 +0100 Subject: [PATCH 265/283] More guides --- .../03011_definitive_guide_to_cast.reference | 4 ++++ .../0_stateless/03011_definitive_guide_to_cast.sql | 12 +++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference b/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference index b71011d75ff..7c875a24b6d 100644 --- a/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference +++ b/tests/queries/0_stateless/03011_definitive_guide_to_cast.reference @@ -50,6 +50,8 @@ CAST(plus(1, 1), 'UInt8'): 2 -1 \N 0 +0 +1970-01-01 01:00:00 255 123 Hello\0\0\0\0\0 @@ -65,6 +67,8 @@ Hello\0\0\0\0\0 2024-03-16 11:01:25 123 \N \N \N 123 0 0 0 +123 10 10 10 +123 0 0 0 Nullable(UInt8) UInt8 123 123 diff --git a/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql index 76819ad8313..708db0adce0 100644 --- a/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql +++ b/tests/queries/0_stateless/03011_definitive_guide_to_cast.sql @@ -236,6 +236,14 @@ SELECT accurateCastOrNull(1.123456789, 'Float32'); SELECT accurateCastOrDefault(-1, 'UInt64', 0::UInt64); +-- If this parameter is omitted, it is assumed to be the default value of the corresponding data type: + +SELECT accurateCastOrDefault(-1, 'UInt64'); +SELECT accurateCastOrDefault(-1, 'DateTime'); + +-- Unfortunately, this does not work as expected: SELECT accurateCastOrDefault(-1, $$Enum8('None' = 1, 'Hello' = 2, 'World' = 3)$$); +-- https://github.com/ClickHouse/ClickHouse/issues/61495 + -- These functions are case-sensitive, and there are no corresponding operators: SELECT ACCURATECAST(1, 'String'); -- { serverError UNKNOWN_FUNCTION }. @@ -286,12 +294,14 @@ SELECT toString(1710612085::DateTime); SELECT toString(1710612085::DateTime, tz) FROM Values('tz String', 'Europe/Amsterdam', 'America/Los_Angeles'); --- Functions converting to numeric types, date and datetime, IP and UUID, also have versions with -OrNull and -OrZero fallbacks, +-- Functions converting to numeric types, date and datetime, IP and UUID, also have versions with -OrNull, -OrZero, and -OrDefault fallbacks, -- that don't throw exceptions on parsing errors. 
-- They use the same rules as the accurateCast operator: From d627fbef557422cff10dda9f890d6b805fda5a19 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 00:26:07 +0100 Subject: [PATCH 266/283] Fix errors --- src/Functions/FunctionsConversion.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 088076eeeb4..ada7e4ac58d 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -208,7 +208,7 @@ struct ToDateTransform32Or64Signed { static constexpr auto name = "toDate"; - static NO_SANITIZE_UNDEFINED Int32 execute(const FromType & from, const DateLUTImpl & time_zone) + static NO_SANITIZE_UNDEFINED UInt16 execute(const FromType & from, const DateLUTImpl & time_zone) { // TODO: decide narrow or extended range based on FromType if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) @@ -222,7 +222,7 @@ struct ToDateTransform32Or64Signed return 0; } return (from <= DATE_LUT_MAX_DAY_NUM) - ? static_cast<Int32>(from) + ? static_cast<UInt16>(from) : time_zone.toDayNum(std::min(time_t(from), time_t(MAX_DATE_TIMESTAMP))); } }; @@ -255,7 +255,9 @@ struct ToDate32Transform32Or64 static NO_SANITIZE_UNDEFINED Int32 execute(const FromType & from, const DateLUTImpl & time_zone) { if (from < DATE_LUT_MAX_EXTEND_DAY_NUM) + { return static_cast<Int32>(from); + } else { if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) @@ -1216,7 +1216,7 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::template execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } else if constexpr (( @@ -1245,7 +1245,7 @@ struct ConvertImpl && std::is_same_v && std::is_same_v) { - return DateTimeTransformImpl, false>::template execute( + return DateTimeTransformImpl, false>::template execute( arguments, result_type, input_rows_count); } /// Special case of converting Int8, Int16, Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to DateTime.
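The recurring theme in the conversion fixes above (patches 263 and 266) is a transform that needs a runtime parameter, such as the DateTime64 scale, being reachable through overloads whose defaulted arguments silently dropped that parameter. The patches remove the default values so that every call site must pass the parameter explicitly. A minimal self-contained C++ sketch of that defensive pattern, using a hypothetical name rather than the real ClickHouse template machinery:

#include <cassert>
#include <cstdint>

// Hypothetical reduction of the pattern: a conversion transform whose result
// depends on a runtime parameter (the DateTime64 scale multiplier).
struct ToDateTime64TransformSketch
{
    // No default member initializer and no defaulted constructor argument:
    // a call path that forgets to pass the scale now fails to compile
    // instead of silently multiplying by 1.
    const int64_t scale_multiplier;

    explicit ToDateTime64TransformSketch(int64_t scale_multiplier_)
        : scale_multiplier(scale_multiplier_)
    {
    }

    int64_t execute(int64_t seconds) const { return seconds * scale_multiplier; }
};

int main()
{
    ToDateTime64TransformSketch transform(1000000); // scale 6, i.e. microseconds
    assert(transform.execute(3) == 3000000);
    return 0;
}

Removing the defaults turns a silent wrong-result bug into a compile error at every forgetful call site, which is exactly how the dropped `additions` argument was caught here.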
From fac040e08eda9ad958785965f45ebc038155b2de Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 00:34:32 +0100 Subject: [PATCH 267/283] Add documentation --- src/Functions/castOrDefault.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 57cb03e0349..ac04a883c11 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -329,7 +329,18 @@ REGISTER_FUNCTION(CastOrDefault) factory.registerFunction("toUInt64OrDefault", [](ContextPtr context){ return std::make_unique<FunctionToOverloadResolverAdaptor>( std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt64OrDefault", std::make_shared<DataTypeUInt64>())); }); factory.registerFunction("toUInt128OrDefault", [](ContextPtr context){ return std::make_unique<FunctionToOverloadResolverAdaptor>( - std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt128OrDefault", std::make_shared<DataTypeUInt128>())); }); + std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt128OrDefault", std::make_shared<DataTypeUInt128>())); }, + FunctionDocumentation{ + .description=R"( +Converts a string in the first argument of the function to UInt128 by parsing it. +If it cannot parse the value, returns the default value, which can be provided as the second function argument, and if provided, must be of UInt128 type. +If the default value is not provided in the second argument, it is assumed to be zero. +)", + .examples{ + {"Successful conversion", "SELECT toUInt128OrDefault('1', 2::UInt128)", "1"}, + {"Default value", "SELECT toUInt128OrDefault('upyachka', 123456789012345678901234567890::UInt128)", "123456789012345678901234567890"}}, + .categories{"ConversionFunctions"} + }); factory.registerFunction("toUInt256OrDefault", [](ContextPtr context){ return std::make_unique<FunctionToOverloadResolverAdaptor>( std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt256OrDefault", std::make_shared<DataTypeUInt256>())); }); From 96f53df7ada476384b0665f52a7c22cd33ce2415 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 12:02:02 +0100 Subject: [PATCH 268/283] Fix error --- src/Functions/castOrDefault.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index ac04a883c11..c858dfdd589 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -221,7 +221,7 @@ private: size_t scale = 0; std::string time_zone; - if (isDecimal(type)) + if (isDecimal(type) || isDateTime64(type)) { const auto & scale_argument = arguments[additional_argument_index]; @@ -338,7 +338,8 @@ If the default value is not provided in the second argument, it is assumed to be )", .examples{ {"Successful conversion", "SELECT toUInt128OrDefault('1', 2::UInt128)", "1"}, - {"Default value", "SELECT toUInt128OrDefault('upyachka', 123456789012345678901234567890::UInt128)", "123456789012345678901234567890"}}, + {"Default value", "SELECT toUInt128OrDefault('upyachka', 123456789012345678901234567890::UInt128)", "123456789012345678901234567890"}, + {"Implicit default value", "SELECT toUInt128OrDefault('upyachka')", "0"}}, .categories{"ConversionFunctions"} }); factory.registerFunction("toUInt256OrDefault", [](ContextPtr context){ return std::make_unique<FunctionToOverloadResolverAdaptor>( std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt256OrDefault", std::make_shared<DataTypeUInt256>())); }); From 921ec12192fc2461bc828653bd9c3900daee74c8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 12:29:45 +0100 Subject: [PATCH 269/283] Maybe not worse --- src/Functions/FunctionsConversion.cpp | 86 ++++++++------------------- 1 file changed, 24 insertions(+), 62 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index ada7e4ac58d..0d245f809bb 100644 --- 
a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -4776,67 +4777,6 @@ arguments, result_type, input_rows_count); \ } }; -class MonotonicityHelper -{ -public: - using MonotonicityForRange = FunctionCast::MonotonicityForRange; - - template <typename DataType> - static auto monotonicityForType(const DataType * const) - { - return FunctionTo<DataType>::Type::Monotonic::get; - } - - static MonotonicityForRange getMonotonicityInformation(const DataTypePtr & from_type, const IDataType * to_type) - { - if (const auto * type = checkAndGetDataType<DataTypeUInt8>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeUInt16>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeUInt32>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeUInt64>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeUInt128>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeUInt256>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeInt8>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeInt16>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeInt32>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeInt64>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeInt128>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeInt256>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeFloat32>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeFloat64>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeDate>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeDate32>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeDateTime>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeString>(to_type)) - return monotonicityForType(type); - if (isEnum(from_type)) - { - if (const auto * type = checkAndGetDataType<DataTypeEnum8>(to_type)) - return monotonicityForType(type); - if (const auto * type = checkAndGetDataType<DataTypeEnum16>(to_type)) - return monotonicityForType(type); - } - /// other types like Null, FixedString, Array and Tuple have no monotonicity defined - return {}; - } -}; - } @@ -4853,7 +4793,29 @@ FunctionBasePtr createFunctionBaseCast( for (size_t i = 0; i < arguments.size(); ++i) data_types[i] = arguments[i].type; - auto monotonicity = MonotonicityHelper::getMonotonicityInformation(arguments.front().type, return_type.get()); + FunctionCast::MonotonicityForRange monotonicity; + + if (isEnum(arguments.front().type) + && castTypeToEither<DataTypeEnum8, DataTypeEnum16>(return_type.get(), [&](auto & type) + { + monotonicity = FunctionTo<std::decay_t<decltype(type)>>::Type::Monotonic::get; + return true; + })) + { + } + else if (castTypeToEither< + DataTypeUInt8, DataTypeUInt16, DataTypeUInt32, DataTypeUInt64, DataTypeUInt128, DataTypeUInt256, + DataTypeInt8, DataTypeInt16, DataTypeInt32, DataTypeInt64, DataTypeInt128, DataTypeInt256, + DataTypeFloat32, DataTypeFloat64, + DataTypeDate, DataTypeDate32, DataTypeDateTime, + DataTypeString>(return_type.get(), [&](auto & type) + { + monotonicity = FunctionTo<std::decay_t<decltype(type)>>::Type::Monotonic::get; + return true; + })) + { + } + return std::make_unique<FunctionCast>(context, name, 
std::move(monotonicity), data_types, return_type, diagnostic, cast_type); } From 2af6d35752e064bfd7859b23a52948c222f7c716 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 12:53:59 +0100 Subject: [PATCH 270/283] Less crap --- src/Functions/FunctionFactory.cpp | 12 +++ src/Functions/FunctionFactory.h | 7 ++ src/Functions/array/emptyArray.cpp | 3 +- src/Functions/castOrDefault.cpp | 100 ++++++++++----------- src/Functions/coverage.cpp | 6 +- src/Functions/currentProfiles.cpp | 6 +- src/Functions/fromUnixTimestamp64Micro.cpp | 3 +- src/Functions/fromUnixTimestamp64Milli.cpp | 3 +- src/Functions/fromUnixTimestamp64Nano.cpp | 3 +- src/Functions/snowflake.cpp | 12 +-- src/Functions/toUnixTimestamp64Micro.cpp | 3 +- src/Functions/toUnixTimestamp64Milli.cpp | 3 +- src/Functions/toUnixTimestamp64Nano.cpp | 3 +- 13 files changed, 86 insertions(+), 78 deletions(-) diff --git a/src/Functions/FunctionFactory.cpp b/src/Functions/FunctionFactory.cpp index 6a7274376b9..004ef745a93 100644 --- a/src/Functions/FunctionFactory.cpp +++ b/src/Functions/FunctionFactory.cpp @@ -49,6 +49,18 @@ void FunctionFactory::registerFunction( } } +void FunctionFactory::registerFunction( + const std::string & name, + FunctionSimpleCreator creator, + FunctionDocumentation doc, + CaseSensitiveness case_sensitiveness) +{ + registerFunction(name, [my_creator = std::move(creator)](ContextPtr context) + { + return std::make_unique<FunctionToOverloadResolverAdaptor>(my_creator(context)); + }, std::move(doc), std::move(case_sensitiveness)); +} + FunctionOverloadResolverPtr FunctionFactory::getImpl( const std::string & name, diff --git a/src/Functions/FunctionFactory.h b/src/Functions/FunctionFactory.h index 588cae64e16..c2fd34f0488 100644 --- a/src/Functions/FunctionFactory.h +++ b/src/Functions/FunctionFactory.h @@ -17,6 +17,7 @@ namespace DB { using FunctionCreator = std::function<FunctionOverloadResolverPtr(ContextPtr)>; +using FunctionSimpleCreator = std::function<FunctionPtr(ContextPtr)>; using FunctionFactoryData = std::pair<FunctionCreator, FunctionDocumentation>; /** Creates function by name.
@@ -66,6 +67,12 @@ public: FunctionDocumentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive); + void registerFunction( + const std::string & name, + FunctionSimpleCreator creator, + FunctionDocumentation doc = {}, + CaseSensitiveness case_sensitiveness = CaseSensitive); + FunctionDocumentation getDocumentation(const std::string & name) const; private: diff --git a/src/Functions/array/emptyArray.cpp b/src/Functions/array/emptyArray.cpp index 684f8af162a..77c191b3adc 100644 --- a/src/Functions/array/emptyArray.cpp +++ b/src/Functions/array/emptyArray.cpp @@ -49,8 +49,7 @@ private: void registerFunction(FunctionFactory & factory, const String & element_type) { factory.registerFunction(FunctionEmptyArray::getNameImpl(element_type), - [element_type](ContextPtr){ return std::make_unique( - std::make_shared(element_type)); }); + [element_type](ContextPtr){ return std::make_shared(element_type); }); } } diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index c858dfdd589..b5653fca1e9 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -320,16 +320,16 @@ REGISTER_FUNCTION(CastOrDefault) { factory.registerFunction(); - factory.registerFunction("toUInt8OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toUInt8OrDefault", std::make_shared())); }); - factory.registerFunction("toUInt16OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toUInt16OrDefault", std::make_shared())); }); - factory.registerFunction("toUInt32OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toUInt32OrDefault", std::make_shared())); }); - factory.registerFunction("toUInt64OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toUInt64OrDefault", std::make_shared())); }); - factory.registerFunction("toUInt128OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toUInt128OrDefault", std::make_shared())); }, + factory.registerFunction("toUInt8OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toUInt8OrDefault", std::make_shared()); }); + factory.registerFunction("toUInt16OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toUInt16OrDefault", std::make_shared()); }); + factory.registerFunction("toUInt32OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toUInt32OrDefault", std::make_shared()); }); + factory.registerFunction("toUInt64OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toUInt64OrDefault", std::make_shared()); }); + factory.registerFunction("toUInt128OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toUInt128OrDefault", std::make_shared()); }, FunctionDocumentation{ .description=R"( Converts a string in the first argument of the function to UInt128 by parsing it. 
@@ -342,51 +342,51 @@ If the default value is not provided in the second argument, it is assumed to be {"Implicit default value", "SELECT toUInt128OrDefault('upyachka')", "0"}}, .categories{"ConversionFunctions"} }); - factory.registerFunction("toUInt256OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toUInt256OrDefault", std::make_shared())); }); + factory.registerFunction("toUInt256OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toUInt256OrDefault", std::make_shared()); }); - factory.registerFunction("toInt8OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toInt8OrDefault", std::make_shared())); }); - factory.registerFunction("toInt16OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toInt16OrDefault", std::make_shared())); }); - factory.registerFunction("toInt32OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toInt32OrDefault", std::make_shared())); }); - factory.registerFunction("toInt64OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toInt64OrDefault", std::make_shared())); }); - factory.registerFunction("toInt128OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toInt128OrDefault", std::make_shared())); }); - factory.registerFunction("toInt256OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toInt256OrDefault", std::make_shared())); }); + factory.registerFunction("toInt8OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toInt8OrDefault", std::make_shared()); }); + factory.registerFunction("toInt16OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toInt16OrDefault", std::make_shared()); }); + factory.registerFunction("toInt32OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toInt32OrDefault", std::make_shared()); }); + factory.registerFunction("toInt64OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toInt64OrDefault", std::make_shared()); }); + factory.registerFunction("toInt128OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toInt128OrDefault", std::make_shared()); }); + factory.registerFunction("toInt256OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toInt256OrDefault", std::make_shared()); }); - factory.registerFunction("toFloat32OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toFloat32OrDefault", std::make_shared())); }); - factory.registerFunction("toFloat64OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toFloat64OrDefault", std::make_shared())); }); + factory.registerFunction("toFloat32OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toFloat32OrDefault", std::make_shared()); }); + factory.registerFunction("toFloat64OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toFloat64OrDefault", std::make_shared()); }); - factory.registerFunction("toDateOrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toDateOrDefault", std::make_shared())); }); - factory.registerFunction("toDate32OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toDate32OrDefault", std::make_shared())); }); - factory.registerFunction("toDateTimeOrDefault", [](ContextPtr context){ return 
std::make_unique( - std::make_shared(context, "toDateTimeOrDefault", std::make_shared())); }); - factory.registerFunction("toDateTime64OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toDateTime64OrDefault", std::make_shared(3 /* default scale */))); }); + factory.registerFunction("toDateOrDefault", [](ContextPtr context){ + return std::make_shared(context, "toDateOrDefault", std::make_shared()); }); + factory.registerFunction("toDate32OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toDate32OrDefault", std::make_shared()); }); + factory.registerFunction("toDateTimeOrDefault", [](ContextPtr context){ + return std::make_shared(context, "toDateTimeOrDefault", std::make_shared()); }); + factory.registerFunction("toDateTime64OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toDateTime64OrDefault", std::make_shared(3 /* default scale */)); }); - factory.registerFunction("toDecimal32OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toDecimal32OrDefault", createDecimalMaxPrecision(0))); }); - factory.registerFunction("toDecimal64OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toDecimal64OrDefault", createDecimalMaxPrecision(0))); }); - factory.registerFunction("toDecimal128OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toDecimal128OrDefault", createDecimalMaxPrecision(0))); }); - factory.registerFunction("toDecimal256OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toDecimal256OrDefault", createDecimalMaxPrecision(0))); }); + factory.registerFunction("toDecimal32OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toDecimal32OrDefault", createDecimalMaxPrecision(0)); }); + factory.registerFunction("toDecimal64OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toDecimal64OrDefault", createDecimalMaxPrecision(0)); }); + factory.registerFunction("toDecimal128OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toDecimal128OrDefault", createDecimalMaxPrecision(0)); }); + factory.registerFunction("toDecimal256OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toDecimal256OrDefault", createDecimalMaxPrecision(0)); }); - factory.registerFunction("toUUIDOrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toUUIDOrDefault", std::make_shared())); }); - factory.registerFunction("toIPv4OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toIPv4OrDefault", std::make_shared())); }); - factory.registerFunction("toIPv6OrDefault", [](ContextPtr context){ return std::make_unique( - std::make_shared(context, "toIPv6OrDefault", std::make_shared())); }); + factory.registerFunction("toUUIDOrDefault", [](ContextPtr context){ + return std::make_shared(context, "toUUIDOrDefault", std::make_shared()); }); + factory.registerFunction("toIPv4OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toIPv4OrDefault", std::make_shared()); }); + factory.registerFunction("toIPv6OrDefault", [](ContextPtr context){ + return std::make_shared(context, "toIPv6OrDefault", std::make_shared()); }); } } diff --git a/src/Functions/coverage.cpp b/src/Functions/coverage.cpp index a1a43d0cf58..97f807e22b7 100644 --- a/src/Functions/coverage.cpp +++ b/src/Functions/coverage.cpp @@ -93,7 +93,7 @@ public: REGISTER_FUNCTION(Coverage) 
{ - factory.registerFunction("coverageCurrent", [](ContextPtr){ return std::make_unique(std::make_shared(Kind::Current)); }, + factory.registerFunction("coverageCurrent", [](ContextPtr){ return std::make_shared(Kind::Current); }, FunctionDocumentation { .description=R"( @@ -124,7 +124,7 @@ See https://clang.llvm.org/docs/SanitizerCoverage.html for more information. .categories{"Introspection"} }); - factory.registerFunction("coverageCumulative", [](ContextPtr){ return std::make_unique(std::make_shared(Kind::Cumulative)); }, + factory.registerFunction("coverageCumulative", [](ContextPtr){ return std::make_shared(Kind::Cumulative); }, FunctionDocumentation { .description=R"( @@ -140,7 +140,7 @@ See the `coverageCurrent` function for the details. .categories{"Introspection"} }); - factory.registerFunction("coverageAll", [](ContextPtr){ return std::make_unique(std::make_shared(Kind::All)); }, + factory.registerFunction("coverageAll", [](ContextPtr){ return std::make_shared(Kind::All); }, FunctionDocumentation { .description=R"( diff --git a/src/Functions/currentProfiles.cpp b/src/Functions/currentProfiles.cpp index 77c8a20ccee..8f14943e011 100644 --- a/src/Functions/currentProfiles.cpp +++ b/src/Functions/currentProfiles.cpp @@ -98,9 +98,9 @@ namespace REGISTER_FUNCTION(Profiles) { - factory.registerFunction("currentProfiles", [](ContextPtr context){ return std::make_unique(std::make_shared(context, Kind::currentProfiles)); }); - factory.registerFunction("enabledProfiles", [](ContextPtr context){ return std::make_unique(std::make_shared(context, Kind::enabledProfiles)); }); - factory.registerFunction("defaultProfiles", [](ContextPtr context){ return std::make_unique(std::make_shared(context, Kind::defaultProfiles)); }); + factory.registerFunction("currentProfiles", [](ContextPtr context){ return std::make_shared(context, Kind::currentProfiles); }); + factory.registerFunction("enabledProfiles", [](ContextPtr context){ return std::make_shared(context, Kind::enabledProfiles); }); + factory.registerFunction("defaultProfiles", [](ContextPtr context){ return std::make_shared(context, Kind::defaultProfiles); }); } } diff --git a/src/Functions/fromUnixTimestamp64Micro.cpp b/src/Functions/fromUnixTimestamp64Micro.cpp index 191e2137a0d..d96e0232335 100644 --- a/src/Functions/fromUnixTimestamp64Micro.cpp +++ b/src/Functions/fromUnixTimestamp64Micro.cpp @@ -7,8 +7,7 @@ namespace DB REGISTER_FUNCTION(FromUnixTimestamp64Micro) { factory.registerFunction("fromUnixTimestamp64Micro", - [](ContextPtr context){ return std::make_unique( - std::make_shared(6, "fromUnixTimestamp64Micro", context)); }); + [](ContextPtr context){ return std::make_shared(6, "fromUnixTimestamp64Micro", context); }); } } diff --git a/src/Functions/fromUnixTimestamp64Milli.cpp b/src/Functions/fromUnixTimestamp64Milli.cpp index c6d4fcd30a2..aa77e8043c1 100644 --- a/src/Functions/fromUnixTimestamp64Milli.cpp +++ b/src/Functions/fromUnixTimestamp64Milli.cpp @@ -7,8 +7,7 @@ namespace DB REGISTER_FUNCTION(FromUnixTimestamp64Milli) { factory.registerFunction("fromUnixTimestamp64Milli", - [](ContextPtr context){ return std::make_unique( - std::make_shared(3, "fromUnixTimestamp64Milli", context)); }); + [](ContextPtr context){ return std::make_shared(3, "fromUnixTimestamp64Milli", context); }); } } diff --git a/src/Functions/fromUnixTimestamp64Nano.cpp b/src/Functions/fromUnixTimestamp64Nano.cpp index 2b5a7addbfc..f9d69219933 100644 --- a/src/Functions/fromUnixTimestamp64Nano.cpp +++ b/src/Functions/fromUnixTimestamp64Nano.cpp @@ -7,8 +7,7 @@ 
namespace DB
 REGISTER_FUNCTION(FromUnixTimestamp64Nano)
 {
     factory.registerFunction("fromUnixTimestamp64Nano",
-        [](ContextPtr context){ return std::make_unique<FunctionToOverloadResolverAdaptor>(
-            std::make_shared<FunctionFromUnixTimestamp64>(9, "fromUnixTimestamp64Nano", context)); });
+        [](ContextPtr context){ return std::make_shared<FunctionFromUnixTimestamp64>(9, "fromUnixTimestamp64Nano", context); });
 }
 
 }
diff --git a/src/Functions/snowflake.cpp b/src/Functions/snowflake.cpp
index f2dd1f1c51d..4a2d502a31a 100644
--- a/src/Functions/snowflake.cpp
+++ b/src/Functions/snowflake.cpp
@@ -249,28 +249,24 @@ public:
 REGISTER_FUNCTION(DateTimeToSnowflake)
 {
     factory.registerFunction("dateTimeToSnowflake",
-        [](ContextPtr){ return std::make_unique<FunctionToOverloadResolverAdaptor>(
-            std::make_shared<FunctionDateTimeToSnowflake>("dateTimeToSnowflake")); });
+        [](ContextPtr){ return std::make_shared<FunctionDateTimeToSnowflake>("dateTimeToSnowflake"); });
 }
 
 REGISTER_FUNCTION(DateTime64ToSnowflake)
 {
     factory.registerFunction("dateTime64ToSnowflake",
-        [](ContextPtr){ return std::make_unique<FunctionToOverloadResolverAdaptor>(
-            std::make_shared<FunctionDateTime64ToSnowflake>("dateTime64ToSnowflake")); });
+        [](ContextPtr){ return std::make_shared<FunctionDateTime64ToSnowflake>("dateTime64ToSnowflake"); });
 }
 
 REGISTER_FUNCTION(SnowflakeToDateTime)
 {
     factory.registerFunction("snowflakeToDateTime",
-        [](ContextPtr context){ return std::make_unique<FunctionToOverloadResolverAdaptor>(
-            std::make_shared<FunctionSnowflakeToDateTime>("snowflakeToDateTime", context)); });
+        [](ContextPtr context){ return std::make_shared<FunctionSnowflakeToDateTime>("snowflakeToDateTime", context); });
 }
 
 REGISTER_FUNCTION(SnowflakeToDateTime64)
 {
     factory.registerFunction("snowflakeToDateTime64",
-        [](ContextPtr context){ return std::make_unique<FunctionToOverloadResolverAdaptor>(
-            std::make_shared<FunctionSnowflakeToDateTime64>("snowflakeToDateTime64", context)); });
+        [](ContextPtr context){ return std::make_shared<FunctionSnowflakeToDateTime64>("snowflakeToDateTime64", context); });
 }
 
 }
diff --git a/src/Functions/toUnixTimestamp64Micro.cpp b/src/Functions/toUnixTimestamp64Micro.cpp
index fd35e2a7a73..964ad5a2c18 100644
--- a/src/Functions/toUnixTimestamp64Micro.cpp
+++ b/src/Functions/toUnixTimestamp64Micro.cpp
@@ -7,8 +7,7 @@ namespace DB
 REGISTER_FUNCTION(ToUnixTimestamp64Micro)
 {
     factory.registerFunction("toUnixTimestamp64Micro",
-        [](ContextPtr){ return std::make_unique<FunctionToOverloadResolverAdaptor>(
-            std::make_shared<FunctionToUnixTimestamp64>(6, "toUnixTimestamp64Micro")); });
+        [](ContextPtr){ return std::make_shared<FunctionToUnixTimestamp64>(6, "toUnixTimestamp64Micro"); });
 }
 
 }
diff --git a/src/Functions/toUnixTimestamp64Milli.cpp b/src/Functions/toUnixTimestamp64Milli.cpp
index e6a680f941a..bc92a6d1fe3 100644
--- a/src/Functions/toUnixTimestamp64Milli.cpp
+++ b/src/Functions/toUnixTimestamp64Milli.cpp
@@ -7,8 +7,7 @@ namespace DB
 REGISTER_FUNCTION(ToUnixTimestamp64Milli)
 {
     factory.registerFunction("toUnixTimestamp64Milli",
-        [](ContextPtr){ return std::make_unique<FunctionToOverloadResolverAdaptor>(
-            std::make_shared<FunctionToUnixTimestamp64>(3, "toUnixTimestamp64Milli")); });
+        [](ContextPtr){ return std::make_shared<FunctionToUnixTimestamp64>(3, "toUnixTimestamp64Milli"); });
 }
 
 }
diff --git a/src/Functions/toUnixTimestamp64Nano.cpp b/src/Functions/toUnixTimestamp64Nano.cpp
index 257f011603c..8829b00bf56 100644
--- a/src/Functions/toUnixTimestamp64Nano.cpp
+++ b/src/Functions/toUnixTimestamp64Nano.cpp
@@ -7,8 +7,7 @@ namespace DB
 REGISTER_FUNCTION(ToUnixTimestamp64Nano)
 {
     factory.registerFunction("toUnixTimestamp64Nano",
-        [](ContextPtr){ return std::make_unique<FunctionToOverloadResolverAdaptor>(
-            std::make_shared<FunctionToUnixTimestamp64>(9, "toUnixTimestamp64Nano")); });
+        [](ContextPtr){ return std::make_shared<FunctionToUnixTimestamp64>(9, "toUnixTimestamp64Nano"); });
 }
 
 }

From 427a8b3264cd9000b6f7ab13f13ceac0878d000a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 17 Mar 2024 13:20:49 +0100
Subject: [PATCH 271/283] Less crap

---
 programs/local/LocalServer.cpp        |  4 ++++
 src/Functions/CRC.cpp                 | 21 ++++++++-------------
 src/Functions/FunctionFQDN.cpp        |  2 +-
 src/Functions/FunctionFactory.h       | 21
++++++--------------- src/Functions/FunctionsConversion.cpp | 15 ++++++--------- src/Functions/FunctionsRound.cpp | 10 +++++----- src/Functions/caseWithExpression.cpp | 4 +--- src/Functions/getFuzzerData.h | 8 ++------ src/Functions/multiIf.cpp | 6 ++---- src/Functions/toFixedString.h | 2 -- 10 files changed, 35 insertions(+), 58 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 50f9c242712..0dd3823cc6d 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -1013,13 +1013,16 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) auto it = argv.begin() + 1; for (; *it; ++it) + { if (strcmp(*it, "--") == 0) { ++it; break; } + } while (*it) + { if (strncmp(*it, "--", 2) != 0) { *(p++) = *it; @@ -1027,6 +1030,7 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) } else ++it; + } *pargc = static_cast(p - &(*pargv)[0]); *p = nullptr; diff --git a/src/Functions/CRC.cpp b/src/Functions/CRC.cpp index ba13fcf78f1..49d6dd6fa52 100644 --- a/src/Functions/CRC.cpp +++ b/src/Functions/CRC.cpp @@ -51,7 +51,7 @@ struct CRC32IEEEImpl : public CRCImpl static constexpr auto name = "CRC32IEEE"; }; -struct CRC32ZLIBImpl +struct CRC32ZLibImpl { using ReturnType = UInt32; static constexpr auto name = "CRC32"; @@ -133,13 +133,14 @@ private: } }; -template +template using FunctionCRC = FunctionStringOrArrayToT, T, typename T::ReturnType>; + // The same as IEEE variant, but uses 0xffffffff as initial value // This is the default // -// (And zlib is used here, since it has optimized version) -using FunctionCRC32ZLIB = FunctionCRC; +// (And ZLib is used here, since it has optimized version) +using FunctionCRC32ZLib = FunctionCRC; // Uses CRC-32-IEEE 802.3 polynomial using FunctionCRC32IEEE = FunctionCRC; // Uses CRC-64-ECMA polynomial @@ -147,17 +148,11 @@ using FunctionCRC64ECMA = FunctionCRC; } -template -void registerFunctionCRCImpl(FunctionFactory & factory) -{ - factory.registerFunction(T::name, {}, FunctionFactory::CaseInsensitive); -} - REGISTER_FUNCTION(CRC) { - registerFunctionCRCImpl(factory); - registerFunctionCRCImpl(factory); - registerFunctionCRCImpl(factory); + factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::CaseInsensitive); } } diff --git a/src/Functions/FunctionFQDN.cpp b/src/Functions/FunctionFQDN.cpp index b054ff8e1d7..108a96216fd 100644 --- a/src/Functions/FunctionFQDN.cpp +++ b/src/Functions/FunctionFQDN.cpp @@ -47,7 +47,7 @@ public: REGISTER_FUNCTION(FQDN) { factory.registerFunction({}, FunctionFactory::CaseInsensitive); - factory.registerFunction("fullHostName"); + factory.registerAlias("fullHostName", "FQDN"); } } diff --git a/src/Functions/FunctionFactory.h b/src/Functions/FunctionFactory.h index c2fd34f0488..bb43d4719b8 100644 --- a/src/Functions/FunctionFactory.h +++ b/src/Functions/FunctionFactory.h @@ -35,15 +35,6 @@ public: registerFunction(Function::name, std::move(doc), case_sensitiveness); } - template - void registerFunction(const std::string & name, FunctionDocumentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive) - { - if constexpr (std::is_base_of_v) - registerFunction(name, &adaptFunctionToOverloadResolver, std::move(doc), case_sensitiveness); - else - registerFunction(name, &Function::create, std::move(doc), case_sensitiveness); - } - /// This function is used by YQL - innovative transactional DBMS that depends on ClickHouse by 
source code. std::vector getAllNames() const; @@ -81,17 +72,17 @@ private: Functions functions; Functions case_insensitive_functions; - template - static FunctionOverloadResolverPtr adaptFunctionToOverloadResolver(ContextPtr context) - { - return std::make_unique(Function::create(context)); - } - const Functions & getMap() const override { return functions; } const Functions & getCaseInsensitiveMap() const override { return case_insensitive_functions; } String getFactoryName() const override { return "FunctionFactory"; } + + template + void registerFunction(const std::string & name, FunctionDocumentation doc = {}, CaseSensitiveness case_sensitiveness = CaseSensitive) + { + registerFunction(name, &Function::create, std::move(doc), case_sensitiveness); + } }; const String & getFunctionCanonicalNameIfAny(const String & name); diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 0d245f809bb..ceff4f3fd7e 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1955,9 +1955,6 @@ public: static constexpr bool to_decimal = IsDataTypeDecimal && !to_datetime64; static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } - static FunctionPtr create() { return std::make_shared(); } - - FunctionConvert() = default; explicit FunctionConvert(ContextPtr context_) : context(context_) {} String getName() const override @@ -3295,9 +3292,9 @@ arguments, result_type, input_rows_count); \ }; } - static WrapperType createStringWrapper(const DataTypePtr & from_type) + WrapperType createStringWrapper(const DataTypePtr & from_type) const { - FunctionPtr function = FunctionToString::create(); + FunctionPtr function = FunctionToString::create(context); return createFunctionAdaptor(function, from_type); } @@ -3318,9 +3315,9 @@ arguments, result_type, input_rows_count); \ #define GENERATE_INTERVAL_CASE(INTERVAL_KIND) \ case IntervalKind::Kind::INTERVAL_KIND: \ - return createFunctionAdaptor(FunctionConvert::create(), from_type); + return createFunctionAdaptor(FunctionConvert::create(context), from_type); - static WrapperType createIntervalWrapper(const DataTypePtr & from_type, IntervalKind kind) + WrapperType createIntervalWrapper(const DataTypePtr & from_type, IntervalKind kind) const { switch (kind.kind) { @@ -4207,7 +4204,7 @@ arguments, result_type, input_rows_count); \ return createStringToEnumWrapper(); else if (isNativeNumber(from_type) || isEnum(from_type)) { - auto function = Function::create(); + auto function = Function::create(context); return createFunctionAdaptor(function, from_type); } else @@ -4846,7 +4843,7 @@ REGISTER_FUNCTION(Conversion) /// MySQL compatibility alias. Cannot be registered as alias, /// because we don't want it to be normalized to toDate in queries, /// otherwise CREATE DICTIONARY query breaks. 
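
Since the name-taking templated `registerFunction` overload has been moved out of the public interface, a registration that must keep a distinct, non-normalized name now passes its creator explicitly, as the replacement line just below shows. Schematically (this mirrors the diff line itself, not new API):

``` cpp
/// "DATE" stays a full function rather than an alias, so it is not normalized
/// to toDate during query rewriting; the creator is a plain function pointer.
factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::CaseInsensitive);
```
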
- factory.registerFunction("DATE", {}, FunctionFactory::CaseInsensitive); + factory.registerFunction("DATE", &FunctionToDate::create, {}, FunctionFactory::CaseInsensitive); factory.registerFunction(); factory.registerFunction(); diff --git a/src/Functions/FunctionsRound.cpp b/src/Functions/FunctionsRound.cpp index 02fe1d659de..059476acb40 100644 --- a/src/Functions/FunctionsRound.cpp +++ b/src/Functions/FunctionsRound.cpp @@ -7,11 +7,11 @@ namespace DB REGISTER_FUNCTION(Round) { - factory.registerFunction("round", {}, FunctionFactory::CaseInsensitive); - factory.registerFunction("roundBankers", {}, FunctionFactory::CaseSensitive); - factory.registerFunction("floor", {}, FunctionFactory::CaseInsensitive); - factory.registerFunction("ceil", {}, FunctionFactory::CaseInsensitive); - factory.registerFunction("trunc", {}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::CaseSensitive); + factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::CaseInsensitive); + factory.registerFunction({}, FunctionFactory::CaseInsensitive); factory.registerFunction(); /// Compatibility aliases. diff --git a/src/Functions/caseWithExpression.cpp b/src/Functions/caseWithExpression.cpp index 9547cd200b2..71fccc8436e 100644 --- a/src/Functions/caseWithExpression.cpp +++ b/src/Functions/caseWithExpression.cpp @@ -113,9 +113,7 @@ REGISTER_FUNCTION(CaseWithExpression) factory.registerFunction(); /// These are obsolete function names. - factory.registerFunction("caseWithExpr"); + factory.registerAlias("caseWithExpr", "caseWithExpression"); } } - - diff --git a/src/Functions/getFuzzerData.h b/src/Functions/getFuzzerData.h index 635ca2bdce9..8d7b3c090c4 100644 --- a/src/Functions/getFuzzerData.h +++ b/src/Functions/getFuzzerData.h @@ -7,6 +7,7 @@ namespace DB { + class FunctionGetFuzzerData : public IFunction { inline static String fuzz_data; @@ -14,12 +15,7 @@ class FunctionGetFuzzerData : public IFunction public: static constexpr auto name = "getFuzzerData"; - inline static FunctionPtr create(ContextPtr) { return create(); } - - static FunctionPtr create() - { - return std::make_shared(); - } + inline static FunctionPtr create(ContextPtr) { return std::make_shared(); } inline String getName() const override { return name; } diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 81304f3afbd..49c45d0c0be 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -549,8 +549,8 @@ REGISTER_FUNCTION(MultiIf) factory.registerFunction(); /// These are obsolete function names. 
- factory.registerFunction("caseWithoutExpr"); - factory.registerFunction("caseWithoutExpression"); + factory.registerAlias("caseWithoutExpr", "multiIf"); + factory.registerAlias("caseWithoutExpression", "multiIf"); } FunctionOverloadResolverPtr createInternalMultiIfOverloadResolver(bool allow_execute_multiif_columnar, bool allow_experimental_variant_type, bool use_variant_as_common_type) @@ -559,5 +559,3 @@ FunctionOverloadResolverPtr createInternalMultiIfOverloadResolver(bool allow_exe } } - - diff --git a/src/Functions/toFixedString.h b/src/Functions/toFixedString.h index 7bee666c5dd..9c7ffc48004 100644 --- a/src/Functions/toFixedString.h +++ b/src/Functions/toFixedString.h @@ -34,7 +34,6 @@ class FunctionToFixedString : public IFunction public: static constexpr auto name = "toFixedString"; static FunctionPtr create(ContextPtr) { return std::make_shared(); } - static FunctionPtr create() { return std::make_shared(); } String getName() const override { @@ -158,4 +157,3 @@ public: }; } - From 7c35f1d07e5d9c344bdb3a14e6f03efd76212e4e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 13:23:09 +0100 Subject: [PATCH 272/283] Less crap --- src/Functions/getFuzzerData.cpp | 48 ++++++++++++++++++++++++++++++++- src/Functions/getFuzzerData.h | 46 ------------------------------- 2 files changed, 47 insertions(+), 47 deletions(-) delete mode 100644 src/Functions/getFuzzerData.h diff --git a/src/Functions/getFuzzerData.cpp b/src/Functions/getFuzzerData.cpp index 6d748619926..5c536477401 100644 --- a/src/Functions/getFuzzerData.cpp +++ b/src/Functions/getFuzzerData.cpp @@ -1,13 +1,59 @@ #ifdef FUZZING_MODE -#include + +#include +#include +#include +#include + namespace DB { +namespace +{ + +class FunctionGetFuzzerData : public IFunction +{ + inline static String fuzz_data; + +public: + static constexpr auto name = "getFuzzerData"; + + inline static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + inline String getName() const override { return name; } + + inline size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + { + return std::make_shared(); + } + + inline bool isDeterministic() const override { return false; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName &, + const DataTypePtr &, + size_t input_rows_count) const override + { + return DataTypeString().createColumnConst(input_rows_count, fuzz_data); + } + + static void update(const String & fuzz_data_) + { + fuzz_data = fuzz_data_; + } +}; + +} + REGISTER_FUNCTION(GetFuzzerData) { factory.registerFunction(); } } + #endif diff --git a/src/Functions/getFuzzerData.h b/src/Functions/getFuzzerData.h deleted file mode 100644 index 8d7b3c090c4..00000000000 --- a/src/Functions/getFuzzerData.h +++ /dev/null @@ -1,46 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -namespace DB -{ - -class FunctionGetFuzzerData : public IFunction -{ - inline static String fuzz_data; - -public: - static constexpr auto name = "getFuzzerData"; - - inline static FunctionPtr create(ContextPtr) { return std::make_shared(); } - - inline String getName() const override { return name; } - - inline size_t getNumberOfArguments() const override { return 0; } - - DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override - { - return std::make_shared(); - } - - inline bool 
isDeterministic() const override { return false; }
-
-    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
-
-    ColumnPtr executeImpl(const ColumnsWithTypeAndName &,
-                          const DataTypePtr &,
-                          size_t input_rows_count) const override
-    {
-        return DataTypeString().createColumnConst(input_rows_count, fuzz_data);
-    }
-
-    static void update(const String & fuzz_data_)
-    {
-        fuzz_data = fuzz_data_;
-    }
-};
-
-}

From eec0bf2f52e0f7465de2e4b78af5886d13d14893 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 17 Mar 2024 13:34:34 +0100
Subject: [PATCH 273/283] Fix style

---
 src/Functions/castOrDefault.cpp | 100 ++++++++++++++++----------
 1 file changed, 50 insertions(+), 50 deletions(-)

diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp
index b5653fca1e9..44b39811882 100644
--- a/src/Functions/castOrDefault.cpp
+++ b/src/Functions/castOrDefault.cpp
@@ -320,16 +320,16 @@ REGISTER_FUNCTION(CastOrDefault)
 {
     factory.registerFunction<FunctionCastOrDefault>();
 
-    factory.registerFunction("toUInt8OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt8OrDefault", std::make_shared<DataTypeUInt8>()); });
-    factory.registerFunction("toUInt16OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt16OrDefault", std::make_shared<DataTypeUInt16>()); });
-    factory.registerFunction("toUInt32OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt32OrDefault", std::make_shared<DataTypeUInt32>()); });
-    factory.registerFunction("toUInt64OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt64OrDefault", std::make_shared<DataTypeUInt64>()); });
-    factory.registerFunction("toUInt128OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt128OrDefault", std::make_shared<DataTypeUInt128>()); },
+    factory.registerFunction("toUInt8OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt8OrDefault", std::make_shared<DataTypeUInt8>()); });
+    factory.registerFunction("toUInt16OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt16OrDefault", std::make_shared<DataTypeUInt16>()); });
+    factory.registerFunction("toUInt32OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt32OrDefault", std::make_shared<DataTypeUInt32>()); });
+    factory.registerFunction("toUInt64OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt64OrDefault", std::make_shared<DataTypeUInt64>()); });
+    factory.registerFunction("toUInt128OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt128OrDefault", std::make_shared<DataTypeUInt128>()); },
     FunctionDocumentation{
         .description=R"(
 Converts a string in the first argument of the function to UInt128 by parsing it.
@@ -342,51 +342,51 @@ If the default value is not provided in the second argument, it is assumed to be
         {"Implicit default value", "SELECT toUInt128OrDefault('upyachka')", "0"}},
         .categories{"ConversionFunctions"}
     });
-    factory.registerFunction("toUInt256OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt256OrDefault", std::make_shared<DataTypeUInt256>()); });
+    factory.registerFunction("toUInt256OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toUInt256OrDefault", std::make_shared<DataTypeUInt256>()); });
 
-    factory.registerFunction("toInt8OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toInt8OrDefault", std::make_shared<DataTypeInt8>()); });
-    factory.registerFunction("toInt16OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toInt16OrDefault", std::make_shared<DataTypeInt16>()); });
-    factory.registerFunction("toInt32OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toInt32OrDefault", std::make_shared<DataTypeInt32>()); });
-    factory.registerFunction("toInt64OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toInt64OrDefault", std::make_shared<DataTypeInt64>()); });
-    factory.registerFunction("toInt128OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toInt128OrDefault", std::make_shared<DataTypeInt128>()); });
-    factory.registerFunction("toInt256OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toInt256OrDefault", std::make_shared<DataTypeInt256>()); });
+    factory.registerFunction("toInt8OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toInt8OrDefault", std::make_shared<DataTypeInt8>()); });
+    factory.registerFunction("toInt16OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toInt16OrDefault", std::make_shared<DataTypeInt16>()); });
+    factory.registerFunction("toInt32OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toInt32OrDefault", std::make_shared<DataTypeInt32>()); });
+    factory.registerFunction("toInt64OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toInt64OrDefault", std::make_shared<DataTypeInt64>()); });
+    factory.registerFunction("toInt128OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toInt128OrDefault", std::make_shared<DataTypeInt128>()); });
+    factory.registerFunction("toInt256OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toInt256OrDefault", std::make_shared<DataTypeInt256>()); });
 
-    factory.registerFunction("toFloat32OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toFloat32OrDefault", std::make_shared<DataTypeFloat32>()); });
-    factory.registerFunction("toFloat64OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toFloat64OrDefault", std::make_shared<DataTypeFloat64>()); });
+    factory.registerFunction("toFloat32OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toFloat32OrDefault", std::make_shared<DataTypeFloat32>()); });
+    factory.registerFunction("toFloat64OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toFloat64OrDefault", std::make_shared<DataTypeFloat64>()); });
 
-    factory.registerFunction("toDateOrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDateOrDefault", std::make_shared<DataTypeDate>()); });
-    factory.registerFunction("toDate32OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDate32OrDefault", std::make_shared<DataTypeDate32>()); });
-    factory.registerFunction("toDateTimeOrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDateTimeOrDefault", std::make_shared<DataTypeDateTime>()); });
-    factory.registerFunction("toDateTime64OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDateTime64OrDefault", std::make_shared<DataTypeDateTime64>(3 /* default scale */)); });
+    factory.registerFunction("toDateOrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDateOrDefault", std::make_shared<DataTypeDate>()); });
+    factory.registerFunction("toDate32OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDate32OrDefault", std::make_shared<DataTypeDate32>()); });
+    factory.registerFunction("toDateTimeOrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDateTimeOrDefault", std::make_shared<DataTypeDateTime>()); });
+    factory.registerFunction("toDateTime64OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDateTime64OrDefault", std::make_shared<DataTypeDateTime64>(3 /* default scale */)); });
 
-    factory.registerFunction("toDecimal32OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDecimal32OrDefault", createDecimalMaxPrecision<Decimal32>(0)); });
-    factory.registerFunction("toDecimal64OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDecimal64OrDefault", createDecimalMaxPrecision<Decimal64>(0)); });
-    factory.registerFunction("toDecimal128OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDecimal128OrDefault", createDecimalMaxPrecision<Decimal128>(0)); });
-    factory.registerFunction("toDecimal256OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDecimal256OrDefault", createDecimalMaxPrecision<Decimal256>(0)); });
+    factory.registerFunction("toDecimal32OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDecimal32OrDefault", createDecimalMaxPrecision<Decimal32>(0)); });
+    factory.registerFunction("toDecimal64OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDecimal64OrDefault", createDecimalMaxPrecision<Decimal64>(0)); });
+    factory.registerFunction("toDecimal128OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDecimal128OrDefault", createDecimalMaxPrecision<Decimal128>(0)); });
+    factory.registerFunction("toDecimal256OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toDecimal256OrDefault", createDecimalMaxPrecision<Decimal256>(0)); });
 
-    factory.registerFunction("toUUIDOrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toUUIDOrDefault", std::make_shared<DataTypeUUID>()); });
-    factory.registerFunction("toIPv4OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toIPv4OrDefault", std::make_shared<DataTypeIPv4>()); });
-    factory.registerFunction("toIPv6OrDefault", [](ContextPtr context){
-        return std::make_shared<FunctionCastOrDefaultTyped>(context, "toIPv6OrDefault", std::make_shared<DataTypeIPv6>()); });
+    factory.registerFunction("toUUIDOrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toUUIDOrDefault", std::make_shared<DataTypeUUID>()); });
+    factory.registerFunction("toIPv4OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toIPv4OrDefault", std::make_shared<DataTypeIPv4>()); });
+    factory.registerFunction("toIPv6OrDefault", [](ContextPtr context)
+    { return std::make_shared<FunctionCastOrDefaultTyped>(context, "toIPv6OrDefault", std::make_shared<DataTypeIPv6>()); });
 }
 
 }

From 938a54c85c9b82bd51607080239574b0bb83d311 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 17 Mar 2024 14:12:48 +0100
Subject: [PATCH 274/283] Update reference

---
 .../02415_all_new_functions_must_be_documented.reference | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
index e15002da69c..8b85ac48c16 100644
--- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference
+++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -194,10 +194,7 @@ blockSerializedSize blockSize buildId byteSize -caseWithExpr caseWithExpression -caseWithoutExpr -caseWithoutExpression catboostEvaluate cbrt ceil @@ -312,7 +309,6 @@ fromUnixTimestamp64Micro fromUnixTimestamp64Milli fromUnixTimestamp64Nano fromUnixTimestampInJodaSyntax -fullHostName fuzzBits gccMurmurHash gcd From 4527b3e0d4b1e02285fcfc3579c8c48c5b7657b2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 15:17:58 +0100 Subject: [PATCH 275/283] Remove useless code --- CMakeLists.txt | 5 ----- cmake/fuzzer.cmake | 17 ----------------- 2 files changed, 22 deletions(-) delete mode 100644 cmake/fuzzer.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index eff6dd3ff6a..957bb3f71de 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,11 +110,6 @@ endif() # - sanitize.cmake add_library(global-libs INTERFACE) -# We don't want to instrument everything with fuzzer, but only specific targets (see below), -# also, since we build our own llvm, we specifically don't want to instrument -# libFuzzer library itself - it would result in infinite recursion -#include (cmake/fuzzer.cmake) - include (cmake/sanitize.cmake) option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON) diff --git a/cmake/fuzzer.cmake b/cmake/fuzzer.cmake deleted file mode 100644 index dd0c4b080fe..00000000000 --- a/cmake/fuzzer.cmake +++ /dev/null @@ -1,17 +0,0 @@ -# see ./CMakeLists.txt for variable declaration -if (FUZZER) - if (FUZZER STREQUAL "libfuzzer") - # NOTE: Eldar Zaitov decided to name it "libfuzzer" instead of "fuzzer" to keep in mind another possible fuzzer backends. - # NOTE: no-link means that all the targets are built with instrumentation for fuzzer, but only some of them - # (tests) have entry point for fuzzer and it's not checked. 
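
For background: `-fsanitize=fuzzer-no-link` instruments code for libFuzzer coverage feedback without linking the libFuzzer driver (its `main`), whereas plain `-fsanitize=fuzzer` links the driver as well. The global flags deleted below are superseded by the per-target loop in the main CMakeLists, whose final shape (after a later patch in this series) is roughly the following, assuming a collected list named `all_targets`:

``` cmake
foreach (target ${all_targets})
    # Instrument everything, so library code feeds coverage signals to the fuzzer...
    target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link")
    # ...but link the libFuzzer entry point only into fuzz targets and `clickhouse` itself.
    if (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse")
        target_link_libraries(${target} PUBLIC ch_contrib::fuzzer)
    endif ()
endforeach ()
```
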
- set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1") - - # NOTE: oss-fuzz can change LIB_FUZZING_ENGINE variable - if (NOT LIB_FUZZING_ENGINE) - set (LIB_FUZZING_ENGINE "-fsanitize=fuzzer") - endif () - else () - message (FATAL_ERROR "Unknown fuzzer type: ${FUZZER}") - endif () -endif() From 0a3e42401ce11036a64a2d7a7c30f425d8b700f2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 15:44:36 +0100 Subject: [PATCH 276/283] Fix fuzzers --- .../aggregate_function_state_deserialization_fuzzer.cpp | 3 +++ src/Common/tests/gtest_global_context.cpp | 6 ------ src/Common/tests/gtest_global_context.h | 2 -- src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp | 3 +++ src/Formats/fuzzers/format_fuzzer.cpp | 3 +++ src/Interpreters/fuzzers/execute_query_fuzzer.cpp | 3 +++ 6 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp index 290da81944d..425364efb9c 100644 --- a/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp +++ b/src/AggregateFunctions/fuzzers/aggregate_function_state_deserialization_fuzzer.cpp @@ -27,6 +27,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) auto initialize = [&]() mutable { + if (context) + return true; + shared_context = Context::createShared(); context = Context::createGlobal(shared_context.get()); context->makeGlobalContext(); diff --git a/src/Common/tests/gtest_global_context.cpp b/src/Common/tests/gtest_global_context.cpp index ec86c953c5b..0c1556766a9 100644 --- a/src/Common/tests/gtest_global_context.cpp +++ b/src/Common/tests/gtest_global_context.cpp @@ -10,9 +10,3 @@ ContextHolder & getMutableContext() static ContextHolder holder; return holder; } - -void destroyContext() -{ - auto & holder = getMutableContext(); - return holder.destroy(); -} diff --git a/src/Common/tests/gtest_global_context.h b/src/Common/tests/gtest_global_context.h index f846a0dbe4f..7ae8bb32f70 100644 --- a/src/Common/tests/gtest_global_context.h +++ b/src/Common/tests/gtest_global_context.h @@ -28,5 +28,3 @@ struct ContextHolder const ContextHolder & getContext(); ContextHolder & getMutableContext(); - -void destroyContext(); diff --git a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp index e40734e0a57..0ae325871fb 100644 --- a/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp +++ b/src/DataTypes/fuzzers/data_type_deserialization_fuzzer.cpp @@ -24,6 +24,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) auto initialize = [&]() mutable { + if (context) + return true; + shared_context = Context::createShared(); context = Context::createGlobal(shared_context.get()); context->makeGlobalContext(); diff --git a/src/Formats/fuzzers/format_fuzzer.cpp b/src/Formats/fuzzers/format_fuzzer.cpp index 583d1173a01..46661e4828c 100644 --- a/src/Formats/fuzzers/format_fuzzer.cpp +++ b/src/Formats/fuzzers/format_fuzzer.cpp @@ -32,6 +32,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) auto initialize = [&]() mutable { + if (context) + return true; + shared_context = Context::createShared(); context = Context::createGlobal(shared_context.get()); context->makeGlobalContext(); diff --git 
a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp index edff202d547..a02ce66e6b5 100644 --- a/src/Interpreters/fuzzers/execute_query_fuzzer.cpp +++ b/src/Interpreters/fuzzers/execute_query_fuzzer.cpp @@ -25,6 +25,9 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) auto initialize = [&]() mutable { + if (context) + return true; + shared_context = Context::createShared(); context = Context::createGlobal(shared_context.get()); context->makeGlobalContext(); From 44e918bc67efe0b1c36059693ee2eb709d31c59c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 16:22:33 +0100 Subject: [PATCH 277/283] Revive `getFuzzerData` --- CMakeLists.txt | 9 ++++----- programs/local/LocalServer.cpp | 6 ------ programs/main.cpp | 15 +++++++-------- src/Interpreters/AsynchronousMetricLog.cpp | 1 - 4 files changed, 11 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 957bb3f71de..8c4e16eace2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -549,7 +549,9 @@ if (ENABLE_RUST) endif() endif() -if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64)) +if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" + AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND NOT ENABLE_FUZZING + AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64)) set(CHECK_LARGE_OBJECT_SIZES_DEFAULT ON) else () set(CHECK_LARGE_OBJECT_SIZES_DEFAULT OFF) @@ -572,10 +574,7 @@ if (FUZZER) if (NOT(target_type STREQUAL "INTERFACE_LIBRARY" OR target_type STREQUAL "UTILITY")) target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link") endif() - # clickhouse fuzzer isn't working correctly - # initial PR https://github.com/ClickHouse/ClickHouse/pull/27526 - #if (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse") - if (target_type STREQUAL "EXECUTABLE" AND target MATCHES ".+_fuzzer") + if (target_type STREQUAL "EXECUTABLE" AND (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse")) message(STATUS "${target} instrumented with fuzzer") target_link_libraries(${target} PUBLIC ch_contrib::fuzzer) # Add to fuzzers bundle diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 50f9c242712..10fbda0fe46 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -1000,12 +1000,6 @@ extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv) { std::vector argv(*pargv, *pargv + (*pargc + 1)); - if (!isClickhouseApp("local", argv)) - { - std::cerr << "\033[31m" << "ClickHouse compiled in fuzzing mode, only clickhouse local is available." 
<< "\033[0m" << std::endl; - exit(1); - } - /// As a user you can add flags to clickhouse binary in fuzzing mode as follows /// clickhouse local -- diff --git a/programs/main.cpp b/programs/main.cpp index c5f1b30f60e..0a35594bd30 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -68,7 +68,6 @@ namespace using MainFunc = int (*)(int, char**); #if !defined(FUZZING_MODE) - /// Add an item here to register new application std::pair clickhouse_applications[] = { @@ -105,13 +104,6 @@ std::pair clickhouse_applications[] = {"restart", mainEntryClickHouseRestart}, }; -/// Add an item here to register a new short name -std::pair clickhouse_short_names[] = -{ - {"chl", "local"}, - {"chc", "client"}, -}; - int printHelp(int, char **) { std::cerr << "Use one of the following commands:" << std::endl; @@ -121,6 +113,13 @@ int printHelp(int, char **) } #endif +/// Add an item here to register a new short name +std::pair clickhouse_short_names[] = +{ + {"chl", "local"}, + {"chc", "client"}, +}; + enum class InstructionFail { diff --git a/src/Interpreters/AsynchronousMetricLog.cpp b/src/Interpreters/AsynchronousMetricLog.cpp index f905f72e7a7..5cf7f951eec 100644 --- a/src/Interpreters/AsynchronousMetricLog.cpp +++ b/src/Interpreters/AsynchronousMetricLog.cpp @@ -8,7 +8,6 @@ #include #include #include -#include namespace DB From 01136bbc3beb01eb1f150747412db2030115507a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 19:53:58 +0100 Subject: [PATCH 278/283] Limit backtracking in parser --- programs/compressor/Compressor.cpp | 2 +- programs/format/Format.cpp | 2 +- programs/keeper-client/KeeperClient.cpp | 3 +- src/Access/AccessEntityIO.cpp | 2 +- src/Access/RowPolicyCache.cpp | 2 +- src/Access/UsersConfigAccessStorage.cpp | 2 +- .../parseAggregateFunctionParameters.cpp | 3 +- src/Backups/BackupInfo.cpp | 2 +- src/Backups/RestorerFromBackup.cpp | 4 +- src/Client/ClientBase.cpp | 24 +-- src/Client/ClientBase.h | 2 +- src/Client/QueryFuzzer.cpp | 6 +- .../NamedCollections/NamedCollectionUtils.cpp | 2 +- .../tests/gtest_compressionCodec.cpp | 2 +- src/Core/Defines.h | 2 + src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 1 + src/DataTypes/DataTypeFactory.cpp | 5 +- src/Databases/DDLDependencyVisitor.cpp | 3 +- src/Databases/DatabaseDictionary.cpp | 4 +- src/Databases/DatabaseFilesystem.cpp | 2 +- src/Databases/DatabaseHDFS.cpp | 2 +- src/Databases/DatabaseOnDisk.cpp | 18 +- src/Databases/DatabaseOrdinary.cpp | 2 +- src/Databases/DatabaseReplicated.cpp | 7 +- src/Databases/DatabaseS3.cpp | 2 +- src/Databases/DatabasesCommon.cpp | 5 +- src/Databases/DatabasesCommon.h | 3 +- src/Databases/MySQL/DatabaseMySQL.cpp | 14 +- .../MySQL/tryConvertStringLiterals.cpp | 2 +- .../MySQL/tryParseTableIDFromDDL.cpp | 2 +- .../MySQL/tryQuoteUnrecognizedTokens.cpp | 2 +- src/Databases/SQLite/DatabaseSQLite.cpp | 6 +- .../tests/gtest_dictionary_configuration.cpp | 8 +- src/Formats/SchemaInferenceUtils.cpp | 2 +- src/Functions/FunctionSQLJSON.h | 9 +- .../UserDefinedSQLObjectsBackup.cpp | 2 +- .../UserDefinedSQLObjectsDiskStorage.cpp | 3 +- .../UserDefinedSQLObjectsZooKeeperStorage.cpp | 3 +- src/Functions/formatQuery.cpp | 168 +++++++++--------- ...InterpreterShowCreateAccessEntityQuery.cpp | 2 +- src/Interpreters/AsynchronousMetricLog.cpp | 10 +- src/Interpreters/DDLTask.cpp | 5 +- .../IInterpreterUnionOrSelectQuery.cpp | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 10 +- src/Interpreters/InterpreterDeleteQuery.cpp | 3 +- .../InterpreterKillQueryQuery.cpp | 2 +- 
src/Interpreters/InterpreterSelectQuery.cpp | 4 +- .../JoinToSubqueryTransformVisitor.cpp | 2 +- .../MySQL/tests/gtest_create_rewritten.cpp | 2 +- src/Interpreters/SystemLog.cpp | 4 +- src/Interpreters/executeQuery.cpp | 10 +- .../getCustomKeyFilterForParallelReplicas.cpp | 2 +- src/Interpreters/loadMetadata.cpp | 4 +- .../parseColumnsListForTableFunction.cpp | 4 +- .../tests/gtest_comparison_graph.cpp | 2 +- .../tests/gtest_cycle_aliases.cpp | 10 +- .../tests/gtest_table_overrides.cpp | 4 +- src/Parsers/ExpressionListParsers.cpp | 2 +- src/Parsers/IParser.cpp | 33 ++++ src/Parsers/IParser.h | 15 +- .../KustoFunctions/IParserKQLFunction.cpp | 8 +- .../Kusto/KustoFunctions/IParserKQLFunction.h | 4 +- .../KustoFunctions/KQLCastingFunctions.cpp | 2 +- .../KustoFunctions/KQLDynamicFunctions.cpp | 10 +- .../Kusto/KustoFunctions/KQLIPFunctions.cpp | 32 ++-- .../KustoFunctions/KQLStringFunctions.cpp | 4 +- src/Parsers/Kusto/ParserKQLDistinct.cpp | 2 +- src/Parsers/Kusto/ParserKQLExtend.cpp | 4 +- src/Parsers/Kusto/ParserKQLFilter.cpp | 2 +- src/Parsers/Kusto/ParserKQLLimit.cpp | 2 +- src/Parsers/Kusto/ParserKQLMVExpand.cpp | 16 +- src/Parsers/Kusto/ParserKQLMVExpand.h | 2 +- src/Parsers/Kusto/ParserKQLMakeSeries.cpp | 20 +-- src/Parsers/Kusto/ParserKQLMakeSeries.h | 2 +- src/Parsers/Kusto/ParserKQLPrint.cpp | 2 +- src/Parsers/Kusto/ParserKQLProject.cpp | 2 +- src/Parsers/Kusto/ParserKQLQuery.cpp | 18 +- src/Parsers/Kusto/ParserKQLQuery.h | 6 +- src/Parsers/Kusto/ParserKQLSort.cpp | 2 +- src/Parsers/Kusto/ParserKQLStatement.cpp | 2 +- src/Parsers/Kusto/ParserKQLSummarize.cpp | 8 +- src/Parsers/Kusto/parseKQLQuery.cpp | 23 ++- src/Parsers/Kusto/parseKQLQuery.h | 26 +-- .../tests/gtest_alter_command_parser.cpp | 2 +- .../MySQL/tests/gtest_alter_parser.cpp | 2 +- .../MySQL/tests/gtest_column_parser.cpp | 4 +- .../MySQL/tests/gtest_constraint_parser.cpp | 10 +- .../MySQL/tests/gtest_create_parser.cpp | 8 +- .../MySQL/tests/gtest_index_parser.cpp | 50 +++--- .../tests/gtest_partition_options_parser.cpp | 26 +-- .../MySQL/tests/gtest_partition_parser.cpp | 21 ++- .../MySQL/tests/gtest_reference_parser.cpp | 21 ++- .../MySQL/tests/gtest_subpartition_parser.cpp | 5 +- .../tests/gtest_table_options_parser.cpp | 4 +- src/Parsers/PRQL/ParserPRQLQuery.cpp | 4 +- src/Parsers/PRQL/ParserPRQLQuery.h | 3 +- src/Parsers/ParserAlterQuery.cpp | 3 +- src/Parsers/ParserCreateQuery.h | 2 +- src/Parsers/QueryParameterVisitor.cpp | 2 +- src/Parsers/TokenIterator.h | 12 -- src/Parsers/examples/create_parser.cpp | 2 +- src/Parsers/examples/select_parser.cpp | 2 +- src/Parsers/fuzzers/select_parser_fuzzer.cpp | 3 +- src/Parsers/parseQuery.cpp | 28 +-- src/Parsers/parseQuery.h | 16 +- src/Parsers/tests/gtest_Parser.cpp | 13 +- src/Parsers/tests/gtest_common.cpp | 8 +- src/Parsers/tests/gtest_dictionary_parser.cpp | 18 +- src/Parsers/tests/gtest_format_hiliting.cpp | 2 +- src/Planner/PlannerJoinTree.cpp | 2 +- src/Planner/Utils.cpp | 2 +- .../Impl/ConstantExpressionTemplate.cpp | 2 +- .../Formats/Impl/MySQLDumpRowInputFormat.cpp | 3 +- .../Formats/Impl/ValuesBlockInputFormat.cpp | 4 +- .../QueryPlan/ReadFromMergeTree.cpp | 2 +- src/Server/GRPCServer.cpp | 2 +- src/Server/PostgreSQLHandler.cpp | 1 + src/Storages/ColumnsDescription.cpp | 4 +- src/Storages/ConstraintsDescription.cpp | 2 +- src/Storages/IndicesDescription.cpp | 2 +- src/Storages/KeyDescription.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- .../MergeTreeDataPartWriterCompact.cpp | 2 +- .../MergeTreeDataPartWriterOnDisk.cpp | 4 +- 
.../MergeTree/MergeTreeDataPartWriterWide.cpp | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- .../ReplicatedMergeTreeTableMetadata.cpp | 4 +- src/Storages/MutationCommands.cpp | 2 +- .../StorageMaterializedPostgreSQL.cpp | 2 +- src/Storages/ProjectionsDescription.cpp | 2 +- .../System/StorageSystemDDLWorkerQueue.cpp | 3 +- .../System/attachInformationSchemaTables.cpp | 2 +- src/Storages/TTLDescription.cpp | 2 +- src/Storages/getStructureOfRemoteTable.cpp | 4 +- ..._transform_query_for_external_database.cpp | 4 +- src/TableFunctions/Hive/TableFunctionHive.cpp | 4 +- src/TableFunctions/TableFunctionExplain.cpp | 8 +- 138 files changed, 549 insertions(+), 466 deletions(-) create mode 100644 src/Parsers/IParser.cpp diff --git a/programs/compressor/Compressor.cpp b/programs/compressor/Compressor.cpp index 7125fdc744f..050bb495024 100644 --- a/programs/compressor/Compressor.cpp +++ b/programs/compressor/Compressor.cpp @@ -143,7 +143,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) ParserCodec codec_parser; std::string codecs_line = boost::algorithm::join(codecs, ","); - auto ast = parseQuery(codec_parser, "(" + codecs_line + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto ast = parseQuery(codec_parser, "(" + codecs_line + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); codec = CompressionCodecFactory::instance().get(ast, nullptr); } else diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index c92106e2f90..50f801f2560 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -234,7 +234,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) size_t approx_query_length = multiple ? find_first_symbols<';'>(pos, end) - pos : end - pos; ASTPtr res = parseQueryAndMovePosition( - parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth); + parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth, cmd_settings.max_parser_backtracks); std::unique_ptr insert_query_payload = nullptr; /// If the query is INSERT ... VALUES, then we will try to parse the data. 
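
Every call site touched by this patch follows the same mechanical rule: wherever a parser depth limit was threaded through, an explicit backtracking budget now travels next to it, evidently to bound how often the recursive-descent parser may rewind on pathological input. A representative call, mirroring the `RowPolicyCache` hunk below (`filter_text` is a placeholder name):

``` cpp
ParserExpression parser;
ASTPtr ast = parseQuery(parser, filter_text, /* max_query_size = */ 0,
    DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
```
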
diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index 595fc65e50e..8297fab5ed9 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -44,7 +44,7 @@ String KeeperClient::executeFourLetterCommand(const String & command) std::vector KeeperClient::getCompletions(const String & prefix) const { Tokens tokens(prefix.data(), prefix.data() + prefix.size(), 0, false); - IParser::Pos pos(tokens, 0); + IParser::Pos pos(tokens, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); if (pos->type != TokenType::BareWord) return registered_commands_and_four_letter_words; @@ -278,6 +278,7 @@ bool KeeperClient::processQueryText(const String & text) /* allow_multi_statements = */ true, /* max_query_size = */ 0, /* max_parser_depth = */ 0, + /* max_parser_backtracks = */ 0, /* skip_insignificant = */ false); if (!res) diff --git a/src/Access/AccessEntityIO.cpp b/src/Access/AccessEntityIO.cpp index 80bb63b04bf..b0dfd74c53b 100644 --- a/src/Access/AccessEntityIO.cpp +++ b/src/Access/AccessEntityIO.cpp @@ -62,7 +62,7 @@ AccessEntityPtr deserializeAccessEntityImpl(const String & definition) const char * end = begin + definition.size(); while (pos < end) { - queries.emplace_back(parseQueryAndMovePosition(parser, pos, end, "", true, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH)); + queries.emplace_back(parseQueryAndMovePosition(parser, pos, end, "", true, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS)); while (isWhitespaceASCII(*pos) || *pos == ';') ++pos; } diff --git a/src/Access/RowPolicyCache.cpp b/src/Access/RowPolicyCache.cpp index 13140099a63..c1c4928d0da 100644 --- a/src/Access/RowPolicyCache.cpp +++ b/src/Access/RowPolicyCache.cpp @@ -86,7 +86,7 @@ void RowPolicyCache::PolicyInfo::setPolicy(const RowPolicyPtr & policy_) try { ParserExpression parser; - parsed_filters[filter_type_i] = parseQuery(parser, filter, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + parsed_filters[filter_type_i] = parseQuery(parser, filter, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); } catch (...) { diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index e9b2e1397ab..b4b843fc77e 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -66,7 +66,7 @@ namespace String error_message; const char * pos = string_query.data(); - auto ast = tryParseQuery(parser, pos, pos + string_query.size(), error_message, false, "", false, 0, 0); + auto ast = tryParseQuery(parser, pos, pos + string_query.size(), error_message, false, "", false, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true); if (!ast) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse grant query. 
Error: {}", error_message); diff --git a/src/AggregateFunctions/parseAggregateFunctionParameters.cpp b/src/AggregateFunctions/parseAggregateFunctionParameters.cpp index db1efe224d1..593be1e0a79 100644 --- a/src/AggregateFunctions/parseAggregateFunctionParameters.cpp +++ b/src/AggregateFunctions/parseAggregateFunctionParameters.cpp @@ -81,7 +81,8 @@ void getAggregateFunctionNameAndParametersArray( ParserExpressionList params_parser(false); ASTPtr args_ast = parseQuery(params_parser, parameters_str.data(), parameters_str.data() + parameters_str.size(), - "parameters of aggregate function in " + error_context, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + "parameters of aggregate function in " + error_context, + 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); if (args_ast->children.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Incorrect list of parameters to aggregate function {}", diff --git a/src/Backups/BackupInfo.cpp b/src/Backups/BackupInfo.cpp index 2bff400d4fe..461f613ecd2 100644 --- a/src/Backups/BackupInfo.cpp +++ b/src/Backups/BackupInfo.cpp @@ -25,7 +25,7 @@ String BackupInfo::toString() const BackupInfo BackupInfo::fromString(const String & str) { ParserIdentifierWithOptionalParameters parser; - ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); return fromAST(*ast); } diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 87c143f0fe2..e20e8eb66c6 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -424,7 +424,7 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_ readStringUntilEOF(create_query_str, *read_buffer); read_buffer.reset(); ParserCreateQuery create_parser; - ASTPtr create_table_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr create_table_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); applyCustomStoragePolicy(create_table_query); renameDatabaseAndTableNameInCreateQuery(create_table_query, renaming_map, context->getGlobalContext()); String create_table_query_str = serializeAST(*create_table_query); @@ -534,7 +534,7 @@ void RestorerFromBackup::findDatabaseInBackupImpl(const String & database_name_i readStringUntilEOF(create_query_str, *read_buffer); read_buffer.reset(); ParserCreateQuery create_parser; - ASTPtr create_database_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr create_database_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); renameDatabaseAndTableNameInCreateQuery(create_database_query, renaming_map, context->getGlobalContext()); String create_database_query_str = serializeAST(*create_database_query); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 48962880b8f..d561a64895b 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -345,7 +345,7 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu if (dialect == Dialect::kusto) parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); else if (dialect == Dialect::prql) - parser = std::make_unique(max_length, settings.max_parser_depth); + parser = std::make_unique(max_length, settings.max_parser_depth, 
settings.max_parser_backtracks); else parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); @@ -353,9 +353,9 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu { String message; if (dialect == Dialect::kusto) - res = tryParseKQLQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = tryParseKQLQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth, settings.max_parser_backtracks, true); else - res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = tryParseQuery(*parser, pos, end, message, true, "", allow_multi_statements, max_length, settings.max_parser_depth, settings.max_parser_backtracks, true); if (!res) { @@ -366,9 +366,9 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu else { if (dialect == Dialect::kusto) - res = parseKQLQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = parseKQLQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth, settings.max_parser_backtracks); else - res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth); + res = parseQueryAndMovePosition(*parser, pos, end, "", allow_multi_statements, max_length, settings.max_parser_depth, settings.max_parser_backtracks); } if (is_interactive) @@ -385,12 +385,12 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu /// Consumes trailing semicolons and tries to consume the same-line trailing comment. -void ClientBase::adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth) +void ClientBase::adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth, uint32_t max_parser_backtracks) { // We have to skip the trailing semicolon that might be left // after VALUES parsing or just after a normal semicolon-terminated query. Tokens after_query_tokens(this_query_end, all_queries_end); - IParser::Pos after_query_iterator(after_query_tokens, max_parser_depth); + IParser::Pos after_query_iterator(after_query_tokens, max_parser_depth, max_parser_backtracks); while (after_query_iterator.isValid() && after_query_iterator->type == TokenType::Semicolon) { this_query_end = after_query_iterator->end; @@ -1984,6 +1984,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( return MultiQueryProcessingStage::QUERIES_END; unsigned max_parser_depth = static_cast(global_context->getSettingsRef().max_parser_depth); + unsigned max_parser_backtracks = static_cast(global_context->getSettingsRef().max_parser_backtracks); // If there are only comments left until the end of file, we just // stop. The parser can't handle this situation because it always @@ -1994,7 +1995,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( // and it makes more sense to treat them as such. 
{ Tokens tokens(this_query_begin, all_queries_end); - IParser::Pos token_iterator(tokens, max_parser_depth); + IParser::Pos token_iterator(tokens, max_parser_depth, max_parser_backtracks); if (!token_iterator.isValid()) return MultiQueryProcessingStage::QUERIES_END; } @@ -2015,7 +2016,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( if (ignore_error) { Tokens tokens(this_query_begin, all_queries_end); - IParser::Pos token_iterator(tokens, max_parser_depth); + IParser::Pos token_iterator(tokens, max_parser_depth, max_parser_backtracks); while (token_iterator->type != TokenType::Semicolon && token_iterator.isValid()) ++token_iterator; this_query_begin = token_iterator->end; @@ -2055,7 +2056,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( // after we have processed the query. But even this guess is // beneficial so that we see proper trailing comments in "echo" and // server log. - adjustQueryEnd(this_query_end, all_queries_end, max_parser_depth); + adjustQueryEnd(this_query_end, all_queries_end, max_parser_depth, max_parser_backtracks); return MultiQueryProcessingStage::EXECUTE_QUERY; } @@ -2251,7 +2252,8 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) this_query_end = insert_ast->end; adjustQueryEnd( this_query_end, all_queries_end, - static_cast(global_context->getSettingsRef().max_parser_depth)); + static_cast(global_context->getSettingsRef().max_parser_depth), + static_cast(global_context->getSettingsRef().max_parser_backtracks)); } // Report error. diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index dd08e7c059b..7a9e9666e67 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -94,7 +94,7 @@ protected: void processParsedSingleQuery(const String & full_query, const String & query_to_execute, ASTPtr parsed_query, std::optional echo_query_ = {}, bool report_error = false); - static void adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth); + static void adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth, uint32_t max_parser_backtracks); ASTPtr parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const; static void setupSignalHandler(); diff --git a/src/Client/QueryFuzzer.cpp b/src/Client/QueryFuzzer.cpp index 0a7cb1b36db..7be01686258 100644 --- a/src/Client/QueryFuzzer.cpp +++ b/src/Client/QueryFuzzer.cpp @@ -569,7 +569,8 @@ void QueryFuzzer::fuzzColumnDeclaration(ASTColumnDeclaration & column) auto data_type = fuzzDataType(DataTypeFactory::instance().get(column.type)); ParserDataType parser; - column.type = parseQuery(parser, data_type->getName(), DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); + column.type = parseQuery(parser, data_type->getName(), + DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); } } @@ -821,7 +822,8 @@ static ASTPtr tryParseInsertQuery(const String & full_query) ParserInsertQuery parser(end, false); String message; - return tryParseQuery(parser, pos, end, message, false, "", false, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); + return tryParseQuery(parser, pos, end, message, false, "", false, + DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true); } ASTs QueryFuzzer::getInsertQueriesForFuzzedTables(const String & full_query) diff --git a/src/Common/NamedCollections/NamedCollectionUtils.cpp 
b/src/Common/NamedCollections/NamedCollectionUtils.cpp index fe0f42467c7..9b569390b3c 100644 --- a/src/Common/NamedCollections/NamedCollectionUtils.cpp +++ b/src/Common/NamedCollections/NamedCollectionUtils.cpp @@ -302,7 +302,7 @@ private: readStringUntilEOF(query, in); ParserCreateNamedCollectionQuery parser; - auto ast = parseQuery(parser, query, "in file " + path, 0, settings.max_parser_depth); + auto ast = parseQuery(parser, query, "in file " + path, 0, settings.max_parser_depth, settings.max_parser_backtracks); const auto & create_query = ast->as<const ASTCreateNamedCollectionQuery &>(); return create_query; } diff --git a/src/Compression/tests/gtest_compressionCodec.cpp b/src/Compression/tests/gtest_compressionCodec.cpp index 24f16a55c25..16573e035e0 100644 --- a/src/Compression/tests/gtest_compressionCodec.cpp +++ b/src/Compression/tests/gtest_compressionCodec.cpp @@ -442,7 +442,7 @@ CompressionCodecPtr makeCodec(const std::string & codec_string, const DataTypePt { const std::string codec_statement = "(" + codec_string + ")"; Tokens tokens(codec_statement.begin().base(), codec_statement.end().base()); - IParser::Pos token_iterator(tokens, 0); + IParser::Pos token_iterator(tokens, 0, 0); Expected expected; ASTPtr codec_ast; diff --git a/src/Core/Defines.h b/src/Core/Defines.h index cc6f49aa361..a8dd26519c2 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -63,6 +63,8 @@ static constexpr auto DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC = 120; /// Default limit on recursion depth of recursive descent parser. static constexpr auto DBMS_DEFAULT_MAX_PARSER_DEPTH = 1000; +/// Default limit on the amount of backtracking of recursive descent parser. +static constexpr auto DBMS_DEFAULT_MAX_PARSER_BACKTRACKS = 1000000; /// Default limit on query size. static constexpr auto DBMS_DEFAULT_MAX_QUERY_SIZE = 262144; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 48f6b4d621c..e6adb00137f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -607,6 +607,7 @@ class IColumn; M(Bool, use_compact_format_in_distributed_parts_names, true, "Changes format of directories names for distributed table insert parts.", 0) \ M(Bool, validate_polygons, true, "Throw exception if polygon is invalid in function pointInPolygon (e.g. self-tangent, self-intersecting). 
If the setting is false, the function will accept invalid polygons but may silently return wrong result.", 0) \ M(UInt64, max_parser_depth, DBMS_DEFAULT_MAX_PARSER_DEPTH, "Maximum parser depth (recursion depth of recursive descent parser).", 0) \ + M(UInt64, max_parser_backtracks, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, "Maximum parser backtracking (how many times it tries different alternatives in the recursive descent parsing process).", 0) \ M(Bool, allow_settings_after_format_in_insert, false, "Allow SETTINGS after FORMAT, but note, that this is not always safe (note: this is a compatibility setting).", 0) \ M(Seconds, periodic_live_view_refresh, 60, "Interval after which periodically refreshed live view is forced to refresh.", 0) \ M(Bool, transform_null_in, false, "If enabled, NULL values will be matched with 'IN' operator as if they are considered equal.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 4914f97a6fb..8e2b2915c2a 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -95,6 +95,7 @@ static std::map sett {"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication in dependent materialized views cannot work together with async inserts."}, {"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"}, {"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for space reservation in filesystem cache"}, + {"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"}, }}, {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, diff --git a/src/DataTypes/DataTypeFactory.cpp b/src/DataTypes/DataTypeFactory.cpp index d154b386ace..844384f3c95 100644 --- a/src/DataTypes/DataTypeFactory.cpp +++ b/src/DataTypes/DataTypeFactory.cpp @@ -56,13 +56,14 @@ DataTypePtr DataTypeFactory::getImpl(const String & full_name) const { String out_err; const char * start = full_name.data(); - ast = tryParseQuery(parser, start, start + full_name.size(), out_err, false, "data type", false, DBMS_DEFAULT_MAX_QUERY_SIZE, data_type_max_parse_depth); + ast = tryParseQuery(parser, start, start + full_name.size(), out_err, false, "data type", false, + DBMS_DEFAULT_MAX_QUERY_SIZE, data_type_max_parse_depth, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true); if (!ast) return nullptr; } else { - ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", false, data_type_max_parse_depth); + ast = parseQuery(parser, full_name.data(), full_name.data() + full_name.size(), "data type", false, data_type_max_parse_depth, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); } return getImpl(ast); diff --git a/src/Databases/DDLDependencyVisitor.cpp b/src/Databases/DDLDependencyVisitor.cpp index cb85119e3b0..75a01a6190f 100644 --- a/src/Databases/DDLDependencyVisitor.cpp +++ b/src/Databases/DDLDependencyVisitor.cpp @@ -444,8 +444,9 @@ namespace ParserSelectWithUnionQuery parser; String description = fmt::format("Query for ClickHouse dictionary {}", data.table_name); String fixed_query = removeWhereConditionPlaceholder(query); + const Settings & settings = data.context->getSettingsRef(); ASTPtr select = parseQuery(parser, 
fixed_query, description, - data.context->getSettingsRef().max_query_size, data.context->getSettingsRef().max_parser_depth); + settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); DDLDependencyVisitor::Visitor visitor{data}; visitor.visit(select); diff --git a/src/Databases/DatabaseDictionary.cpp b/src/Databases/DatabaseDictionary.cpp index 9a65c7a46ef..76fdb4fa961 100644 --- a/src/Databases/DatabaseDictionary.cpp +++ b/src/Databases/DatabaseDictionary.cpp @@ -115,7 +115,7 @@ ASTPtr DatabaseDictionary::getCreateTableQueryImpl(const String & table_name, Co const char * pos = query.data(); std::string error_message; auto ast = tryParseQuery(parser, pos, pos + query.size(), error_message, - /* hilite = */ false, "", /* allow_multi_statements = */ false, 0, settings.max_parser_depth); + /* hilite = */ false, "", /* allow_multi_statements = */ false, 0, settings.max_parser_depth, settings.max_parser_backtracks, true); if (!ast && throw_on_error) throw Exception::createDeprecated(error_message, ErrorCodes::SYNTAX_ERROR); @@ -134,7 +134,7 @@ ASTPtr DatabaseDictionary::getCreateDatabaseQuery() const } auto settings = getContext()->getSettingsRef(); ParserCreateQuery parser; - return parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth); + return parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth, settings.max_parser_backtracks); } void DatabaseDictionary::shutdown() diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 5af1e1ae0d2..05af0acf978 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -187,7 +187,7 @@ ASTPtr DatabaseFilesystem::getCreateDatabaseQuery() const const String query = fmt::format("CREATE DATABASE {} ENGINE = Filesystem('{}')", backQuoteIfNeed(getDatabaseName()), path); ParserCreateQuery parser; - ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth); + ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth, settings.max_parser_backtracks); if (const auto database_comment = getDatabaseComment(); !database_comment.empty()) { diff --git a/src/Databases/DatabaseHDFS.cpp b/src/Databases/DatabaseHDFS.cpp index 3a1e6b16ccf..2688ff2443c 100644 --- a/src/Databases/DatabaseHDFS.cpp +++ b/src/Databases/DatabaseHDFS.cpp @@ -183,7 +183,7 @@ ASTPtr DatabaseHDFS::getCreateDatabaseQuery() const ParserCreateQuery parser; const String query = fmt::format("CREATE DATABASE {} ENGINE = HDFS('{}')", backQuoteIfNeed(getDatabaseName()), source); - ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth); + ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth, settings.max_parser_backtracks); if (const auto database_comment = getDatabaseComment(); !database_comment.empty()) { diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index fcb073644c5..dcfc1916450 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -526,7 +526,7 @@ ASTPtr DatabaseOnDisk::getCreateDatabaseQuery() const /// If database.sql doesn't exist, then engine is Ordinary String query = "CREATE DATABASE " + backQuoteIfNeed(getDatabaseName()) + " ENGINE = Ordinary"; ParserCreateQuery parser; - ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 
0, settings.max_parser_depth); + ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth, settings.max_parser_backtracks); } if (const auto database_comment = getDatabaseComment(); !database_comment.empty()) @@ -707,7 +707,7 @@ ASTPtr DatabaseOnDisk::parseQueryFromMetadata( const char * pos = query.data(); std::string error_message; auto ast = tryParseQuery(parser, pos, pos + query.size(), error_message, /* hilite = */ false, - "in file " + metadata_file_path, /* allow_multi_statements = */ false, 0, settings.max_parser_depth); + "in file " + metadata_file_path, /* allow_multi_statements = */ false, 0, settings.max_parser_depth, settings.max_parser_backtracks, true); if (!ast && throw_on_error) throw Exception::createDeprecated(error_message, ErrorCodes::SYNTAX_ERROR); @@ -765,12 +765,14 @@ ASTPtr DatabaseOnDisk::getCreateQueryFromStorage(const String & table_name, cons auto ast_storage = std::make_shared(); ast_storage->set(ast_storage->engine, ast_engine); - unsigned max_parser_depth = static_cast(getContext()->getSettingsRef().max_parser_depth); - auto create_table_query = DB::getCreateQueryFromStorage(storage, - ast_storage, - false, - max_parser_depth, - throw_on_error); + const Settings & settings = getContext()->getSettingsRef(); + auto create_table_query = DB::getCreateQueryFromStorage( + storage, + ast_storage, + false, + static_cast(settings.max_parser_depth), + static_cast(settings.max_parser_backtracks), + throw_on_error); create_table_query->set(create_table_query->as()->comment, std::make_shared("SYSTEM TABLE is built on the fly.")); diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index bc552b9c927..161dd3d3f60 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -469,7 +469,7 @@ void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & ta statement.data() + statement.size(), "in file " + table_metadata_path, 0, - local_context->getSettingsRef().max_parser_depth); + local_context->getSettingsRef().max_parser_depth, local_context->getSettingsRef().max_parser_backtracks); applyMetadataChangesToCreateQuery(ast, metadata); diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 9cf19a251f7..3b6a712510d 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -812,7 +812,8 @@ static UUID getTableUUIDIfReplicated(const String & metadata, ContextPtr context ParserCreateQuery parser; auto size = context->getSettingsRef().max_query_size; auto depth = context->getSettingsRef().max_parser_depth; - ASTPtr query = parseQuery(parser, metadata, size, depth); + auto backtracks = context->getSettingsRef().max_parser_backtracks; + ASTPtr query = parseQuery(parser, metadata, size, depth, backtracks); const ASTCreateQuery & create = query->as(); if (!create.storage || !create.storage->engine) return UUIDHelpers::Nil; @@ -1234,7 +1235,7 @@ ASTPtr DatabaseReplicated::parseQueryFromMetadataInZooKeeper(const String & node { ParserCreateQuery parser; String description = "in ZooKeeper " + zookeeper_path + "/metadata/" + node_name; - auto ast = parseQuery(parser, query, description, 0, getContext()->getSettingsRef().max_parser_depth); + auto ast = parseQuery(parser, query, description, 0, getContext()->getSettingsRef().max_parser_depth, getContext()->getSettingsRef().max_parser_backtracks); auto & create = ast->as(); if (create.uuid == UUIDHelpers::Nil || create.getTable() != 
TABLE_WITH_UUID_NAME_PLACEHOLDER || create.database) @@ -1559,7 +1560,7 @@ DatabaseReplicated::getTablesForBackup(const FilterByNameFunction & filter, cons for (const auto & [table_name, metadata] : snapshot) { ParserCreateQuery parser; - auto create_table_query = parseQuery(parser, metadata, 0, getContext()->getSettingsRef().max_parser_depth); + auto create_table_query = parseQuery(parser, metadata, 0, getContext()->getSettingsRef().max_parser_depth, getContext()->getSettingsRef().max_parser_backtracks); auto & create = create_table_query->as(); create.attach = false; diff --git a/src/Databases/DatabaseS3.cpp b/src/Databases/DatabaseS3.cpp index d2ca5a05ea4..159a5242dbe 100644 --- a/src/Databases/DatabaseS3.cpp +++ b/src/Databases/DatabaseS3.cpp @@ -191,7 +191,7 @@ ASTPtr DatabaseS3::getCreateDatabaseQuery() const creation_args += fmt::format(", '{}', '{}'", config.access_key_id.value(), config.secret_access_key.value()); const String query = fmt::format("CREATE DATABASE {} ENGINE = S3({})", backQuoteIfNeed(getDatabaseName()), creation_args); - ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth); + ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "", 0, settings.max_parser_depth, settings.max_parser_backtracks); if (const auto database_comment = getDatabaseComment(); !database_comment.empty()) { diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 963cf0064df..f8d6ad69ba8 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -108,7 +108,8 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo } -ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_storage, bool only_ordinary, uint32_t max_parser_depth, bool throw_on_error) +ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_storage, bool only_ordinary, + uint32_t max_parser_depth, uint32_t max_parser_backtracks, bool throw_on_error) { auto table_id = storage->getStorageID(); auto metadata_ptr = storage->getInMemoryMetadataPtr(); @@ -148,7 +149,7 @@ ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_ Expected expected; expected.max_parsed_pos = string_end; Tokens tokens(type_name.c_str(), string_end); - IParser::Pos pos(tokens, max_parser_depth); + IParser::Pos pos(tokens, max_parser_depth, max_parser_backtracks); ParserDataType parser; if (!parser.parse(pos, ast_type, expected)) { diff --git a/src/Databases/DatabasesCommon.h b/src/Databases/DatabasesCommon.h index 4e9d967c11a..81a3c55a435 100644 --- a/src/Databases/DatabasesCommon.h +++ b/src/Databases/DatabasesCommon.h @@ -13,7 +13,8 @@ namespace DB { void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemoryMetadata & metadata); -ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_storage, bool only_ordinary, uint32_t max_parser_depth, bool throw_on_error); +ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr & ast_storage, bool only_ordinary, + uint32_t max_parser_depth, uint32_t max_parser_backtracks, bool throw_on_error); /// Cleans a CREATE QUERY from temporary flags like "IF NOT EXISTS", "OR REPLACE", "AS SELECT" (for non-views), etc. 
void cleanupObjectDefinitionFromTemporaryFlags(ASTCreateQuery & query); diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 96a5c3a18ce..d9b0f7f9ac7 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -174,12 +174,14 @@ ASTPtr DatabaseMySQL::getCreateTableQueryImpl(const String & table_name, Context ast_storage->settings = nullptr; } - unsigned max_parser_depth = static_cast(getContext()->getSettingsRef().max_parser_depth); - auto create_table_query = DB::getCreateQueryFromStorage(storage, - table_storage_define, - true, - max_parser_depth, - throw_on_error); + const Settings & settings = getContext()->getSettingsRef(); + auto create_table_query = DB::getCreateQueryFromStorage( + storage, + table_storage_define, + true, + static_cast(settings.max_parser_depth), + static_cast(settings.max_parser_backtracks), + throw_on_error); return create_table_query; } diff --git a/src/Databases/MySQL/tryConvertStringLiterals.cpp b/src/Databases/MySQL/tryConvertStringLiterals.cpp index ab392b301e8..ac65d510f67 100644 --- a/src/Databases/MySQL/tryConvertStringLiterals.cpp +++ b/src/Databases/MySQL/tryConvertStringLiterals.cpp @@ -61,7 +61,7 @@ static bool tryReadCharset( bool tryConvertStringLiterals(String & query) { Tokens tokens(query.data(), query.data() + query.size()); - IParser::Pos pos(tokens, 0); + IParser::Pos pos(tokens, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); Expected expected; String rewritten_query; rewritten_query.reserve(query.size()); diff --git a/src/Databases/MySQL/tryParseTableIDFromDDL.cpp b/src/Databases/MySQL/tryParseTableIDFromDDL.cpp index a01eb311450..4fe0f44c767 100644 --- a/src/Databases/MySQL/tryParseTableIDFromDDL.cpp +++ b/src/Databases/MySQL/tryParseTableIDFromDDL.cpp @@ -10,7 +10,7 @@ StorageID tryParseTableIDFromDDL(const String & query, const String & default_da { bool is_ddl = false; Tokens tokens(query.data(), query.data() + query.size()); - IParser::Pos pos(tokens, 0); + IParser::Pos pos(tokens, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); Expected expected; if (ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos, expected) || ParserKeyword("CREATE TABLE").ignore(pos, expected)) { diff --git a/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp index c5a366698e6..9ecc81c693f 100644 --- a/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp +++ b/src/Databases/MySQL/tryQuoteUnrecognizedTokens.cpp @@ -37,7 +37,7 @@ static void quoteLiteral( bool tryQuoteUnrecognizedTokens(String & query) { Tokens tokens(query.data(), query.data() + query.size()); - IParser::Pos pos(tokens, 0); + IParser::Pos pos(tokens, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); Expected expected; String rewritten_query; const char * copy_from = query.data(); diff --git a/src/Databases/SQLite/DatabaseSQLite.cpp b/src/Databases/SQLite/DatabaseSQLite.cpp index b3d5288cdf7..b7a82fd9d0f 100644 --- a/src/Databases/SQLite/DatabaseSQLite.cpp +++ b/src/Databases/SQLite/DatabaseSQLite.cpp @@ -194,10 +194,10 @@ ASTPtr DatabaseSQLite::getCreateTableQueryImpl(const String & table_name, Contex /// Add table_name to engine arguments storage_engine_arguments->children.insert(storage_engine_arguments->children.begin() + 1, std::make_shared(table_id.table_name)); - unsigned max_parser_depth = static_cast(getContext()->getSettingsRef().max_parser_depth); + const Settings & settings = 
getContext()->getSettingsRef(); + auto create_table_query = DB::getCreateQueryFromStorage(storage, table_storage_define, true, - max_parser_depth, - throw_on_error); + static_cast(settings.max_parser_depth), static_cast(settings.max_parser_backtracks), throw_on_error); return create_table_query; } diff --git a/src/Dictionaries/tests/gtest_dictionary_configuration.cpp b/src/Dictionaries/tests/gtest_dictionary_configuration.cpp index 989ce5c8f18..08aad663a8c 100644 --- a/src/Dictionaries/tests/gtest_dictionary_configuration.cpp +++ b/src/Dictionaries/tests/gtest_dictionary_configuration.cpp @@ -48,7 +48,7 @@ TEST(ConvertDictionaryAST, SimpleDictConfiguration) " COMMENT 'hello world!'"; ParserCreateDictionaryQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTCreateQuery * create = ast->as(); DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, getContext().context); @@ -119,7 +119,7 @@ TEST(ConvertDictionaryAST, TrickyAttributes) " SOURCE(CLICKHOUSE(HOST 'localhost'))"; ParserCreateDictionaryQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTCreateQuery * create = ast->as(); DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, getContext().context); @@ -164,7 +164,7 @@ TEST(ConvertDictionaryAST, ComplexKeyAndLayoutWithParams) " LIFETIME(MIN 1 MAX 10)"; ParserCreateDictionaryQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTCreateQuery * create = ast->as(); DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, getContext().context); @@ -215,7 +215,7 @@ TEST(ConvertDictionaryAST, ComplexSource) " RANGE(MIN second_column MAX third_column)"; ParserCreateDictionaryQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTCreateQuery * create = ast->as(); DictionaryConfigurationPtr config = getDictionaryConfigurationFromAST(*create, getContext().context); /// source diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index cb574551d26..0bba7a1f424 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -1054,7 +1054,7 @@ namespace { if (depth > settings.max_parser_depth) throw Exception(ErrorCodes::TOO_DEEP_RECURSION, - "Maximum parse depth ({}) exceeded. Consider raising max_parser_depth setting.", settings.max_parser_depth); + "Maximum parse depth ({}) exceeded. 
Consider raising max_parser_depth setting.", settings.max_parser_depth); assertChar('{', buf); skipWhitespaceIfAny(buf); diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index 3efa40df9be..37db514fd1f 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -123,7 +123,7 @@ public: class Executor { public: - static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth, const ContextPtr & context) + static ColumnPtr run(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, uint32_t parse_depth, uint32_t parse_backtracks, const ContextPtr & context) { MutableColumnPtr to{result_type->createColumn()}; to->reserve(input_rows_count); @@ -161,7 +161,7 @@ public: /// Tokenize the query Tokens tokens(query.data(), query.data() + query.size()); /// Max depth 0 indicates that depth is not limited - IParser::Pos token_iterator(tokens, parse_depth); + IParser::Pos token_iterator(tokens, parse_depth, parse_backtracks); /// Parse query and create AST tree Expected expected; @@ -232,16 +232,17 @@ public: /// 3. Parser(Tokens, ASTPtr) -> complete AST /// 4. Execute functions: call getNextItem on generator and handle each item unsigned parse_depth = static_cast(getContext()->getSettingsRef().max_parser_depth); + unsigned parse_backtracks = static_cast(getContext()->getSettingsRef().max_parser_backtracks); #if USE_SIMDJSON if (getContext()->getSettingsRef().allow_simdjson) return FunctionSQLJSONHelpers::Executor< Name, Impl>, - SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, getContext()); + SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, parse_backtracks, getContext()); #endif return FunctionSQLJSONHelpers:: Executor>, DummyJSONParser>::run( - arguments, result_type, input_rows_count, parse_depth, getContext()); + arguments, result_type, input_rows_count, parse_depth, parse_backtracks, getContext()); } }; diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp index 3ec5393fa6f..b7c7e5847bd 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp @@ -128,7 +128,7 @@ restoreUserDefinedSQLObjects(RestorerFromBackup & restorer, const String & data_ statement_def.data() + statement_def.size(), "in file " + filepath + " from backup " + backup->getNameForLogging(), 0, - context->getSettingsRef().max_parser_depth); + context->getSettingsRef().max_parser_depth, context->getSettingsRef().max_parser_backtracks); break; } } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp index 34946db7d9e..b083c540083 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp @@ -92,7 +92,8 @@ ASTPtr UserDefinedSQLObjectsDiskStorage::tryLoadObject(UserDefinedSQLObjectType object_create_query.data() + object_create_query.size(), "", 0, - global_context->getSettingsRef().max_parser_depth); + global_context->getSettingsRef().max_parser_depth, + global_context->getSettingsRef().max_parser_backtracks); return ast; } } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp index 
c43b223ffeb..4ec34c15efc 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsZooKeeperStorage.cpp @@ -314,7 +314,8 @@ ASTPtr UserDefinedSQLObjectsZooKeeperStorage::parseObjectData(const String & obj object_data.data() + object_data.size(), "", 0, - global_context->getSettingsRef().max_parser_depth); + global_context->getSettingsRef().max_parser_depth, + global_context->getSettingsRef().max_parser_backtracks); return ast; } } diff --git a/src/Functions/formatQuery.cpp b/src/Functions/formatQuery.cpp index 92403d2e88e..d7addcc284e 100644 --- a/src/Functions/formatQuery.cpp +++ b/src/Functions/formatQuery.cpp @@ -17,6 +17,9 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; } +namespace +{ + enum class OutputFormatting { SingleLine, @@ -29,21 +32,16 @@ enum class ErrorHandling Null }; -template class FunctionFormatQuery : public IFunction { public: - static constexpr auto name = Name::name; - static FunctionPtr create(ContextPtr context) - { - const auto & settings = context->getSettings(); - return std::make_shared(settings.max_query_size, settings.max_parser_depth); - } - - FunctionFormatQuery(size_t max_query_size_, size_t max_parser_depth_) - : max_query_size(max_query_size_) - , max_parser_depth(max_parser_depth_) + FunctionFormatQuery(ContextPtr context, String name_, OutputFormatting output_formatting_, ErrorHandling error_handling_) + : name(name_), output_formatting(output_formatting_), error_handling(error_handling_) { + const Settings & settings = context->getSettings(); + max_query_size = settings.max_query_size; + max_parser_depth = settings.max_parser_depth; + max_parser_backtracks = settings.max_parser_backtracks; } String getName() const override { return name; } @@ -59,7 +57,7 @@ public: validateFunctionArgumentTypes(*this, arguments, args); DataTypePtr string_type = std::make_shared(); - if constexpr (error_handling == ErrorHandling::Null) + if (error_handling == ErrorHandling::Null) return std::make_shared(string_type); else return string_type; @@ -70,7 +68,7 @@ public: const ColumnPtr col_query = arguments[0].column; ColumnUInt8::MutablePtr col_null_map; - if constexpr (error_handling == ErrorHandling::Null) + if (error_handling == ErrorHandling::Null) col_null_map = ColumnUInt8::create(input_rows_count, 0); if (const ColumnString * col_query_string = checkAndGetColumn(col_query.get())) @@ -78,7 +76,7 @@ public: auto col_res = ColumnString::create(); formatVector(col_query_string->getChars(), col_query_string->getOffsets(), col_res->getChars(), col_res->getOffsets(), col_null_map); - if constexpr (error_handling == ErrorHandling::Null) + if (error_handling == ErrorHandling::Null) return ColumnNullable::create(std::move(col_res), std::move(col_null_map)); else return col_res; @@ -113,11 +111,11 @@ private: try { - ast = parseQuery(parser, begin, end, /*query_description*/ {}, max_query_size, max_parser_depth); + ast = parseQuery(parser, begin, end, /*query_description*/ {}, max_query_size, max_parser_depth, max_parser_backtracks); } catch (...) 
{ - if constexpr (error_handling == ErrorHandling::Null) + if (error_handling == ErrorHandling::Null) { const size_t res_data_new_size = res_data_size + 1; if (res_data_new_size > res_data.size()) @@ -135,7 +133,6 @@ private: } else { - static_assert(error_handling == ErrorHandling::Exception); throw; } } @@ -160,92 +157,91 @@ private: res_data.resize(res_data_size); } - const size_t max_query_size; - const size_t max_parser_depth; + String name; + OutputFormatting output_formatting; + ErrorHandling error_handling; + + size_t max_query_size; + size_t max_parser_depth; + size_t max_parser_backtracks; }; -struct NameFormatQuery -{ - static constexpr auto name = "formatQuery"; -}; - -struct NameFormatQueryOrNull -{ - static constexpr auto name = "formatQueryOrNull"; -}; - -struct NameFormatQuerySingleLine -{ - static constexpr auto name = "formatQuerySingleLine"; -}; - -struct NameFormatQuerySingleLineOrNull -{ - static constexpr auto name = "formatQuerySingleLineOrNull"; -}; +} REGISTER_FUNCTION(formatQuery) { - factory.registerFunction>(FunctionDocumentation{ - .description = "Returns a formatted, possibly multi-line, version of the given SQL query. Throws in case of a parsing error.\n[example:multiline]", - .syntax = "formatQuery(query)", - .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, - .returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).", - .examples{ - {"multiline", - "SELECT formatQuery('select a, b FRom tab WHERE a > 3 and b < 3');", - "SELECT\n" - " a,\n" - " b\n" - "FROM tab\n" - "WHERE (a > 3) AND (b < 3)"}}, - .categories{"Other"}}); + factory.registerFunction( + "formatQuery", + [](ContextPtr context) { return std::make_shared(context, "formatQuery", OutputFormatting::MultiLine, ErrorHandling::Exception); }, + FunctionDocumentation{ + .description = "Returns a formatted, possibly multi-line, version of the given SQL query. Throws in case of a parsing error.\n[example:multiline]", + .syntax = "formatQuery(query)", + .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, + .returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).", + .examples{ + {"multiline", + "SELECT formatQuery('select a, b FRom tab WHERE a > 3 and b < 3');", + "SELECT\n" + " a,\n" + " b\n" + "FROM tab\n" + "WHERE (a > 3) AND (b < 3)"}}, + .categories{"Other"}}); } REGISTER_FUNCTION(formatQueryOrNull) { - factory.registerFunction>(FunctionDocumentation{ - .description = "Returns a formatted, possibly multi-line, version of the given SQL query. Returns NULL in case of a parsing error.\n[example:multiline]", - .syntax = "formatQueryOrNull(query)", - .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, - .returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).", - .examples{ - {"multiline", - "SELECT formatQuery('select a, b FRom tab WHERE a > 3 and b < 3');", - "SELECT\n" - " a,\n" - " b\n" - "FROM tab\n" - "WHERE (a > 3) AND (b < 3)"}}, - .categories{"Other"}}); + factory.registerFunction( + "formatQueryOrNull", + [](ContextPtr context) { return std::make_shared(context, "formatQueryOrNull", OutputFormatting::MultiLine, ErrorHandling::Null); }, + FunctionDocumentation{ + .description = "Returns a formatted, possibly multi-line, version of the given SQL query. 
Returns NULL in case of a parsing error.\n[example:multiline]", + .syntax = "formatQueryOrNull(query)", + .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, + .returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).", + .examples{ + {"multiline", + "SELECT formatQuery('select a, b FRom tab WHERE a > 3 and b < 3');", + "SELECT\n" + " a,\n" + " b\n" + "FROM tab\n" + "WHERE (a > 3) AND (b < 3)"}}, + .categories{"Other"}}); } REGISTER_FUNCTION(formatQuerySingleLine) { - factory.registerFunction>(FunctionDocumentation{ - .description = "Like formatQuery() but the returned formatted string contains no line breaks. Throws in case of a parsing error.\n[example:multiline]", - .syntax = "formatQuerySingleLine(query)", - .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, - .returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).", - .examples{ - {"multiline", - "SELECT formatQuerySingleLine('select a, b FRom tab WHERE a > 3 and b < 3');", - "SELECT a, b FROM tab WHERE (a > 3) AND (b < 3)"}}, - .categories{"Other"}}); + factory.registerFunction( + "formatQuerySingleLine", + [](ContextPtr context) { return std::make_shared(context, "formatQuerySingleLine", OutputFormatting::SingleLine, ErrorHandling::Exception); }, + FunctionDocumentation{ + .description = "Like formatQuery() but the returned formatted string contains no line breaks. Throws in case of a parsing error.\n[example:multiline]", + .syntax = "formatQuerySingleLine(query)", + .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, + .returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).", + .examples{ + {"multiline", + "SELECT formatQuerySingleLine('select a, b FRom tab WHERE a > 3 and b < 3');", + "SELECT a, b FROM tab WHERE (a > 3) AND (b < 3)"}}, + .categories{"Other"}}); } REGISTER_FUNCTION(formatQuerySingleLineOrNull) { - factory.registerFunction>(FunctionDocumentation{ - .description = "Like formatQuery() but the returned formatted string contains no line breaks. Returns NULL in case of a parsing error.\n[example:multiline]", - .syntax = "formatQuerySingleLineOrNull(query)", - .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, - .returned_value = "The formatted query. [String](../../sql-reference/data-types/string.md).", - .examples{ - {"multiline", - "SELECT formatQuerySingleLine('select a, b FRom tab WHERE a > 3 and b < 3');", - "SELECT a, b FROM tab WHERE (a > 3) AND (b < 3)"}}, - .categories{"Other"}}); + factory.registerFunction( + "formatQuerySingleLineOrNull", + [](ContextPtr context) { return std::make_shared(context, "formatQuerySingleLineOrNull", OutputFormatting::SingleLine, ErrorHandling::Null); }, + FunctionDocumentation{ + .description = "Like formatQuery() but the returned formatted string contains no line breaks. Returns NULL in case of a parsing error.\n[example:multiline]", + .syntax = "formatQuerySingleLineOrNull(query)", + .arguments = {{"query", "The SQL query to be formatted. [String](../../sql-reference/data-types/string.md)"}}, + .returned_value = "The formatted query. 
[String](../../sql-reference/data-types/string.md).", + .examples{ + {"multiline", + "SELECT formatQuerySingleLine('select a, b FRom tab WHERE a > 3 and b < 3');", + "SELECT a, b FROM tab WHERE (a > 3) AND (b < 3)"}}, + .categories{"Other"}}); } } diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp index a55588baeaa..1147d74c146 100644 --- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp @@ -206,7 +206,7 @@ namespace if (!filter.empty()) { ParserExpression parser; - ASTPtr expr = parseQuery(parser, filter, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr expr = parseQuery(parser, filter, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); query->filters.emplace_back(type, std::move(expr)); } } diff --git a/src/Interpreters/AsynchronousMetricLog.cpp b/src/Interpreters/AsynchronousMetricLog.cpp index 5cf7f951eec..dc67bd91550 100644 --- a/src/Interpreters/AsynchronousMetricLog.cpp +++ b/src/Interpreters/AsynchronousMetricLog.cpp @@ -21,31 +21,31 @@ ColumnsDescription AsynchronousMetricLogElement::getColumnsDescription() { "hostname", std::make_shared(std::make_shared()), - parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), "Hostname of the server executing the query." }, { "event_date", std::make_shared(), - parseQuery(codec_parser, "(Delta(2), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH), + parseQuery(codec_parser, "(Delta(2), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), "Event date." }, { "event_time", std::make_shared(), - parseQuery(codec_parser, "(Delta(4), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH), + parseQuery(codec_parser, "(Delta(4), ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), "Event time." }, { "metric", std::make_shared(std::make_shared()), - parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH), + parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), "Metric name." }, { "value", std::make_shared(), - parseQuery(codec_parser, "(ZSTD(3))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH), + parseQuery(codec_parser, "(ZSTD(3))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS), "Metric value." 
} }; diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index fe2baea6b4e..e10f3ecfbc9 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -154,7 +154,8 @@ void DDLLogEntry::parse(const String & data) rb >> "settings: " >> settings_str >> "\n"; ParserSetQuery parser{true}; constexpr UInt64 max_depth = 16; - ASTPtr settings_ast = parseQuery(parser, settings_str, Context::getGlobalContextInstance()->getSettingsRef().max_query_size, max_depth); + constexpr UInt64 max_backtracks = DBMS_DEFAULT_MAX_PARSER_BACKTRACKS; + ASTPtr settings_ast = parseQuery(parser, settings_str, Context::getGlobalContextInstance()->getSettingsRef().max_query_size, max_depth, max_backtracks); settings.emplace(std::move(settings_ast->as()->changes)); } } @@ -197,7 +198,7 @@ void DDLTaskBase::parseQueryFromEntry(ContextPtr context) ParserQuery parser_query(end, settings.allow_settings_after_format_in_insert); String description; - query = parseQuery(parser_query, begin, end, description, 0, settings.max_parser_depth); + query = parseQuery(parser_query, begin, end, description, 0, settings.max_parser_depth, settings.max_parser_backtracks); } void DDLTaskBase::formatRewrittenQuery(ContextPtr context) diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp index 60110916760..fed29b410db 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.cpp @@ -96,7 +96,7 @@ static ASTPtr parseAdditionalPostFilter(const Context & context) ParserExpression parser; return parseQuery( parser, filter.data(), filter.data() + filter.size(), - "additional filter", settings.max_query_size, settings.max_parser_depth); + "additional filter", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); } static ActionsDAGPtr makeAdditionalPostFilter(ASTPtr & ast, ContextPtr context, const Block & header) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index edd7452c130..2a08e8458a4 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -381,7 +381,7 @@ ASTPtr InterpreterCreateQuery::formatColumns(const NamesAndTypesList & columns) String type_name = column.type->getName(); const char * pos = type_name.data(); const char * end = pos + type_name.size(); - column_declaration->type = parseQuery(type_parser, pos, end, "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + column_declaration->type = parseQuery(type_parser, pos, end, "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); columns_list->children.emplace_back(column_declaration); } @@ -401,7 +401,7 @@ ASTPtr InterpreterCreateQuery::formatColumns(const NamesAndTypesList & columns, String type_name = alias_column.type->getName(); const char * type_pos = type_name.data(); const char * type_end = type_pos + type_name.size(); - column_declaration->type = parseQuery(type_parser, type_pos, type_end, "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + column_declaration->type = parseQuery(type_parser, type_pos, type_end, "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); column_declaration->default_specifier = "ALIAS"; @@ -409,7 +409,7 @@ ASTPtr InterpreterCreateQuery::formatColumns(const NamesAndTypesList & columns, const char * alias_pos = alias.data(); const char * alias_end = alias_pos + alias.size(); ParserExpression 
expression_parser; - column_declaration->default_expression = parseQuery(expression_parser, alias_pos, alias_end, "expression", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + column_declaration->default_expression = parseQuery(expression_parser, alias_pos, alias_end, "expression", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); column_declaration->children.push_back(column_declaration->default_expression); columns_list->children.emplace_back(column_declaration); @@ -433,7 +433,7 @@ ASTPtr InterpreterCreateQuery::formatColumns(const ColumnsDescription & columns) String type_name = column.type->getName(); const char * type_name_pos = type_name.data(); const char * type_name_end = type_name_pos + type_name.size(); - column_declaration->type = parseQuery(type_parser, type_name_pos, type_name_end, "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + column_declaration->type = parseQuery(type_parser, type_name_pos, type_name_end, "data type", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); if (column.default_desc.expression) { @@ -1852,10 +1852,12 @@ void InterpreterCreateQuery::addColumnsDescriptionToCreateQueryIfNecessary(ASTCr auto ast_storage = std::make_shared(); unsigned max_parser_depth = static_cast(getContext()->getSettingsRef().max_parser_depth); + unsigned max_parser_backtracks = static_cast(getContext()->getSettingsRef().max_parser_backtracks); auto query_from_storage = DB::getCreateQueryFromStorage(storage, ast_storage, false, max_parser_depth, + max_parser_backtracks, true); auto & create_query_from_storage = query_from_storage->as(); diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 97ae9649ae8..8fb0dabb5b5 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -97,7 +97,8 @@ BlockIO InterpreterDeleteQuery::execute() alter_query.data() + alter_query.size(), "ALTER query", 0, - DBMS_DEFAULT_MAX_PARSER_DEPTH); + DBMS_DEFAULT_MAX_PARSER_DEPTH, + DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); auto context = Context::createCopy(getContext()); context->setSetting("mutations_sync", 2); /// Lightweight delete is always synchronous diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 86196270ed1..26dae6a1df3 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -281,7 +281,7 @@ BlockIO InterpreterKillQueryQuery::execute() const auto with_round_bracket = alter_command.front() == '('; ParserAlterCommand parser{with_round_bracket}; auto command_ast - = parseQuery(parser, alter_command, 0, getContext()->getSettingsRef().max_parser_depth); + = parseQuery(parser, alter_command, 0, getContext()->getSettingsRef().max_parser_depth, getContext()->getSettingsRef().max_parser_backtracks); required_access_rights = InterpreterAlterQuery::getRequiredAccessForCommand( command_ast->as(), table_id.database_name, table_id.table_name); if (!access->isGranted(required_access_rights)) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index a314492c5b0..07f4e94680c 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -160,7 +160,7 @@ FilterDAGInfoPtr generateFilterActions( { ParserExpression expr_parser; /// We should add back quotes around column name as it can contain dots. 
- expr_list->children.push_back(parseQuery(expr_parser, backQuoteIfNeed(column_str), 0, context->getSettingsRef().max_parser_depth)); + expr_list->children.push_back(parseQuery(expr_parser, backQuoteIfNeed(column_str), 0, context->getSettingsRef().max_parser_depth, context->getSettingsRef().max_parser_backtracks)); } select_ast->setExpression(ASTSelectQuery::Expression::TABLES, std::make_shared()); @@ -331,7 +331,7 @@ ASTPtr parseAdditionalFilterConditionForTable( const auto & settings = context.getSettingsRef(); return parseQuery( parser, filter.data(), filter.data() + filter.size(), - "additional filter", settings.max_query_size, settings.max_parser_depth); + "additional filter", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); } } diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 6251a9604e1..5c4ae528fc1 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -43,7 +43,7 @@ ASTPtr makeSubqueryTemplate(const String & table_alias) String query_template = "(select * from _t)"; if (!table_alias.empty()) query_template += " as " + table_alias; - ASTPtr subquery_template = parseQuery(parser, query_template, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr subquery_template = parseQuery(parser, query_template, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); if (!subquery_template) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot parse subquery template"); return subquery_template; diff --git a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp index 9f6e9b930fd..6d6077a0295 100644 --- a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp +++ b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp @@ -19,7 +19,7 @@ using namespace DB; static inline ASTPtr tryRewrittenCreateQuery(const String & query, ContextPtr context) { ParserExternalDDLQuery external_ddl_parser; - ASTPtr ast = parseQuery(external_ddl_parser, "EXTERNAL DDL FROM MySQL(test_database, test_database) " + query, 0, 0); + ASTPtr ast = parseQuery(external_ddl_parser, "EXTERNAL DDL FROM MySQL(test_database, test_database) " + query, 0, 0, 0); return MySQLInterpreter::InterpreterCreateImpl::getRewrittenQueries( *ast->as()->external_ddl->as(), diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index a74b5c67726..e4cbbb8f5f7 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -216,7 +216,7 @@ std::shared_ptr createSystemLog( /// Validate engine definition syntax to prevent some configuration errors. ParserStorageWithComment storage_parser; auto storage_ast = parseQuery(storage_parser, log_settings.engine.data(), log_settings.engine.data() + log_settings.engine.size(), - "Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + "Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); auto & storage_with_comment = storage_ast->as(); /// Add comment to AST. So it will be saved when the table will be renamed. 
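For reviewers tracking the mechanical change: every parseQuery call site gains one trailing argument, a backtracking budget that complements the existing depth limit. A minimal sketch of the migrated call shape, assuming the usual ClickHouse headers and the new DBMS_DEFAULT_MAX_PARSER_BACKTRACKS default alongside DBMS_DEFAULT_MAX_PARSER_DEPTH in Core/Defines.h (the function name and query description are illustrative, not from this patch):

#include <Core/Defines.h>   /// DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS
#include <Parsers/ParserQuery.h>
#include <Parsers/parseQuery.h>

static DB::ASTPtr parseExample(const char * begin, const char * end)
{
    DB::ParserQuery parser(end, /*allow_settings_after_format_in_insert_=*/ false);
    /// Old: parseQuery(parser, begin, end, "example", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
    /// New: one extra limit at the end. Passing 0 disables the backtracking check,
    /// which is why the unit tests in this patch use the (..., 0, 0, 0) form.
    return DB::parseQuery(parser, begin, end, "example", /*max_query_size=*/ 0,
                          DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
}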
@@ -647,7 +647,7 @@ ASTPtr SystemLog::getCreateTableQuery() ASTPtr storage_with_comment_ast = parseQuery( storage_parser, storage_def.data(), storage_def.data() + storage_def.size(), - "Storage to create table for " + LogElement::name(), 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + "Storage to create table for " + LogElement::name(), 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); StorageWithComment & storage_with_comment = storage_with_comment_ast->as(); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 88021038ebb..7dd46534fdf 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -746,18 +746,18 @@ static std::tuple executeQueryImpl( { ParserKQLStatement parser(end, settings.allow_settings_after_format_in_insert); /// TODO: parser should fail early when max_query_size limit is reached. - ast = parseKQLQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + ast = parseKQLQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); } else if (settings.dialect == Dialect::prql && !internal) { - ParserPRQLQuery parser(max_query_size, settings.max_parser_depth); - ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + ParserPRQLQuery parser(max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); } else { ParserQuery parser(end, settings.allow_settings_after_format_in_insert); /// TODO: parser should fail early when max_query_size limit is reached. - ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth); + ast = parseQuery(parser, begin, end, "", max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); #ifndef NDEBUG /// Verify that AST formatting is consistent: @@ -774,7 +774,7 @@ static std::tuple executeQueryImpl( ast2 = parseQuery(parser, formatted1.data(), formatted1.data() + formatted1.size(), - "", new_max_query_size, settings.max_parser_depth); + "", new_max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); } catch (const Exception & e) { diff --git a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp index 1295a4d5a75..d78b6ab0c4d 100644 --- a/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp +++ b/src/Interpreters/getCustomKeyFilterForParallelReplicas.cpp @@ -122,7 +122,7 @@ ASTPtr parseCustomKeyForTable(const String & custom_key, const Context & context const auto & settings = context.getSettingsRef(); return parseQuery( parser, custom_key.data(), custom_key.data() + custom_key.size(), - "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth); + "parallel replicas custom key", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); } } diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp index 2723eb37350..226472175b3 100644 --- a/src/Interpreters/loadMetadata.cpp +++ b/src/Interpreters/loadMetadata.cpp @@ -55,9 +55,11 @@ static void executeCreateQuery( bool create, bool has_force_restore_data_flag) { + const Settings & settings = context->getSettingsRef(); ParserCreateQuery parser; ASTPtr ast = parseQuery( - parser, query.data(), query.data() + query.size(), "in file " + file_name, 0, context->getSettingsRef().max_parser_depth); + 
parser, query.data(), query.data() + query.size(), "in file " + file_name, + 0, settings.max_parser_depth, settings.max_parser_backtracks); auto & ast_create_query = ast->as(); ast_create_query.setDatabase(database); diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index 78b72022a9a..30a41c090d5 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -115,7 +115,7 @@ ColumnsDescription parseColumnsListFromString(const std::string & structure, con ParserColumnDeclarationList parser(true, true); const Settings & settings = context->getSettingsRef(); - ASTPtr columns_list_raw = parseQuery(parser, structure, "columns declaration list", settings.max_query_size, settings.max_parser_depth); + ASTPtr columns_list_raw = parseQuery(parser, structure, "columns declaration list", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); auto * columns_list = dynamic_cast(columns_list_raw.get()); if (!columns_list) @@ -136,7 +136,7 @@ bool tryParseColumnsListFromString(const std::string & structure, ColumnsDescrip const char * start = structure.data(); const char * end = structure.data() + structure.size(); ASTPtr columns_list_raw = tryParseQuery( - parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth); + parser, start, end, error, false, "columns declaration list", false, settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks, true); if (!columns_list_raw) return false; diff --git a/src/Interpreters/tests/gtest_comparison_graph.cpp b/src/Interpreters/tests/gtest_comparison_graph.cpp index 96a78241c8e..ac24a8de368 100644 --- a/src/Interpreters/tests/gtest_comparison_graph.cpp +++ b/src/Interpreters/tests/gtest_comparison_graph.cpp @@ -12,7 +12,7 @@ using namespace DB; static ComparisonGraph getGraph(const String & query) { ParserExpressionList parser(false); - ASTPtr ast = parseQuery(parser, query, 0, 0); + ASTPtr ast = parseQuery(parser, query, 0, 0, 0); return ComparisonGraph(ast->children); } diff --git a/src/Interpreters/tests/gtest_cycle_aliases.cpp b/src/Interpreters/tests/gtest_cycle_aliases.cpp index 2bdeac90f8f..5ff3fbe1c2d 100644 --- a/src/Interpreters/tests/gtest_cycle_aliases.cpp +++ b/src/Interpreters/tests/gtest_cycle_aliases.cpp @@ -14,10 +14,10 @@ TEST(QueryNormalizer, SimpleLoopAlias) { String query = "a as a"; ParserExpressionList parser(false); - ASTPtr ast = parseQuery(parser, query, 0, 0); + ASTPtr ast = parseQuery(parser, query, 0, 0, 0); Aliases aliases; - aliases["a"] = parseQuery(parser, "a as a", 0, 0)->children[0]; + aliases["a"] = parseQuery(parser, "a as a", 0, 0, 0)->children[0]; Settings settings; QueryNormalizer::Data normalizer_data(aliases, {}, false, settings, false); @@ -28,11 +28,11 @@ TEST(QueryNormalizer, SimpleCycleAlias) { String query = "a as b, b as a"; ParserExpressionList parser(false); - ASTPtr ast = parseQuery(parser, query, 0, 0); + ASTPtr ast = parseQuery(parser, query, 0, 0, 0); Aliases aliases; - aliases["a"] = parseQuery(parser, "b as a", 0, 0)->children[0]; - aliases["b"] = parseQuery(parser, "a as b", 0, 0)->children[0]; + aliases["a"] = parseQuery(parser, "b as a", 0, 0, 0)->children[0]; + aliases["b"] = parseQuery(parser, "a as b", 0, 0, 0)->children[0]; Settings settings; QueryNormalizer::Data normalizer_data(aliases, {}, false, settings, true); diff --git 
a/src/Interpreters/tests/gtest_table_overrides.cpp b/src/Interpreters/tests/gtest_table_overrides.cpp
index 779bc7a53a4..09aa2e1f37f 100644
--- a/src/Interpreters/tests/gtest_table_overrides.cpp
+++ b/src/Interpreters/tests/gtest_table_overrides.cpp
@@ -34,11 +34,11 @@ TEST_P(TableOverrideTest, applyOverrides)
    const auto & [database_query, table_query, expected_query] = GetParam();
    ParserCreateQuery parser;
    ASTPtr database_ast;
-   ASSERT_NO_THROW(database_ast = parseQuery(parser, database_query, 0, 0));
+   ASSERT_NO_THROW(database_ast = parseQuery(parser, database_query, 0, 0, 0));
    auto * database = database_ast->as<ASTCreateQuery>();
    ASSERT_NE(nullptr, database);
    ASTPtr table_ast;
-   ASSERT_NO_THROW(table_ast = parseQuery(parser, table_query, 0, 0));
+   ASSERT_NO_THROW(table_ast = parseQuery(parser, table_query, 0, 0, 0));
    auto * table = table_ast->as<ASTCreateQuery>();
    ASSERT_NE(nullptr, table);
    auto table_name = table->table->as<ASTIdentifier>()->name();
diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp
index 6d267a7d215..1e7d0158878 100644
--- a/src/Parsers/ExpressionListParsers.cpp
+++ b/src/Parsers/ExpressionListParsers.cpp
@@ -1918,7 +1918,7 @@ public:
            && string_literal->as<ASTLiteral &>().value.tryGet(literal))
        {
            Tokens tokens(literal.data(), literal.data() + literal.size());
-           IParser::Pos token_pos(tokens, 0);
+           IParser::Pos token_pos(tokens, pos.max_depth, pos.max_backtracks);
            Expected token_expected;
            ASTPtr expr;
diff --git a/src/Parsers/IParser.cpp b/src/Parsers/IParser.cpp
new file mode 100644
index 00000000000..d1e9ace89b6
--- /dev/null
+++ b/src/Parsers/IParser.cpp
@@ -0,0 +1,33 @@
+#include <Parsers/IParser.h>
+#include <Common/Exception.h>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int TOO_DEEP_RECURSION;
+}
+
+IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs)
+{
+    depth = rhs.depth;
+    max_depth = rhs.max_depth;
+
+    if (rhs.backtracks > backtracks)
+        backtracks = rhs.backtracks;
+
+    max_backtracks = rhs.max_backtracks;
+
+    if (rhs < *this)
+    {
+        ++backtracks;
+        if (max_backtracks && backtracks > max_backtracks)
+            throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Maximum amount of backtracking ({}) exceeded in the parser. Consider raising the max_parser_backtracks parameter.", max_backtracks);
+    }
+
+    TokenIterator::operator=(rhs);
+
+    return *this;
+}
+
+}
diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h
index 198ec0346ff..291f8ee7d44 100644
--- a/src/Parsers/IParser.h
+++ b/src/Parsers/IParser.h
@@ -62,11 +62,18 @@ public:
        uint32_t depth = 0;
        uint32_t max_depth = 0;

-       Pos(Tokens & tokens_, uint32_t max_depth_) : TokenIterator(tokens_), max_depth(max_depth_)
+       uint32_t backtracks = 0;
+       uint32_t max_backtracks = 0;
+
+       Pos(Tokens & tokens_, uint32_t max_depth_, uint32_t max_backtracks_)
+           : TokenIterator(tokens_), max_depth(max_depth_), max_backtracks(max_backtracks_)
        {
        }

-       Pos(TokenIterator token_iterator_, uint32_t max_depth_) : TokenIterator(token_iterator_), max_depth(max_depth_) { }
+       Pos(TokenIterator token_iterator_, uint32_t max_depth_, uint32_t max_backtracks_)
+           : TokenIterator(token_iterator_), max_depth(max_depth_), max_backtracks(max_backtracks_)
+       {
+       }

        ALWAYS_INLINE void increaseDepth()
        {
@@ -97,6 +104,10 @@ public:
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error in parser: incorrect calculation of parse depth");
            --depth;
        }
+
+       Pos(const Pos & rhs) = default;
+
+       Pos & operator=(const Pos & rhs);
    };

/** Get the text of this parser parses.
*/ diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp index 152c29e5941..1d77007a37c 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.cpp @@ -279,13 +279,13 @@ String IParserKQLFunction::getKQLFunctionName(IParser::Pos & pos) } String IParserKQLFunction::kqlCallToExpression( - const std::string_view function_name, const std::initializer_list params, const uint32_t max_depth) + const std::string_view function_name, const std::initializer_list params, uint32_t max_depth, uint32_t max_backtracks) { - return kqlCallToExpression(function_name, std::span(params), max_depth); + return kqlCallToExpression(function_name, std::span(params), max_depth, max_backtracks); } String IParserKQLFunction::kqlCallToExpression( - const std::string_view function_name, const std::span params, const uint32_t max_depth) + const std::string_view function_name, const std::span params, uint32_t max_depth, uint32_t max_backtracks) { const auto params_str = std::accumulate( std::cbegin(params), @@ -302,7 +302,7 @@ String IParserKQLFunction::kqlCallToExpression( const auto kql_call = std::format("{}({})", function_name, params_str); DB::Tokens call_tokens(kql_call.c_str(), kql_call.c_str() + kql_call.length()); - DB::IParser::Pos tokens_pos(call_tokens, max_depth); + DB::IParser::Pos tokens_pos(call_tokens, max_depth, max_backtracks); return DB::IParserKQLFunction::getExpression(tokens_pos); } diff --git a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h index 147436551f9..f5069e80745 100644 --- a/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h +++ b/src/Parsers/Kusto/KustoFunctions/IParserKQLFunction.h @@ -77,8 +77,8 @@ public: static std::optional getOptionalArgument(const String & function_name, DB::IParser::Pos & pos, ArgumentState argument_state = ArgumentState::Parsed); static String - kqlCallToExpression(std::string_view function_name, std::initializer_list params, uint32_t max_depth); - static String kqlCallToExpression(std::string_view function_name, std::span params, uint32_t max_depth); + kqlCallToExpression(std::string_view function_name, std::initializer_list params, uint32_t max_depth, uint32_t max_backtracks); + static String kqlCallToExpression(std::string_view function_name, std::span params, uint32_t max_depth, uint32_t max_backtracks); static String escapeSingleQuotes(const String & input); protected: diff --git a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp index b0eec16f56f..87841e295ba 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLCastingFunctions.cpp @@ -99,7 +99,7 @@ bool ToTimeSpan::convertImpl(String & out, IParser::Pos & pos) ++pos; try { - auto result = kqlCallToExpression("time", {arg}, pos.max_depth); + auto result = kqlCallToExpression("time", {arg}, pos.max_depth, pos.max_backtracks); out = std::format("{}", result); } catch (...) 
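The KQL translation layer follows one invariant throughout these hunks: whenever a helper re-tokenizes generated text, the fresh Pos must inherit both budgets from the caller's pos, or the sub-parse would silently bypass the new cap. A sketch of that pattern under the same assumption (the helper name and the elided sub-parser are illustrative):

#include <Parsers/IParser.h>
#include <Parsers/TokenIterator.h>
#include <string>

static void reparseSketch(const std::string & expr, DB::IParser::Pos & pos)
{
    DB::Tokens tokens(expr.data(), expr.data() + expr.size());
    /// Propagate both limits; dropping either one would exempt this
    /// sub-parse from the corresponding cap.
    DB::IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks);
    /// ... run a sub-parser over new_pos, as the surrounding hunks do ...
}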
diff --git a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp index 924ac9f6490..e90be363e4b 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLDynamicFunctions.cpp @@ -99,7 +99,7 @@ bool ArrayRotateRight::convertImpl(String & out, IParser::Pos & pos) const auto array = getArgument(function_name, pos, ArgumentState::Raw); const auto count = getArgument(function_name, pos, ArgumentState::Raw); - out = kqlCallToExpression("array_rotate_left", {array, "-1 * " + count}, pos.max_depth); + out = kqlCallToExpression("array_rotate_left", {array, "-1 * " + count}, pos.max_depth, pos.max_backtracks); return true; } @@ -140,7 +140,7 @@ bool ArrayShiftRight::convertImpl(String & out, IParser::Pos & pos) "array_shift_left", fill ? std::initializer_list{array, negated_count, *fill} : std::initializer_list{array, negated_count}, - pos.max_depth); + pos.max_depth, pos.max_backtracks); return true; } @@ -233,8 +233,8 @@ bool JaccardIndex::convertImpl(String & out, IParser::Pos & pos) const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); out = std::format( "divide(length({0}), length({1}))", - kqlCallToExpression("set_intersect", {lhs, rhs}, pos.max_depth), - kqlCallToExpression("set_union", {lhs, rhs}, pos.max_depth)); + kqlCallToExpression("set_intersect", {lhs, rhs}, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("set_union", {lhs, rhs}, pos.max_depth, pos.max_backtracks)); return true; } @@ -292,7 +292,7 @@ bool SetDifference::convertImpl(String & out, IParser::Pos & pos) while (auto next_array = getOptionalArgument(function_name, pos, ArgumentState::Raw)) arrays.push_back(*next_array); - return kqlCallToExpression("set_union", std::vector(arrays.cbegin(), arrays.cend()), pos.max_depth); + return kqlCallToExpression("set_union", std::vector(arrays.cbegin(), arrays.cend()), pos.max_depth, pos.max_backtracks); }); out = std::format("arrayFilter(x -> not has({1}, x), arrayDistinct({0}))", lhs, rhs); diff --git a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp index 6f853b16fbc..06566dc54ec 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLIPFunctions.cpp @@ -34,10 +34,10 @@ bool Ipv4Compare::convertImpl(String & out, IParser::Pos & pos) "sign(IPv4StringToNumOrNull(toString((tupleElement(IPv4CIDRToRange(assumeNotNull(lhs_ip_{5}), " "toUInt8(min2({4}, min2(assumeNotNull(lhs_mask_{5}), assumeNotNull(rhs_mask_{5})))) as mask_{5}), 1))))" " - IPv4StringToNumOrNull(toString((tupleElement(IPv4CIDRToRange(assumeNotNull(rhs_ip_{5}), mask_{5}), 1))))))", - kqlCallToExpression("parse_ipv4", {lhs}, pos.max_depth), - kqlCallToExpression("ipv4_netmask_suffix", {lhs}, pos.max_depth), - kqlCallToExpression("parse_ipv4", {rhs}, pos.max_depth), - kqlCallToExpression("ipv4_netmask_suffix", {rhs}, pos.max_depth), + kqlCallToExpression("parse_ipv4", {lhs}, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("ipv4_netmask_suffix", {lhs}, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("parse_ipv4", {rhs}, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("ipv4_netmask_suffix", {rhs}, pos.max_depth, pos.max_backtracks), mask ? 
*mask : "32", generateUniqueIdentifier()); return true; @@ -56,8 +56,8 @@ bool Ipv4IsInRange::convertImpl(String & out, IParser::Pos & pos) "or isNull({1} as range_start_ip_{3}) or isNull({2} as range_mask_{3}), null, " "bitXor(range_start_ip_{3}, bitAnd(ip_{3}, bitNot(toUInt32(intExp2(toInt32(32 - range_mask_{3})) - 1)))) = 0) ", ip_address, - kqlCallToExpression("parse_ipv4", {ip_range}, pos.max_depth), - kqlCallToExpression("ipv4_netmask_suffix", {ip_range}, pos.max_depth), + kqlCallToExpression("parse_ipv4", {ip_range}, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("ipv4_netmask_suffix", {ip_range}, pos.max_depth, pos.max_backtracks), generateUniqueIdentifier()); return true; } @@ -71,7 +71,7 @@ bool Ipv4IsMatch::convertImpl(String & out, IParser::Pos & pos) const auto lhs = getArgument(function_name, pos, ArgumentState::Raw); const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Raw); - out = std::format("equals({}, 0)", kqlCallToExpression("ipv4_compare", {lhs, rhs, mask ? *mask : "32"}, pos.max_depth)); + out = std::format("equals({}, 0)", kqlCallToExpression("ipv4_compare", {lhs, rhs, mask ? *mask : "32"}, pos.max_depth, pos.max_backtracks)); return true; } @@ -196,7 +196,7 @@ bool Ipv6IsMatch::convertImpl(String & out, IParser::Pos & pos) const auto lhs = getArgument(function_name, pos, ArgumentState::Raw); const auto rhs = getArgument(function_name, pos, ArgumentState::Raw); const auto mask = getOptionalArgument(function_name, pos, ArgumentState::Raw); - out = std::format("equals({}, 0)", kqlCallToExpression("ipv6_compare", {lhs, rhs, mask ? *mask : "128"}, pos.max_depth)); + out = std::format("equals({}, 0)", kqlCallToExpression("ipv6_compare", {lhs, rhs, mask ? *mask : "128"}, pos.max_depth, pos.max_backtracks)); return true; } @@ -228,9 +228,9 @@ bool ParseIpv6Mask::convertImpl(String & out, IParser::Pos & pos) const auto unique_identifier = generateUniqueIdentifier(); out = std::format( "if(empty({0} as ipv4_{3}), {1}, {2})", - kqlCallToExpression("format_ipv4", {"trim_start('::', " + ip_address + ")", mask + " - 96"}, pos.max_depth), - kqlCallToExpression("parse_ipv6", {"strcat(tostring(parse_ipv6(" + ip_address + ")), '/', tostring(" + mask + "))"}, pos.max_depth), - kqlCallToExpression("parse_ipv6", {"ipv4_" + unique_identifier}, pos.max_depth), + kqlCallToExpression("format_ipv4", {"trim_start('::', " + ip_address + ")", mask + " - 96"}, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("parse_ipv6", {"strcat(tostring(parse_ipv6(" + ip_address + ")), '/', tostring(" + mask + "))"}, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("parse_ipv6", {"ipv4_" + unique_identifier}, pos.max_depth, pos.max_backtracks), unique_identifier); return true; } @@ -247,9 +247,9 @@ bool FormatIpv4::convertImpl(String & out, IParser::Pos & pos) "ifNull(if(isNotNull(toUInt32OrNull(toString({0})) as param_as_uint32_{3}) and toTypeName({0}) = 'String' or ({1}) < 0 " "or isNull(ifNull(param_as_uint32_{3}, {2}) as ip_as_number_{3}), null, " "IPv4NumToString(bitAnd(ip_as_number_{3}, bitNot(toUInt32(intExp2(toInt32(32 - ({1}))) - 1))))), '')", - ParserKQLBase::getExprFromToken(ip_address, pos.max_depth), + ParserKQLBase::getExprFromToken(ip_address, pos.max_depth, pos.max_backtracks), mask ? 
*mask : "32", - kqlCallToExpression("parse_ipv4", {"tostring(" + ip_address + ")"}, pos.max_depth), + kqlCallToExpression("parse_ipv4", {"tostring(" + ip_address + ")"}, pos.max_depth, pos.max_backtracks), generateUniqueIdentifier()); return true; } @@ -266,10 +266,10 @@ bool FormatIpv4Mask::convertImpl(String & out, IParser::Pos & pos) out = std::format( "if(empty({1} as formatted_ip_{2}) or position(toTypeName({0}), 'Int') = 0 or not {0} between 0 and 32, '', " "concat(formatted_ip_{2}, '/', toString(toInt64(min2({0}, ifNull({3} as suffix_{2}, 32))))))", - ParserKQLBase::getExprFromToken(calculated_mask, pos.max_depth), - kqlCallToExpression("format_ipv4", {ip_address, calculated_mask}, pos.max_depth), + ParserKQLBase::getExprFromToken(calculated_mask, pos.max_depth, pos.max_backtracks), + kqlCallToExpression("format_ipv4", {ip_address, calculated_mask}, pos.max_depth, pos.max_backtracks), generateUniqueIdentifier(), - kqlCallToExpression("ipv4_netmask_suffix", {"tostring(" + ip_address + ")"}, pos.max_depth)); + kqlCallToExpression("ipv4_netmask_suffix", {"tostring(" + ip_address + ")"}, pos.max_depth, pos.max_backtracks)); return true; } } diff --git a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp index 82cfa68b180..18c986c2191 100644 --- a/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp +++ b/src/Parsers/Kusto/KustoFunctions/KQLStringFunctions.cpp @@ -442,7 +442,7 @@ bool ParseJSON::convertImpl(String & out, IParser::Pos & pos) { --pos; auto arg = getArgument(fn_name, pos); - auto result = kqlCallToExpression("dynamic", {arg}, pos.max_depth); + auto result = kqlCallToExpression("dynamic", {arg}, pos.max_depth, pos.max_backtracks); out = std::format("{}", result); } else @@ -729,7 +729,7 @@ bool Trim::convertImpl(String & out, IParser::Pos & pos) const auto regex = getArgument(fn_name, pos, ArgumentState::Raw); const auto source = getArgument(fn_name, pos, ArgumentState::Raw); - out = kqlCallToExpression("trim_start", {regex, std::format("trim_end({0}, {1})", regex, source)}, pos.max_depth); + out = kqlCallToExpression("trim_start", {regex, std::format("trim_end({0}, {1})", regex, source)}, pos.max_depth, pos.max_backtracks); return true; } diff --git a/src/Parsers/Kusto/ParserKQLDistinct.cpp b/src/Parsers/Kusto/ParserKQLDistinct.cpp index 2de4d2c28e7..3ec823a61b5 100644 --- a/src/Parsers/Kusto/ParserKQLDistinct.cpp +++ b/src/Parsers/Kusto/ParserKQLDistinct.cpp @@ -12,7 +12,7 @@ bool ParserKQLDistinct::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) expr = getExprFromToken(pos); Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLExtend.cpp b/src/Parsers/Kusto/ParserKQLExtend.cpp index b37618f69fd..41ce296bd25 100644 --- a/src/Parsers/Kusto/ParserKQLExtend.cpp +++ b/src/Parsers/Kusto/ParserKQLExtend.cpp @@ -23,7 +23,7 @@ bool ParserKQLExtend ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) String except_str; String new_extend_str; Tokens ntokens(extend_expr.c_str(), extend_expr.c_str() + extend_expr.size()); - IParser::Pos npos(ntokens, pos.max_depth); + IParser::Pos npos(ntokens, pos.max_depth, pos.max_backtracks); String alias; @@ -77,7 +77,7 @@ bool ParserKQLExtend ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) String 
expr = std::format("SELECT * {}, {} from prev", except_str, new_extend_str); Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserSelectQuery().parse(new_pos, select_query, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLFilter.cpp b/src/Parsers/Kusto/ParserKQLFilter.cpp index 74d8610ecd4..b060ce8d2c7 100644 --- a/src/Parsers/Kusto/ParserKQLFilter.cpp +++ b/src/Parsers/Kusto/ParserKQLFilter.cpp @@ -14,7 +14,7 @@ bool ParserKQLFilter::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr where_expression; Tokens token_filter(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos pos_filter(token_filter, pos.max_depth); + IParser::Pos pos_filter(token_filter, pos.max_depth, pos.max_backtracks); if (!ParserExpressionWithOptionalAlias(false).parse(pos_filter, where_expression, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLLimit.cpp b/src/Parsers/Kusto/ParserKQLLimit.cpp index 910f0e8e1a3..0eb460757b1 100644 --- a/src/Parsers/Kusto/ParserKQLLimit.cpp +++ b/src/Parsers/Kusto/ParserKQLLimit.cpp @@ -14,7 +14,7 @@ bool ParserKQLLimit::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto expr = getExprFromToken(pos); Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserExpressionWithOptionalAlias(false).parse(new_pos, limit_length, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLMVExpand.cpp b/src/Parsers/Kusto/ParserKQLMVExpand.cpp index 7d242dffaf7..d174e9b5911 100644 --- a/src/Parsers/Kusto/ParserKQLMVExpand.cpp +++ b/src/Parsers/Kusto/ParserKQLMVExpand.cpp @@ -69,7 +69,7 @@ bool ParserKQLMVExpand::parseColumnArrayExprs(ColumnArrayExprs & column_array_ex auto add_columns = [&] { - column_array_expr = getExprFromToken(String(expr_begin_pos->begin, expr_end_pos->end), pos.max_depth); + column_array_expr = getExprFromToken(String(expr_begin_pos->begin, expr_end_pos->end), pos.max_depth, pos.max_backtracks); if (alias.empty()) { @@ -189,7 +189,7 @@ bool ParserKQLMVExpand::parserMVExpand(KQLMVExpand & kql_mv_expand, Pos & pos, E return true; } -bool ParserKQLMVExpand::genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_node, int32_t max_depth) +bool ParserKQLMVExpand::genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks) { String expand_str; String cast_type_column_remove, cast_type_column_rename; @@ -253,7 +253,7 @@ bool ParserKQLMVExpand::genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_no if (cast_type_column_remove.empty()) { query = std::format("Select {} {} From {} {}", columns, extra_columns, input, expand_str); - if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth)) + if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth, max_backtracks)) return false; if (!setSubQuerySource(sub_query_node, select_node, false, false)) return false; @@ -262,14 +262,14 @@ bool ParserKQLMVExpand::genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_no else { query = std::format("(Select {} {} From {} {})", columns, extra_columns, input, expand_str); - if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth)) + if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth, max_backtracks)) return false; if 
(!setSubQuerySource(sub_query_node, select_node, true, false)) return false; select_node = std::move(sub_query_node); auto rename_query = std::format("(Select * {}, {} From {})", cast_type_column_remove, cast_type_column_rename, "query"); - if (!parseSQLQueryByString(std::make_unique(), rename_query, sub_query_node, max_depth)) + if (!parseSQLQueryByString(std::make_unique(), rename_query, sub_query_node, max_depth, max_backtracks)) return false; if (!setSubQuerySource(sub_query_node, select_node, true, true)) return false; @@ -277,7 +277,7 @@ bool ParserKQLMVExpand::genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_no select_node = std::move(sub_query_node); query = std::format("Select * {}, {} from {}", cast_type_column_restore, cast_type_column_restore_name, "rename_query"); - if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth)) + if (!parseSQLQueryByString(std::make_unique(), query, sub_query_node, max_depth, max_backtracks)) return false; sub_query_node->as()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(select_node)); select_node = std::move(sub_query_node); @@ -294,12 +294,12 @@ bool ParserKQLMVExpand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) KQLMVExpand kql_mv_expand; if (!parserMVExpand(kql_mv_expand, pos, expected)) return false; - if (!genQuery(kql_mv_expand, node, pos.max_depth)) + if (!genQuery(kql_mv_expand, node, pos.max_depth, pos.max_backtracks)) return false; const String setting_str = "enable_unaligned_array_join = 1"; Tokens token_settings(setting_str.c_str(), setting_str.c_str() + setting_str.size()); - IParser::Pos pos_settings(token_settings, pos.max_depth); + IParser::Pos pos_settings(token_settings, pos.max_depth, pos.max_backtracks); if (!ParserSetQuery(true).parse(pos_settings, setting, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLMVExpand.h b/src/Parsers/Kusto/ParserKQLMVExpand.h index 61f206bb00d..068aee53f58 100644 --- a/src/Parsers/Kusto/ParserKQLMVExpand.h +++ b/src/Parsers/Kusto/ParserKQLMVExpand.h @@ -33,7 +33,7 @@ protected: static bool parseColumnArrayExprs(ColumnArrayExprs & column_array_exprs, Pos & pos, Expected & expected); static bool parserMVExpand(KQLMVExpand & kql_mv_expand, Pos & pos, Expected & expected); - static bool genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_node, int32_t max_depth); + static bool genQuery(KQLMVExpand & kql_mv_expand, ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks); const char * getName() const override { return "KQL mv-expand"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp index e89423e2fc9..4759efc0025 100644 --- a/src/Parsers/Kusto/ParserKQLMakeSeries.cpp +++ b/src/Parsers/Kusto/ParserKQLMakeSeries.cpp @@ -142,7 +142,7 @@ bool ParserKQLMakeSeries ::parseFromToStepClause(FromToStepClause & from_to_step || ParserKQLDateTypeTimespan().parseConstKQLTimespan(from_to_step.step_str)) { from_to_step.is_timespan = true; - from_to_step.step = std::stod(getExprFromToken(from_to_step.step_str, pos.max_depth)); + from_to_step.step = std::stod(getExprFromToken(from_to_step.step_str, pos.max_depth, pos.max_backtracks)); } else from_to_step.step = std::stod(from_to_step.step_str); @@ -150,7 +150,7 @@ bool ParserKQLMakeSeries ::parseFromToStepClause(FromToStepClause & from_to_step return true; } -bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & select_node, const uint32_t 
& max_depth)
+bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks)
 {
    const uint64_t era_diff = 62135596800; // this magic number is the difference in seconds between 0001-01-01 (Azure start time) and 1970-01-01 (CH start time)
@@ -166,15 +166,15 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr &
    auto step = from_to_step.step;

    if (!kql_make_series.from_to_step.from_str.empty())
-       start_str = getExprFromToken(kql_make_series.from_to_step.from_str, max_depth);
+       start_str = getExprFromToken(kql_make_series.from_to_step.from_str, max_depth, max_backtracks);

    if (!kql_make_series.from_to_step.to_str.empty())
-       end_str = getExprFromToken(from_to_step.to_str, max_depth);
+       end_str = getExprFromToken(from_to_step.to_str, max_depth, max_backtracks);

    auto date_type_cast = [&](String & src)
    {
        Tokens tokens(src.c_str(), src.c_str() + src.size());
-       IParser::Pos pos(tokens, max_depth);
+       IParser::Pos pos(tokens, max_depth, max_backtracks);
        String res;
        while (isValidKQLPos(pos))
        {
@@ -201,7 +201,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr &
    {
        std::vector<String> group_expression_tokens;
        Tokens tokens(group_expression.c_str(), group_expression.c_str() + group_expression.size());
-       IParser::Pos pos(tokens, max_depth);
+       IParser::Pos pos(tokens, max_depth, max_backtracks);
        while (isValidKQLPos(pos))
        {
            if (String(pos->begin, pos->end) == "AS")
@@ -296,7 +296,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr &

    ASTPtr sub_query_node;

-   if (!ParserSimpleCHSubquery(select_node).parseByString(sub_sub_query, sub_query_node, max_depth))
+   if (!ParserSimpleCHSubquery(select_node).parseByString(sub_sub_query, sub_query_node, max_depth, max_backtracks))
        return false;

    select_node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(sub_query_node));
@@ -351,7 +351,7 @@ bool ParserKQLMakeSeries ::makeSeries(KQLMakeSeries & kql_make_series, ASTPtr &
    else
        main_query = std::format("{},{}", group_expression_alias, final_axis_agg_alias_list);

-   if (!ParserSimpleCHSubquery(select_node).parseByString(sub_query, sub_query_node, max_depth))
+   if (!ParserSimpleCHSubquery(select_node).parseByString(sub_query, sub_query_node, max_depth, max_backtracks))
        return false;

    select_node->as<ASTSelectQuery>()->setExpression(ASTSelectQuery::Expression::TABLES, std::move(sub_query_node));
@@ -411,10 +411,10 @@ bool ParserKQLMakeSeries ::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
        subquery_columns += ", " + column_str;
    }

-   makeSeries(kql_make_series, node, pos.max_depth);
+   makeSeries(kql_make_series, node, pos.max_depth, pos.max_backtracks);

    Tokens token_main_query(kql_make_series.main_query.c_str(), kql_make_series.main_query.c_str() + kql_make_series.main_query.size());
-   IParser::Pos pos_main_query(token_main_query, pos.max_depth);
+   IParser::Pos pos_main_query(token_main_query, pos.max_depth, pos.max_backtracks);

    if (!ParserNotEmptyExpressionList(true).parse(pos_main_query, select_expression_list, expected))
        return false;
diff --git a/src/Parsers/Kusto/ParserKQLMakeSeries.h b/src/Parsers/Kusto/ParserKQLMakeSeries.h
index ef7cc4976f6..6a32e76eff3 100644
--- a/src/Parsers/Kusto/ParserKQLMakeSeries.h
+++ b/src/Parsers/Kusto/ParserKQLMakeSeries.h
@@ -42,7 +42,7 @@ protected:
        String main_query;
    };

-   static bool makeSeries(KQLMakeSeries & kql_make_series, 
ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks); static bool parseAggregationColumns(AggregationColumns & aggregation_columns, Pos & pos); static bool parseFromToStepClause(FromToStepClause & from_to_step, Pos & pos); diff --git a/src/Parsers/Kusto/ParserKQLPrint.cpp b/src/Parsers/Kusto/ParserKQLPrint.cpp index bd9980ea96d..37483439f14 100644 --- a/src/Parsers/Kusto/ParserKQLPrint.cpp +++ b/src/Parsers/Kusto/ParserKQLPrint.cpp @@ -10,7 +10,7 @@ bool ParserKQLPrint::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) const String expr = getExprFromToken(pos); Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(true).parse(new_pos, select_expression_list, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLProject.cpp b/src/Parsers/Kusto/ParserKQLProject.cpp index fdc458b7707..eab9ee082c5 100644 --- a/src/Parsers/Kusto/ParserKQLProject.cpp +++ b/src/Parsers/Kusto/ParserKQLProject.cpp @@ -12,7 +12,7 @@ bool ParserKQLProject ::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) expr = getExprFromToken(pos); Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!ParserNotEmptyExpressionList(false).parse(new_pos, select_expression_list, expected)) return false; diff --git a/src/Parsers/Kusto/ParserKQLQuery.cpp b/src/Parsers/Kusto/ParserKQLQuery.cpp index a54a2b0eda9..6fd9c95ec6f 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.cpp +++ b/src/Parsers/Kusto/ParserKQLQuery.cpp @@ -33,20 +33,20 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -bool ParserKQLBase::parseByString(const String expr, ASTPtr & node, const uint32_t max_depth) +bool ParserKQLBase::parseByString(String expr, ASTPtr & node, uint32_t max_depth, uint32_t max_backtracks) { Expected expected; Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos pos(tokens, max_depth); + IParser::Pos pos(tokens, max_depth, max_backtracks); return parse(pos, node, expected); } -bool ParserKQLBase::parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, int32_t max_depth) +bool ParserKQLBase::parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks) { Expected expected; Tokens token_subquery(query.c_str(), query.c_str() + query.size()); - IParser::Pos pos_subquery(token_subquery, max_depth); + IParser::Pos pos_subquery(token_subquery, max_depth, max_backtracks); if (!parser->parse(pos_subquery, select_node, expected)) return false; return true; @@ -121,10 +121,10 @@ bool ParserKQLBase::setSubQuerySource(ASTPtr & select_query, ASTPtr & source, bo return true; } -String ParserKQLBase::getExprFromToken(const String & text, const uint32_t max_depth) +String ParserKQLBase::getExprFromToken(const String & text, uint32_t max_depth, uint32_t max_backtracks) { Tokens tokens(text.c_str(), text.c_str() + text.size()); - IParser::Pos pos(tokens, max_depth); + IParser::Pos pos(tokens, max_depth, max_backtracks); return getExprFromToken(pos); } @@ -523,7 +523,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) String sub_query = std::format("({})", String(operation_pos.front().second->begin, last_pos->end)); Tokens token_subquery(sub_query.c_str(), sub_query.c_str() + sub_query.size()); - IParser::Pos 
pos_subquery(token_subquery, pos.max_depth); + IParser::Pos pos_subquery(token_subquery, pos.max_depth, pos.max_backtracks); if (!ParserKQLSubquery().parse(pos_subquery, tables, expected)) return false; @@ -544,7 +544,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (oprator) { Tokens token_clause(op_calsue.c_str(), op_calsue.c_str() + op_calsue.size()); - IParser::Pos pos_clause(token_clause, pos.max_depth); + IParser::Pos pos_clause(token_clause, pos.max_depth, pos.max_backtracks); if (!oprator->parse(pos_clause, node, expected)) return false; } @@ -577,7 +577,7 @@ bool ParserKQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto expr = String("*"); Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); if (!std::make_unique()->parse(new_pos, node, expected)) return false; } diff --git a/src/Parsers/Kusto/ParserKQLQuery.h b/src/Parsers/Kusto/ParserKQLQuery.h index a594f43ceec..e003ee3ee8b 100644 --- a/src/Parsers/Kusto/ParserKQLQuery.h +++ b/src/Parsers/Kusto/ParserKQLQuery.h @@ -9,11 +9,11 @@ class ParserKQLBase : public IParserBase { public: static String getExprFromToken(Pos & pos); - static String getExprFromToken(const String & text, uint32_t max_depth); + static String getExprFromToken(const String & text, uint32_t max_depth, uint32_t max_backtracks); static String getExprFromPipe(Pos & pos); static bool setSubQuerySource(ASTPtr & select_query, ASTPtr & source, bool dest_is_subquery, bool src_is_subquery); - static bool parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, int32_t max_depth); - bool parseByString(String expr, ASTPtr & node, uint32_t max_depth); + static bool parseSQLQueryByString(ParserPtr && parser, String & query, ASTPtr & select_node, uint32_t max_depth, uint32_t max_backtracks); + bool parseByString(String expr, ASTPtr & node, uint32_t max_depth, uint32_t max_backtracks); }; class ParserKQLQuery : public IParserBase diff --git a/src/Parsers/Kusto/ParserKQLSort.cpp b/src/Parsers/Kusto/ParserKQLSort.cpp index 7e5ac2b17e7..852ba50698d 100644 --- a/src/Parsers/Kusto/ParserKQLSort.cpp +++ b/src/Parsers/Kusto/ParserKQLSort.cpp @@ -19,7 +19,7 @@ bool ParserKQLSort::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) auto expr = getExprFromToken(pos); Tokens tokens(expr.c_str(), expr.c_str() + expr.size()); - IParser::Pos new_pos(tokens, pos.max_depth); + IParser::Pos new_pos(tokens, pos.max_depth, pos.max_backtracks); auto pos_backup = new_pos; if (!order_list.parse(pos_backup, order_expression_list, expected)) diff --git a/src/Parsers/Kusto/ParserKQLStatement.cpp b/src/Parsers/Kusto/ParserKQLStatement.cpp index 668696fa9dc..fbf2110e664 100644 --- a/src/Parsers/Kusto/ParserKQLStatement.cpp +++ b/src/Parsers/Kusto/ParserKQLStatement.cpp @@ -95,7 +95,7 @@ bool ParserKQLTableFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expe } ++pos; Tokens token_kql(kql_statement.c_str(), kql_statement.c_str() + kql_statement.size()); - IParser::Pos pos_kql(token_kql, pos.max_depth); + IParser::Pos pos_kql(token_kql, pos.max_depth, pos.max_backtracks); if (kql_p.parse(pos_kql, select, expected)) { diff --git a/src/Parsers/Kusto/ParserKQLSummarize.cpp b/src/Parsers/Kusto/ParserKQLSummarize.cpp index a45717930bb..47d706d0b4b 100644 --- a/src/Parsers/Kusto/ParserKQLSummarize.cpp +++ b/src/Parsers/Kusto/ParserKQLSummarize.cpp @@ -192,10 +192,10 @@ bool 
ParserKQLSummarize::parseImpl(Pos & pos, ASTPtr & node, Expected & expected
        expr_columns = expr_columns + "," + expr_aggregation;
    }

-   String converted_columns = getExprFromToken(expr_columns, pos.max_depth);
+   String converted_columns = getExprFromToken(expr_columns, pos.max_depth, pos.max_backtracks);

    Tokens token_converted_columns(converted_columns.c_str(), converted_columns.c_str() + converted_columns.size());
-   IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth);
+   IParser::Pos pos_converted_columns(token_converted_columns, pos.max_depth, pos.max_backtracks);

    if (!ParserNotEmptyExpressionList(true).parse(pos_converted_columns, select_expression_list, expected))
        return false;
@@ -204,10 +204,10 @@ bool ParserKQLSummarize::parseImpl(Pos & pos, ASTPtr & node, Expected & expected

    if (groupby)
    {
-       String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth);
+       String converted_groupby = getExprFromToken(expr_groupby, pos.max_depth, pos.max_backtracks);

        Tokens token_converted_groupby(converted_groupby.c_str(), converted_groupby.c_str() + converted_groupby.size());
-       IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth);
+       IParser::Pos postoken_converted_groupby(token_converted_groupby, pos.max_depth, pos.max_backtracks);

        if (!ParserNotEmptyExpressionList(false).parse(postoken_converted_groupby, group_expression_list, expected))
            return false;
diff --git a/src/Parsers/Kusto/parseKQLQuery.cpp b/src/Parsers/Kusto/parseKQLQuery.cpp
index bcc04ef7001..34a009873f8 100644
--- a/src/Parsers/Kusto/parseKQLQuery.cpp
+++ b/src/Parsers/Kusto/parseKQLQuery.cpp
@@ -322,12 +322,13 @@ ASTPtr tryParseKQLQuery(
    bool allow_multi_statements,
    size_t max_query_size,
    size_t max_parser_depth,
+   size_t max_parser_backtracks,
    bool skip_insignificant)
 {
    const char * query_begin = _out_query_end;
    Tokens tokens(query_begin, all_queries_end, max_query_size, skip_insignificant);
    /// NOTE: consider using UInt32 for the max_parser_depth setting.
- IParser::Pos token_iterator(tokens, static_cast(max_parser_depth)); + IParser::Pos token_iterator(tokens, static_cast(max_parser_depth), static_cast(max_parser_backtracks)); if (token_iterator->isEnd() || token_iterator->type == TokenType::Semicolon) @@ -441,10 +442,11 @@ ASTPtr parseKQLQueryAndMovePosition( const std::string & query_description, bool allow_multi_statements, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + size_t max_parser_backtracks) { std::string error_message; - ASTPtr res = tryParseKQLQuery(parser, pos, end, error_message, false, query_description, allow_multi_statements, max_query_size, max_parser_depth); + ASTPtr res = tryParseKQLQuery(parser, pos, end, error_message, false, query_description, allow_multi_statements, max_query_size, max_parser_depth, max_parser_backtracks); if (res) return res; @@ -458,9 +460,10 @@ ASTPtr parseKQLQuery( const char * end, const std::string & query_description, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + size_t max_parser_backtracks) { - return parseKQLQueryAndMovePosition(parser, begin, end, query_description, false, max_query_size, max_parser_depth); + return parseKQLQueryAndMovePosition(parser, begin, end, query_description, false, max_query_size, max_parser_depth, max_parser_backtracks); } ASTPtr parseKQLQuery( @@ -468,18 +471,20 @@ ASTPtr parseKQLQuery( const std::string & query, const std::string & query_description, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + size_t max_parser_backtracks) { - return parseKQLQuery(parser, query.data(), query.data() + query.size(), query_description, max_query_size, max_parser_depth); + return parseKQLQuery(parser, query.data(), query.data() + query.size(), query_description, max_query_size, max_parser_depth, max_parser_backtracks); } ASTPtr parseKQLQuery( IParser & parser, const std::string & query, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + size_t max_parser_backtracks) { - return parseKQLQuery(parser, query.data(), query.data() + query.size(), parser.getName(), max_query_size, max_parser_depth); + return parseKQLQuery(parser, query.data(), query.data() + query.size(), parser.getName(), max_query_size, max_parser_depth, max_parser_backtracks); } } diff --git a/src/Parsers/Kusto/parseKQLQuery.h b/src/Parsers/Kusto/parseKQLQuery.h index fca017e70fe..9e52ba56307 100644 --- a/src/Parsers/Kusto/parseKQLQuery.h +++ b/src/Parsers/Kusto/parseKQLQuery.h @@ -3,6 +3,7 @@ #include #include #include + namespace DB { @@ -10,10 +11,6 @@ namespace DB * Used in syntax error message. */ -} -namespace DB -{ - class IParser; /// Parse query or set 'out_error_message'. @@ -24,11 +21,11 @@ ASTPtr tryParseKQLQuery( std::string & out_error_message, bool hilite, const std::string & description, - bool allow_multi_statements, /// If false, check for non-space characters after semicolon and set error message if any. - size_t max_query_size, /// If (end - pos) > max_query_size and query is longer than max_query_size then throws "Max query size exceeded". - /// Disabled if zero. Is used in order to check query size if buffer can contains data for INSERT query. + bool allow_multi_statements, + size_t max_query_size, size_t max_parser_depth, - bool skip_insignificant = true); /// If true, lexer will skip all insignificant tokens (e.g. whitespaces) + size_t max_parser_backtracks, + bool skip_insignificant = true); /// Parse query or throw an exception with error message. 
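What the new parameter actually enforces is easiest to see in isolation. Below is a self-contained behavioral sketch of the IParser::Pos::operator= added earlier in this patch; it is a simplified illustration, not the real class:

#include <cstddef>
#include <cstdint>
#include <stdexcept>

struct PosSketch
{
    std::size_t offset = 0;       /// current token index
    uint32_t backtracks = 0;      /// how many times the parser has moved backwards
    uint32_t max_backtracks = 0;  /// 0 disables the check

    void assign(const PosSketch & rhs)
    {
        if (rhs.backtracks > backtracks)
            backtracks = rhs.backtracks;   /// keep the larger running count
        if (rhs.offset < offset)           /// rolling back to an earlier token
        {
            ++backtracks;
            if (max_backtracks && backtracks > max_backtracks)
                throw std::runtime_error("Maximum amount of backtracking exceeded in the parser");
        }
        offset = rhs.offset;
    }
};

Each failed alternative that restores a saved position costs one unit of this budget, so pathological inputs fail fast instead of rescanning the same tokens over and over.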
@@ -39,7 +36,8 @@ ASTPtr parseKQLQueryAndMovePosition( const std::string & description, bool allow_multi_statements, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); ASTPtr parseKQLQuery( IParser & parser, @@ -47,18 +45,22 @@ ASTPtr parseKQLQuery( const char * end, const std::string & description, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); ASTPtr parseKQLQuery( IParser & parser, const std::string & query, const std::string & query_description, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); ASTPtr parseKQLQuery( IParser & parser, const std::string & query, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); + } diff --git a/src/Parsers/MySQL/tests/gtest_alter_command_parser.cpp b/src/Parsers/MySQL/tests/gtest_alter_command_parser.cpp index d406cdbd3b9..4db96646e16 100644 --- a/src/Parsers/MySQL/tests/gtest_alter_command_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_alter_command_parser.cpp @@ -11,7 +11,7 @@ using namespace DB::MySQLParser; static inline ASTPtr tryParserQuery(IParser & parser, const String & query) { - return parseQuery(parser, query.data(), query.data() + query.size(), "", 0, 0); + return parseQuery(parser, query.data(), query.data() + query.size(), "", 0, 0, 0); } TEST(ParserAlterCommand, AddAlterCommand) diff --git a/src/Parsers/MySQL/tests/gtest_alter_parser.cpp b/src/Parsers/MySQL/tests/gtest_alter_parser.cpp index 4ebbe332710..2b12d7bdcf1 100644 --- a/src/Parsers/MySQL/tests/gtest_alter_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_alter_parser.cpp @@ -9,7 +9,7 @@ using namespace DB::MySQLParser; static inline ASTPtr tryParserQuery(IParser & parser, const String & query) { - return parseQuery(parser, query.data(), query.data() + query.size(), "", 0, 0); + return parseQuery(parser, query.data(), query.data() + query.size(), "", 0, 0, 0); } TEST(ParserAlterQuery, AlterQuery) diff --git a/src/Parsers/MySQL/tests/gtest_column_parser.cpp b/src/Parsers/MySQL/tests/gtest_column_parser.cpp index b1c7c778bea..21c37e4ee2e 100644 --- a/src/Parsers/MySQL/tests/gtest_column_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_column_parser.cpp @@ -17,7 +17,7 @@ TEST(ParserColumn, AllNonGeneratedColumnOption) String input = "col_01 VARCHAR(100) NOT NULL DEFAULT NULL AUTO_INCREMENT UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf8 " "COLUMN_FORMAT FIXED STORAGE MEMORY REFERENCES tbl_name (col_01) CHECK 1"; - ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0, 0); EXPECT_EQ(ast->as()->name, "col_01"); EXPECT_EQ(ast->as()->data_type->as()->name, "VARCHAR"); EXPECT_EQ(ast->as()->data_type->as()->arguments->children[0]->as()->value.safeGet(), 100); @@ -42,7 +42,7 @@ TEST(ParserColumn, AllGeneratedColumnOption) String input = "col_01 VARCHAR(100) NULL UNIQUE KEY PRIMARY KEY COMMENT 'column comment' COLLATE utf8 " "REFERENCES tbl_name (col_01) CHECK 1 GENERATED ALWAYS AS (1) STORED"; - ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_column, input.data(), input.data() + input.size(), "", 0, 0, 0); EXPECT_EQ(ast->as()->name, "col_01"); EXPECT_EQ(ast->as()->data_type->as()->name, "VARCHAR"); 
EXPECT_EQ(ast->as()->data_type->as()->arguments->children[0]->as()->value.safeGet(), 100); diff --git a/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp b/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp index 9c9124c9f58..a06f2ade24a 100644 --- a/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_constraint_parser.cpp @@ -14,7 +14,7 @@ TEST(ParserConstraint, CheckConstraint) ParserDeclareConstraint p_constraint; String constraint_01 = "CONSTRAINT symbol_name CHECK col_01 = 1"; - ASTPtr ast_constraint_01 = parseQuery(p_constraint, constraint_01.data(), constraint_01.data() + constraint_01.size(), "", 0, 0); + ASTPtr ast_constraint_01 = parseQuery(p_constraint, constraint_01.data(), constraint_01.data() + constraint_01.size(), "", 0, 0, 0); EXPECT_EQ(ast_constraint_01->as()->constraint_name, "symbol_name"); auto * check_expression_01 = ast_constraint_01->as()->check_expression->as(); EXPECT_EQ(check_expression_01->name, "equals"); @@ -22,7 +22,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(check_expression_01->arguments->children[1]->as()->value.safeGet(), 1); String constraint_02 = "CONSTRAINT CHECK col_01 = 1"; - ASTPtr ast_constraint_02 = parseQuery(p_constraint, constraint_02.data(), constraint_02.data() + constraint_02.size(), "", 0, 0); + ASTPtr ast_constraint_02 = parseQuery(p_constraint, constraint_02.data(), constraint_02.data() + constraint_02.size(), "", 0, 0, 0); EXPECT_EQ(ast_constraint_02->as()->constraint_name, ""); auto * check_expression_02 = ast_constraint_02->as()->check_expression->as(); EXPECT_EQ(check_expression_02->name, "equals"); @@ -30,7 +30,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(check_expression_02->arguments->children[1]->as()->value.safeGet(), 1); String constraint_03 = "CHECK col_01 = 1"; - ASTPtr ast_constraint_03 = parseQuery(p_constraint, constraint_03.data(), constraint_03.data() + constraint_03.size(), "", 0, 0); + ASTPtr ast_constraint_03 = parseQuery(p_constraint, constraint_03.data(), constraint_03.data() + constraint_03.size(), "", 0, 0, 0); EXPECT_EQ(ast_constraint_03->as()->constraint_name, ""); auto * check_expression_03 = ast_constraint_03->as()->check_expression->as(); EXPECT_EQ(check_expression_03->name, "equals"); @@ -38,7 +38,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(check_expression_03->arguments->children[1]->as()->value.safeGet(), 1); String constraint_04 = "CONSTRAINT CHECK col_01 = 1 ENFORCED"; - ASTPtr ast_constraint_04 = parseQuery(p_constraint, constraint_04.data(), constraint_04.data() + constraint_04.size(), "", 0, 0); + ASTPtr ast_constraint_04 = parseQuery(p_constraint, constraint_04.data(), constraint_04.data() + constraint_04.size(), "", 0, 0, 0); EXPECT_TRUE(ast_constraint_04->as()->enforced); EXPECT_EQ(ast_constraint_04->as()->constraint_name, ""); auto * check_expression_04 = ast_constraint_04->as()->check_expression->as(); @@ -47,7 +47,7 @@ TEST(ParserConstraint, CheckConstraint) EXPECT_EQ(check_expression_04->arguments->children[1]->as()->value.safeGet(), 1); String constraint_05 = "CONSTRAINT CHECK col_01 = 1 NOT ENFORCED"; - ASTPtr ast_constraint_05 = parseQuery(p_constraint, constraint_05.data(), constraint_05.data() + constraint_05.size(), "", 0, 0); + ASTPtr ast_constraint_05 = parseQuery(p_constraint, constraint_05.data(), constraint_05.data() + constraint_05.size(), "", 0, 0, 0); EXPECT_FALSE(ast_constraint_05->as()->enforced); EXPECT_EQ(ast_constraint_05->as()->constraint_name, ""); auto * check_expression_05 = 
ast_constraint_05->as()->check_expression->as(); diff --git a/src/Parsers/MySQL/tests/gtest_create_parser.cpp b/src/Parsers/MySQL/tests/gtest_create_parser.cpp index 2f65eb6e592..8512b88ffc1 100644 --- a/src/Parsers/MySQL/tests/gtest_create_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_create_parser.cpp @@ -15,16 +15,16 @@ TEST(CreateTableParser, LikeCreate) { ParserCreateQuery p_create_query; String like_create_01 = "CREATE TABLE IF NOT EXISTS table_name LIKE table_name_01"; - parseQuery(p_create_query, like_create_01.data(), like_create_01.data() + like_create_01.size(), "", 0, 0); + parseQuery(p_create_query, like_create_01.data(), like_create_01.data() + like_create_01.size(), "", 0, 0, 0); String like_create_02 = "CREATE TABLE IF NOT EXISTS table_name (LIKE table_name_01)"; - parseQuery(p_create_query, like_create_02.data(), like_create_02.data() + like_create_02.size(), "", 0, 0); + parseQuery(p_create_query, like_create_02.data(), like_create_02.data() + like_create_02.size(), "", 0, 0, 0); } TEST(CreateTableParser, SimpleCreate) { ParserCreateQuery p_create_query; String input = "CREATE TABLE IF NOT EXISTS table_name(col_01 VARCHAR(100), INDEX (col_01), CHECK 1) ENGINE INNODB PARTITION BY HASH(col_01)"; - ASTPtr ast = parseQuery(p_create_query, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_create_query, input.data(), input.data() + input.size(), "", 0, 0, 0); EXPECT_TRUE(ast->as()->if_not_exists); EXPECT_EQ(ast->as()->columns_list->as()->columns->children.size(), 1); EXPECT_EQ(ast->as()->columns_list->as()->indices->children.size(), 1); @@ -37,7 +37,7 @@ TEST(CreateTableParser, SS) { ParserCreateQuery p_create_query; String input = "CREATE TABLE `test_table_1` (`a` int DEFAULT NULL, `b` int DEFAULT NULL) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci"; - ASTPtr ast = parseQuery(p_create_query, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_create_query, input.data(), input.data() + input.size(), "", 0, 0, 0); WriteBufferFromOStream buf(std::cerr, 4096); ast->dumpTree(buf); buf.finalize(); diff --git a/src/Parsers/MySQL/tests/gtest_index_parser.cpp b/src/Parsers/MySQL/tests/gtest_index_parser.cpp index a8be6787b2c..187bac3e090 100644 --- a/src/Parsers/MySQL/tests/gtest_index_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_index_parser.cpp @@ -14,7 +14,7 @@ TEST(ParserIndex, AllIndexOptions) String input = "INDEX (col_01, col_02(100), col_03 DESC) KEY_BLOCK_SIZE 3 USING HASH WITH PARSER parser_name COMMENT 'index comment' VISIBLE"; ParserDeclareIndex p_index; - ASTPtr ast = parseQuery(p_index, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_index, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTDeclareIndex * declare_index = ast->as(); EXPECT_EQ(declare_index->index_columns->children[0]->as()->name(), "col_01"); @@ -33,7 +33,7 @@ TEST(ParserIndex, OptionalIndexOptions) String input = "INDEX (col_01, col_02(100), col_03 DESC) USING HASH INVISIBLE KEY_BLOCK_SIZE 3"; ParserDeclareIndex p_index; - ASTPtr ast = parseQuery(p_index, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_index, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTDeclareIndex * declare_index = ast->as(); EXPECT_EQ(declare_index->index_columns->children[0]->as()->name(), "col_01"); @@ -50,28 +50,28 @@ TEST(ParserIndex, OrdinaryIndex) { ParserDeclareIndex p_index; String non_unique_index_01 = "KEY index_name USING HASH (col_01) INVISIBLE"; - 
parseQuery(p_index, non_unique_index_01.data(), non_unique_index_01.data() + non_unique_index_01.size(), "", 0, 0); + parseQuery(p_index, non_unique_index_01.data(), non_unique_index_01.data() + non_unique_index_01.size(), "", 0, 0, 0); String non_unique_index_02 = "INDEX index_name USING HASH (col_01) INVISIBLE"; - parseQuery(p_index, non_unique_index_02.data(), non_unique_index_02.data() + non_unique_index_02.size(), "", 0, 0); + parseQuery(p_index, non_unique_index_02.data(), non_unique_index_02.data() + non_unique_index_02.size(), "", 0, 0, 0); String fulltext_index_01 = "FULLTEXT index_name (col_01) INVISIBLE"; - parseQuery(p_index, fulltext_index_01.data(), fulltext_index_01.data() + fulltext_index_01.size(), "", 0, 0); + parseQuery(p_index, fulltext_index_01.data(), fulltext_index_01.data() + fulltext_index_01.size(), "", 0, 0, 0); String fulltext_index_02 = "FULLTEXT INDEX index_name (col_01) INVISIBLE"; - parseQuery(p_index, fulltext_index_02.data(), fulltext_index_02.data() + fulltext_index_02.size(), "", 0, 0); + parseQuery(p_index, fulltext_index_02.data(), fulltext_index_02.data() + fulltext_index_02.size(), "", 0, 0, 0); String fulltext_index_03 = "FULLTEXT KEY index_name (col_01) INVISIBLE"; - parseQuery(p_index, fulltext_index_03.data(), fulltext_index_03.data() + fulltext_index_03.size(), "", 0, 0); + parseQuery(p_index, fulltext_index_03.data(), fulltext_index_03.data() + fulltext_index_03.size(), "", 0, 0, 0); String spatial_index_01 = "SPATIAL index_name (col_01) INVISIBLE"; - parseQuery(p_index, spatial_index_01.data(), spatial_index_01.data() + spatial_index_01.size(), "", 0, 0); + parseQuery(p_index, spatial_index_01.data(), spatial_index_01.data() + spatial_index_01.size(), "", 0, 0, 0); String spatial_index_02 = "SPATIAL INDEX index_name (col_01) INVISIBLE"; - parseQuery(p_index, spatial_index_02.data(), spatial_index_02.data() + spatial_index_02.size(), "", 0, 0); + parseQuery(p_index, spatial_index_02.data(), spatial_index_02.data() + spatial_index_02.size(), "", 0, 0, 0); String spatial_index_03 = "SPATIAL KEY index_name (col_01) INVISIBLE"; - parseQuery(p_index, spatial_index_03.data(), spatial_index_03.data() + spatial_index_03.size(), "", 0, 0); + parseQuery(p_index, spatial_index_03.data(), spatial_index_03.data() + spatial_index_03.size(), "", 0, 0, 0); } TEST(ParserIndex, ConstraintIndex) @@ -79,47 +79,47 @@ TEST(ParserIndex, ConstraintIndex) ParserDeclareIndex p_index; String primary_key_01 = "PRIMARY KEY (col_01) INVISIBLE"; - parseQuery(p_index, primary_key_01.data(), primary_key_01.data() + primary_key_01.size(), "", 0, 0); + parseQuery(p_index, primary_key_01.data(), primary_key_01.data() + primary_key_01.size(), "", 0, 0, 0); String primary_key_02 = "PRIMARY KEY USING BTREE (col_01) INVISIBLE"; - parseQuery(p_index, primary_key_02.data(), primary_key_02.data() + primary_key_02.size(), "", 0, 0); + parseQuery(p_index, primary_key_02.data(), primary_key_02.data() + primary_key_02.size(), "", 0, 0, 0); String primary_key_03 = "CONSTRAINT PRIMARY KEY USING BTREE (col_01) INVISIBLE"; - parseQuery(p_index, primary_key_03.data(), primary_key_03.data() + primary_key_03.size(), "", 0, 0); + parseQuery(p_index, primary_key_03.data(), primary_key_03.data() + primary_key_03.size(), "", 0, 0, 0); String primary_key_04 = "CONSTRAINT index_name PRIMARY KEY USING BTREE (col_01) INVISIBLE"; - parseQuery(p_index, primary_key_04.data(), primary_key_04.data() + primary_key_04.size(), "", 0, 0); + parseQuery(p_index, primary_key_04.data(), primary_key_04.data() + 
primary_key_04.size(), "", 0, 0, 0); String unique_key_01 = "UNIQUE (col_01) INVISIBLE"; - parseQuery(p_index, unique_key_01.data(), unique_key_01.data() + unique_key_01.size(), "", 0, 0); + parseQuery(p_index, unique_key_01.data(), unique_key_01.data() + unique_key_01.size(), "", 0, 0, 0); String unique_key_02 = "UNIQUE INDEX (col_01) INVISIBLE"; - parseQuery(p_index, unique_key_02.data(), unique_key_02.data() + unique_key_02.size(), "", 0, 0); + parseQuery(p_index, unique_key_02.data(), unique_key_02.data() + unique_key_02.size(), "", 0, 0, 0); String unique_key_03 = "UNIQUE KEY (col_01) INVISIBLE"; - parseQuery(p_index, unique_key_03.data(), unique_key_03.data() + unique_key_03.size(), "", 0, 0); + parseQuery(p_index, unique_key_03.data(), unique_key_03.data() + unique_key_03.size(), "", 0, 0, 0); String unique_key_04 = "UNIQUE KEY index_name (col_01) INVISIBLE"; - parseQuery(p_index, unique_key_04.data(), unique_key_04.data() + unique_key_04.size(), "", 0, 0); + parseQuery(p_index, unique_key_04.data(), unique_key_04.data() + unique_key_04.size(), "", 0, 0, 0); String unique_key_05 = "UNIQUE KEY index_name USING HASH (col_01) INVISIBLE"; - parseQuery(p_index, unique_key_05.data(), unique_key_05.data() + unique_key_05.size(), "", 0, 0); + parseQuery(p_index, unique_key_05.data(), unique_key_05.data() + unique_key_05.size(), "", 0, 0, 0); String unique_key_06 = "CONSTRAINT UNIQUE KEY index_name USING HASH (col_01) INVISIBLE"; - parseQuery(p_index, unique_key_06.data(), unique_key_06.data() + unique_key_06.size(), "", 0, 0); + parseQuery(p_index, unique_key_06.data(), unique_key_06.data() + unique_key_06.size(), "", 0, 0, 0); String unique_key_07 = "CONSTRAINT index_name UNIQUE KEY index_name_1 USING HASH (col_01) INVISIBLE"; - parseQuery(p_index, unique_key_07.data(), unique_key_07.data() + unique_key_07.size(), "", 0, 0); + parseQuery(p_index, unique_key_07.data(), unique_key_07.data() + unique_key_07.size(), "", 0, 0, 0); String foreign_key_01 = "FOREIGN KEY (col_01) REFERENCES tbl_name (col_01)"; - parseQuery(p_index, foreign_key_01.data(), foreign_key_01.data() + foreign_key_01.size(), "", 0, 0); + parseQuery(p_index, foreign_key_01.data(), foreign_key_01.data() + foreign_key_01.size(), "", 0, 0, 0); String foreign_key_02 = "FOREIGN KEY index_name (col_01) REFERENCES tbl_name (col_01)"; - parseQuery(p_index, foreign_key_02.data(), foreign_key_02.data() + foreign_key_02.size(), "", 0, 0); + parseQuery(p_index, foreign_key_02.data(), foreign_key_02.data() + foreign_key_02.size(), "", 0, 0, 0); String foreign_key_03 = "CONSTRAINT FOREIGN KEY index_name (col_01) REFERENCES tbl_name (col_01)"; - parseQuery(p_index, foreign_key_03.data(), foreign_key_03.data() + foreign_key_03.size(), "", 0, 0); + parseQuery(p_index, foreign_key_03.data(), foreign_key_03.data() + foreign_key_03.size(), "", 0, 0, 0); String foreign_key_04 = "CONSTRAINT index_name FOREIGN KEY index_name_01 (col_01) REFERENCES tbl_name (col_01)"; - parseQuery(p_index, foreign_key_04.data(), foreign_key_04.data() + foreign_key_04.size(), "", 0, 0); + parseQuery(p_index, foreign_key_04.data(), foreign_key_04.data() + foreign_key_04.size(), "", 0, 0, 0); } diff --git a/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp b/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp index 01b757e5891..6ec8d73530e 100644 --- a/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_partition_options_parser.cpp @@ -14,14 +14,14 @@ TEST(ParserPartitionOptions, HashPatitionOptions) String 
hash_partition = "PARTITION BY HASH(col_01)"; ParserDeclarePartitionOptions p_partition_options; - ASTPtr ast_01 = parseQuery(p_partition_options, hash_partition.data(), hash_partition.data() + hash_partition.size(), "", 0, 0); + ASTPtr ast_01 = parseQuery(p_partition_options, hash_partition.data(), hash_partition.data() + hash_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); EXPECT_EQ(declare_partition_options_01->partition_type, "hash"); EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); String linear_hash_partition = "PARTITION BY LINEAR HASH(col_01)"; - ASTPtr ast_02 = parseQuery(p_partition_options, linear_hash_partition.data(), linear_hash_partition.data() + linear_hash_partition.size(), "", 0, 0); + ASTPtr ast_02 = parseQuery(p_partition_options, linear_hash_partition.data(), linear_hash_partition.data() + linear_hash_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); EXPECT_EQ(declare_partition_options_02->partition_type, "linear_hash"); @@ -33,14 +33,14 @@ TEST(ParserPartitionOptions, KeyPatitionOptions) String key_partition = "PARTITION BY KEY(col_01)"; ParserDeclarePartitionOptions p_partition_options; - ASTPtr ast_01 = parseQuery(p_partition_options, key_partition.data(), key_partition.data() + key_partition.size(), "", 0, 0); + ASTPtr ast_01 = parseQuery(p_partition_options, key_partition.data(), key_partition.data() + key_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); EXPECT_EQ(declare_partition_options_01->partition_type, "key"); EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); String linear_key_partition = "PARTITION BY LINEAR KEY(col_01, col_02)"; - ASTPtr ast_02 = parseQuery(p_partition_options, linear_key_partition.data(), linear_key_partition.data() + linear_key_partition.size(), "", 0, 0); + ASTPtr ast_02 = parseQuery(p_partition_options, linear_key_partition.data(), linear_key_partition.data() + linear_key_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); EXPECT_EQ(declare_partition_options_02->partition_type, "linear_key"); @@ -49,7 +49,7 @@ TEST(ParserPartitionOptions, KeyPatitionOptions) EXPECT_EQ(columns_list->children[1]->as()->name(), "col_02"); String key_partition_with_algorithm = "PARTITION BY KEY ALGORITHM=1 (col_01)"; - ASTPtr ast_03 = parseQuery(p_partition_options, key_partition_with_algorithm.data(), key_partition_with_algorithm.data() + key_partition_with_algorithm.size(), "", 0, 0); + ASTPtr ast_03 = parseQuery(p_partition_options, key_partition_with_algorithm.data(), key_partition_with_algorithm.data() + key_partition_with_algorithm.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_03 = ast_03->as(); EXPECT_EQ(declare_partition_options_03->partition_type, "key_1"); @@ -61,14 +61,14 @@ TEST(ParserPartitionOptions, RangePatitionOptions) String range_partition = "PARTITION BY RANGE(col_01)"; ParserDeclarePartitionOptions p_partition_options; - ASTPtr ast_01 = parseQuery(p_partition_options, range_partition.data(), range_partition.data() + range_partition.size(), "", 0, 0); + ASTPtr ast_01 = parseQuery(p_partition_options, range_partition.data(), range_partition.data() + range_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); 
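The hunks above and below repeat one mechanical edit: every test call site gains a trailing `0` for `max_parser_backtracks`, matching the `0` already passed for `max_parser_depth`. The MySQL test files that define a local `tryParserQuery()` wrapper absorb the change in a single line; as a sketch (hypothetical, not part of this patch), a shared helper of the same shape would do the same for the index and partition tests:

``` cpp
#include <Parsers/IParser.h>
#include <Parsers/parseQuery.h>

// Hypothetical shared test helper (an assumption, not in this patch):
// centralizing the limit arguments means a signature change like this
// one touches a single line per test file.
static DB::ASTPtr parseForTest(DB::IParser & parser, const std::string & input)
{
    return DB::parseQuery(
        parser, input.data(), input.data() + input.size(),
        /* description */ "", /* max_query_size */ 0,
        /* max_parser_depth */ 0, /* max_parser_backtracks */ 0);
}
```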
EXPECT_EQ(declare_partition_options_01->partition_type, "range"); EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); String range_columns_partition = "PARTITION BY RANGE COLUMNS(col_01, col_02)"; - ASTPtr ast_02 = parseQuery(p_partition_options, range_columns_partition.data(), range_columns_partition.data() + range_columns_partition.size(), "", 0, 0); + ASTPtr ast_02 = parseQuery(p_partition_options, range_columns_partition.data(), range_columns_partition.data() + range_columns_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); EXPECT_EQ(declare_partition_options_02->partition_type, "range"); @@ -82,14 +82,14 @@ TEST(ParserPartitionOptions, ListPatitionOptions) String range_partition = "PARTITION BY LIST(col_01)"; ParserDeclarePartitionOptions p_partition_options; - ASTPtr ast_01 = parseQuery(p_partition_options, range_partition.data(), range_partition.data() + range_partition.size(), "", 0, 0); + ASTPtr ast_01 = parseQuery(p_partition_options, range_partition.data(), range_partition.data() + range_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_01 = ast_01->as(); EXPECT_EQ(declare_partition_options_01->partition_type, "list"); EXPECT_EQ(declare_partition_options_01->partition_expression->as()->name(), "col_01"); String range_columns_partition = "PARTITION BY LIST COLUMNS(col_01, col_02)"; - ASTPtr ast_02 = parseQuery(p_partition_options, range_columns_partition.data(), range_columns_partition.data() + range_columns_partition.size(), "", 0, 0); + ASTPtr ast_02 = parseQuery(p_partition_options, range_columns_partition.data(), range_columns_partition.data() + range_columns_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options_02 = ast_02->as(); EXPECT_EQ(declare_partition_options_02->partition_type, "list"); @@ -103,7 +103,7 @@ TEST(ParserPartitionOptions, PatitionNumberOptions) String numbers_partition = "PARTITION BY KEY(col_01) PARTITIONS 2"; ParserDeclarePartitionOptions p_partition_options; - ASTPtr ast = parseQuery(p_partition_options, numbers_partition.data(), numbers_partition.data() + numbers_partition.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_partition_options, numbers_partition.data(), numbers_partition.data() + numbers_partition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options = ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); @@ -116,7 +116,7 @@ TEST(ParserPartitionOptions, PatitionWithSubpartitionOptions) String partition_with_subpartition = "PARTITION BY KEY(col_01) PARTITIONS 3 SUBPARTITION BY HASH(col_02) SUBPARTITIONS 4"; ParserDeclarePartitionOptions p_partition_options; - ASTPtr ast = parseQuery(p_partition_options, partition_with_subpartition.data(), partition_with_subpartition.data() + partition_with_subpartition.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_partition_options, partition_with_subpartition.data(), partition_with_subpartition.data() + partition_with_subpartition.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options = ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); @@ -134,7 +134,7 @@ TEST(ParserPartitionOptions, PatitionOptionsWithDeclarePartition) ParserDeclarePartitionOptions p_partition_options; ASTPtr ast = parseQuery(p_partition_options, partition_options_with_declare.data(), - partition_options_with_declare.data() + partition_options_with_declare.size(), "", 0, 0); + 
partition_options_with_declare.data() + partition_options_with_declare.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options = ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); @@ -153,7 +153,7 @@ TEST(ParserPartitionOptions, PatitionOptionsWithDeclarePartitions) ParserDeclarePartitionOptions p_partition_options; ASTPtr ast = parseQuery(p_partition_options, partition_options_with_declare.data(), - partition_options_with_declare.data() + partition_options_with_declare.size(), "", 0, 0); + partition_options_with_declare.data() + partition_options_with_declare.size(), "", 0, 0, 0); ASTDeclarePartitionOptions * declare_partition_options = ast->as(); EXPECT_EQ(declare_partition_options->partition_type, "key"); diff --git a/src/Parsers/MySQL/tests/gtest_partition_parser.cpp b/src/Parsers/MySQL/tests/gtest_partition_parser.cpp index 458c7acd553..07c7c03dbb7 100644 --- a/src/Parsers/MySQL/tests/gtest_partition_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_partition_parser.cpp @@ -17,7 +17,7 @@ TEST(ParserPartition, AllPatitionOptions) " TABLESPACE table_space_name"; ParserDeclarePartition p_partition; - ASTPtr ast = parseQuery(p_partition, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_partition, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition = ast->as(); EXPECT_EQ(declare_partition->partition_name, "partition_name"); @@ -35,7 +35,7 @@ TEST(ParserPartition, OptionalPatitionOptions) { String input = "PARTITION partition_name STORAGE engine = engine_name max_rows 1000 min_rows 0 tablespace table_space_name"; ParserDeclarePartition p_partition; - ASTPtr ast = parseQuery(p_partition, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_partition, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition = ast->as(); EXPECT_EQ(declare_partition->partition_name, "partition_name"); @@ -50,7 +50,7 @@ TEST(ParserPartition, PatitionOptionsWithLessThan) { ParserDeclarePartition p_partition; String partition_01 = "PARTITION partition_01 VALUES LESS THAN (1991) STORAGE engine = engine_name"; - ASTPtr ast_partition_01 = parseQuery(p_partition, partition_01.data(), partition_01.data() + partition_01.size(), "", 0, 0); + ASTPtr ast_partition_01 = parseQuery(p_partition, partition_01.data(), partition_01.data() + partition_01.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_01 = ast_partition_01->as(); EXPECT_EQ(declare_partition_01->partition_name, "partition_01"); @@ -59,7 +59,7 @@ TEST(ParserPartition, PatitionOptionsWithLessThan) EXPECT_EQ(declare_options_01->changes["engine"]->as()->name(), "engine_name"); String partition_02 = "PARTITION partition_02 VALUES LESS THAN MAXVALUE STORAGE engine = engine_name"; - ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0); + ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_02 = ast_partition_02->as(); EXPECT_EQ(declare_partition_02->partition_name, "partition_02"); @@ -68,7 +68,7 @@ TEST(ParserPartition, PatitionOptionsWithLessThan) EXPECT_EQ(declare_options_02->changes["engine"]->as()->name(), "engine_name"); String partition_03 = "PARTITION partition_03 VALUES LESS THAN (50, MAXVALUE) STORAGE engine = engine_name"; - ASTPtr ast_partition_03 = parseQuery(p_partition, 
partition_03.data(), partition_03.data() + partition_03.size(), "", 0, 0); + ASTPtr ast_partition_03 = parseQuery(p_partition, partition_03.data(), partition_03.data() + partition_03.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_03 = ast_partition_03->as(); EXPECT_EQ(declare_partition_03->partition_name, "partition_03"); @@ -79,7 +79,7 @@ TEST(ParserPartition, PatitionOptionsWithLessThan) EXPECT_EQ(declare_options_03->changes["engine"]->as()->name(), "engine_name"); String partition_04 = "PARTITION partition_04 VALUES LESS THAN (MAXVALUE, MAXVALUE) STORAGE engine = engine_name"; - ASTPtr ast_partition_04 = parseQuery(p_partition, partition_04.data(), partition_04.data() + partition_04.size(), "", 0, 0); + ASTPtr ast_partition_04 = parseQuery(p_partition, partition_04.data(), partition_04.data() + partition_04.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_04 = ast_partition_04->as(); EXPECT_EQ(declare_partition_04->partition_name, "partition_04"); @@ -94,7 +94,7 @@ TEST(ParserPartition, PatitionOptionsWithInExpression) { ParserDeclarePartition p_partition; String partition_01 = "PARTITION partition_01 VALUES IN (NULL, 1991, MAXVALUE) STORAGE engine = engine_name"; - ASTPtr ast_partition_01 = parseQuery(p_partition, partition_01.data(), partition_01.data() + partition_01.size(), "", 0, 0); + ASTPtr ast_partition_01 = parseQuery(p_partition, partition_01.data(), partition_01.data() + partition_01.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_01 = ast_partition_01->as(); EXPECT_EQ(declare_partition_01->partition_name, "partition_01"); @@ -106,7 +106,7 @@ TEST(ParserPartition, PatitionOptionsWithInExpression) EXPECT_EQ(declare_options_01->changes["engine"]->as()->name(), "engine_name"); String partition_02 = "PARTITION partition_02 VALUES IN ((NULL, 1991), (1991, NULL), (MAXVALUE, MAXVALUE)) STORAGE engine = engine_name"; - ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0); + ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_02 = ast_partition_02->as(); EXPECT_EQ(declare_partition_02->partition_name, "partition_02"); @@ -132,18 +132,17 @@ TEST(ParserPartition, PatitionOptionsWithSubpartitions) { ParserDeclarePartition p_partition; String partition_01 = "PARTITION partition_01 VALUES IN (NULL, 1991, MAXVALUE) STORAGE engine = engine_name (SUBPARTITION s_p01)"; - ASTPtr ast_partition_01 = parseQuery(p_partition, partition_01.data(), partition_01.data() + partition_01.size(), "", 0, 0); + ASTPtr ast_partition_01 = parseQuery(p_partition, partition_01.data(), partition_01.data() + partition_01.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_01 = ast_partition_01->as(); EXPECT_EQ(declare_partition_01->partition_name, "partition_01"); EXPECT_TRUE(declare_partition_01->subpartitions->as()->children[0]->as()); String partition_02 = "PARTITION partition_02 VALUES IN (NULL, 1991, MAXVALUE) STORAGE engine = engine_name (SUBPARTITION s_p01, SUBPARTITION s_p02)"; - ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0); + ASTPtr ast_partition_02 = parseQuery(p_partition, partition_02.data(), partition_02.data() + partition_02.size(), "", 0, 0, 0); ASTDeclarePartition * declare_partition_02 = ast_partition_02->as(); EXPECT_EQ(declare_partition_02->partition_name, "partition_02"); 
EXPECT_TRUE(declare_partition_02->subpartitions->as()->children[0]->as()); EXPECT_TRUE(declare_partition_02->subpartitions->as()->children[1]->as()); } - diff --git a/src/Parsers/MySQL/tests/gtest_reference_parser.cpp b/src/Parsers/MySQL/tests/gtest_reference_parser.cpp index 7447f16fc7c..d5b3c9b596d 100644 --- a/src/Parsers/MySQL/tests/gtest_reference_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_reference_parser.cpp @@ -12,12 +12,12 @@ TEST(ParserReference, SimpleReference) ParserDeclareReference p_reference; String reference_01 = "REFERENCES table_name (ref_col_01)"; - ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0); + ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_01->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name(), "ref_col_01"); String reference_02 = "REFERENCES table_name (ref_col_01, ref_col_02)"; - ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0); + ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_02->as()->reference_table_name, "table_name"); ASTPtr arguments = ast_reference_02->as()->reference_expression->as()->arguments; EXPECT_EQ(arguments->children[0]->as()->name(), "ref_col_01"); @@ -28,19 +28,19 @@ TEST(ParserReference, ReferenceDifferenceKind) { ParserDeclareReference p_reference; String reference_01 = "REFERENCES table_name (ref_col_01) MATCH FULL"; - ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0); + ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_01->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_01->as()->kind, ASTDeclareReference::MATCH_FULL); String reference_02 = "REFERENCES table_name (ref_col_01) MATCH PARTIAL"; - ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0); + ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_02->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_02->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_02->as()->kind, ASTDeclareReference::MATCH_PARTIAL); String reference_03 = "REFERENCES table_name (ref_col_01) MATCH SIMPLE"; - ASTPtr ast_reference_03 = parseQuery(p_reference, reference_03.data(), reference_03.data() + reference_03.size(), "", 0, 0); + ASTPtr ast_reference_03 = parseQuery(p_reference, reference_03.data(), reference_03.data() + reference_03.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_03->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_03->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_03->as()->kind, ASTDeclareReference::MATCH_SIMPLE); @@ -50,7 +50,7 @@ TEST(ParserReference, ReferenceDifferenceOption) { ParserDeclareReference p_reference; String reference_01 = "REFERENCES table_name (ref_col_01) MATCH FULL ON DELETE RESTRICT ON UPDATE RESTRICT"; - ASTPtr 
ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0); + ASTPtr ast_reference_01 = parseQuery(p_reference, reference_01.data(), reference_01.data() + reference_01.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_01->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_01->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_01->as()->kind, ASTDeclareReference::MATCH_FULL); @@ -58,7 +58,7 @@ TEST(ParserReference, ReferenceDifferenceOption) EXPECT_EQ(ast_reference_01->as()->on_update_option, ASTDeclareReference::RESTRICT); String reference_02 = "REFERENCES table_name (ref_col_01) MATCH FULL ON DELETE CASCADE ON UPDATE CASCADE"; - ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0); + ASTPtr ast_reference_02 = parseQuery(p_reference, reference_02.data(), reference_02.data() + reference_02.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_02->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_02->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_02->as()->kind, ASTDeclareReference::MATCH_FULL); @@ -66,7 +66,7 @@ TEST(ParserReference, ReferenceDifferenceOption) EXPECT_EQ(ast_reference_02->as()->on_update_option, ASTDeclareReference::CASCADE); String reference_03 = "REFERENCES table_name (ref_col_01) MATCH FULL ON DELETE SET NULL ON UPDATE SET NULL"; - ASTPtr ast_reference_03 = parseQuery(p_reference, reference_03.data(), reference_03.data() + reference_03.size(), "", 0, 0); + ASTPtr ast_reference_03 = parseQuery(p_reference, reference_03.data(), reference_03.data() + reference_03.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_03->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_03->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_03->as()->kind, ASTDeclareReference::MATCH_FULL); @@ -74,7 +74,7 @@ TEST(ParserReference, ReferenceDifferenceOption) EXPECT_EQ(ast_reference_03->as()->on_update_option, ASTDeclareReference::SET_NULL); String reference_04 = "REFERENCES table_name (ref_col_01) MATCH FULL ON UPDATE NO ACTION ON DELETE NO ACTION"; - ASTPtr ast_reference_04 = parseQuery(p_reference, reference_04.data(), reference_04.data() + reference_04.size(), "", 0, 0); + ASTPtr ast_reference_04 = parseQuery(p_reference, reference_04.data(), reference_04.data() + reference_04.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_04->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_04->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_04->as()->kind, ASTDeclareReference::MATCH_FULL); @@ -82,11 +82,10 @@ TEST(ParserReference, ReferenceDifferenceOption) EXPECT_EQ(ast_reference_04->as()->on_update_option, ASTDeclareReference::NO_ACTION); String reference_05 = "REFERENCES table_name (ref_col_01) MATCH FULL ON UPDATE SET DEFAULT ON DELETE SET DEFAULT"; - ASTPtr ast_reference_05 = parseQuery(p_reference, reference_05.data(), reference_05.data() + reference_05.size(), "", 0, 0); + ASTPtr ast_reference_05 = parseQuery(p_reference, reference_05.data(), reference_05.data() + reference_05.size(), "", 0, 0, 0); EXPECT_EQ(ast_reference_05->as()->reference_table_name, "table_name"); EXPECT_EQ(ast_reference_05->as()->reference_expression->as()->name(), "ref_col_01"); EXPECT_EQ(ast_reference_05->as()->kind, ASTDeclareReference::MATCH_FULL); EXPECT_EQ(ast_reference_05->as()->on_delete_option, 
ASTDeclareReference::SET_DEFAULT); EXPECT_EQ(ast_reference_05->as()->on_update_option, ASTDeclareReference::SET_DEFAULT); } - diff --git a/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp b/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp index b375f73c55c..1876cd1d028 100644 --- a/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_subpartition_parser.cpp @@ -14,7 +14,7 @@ TEST(ParserSubpartition, AllSubpatitionOptions) " DATA DIRECTORY 'data_directory' INDEX DIRECTORY 'index_directory' max_rows 1000 MIN_ROWs 0" " TABLESPACE table_space_name"; MySQLParser::ParserDeclareSubPartition p_subpartition; - ASTPtr ast = parseQuery(p_subpartition, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_subpartition, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTDeclareSubPartition * declare_subpartition = ast->as(); EXPECT_EQ(declare_subpartition->logical_name, "subpartition_name"); @@ -32,7 +32,7 @@ TEST(ParserSubpartition, OptionalSubpatitionOptions) { String input = "SUBPARTITION subpartition_name STORAGE engine = engine_name max_rows 1000 min_rows 0 tablespace table_space_name"; MySQLParser::ParserDeclareSubPartition p_subpartition; - ASTPtr ast = parseQuery(p_subpartition, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_subpartition, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTDeclareSubPartition * declare_subpartition = ast->as(); EXPECT_EQ(declare_subpartition->logical_name, "subpartition_name"); @@ -42,4 +42,3 @@ TEST(ParserSubpartition, OptionalSubpatitionOptions) EXPECT_EQ(declare_options->changes["max_rows"]->as()->value.safeGet(), 1000); EXPECT_EQ(declare_options->changes["tablespace"]->as()->name(), "table_space_name"); } - diff --git a/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp b/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp index 42b9279c96d..a84da7cb9d5 100644 --- a/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp +++ b/src/Parsers/MySQL/tests/gtest_table_options_parser.cpp @@ -18,7 +18,7 @@ TEST(ParserTableOptions, AllSubpatitionOptions) " STATS_PERSISTENT DEFAULT STATS_SAMPLE_PAGES 3 TABLESPACE tablespace_name STORAGE MEMORY UNION (table_01, table_02)"; ParserDeclareTableOptions p_table_options; - ASTPtr ast = parseQuery(p_table_options, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_table_options, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTDeclareOptions * declare_options = ast->as(); EXPECT_EQ(declare_options->changes["auto_increment"]->as()->value.safeGet(), 1); @@ -56,7 +56,7 @@ TEST(ParserTableOptions, OptionalTableOptions) { String input = "STATS_AUTO_RECALC DEFAULT AUTO_INCREMENt = 1 "; ParserDeclareTableOptions p_table_options; - ASTPtr ast = parseQuery(p_table_options, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(p_table_options, input.data(), input.data() + input.size(), "", 0, 0, 0); ASTDeclareOptions * declare_options = ast->as(); EXPECT_EQ(declare_options->changes["auto_increment"]->as()->value.safeGet(), 1); diff --git a/src/Parsers/PRQL/ParserPRQLQuery.cpp b/src/Parsers/PRQL/ParserPRQLQuery.cpp index b3733b727dc..fb1796714cb 100644 --- a/src/Parsers/PRQL/ParserPRQLQuery.cpp +++ b/src/Parsers/PRQL/ParserPRQLQuery.cpp @@ -69,7 +69,9 @@ bool ParserPRQLQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) "", false, max_query_size, - max_parser_depth); + max_parser_depth, + max_parser_backtracks, + true); if 
(!node) throw Exception( diff --git a/src/Parsers/PRQL/ParserPRQLQuery.h b/src/Parsers/PRQL/ParserPRQLQuery.h index 4fc450df6b6..88bf97f69d1 100644 --- a/src/Parsers/PRQL/ParserPRQLQuery.h +++ b/src/Parsers/PRQL/ParserPRQLQuery.h @@ -13,9 +13,10 @@ private: // These fields are not used when PRQL is disabled at build time. [[maybe_unused]] size_t max_query_size; [[maybe_unused]] size_t max_parser_depth; + [[maybe_unused]] size_t max_parser_backtracks; public: - ParserPRQLQuery(size_t max_query_size_, size_t max_parser_depth_) : max_query_size{max_query_size_}, max_parser_depth{max_parser_depth_} + ParserPRQLQuery(size_t max_query_size_, size_t max_parser_depth_, size_t max_parser_backtracks_) : max_query_size(max_query_size_), max_parser_depth(max_parser_depth_), max_parser_backtracks(max_parser_backtracks_) { } diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index b1cc7622e00..4bc95e67afb 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -865,7 +865,8 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected else if (s_modify_sql_security.ignore(pos, expected)) { /// This is a hack so we can reuse parser from create and don't have to write `MODIFY SQL SECURITY SQL SECURITY INVOKER` - pos -= 2; + --pos; + --pos; if (!sql_security_p.parse(pos, command_sql_security, expected)) return false; command->type = ASTAlterCommand::MODIFY_SQL_SECURITY; diff --git a/src/Parsers/ParserCreateQuery.h b/src/Parsers/ParserCreateQuery.h index 440a8bc1dc7..30bce57f9d9 100644 --- a/src/Parsers/ParserCreateQuery.h +++ b/src/Parsers/ParserCreateQuery.h @@ -286,7 +286,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E { const String type_int("INT"); Tokens tokens(type_int.data(), type_int.data() + type_int.size()); - Pos tmp_pos(tokens, 0); + Pos tmp_pos(tokens, pos.max_depth, pos.max_backtracks); Expected tmp_expected; ParserDataType().parse(tmp_pos, type, tmp_expected); } diff --git a/src/Parsers/QueryParameterVisitor.cpp b/src/Parsers/QueryParameterVisitor.cpp index b8679cc3b96..9afd9a8615c 100644 --- a/src/Parsers/QueryParameterVisitor.cpp +++ b/src/Parsers/QueryParameterVisitor.cpp @@ -43,7 +43,7 @@ NameSet analyzeReceiveQueryParams(const std::string & query) const char * query_end = query.data() + query.size(); ParserQuery parser(query_end); - ASTPtr extract_query_ast = parseQuery(parser, query_begin, query_end, "analyzeReceiveQueryParams", 0, 0); + ASTPtr extract_query_ast = parseQuery(parser, query_begin, query_end, "analyzeReceiveQueryParams", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); QueryParameterVisitor(query_params).visit(extract_query_ast); NameSet query_param_names; diff --git a/src/Parsers/TokenIterator.h b/src/Parsers/TokenIterator.h index 8cb59aa12e2..192f2f55e6a 100644 --- a/src/Parsers/TokenIterator.h +++ b/src/Parsers/TokenIterator.h @@ -62,18 +62,6 @@ public: return *this; } - ALWAYS_INLINE TokenIterator & operator-=(int value) - { - index -= value; - return *this; - } - - ALWAYS_INLINE TokenIterator & operator+=(int value) - { - index += value; - return *this; - } - ALWAYS_INLINE bool operator<(const TokenIterator & rhs) const { return index < rhs.index; } ALWAYS_INLINE bool operator<=(const TokenIterator & rhs) const { return index <= rhs.index; } ALWAYS_INLINE bool operator==(const TokenIterator & rhs) const { return index == rhs.index; } diff --git a/src/Parsers/examples/create_parser.cpp b/src/Parsers/examples/create_parser.cpp index 
c241b353b4f..b628c79435c 100644 --- a/src/Parsers/examples/create_parser.cpp +++ b/src/Parsers/examples/create_parser.cpp @@ -13,7 +13,7 @@ int main(int, char **) std::string input = "CREATE TABLE hits (URL String, UserAgentMinor2 FixedString(2), EventTime DateTime) ENGINE = Log"; ParserCreateQuery parser; - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); WriteBufferFromOStream out(std::cerr, 4096); formatAST(*ast, out); diff --git a/src/Parsers/examples/select_parser.cpp b/src/Parsers/examples/select_parser.cpp index 15295170c6b..3ed358121f6 100644 --- a/src/Parsers/examples/select_parser.cpp +++ b/src/Parsers/examples/select_parser.cpp @@ -23,7 +23,7 @@ try " FORMAT TabSeparated"; ParserQueryWithOutput parser(input.data() + input.size()); - ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0); + ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0); std::cout << "Success." << std::endl; WriteBufferFromOStream out(std::cerr, 4096); diff --git a/src/Parsers/fuzzers/select_parser_fuzzer.cpp b/src/Parsers/fuzzers/select_parser_fuzzer.cpp index ae490ed4e56..aed83853c33 100644 --- a/src/Parsers/fuzzers/select_parser_fuzzer.cpp +++ b/src/Parsers/fuzzers/select_parser_fuzzer.cpp @@ -15,7 +15,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size) DB::ParserQueryWithOutput parser(input.data() + input.size()); const UInt64 max_parser_depth = 1000; - DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, max_parser_depth); + const UInt64 max_parser_backtracks = 1000000; + DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, max_parser_depth, max_parser_backtracks); const UInt64 max_ast_depth = 1000; ast->checkDepth(max_ast_depth); diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 8f9977c0b8d..7aad0b010a5 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -234,12 +234,13 @@ ASTPtr tryParseQuery( bool allow_multi_statements, size_t max_query_size, size_t max_parser_depth, + size_t max_parser_backtracks, bool skip_insignificant) { const char * query_begin = _out_query_end; Tokens tokens(query_begin, all_queries_end, max_query_size, skip_insignificant); /// NOTE: consider use UInt32 for max_parser_depth setting. 
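Two related changes travel together here: `TokenIterator` loses its `operator-=`/`operator+=` (so a parser can no longer jump back an arbitrary distance, which is why the old `pos -= 2` in `ParserAlterQuery.cpp` becomes two `--pos` steps), and `IParser::Pos` is now constructed with a backtrack limit next to the depth limit, as the fuzzer (`max_parser_backtracks = 1000000`) and `tryParseQuery` show. A minimal fragment of the post-patch shape; the `UInt32` template arguments are an assumption taken from the NOTE above, since the extracted diff text dropped the contents of `static_cast<...>`:

``` cpp
// Sketch only, given [begin, end) and the limit variables from the
// surrounding function (assumptions, for illustration).
DB::Tokens tokens(begin, end, max_query_size, /* skip_insignificant */ true);
DB::IParser::Pos pos(
    tokens,
    static_cast<UInt32>(max_parser_depth),        // assumed UInt32, per the NOTE
    static_cast<UInt32>(max_parser_backtracks));

++pos; ++pos;   // forward steps are unchanged
--pos; --pos;   // backward steps are unit-sized now that operator-=(int) is gone,
                // so every backtrack is observable and countable
```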
- IParser::Pos token_iterator(tokens, static_cast(max_parser_depth)); + IParser::Pos token_iterator(tokens, static_cast(max_parser_depth), static_cast(max_parser_backtracks)); if (token_iterator->isEnd() || token_iterator->type == TokenType::Semicolon) @@ -356,10 +357,13 @@ ASTPtr parseQueryAndMovePosition( const std::string & query_description, bool allow_multi_statements, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + size_t max_parser_backtracks) { std::string error_message; - ASTPtr res = tryParseQuery(parser, pos, end, error_message, false, query_description, allow_multi_statements, max_query_size, max_parser_depth); + ASTPtr res = tryParseQuery( + parser, pos, end, error_message, false, query_description, allow_multi_statements, + max_query_size, max_parser_depth, max_parser_backtracks, true); if (res) return res; @@ -374,9 +378,10 @@ ASTPtr parseQuery( const char * end, const std::string & query_description, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + size_t max_parser_backtracks) { - return parseQueryAndMovePosition(parser, begin, end, query_description, false, max_query_size, max_parser_depth); + return parseQueryAndMovePosition(parser, begin, end, query_description, false, max_query_size, max_parser_depth, max_parser_backtracks); } @@ -385,9 +390,10 @@ ASTPtr parseQuery( const std::string & query, const std::string & query_description, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + size_t max_parser_backtracks) { - return parseQuery(parser, query.data(), query.data() + query.size(), query_description, max_query_size, max_parser_depth); + return parseQuery(parser, query.data(), query.data() + query.size(), query_description, max_query_size, max_parser_depth, max_parser_backtracks); } @@ -395,9 +401,10 @@ ASTPtr parseQuery( IParser & parser, const std::string & query, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + size_t max_parser_backtracks) { - return parseQuery(parser, query.data(), query.data() + query.size(), parser.getName(), max_query_size, max_parser_depth); + return parseQuery(parser, query.data(), query.data() + query.size(), parser.getName(), max_query_size, max_parser_depth, max_parser_backtracks); } @@ -406,6 +413,7 @@ std::pair splitMultipartQuery( std::vector & queries_list, size_t max_query_size, size_t max_parser_depth, + size_t max_parser_backtracks, bool allow_settings_after_format_in_insert) { ASTPtr ast; @@ -422,7 +430,7 @@ std::pair splitMultipartQuery( { begin = pos; - ast = parseQueryAndMovePosition(parser, pos, end, "", true, max_query_size, max_parser_depth); + ast = parseQueryAndMovePosition(parser, pos, end, "", true, max_query_size, max_parser_depth, max_parser_backtracks); auto * insert = ast->as(); diff --git a/src/Parsers/parseQuery.h b/src/Parsers/parseQuery.h index a087f145d2c..93c1a465267 100644 --- a/src/Parsers/parseQuery.h +++ b/src/Parsers/parseQuery.h @@ -19,7 +19,8 @@ ASTPtr tryParseQuery( size_t max_query_size, /// If (end - pos) > max_query_size and query is longer than max_query_size then throws "Max query size exceeded". /// Disabled if zero. Is used in order to check query size if buffer can contains data for INSERT query. size_t max_parser_depth, - bool skip_insignificant = true); /// If true, lexer will skip all insignificant tokens (e.g. whitespaces) + size_t max_parser_backtracks, + bool skip_insignificant); /// If true, lexer will skip all insignificant tokens (e.g. 
whitespaces) /// Parse query or throw an exception with error message. @@ -30,7 +31,8 @@ ASTPtr parseQueryAndMovePosition( const std::string & description, bool allow_multi_statements, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); ASTPtr parseQuery( IParser & parser, @@ -38,20 +40,23 @@ ASTPtr parseQuery( const char * end, const std::string & description, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); ASTPtr parseQuery( IParser & parser, const std::string & query, const std::string & query_description, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); ASTPtr parseQuery( IParser & parser, const std::string & query, size_t max_query_size, - size_t max_parser_depth); + size_t max_parser_depth, + size_t max_parser_backtracks); /** Split queries separated by ; on to list of single queries @@ -63,6 +68,7 @@ std::pair splitMultipartQuery( std::vector & queries_list, size_t max_query_size, size_t max_parser_depth, + size_t max_parser_backtracks, bool allow_settings_after_format_in_insert); } diff --git a/src/Parsers/tests/gtest_Parser.cpp b/src/Parsers/tests/gtest_Parser.cpp index 19947cd38cc..f0abc68f966 100644 --- a/src/Parsers/tests/gtest_Parser.cpp +++ b/src/Parsers/tests/gtest_Parser.cpp @@ -1,7 +1,4 @@ -#include -#include #include -#include #include #include #include @@ -10,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -54,12 +50,12 @@ TEST_P(ParserTest, parseQuery) { if (std::string(expected_ast).starts_with("throws")) { - EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + EXPECT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0, 0), DB::Exception); } else { ASTPtr ast; - ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0)); + ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0, 0)); if (std::string("CREATE USER or ALTER USER query") != parser->getName() && std::string("ATTACH access entity query") != parser->getName()) { @@ -106,7 +102,7 @@ TEST_P(ParserTest, parseQuery) } else { - ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception); + ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0, 0), DB::Exception); } } @@ -649,12 +645,13 @@ INSTANTIATE_TEST_SUITE_P(ParserKQLQuery, ParserKQLTest, static constexpr size_t kDummyMaxQuerySize = 256 * 1024; static constexpr size_t kDummyMaxParserDepth = 256; +static constexpr size_t kDummyMaxParserBacktracks = 1000000; INSTANTIATE_TEST_SUITE_P( ParserPRQL, ParserTest, ::testing::Combine( - ::testing::Values(std::make_shared(kDummyMaxQuerySize, kDummyMaxParserDepth)), + ::testing::Values(std::make_shared(kDummyMaxQuerySize, kDummyMaxParserDepth, kDummyMaxParserBacktracks)), ::testing::ValuesIn(std::initializer_list{ { "from albums\ngroup {author_id} (\n aggregate {first_published = min published}\n)\njoin a=author side:left (==author_id)\njoin p=purchases side:right (==author_id)\ngroup {a.id, p.purchase_id} (\n aggregate {avg_sell = min first_published}\n)", diff --git a/src/Parsers/tests/gtest_common.cpp b/src/Parsers/tests/gtest_common.cpp index 52d3ceb47e2..8ff9400d8a2 100644 --- a/src/Parsers/tests/gtest_common.cpp +++ b/src/Parsers/tests/gtest_common.cpp @@ -28,7 +28,7 @@ TEST_P(ParserRegexTest, parseQuery) ASSERT_TRUE(expected_ast); 
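The PRQL test instantiation above fixes all three limits up front, with `kDummyMaxParserBacktracks = 1000000` mirroring the fuzzer's value. Standalone, the three-argument constructor looks like this (a sketch, assuming the `ParserPRQLQuery.h` from this patch):

``` cpp
#include <memory>
#include <Parsers/PRQL/ParserPRQLQuery.h>

// Values mirror the dummy test constants above; per the header comment,
// the fields are only used when PRQL is enabled at build time.
auto prql_parser = std::make_shared<DB::ParserPRQLQuery>(
    /* max_query_size_ */ 256 * 1024,
    /* max_parser_depth_ */ 256,
    /* max_parser_backtracks_ */ 1000000);
```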
 DB::ASTPtr ast;
-    ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0));
+    ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0, 0));
     DB::WriteBufferFromOwnString buf;
     formatAST(*ast->clone(), buf, false, false);
     EXPECT_THAT(buf.str(), ::testing::MatchesRegex(expected_ast));
@@ -45,12 +45,12 @@ TEST_P(ParserKQLTest, parseKQLQuery)
 {
     if (std::string(expected_ast).starts_with("throws"))
     {
-        EXPECT_THROW(parseKQLQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception);
+        EXPECT_THROW(parseKQLQuery(*parser, input_text.begin(), input_text.end(), 0, 0, 0), DB::Exception);
     }
     else
     {
         DB::ASTPtr ast;
-        ASSERT_NO_THROW(ast = parseKQLQuery(*parser, input_text.begin(), input_text.end(), 0, 0));
+        ASSERT_NO_THROW(ast = parseKQLQuery(*parser, input_text.begin(), input_text.end(), 0, 0, 0));
         if (std::string("CREATE USER or ALTER USER query") != parser->getName()
             && std::string("ATTACH access entity query") != parser->getName())
         {
@@ -78,6 +78,6 @@ TEST_P(ParserKQLTest, parseKQLQuery)
     }
     else
     {
-        ASSERT_THROW(parseKQLQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception);
+        ASSERT_THROW(parseKQLQuery(*parser, input_text.begin(), input_text.end(), 0, 0, 0), DB::Exception);
     }
 }
diff --git a/src/Parsers/tests/gtest_dictionary_parser.cpp b/src/Parsers/tests/gtest_dictionary_parser.cpp
index c0a975f7a38..a1ba46125a7 100644
--- a/src/Parsers/tests/gtest_dictionary_parser.cpp
+++ b/src/Parsers/tests/gtest_dictionary_parser.cpp
@@ -40,7 +40,7 @@ TEST(ParserDictionaryDDL, SimpleDictionary)
                    " RANGE(MIN second_column MAX third_column)";
 
     ParserCreateDictionaryQuery parser;
-    ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);
+    ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0);
     ASTCreateQuery * create = ast->as();
     EXPECT_EQ(create->getTable(), "dict1");
     EXPECT_EQ(create->getDatabase(), "test");
@@ -136,7 +136,7 @@ TEST(ParserDictionaryDDL, AttributesWithMultipleProperties)
                    " SOURCE(CLICKHOUSE(HOST 'localhost'))";
 
     ParserCreateDictionaryQuery parser;
-    ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);
+    ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0);
     ASTCreateQuery * create = ast->as();
     EXPECT_EQ(create->getTable(), "dict2");
     EXPECT_EQ(create->getDatabase(), "");
@@ -183,7 +183,7 @@ TEST(ParserDictionaryDDL, CustomAttributePropertiesOrder)
                    " LIFETIME(300)";
 
     ParserCreateDictionaryQuery parser;
-    ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);
+    ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0);
     ASTCreateQuery * create = ast->as();
 
     /// test attributes
@@ -238,7 +238,7 @@ TEST(ParserDictionaryDDL, NestedSource)
                    " RANGE(MIN second_column MAX third_column)";
 
     ParserCreateDictionaryQuery parser;
-    ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);
+    ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0);
     ASTCreateQuery * create = ast->as();
     EXPECT_EQ(create->getTable(), "dict4");
     EXPECT_EQ(create->getDatabase(), "");
@@ -286,7 +286,7 @@ TEST(ParserDictionaryDDL, Formatting)
                    " RANGE(MIN second_column MAX third_column)";
 
     ParserCreateDictionaryQuery parser;
-    ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0);
+    ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0);
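The `Formatting` test continuing below round-trips the parsed dictionary DDL through `serializeAST` and compares the canonical one-line output. Condensed as a sketch, using the `input` DDL string from the test; the `as<...>()` template argument is an assumption, since the extracted diff text dropped angle-bracket contents throughout these hunks:

``` cpp
// Round-trip sketch: parse, then re-serialize to canonical text.
DB::ParserCreateDictionaryQuery parser;
DB::ASTPtr ast = DB::parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 0, 0);
auto * create = ast->as<DB::ASTCreateQuery>();   // assumed template argument
auto str = DB::serializeAST(*create);            // compared by EXPECT_EQ against the expected DDL below
```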
ASTCreateQuery * create = ast->as(); auto str = serializeAST(*create); EXPECT_EQ(str, "CREATE DICTIONARY test.dict5 (`key_column1` UInt64 DEFAULT 1 HIERARCHICAL INJECTIVE, `key_column2` String DEFAULT '', `second_column` UInt8 EXPRESSION intDiv(50, rand() % 1000), `third_column` UInt8) PRIMARY KEY key_column1, key_column2 SOURCE(MYSQL(HOST 'localhost' PORT 9000 USER 'default' REPLICA (HOST '127.0.0.1' PRIORITY 1) PASSWORD '')) LIFETIME(MIN 1 MAX 10) LAYOUT(CACHE(SIZE_IN_CELLS 50)) RANGE(MIN second_column MAX third_column)"); @@ -297,7 +297,7 @@ TEST(ParserDictionaryDDL, ParseDropQuery) String input1 = "DROP DICTIONARY test.dict1"; ParserDropQuery parser; - ASTPtr ast1 = parseQuery(parser, input1.data(), input1.data() + input1.size(), "", 0, 0); + ASTPtr ast1 = parseQuery(parser, input1.data(), input1.data() + input1.size(), "", 0, 0, 0); ASTDropQuery * drop1 = ast1->as(); EXPECT_TRUE(drop1->is_dictionary); @@ -308,7 +308,7 @@ TEST(ParserDictionaryDDL, ParseDropQuery) String input2 = "DROP DICTIONARY IF EXISTS dict2"; - ASTPtr ast2 = parseQuery(parser, input2.data(), input2.data() + input2.size(), "", 0, 0); + ASTPtr ast2 = parseQuery(parser, input2.data(), input2.data() + input2.size(), "", 0, 0, 0); ASTDropQuery * drop2 = ast2->as(); EXPECT_TRUE(drop2->is_dictionary); @@ -323,7 +323,7 @@ TEST(ParserDictionaryDDL, ParsePropertiesQueries) String input1 = "SHOW CREATE DICTIONARY test.dict1"; ParserTablePropertiesQuery parser; - ASTPtr ast1 = parseQuery(parser, input1.data(), input1.data() + input1.size(), "", 0, 0); + ASTPtr ast1 = parseQuery(parser, input1.data(), input1.data() + input1.size(), "", 0, 0, 0); ASTShowCreateDictionaryQuery * show1 = ast1->as(); EXPECT_EQ(show1->getTable(), "dict1"); @@ -332,7 +332,7 @@ TEST(ParserDictionaryDDL, ParsePropertiesQueries) String input2 = "EXISTS DICTIONARY dict2"; - ASTPtr ast2 = parseQuery(parser, input2.data(), input2.data() + input2.size(), "", 0, 0); + ASTPtr ast2 = parseQuery(parser, input2.data(), input2.data() + input2.size(), "", 0, 0, 0); ASTExistsDictionaryQuery * show2 = ast2->as(); EXPECT_EQ(show2->getTable(), "dict2"); diff --git a/src/Parsers/tests/gtest_format_hiliting.cpp b/src/Parsers/tests/gtest_format_hiliting.cpp index e87b093db9d..00e8197af1f 100644 --- a/src/Parsers/tests/gtest_format_hiliting.cpp +++ b/src/Parsers/tests/gtest_format_hiliting.cpp @@ -50,7 +50,7 @@ void compare(const String & expected, const String & query) { using namespace DB; ParserQuery parser(query.data() + query.size()); - ASTPtr ast = parseQuery(parser, query, 0, 0); + ASTPtr ast = parseQuery(parser, query, 0, 0, 0); WriteBufferFromOwnString write_buffer; IAST::FormatSettings settings(write_buffer, true, true); diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 7b3fb0c5c91..8ca8f0f258b 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -538,7 +538,7 @@ FilterDAGInfo buildAdditionalFiltersIfNeeded(const StoragePtr & storage, ParserExpression parser; additional_filter_ast = parseQuery( parser, filter.data(), filter.data() + filter.size(), - "additional filter", settings.max_query_size, settings.max_parser_depth); + "additional filter", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); break; } } diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index bd0b831ee58..a04f9f502e2 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -523,7 +523,7 @@ ASTPtr parseAdditionalResultFilter(const Settings & settings) ParserExpression parser; 
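The Planner hunks here all thread the same triple of limits out of `Settings`. Isolated as a fragment (names taken from the hunks above and below; `context` and `filter` are assumed from the surrounding functions):

``` cpp
// Sketch: parse a user-supplied filter expression under the session's limits.
const DB::Settings & settings = context->getSettingsRef();
DB::ParserExpression parser;
DB::ASTPtr filter_ast = DB::parseQuery(
    parser, filter.data(), filter.data() + filter.size(),
    "additional filter",
    settings.max_query_size,
    settings.max_parser_depth,
    settings.max_parser_backtracks);
```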
auto additional_result_filter_ast = parseQuery( parser, additional_result_filter.data(), additional_result_filter.data() + additional_result_filter.size(), - "additional result filter", settings.max_query_size, settings.max_parser_depth); + "additional result filter", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); return additional_result_filter_ast; } diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index f91f7cf536b..9d056b42101 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -537,7 +537,7 @@ bool ConstantExpressionTemplate::parseLiteralAndAssertType( ParserArrayOfLiterals parser_array; ParserTupleOfLiterals parser_tuple; - IParser::Pos iterator(token_iterator, static_cast(settings.max_parser_depth)); + IParser::Pos iterator(token_iterator, static_cast(settings.max_parser_depth), static_cast(settings.max_parser_backtracks)); while (iterator->begin < istr.position()) ++iterator; Expected expected; diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp index 9c7f095e661..67bdd1cf877 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp @@ -274,7 +274,8 @@ static bool tryToExtractStructureFromCreateQuery(ReadBuffer & in, NamesAndTypesL String error; const char * start = create_query_str.data(); const char * end = create_query_str.data() + create_query_str.size(); - ASTPtr query = tryParseQuery(parser, start, end, error, false, "MySQL create query", false, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr query = tryParseQuery(parser, start, end, error, false, "MySQL create query", false, + DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true); if (!query) return false; diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 8659dcd2318..353de76eea8 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -194,7 +194,7 @@ void ValuesBlockInputFormat::readUntilTheEndOfRowAndReTokenize(size_t current_co auto * row_end = buf->position(); buf->rollbackToCheckpoint(); tokens.emplace(buf->position(), row_end); - token_iterator.emplace(*tokens, static_cast(context->getSettingsRef().max_parser_depth)); + token_iterator.emplace(*tokens, static_cast(context->getSettingsRef().max_parser_depth), static_cast(context->getSettingsRef().max_parser_backtracks)); auto const & first = (*token_iterator).get(); if (first.isError() || first.isEnd()) { @@ -418,7 +418,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx { Expected expected; /// Keep a copy to the start of the column tokens to use if later if necessary - ti_start = IParser::Pos(*token_iterator, static_cast(settings.max_parser_depth)); + ti_start = IParser::Pos(*token_iterator, static_cast(settings.max_parser_depth), static_cast(settings.max_parser_backtracks)); parsed = parser.parse(*token_iterator, ast, expected); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 21e3cfcceab..fb92be6eed9 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ 
b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1343,7 +1343,7 @@ static void buildIndexes( { const auto & indices = settings.ignore_data_skipping_indices.toString(); Tokens tokens(indices.data(), indices.data() + indices.size(), settings.max_query_size); - IParser::Pos pos(tokens, static_cast(settings.max_parser_depth)); + IParser::Pos pos(tokens, static_cast(settings.max_parser_depth), static_cast(settings.max_parser_backtracks)); Expected expected; /// Use an unordered list rather than string vector diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 15765f99b4b..f21991e8d58 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -885,7 +885,7 @@ namespace const char * begin = query_text.data(); const char * end = begin + query_text.size(); ParserQuery parser(end, settings.allow_settings_after_format_in_insert); - ast = parseQuery(parser, begin, end, "", settings.max_query_size, settings.max_parser_depth); + ast = parseQuery(parser, begin, end, "", settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); /// Choose input format. insert_query = ast->as(); diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index c62dc8109ea..83e06628185 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -284,6 +284,7 @@ void PostgreSQLHandler::processQuery() auto parse_res = splitMultipartQuery(query->query, queries, settings.max_query_size, settings.max_parser_depth, + settings.max_parser_backtracks, settings.allow_settings_after_format_in_insert); if (!parse_res.second) throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot parse and execute the following part of query: {}", String(parse_res.first)); diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index e08dac3a332..16b89f24243 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -145,7 +145,7 @@ void ColumnDescription::readText(ReadBuffer & buf) readEscapedStringUntilEOL(modifiers, buf); ParserColumnDeclaration column_parser(/* require type */ true); - ASTPtr ast = parseQuery(column_parser, "x T " + modifiers, "column parser", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr ast = parseQuery(column_parser, "x T " + modifiers, "column parser", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); if (auto * col_ast = ast->as()) { @@ -211,7 +211,7 @@ void ColumnsDescription::setAliases(NamesAndAliases aliases) const char * alias_expression_pos = alias.expression.data(); const char * alias_expression_end = alias_expression_pos + alias.expression.size(); ParserExpression expression_parser; - description.default_desc.expression = parseQuery(expression_parser, alias_expression_pos, alias_expression_end, "expression", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + description.default_desc.expression = parseQuery(expression_parser, alias_expression_pos, alias_expression_end, "expression", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); add(std::move(description)); } diff --git a/src/Storages/ConstraintsDescription.cpp b/src/Storages/ConstraintsDescription.cpp index 219c3fd0c97..d492de2c2b2 100644 --- a/src/Storages/ConstraintsDescription.cpp +++ b/src/Storages/ConstraintsDescription.cpp @@ -45,7 +45,7 @@ ConstraintsDescription ConstraintsDescription::parse(const String & str) ConstraintsDescription res; ParserConstraintDeclarationList parser; - ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr 
list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); for (const auto & constraint : list->children) res.constraints.push_back(constraint); diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index c723fa4225c..14555dca63b 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -173,7 +173,7 @@ IndicesDescription IndicesDescription::parse(const String & str, const ColumnsDe return result; ParserIndexDeclarationList parser; - ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); for (const auto & index : list->children) result.emplace_back(IndexDescription::getIndexFromAST(index, columns, context)); diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index c407cef627d..d63b40e2b11 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -171,7 +171,7 @@ KeyDescription KeyDescription::parse(const String & str, const ColumnsDescriptio return result; ParserExpression parser; - ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); FunctionNameNormalizer().visit(ast.get()); return getKeyFromAST(ast, columns, context); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 984d06e6a61..023202019e4 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -957,7 +957,7 @@ void IMergeTreeDataPart::loadDefaultCompressionCodec() try { ParserCodec codec_parser; - auto codec_ast = parseQuery(codec_parser, codec_line.data() + buf.getPosition(), codec_line.data() + codec_line.length(), "codec parser", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto codec_ast = parseQuery(codec_parser, codec_line.data() + buf.getPosition(), codec_line.data() + codec_line.length(), "codec parser", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); default_codec = CompressionCodecFactory::instance().get(codec_ast, {}); } catch (const DB::Exception & ex) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 1721fd15b8d..fe45d0bee54 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -12,7 +12,7 @@ namespace ErrorCodes static CompressionCodecPtr getMarksCompressionCodec(const String & marks_compression_codec) { ParserCodec codec_parser; - auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); return CompressionCodecFactory::instance().get(ast, nullptr); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index fd83d2ebfe9..a31da5bc4fe 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -243,7 +243,7 @@ void MergeTreeDataPartWriterOnDisk::initPrimaryIndex() if (compress_primary_key) { ParserCodec 
codec_parser; - auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.primary_key_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.primary_key_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); CompressionCodecPtr primary_key_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr); index_compressor_stream = std::make_unique(*index_file_hashing_stream, primary_key_compression_codec, settings.primary_key_compress_block_size); index_source_hashing_stream = std::make_unique(*index_compressor_stream); @@ -268,7 +268,7 @@ void MergeTreeDataPartWriterOnDisk::initStatistics() void MergeTreeDataPartWriterOnDisk::initSkipIndices() { ParserCodec codec_parser; - auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr); for (const auto & skip_index : skip_indices) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index d79590ded21..6a3b08d4d65 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -135,7 +135,7 @@ void MergeTreeDataPartWriterWide::addStreams( compression_codec = CompressionCodecFactory::instance().get(effective_codec_desc, nullptr, default_codec, true); ParserCodec codec_parser; - auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr); const auto column_desc = metadata_snapshot->columns.tryGetColumnDescription(GetColumnsOptions(GetColumnsOptions::AllPhysical), column.getNameInStorage()); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index ef679b61a79..6471f510291 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -606,7 +606,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd Strings forced_indices; { Tokens tokens(indices.data(), indices.data() + indices.size(), settings.max_query_size); - IParser::Pos pos(tokens, static_cast(settings.max_parser_depth)); + IParser::Pos pos(tokens, static_cast(settings.max_parser_depth), static_cast(settings.max_parser_backtracks)); Expected expected; if (!parseIdentifiersOrStringLiterals(pos, expected, forced_indices)) throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse force_data_skipping_indices ('{}')", indices); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 41188891118..0ca7a4d74d9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -432,7 +432,7 @@ StorageInMemoryMetadata 
ReplicatedMergeTreeTableMetadata::Diff::getNewMetadata(c auto parse_key_expr = [] (const String & key_expr) { ParserNotEmptyExpressionList parser(false); - auto new_sorting_key_expr_list = parseQuery(parser, key_expr, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto new_sorting_key_expr_list = parseQuery(parser, key_expr, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); ASTPtr order_by_ast; if (new_sorting_key_expr_list->children.size() == 1) @@ -489,7 +489,7 @@ StorageInMemoryMetadata ReplicatedMergeTreeTableMetadata::Diff::getNewMetadata(c if (!new_ttl_table.empty()) { ParserTTLExpressionList parser; - auto ttl_for_table_ast = parseQuery(parser, new_ttl_table, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + auto ttl_for_table_ast = parseQuery(parser, new_ttl_table, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); new_metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST( ttl_for_table_ast, new_metadata.columns, context, new_metadata.primary_key, true /* allow_suspicious; because it is replication */); } diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index f6ec277c270..aaf5c1b5d87 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -228,7 +228,7 @@ void MutationCommands::readText(ReadBuffer & in) ParserAlterCommandList p_alter_commands; auto commands_ast = parseQuery( - p_alter_commands, commands_str.data(), commands_str.data() + commands_str.length(), "mutation commands list", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + p_alter_commands, commands_str.data(), commands_str.data() + commands_str.length(), "mutation commands list", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); for (const auto & child : commands_ast->children) { diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 64d329f74b2..f686fbda664 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -479,7 +479,7 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery( ASTPtr result; Tokens tokens(attr.attr_def.data(), attr.attr_def.data() + attr.attr_def.size()); - IParser::Pos pos(tokens, DBMS_DEFAULT_MAX_PARSER_DEPTH); + IParser::Pos pos(tokens, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); if (!expr_parser.parse(pos, result, expected)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse default expression: {}", attr.attr_def); diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 08ebe3a10d0..0bcbedee41a 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -341,7 +341,7 @@ ProjectionsDescription ProjectionsDescription::parse(const String & str, const C return result; ParserProjectionDeclarationList parser; - ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr list = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); for (const auto & projection_ast : list->children) { diff --git a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp index ac5dd6c05d0..635686780a0 100644 --- a/src/Storages/System/StorageSystemDDLWorkerQueue.cpp +++ b/src/Storages/System/StorageSystemDDLWorkerQueue.cpp @@ -78,7 +78,8 @@ static String clusterNameFromDDLQuery(ContextPtr context, const DDLTask & 
task) ParserQuery parser_query(end, settings.allow_settings_after_format_in_insert); ASTPtr query = parseQuery(parser_query, begin, end, description, settings.max_query_size, - settings.max_parser_depth); + settings.max_parser_depth, + settings.max_parser_backtracks); String cluster_name; if (const auto * query_on_cluster = dynamic_cast(query.get())) diff --git a/src/Storages/System/attachInformationSchemaTables.cpp b/src/Storages/System/attachInformationSchemaTables.cpp index 3482867bbf7..5afdd7a02ac 100644 --- a/src/Storages/System/attachInformationSchemaTables.cpp +++ b/src/Storages/System/attachInformationSchemaTables.cpp @@ -478,7 +478,7 @@ static void createInformationSchemaView(ContextMutablePtr context, IDatabase & d ParserCreateQuery parser; ASTPtr ast = parseQuery(parser, query.data(), query.data() + query.size(), "Attach query from embedded resource " + metadata_resource_name, - DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH); + DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); auto & ast_create = ast->as(); assert(view_name == ast_create.getTable()); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index a675afbdc26..3d1ce76dff1 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -425,7 +425,7 @@ TTLTableDescription TTLTableDescription::parse(const String & str, const Columns return result; ParserTTLExpressionList parser; - ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); FunctionNameNormalizer().visit(ast.get()); return getTTLForTableFromAST(ast, columns, context, primary_key, context->getSettingsRef().allow_suspicious_ttl_expressions); diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 32266f20923..c545367b63d 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -32,6 +32,7 @@ ColumnsDescription getStructureOfRemoteTableInShard( const ASTPtr & table_func_ptr) { String query; + const Settings & settings = context->getSettingsRef(); if (table_func_ptr) { @@ -110,7 +111,8 @@ ColumnsDescription getStructureOfRemoteTableInShard( column.default_desc.kind = columnDefaultKindFromString(kind_name); String expr_str = (*default_expr)[i].get(); column.default_desc.expression = parseQuery( - expr_parser, expr_str.data(), expr_str.data() + expr_str.size(), "default expression", 0, context->getSettingsRef().max_parser_depth); + expr_parser, expr_str.data(), expr_str.data() + expr_str.size(), "default expression", + 0, settings.max_parser_depth, settings.max_parser_backtracks); } res.add(column); diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index 475cf5a4eae..7e2d393c3d1 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -118,7 +118,7 @@ static void checkOld( const std::string & expected) { ParserSelectQuery parser; - ASTPtr ast = parseQuery(parser, query, 1000, 1000); + ASTPtr ast = parseQuery(parser, query, 1000, 1000, 1000000); SelectQueryInfo query_info; SelectQueryOptions select_options; query_info.syntax_analyzer_result @@ -161,7 +161,7 @@ static void checkNewAnalyzer( const std::string & expected) { ParserSelectQuery 
parser; - ASTPtr ast = parseQuery(parser, query, 1000, 1000); + ASTPtr ast = parseQuery(parser, query, 1000, 1000, 1000000); SelectQueryOptions select_query_options; auto query_tree = buildQueryTree(ast, state.context); diff --git a/src/TableFunctions/Hive/TableFunctionHive.cpp b/src/TableFunctions/Hive/TableFunctionHive.cpp index e840d5fc8be..80494dbe5a8 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.cpp +++ b/src/TableFunctions/Hive/TableFunctionHive.cpp @@ -17,6 +17,7 @@ #include #include + namespace DB { @@ -99,7 +100,8 @@ StoragePtr TableFunctionHive::executeImpl( "(" + partition_by_def + ")", "partition by declaration list", settings.max_query_size, - settings.max_parser_depth); + settings.max_parser_depth, + settings.max_parser_backtracks); StoragePtr storage; storage = std::make_shared( hive_metastore_url, diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index 400fc81e6d4..8607597fa67 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -63,7 +63,7 @@ std::vector TableFunctionExplain::skipAnalysisForArguments(const QueryTr return {}; } -void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPtr /*context*/) +void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPtr context) { const auto * function = ast_function->as(); if (!function || !function->arguments) @@ -94,12 +94,12 @@ void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPt const auto & settings_str = settings_arg->value.get(); if (!settings_str.empty()) { - constexpr UInt64 max_size = 4096; - constexpr UInt64 max_depth = 16; + const Settings & settings = context->getSettingsRef(); /// parse_only_internals_ = true - we don't want to parse `SET` keyword ParserSetQuery settings_parser(/* parse_only_internals_ = */ true); - ASTPtr settings_ast = parseQuery(settings_parser, settings_str, max_size, max_depth); + ASTPtr settings_ast = parseQuery(settings_parser, settings_str, + settings.max_query_size, settings.max_parser_depth, settings.max_parser_backtracks); explain_query->setSettings(std::move(settings_ast)); } From f0c9fe6bc90798d3d4e402817e14c83b831663f1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 19:54:20 +0100 Subject: [PATCH 279/283] Limit backtracking in parser --- src/Parsers/IParser.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Parsers/IParser.cpp b/src/Parsers/IParser.cpp index d1e9ace89b6..5679fba1a0c 100644 --- a/src/Parsers/IParser.cpp +++ b/src/Parsers/IParser.cpp @@ -24,9 +24,6 @@ IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs) TokenIterator::operator=(rhs); - if (backtracks % 1000 == 0) - std::cerr << backtracks << "\n"; - return *this; } From 65d091cc65362f9f86e1efa5a51001a67c11b03c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 20:16:23 +0100 Subject: [PATCH 280/283] Limit backtracking in parser --- src/Functions/getFuzzerData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/getFuzzerData.cpp b/src/Functions/getFuzzerData.cpp index 5c536477401..a6f8dd1de2c 100644 --- a/src/Functions/getFuzzerData.cpp +++ b/src/Functions/getFuzzerData.cpp @@ -41,7 +41,7 @@ public: return DataTypeString().createColumnConst(input_rows_count, fuzz_data); } - static void update(const String & fuzz_data_) + [[maybe_unused]] static void update(const String & fuzz_data_) { fuzz_data = fuzz_data_; } From 
13797b9712994edfe37d5bcfbca9d19d174ea95b Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 20:21:07 +0100 Subject: [PATCH 281/283] Fix style --- src/Common/ErrorCodes.cpp | 1 + src/Parsers/IParser.cpp | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 9f2572cbfc6..0f01036aa06 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -584,6 +584,7 @@ M(703, INVALID_IDENTIFIER) \ M(704, QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS) \ M(705, TABLE_NOT_EMPTY) \ + M(706, TOO_SLOW_PARSING) \ \ M(900, DISTRIBUTED_CACHE_ERROR) \ M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ diff --git a/src/Parsers/IParser.cpp b/src/Parsers/IParser.cpp index 5679fba1a0c..41981a4bb8a 100644 --- a/src/Parsers/IParser.cpp +++ b/src/Parsers/IParser.cpp @@ -4,6 +4,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int TOO_SLOW_PARSING; +} + IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs) { depth = rhs.depth; @@ -18,7 +23,7 @@ IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs) { ++backtracks; if (max_backtracks && backtracks > max_backtracks) - throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Maximum amount of backtracking ({}) exceeded in the parser. " + throw Exception(ErrorCodes::TOO_SLOW_PARSING, "Maximum amount of backtracking ({}) exceeded in the parser. " "Consider raising the max_parser_backtracks parameter.", max_backtracks); } From 95cfba9439b1c298fec390521510af32b6a5d66e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 17 Mar 2024 20:26:43 +0100 Subject: [PATCH 282/283] Add a test --- src/Common/ErrorCodes.cpp | 9 ++++----- .../0_stateless/03012_parser_backtracking.reference | 1 + tests/queries/0_stateless/03012_parser_backtracking.sh | 7 +++++++ 3 files changed, 12 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03012_parser_backtracking.reference create mode 100755 tests/queries/0_stateless/03012_parser_backtracking.sh diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 0f01036aa06..75ba9cff81e 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -584,11 +584,6 @@ M(703, INVALID_IDENTIFIER) \ M(704, QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS) \ M(705, TABLE_NOT_EMPTY) \ - M(706, TOO_SLOW_PARSING) \ - \ - M(900, DISTRIBUTED_CACHE_ERROR) \ - M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ - \ M(706, LIBSSH_ERROR) \ M(707, GCP_ERROR) \ M(708, ILLEGAL_STATISTIC) \ @@ -600,6 +595,10 @@ M(715, CANNOT_DETECT_FORMAT) \ M(716, CANNOT_FORGET_PARTITION) \ M(717, EXPERIMENTAL_FEATURE_ERROR) \ + M(718, TOO_SLOW_PARSING) \ + \ + M(900, DISTRIBUTED_CACHE_ERROR) \ + M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/tests/queries/0_stateless/03012_parser_backtracking.reference b/tests/queries/0_stateless/03012_parser_backtracking.reference new file mode 100644 index 00000000000..84727754516 --- /dev/null +++ b/tests/queries/0_stateless/03012_parser_backtracking.reference @@ -0,0 +1 @@ +TOO_SLOW_PARSING diff --git a/tests/queries/0_stateless/03012_parser_backtracking.sh b/tests/queries/0_stateless/03012_parser_backtracking.sh new file mode 100755 index 00000000000..889753fb048 --- /dev/null +++ b/tests/queries/0_stateless/03012_parser_backtracking.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +.
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL --query "SELECT((((((((((SELECT(((((((((SELECT((((((((((SELECT(((((((((SELECT((((((((((SELECT(((((((((SELECT 1+)))))))))))))))))))))))))))))))))))))))))))))))))))))))))" 2>&1 | grep -o -F 'TOO_SLOW_PARSING' From 6549a96ced628dbbaa5b1c32304459ae6d40930a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 18 Mar 2024 10:38:28 +0100 Subject: [PATCH 283/283] Revert "Analyzer: Fix virtual columns in StorageMerge" --- .../InterpreterSelectQueryAnalyzer.cpp | 1 - src/Storages/StorageMerge.cpp | 93 ++++++------------- tests/analyzer_tech_debt.txt | 1 + 3 files changed, 27 insertions(+), 68 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index 539d7a59f6f..922f4a99b4a 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -86,7 +86,6 @@ void replaceStorageInQueryTree(QueryTreeNodePtr & query_tree, const ContextPtr & continue; auto replacement_table_expression = std::make_shared(storage, context); - replacement_table_expression->setAlias(node->getAlias()); if (auto table_expression_modifiers = table_node.getTableExpressionModifiers()) replacement_table_expression->setTableExpressionModifiers(*table_expression_modifiers); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 3a4ec6f9b17..9161b2773da 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -885,7 +885,6 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo if (modified_query_info.table_expression) { auto replacement_table_expression = std::make_shared(storage, storage_lock, storage_snapshot_); - replacement_table_expression->setAlias(modified_query_info.table_expression->getAlias()); if (query_info.table_expression_modifiers) replacement_table_expression->setTableExpressionModifiers(*query_info.table_expression_modifiers); @@ -1026,7 +1025,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( const auto & [database_name, storage, _, table_name] = storage_with_lock; bool allow_experimental_analyzer = context->getSettingsRef().allow_experimental_analyzer; auto storage_stage - = storage->getQueryProcessingStage(context, processed_stage, storage_snapshot_, modified_query_info); + = storage->getQueryProcessingStage(context, QueryProcessingStage::Complete, storage_snapshot_, modified_query_info); builder = plan.buildQueryPipeline( QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context)); @@ -1053,80 +1052,40 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( Block pipe_header = builder->getHeader(); - if (allow_experimental_analyzer) + if (has_database_virtual_column && common_header.has("_database") && !pipe_header.has("_database")) { - String table_alias = modified_query_info.query_tree->as()->getJoinTree()->as()->getAlias(); + ColumnWithTypeAndName column; + column.name = "_database"; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(database_name)); - String database_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? "_database" : table_alias + "._database"; - String table_column = table_alias.empty() || processed_stage == QueryProcessingStage::FetchColumns ? 
"_table" : table_alias + "._table"; + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto adding_column_actions = std::make_shared( + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); - if (has_database_virtual_column && common_header.has(database_column) - && (storage_stage == QueryProcessingStage::FetchColumns || !pipe_header.has("'" + database_name + "'_String"))) - { - ColumnWithTypeAndName column; - column.name = database_column; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(database_name)); - - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); - - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); - } - - if (has_table_virtual_column && common_header.has(table_column) - && (storage_stage == QueryProcessingStage::FetchColumns || !pipe_header.has("'" + table_name + "'_String"))) - { - ColumnWithTypeAndName column; - column.name = table_column; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(table_name)); - - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); - - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); - } + builder->addSimpleTransform([&](const Block & stream_header) + { return std::make_shared(stream_header, adding_column_actions); }); } - else + + if (has_table_virtual_column && common_header.has("_table") && !pipe_header.has("_table")) { - if (has_database_virtual_column && common_header.has("_database") && !pipe_header.has("_database")) - { - ColumnWithTypeAndName column; - column.name = "_database"; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(database_name)); + ColumnWithTypeAndName column; + column.name = "_table"; + column.type = std::make_shared(std::make_shared()); + column.column = column.type->createColumnConst(0, Field(table_name)); - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); - builder->addSimpleTransform([&](const Block & stream_header) - { return std::make_shared(stream_header, adding_column_actions); }); - } + auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); + auto adding_column_actions = std::make_shared( + std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); - if (has_table_virtual_column && common_header.has("_table") && !pipe_header.has("_table")) - { - ColumnWithTypeAndName column; - column.name = "_table"; - column.type = std::make_shared(std::make_shared()); - column.column = column.type->createColumnConst(0, Field(table_name)); - - auto adding_column_dag = ActionsDAG::makeAddingColumnActions(std::move(column)); - auto adding_column_actions = std::make_shared( - 
std::move(adding_column_dag), ExpressionActionsSettings::fromContext(context, CompileExpressions::yes)); builder->addSimpleTransform([&](const Block & stream_header) { return std::make_shared<ExpressionTransform>(stream_header, adding_column_actions); }); } /// Subordinary tables could have different but convertible types, like numeric types of different width. /// We must return streams with structure equals to structure of Merge table. convertAndFilterSourceStream( - header, modified_query_info, storage_snapshot_, aliases, row_policy_data_opt, context, *builder, storage_stage); + header, modified_query_info, storage_snapshot_, aliases, row_policy_data_opt, context, *builder, processed_stage); } return builder; @@ -1157,13 +1116,13 @@ QueryPlan ReadFromMerge::createPlanForTable( bool allow_experimental_analyzer = modified_context->getSettingsRef().allow_experimental_analyzer; auto storage_stage = storage->getQueryProcessingStage(modified_context, - processed_stage, + QueryProcessingStage::Complete, storage_snapshot_, modified_query_info); QueryPlan plan; - if (processed_stage <= storage_stage) + if (processed_stage <= storage_stage || (allow_experimental_analyzer && processed_stage == QueryProcessingStage::FetchColumns)) { /// If there are only virtual columns in query, you must request at least one other column. if (real_column_names.empty()) @@ -1208,7 +1167,7 @@ QueryPlan ReadFromMerge::createPlanForTable( row_policy_data_opt->addStorageFilter(source_step_with_filter); } } - else if (processed_stage > storage_stage || allow_experimental_analyzer) + else if (processed_stage > storage_stage || (allow_experimental_analyzer && processed_stage != QueryProcessingStage::FetchColumns)) { /// Maximum permissible parallelism is streams_num modified_context->setSetting("max_threads", streams_num); diff --git a/tests/analyzer_tech_debt.txt b/tests/analyzer_tech_debt.txt index 7cd73705e77..bcf0eee0d0e 100644 --- a/tests/analyzer_tech_debt.txt +++ b/tests/analyzer_tech_debt.txt @@ -1,4 +1,5 @@ 00223_shard_distributed_aggregation_memory_efficient +00717_merge_and_distributed 00725_memory_tracking 01062_pm_all_join_with_block_continuation 01083_expressions_in_engine_arguments
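
---

Editor's note on the backtracking limit introduced by PATCH 279-282: the visible `IParser.cpp` hunks show that an assignment to `IParser::Pos` that rewinds the position increments a `backtracks` counter, and once that counter passes `max_parser_backtracks` the query fails with the dedicated `TOO_SLOW_PARSING` error instead of parsing for an unbounded amount of time (the `03012_parser_backtracking.sh` test above triggers exactly this with a deeply nested `SELECT`). The sketch below is a minimal stand-alone model of that idea, not ClickHouse's real `IParser` API; every name in it (`BacktrackBudget`, `TooSlowParsing`, `parseTerm`) is hypothetical.

``` cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>

/// Thrown when the backtracking budget is exhausted, analogous to the
/// TOO_SLOW_PARSING error code introduced in PATCH 281.
struct TooSlowParsing : std::runtime_error
{
    using std::runtime_error::runtime_error;
};

/// A shared counter charged every time the parser rewinds; analogous to the
/// backtracks / max_backtracks pair added to IParser::Pos in this series.
struct BacktrackBudget
{
    uint32_t used = 0;
    uint32_t limit = 0;

    explicit BacktrackBudget(uint32_t limit_) : limit(limit_) {}

    void onBacktrack()
    {
        if (limit && ++used > limit)
            throw TooSlowParsing("Maximum amount of backtracking (" + std::to_string(limit)
                + ") exceeded in the parser. Consider raising the limit.");
    }
};

/// Toy grammar: term := "ab" | "a". Trying the longer alternative first and
/// rewinding on failure is the kind of backtracking the series puts a cap on.
bool parseTerm(const std::string & s, size_t & pos, BacktrackBudget & budget)
{
    const size_t saved = pos;
    if (pos + 1 < s.size() && s[pos] == 'a' && s[pos + 1] == 'b')
    {
        pos += 2;
        return true;
    }
    budget.onBacktrack();  /// first alternative failed: rewinding costs one unit
    pos = saved;
    if (pos < s.size() && s[pos] == 'a')
    {
        ++pos;
        return true;
    }
    budget.onBacktrack();
    pos = saved;
    return false;
}

int main()
{
    BacktrackBudget budget(1000000);  /// plays the role of max_parser_backtracks
    size_t pos = 0;
    std::cout << parseTerm("ac", pos, budget) << ' ' << budget.used << '\n';  /// prints "1 1"
    return 0;
}
```

In ClickHouse itself the budget is threaded through every `parseQuery`/`tryParseQuery` call site, which is why the bulk of this series is a mechanical extra argument (`settings.max_parser_backtracks` or `DBMS_DEFAULT_MAX_PARSER_BACKTRACKS`) at each call.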