diff --git a/contrib/llvm-project b/contrib/llvm-project index d80af319f5f..4bfaeb31dd0 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit d80af319f5f047067b956b2fe93a6c00038c1e0d +Subproject commit 4bfaeb31dd0ef13f025221f93c138974a3e0a22a diff --git a/contrib/vectorscan b/contrib/vectorscan index f6250ae3e5a..b4bba94b1a2 160000 --- a/contrib/vectorscan +++ b/contrib/vectorscan @@ -1 +1 @@ -Subproject commit f6250ae3e5a3085000239313ad0689cc1e00cdc2 +Subproject commit b4bba94b1a250603b0b198e0394946e32f6c3f30 diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 65bf49c2914..7a4e6386d0d 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -26,6 +26,7 @@ logging.basicConfig( total_start_seconds = time.perf_counter() stage_start_seconds = total_start_seconds + # Thread executor that does not hides exception that happens during function # execution, and rethrows it after join() class SafeThread(Thread): @@ -158,6 +159,7 @@ for e in subst_elems: available_parameters[name] = values + # Takes parallel lists of templates, substitutes them with all combos of # parameters. The set of parameters is determined based on the first list. # Note: keep the order of queries -- sometimes we have DROP IF EXISTS diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 782cf29863c..214f2d550b4 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -670,7 +670,6 @@ if args.report == "main": ) elif args.report == "all-queries": - print((header_template.format())) add_tested_commits() diff --git a/docker/test/stateful/s3downloader b/docker/test/stateful/s3downloader index b1302877d6a..96f2aa96dd5 100755 --- a/docker/test/stateful/s3downloader +++ b/docker/test/stateful/s3downloader @@ -10,31 +10,38 @@ import requests import tempfile -DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com' +DEFAULT_URL = "https://clickhouse-datasets.s3.amazonaws.com" AVAILABLE_DATASETS = { - 'hits': 'hits_v1.tar', - 'visits': 'visits_v1.tar', + "hits": "hits_v1.tar", + "visits": "visits_v1.tar", } RETRIES_COUNT = 5 + def _get_temp_file_name(): - return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())) + return os.path.join( + tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()) + ) + def build_url(base_url, dataset): - return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset]) + return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset]) + def dowload_with_progress(url, path): logging.info("Downloading from %s to temp path %s", url, path) for i in range(RETRIES_COUNT): try: - with open(path, 'wb') as f: + with open(path, "wb") as f: response = requests.get(url, stream=True) response.raise_for_status() - total_length = response.headers.get('content-length') + total_length = response.headers.get("content-length") if total_length is None or int(total_length) == 0: - logging.info("No content-length, will download file without progress") + logging.info( + "No content-length, will download file without progress" + ) f.write(response.content) else: dl = 0 @@ -46,7 +53,11 @@ def dowload_with_progress(url, path): if sys.stdout.isatty(): done = int(50 * dl / total_length) percent = int(100 * float(dl) / total_length) - sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), 
percent)) + sys.stdout.write( + "\r[{}{}] {}%".format( + "=" * done, " " * (50 - done), percent + ) + ) sys.stdout.flush() break except Exception as ex: @@ -56,14 +67,21 @@ def dowload_with_progress(url, path): if os.path.exists(path): os.remove(path) else: - raise Exception("Cannot download dataset from {}, all retries exceeded".format(url)) + raise Exception( + "Cannot download dataset from {}, all retries exceeded".format(url) + ) sys.stdout.write("\n") logging.info("Downloading finished") + def unpack_to_clickhouse_directory(tar_path, clickhouse_path): - logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path) - with tarfile.open(tar_path, 'r') as comp_file: + logging.info( + "Will unpack data from temp path %s to clickhouse db %s", + tar_path, + clickhouse_path, + ) + with tarfile.open(tar_path, "r") as comp_file: comp_file.extractall(path=clickhouse_path) logging.info("Unpack finished") @@ -72,15 +90,21 @@ if __name__ == "__main__": logging.basicConfig(level=logging.INFO) parser = argparse.ArgumentParser( - description="Simple tool for dowloading datasets for clickhouse from S3") + description="Simple tool for dowloading datasets for clickhouse from S3" + ) - parser.add_argument('--dataset-names', required=True, nargs='+', choices=list(AVAILABLE_DATASETS.keys())) - parser.add_argument('--url-prefix', default=DEFAULT_URL) - parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/') + parser.add_argument( + "--dataset-names", + required=True, + nargs="+", + choices=list(AVAILABLE_DATASETS.keys()), + ) + parser.add_argument("--url-prefix", default=DEFAULT_URL) + parser.add_argument("--clickhouse-data-path", default="/var/lib/clickhouse/") args = parser.parse_args() datasets = args.dataset_names - logging.info("Will fetch following datasets: %s", ', '.join(datasets)) + logging.info("Will fetch following datasets: %s", ", ".join(datasets)) for dataset in datasets: logging.info("Processing %s", dataset) temp_archive_path = _get_temp_file_name() @@ -92,10 +116,11 @@ if __name__ == "__main__": logging.info("Some exception occured %s", str(ex)) raise finally: - logging.info("Will remove downloaded file %s from filesystem if it exists", temp_archive_path) + logging.info( + "Will remove downloaded file %s from filesystem if it exists", + temp_archive_path, + ) if os.path.exists(temp_archive_path): os.remove(temp_archive_path) logging.info("Processing of %s finished", dataset) logging.info("Fetch finished, enjoy your tables!") - - diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index ade59224035..e509809c028 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -170,6 +170,7 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] fi rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: +rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & # Compress tables. 
diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index e8c5e17024c..746cc7bb2d5 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -11,13 +11,14 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ aspell \ curl \ git \ + file \ libxml2-utils \ moreutils \ python3-fuzzywuzzy \ python3-pip \ shellcheck \ yamllint \ - && pip3 install black==22.8.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \ + && pip3 install black==23.1.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \ && apt-get clean \ && rm -rf /root/.cache/pip diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 9fea158b100..f1b7a40094d 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -377,8 +377,9 @@ CREATE TABLE table_name i32 Int32, s String, ... - INDEX a (u64 * i32, s) TYPE minmax GRANULARITY 3, - INDEX b (u64 * length(s)) TYPE set(1000) GRANULARITY 4 + INDEX idx1 u64 TYPE bloom_filter GRANULARITY 3, + INDEX idx2 u64 * i32 TYPE minmax GRANULARITY 3, + INDEX idx3 u64 * length(s) TYPE set(1000) GRANULARITY 4 ) ENGINE = MergeTree() ... ``` @@ -386,8 +387,9 @@ CREATE TABLE table_name Indices from the example can be used by ClickHouse to reduce the amount of data to read from disk in the following queries: ``` sql -SELECT count() FROM table WHERE s < 'z' -SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 +SELECT count() FROM table WHERE u64 == 10; +SELECT count() FROM table WHERE u64 * i32 >= 1234 +SELECT count() FROM table WHERE u64 * length(s) == 1234 ``` Data skipping indexes can also be created on composite columns: diff --git a/docs/en/getting-started/example-datasets/_category_.yml b/docs/en/getting-started/example-datasets/_category_.yml deleted file mode 100644 index 2ee34c63e93..00000000000 --- a/docs/en/getting-started/example-datasets/_category_.yml +++ /dev/null @@ -1,7 +0,0 @@ -position: 1 -label: 'Example Datasets' -collapsible: true -collapsed: true -link: - type: doc - id: en/getting-started/example-datasets/ diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 3debea0087e..ae3756d5d41 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -154,7 +154,7 @@ Arrays are written as a list of comma-separated values in square brackets. Numbe In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to ENUM id. If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_enum_as_number) to optimize ENUM parsing. -Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) structures is represented as an array. +Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) structures is represented as an array. For example: @@ -1150,7 +1150,7 @@ Any set of bytes can be output in the strings. 
Use the `JSONEachRow` format if y ### Usage of Nested Structures {#jsoneachrow-nested} -If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) setting. +If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) setting. For example, consider the following table: @@ -1776,7 +1776,7 @@ message MessageType { ``` ClickHouse tries to find a column named `x.y.z` (or `x_y_z` or `X.y_Z` and so on). -Nested messages are suitable to input or output a [nested data structures](/docs/en/sql-reference/data-types/nested-data-structures/nested.md). +Nested messages are suitable to input or output a [nested data structures](/docs/en/sql-reference/data-types/nested-data-structures/index.md). Default values defined in a protobuf schema like this @@ -1978,7 +1978,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t - [output_format_parquet_row_group_size](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`. - [output_format_parquet_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`. -- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) table in Parquet input format. Default value - `false`. +- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/index.md) table in Parquet input format. Default value - `false`. - [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`. - [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. - [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. 
diff --git a/docs/en/interfaces/overview.md b/docs/en/interfaces/overview.md index e5fa503e8fc..ee47e010f9e 100644 --- a/docs/en/interfaces/overview.md +++ b/docs/en/interfaces/overview.md @@ -6,7 +6,7 @@ keywords: [clickhouse, network, interfaces, http, tcp, grpc, command-line, clien description: ClickHouse provides three network interfaces --- -# Interfaces +# Drivers and Interfaces ClickHouse provides three network interfaces (they can be optionally wrapped in TLS for additional security): diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 69eb782868a..d58dd1376eb 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -331,7 +331,7 @@ It is also possible to `BACKUP`/`RESTORE` to S3 by configuring an S3 disk in the
- s3 + s3_plain
diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 172627c7c3e..aa991cd9f15 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -964,7 +964,7 @@ Default value: 1. ### input_format_arrow_import_nested {#input_format_arrow_import_nested} -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format. +Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format. Possible values: @@ -1024,7 +1024,7 @@ Default value: `none`. ### input_format_orc_import_nested {#input_format_orc_import_nested} -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format. +Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format. Possible values: @@ -1073,7 +1073,7 @@ Default value: `none`. ### input_format_parquet_import_nested {#input_format_parquet_import_nested} -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format. +Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format. Possible values: @@ -1538,6 +1538,6 @@ Default value: `1GiB`. ### input_format_native_allow_types_conversion {#input_format_native_allow_types_conversion} -Allow types conversion in Native input format between columns from input data and requested columns. +Allow types conversion in Native input format between columns from input data and requested columns. Enabled by default. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index f938c16b490..2fb7da4cb98 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3438,7 +3438,7 @@ Default value: `throw`. ## flatten_nested {#flatten-nested} -Sets the data format of a [nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns. +Sets the data format of a [nested](../../sql-reference/data-types/nested-data-structures/index.md) columns. 
Possible values: diff --git a/docs/en/operations/utilities/index.md b/docs/en/operations/utilities/index.md index b2f66af1084..bf6b3a63d23 100644 --- a/docs/en/operations/utilities/index.md +++ b/docs/en/operations/utilities/index.md @@ -1,7 +1,7 @@ --- slug: /en/operations/utilities/ sidebar_position: 56 -sidebar_label: Utilities +sidebar_label: List of tools and utilities pagination_next: 'en/operations/utilities/clickhouse-copier' --- diff --git a/docs/en/sql-reference/data-types/index.md b/docs/en/sql-reference/data-types/index.md index ef6a0fb3ea5..c61a3069db6 100644 --- a/docs/en/sql-reference/data-types/index.md +++ b/docs/en/sql-reference/data-types/index.md @@ -1,13 +1,33 @@ --- slug: /en/sql-reference/data-types/ -sidebar_label: Data Types +sidebar_label: List of data types sidebar_position: 37 --- -# Data Types +# ClickHouse Data Types -ClickHouse can store various kinds of data in table cells. +ClickHouse can store various kinds of data in table cells. This section describes the supported data types and special considerations for using and/or implementing them if any. -This section describes the supported data types and special considerations for using and/or implementing them if any. +:::note +You can check whether a data type name is case-sensitive in the [system.data_type_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) table. +::: -You can check whether data type name is case-sensitive in the [system.data_type_families](../../operations/system-tables/data_type_families.md#system_tables-data_type_families) table. +ClickHouse data types include: + +- **Integer types**: [signed and unsigned integers](./int-uint.md) (`UInt8`, `UInt16`, `UInt32`, `UInt64`, `UInt128`, `UInt256`, `Int8`, `Int16`, `Int32`, `Int64`, `Int128`, `Int256`) +- **Floating-point numbers**: [floats](./float.md)(`Float32` and `Float64`) and [`Decimal` values](./decimal.md) +- **Boolean**: ClickHouse has a [`Boolean` type](./boolean.md) +- **Strings**: [`String`](./string.md) and [`FixedString`](./fixedstring.md) +- **Dates**: use [`Date`](./date.md) and [`Date32`](./date32.md) for days, and [`DateTime`](./datetime.md) and [`DateTime64`](./datetime64.md) for instances in time +- **JSON**: the [`JSON` object](./json.md) stores a JSON document in a single column +- **UUID**: a performant option for storing [`UUID` values](./uuid.md) +- **Low cardinality types**: use an [`Enum`](./enum.md) when you have a handful of unique values, or use [`LowCardinality`](./lowcardinality.md) when you have up to 10,000 unique values of a column +- **Arrays**: any column can be defined as an [`Array` of values](./array.md) +- **Maps**: use [`Map`](./map.md) for storing key/value pairs +- **Aggregation function types**: use [`SimpleAggregateFunction`](./simpleaggregatefunction.md) and [`AggregateFunction`](./aggregatefunction.md) for storing the intermediate status of aggregate function results +- **Nested data structures**: A [`Nested` data structure](./nested-data-structures/index.md) is like a table inside a cell +- **Tuples**: A [`Tuple` of elements](./tuple.md), each having an individual type. 
+- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column getting its default value for the data type) +- **IP addresses**: use [`IPv4`](./domains/ipv4.md) and [`IPv6`](./domains/ipv6.md) to efficiently store IP addresses +- **Geo types**: for [geographical data](./geo.md), including `Point`, `Ring`, `Polygon` and `MultiPolygon` +- **Special data types**: including [`Expression`](./special-data-types/expression.md), [`Set`](./special-data-types/set.md), [`Nothing`](./special-data-types/nothing.md) and [`Interval`](./special-data-types/interval.md) \ No newline at end of file diff --git a/docs/en/sql-reference/data-types/nested-data-structures/index.md b/docs/en/sql-reference/data-types/nested-data-structures/index.md index 1d958c018d8..d118170cd39 100644 --- a/docs/en/sql-reference/data-types/nested-data-structures/index.md +++ b/docs/en/sql-reference/data-types/nested-data-structures/index.md @@ -1,7 +1,105 @@ --- -slug: /en/sql-reference/data-types/nested-data-structures/ -sidebar_label: Nested Data Structures -sidebar_position: 54 +slug: /en/sql-reference/data-types/nested-data-structures/nested +sidebar_position: 57 +sidebar_label: Nested(Name1 Type1, Name2 Type2, ...) --- -# Nested Data Structures +# Nested + +## Nested(name1 Type1, Name2 Type2, …) + +A nested data structure is like a table inside a cell. The parameters of a nested data structure – the column names and types – are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure. + +Example: + +``` sql +CREATE TABLE test.visits +( + CounterID UInt32, + StartDate Date, + Sign Int8, + IsNew UInt8, + VisitID UInt64, + UserID UInt64, + ... + Goals Nested + ( + ID UInt32, + Serial UInt32, + EventTime DateTime, + Price Int64, + OrderID String, + CurrencyID UInt32 + ), + ... +) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign) +``` + +This example declares the `Goals` nested data structure, which contains data about conversions (goals reached). Each row in the ‘visits’ table can correspond to zero or any number of conversions. + +When [flatten_nested](../../../operations/settings/settings.md#flatten-nested) is set to `0` (which is not by default), arbitrary levels of nesting are supported. + +In most cases, when working with a nested data structure, its columns are specified with column names separated by a dot. These columns make up an array of matching types. All the column arrays of a single nested data structure have the same length.
+ +Example: + +``` sql +SELECT + Goals.ID, + Goals.EventTime +FROM test.visits +WHERE CounterID = 101500 AND length(Goals.ID) < 5 +LIMIT 10 +``` + +``` text +┌─Goals.ID───────────────────────┬─Goals.EventTime───────────────────────────────────────────────────────────────────────────┐ +│ [1073752,591325,591325] │ ['2014-03-17 16:38:10','2014-03-17 16:38:48','2014-03-17 16:42:27'] │ +│ [1073752] │ ['2014-03-17 00:28:25'] │ +│ [1073752] │ ['2014-03-17 10:46:20'] │ +│ [1073752,591325,591325,591325] │ ['2014-03-17 13:59:20','2014-03-17 22:17:55','2014-03-17 22:18:07','2014-03-17 22:18:51'] │ +│ [] │ [] │ +│ [1073752,591325,591325] │ ['2014-03-17 11:37:06','2014-03-17 14:07:47','2014-03-17 14:36:21'] │ +│ [] │ [] │ +│ [] │ [] │ +│ [591325,1073752] │ ['2014-03-17 00:46:05','2014-03-17 00:46:05'] │ +│ [1073752,591325,591325,591325] │ ['2014-03-17 13:28:33','2014-03-17 13:30:26','2014-03-17 18:51:21','2014-03-17 18:51:45'] │ +└────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +It is easiest to think of a nested data structure as a set of multiple column arrays of the same length. + +The only place where a SELECT query can specify the name of an entire nested data structure instead of individual columns is the ARRAY JOIN clause. For more information, see “ARRAY JOIN clause”. Example: + +``` sql +SELECT + Goal.ID, + Goal.EventTime +FROM test.visits +ARRAY JOIN Goals AS Goal +WHERE CounterID = 101500 AND length(Goals.ID) < 5 +LIMIT 10 +``` + +``` text +┌─Goal.ID─┬──────Goal.EventTime─┐ +│ 1073752 │ 2014-03-17 16:38:10 │ +│ 591325 │ 2014-03-17 16:38:48 │ +│ 591325 │ 2014-03-17 16:42:27 │ +│ 1073752 │ 2014-03-17 00:28:25 │ +│ 1073752 │ 2014-03-17 10:46:20 │ +│ 1073752 │ 2014-03-17 13:59:20 │ +│ 591325 │ 2014-03-17 22:17:55 │ +│ 591325 │ 2014-03-17 22:18:07 │ +│ 591325 │ 2014-03-17 22:18:51 │ +│ 1073752 │ 2014-03-17 11:37:06 │ +└─────────┴─────────────────────┘ +``` + +You can’t perform SELECT for an entire nested data structure. You can only explicitly list individual columns that are part of it. + +For an INSERT query, you should pass all the component column arrays of a nested data structure separately (as if they were individual column arrays). During insertion, the system checks that they have the same length. + +For a DESCRIBE query, the columns in a nested data structure are listed separately in the same way. + +The ALTER query for elements in a nested data structure has limitations. diff --git a/docs/en/sql-reference/data-types/nested-data-structures/nested.md b/docs/en/sql-reference/data-types/nested-data-structures/nested.md deleted file mode 100644 index d118170cd39..00000000000 --- a/docs/en/sql-reference/data-types/nested-data-structures/nested.md +++ /dev/null @@ -1,105 +0,0 @@ ---- -slug: /en/sql-reference/data-types/nested-data-structures/nested -sidebar_position: 57 -sidebar_label: Nested(Name1 Type1, Name2 Type2, ...) ---- - -# Nested - -## Nested(name1 Type1, Name2 Type2, …) - -A nested data structure is like a table inside a cell. The parameters of a nested data structure – the column names and types – are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure. - -Example: - -``` sql -CREATE TABLE test.visits -( - CounterID UInt32, - StartDate Date, - Sign Int8, - IsNew UInt8, - VisitID UInt64, - UserID UInt64, - ... 
- Goals Nested - ( - ID UInt32, - Serial UInt32, - EventTime DateTime, - Price Int64, - OrderID String, - CurrencyID UInt32 - ), - ... -) ENGINE = CollapsingMergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192, Sign) -``` - -This example declares the `Goals` nested data structure, which contains data about conversions (goals reached). Each row in the ‘visits’ table can correspond to zero or any number of conversions. - -When [flatten_nested](../../../operations/settings/settings.md#flatten-nested) is set to `0` (which is not by default), arbitrary levels of nesting are supported. - -In most cases, when working with a nested data structure, its columns are specified with column names separated by a dot. These columns make up an array of matching types. All the column arrays of a single nested data structure have the same length. - -Example: - -``` sql -SELECT - Goals.ID, - Goals.EventTime -FROM test.visits -WHERE CounterID = 101500 AND length(Goals.ID) < 5 -LIMIT 10 -``` - -``` text -┌─Goals.ID───────────────────────┬─Goals.EventTime───────────────────────────────────────────────────────────────────────────┐ -│ [1073752,591325,591325] │ ['2014-03-17 16:38:10','2014-03-17 16:38:48','2014-03-17 16:42:27'] │ -│ [1073752] │ ['2014-03-17 00:28:25'] │ -│ [1073752] │ ['2014-03-17 10:46:20'] │ -│ [1073752,591325,591325,591325] │ ['2014-03-17 13:59:20','2014-03-17 22:17:55','2014-03-17 22:18:07','2014-03-17 22:18:51'] │ -│ [] │ [] │ -│ [1073752,591325,591325] │ ['2014-03-17 11:37:06','2014-03-17 14:07:47','2014-03-17 14:36:21'] │ -│ [] │ [] │ -│ [] │ [] │ -│ [591325,1073752] │ ['2014-03-17 00:46:05','2014-03-17 00:46:05'] │ -│ [1073752,591325,591325,591325] │ ['2014-03-17 13:28:33','2014-03-17 13:30:26','2014-03-17 18:51:21','2014-03-17 18:51:45'] │ -└────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────────┘ -``` - -It is easiest to think of a nested data structure as a set of multiple column arrays of the same length. - -The only place where a SELECT query can specify the name of an entire nested data structure instead of individual columns is the ARRAY JOIN clause. For more information, see “ARRAY JOIN clause”. Example: - -``` sql -SELECT - Goal.ID, - Goal.EventTime -FROM test.visits -ARRAY JOIN Goals AS Goal -WHERE CounterID = 101500 AND length(Goals.ID) < 5 -LIMIT 10 -``` - -``` text -┌─Goal.ID─┬──────Goal.EventTime─┐ -│ 1073752 │ 2014-03-17 16:38:10 │ -│ 591325 │ 2014-03-17 16:38:48 │ -│ 591325 │ 2014-03-17 16:42:27 │ -│ 1073752 │ 2014-03-17 00:28:25 │ -│ 1073752 │ 2014-03-17 10:46:20 │ -│ 1073752 │ 2014-03-17 13:59:20 │ -│ 591325 │ 2014-03-17 22:17:55 │ -│ 591325 │ 2014-03-17 22:18:07 │ -│ 591325 │ 2014-03-17 22:18:51 │ -│ 1073752 │ 2014-03-17 11:37:06 │ -└─────────┴─────────────────────┘ -``` - -You can’t perform SELECT for an entire nested data structure. You can only explicitly list individual columns that are part of it. - -For an INSERT query, you should pass all the component column arrays of a nested data structure separately (as if they were individual column arrays). During insertion, the system checks that they have the same length. - -For a DESCRIBE query, the columns in a nested data structure are listed separately in the same way. - -The ALTER query for elements in a nested data structure has limitations. 
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 503ef66143e..d06ab253cf7 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1232,12 +1232,14 @@ SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64 └───────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` -## formatDateTime +## formatDateTime {#date_time_functions-formatDateTime} Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column. formatDateTime uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format. +The opposite operation of this function is [parseDateTime](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTime). + Alias: `DATE_FORMAT`. **Syntax** @@ -1257,7 +1259,7 @@ Using replacement fields, you can define a pattern for the resulting string. “ |----------|---------------------------------------------------------|------------| | %a | abbreviated weekday name (Mon-Sun) | Mon | | %b | abbreviated month name (Jan-Dec) | Jan | -| %c | month as a decimal number (01-12) | 01 | +| %c | month as an integer number (01-12) | 01 | | %C | year divided by 100 and truncated to integer (00-99) | 20 | | %d | day of the month, zero-padded (01-31) | 02 | | %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 | @@ -1273,7 +1275,7 @@ Using replacement fields, you can define a pattern for the resulting string. “ | %j | day of the year (001-366) | 002 | | %k | hour in 24h format (00-23) | 22 | | %l | hour in 12h format (01-12) | 09 | -| %m | month as a decimal number (01-12) | 01 | +| %m | month as an integer number (01-12) | 01 | | %M | minute (00-59) | 33 | | %n | new-line character (‘’) | | | %p | AM or PM designation | PM | @@ -1286,7 +1288,7 @@ Using replacement fields, you can define a pattern for the resulting string. “ | %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S | 22:33:44 | | %u | ISO 8601 weekday as number with Monday as 1 (1-7) | 2 | | %V | ISO 8601 week number (01-53) | 01 | -| %w | weekday as a decimal number with Sunday as 0 (0-6) | 2 | +| %w | weekday as a integer number with Sunday as 0 (0-6) | 2 | | %W | full weekday name (Monday-Sunday) | Monday | | %y | Year, last two digits (00-99) | 18 | | %Y | Year | 2018 | @@ -1328,10 +1330,11 @@ Result: - [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax) -## formatDateTimeInJodaSyntax +## formatDateTimeInJodaSyntax {#date_time_functions-formatDateTimeInJodaSyntax} Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. +The opposite operation of this function is [parseDateTimeInJodaSyntax](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTimeInJodaSyntax). 
**Replacement fields** diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 5d96113fe50..90f6cf0aa7d 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1148,6 +1148,85 @@ Result: └───────────────────────────┴──────────────────────────────┘ ``` +## parseDateTime {#type_conversion_functions-parseDateTime} + +Converts a [String](/docs/en/sql-reference/data-types/string.md) to [DateTime](/docs/en/sql-reference/data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format). + +This function is the opposite operation of function [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime). + +**Syntax** + +``` sql +parseDateTime(str, format[, timezone]) +``` + +**Arguments** + +- `str` — the String to be parsed +- `format` — the format string +- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional. + +**Returned value(s)** + +Returns DateTime values parsed from input string according to a MySQL style format string. + +**Supported format specifiers** + +All format specifiers listed in [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) except: +- %f: fractional second +- %Q: Quarter (1-4) + +**Example** + +``` sql +SELECT parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s') + +┌─parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s')─┐ +│ 2021-01-04 23:00:00 │ +└───────────────────────────────────────────────────────────┘ +``` + +Alias: `TO_TIMESTAMP`. + +## parseDateTimeInJodaSyntax {#type_conversion_functions-parseDateTimeInJodaSyntax} + +Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax. + +This function is the opposite operation of function [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax). + +**Syntax** + +``` sql +parseDateTimeInJodaSyntax(str, format[, timezone]) +``` + +**Arguments** + +- `str` — the String to be parsed +- `format` — the format string +- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional. + +**Returned value(s)** + +Returns DateTime values parsed from input string according to a Joda style format. 
+ +**Supported format specifiers** + +All format specifiers listed in [formatDateTimeInJoda](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) are supported, except: +- S: fraction of second +- z: time zone +- Z: time zone offset/id + +**Example** + +``` sql +SELECT parseDateTimeInJodaSyntax('2023-02-24 14:53:31', 'yyyy-MM-dd HH:mm:ss', 'Europe/Minsk') + +┌─parseDateTimeInJodaSyntax('2023-02-24 14:53:31', 'yyyy-MM-dd HH:mm:ss', 'Europe/Minsk')─┐ +│ 2023-02-24 14:53:31 │ +└─────────────────────────────────────────────────────────────────────────────────────────┘ +``` + ## parseDateTimeBestEffort ## parseDateTime32BestEffort @@ -1351,7 +1430,6 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that this function prefers US date format (`MM/DD/YYYY` etc.) in case of ambiguity and returns zero date or zero date time when it encounters a date format that cannot be processed. - ## toLowCardinality Converts input parameter to the [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) version of same data type. diff --git a/docs/en/sql-reference/statements/describe-table.md b/docs/en/sql-reference/statements/describe-table.md index 4864743abbc..b9190107127 100644 --- a/docs/en/sql-reference/statements/describe-table.md +++ b/docs/en/sql-reference/statements/describe-table.md @@ -24,9 +24,9 @@ The `DESCRIBE` statement returns a row for each table column with the following - `ttl_expression` — A [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) expression. - `is_subcolumn` — A flag that equals `1` for internal subcolumns. It is included into the result only if subcolumn description is enabled by the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting. -All columns in [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) data structures are described separately. The name of each column is prefixed with a parent column name and a dot. +All columns in [Nested](../../sql-reference/data-types/nested-data-structures/index.md) data structures are described separately. The name of each column is prefixed with a parent column name and a dot. -To show internal subcolumns of other data types, use the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting. +To show internal subcolumns of other data types, use the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting. **Example** diff --git a/docs/en/sql-reference/statements/index.md b/docs/en/sql-reference/statements/index.md index b286d8c932d..100b8889aaa 100644 --- a/docs/en/sql-reference/statements/index.md +++ b/docs/en/sql-reference/statements/index.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/statements/ sidebar_position: 1 -sidebar_label: Statements +sidebar_label: List of statements --- -# ClickHouse SQL Statements +# ClickHouse SQL Statements Statements represent various kinds of action you can perform using SQL queries. 
Each kind of statement has it’s own syntax and usage details that are described separately: diff --git a/docs/en/sql-reference/statements/select/array-join.md b/docs/en/sql-reference/statements/select/array-join.md index b8e6be24798..3d88a0f9b7a 100644 --- a/docs/en/sql-reference/statements/select/array-join.md +++ b/docs/en/sql-reference/statements/select/array-join.md @@ -185,7 +185,7 @@ SETTINGS enable_unaligned_array_join = 1; ## ARRAY JOIN with Nested Data Structure -`ARRAY JOIN` also works with [nested data structures](../../../sql-reference/data-types/nested-data-structures/nested.md): +`ARRAY JOIN` also works with [nested data structures](../../../sql-reference/data-types/nested-data-structures/index.md): ``` sql CREATE TABLE nested_test diff --git a/docs/zh/engines/database-engines/materialize-mysql.md b/docs/zh/engines/database-engines/materialize-mysql.md index b7ee3a038b8..5d1394f9456 100644 --- a/docs/zh/engines/database-engines/materialize-mysql.md +++ b/docs/zh/engines/database-engines/materialize-mysql.md @@ -97,7 +97,7 @@ CREATE DATABASE mysql ENGINE = MaterializeMySQL('localhost:3306', 'db', 'user', ### DDL查询 {#ddl-queries} -MySQL DDL查询转换为相应的ClickHouse DDL查询([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md))。如果ClickHouse无法解析某个DDL查询,则该查询将被忽略。 +MySQL DDL查询转换为相应的ClickHouse DDL查询([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md))。如果ClickHouse无法解析某个DDL查询,则该查询将被忽略。 ### Data Replication {#data-replication} diff --git a/docs/zh/engines/database-engines/materialized-mysql.md b/docs/zh/engines/database-engines/materialized-mysql.md index 4cc4ae58840..5c735556c48 100644 --- a/docs/zh/engines/database-engines/materialized-mysql.md +++ b/docs/zh/engines/database-engines/materialized-mysql.md @@ -109,7 +109,7 @@ MySQL中的Time 类型,会被ClickHouse转换成微秒来存储 ### DDL Queries {#ddl-queries} -MySQL DDL 语句会被转换成对应的ClickHouse DDL 语句,比如: ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create/index.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). 如果ClickHouse 无法解析某些语句DDL 操作,则会跳过。 +MySQL DDL 语句会被转换成对应的ClickHouse DDL 语句,比如: ([ALTER](../../sql-reference/statements/alter/index.md), [CREATE](../../sql-reference/statements/create.md), [DROP](../../sql-reference/statements/drop.md), [RENAME](../../sql-reference/statements/rename.md)). 如果ClickHouse 无法解析某些语句DDL 操作,则会跳过。 ### 数据复制 {#data-replication} diff --git a/docs/zh/faq/general.md b/docs/zh/faq/general.md index 5a95b9aad07..530be7f08d8 100644 --- a/docs/zh/faq/general.md +++ b/docs/zh/faq/general.md @@ -1,5 +1,5 @@ --- -slug: /zh/faq/general +slug: /zh/faq/general/overview --- # 常见问题 {#chang-jian-wen-ti} diff --git a/docs/zh/faq/general/index.md b/docs/zh/faq/general/index.md index 8b0b42cede2..9693e7ffc82 100644 --- a/docs/zh/faq/general/index.md +++ b/docs/zh/faq/general/index.md @@ -21,8 +21,7 @@ sidebar_label: General - [我如何为 ClickHouse贡献代码?](../../faq/general/how-do-i-contribute-code-to-clickhouse.md) - !!! info "没找到您需要的内容?" - 请查阅 [其他 F.A.Q. 类别](../../faq/) 或者从左侧导航栏浏览其他文档 - + 请查阅 [其他 F.A.Q. 
类别](../../faq/index.md) 或者从左侧导航栏浏览其他文档 + {## [原始文档](https://clickhouse.com/docs/en/faq/general/) ##} diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md index 54827388013..51167521018 100644 --- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md +++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md @@ -338,6 +338,12 @@ UserID.bin,URL.bin,和EventTime.bin是UserID :::note - 最后一个颗粒(1082颗粒)是少于8192行的。 +- 我们在本指南开头的“DDL 语句详细信息”中提到,我们禁用了自适应索引粒度(为了简化本指南中的讨论,并使图表和结果可重现)。 + + 因此,示例表中所有颗粒(除了最后一个)都具有相同大小。 + +- 对于具有自适应索引粒度的表(默认情况下索引粒度是自适应的),某些粒度的大小可以小于 8192 行,具体取决于行数据大小。 + - 我们将主键列(UserID, URL)中的一些列值标记为橙色。 这些橙色标记的列值是每个颗粒中每个主键列的最小值。这里的例外是最后一个颗粒(上图中的颗粒1082),最后一个颗粒我们标记的是最大的值。 diff --git a/docs/zh/sql-reference/functions/geo/index.mdx b/docs/zh/sql-reference/functions/geo/index.mdx deleted file mode 100644 index fcfc4bd4717..00000000000 --- a/docs/zh/sql-reference/functions/geo/index.mdx +++ /dev/null @@ -1,10 +0,0 @@ ---- -slug: /zh/sql-reference/functions/geo/ -sidebar_label: Geo -sidebar_position: 62 -title: "Geo Functions" ---- - -import Content from '@site/docs/en/sql-reference/functions/geo/index.md'; - - diff --git a/docs/zh/sql-reference/statements/alter/index.md b/docs/zh/sql-reference/statements/alter/index.md index 8320b207725..e173837a16c 100644 --- a/docs/zh/sql-reference/statements/alter/index.md +++ b/docs/zh/sql-reference/statements/alter/index.md @@ -1,5 +1,5 @@ --- -slug: /zh/sql-reference/statements/alter/ +slug: /zh/sql-reference/statements/alter/overview sidebar_position: 35 sidebar_label: ALTER --- diff --git a/docs/zh/sql-reference/statements/create/index.md b/docs/zh/sql-reference/statements/create/index.md deleted file mode 100644 index f63ed0a7acd..00000000000 --- a/docs/zh/sql-reference/statements/create/index.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -slug: /zh/sql-reference/statements/create/ -sidebar_label: CREATE -sidebar_position: 34 ---- - -# CREATE语法 {#create-queries} - -CREATE语法包含以下子集: - -- [DATABASE](../../../sql-reference/statements/create/database.md) diff --git a/docs/zh/sql-reference/statements/index.md b/docs/zh/sql-reference/statements/index.md index 989c368ebc4..2fdfeb1786f 100644 --- a/docs/zh/sql-reference/statements/index.md +++ b/docs/zh/sql-reference/statements/index.md @@ -10,7 +10,7 @@ sidebar_position: 31 - [SELECT](../../sql-reference/statements/select/index.md) - [INSERT INTO](../../sql-reference/statements/insert-into.md) -- [CREATE](../../sql-reference/statements/create/index.md) +- [CREATE](../../sql-reference/statements/create.md) - [ALTER](../../sql-reference/statements/alter/index.md) - [SYSTEM](../../sql-reference/statements/system.md) - [SHOW](../../sql-reference/statements/show.md) diff --git a/src/Backups/BackupCoordinationRemote.cpp b/src/Backups/BackupCoordinationRemote.cpp index e6399cafb8f..e7ea9becd24 100644 --- a/src/Backups/BackupCoordinationRemote.cpp +++ b/src/Backups/BackupCoordinationRemote.cpp @@ -810,12 +810,9 @@ bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic &) if (existing_backup_uuid == toString(backup_uuid)) continue; - String status; - if (zk->tryGet(root_zookeeper_path + "/" + existing_backup_path + "/stage", status)) - { - if (status != Stage::COMPLETED) - return true; - } + const auto status = zk->get(root_zookeeper_path + "/" + existing_backup_path + "/stage"); + if (status != Stage::COMPLETED) + return true; } zk->createIfNotExists(backup_stage_path, ""); diff --git 
a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index a031221a725..3cee4a8e718 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -349,12 +349,14 @@ The server successfully detected this situation and will download merged part fr M(DiskS3PutObject, "Number of DiskS3 API PutObject calls.") \ M(DiskS3GetObject, "Number of DiskS3 API GetObject calls.") \ \ - M(ReadBufferFromS3Microseconds, "Time spend in reading from S3.") \ - M(ReadBufferFromS3InitMicroseconds, "Time spend initializing connection to S3.") \ + M(ReadBufferFromS3Microseconds, "Time spent on reading from S3.") \ + M(ReadBufferFromS3InitMicroseconds, "Time spent initializing connection to S3.") \ M(ReadBufferFromS3Bytes, "Bytes read from S3.") \ M(ReadBufferFromS3RequestsErrors, "Number of exceptions while reading from S3.") \ \ + M(WriteBufferFromS3Microseconds, "Time spent on writing to S3.") \ M(WriteBufferFromS3Bytes, "Bytes written to S3.") \ + M(WriteBufferFromS3RequestsErrors, "Number of exceptions while writing to S3.") \ \ M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \ \ diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index 2172aa4c4fe..bbb4c3ba5b0 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -54,55 +55,19 @@ struct FormatDateTimeTraits }; -template struct ActionValueTypeMap {}; -template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; -template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; -template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; -template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; -template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; -template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; -template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; -template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; -template <> struct ActionValueTypeMap { using ActionValueType = UInt16; }; -template <> struct ActionValueTypeMap { using ActionValueType = Int32; }; -template <> struct ActionValueTypeMap { using ActionValueType = UInt32; }; -template <> struct ActionValueTypeMap { using ActionValueType = Int64; }; - -/// Counts the number of literal characters in Joda format string until the next closing literal -/// sequence single quote. Returns -1 if no literal single quote was found. -/// In Joda format string(https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) -/// literal content must be quoted with single quote. and two single quote means literal with one single quote. -/// For example: -/// Format string: "'aaaa'", unescaped literal: "aaaa"; -/// Format string: "'aa''aa'", unescaped literal: "aa'aa"; -/// Format string: "'aaa''aa" is not valid because of missing of end single quote. -Int64 numLiteralChars(const char * cur, const char * end) -{ - bool found = false; - Int64 count = 0; - while (cur < end) - { - if (*cur == '\'') - { - if (cur + 1 < end && *(cur + 1) == '\'') - { - count += 2; - cur += 2; - } - else - { - found = true; - break; - } - } - else - { - ++count; - ++cur; - } - } - return found ? 
count : -1; -} +template struct InstructionValueTypeMap {}; +template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; +template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; +template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; +template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; +template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; +template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; +template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; +template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; +template <> struct InstructionValueTypeMap { using InstructionValueType = UInt16; }; +template <> struct InstructionValueTypeMap { using InstructionValueType = Int32; }; +template <> struct InstructionValueTypeMap { using InstructionValueType = UInt32; }; +template <> struct InstructionValueTypeMap { using InstructionValueType = Int64; }; /// Cast value from integer to string, making sure digits number in result string is no less than total_digits by padding leading '0'. String padValue(UInt32 val, size_t min_digits) @@ -184,7 +149,7 @@ private: } template - class Action + class Instruction { public: /// Using std::function will cause performance degradation in MySQL format by 0.45x. @@ -201,8 +166,8 @@ private: /// extra_shift is only used in MySQL format syntax. It is always 0 in Joda format syntax. size_t extra_shift = 0; - /// Action for appending date/time related number in specified format. - explicit Action(Func && func_) : func(std::move(func_)) {} + /// Instruction for appending date/time related number in specified format. 
+ explicit Instruction(Func && func_) : func(std::move(func_)) {} void perform(char *& dest, Time source, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & timezone) { @@ -825,8 +790,8 @@ public: if constexpr (std::is_same_v) scale = times->getScale(); - using T = typename ActionValueTypeMap::ActionValueType; - std::vector> instructions; + using T = typename InstructionValueTypeMap::InstructionValueType; + std::vector> instructions; String out_template; auto result_size = parseFormat(format, instructions, scale, out_template); @@ -898,27 +863,25 @@ public: } template - size_t parseFormat(const String & format, std::vector> & instructions, UInt32 scale, String & out_template) const + size_t parseFormat(const String & format, std::vector> & instructions, UInt32 scale, String & out_template) const { + static_assert( + format_syntax == FormatDateTimeTraits::FormatSyntax::MySQL || format_syntax == FormatDateTimeTraits::FormatSyntax::Joda, + "format syntax must be one of MySQL or Joda"); + if constexpr (format_syntax == FormatDateTimeTraits::FormatSyntax::MySQL) return parseMySQLFormat(format, instructions, scale, out_template); - else if constexpr (format_syntax == FormatDateTimeTraits::FormatSyntax::Joda) - return parseJodaFormat(format, instructions, scale, out_template); else - throw Exception( - ErrorCodes::NOT_IMPLEMENTED, - "Unknown datetime format style {} in function {}", - magic_enum::enum_name(format_syntax), - getName()); + return parseJodaFormat(format, instructions, scale, out_template); } template - size_t parseMySQLFormat(const String & format, std::vector> & instructions, UInt32 scale, String & out_template) const + size_t parseMySQLFormat(const String & format, std::vector> & instructions, UInt32 scale, String & out_template) const { auto add_extra_shift = [&](size_t amount) { if (instructions.empty()) - instructions.emplace_back(&Action::mysqlNoop); + instructions.emplace_back(&Instruction::mysqlNoop); instructions.back().extra_shift += amount; }; @@ -931,7 +894,7 @@ public: }; const char * pos = format.data(); - const char * const end = pos + format.size(); + const char * const end = format.data() + format.size(); while (true) { @@ -953,43 +916,43 @@ public: { // Abbreviated weekday [Mon...Sun] case 'a': - instructions.emplace_back(&Action::mysqlDayOfWeekTextShort); + instructions.emplace_back(&Instruction::mysqlDayOfWeekTextShort); out_template += "Mon"; break; // Abbreviated month [Jan...Dec] case 'b': - instructions.emplace_back(&Action::mysqlMonthOfYearTextShort); + instructions.emplace_back(&Instruction::mysqlMonthOfYearTextShort); out_template += "Jan"; break; - // Month as a decimal number (01-12) + // Month as a integer number (01-12) case 'c': - instructions.emplace_back(&Action::mysqlMonth); + instructions.emplace_back(&Instruction::mysqlMonth); out_template += "00"; break; // Year, divided by 100, zero-padded case 'C': - instructions.emplace_back(&Action::mysqlCentury); + instructions.emplace_back(&Instruction::mysqlCentury); out_template += "00"; break; // Day of month, zero-padded (01-31) case 'd': - instructions.emplace_back(&Action::mysqlDayOfMonth); + instructions.emplace_back(&Instruction::mysqlDayOfMonth); out_template += "00"; break; // Short MM/DD/YY date, equivalent to %m/%d/%y case 'D': - instructions.emplace_back(&Action::mysqlAmericanDate); + instructions.emplace_back(&Instruction::mysqlAmericanDate); out_template += "00/00/00"; break; // Day of month, space-padded ( 1-31) 23 case 'e': - 
instructions.emplace_back(&Action::mysqlDayOfMonthSpacePadded); + instructions.emplace_back(&Instruction::mysqlDayOfMonthSpacePadded); out_template += " 0"; break; @@ -997,86 +960,86 @@ public: case 'f': { /// If the time data type has no fractional part, then we print '0' as the fractional part. - instructions.emplace_back(&Action::mysqlFractionalSecond); + instructions.emplace_back(&Instruction::mysqlFractionalSecond); out_template += String(std::max(1, scale), '0'); break; } // Short YYYY-MM-DD date, equivalent to %Y-%m-%d 2001-08-23 case 'F': - instructions.emplace_back(&Action::mysqlISO8601Date); + instructions.emplace_back(&Instruction::mysqlISO8601Date); out_template += "0000-00-00"; break; // Last two digits of year of ISO 8601 week number (see %G) case 'g': - instructions.emplace_back(&Action::mysqlISO8601Year2); + instructions.emplace_back(&Instruction::mysqlISO8601Year2); out_template += "00"; break; // Year of ISO 8601 week number (see %V) case 'G': - instructions.emplace_back(&Action::mysqlISO8601Year4); + instructions.emplace_back(&Instruction::mysqlISO8601Year4); out_template += "0000"; break; // Day of the year (001-366) 235 case 'j': - instructions.emplace_back(&Action::mysqlDayOfYear); + instructions.emplace_back(&Instruction::mysqlDayOfYear); out_template += "000"; break; - // Month as a decimal number (01-12) + // Month as a integer number (01-12) case 'm': - instructions.emplace_back(&Action::mysqlMonth); + instructions.emplace_back(&Instruction::mysqlMonth); out_template += "00"; break; // ISO 8601 weekday as number with Monday as 1 (1-7) case 'u': - instructions.emplace_back(&Action::mysqlDayOfWeek); + instructions.emplace_back(&Instruction::mysqlDayOfWeek); out_template += "0"; break; // ISO 8601 week number (01-53) case 'V': - instructions.emplace_back(&Action::mysqlISO8601Week); + instructions.emplace_back(&Instruction::mysqlISO8601Week); out_template += "00"; break; - // Weekday as a decimal number with Sunday as 0 (0-6) 4 + // Weekday as a integer number with Sunday as 0 (0-6) 4 case 'w': - instructions.emplace_back(&Action::mysqlDayOfWeek0To6); + instructions.emplace_back(&Instruction::mysqlDayOfWeek0To6); out_template += "0"; break; // Full weekday [Monday...Sunday] case 'W': - instructions.emplace_back(&Action::mysqlDayOfWeekTextLong); + instructions.emplace_back(&Instruction::mysqlDayOfWeekTextLong); out_template += "Monday"; break; // Two digits year case 'y': - instructions.emplace_back(&Action::mysqlYear2); + instructions.emplace_back(&Instruction::mysqlYear2); out_template += "00"; break; // Four digits year case 'Y': - instructions.emplace_back(&Action::mysqlYear4); + instructions.emplace_back(&Instruction::mysqlYear4); out_template += "0000"; break; // Quarter (1-4) case 'Q': - instructions.template emplace_back(&Action::mysqlQuarter); + instructions.template emplace_back(&Instruction::mysqlQuarter); out_template += "0"; break; // Offset from UTC timezone as +hhmm or -hhmm case 'z': - instructions.emplace_back(&Action::mysqlTimezoneOffset); + instructions.emplace_back(&Instruction::mysqlTimezoneOffset); out_template += "+0000"; break; @@ -1084,79 +1047,79 @@ public: // Minute (00-59) case 'M': - add_instruction_or_extra_shift(&Action::mysqlMinute, 2); + add_instruction_or_extra_shift(&Instruction::mysqlMinute, 2); out_template += "00"; break; // AM or PM case 'p': - add_instruction_or_extra_shift(&Action::mysqlAMPM, 2); + add_instruction_or_extra_shift(&Instruction::mysqlAMPM, 2); out_template += "AM"; break; // 12-hour HH:MM time, equivalent to %h:%i 
%p 2:55 PM case 'r': - add_instruction_or_extra_shift(&Action::mysqlHHMM12, 8); + add_instruction_or_extra_shift(&Instruction::mysqlHHMM12, 8); out_template += "12:00 AM"; break; // 24-hour HH:MM time, equivalent to %H:%i 14:55 case 'R': - add_instruction_or_extra_shift(&Action::mysqlHHMM24, 5); + add_instruction_or_extra_shift(&Instruction::mysqlHHMM24, 5); out_template += "00:00"; break; // Seconds case 's': - add_instruction_or_extra_shift(&Action::mysqlSecond, 2); + add_instruction_or_extra_shift(&Instruction::mysqlSecond, 2); out_template += "00"; break; // Seconds case 'S': - add_instruction_or_extra_shift(&Action::mysqlSecond, 2); + add_instruction_or_extra_shift(&Instruction::mysqlSecond, 2); out_template += "00"; break; // ISO 8601 time format (HH:MM:SS), equivalent to %H:%i:%S 14:55:02 case 'T': - add_instruction_or_extra_shift(&Action::mysqlISO8601Time, 8); + add_instruction_or_extra_shift(&Instruction::mysqlISO8601Time, 8); out_template += "00:00:00"; break; // Hour in 12h format (01-12) case 'h': - add_instruction_or_extra_shift(&Action::mysqlHour12, 2); + add_instruction_or_extra_shift(&Instruction::mysqlHour12, 2); out_template += "12"; break; // Hour in 24h format (00-23) case 'H': - add_instruction_or_extra_shift(&Action::mysqlHour24, 2); + add_instruction_or_extra_shift(&Instruction::mysqlHour24, 2); out_template += "00"; break; // Minute of hour range [0, 59] case 'i': - add_instruction_or_extra_shift(&Action::mysqlMinute, 2); + add_instruction_or_extra_shift(&Instruction::mysqlMinute, 2); out_template += "00"; break; // Hour in 12h format (01-12) case 'I': - add_instruction_or_extra_shift(&Action::mysqlHour12, 2); + add_instruction_or_extra_shift(&Instruction::mysqlHour12, 2); out_template += "12"; break; // Hour in 24h format (00-23) case 'k': - add_instruction_or_extra_shift(&Action::mysqlHour24, 2); + add_instruction_or_extra_shift(&Instruction::mysqlHour24, 2); out_template += "00"; break; // Hour in 12h format (01-12) case 'l': - add_instruction_or_extra_shift(&Action::mysqlHour12, 2); + add_instruction_or_extra_shift(&Instruction::mysqlHour12, 2); out_template += "12"; break; @@ -1209,7 +1172,7 @@ public: } template - size_t parseJodaFormat(const String & format, std::vector> & instructions, UInt32, String &) const + size_t parseJodaFormat(const String & format, std::vector> & instructions, UInt32, String &) const { /// If the argument was DateTime, add instruction for printing. 
If it was date, just append default literal auto add_instruction = [&](auto && func [[maybe_unused]], const String & default_literal [[maybe_unused]]) @@ -1217,13 +1180,12 @@ public: if constexpr (std::is_same_v || std::is_same_v) instructions.emplace_back(func); else - instructions.emplace_back(std::bind_front(&Action::template jodaLiteral, default_literal)); + instructions.emplace_back(std::bind_front(&Instruction::template jodaLiteral, default_literal)); }; size_t reserve_size = 0; const char * pos = format.data(); - const char * end = pos + format.size(); - + const char * end = format.data() + format.size(); while (pos < end) { const char * cur_token = pos; @@ -1235,7 +1197,7 @@ public: if (pos + 1 < end && *(pos + 1) == '\'') { std::string_view literal(cur_token, 1); - instructions.emplace_back(std::bind_front(&Action::template jodaLiteral, literal)); + instructions.emplace_back(std::bind_front(&Instruction::template jodaLiteral, literal)); ++reserve_size; pos += 2; } @@ -1251,7 +1213,7 @@ public: { std::string_view literal(cur_token + i, 1); instructions.emplace_back( - std::bind_front(&Action::template jodaLiteral, literal)); + std::bind_front(&Instruction::template jodaLiteral, literal)); ++reserve_size; if (*(cur_token + i) == '\'') i += 1; @@ -1272,115 +1234,115 @@ public: switch (*cur_token) { case 'G': - instructions.emplace_back(std::bind_front(&Action::jodaEra, repetitions)); + instructions.emplace_back(std::bind_front(&Instruction::jodaEra, repetitions)); reserve_size += repetitions <= 3 ? 2 : 13; break; case 'C': - instructions.emplace_back(std::bind_front(&Action::jodaCenturyOfEra, repetitions)); + instructions.emplace_back(std::bind_front(&Instruction::jodaCenturyOfEra, repetitions)); /// Year range [1900, 2299] reserve_size += std::max(repetitions, 2); break; case 'Y': - instructions.emplace_back(std::bind_front(&Action::jodaYearOfEra, repetitions)); + instructions.emplace_back(std::bind_front(&Instruction::jodaYearOfEra, repetitions)); /// Year range [1900, 2299] reserve_size += repetitions == 2 ? 2 : std::max(repetitions, 4); break; case 'x': - instructions.emplace_back(std::bind_front(&Action::jodaWeekYear, repetitions)); + instructions.emplace_back(std::bind_front(&Instruction::jodaWeekYear, repetitions)); /// weekyear range [1900, 2299] reserve_size += std::max(repetitions, 4); break; case 'w': - instructions.emplace_back(std::bind_front(&Action::jodaWeekOfWeekYear, repetitions)); + instructions.emplace_back(std::bind_front(&Instruction::jodaWeekOfWeekYear, repetitions)); /// Week of weekyear range [1, 52] reserve_size += std::max(repetitions, 2); break; case 'e': - instructions.emplace_back(std::bind_front(&Action::jodaDayOfWeek1Based, repetitions)); + instructions.emplace_back(std::bind_front(&Instruction::jodaDayOfWeek1Based, repetitions)); /// Day of week range [1, 7] reserve_size += std::max(repetitions, 1); break; case 'E': - instructions.emplace_back(std::bind_front(&Action::jodaDayOfWeekText, repetitions)); + instructions.emplace_back(std::bind_front(&Instruction::jodaDayOfWeekText, repetitions)); /// Maximum length of short name is 3, maximum length of full name is 9. reserve_size += repetitions <= 3 ? 3 : 9; break; case 'y': - instructions.emplace_back(std::bind_front(&Action::jodaYear, repetitions)); + instructions.emplace_back(std::bind_front(&Instruction::jodaYear, repetitions)); /// Year range [1900, 2299] reserve_size += repetitions == 2 ? 
2 : std::max(repetitions, 4); break; case 'D': - instructions.emplace_back(std::bind_front(&Action::jodaDayOfYear, repetitions)); + instructions.emplace_back(std::bind_front(&Instruction::jodaDayOfYear, repetitions)); /// Day of year range [1, 366] reserve_size += std::max(repetitions, 3); break; case 'M': if (repetitions <= 2) { - instructions.emplace_back(std::bind_front(&Action::jodaMonthOfYear, repetitions)); + instructions.emplace_back(std::bind_front(&Instruction::jodaMonthOfYear, repetitions)); /// Month of year range [1, 12] reserve_size += 2; } else { - instructions.emplace_back(std::bind_front(&Action::jodaMonthOfYearText, repetitions)); + instructions.emplace_back(std::bind_front(&Instruction::jodaMonthOfYearText, repetitions)); /// Maximum length of short name is 3, maximum length of full name is 9. reserve_size += repetitions <= 3 ? 3 : 9; } break; case 'd': - instructions.emplace_back(std::bind_front(&Action::jodaDayOfMonth, repetitions)); + instructions.emplace_back(std::bind_front(&Instruction::jodaDayOfMonth, repetitions)); /// Day of month range [1, 3] reserve_size += std::max(repetitions, 3); break; case 'a': /// Default half day of day is "AM" - add_instruction(std::bind_front(&Action::jodaHalfDayOfDay, repetitions), "AM"); + add_instruction(std::bind_front(&Instruction::jodaHalfDayOfDay, repetitions), "AM"); reserve_size += 2; break; case 'K': /// Default hour of half day is 0 add_instruction( - std::bind_front(&Action::jodaHourOfHalfDay, repetitions), padValue(0, repetitions)); + std::bind_front(&Instruction::jodaHourOfHalfDay, repetitions), padValue(0, repetitions)); /// Hour of half day range [0, 11] reserve_size += std::max(repetitions, 2); break; case 'h': /// Default clock hour of half day is 12 add_instruction( - std::bind_front(&Action::jodaClockHourOfHalfDay, repetitions), + std::bind_front(&Instruction::jodaClockHourOfHalfDay, repetitions), padValue(12, repetitions)); /// Clock hour of half day range [1, 12] reserve_size += std::max(repetitions, 2); break; case 'H': /// Default hour of day is 0 - add_instruction(std::bind_front(&Action::jodaHourOfDay, repetitions), padValue(0, repetitions)); + add_instruction(std::bind_front(&Instruction::jodaHourOfDay, repetitions), padValue(0, repetitions)); /// Hour of day range [0, 23] reserve_size += std::max(repetitions, 2); break; case 'k': /// Default clock hour of day is 24 - add_instruction(std::bind_front(&Action::jodaClockHourOfDay, repetitions), padValue(24, repetitions)); + add_instruction(std::bind_front(&Instruction::jodaClockHourOfDay, repetitions), padValue(24, repetitions)); /// Clock hour of day range [1, 24] reserve_size += std::max(repetitions, 2); break; case 'm': /// Default minute of hour is 0 - add_instruction(std::bind_front(&Action::jodaMinuteOfHour, repetitions), padValue(0, repetitions)); + add_instruction(std::bind_front(&Instruction::jodaMinuteOfHour, repetitions), padValue(0, repetitions)); /// Minute of hour range [0, 59] reserve_size += std::max(repetitions, 2); break; case 's': /// Default second of minute is 0 - add_instruction(std::bind_front(&Action::jodaSecondOfMinute, repetitions), padValue(0, repetitions)); + add_instruction(std::bind_front(&Instruction::jodaSecondOfMinute, repetitions), padValue(0, repetitions)); /// Second of minute range [0, 59] reserve_size += std::max(repetitions, 2); break; case 'S': /// Default fraction of second is 0 - instructions.emplace_back(std::bind_front(&Action::jodaFractionOfSecond, repetitions)); + 
instructions.emplace_back(std::bind_front(&Instruction::jodaFractionOfSecond, repetitions)); /// 'S' repetitions range [0, 9] reserve_size += repetitions <= 9 ? repetitions : 9; break; @@ -1388,7 +1350,7 @@ public: if (repetitions <= 3) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Short name time zone is not yet supported"); - instructions.emplace_back(std::bind_front(&Action::jodaTimezone, repetitions)); + instructions.emplace_back(std::bind_front(&Instruction::jodaTimezone, repetitions)); /// Longest length of full name of time zone is 32. reserve_size += 32; break; @@ -1399,7 +1361,7 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for {}", String(cur_token, repetitions)); std::string_view literal(cur_token, pos - cur_token); - instructions.emplace_back(std::bind_front(&Action::template jodaLiteral, literal)); + instructions.emplace_back(std::bind_front(&Instruction::template jodaLiteral, literal)); reserve_size += pos - cur_token; break; } diff --git a/src/Functions/numLiteralChars.h b/src/Functions/numLiteralChars.h new file mode 100644 index 00000000000..ba7a0fbf193 --- /dev/null +++ b/src/Functions/numLiteralChars.h @@ -0,0 +1,44 @@ +#pragma once + +#include + +namespace DB +{ + +/// Counts the number of literal characters in Joda format string until the next closing literal +/// sequence single quote. Returns -1 if no literal single quote was found. +/// In Joda format string(https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) +/// literal content must be quoted with single quote. and two single quote means literal with one single quote. +/// For example: +/// Format string: "'aaaa'", unescaped literal: "aaaa"; +/// Format string: "'aa''aa'", unescaped literal: "aa'aa"; +/// Format string: "'aaa''aa" is not valid because of missing of end single quote. +inline Int64 numLiteralChars(const char * cur, const char * end) +{ + bool found = false; + Int64 count = 0; + while (cur < end) + { + if (*cur == '\'') + { + if (cur + 1 < end && *(cur + 1) == '\'') + { + count += 2; + cur += 2; + } + else + { + found = true; + break; + } + } + else + { + ++count; + ++cur; + } + } + return found ? 
count : -1; +} + +} diff --git a/src/Functions/parseDateTime.cpp b/src/Functions/parseDateTime.cpp new file mode 100644 index 00000000000..7799520b8e5 --- /dev/null +++ b/src/Functions/parseDateTime.cpp @@ -0,0 +1,1765 @@ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; + extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; + extern const int CANNOT_PARSE_DATETIME; + extern const int NOT_ENOUGH_SPACE; +} + +namespace +{ + using Pos = const char *; + + constexpr Int32 minYear = 1970; + constexpr Int32 maxYear = 2106; + + const std::unordered_map> dayOfWeekMap{ + {"mon", {"day", 1}}, + {"tue", {"sday", 2}}, + {"wed", {"nesday", 3}}, + {"thu", {"rsday", 4}}, + {"fri", {"day", 5}}, + {"sat", {"urday", 6}}, + {"sun", {"day", 7}}, + }; + + const std::unordered_map> monthMap{ + {"jan", {"uary", 1}}, + {"feb", {"ruary", 2}}, + {"mar", {"rch", 3}}, + {"apr", {"il", 4}}, + {"may", {"", 5}}, + {"jun", {"e", 6}}, + {"jul", {"y", 7}}, + {"aug", {"ust", 8}}, + {"sep", {"tember", 9}}, + {"oct", {"ober", 10}}, + {"nov", {"ember", 11}}, + {"dec", {"ember", 12}}, + }; + + /// key: month, value: total days of current month if current year is leap year. + constexpr Int32 leapDays[] = {0, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + + /// key: month, value: total days of current month if current year is not leap year. + constexpr Int32 normalDays[] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + + /// key: month, value: cumulative days from January to current month(inclusive) if current year is leap year. + constexpr Int32 cumulativeLeapDays[] = {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}; + + /// key: month, value: cumulative days from January to current month(inclusive) if current year is not leap year. + constexpr Int32 cumulativeDays[] = {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365}; + + /// key: year, value: cumulative days from epoch(1970-01-01) to the first day of current year(exclusive). 
+ constexpr Int32 cumulativeYearDays[] + = {0, 365, 730, 1096, 1461, 1826, 2191, 2557, 2922, 3287, 3652, 4018, 4383, 4748, 5113, 5479, 5844, 6209, + 6574, 6940, 7305, 7670, 8035, 8401, 8766, 9131, 9496, 9862, 10227, 10592, 10957, 11323, 11688, 12053, 12418, 12784, + 13149, 13514, 13879, 14245, 14610, 14975, 15340, 15706, 16071, 16436, 16801, 17167, 17532, 17897, 18262, 18628, 18993, 19358, + 19723, 20089, 20454, 20819, 21184, 21550, 21915, 22280, 22645, 23011, 23376, 23741, 24106, 24472, 24837, 25202, 25567, 25933, + 26298, 26663, 27028, 27394, 27759, 28124, 28489, 28855, 29220, 29585, 29950, 30316, 30681, 31046, 31411, 31777, 32142, 32507, + 32872, 33238, 33603, 33968, 34333, 34699, 35064, 35429, 35794, 36160, 36525, 36890, 37255, 37621, 37986, 38351, 38716, 39082, + 39447, 39812, 40177, 40543, 40908, 41273, 41638, 42004, 42369, 42734, 43099, 43465, 43830, 44195, 44560, 44926, 45291, 45656, + 46021, 46387, 46752, 47117, 47482, 47847, 48212, 48577, 48942, 49308, 49673}; + + struct DateTime + { + /// If both week_date_format and week_date_format is false, date is composed of year, month and day + Int32 year = 1970; /// year, range [1970, 2106] + Int32 month = 1; /// month of year, range [1, 12] + Int32 day = 1; /// day of month, range [1, 31] + + Int32 week = 1; /// ISO week of year, range [1, 53] + Int32 day_of_week = 1; /// day of week, range [1, 7], 1 represents Monday, 2 represents Tuesday... + bool week_date_format + = false; /// If true, date is composed of week year(reuse year), week of year(use week) and day of week(use day_of_week) + + Int32 day_of_year = 1; /// day of year, range [1, 366] + bool day_of_year_format = false; /// If true, date is composed of year(reuse year), day of year(use day_of_year) + + bool is_year_of_era = false; /// If true, year is calculated from era and year of era, the latter cannot be zero or negative. + bool has_year = false; /// Whether year was explicitly specified. + + /// If is_clock_hour = true, is_hour_of_half_day = true, hour's range is [1, 12] + /// If is_clock_hour = true, is_hour_of_half_day = false, hour's range is [1, 24] + /// If is_clock_hour = false, is_hour_of_half_day = true, hour's range is [0, 11] + /// If is_clock_hour = false, is_hour_of_half_day = false, hour's range is [0, 23] + Int32 hour = 0; + Int32 minute = 0; /// range [0, 59] + Int32 second = 0; /// range [0, 59] + + bool is_am = true; /// If is_hour_of_half_day = true and is_am = false (i.e. pm) then add 12 hours to the result DateTime + bool is_clock_hour = false; /// Whether the hour is clockhour + bool is_hour_of_half_day = false; /// Whether the hour is of half day + + bool has_time_zone_offset = false; /// If true, time zone offset is explicitly specified. + Int64 time_zone_offset = 0; /// Offset in seconds between current timezone to UTC. 
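The flags above record which of the three date representations (plain year/month/day, ISO week date, day of year) the parsed fragments actually populated. As a reading aid only (not part of the patch, the free-function name is made up), the selection that buildDateTime() further below performs boils down to:

    Int32 daysSinceEpochFor(const DateTime & d)
    {
        if (d.week_date_format)          /// set by e.g. MySQL %V/%u or Joda x/w/e fragments
            return DateTime::daysSinceEpochFromWeekDate(d.year, d.week, d.day_of_week);
        if (d.day_of_year_format)        /// set by e.g. MySQL %j or Joda D fragments
            return DateTime::daysSinceEpochFromDayOfYear(d.year, d.day_of_year);
        return DateTime::daysSinceEpochFromDate(d.year, d.month, d.day);   /// plain %Y-%m-%d style input
    }
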
+ + void reset() + { + year = 1970; + month = 1; + day = 1; + + week = 1; + day_of_week = 1; + week_date_format = false; + + day_of_year = 1; + day_of_year_format = false; + + is_year_of_era = false; + has_year = false; + + hour = 0; + minute = 0; + second = 0; + + is_am = true; + is_clock_hour = false; + is_hour_of_half_day = false; + + has_time_zone_offset = false; + time_zone_offset = 0; + } + + /// Input text is expected to be lowered by caller + void setEra(const String & text) // NOLINT + { + if (text == "bc") + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Era BC exceeds the range of DateTime"); + else if (text != "ad") + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Unknown era {}", text); + } + + void setCentury(Int32 century) + { + if (century < 19 || century > 21) + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for century must be in the range [19, 21]", century); + + year = 100 * century; + has_year = true; + } + + void setYear(Int32 year_, bool is_year_of_era_ = false, bool is_week_year = false) + { + if (year_ < minYear || year_ > maxYear) + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for year must be in the range [{}, {}]", year_, minYear, maxYear); + + year = year_; + has_year = true; + is_year_of_era = is_year_of_era_; + if (is_week_year) + { + week_date_format = true; + day_of_year_format = false; + } + } + + void setYear2(Int32 year_) + { + if (year_ >= 70 && year_ < 100) + year_ += 1900; + else if (year_ >= 0 && year_ < 70) + year_ += 2000; + else + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for year2 must be in the range [0, 99]", year_); + + setYear(year_, false, false); + } + + void setMonth(Int32 month_) + { + if (month_ < 1 || month_ > 12) + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for month of year must be in the range [1, 12]", month_); + + month = month_; + week_date_format = false; + day_of_year_format = false; + if (!has_year) + { + has_year = true; + year = 2000; + } + } + + void setWeek(Int32 week_) + { + if (week_ < 1 || week_ > 53) + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for week of week year must be in the range [1, 53]", week_); + + week = week_; + week_date_format = true; + day_of_year_format = false; + if (!has_year) + { + has_year = true; + year = 2000; + } + } + + void setDayOfYear(Int32 day_of_year_) + { + if (day_of_year_ < 1 || day_of_year_ > 366) + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for day of year must be in the range [1, 366]", day_of_year_); + + day_of_year = day_of_year_; + day_of_year_format = true; + week_date_format = false; + if (!has_year) + { + has_year = true; + year = 2000; + } + } + + void setDayOfMonth(Int32 day_of_month) + { + if (day_of_month < 1 || day_of_month > 31) + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for day of month must be in the range [1, 31]", day_of_month); + + day = day_of_month; + week_date_format = false; + day_of_year_format = false; + if (!has_year) + { + has_year = true; + year = 2000; + } + } + + void setDayOfWeek(Int32 day_of_week_) + { + if (day_of_week_ < 1 || day_of_week_ > 7) + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for day of week must be in the range [1, 7]", day_of_week_); + + day_of_week = day_of_week_; + week_date_format = true; + day_of_year_format = false; + if (!has_year) + { + has_year = true; + year = 2000; + } + } + + /// Input text is expected to be lowered by caller + void setAMPM(const String & text) + { + if (text == 
"am") + is_am = true; + else if (text == "pm") + is_am = false; + else + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Unknown half day of day: {}", text); + } + + void setHour(Int32 hour_, bool is_hour_of_half_day_ = false, bool is_clock_hour_ = false) + { + Int32 max_hour; + Int32 min_hour; + Int32 new_hour = hour_; + if (!is_hour_of_half_day_ && !is_clock_hour_) + { + max_hour = 23; + min_hour = 0; + } + else if (!is_hour_of_half_day_ && is_clock_hour_) + { + max_hour = 24; + min_hour = 1; + new_hour = hour_ % 24; + } + else if (is_hour_of_half_day_ && !is_clock_hour_) + { + max_hour = 11; + min_hour = 0; + } + else + { + max_hour = 12; + min_hour = 1; + new_hour = hour_ % 12; + } + + if (hour_ < min_hour || hour_ > max_hour) + throw Exception( + ErrorCodes::CANNOT_PARSE_DATETIME, + "Value {} for hour must be in the range [{}, {}] if_hour_of_half_day={} and is_clock_hour={}", + hour, + max_hour, + min_hour, + is_hour_of_half_day_, + is_clock_hour_); + + hour = new_hour; + is_hour_of_half_day = is_hour_of_half_day_; + is_clock_hour = is_clock_hour_; + } + + void setMinute(Int32 minute_) + { + if (minute_ < 0 || minute_ > 59) + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for minute must be in the range [0, 59]", minute_); + + minute = minute_; + } + + void setSecond(Int32 second_) + { + if (second_ < 0 || second_ > 59) + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Value {} for second must be in the range [0, 59]", second_); + + second = second_; + } + + /// For debug + [[maybe_unused]] String toString() const + { + String res; + res += "year:" + std::to_string(year); + res += ","; + res += "month:" + std::to_string(month); + res += ","; + res += "day:" + std::to_string(day); + res += ","; + res += "hour:" + std::to_string(hour); + res += ","; + res += "minute:" + std::to_string(minute); + res += ","; + res += "second:" + std::to_string(second); + res += ","; + res += "AM:" + std::to_string(is_am); + return res; + } + + static bool isLeapYear(Int32 year_) { return year_ % 4 == 0 && (year_ % 100 != 0 || year_ % 400 == 0); } + + static bool isDateValid(Int32 year_, Int32 month_, Int32 day_) + { + /// The range of month[1, 12] and day[1, 31] already checked before + bool leap = isLeapYear(year_); + return (year_ >= minYear && year_ <= maxYear) && ((leap && day_ <= leapDays[month_]) || (!leap && day_ <= normalDays[month_])); + } + + static bool isDayOfYearValid(Int32 year_, Int32 day_of_year_) + { + /// The range of day_of_year[1, 366] already checked before + bool leap = isLeapYear(year_); + return (year_ >= minYear && year_ <= maxYear) && (day_of_year_ <= 365 + (leap ? 
1 : 0)); + } + + static Int32 extractISODayOfTheWeek(Int32 days_since_epoch) + { + if (days_since_epoch < 0) + { + // negative date: start off at 4 and cycle downwards + return (7 - ((-days_since_epoch + 3) % 7)); + } + else + { + // positive date: start off at 4 and cycle upwards + return ((days_since_epoch + 3) % 7) + 1; + } + } + + static Int32 daysSinceEpochFromWeekDate(int32_t week_year_, int32_t week_of_year_, int32_t day_of_week_) + { + /// The range of week_of_year[1, 53], day_of_week[1, 7] already checked before + if (week_year_ < minYear || week_year_ > maxYear) + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid week year {}", week_year_); + + Int32 days_since_epoch_of_jan_fourth = daysSinceEpochFromDate(week_year_, 1, 4); + Int32 first_day_of_week_year = extractISODayOfTheWeek(days_since_epoch_of_jan_fourth); + return days_since_epoch_of_jan_fourth - (first_day_of_week_year - 1) + 7 * (week_of_year_ - 1) + day_of_week_ - 1; + } + + static Int32 daysSinceEpochFromDayOfYear(Int32 year_, Int32 day_of_year_) + { + if (!isDayOfYearValid(year_, day_of_year_)) + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid day of year, year:{} day of year:{}", year_, day_of_year_); + + Int32 res = daysSinceEpochFromDate(year_, 1, 1); + res += day_of_year_ - 1; + return res; + } + + static Int32 daysSinceEpochFromDate(Int32 year_, Int32 month_, Int32 day_) + { + if (!isDateValid(year_, month_, day_)) + throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid date, year:{} month:{} day:{}", year_, month_, day_); + + Int32 res = cumulativeYearDays[year_ - 1970]; + res += isLeapYear(year_) ? cumulativeLeapDays[month_ - 1] : cumulativeDays[month_ - 1]; + res += day_ - 1; + return res; + } + + Int64 buildDateTime(const DateLUTImpl & time_zone) + { + if (is_hour_of_half_day && !is_am) + hour += 12; + + // Convert the parsed date/time into a timestamp. + Int32 days_since_epoch; + if (week_date_format) + days_since_epoch = daysSinceEpochFromWeekDate(year, week, day_of_week); + else if (day_of_year_format) + days_since_epoch = daysSinceEpochFromDayOfYear(year, day_of_year); + else + days_since_epoch = daysSinceEpochFromDate(year, month, day); + + Int64 seconds_since_epoch = days_since_epoch * 86400UZ + hour * 3600UZ + minute * 60UZ + second; + + /// Time zone is not specified, use local time zone + if (!has_time_zone_offset) + time_zone_offset = time_zone.timezoneOffset(seconds_since_epoch); + + /// Time zone is specified in format string. 
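 A few worked values for the arithmetic above, added as an annotation only (not part of the patch):
    /// extractISODayOfTheWeek(0) == 4                  -- 1970-01-01 was a Thursday (ISO: Mon = 1)
    /// daysSinceEpochFromDate(2023, 3, 7) == 19423     -- 19358 (years up to 2023) + 59 (Jan + Feb) + 6
    /// a "%z" fragment of "+0800" gives time_zone_offset == 28800, so the wall-clock seconds
    /// computed above are reduced by eight hours to obtain the UTC timestamp right below.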
+ if (seconds_since_epoch >= time_zone_offset) + seconds_since_epoch -= time_zone_offset; + else + throw Exception(ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE, "Seconds since epoch is negative"); + + return seconds_since_epoch; + } + }; + + enum class ParseSyntax + { + MySQL, + Joda + }; + + /// _FUNC_(str[, format, timezone]) + template + class FunctionParseDateTimeImpl : public IFunction + { + public: + static constexpr auto name = Name::name; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override { return name; } + + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 1 && arguments.size() != 2 && arguments.size() != 3) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1, 2 or 3", + getName(), + arguments.size()); + + if (!isString(arguments[0].type)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}. Should be String", + arguments[0].type->getName(), + getName()); + + if (arguments.size() > 1 && !isString(arguments[1].type)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of second argument of function {}. Should be String", + arguments[0].type->getName(), + getName()); + + if (arguments.size() > 2 && !isString(arguments[2].type)) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of third argument of function {}. Should be String", + arguments[0].type->getName(), + getName()); + + String time_zone_name = getTimeZone(arguments).getTimeZone(); + return std::make_shared(time_zone_name); + } + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override + { + const auto * col_str = checkAndGetColumn(arguments[0].column.get()); + if (!col_str) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first ('str') argument of function {}. Must be string.", + arguments[0].column->getName(), + getName()); + + String format = getFormat(arguments); + const auto & time_zone = getTimeZone(arguments); + std::vector instructions = parseFormat(format); + + auto col_res = ColumnDateTime::create(); + col_res->reserve(input_rows_count); + auto & res_data = col_res->getData(); + + /// Make datetime fit in a cache line. + alignas(64) DateTime datetime; + for (size_t i = 0; i < input_rows_count; ++i) + { + datetime.reset(); + + StringRef str_ref = col_str->getDataAt(i); + Pos cur = str_ref.data; + Pos end = str_ref.data + str_ref.size; + for (const auto & instruction : instructions) + { + cur = instruction.perform(cur, end, datetime); + } + + // Ensure all input was consumed. 
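 As an annotation (not part of the patch): for the default MySQL format the loop above executes the following instruction sequence, and any input left over afterwards is rejected right below:
    /// "%Y-%m-%d %H:%M:%S"  ->  mysqlYear4, "-", mysqlMonth, "-", mysqlDayOfMonth, " ",
    ///                          mysqlHour24, ":", mysqlMinute, ":", mysqlSecond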
+ if (cur < end) + throw Exception( + ErrorCodes::CANNOT_PARSE_DATETIME, + "Invalid format input {} is malformed at {}", + str_ref.toView(), + std::string_view(cur, end - cur)); + + Int64 time = datetime.buildDateTime(time_zone); + res_data.push_back(static_cast(time)); + } + + return col_res; + } + + + private: + class Instruction + { + private: + enum class NeedCheckSpace + { + Yes, + No + }; + + using Func = std::conditional_t< + parse_syntax == ParseSyntax::MySQL, + Pos (*)(Pos, Pos, const String &, DateTime &), + std::function>; + const Func func{}; + const String func_name; + const String literal; /// Only used when current instruction parses literal + const String fragment; /// Parsed fragments in MySQL or Joda format string + + public: + explicit Instruction(Func && func_, const char * func_name_, const std::string_view & fragment_) + : func(std::move(func_)), func_name(func_name_), fragment(fragment_) + { + } + + explicit Instruction(const String & literal_) : literal(literal_), fragment("LITERAL") { } + explicit Instruction(String && literal_) : literal(std::move(literal_)), fragment("LITERAL") { } + + /// For debug + [[maybe_unused]] String toString() const + { + if (func) + return "func:" + func_name + ",fragment:" + fragment; + else + return "literal:" + literal + ",fragment:" + fragment; + } + + Pos perform(Pos cur, Pos end, DateTime & date) const + { + if (func) + return func(cur, end, fragment, date); + else + { + /// literal: + checkSpace(cur, end, literal.size(), "insufficient space to parse literal", fragment); + if (std::string_view(cur, literal.size()) != literal) + throw Exception( + ErrorCodes::CANNOT_PARSE_DATETIME, + "Unable to parse fragment {} from {} because literal {} is expected but {} provided", + fragment, + std::string_view(cur, end - cur), + literal, + std::string_view(cur, literal.size())); + cur += literal.size(); + return cur; + } + } + + template + static Pos readNumber2(Pos cur, Pos end, [[maybe_unused]] const String & fragment, T & res) + { + if constexpr (need_check_space == NeedCheckSpace::Yes) + checkSpace(cur, end, 2, "readNumber2 requires size >= 2", fragment); + + res = (*cur - '0'); + ++cur; + res = res * 10 + (*cur - '0'); + ++cur; + return cur; + } + + template + static Pos readNumber3(Pos cur, Pos end, [[maybe_unused]] const String & fragment, T & res) + { + if constexpr (need_check_space == NeedCheckSpace::Yes) + checkSpace(cur, end, 3, "readNumber4 requires size >= 3", fragment); + + res = (*cur - '0'); + ++cur; + res = res * 10 + (*cur - '0'); + ++cur; + res = res * 10 + (*cur - '0'); + ++cur; + return cur; + } + + template + static Pos readNumber4(Pos cur, Pos end, [[maybe_unused]] const String & fragment, T & res) + { + if constexpr (need_check_space == NeedCheckSpace::Yes) + checkSpace(cur, end, 4, "readNumber4 requires size >= 4", fragment); + + res = (*cur - '0'); + ++cur; + res = res * 10 + (*cur - '0'); + ++cur; + res = res * 10 + (*cur - '0'); + ++cur; + res = res * 10 + (*cur - '0'); + ++cur; + return cur; + } + + static void checkSpace(Pos cur, Pos end, size_t len, const String & msg, const String & fragment) + { + if (cur > end || cur + len > end) [[unlikely]] + throw Exception( + ErrorCodes::NOT_ENOUGH_SPACE, + "Unable to parse fragment {} from {} because {}", + fragment, + std::string_view(cur, end - cur), + msg); + } + + template + static Pos assertChar(Pos cur, Pos end, char expected, const String & fragment) + { + if constexpr (need_check_space == NeedCheckSpace::Yes) + checkSpace(cur, end, 1, "assertChar requires size >= 
1", fragment); + + if (*cur != expected) + throw Exception( + ErrorCodes::CANNOT_PARSE_DATETIME, + "Unable to parse fragment {} from {} because char {} is expected but {} provided", + fragment, + std::string_view(cur, end - cur), + String(expected, 1), + String(*cur, 1)); + + ++cur; + return cur; + } + + static Pos mysqlDayOfWeekTextShort(Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 3, "mysqlDayOfWeekTextShort requires size >= 3", fragment); + + String text(cur, 3); + boost::to_lower(text); + auto it = dayOfWeekMap.find(text); + if (it == dayOfWeekMap.end()) + throw Exception( + ErrorCodes::CANNOT_PARSE_DATETIME, + "Unable to parse fragment {} from {} because of unknown day of week short text {} ", + fragment, + std::string_view(cur, end - cur), + text); + date.setDayOfWeek(it->second.second); + cur += 3; + return cur; + } + + static Pos mysqlMonthOfYearTextShort(Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 3, "mysqlMonthOfYearTextShort requires size >= 3", fragment); + + String text(cur, 3); + boost::to_lower(text); + auto it = monthMap.find(text); + if (it == monthMap.end()) + throw Exception( + ErrorCodes::CANNOT_PARSE_DATETIME, + "Unable to parse fragment {} from {} because of unknown month of year short text {}", + fragment, + std::string_view(cur, end - cur), + text); + + date.setMonth(it->second.second); + cur += 3; + return cur; + } + + static Pos mysqlMonth(Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 month; + cur = readNumber2(cur, end, fragment, month); + date.setMonth(month); + return cur; + } + + static Pos mysqlCentury(Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 century; + cur = readNumber2(cur, end, fragment, century); + date.setCentury(century); + return cur; + } + + static Pos mysqlDayOfMonth(Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 day_of_month; + cur = readNumber2(cur, end, fragment, day_of_month); + date.setDayOfMonth(day_of_month); + return cur; + } + + static Pos mysqlAmericanDate(Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 8, "mysqlAmericanDate requires size >= 8", fragment); + + Int32 month; + cur = readNumber2(cur, end, fragment, month); + cur = assertChar(cur, end, '/', fragment); + date.setMonth(month); + + Int32 day; + cur = readNumber2(cur, end, fragment, day); + cur = assertChar(cur, end, '/', fragment); + date.setDayOfMonth(day); + + Int32 year; + cur = readNumber2(cur, end, fragment, year); + date.setYear(year); + return cur; + } + + static Pos mysqlDayOfMonthSpacePadded(Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 2, "mysqlDayOfMonthSpacePadded requires size >= 2", fragment); + + Int32 day_of_month = *cur == ' ' ? 
0 : (*cur - '0'); + ++cur; + + day_of_month = 10 * day_of_month + (*cur - '0'); + ++cur; + + date.setDayOfMonth(day_of_month); + return cur; + } + + static Pos mysqlISO8601Date(Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 10, "mysqlISO8601Date requires size >= 10", fragment); + + Int32 year; + Int32 month; + Int32 day; + cur = readNumber4(cur, end, fragment, year); + cur = assertChar(cur, end, '-', fragment); + cur = readNumber2(cur, end, fragment, month); + cur = assertChar(cur, end, '-', fragment); + cur = readNumber2(cur, end, fragment, day); + + date.setYear(year); + date.setMonth(month); + date.setDayOfMonth(day); + return cur; + } + + static Pos mysqlISO8601Year2(Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 year2; + cur = readNumber2(cur, end, fragment, year2); + date.setYear2(year2); + return cur; + } + + static Pos mysqlISO8601Year4(Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 year; + cur = readNumber4(cur, end, fragment, year); + date.setYear(year); + return cur; + } + + static Pos mysqlDayOfYear(Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 day_of_year; + cur = readNumber3(cur, end, fragment, day_of_year); + date.setDayOfYear(day_of_year); + return cur; + } + + static Pos mysqlDayOfWeek(Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 1, "mysqlDayOfWeek requires size >= 1", fragment); + date.setDayOfWeek(*cur - '0'); + ++cur; + return cur; + } + + static Pos mysqlISO8601Week(Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 week; + cur = readNumber2(cur, end, fragment, week); + date.setWeek(week); + return cur; + } + + static Pos mysqlDayOfWeek0To6(Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 1, "mysqlDayOfWeek requires size >= 1", fragment); + + Int32 day_of_week = *cur - '0'; + if (day_of_week == 0) + day_of_week = 7; + + date.setDayOfWeek(day_of_week); + ++cur; + return cur; + } + + static Pos mysqlDayOfWeekTextLong(Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 6, "jodaDayOfWeekText requires size >= 6", fragment); + String text1(cur, 3); + boost::to_lower(text1); + auto it = dayOfWeekMap.find(text1); + if (it == dayOfWeekMap.end()) + throw Exception( + ErrorCodes::CANNOT_PARSE_DATETIME, + "Unable to parse first part of fragment {} from {} because of unknown day of week text: {}", + fragment, + std::string_view(cur, end - cur), + text1); + cur += 3; + + size_t expected_remaining_size = it->second.first.size(); + checkSpace(cur, end, expected_remaining_size, "jodaDayOfWeekText requires the second parg size >= " + std::to_string(expected_remaining_size), fragment); + String text2(cur, expected_remaining_size); + boost::to_lower(text2); + if (text2 != it->second.first) + throw Exception( + ErrorCodes::CANNOT_PARSE_DATETIME, + "Unable to parse second part of fragment {} from {} because of unknown day of week text: {}", + fragment, + std::string_view(cur, end - cur), + text1 + text2); + cur += expected_remaining_size; + + date.setDayOfWeek(it->second.second); + return cur; + } + + static Pos mysqlYear2(Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 year2; + cur = readNumber2(cur, end, fragment, year2); + date.setYear2(year2); + return cur; + } + + static Pos mysqlYear4(Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 year; + cur = readNumber4(cur, end, fragment, year); + 
date.setYear(year); + return cur; + } + + static Pos mysqlTimezoneOffset(Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 5, "mysqlTimezoneOffset requires size >= 5", fragment); + + Int32 sign; + if (*cur == '-') + sign = -1; + else if (*cur == '+') + sign = 1; + else + throw Exception( + ErrorCodes::CANNOT_PARSE_DATETIME, + "Unable to parse fragment {} from {} because of unknown sign time zone offset: {}", + fragment, + std::string_view(cur, end - cur), + std::string_view(cur, 1)); + ++cur; + + Int32 hour; + cur = readNumber2(cur, end, fragment, hour); + + Int32 minute; + cur = readNumber2(cur, end, fragment, minute); + + date.has_time_zone_offset = true; + date.time_zone_offset = sign * (hour * 3600 + minute * 60); + return cur; + } + + static Pos mysqlMinute(Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 minute; + cur = readNumber2(cur, end, fragment, minute); + date.setMinute(minute); + return cur; + } + + static Pos mysqlAMPM(Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 2, "mysqlAMPM requires size >= 2", fragment); + + String text(cur, 2); + boost::to_lower(text); + date.setAMPM(text); + cur += 2; + return cur; + } + + static Pos mysqlHHMM12(Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 8, "mysqlHHMM12 requires size >= 8", fragment); + + Int32 hour; + cur = readNumber2(cur, end, fragment, hour); + cur = assertChar(cur, end, ':', fragment); + date.setHour(hour, true, true); + + Int32 minute; + cur = readNumber2(cur, end, fragment, minute); + cur = assertChar(cur, end, ' ', fragment); + date.setMinute(minute); + + cur = mysqlAMPM(cur, end, fragment, date); + return cur; + } + + static Pos mysqlHHMM24(Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 5, "mysqlHHMM24 requires size >= 5", fragment); + + Int32 hour; + cur = readNumber2(cur, end, fragment, hour); + cur = assertChar(cur, end, ':', fragment); + date.setHour(hour, false, false); + + Int32 minute; + cur = readNumber2(cur, end, fragment, minute); + date.setMinute(minute); + return cur; + } + + static Pos mysqlSecond(Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 second; + cur = readNumber2(cur, end, fragment, second); + date.setSecond(second); + return cur; + } + + static Pos mysqlISO8601Time(Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 8, "mysqlISO8601Time requires size >= 8", fragment); + + Int32 hour; + Int32 minute; + Int32 second; + cur = readNumber2(cur, end, fragment, hour); + cur = assertChar(cur, end, ':', fragment); + cur = readNumber2(cur, end, fragment, minute); + cur = assertChar(cur, end, ':', fragment); + cur = readNumber2(cur, end, fragment, second); + + date.setHour(hour, false, false); + date.setMinute(minute); + date.setSecond(second); + return cur; + } + + static Pos mysqlHour12(Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 hour; + cur = readNumber2(cur, end, fragment, hour); + date.setHour(hour, true, true); + return cur; + } + + static Pos mysqlHour24(Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 hour; + cur = readNumber2(cur, end, fragment, hour); + date.setHour(hour, false, false); + return cur; + } + + static Pos readNumberWithVariableLength( + Pos cur, + Pos end, + bool allow_negative, + bool allow_plus_sign, + bool is_year, + int repetitions, + int max_digits_to_read, + const String & fragment, + Int32 & 
result) + { + bool negative = false; + if (allow_negative && cur < end && *cur == '-') + { + negative = true; + ++cur; + } + else if (allow_plus_sign && cur < end && *cur == '+') + { + negative = false; + ++cur; + } + + Int64 number = 0; + const Pos start = cur; + if (is_year && repetitions == 2) + { + // If abbreviated two year digit is provided in format string, try to read + // in two digits of year and convert to appropriate full length year The + // two-digit mapping is as follows: [00, 69] -> [2000, 2069] + // [70, 99] -> [1970, 1999] + // If more than two digits are provided, then simply read in full year + // normally without conversion + int count = 0; + while (cur < end && cur < start + max_digits_to_read && *cur >= '0' && *cur <= '9') + { + number = number * 10 + (*cur - '0'); + ++cur; + ++count; + } + if (count == 2) + { + if (number >= 70) + number += 1900; + else if (number >= 0 && number < 70) + number += 2000; + } + else + { + while (cur < end && cur < start + max_digits_to_read && *cur >= '0' && *cur <= '9') + { + number = number * 10 + (*cur - '0'); + ++cur; + } + } + } + else + { + while (cur < end && cur < start + max_digits_to_read && *cur >= '0' && *cur <= '9') + { + number = number * 10 + (*cur - '0'); + ++cur; + } + } + + /// Need to have read at least one digit. + if (cur == start) + throw Exception( + ErrorCodes::CANNOT_PARSE_DATETIME, + "Unable to parse fragment {} from {} because read number failed", + fragment, + std::string_view(cur, end - cur)); + + if (negative) + number *= -1; + + /// Check if number exceeds the range of Int32 + if (number < std::numeric_limits::lowest() || number > std::numeric_limits::max()) + throw Exception( + ErrorCodes::CANNOT_PARSE_DATETIME, + "Unable to parse fragment {} from {} because number is out of range of Int32", + fragment, + std::string_view(start, cur - start)); + result = static_cast(number); + + return cur; + } + + static Pos jodaEra(int, Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 2, "jodaEra requires size >= 2", fragment); + + String era(cur, 2); + boost::to_lower(era); + date.setEra(era); + cur += 2; + return cur; + } + + static Pos jodaCenturyOfEra(int repetitions, Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 century; + cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, repetitions, fragment, century); + date.setCentury(century); + return cur; + } + + static Pos jodaYearOfEra(int repetitions, Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 year_of_era; + cur = readNumberWithVariableLength(cur, end, false, false, true, repetitions, repetitions, fragment, year_of_era); + date.setYear(year_of_era, true); + return cur; + } + + static Pos jodaWeekYear(int repetitions, Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 week_year; + cur = readNumberWithVariableLength(cur, end, true, true, true, repetitions, repetitions, fragment, week_year); + date.setYear(week_year, false, true); + return cur; + } + + static Pos jodaWeekOfWeekYear(int repetitions, Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 week; + cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 2), fragment, week); + date.setWeek(week); + return cur; + } + + static Pos jodaDayOfWeek1Based(int repetitions, Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 day_of_week; + cur = readNumberWithVariableLength(cur, end, false, false, 
false, repetitions, repetitions, fragment, day_of_week); + date.setDayOfWeek(day_of_week); + return cur; + } + + static Pos + jodaDayOfWeekText(size_t /*min_represent_digits*/, Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 3, "jodaDayOfWeekText requires size >= 3", fragment); + + String text1(cur, 3); + boost::to_lower(text1); + auto it = dayOfWeekMap.find(text1); + if (it == dayOfWeekMap.end()) + throw Exception( + ErrorCodes::CANNOT_PARSE_DATETIME, + "Unable to parse fragment {} from {} because of unknown day of week text: {}", + fragment, + std::string_view(cur, end - cur), + text1); + cur += 3; + date.setDayOfWeek(it->second.second); + + size_t expected_remaining_size = it->second.first.size(); + if (cur + expected_remaining_size <= end) + { + String text2(cur, expected_remaining_size); + boost::to_lower(text2); + if (text2 == it->second.first) + { + cur += expected_remaining_size; + return cur; + } + } + return cur; + } + + static Pos jodaYear(int repetitions, Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 year; + cur = readNumberWithVariableLength(cur, end, true, true, true, repetitions, repetitions, fragment, year); + date.setYear(year); + return cur; + } + + static Pos jodaDayOfYear(int repetitions, Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 day_of_year; + cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 3), fragment, day_of_year); + date.setDayOfYear(day_of_year); + return cur; + } + + static Pos jodaMonthOfYear(int repetitions, Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 month; + cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, 2, fragment, month); + date.setMonth(month); + return cur; + } + + static Pos jodaMonthOfYearText(int, Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 3, "jodaMonthOfYearText requires size >= 3", fragment); + String text1(cur, 3); + boost::to_lower(text1); + auto it = monthMap.find(text1); + if (it == monthMap.end()) + throw Exception( + ErrorCodes::CANNOT_PARSE_DATETIME, + "Unable to parse fragment {} from {} because of unknown month of year text: {}", + fragment, + std::string_view(cur, end - cur), + text1); + cur += 3; + date.setMonth(it->second.second); + + size_t expected_remaining_size = it->second.first.size(); + if (cur + expected_remaining_size <= end) + { + String text2(cur, expected_remaining_size); + boost::to_lower(text2); + if (text2 == it->second.first) + { + cur += expected_remaining_size; + return cur; + } + } + return cur; + } + + static Pos jodaDayOfMonth(int repetitions, Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 day_of_month; + cur = readNumberWithVariableLength( + cur, end, false, false, false, repetitions, std::max(repetitions, 2), fragment, day_of_month); + date.setDayOfMonth(day_of_month); + return cur; + } + + static Pos jodaHalfDayOfDay(int, Pos cur, Pos end, const String & fragment, DateTime & date) + { + checkSpace(cur, end, 2, "jodaHalfDayOfDay requires size >= 2", fragment); + + String text(cur, 2); + boost::to_lower(text); + date.setAMPM(text); + cur += 2; + return cur; + } + + static Pos jodaHourOfHalfDay(int repetitions, Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 hour; + cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 2), fragment, hour); + date.setHour(hour, true, false); + 
return cur; + } + + static Pos jodaClockHourOfHalfDay(int repetitions, Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 hour; + cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 2), fragment, hour); + date.setHour(hour, true, true); + return cur; + } + + static Pos jodaHourOfDay(int repetitions, Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 hour; + cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 2), fragment, hour); + date.setHour(hour, false, false); + return cur; + } + + static Pos jodaClockHourOfDay(int repetitions, Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 hour; + cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 2), fragment, hour); + date.setHour(hour, false, true); + return cur; + } + + static Pos jodaMinuteOfHour(int repetitions, Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 minute; + cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 2), fragment, minute); + date.setMinute(minute); + return cur; + } + + static Pos jodaSecondOfMinute(int repetitions, Pos cur, Pos end, const String & fragment, DateTime & date) + { + Int32 second; + cur = readNumberWithVariableLength(cur, end, false, false, false, repetitions, std::max(repetitions, 2), fragment, second); + date.setSecond(second); + return cur; + } + }; + + std::vector parseFormat(const String & format) const + { + static_assert( + parse_syntax == ParseSyntax::MySQL || parse_syntax == ParseSyntax::Joda, + "parse syntax must be one of MySQL or Joda"); + + if constexpr (parse_syntax == ParseSyntax::MySQL) + return parseMysqlFormat(format); + else + return parseJodaFormat(format); + } + + std::vector parseMysqlFormat(const String & format) const + { +#define ACTION_ARGS(func) &(func), #func, std::string_view(pos - 1, 2) + + Pos pos = format.data(); + Pos end = format.data() + format.size(); + + std::vector instructions; + while (true) + { + Pos next_percent_pos = find_first_symbols<'%'>(pos, end); + + if (next_percent_pos < end) + { + if (pos < next_percent_pos) + instructions.emplace_back(String(pos, next_percent_pos - pos)); + + pos = next_percent_pos + 1; + if (pos >= end) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, "'%' must not be the last character in the format string, use '%%' instead"); + + switch (*pos) + { + // Abbreviated weekday [Mon...Sun] + case 'a': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlDayOfWeekTextShort)); + break; + + // Abbreviated month [Jan...Dec] + case 'b': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonthOfYearTextShort)); + break; + + // Month as a decimal number (01-12) + case 'c': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonth)); + break; + + // Year, divided by 100, zero-padded + case 'C': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlCentury)); + break; + + // Day of month, zero-padded (01-31) + case 'd': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlDayOfMonth)); + break; + + // Short MM/DD/YY date, equivalent to %m/%d/%y + case 'D': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlAmericanDate)); + break; + + // Day of month, space-padded ( 1-31) 23 + case 'e': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlDayOfMonthSpacePadded)); + break; + + + // Short YYYY-MM-DD date, equivalent to %Y-%m-%d 2001-08-23 
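 A note on the ACTION_ARGS helper used throughout this switch (annotation only, not part of the patch):
    /// ACTION_ARGS(Instruction::mysqlISO8601Date) expands to the three constructor arguments
    ///     &(Instruction::mysqlISO8601Date), "Instruction::mysqlISO8601Date", std::string_view(pos - 1, 2)
    /// i.e. the parsing function, its name for debug output, and the two-character fragment of the
    /// format string ("%F" for the case right below) that is echoed in "Unable to parse fragment ..."
    /// error messages.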
+ case 'F': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlISO8601Date)); + break; + + // Last two digits of year of ISO 8601 week number (see %G) + case 'g': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlISO8601Year2)); + break; + + // Year of ISO 8601 week number (see %V) + case 'G': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlISO8601Year4)); + break; + + // Day of the year (001-366) 235 + case 'j': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlDayOfYear)); + break; + + // Month as a decimal number (01-12) + case 'm': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMonth)); + break; + + // ISO 8601 weekday as number with Monday as 1 (1-7) + case 'u': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlDayOfWeek)); + break; + + // ISO 8601 week number (01-53) + case 'V': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlISO8601Week)); + break; + + // Weekday as a integer number with Sunday as 0 (0-6) 4 + case 'w': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlDayOfWeek0To6)); + break; + + // Full weekday [Monday...Sunday] + case 'W': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlDayOfWeekTextLong)); + break; + + // Two digits year + case 'y': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlYear2)); + break; + + // Four digits year + case 'Y': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlYear4)); + break; + + // Quarter (1-4) + case 'Q': + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for quarter"); + break; + + // Offset from UTC timezone as +hhmm or -hhmm + case 'z': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlTimezoneOffset)); + break; + + // Minute (00-59) + case 'M': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMinute)); + break; + + // AM or PM + case 'p': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlAMPM)); + break; + + // 12-hour HH:MM time, equivalent to %h:%i %p 2:55 PM + case 'r': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHHMM12)); + break; + + // 24-hour HH:MM time, equivalent to %H:%i 14:55 + case 'R': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHHMM24)); + break; + + // Seconds + case 's': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlSecond)); + break; + + // Seconds + case 'S': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlSecond)); + break; + + // ISO 8601 time format (HH:MM:SS), equivalent to %H:%i:%S 14:55:02 + case 'T': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlISO8601Time)); + break; + + // Hour in 12h format (01-12) + case 'h': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour12)); + break; + + // Hour in 24h format (00-23) + case 'H': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour24)); + break; + + // Minute of hour range [0, 59] + case 'i': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlMinute)); + break; + + // Hour in 12h format (01-12) + case 'I': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour12)); + break; + + // Hour in 24h format (00-23) + case 'k': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour24)); + break; + + // Hour in 12h format (01-12) + case 'l': + instructions.emplace_back(ACTION_ARGS(Instruction::mysqlHour12)); + break; + + case 't': + instructions.emplace_back("\t"); + break; + + case 'n': + instructions.emplace_back("\n"); + break; + + // Escaped literal characters. 
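 A short annotation on the literal cases (not part of the patch):
    /// "%t", "%n" and "%%" all become plain literal instructions; for example
    /// parseMysqlFormat("%d%%%m") yields { mysqlDayOfMonth, literal("%"), mysqlMonth },
    /// so the input must contain a literal '%' between the two parsed numbers.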
+ case '%': + instructions.emplace_back("%"); + break; + + /// Unimplemented + + /// Fractional seconds + case 'f': + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for fractional seconds"); + case 'U': + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for WEEK (Sun-Sat)"); + case 'v': + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for WEEK (Mon-Sun)"); + case 'x': + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for YEAR for week (Mon-Sun)"); + case 'X': + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for YEAR for week (Sun-Sat)"); + default: + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Incorrect syntax '{}', symbol is not supported '{}' for function {}", + format, + *pos, + getName()); + } + + ++pos; + } + else + { + /// Handle characters after last % + if (pos < end) + instructions.emplace_back(String(pos, end - pos)); + break; + } + } + return instructions; +#undef ACTION_ARGS + } + + std::vector parseJodaFormat(const String & format) const + { +#define ACTION_ARGS_WITH_BIND(func, arg) std::bind_front(&(func), (arg)), #func, std::string_view(cur_token, repetitions) + + Pos pos = format.data(); + Pos end = format.data() + format.size(); + + std::vector instructions; + while (pos < end) + { + Pos cur_token = pos; + + // Literal case + if (*cur_token == '\'') + { + // Case 1: 2 consecutive single quote + if (pos + 1 < end && *(pos + 1) == '\'') + { + instructions.emplace_back(String(cur_token, 1)); + pos += 2; + } + else + { + // Case 2: find closing single quote + Int64 count = numLiteralChars(cur_token + 1, end); + if (count == -1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No closing single quote for literal"); + else + { + for (Int64 i = 1; i <= count; i++) + { + instructions.emplace_back(String(cur_token + i, 1)); + if (*(cur_token + i) == '\'') + i += 1; + } + pos += count + 2; + } + } + } + else + { + int repetitions = 1; + ++pos; + while (pos < end && *cur_token == *pos) + { + ++repetitions; + ++pos; + } + switch (*cur_token) + { + case 'G': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaEra, repetitions)); + break; + case 'C': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaCenturyOfEra, repetitions)); + break; + case 'Y': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaYearOfEra, repetitions)); + break; + case 'x': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaWeekYear, repetitions)); + break; + case 'w': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaWeekOfWeekYear, repetitions)); + break; + case 'e': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaDayOfWeek1Based, repetitions)); + break; + case 'E': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaDayOfWeekText, repetitions)); + break; + case 'y': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaYear, repetitions)); + break; + case 'D': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaDayOfYear, repetitions)); + break; + case 'M': + if (repetitions <= 2) + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaMonthOfYear, repetitions)); + else + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaMonthOfYearText, repetitions)); + break; + case 'd': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaDayOfMonth, repetitions)); + break; + case 'a': + 
instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaHalfDayOfDay, repetitions)); + break; + case 'K': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaHourOfHalfDay, repetitions)); + break; + case 'h': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaClockHourOfHalfDay, repetitions)); + break; + case 'H': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaHourOfDay, repetitions)); + break; + case 'k': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaClockHourOfDay, repetitions)); + break; + case 'm': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaMinuteOfHour, repetitions)); + break; + case 's': + instructions.emplace_back(ACTION_ARGS_WITH_BIND(Instruction::jodaSecondOfMinute, repetitions)); + break; + case 'S': + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for fractional seconds"); + case 'z': + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for timezone"); + case 'Z': + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for timezone offset id"); + default: + if (isalpha(*cur_token)) + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, "format is not supported for {}", String(cur_token, repetitions)); + + instructions.emplace_back(String(cur_token, pos - cur_token)); + break; + } + } + } + return instructions; +#undef ACTION_ARGS_WITH_BIND + } + + + String getFormat(const ColumnsWithTypeAndName & arguments) const + { + if (arguments.size() < 2) + { + if constexpr (parse_syntax == ParseSyntax::Joda) + return "yyyy-MM-dd HH:mm:ss"; + else + return "%Y-%m-%d %H:%M:%S"; + } + + const auto * format_column = checkAndGetColumnConst(arguments[1].column.get()); + if (!format_column) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of second ('format') argument of function {}. Must be constant string.", + arguments[1].column->getName(), + getName()); + return format_column->getValue(); + } + + const DateLUTImpl & getTimeZone(const ColumnsWithTypeAndName & arguments) const + { + if (arguments.size() < 3) + return DateLUT::instance(); + + const auto * col = checkAndGetColumnConst(arguments[2].column.get()); + if (!col) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of third ('timezone') argument of function {}. 
Must be constant String.", + arguments[2].column->getName(), + getName()); + + String time_zone = col->getValue(); + return DateLUT::instance(time_zone); + } + }; + + struct NameParseDateTime + { + static constexpr auto name = "parseDateTime"; + }; + + struct NameParseDateTimeInJodaSyntax + { + static constexpr auto name = "parseDateTimeInJodaSyntax"; + }; + + + using FunctionParseDateTime = FunctionParseDateTimeImpl; + using FunctionParseDateTimeInJodaSyntax + = FunctionParseDateTimeImpl; +} + +REGISTER_FUNCTION(ParseDateTime) +{ + factory.registerFunction(); + factory.registerAlias("TO_UNIXTIME", FunctionParseDateTime::name); + + factory.registerFunction(); +} + + +} diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index ed25c101d7e..784110f735e 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -159,6 +159,8 @@ namespace detail if (out_stream_callback) request.setChunkedTransferEncoding(true); + else if (method == Poco::Net::HTTPRequest::HTTP_POST) + request.setContentLength(0); /// No callback - no body for (auto & [header, value] : http_header_entries) request.set(header, value); diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index dd90fd9387a..4c1b1b65d19 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -23,6 +23,8 @@ namespace ProfileEvents { extern const Event WriteBufferFromS3Bytes; + extern const Event WriteBufferFromS3Microseconds; + extern const Event WriteBufferFromS3RequestsErrors; extern const Event S3WriteBytes; extern const Event S3CreateMultipartUpload; @@ -200,7 +202,11 @@ void WriteBufferFromS3::createMultipartUpload() if (write_settings.for_object_storage) ProfileEvents::increment(ProfileEvents::DiskS3CreateMultipartUpload); + Stopwatch watch; auto outcome = client_ptr->CreateMultipartUpload(req); + watch.stop(); + + ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds()); if (outcome.IsSuccess()) { @@ -208,7 +214,10 @@ void WriteBufferFromS3::createMultipartUpload() LOG_TRACE(log, "Multipart upload has created. 
Bucket: {}, Key: {}, Upload id: {}", bucket, key, multipart_upload_id); } else + { + ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1); throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType()); + } } void WriteBufferFromS3::writePart() @@ -345,9 +354,13 @@ void WriteBufferFromS3::processUploadRequest(UploadPartTask & task) ResourceCost cost = task.req.GetContentLength(); ResourceGuard rlock(write_settings.resource_link, cost); + Stopwatch watch; auto outcome = client_ptr->UploadPart(task.req); + watch.stop(); rlock.unlock(); // Avoid acquiring other locks under resource lock + ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds()); + if (outcome.IsSuccess()) { task.tag = outcome.GetResult().GetETag(); @@ -356,6 +369,7 @@ void WriteBufferFromS3::processUploadRequest(UploadPartTask & task) } else { + ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1); write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType()); } @@ -391,25 +405,34 @@ void WriteBufferFromS3::completeMultipartUpload() if (write_settings.for_object_storage) ProfileEvents::increment(ProfileEvents::DiskS3CompleteMultipartUpload); + Stopwatch watch; auto outcome = client_ptr->CompleteMultipartUpload(req); + watch.stop(); + + ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds()); if (outcome.IsSuccess()) { LOG_TRACE(log, "Multipart upload has completed. Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", bucket, key, multipart_upload_id, tags.size()); return; } - else if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) - { - /// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests - /// BTW, NO_SUCH_UPLOAD is expected error and we shouldn't retry it - LOG_INFO(log, "Multipart upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Upload_id: {}, Parts: {}, will retry", bucket, key, multipart_upload_id, tags.size()); - } else { - throw S3Exception( - outcome.GetError().GetErrorType(), - "Message: {}, Key: {}, Bucket: {}, Tags: {}", - outcome.GetError().GetMessage(), key, bucket, fmt::join(tags.begin(), tags.end(), " ")); + ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1); + + if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) + { + /// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests + /// BTW, NO_SUCH_UPLOAD is expected error and we shouldn't retry it + LOG_INFO(log, "Multipart upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Upload_id: {}, Parts: {}, will retry", bucket, key, multipart_upload_id, tags.size()); + } + else + { + throw S3Exception( + outcome.GetError().GetErrorType(), + "Message: {}, Key: {}, Bucket: {}, Tags: {}", + outcome.GetError().GetMessage(), key, bucket, fmt::join(tags.begin(), tags.end(), " ")); + } } } @@ -506,28 +529,36 @@ void WriteBufferFromS3::processPutRequest(const PutObjectTask & task) ResourceCost cost = task.req.GetContentLength(); ResourceGuard rlock(write_settings.resource_link, cost); + Stopwatch watch; auto outcome = client_ptr->PutObject(task.req); + watch.stop(); rlock.unlock(); + ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds()); + bool with_pool = static_cast(schedule); if (outcome.IsSuccess()) { 
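        // (Editor's note, not part of the patch.) The same instrumentation pattern is applied to
        // every S3 call touched in this file: wrap the client call in a Stopwatch feeding
        // WriteBufferFromS3Microseconds, and bump WriteBufferFromS3RequestsErrors on a failed
        // outcome before the existing NO_SUCH_KEY / throw handling runs. Schematically:
        //
        //     Stopwatch watch;
        //     auto outcome = client_ptr->SomeS3Call(req);   // CreateMultipartUpload / UploadPart / PutObject / ...
        //     watch.stop();
        //     ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Microseconds, watch.elapsedMicroseconds());
        //     if (!outcome.IsSuccess())
        //         ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1);
        //
        // On success, the single-part upload below just logs and returns: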
LOG_TRACE(log, "Single part upload has completed. Bucket: {}, Key: {}, Object size: {}, WithPool: {}", bucket, key, task.req.GetContentLength(), with_pool); return; } - else if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) - { - write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure - /// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests - LOG_INFO(log, "Single part upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Object size: {}, WithPool: {}, will retry", bucket, key, task.req.GetContentLength(), with_pool); - } else { - write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure - throw S3Exception( - outcome.GetError().GetErrorType(), - "Message: {}, Key: {}, Bucket: {}, Object size: {}, WithPool: {}", - outcome.GetError().GetMessage(), key, bucket, task.req.GetContentLength(), with_pool); + ProfileEvents::increment(ProfileEvents::WriteBufferFromS3RequestsErrors, 1); + if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) + { + write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure + /// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests + LOG_INFO(log, "Single part upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Object size: {}, WithPool: {}, will retry", bucket, key, task.req.GetContentLength(), with_pool); + } + else + { + write_settings.resource_link.accumulate(cost); // We assume no resource was used in case of failure + throw S3Exception( + outcome.GetError().GetErrorType(), + "Message: {}, Key: {}, Bucket: {}, Object size: {}, WithPool: {}", + outcome.GetError().GetMessage(), key, bucket, task.req.GetContentLength(), with_pool); + } } } diff --git a/src/IO/ZlibInflatingReadBuffer.cpp b/src/IO/ZlibInflatingReadBuffer.cpp index 9c2ee640cbe..09e4fce7c4c 100644 --- a/src/IO/ZlibInflatingReadBuffer.cpp +++ b/src/IO/ZlibInflatingReadBuffer.cpp @@ -6,6 +6,7 @@ namespace DB namespace ErrorCodes { extern const int ZLIB_INFLATE_FAILED; + extern const int ARGUMENT_OUT_OF_BOUND; } ZlibInflatingReadBuffer::ZlibInflatingReadBuffer( @@ -17,6 +18,11 @@ ZlibInflatingReadBuffer::ZlibInflatingReadBuffer( : CompressedReadBufferWrapper(std::move(in_), buf_size, existing_memory, alignment) , eof_flag(false) { + if (buf_size > max_buffer_size) + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Zlib does not support decompression with buffer size greater than {}, got buffer size: {}", + max_buffer_size, buf_size); + zstr.zalloc = nullptr; zstr.zfree = nullptr; zstr.opaque = nullptr; @@ -31,10 +37,7 @@ ZlibInflatingReadBuffer::ZlibInflatingReadBuffer( window_bits += 16; } -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wold-style-cast" int rc = inflateInit2(&zstr, window_bits); -#pragma GCC diagnostic pop if (rc != Z_OK) throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateInit2 failed: {}; zlib version: {}.", zError(rc), ZLIB_VERSION); @@ -61,16 +64,22 @@ bool ZlibInflatingReadBuffer::nextImpl() { in->nextIfAtEnd(); zstr.next_in = reinterpret_cast(in->position()); - zstr.avail_in = static_cast(in->buffer().end() - in->position()); + zstr.avail_in = static_cast(std::min( + static_cast(in->buffer().end() - in->position()), + static_cast(max_buffer_size))); } + /// init output bytes (place, where decompressed data will be) zstr.next_out = reinterpret_cast(internal_buffer.begin()); - zstr.avail_out = 
static_cast(internal_buffer.size()); + zstr.avail_out = static_cast(internal_buffer.size()); + size_t old_total_in = zstr.total_in; int rc = inflate(&zstr, Z_NO_FLUSH); /// move in stream on place, where reading stopped - in->position() = in->buffer().end() - zstr.avail_in; + size_t bytes_read = zstr.total_in - old_total_in; + in->position() += bytes_read; + /// change size of working buffer (it's size equal to internal_buffer size without unused uncompressed values) working_buffer.resize(internal_buffer.size() - zstr.avail_out); @@ -94,9 +103,10 @@ bool ZlibInflatingReadBuffer::nextImpl() return true; } } + /// If it is not end and not OK, something went wrong, throw exception if (rc != Z_OK) - throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateReset failed: {}", zError(rc)); + throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflate failed: {}", zError(rc)); } while (working_buffer.empty()); diff --git a/src/IO/ZlibInflatingReadBuffer.h b/src/IO/ZlibInflatingReadBuffer.h index b534b7cb5c4..d9ca4c61268 100644 --- a/src/IO/ZlibInflatingReadBuffer.h +++ b/src/IO/ZlibInflatingReadBuffer.h @@ -4,6 +4,7 @@ #include #include +#include #include @@ -33,6 +34,11 @@ private: z_stream zstr; bool eof_flag; + + /// Limit size of buffer because zlib uses + /// UInt32 for sizes of internal buffers. + using BufferSizeType = decltype(zstr.avail_in); + static constexpr auto max_buffer_size = std::numeric_limits::max(); }; } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index ac19d01d0e9..273d81ff9f9 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2874,8 +2874,10 @@ void InterpreterSelectQuery::executeMergeSorted(QueryPlan & query_plan, const st SortDescription sort_description = getSortDescription(query, context); const UInt64 limit = getLimitForSorting(query, context); const auto max_block_size = context->getSettingsRef().max_block_size; + const auto exact_rows_before_limit = context->getSettingsRef().exact_rows_before_limit; - auto merging_sorted = std::make_unique(query_plan.getCurrentDataStream(), std::move(sort_description), max_block_size, limit); + auto merging_sorted = std::make_unique( + query_plan.getCurrentDataStream(), std::move(sort_description), max_block_size, limit, exact_rows_before_limit); merging_sorted->setStepDescription("Merge sorted streams " + description); query_plan.addStep(std::move(merging_sorted)); } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index b79fe9bcd46..5f641df2ebc 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -525,7 +525,8 @@ void addMergeSortingStep(QueryPlan & query_plan, auto merging_sorted = std::make_unique(query_plan.getCurrentDataStream(), sort_description, max_block_size, - query_analysis_result.partial_sorting_limit); + query_analysis_result.partial_sorting_limit, + settings.exact_rows_before_limit); merging_sorted->setStepDescription("Merge sorted streams " + description); query_plan.addStep(std::move(merging_sorted)); } diff --git a/src/Processors/Formats/IOutputFormat.h b/src/Processors/Formats/IOutputFormat.h index 02e91d5b28b..58700a978ff 100644 --- a/src/Processors/Formats/IOutputFormat.h +++ b/src/Processors/Formats/IOutputFormat.h @@ -39,7 +39,7 @@ public: virtual void setRowsBeforeLimit(size_t /*rows_before_limit*/) {} /// Counter to calculate rows_before_limit_at_least in processors pipeline. 
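    /// (Editor's sketch, not part of the patch.) Judging from the calls made on it in this
    /// changeset (add(), set(), add(0)), the shared counter behaves roughly like a thread-safe
    /// accumulator; a minimal stand-in, with a hypothetical name, could look like:
    ///
    ///     struct RowsBeforeLimitCounterSketch
    ///     {
    ///         std::atomic<UInt64> rows{0};
    ///         void add(UInt64 n) { rows.fetch_add(n, std::memory_order_relaxed); }
    ///         void set(UInt64 n) { rows.store(n, std::memory_order_relaxed); }
    ///     };
    ///
    /// Turning the setter below into an override of the new IProcessor virtual lets the pipeline
    /// attach one shared counter to any processor type without per-class casts.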
- void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit_counter.swap(counter); } + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit_counter.swap(counter); } /// Notify about progress. Method could be called from different threads. /// Passed value are delta, that must be summarized. diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index 6d17db69c9e..c9dd7d8d77d 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -21,6 +21,9 @@ class IQueryPlanStep; struct StorageLimits; using StorageLimitsList = std::list; +class RowsBeforeLimitCounter; +using RowsBeforeLimitCounterPtr = std::shared_ptr; + class IProcessor; using ProcessorPtr = std::shared_ptr; using Processors = std::vector; @@ -357,6 +360,10 @@ public: /// You should zero internal counters in the call, in order to make in idempotent. virtual std::optional getReadProgress() { return std::nullopt; } + /// Set rows_before_limit counter for current processor. + /// This counter is used to calculate the number of rows right before any filtration of LimitTransform. + virtual void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr /* counter */) {} + protected: virtual void onCancel() {} diff --git a/src/Processors/LimitTransform.cpp b/src/Processors/LimitTransform.cpp index 2feee7e65b1..5e24062d67a 100644 --- a/src/Processors/LimitTransform.cpp +++ b/src/Processors/LimitTransform.cpp @@ -183,7 +183,7 @@ LimitTransform::Status LimitTransform::preparePair(PortsData & data) auto rows = data.current_chunk.getNumRows(); - if (rows_before_limit_at_least) + if (rows_before_limit_at_least && !data.input_port_has_counter) rows_before_limit_at_least->add(rows); /// Skip block (for 'always_read_till_end' case). diff --git a/src/Processors/LimitTransform.h b/src/Processors/LimitTransform.h index 0d8c5f4ea47..33ff968985f 100644 --- a/src/Processors/LimitTransform.h +++ b/src/Processors/LimitTransform.h @@ -41,6 +41,11 @@ private: InputPort * input_port = nullptr; OutputPort * output_port = nullptr; bool is_finished = false; + + /// This flag is used to avoid counting rows multiple times before applying a limit + /// condition, which can happen through certain input ports like PartialSortingTransform and + /// RemoteSource. 
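        /// (Editor's note, not part of the patch.) The flag is raised from
        /// QueryPipeline::initRowsBeforeLimit() via setInputPortHasCounter(pos) whenever the
        /// processor feeding that input port already reports into the shared counter, so
        /// LimitTransform::preparePair() skips its own rows_before_limit_at_least->add(rows)
        /// for rows arriving through that port.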
+ bool input_port_has_counter = false; }; std::vector ports_data; @@ -66,7 +71,8 @@ public: InputPort & getInputPort() { return inputs.front(); } OutputPort & getOutputPort() { return outputs.front(); } - void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit_at_least.swap(counter); } + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit_at_least.swap(counter); } + void setInputPortHasCounter(size_t pos) { ports_data[pos].input_port_has_counter = true; } }; } diff --git a/src/Processors/Merges/AggregatingSortedTransform.h b/src/Processors/Merges/AggregatingSortedTransform.h index b0cdf4c8a3c..b7a88cca952 100644 --- a/src/Processors/Merges/AggregatingSortedTransform.h +++ b/src/Processors/Merges/AggregatingSortedTransform.h @@ -16,7 +16,7 @@ public: const Block & header, size_t num_inputs, SortDescription description_, size_t max_block_size) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/CollapsingSortedTransform.h b/src/Processors/Merges/CollapsingSortedTransform.h index a37e1c8402f..abe3eefb401 100644 --- a/src/Processors/Merges/CollapsingSortedTransform.h +++ b/src/Processors/Merges/CollapsingSortedTransform.h @@ -20,7 +20,7 @@ public: WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/FinishAggregatingInOrderTransform.h b/src/Processors/Merges/FinishAggregatingInOrderTransform.h index ecc88899e88..b82a103fee0 100644 --- a/src/Processors/Merges/FinishAggregatingInOrderTransform.h +++ b/src/Processors/Merges/FinishAggregatingInOrderTransform.h @@ -20,7 +20,7 @@ public: size_t max_block_size, size_t max_block_bytes) : IMergingTransform( - num_inputs, header, {}, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, + num_inputs, header, {}, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, header, num_inputs, params, diff --git a/src/Processors/Merges/GraphiteRollupSortedTransform.h b/src/Processors/Merges/GraphiteRollupSortedTransform.h index e6307c629ea..f3c391c77ce 100644 --- a/src/Processors/Merges/GraphiteRollupSortedTransform.h +++ b/src/Processors/Merges/GraphiteRollupSortedTransform.h @@ -15,7 +15,7 @@ public: SortDescription description_, size_t max_block_size, Graphite::Params params_, time_t time_of_merge_) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index 4a6a1662f16..fbb47969b2f 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -14,10 +14,12 @@ IMergingTransformBase::IMergingTransformBase( const Block & input_header, const Block & output_header, bool have_all_inputs_, - UInt64 limit_hint_) + UInt64 limit_hint_, + bool always_read_till_end_) : 
IProcessor(InputPorts(num_inputs, input_header), {output_header}) , have_all_inputs(have_all_inputs_) , limit_hint(limit_hint_) + , always_read_till_end(always_read_till_end_) { } @@ -33,10 +35,12 @@ IMergingTransformBase::IMergingTransformBase( const Blocks & input_headers, const Block & output_header, bool have_all_inputs_, - UInt64 limit_hint_) + UInt64 limit_hint_, + bool always_read_till_end_) : IProcessor(createPorts(input_headers), {output_header}) , have_all_inputs(have_all_inputs_) , limit_hint(limit_hint_) + , always_read_till_end(always_read_till_end_) { } @@ -98,7 +102,7 @@ IProcessor::Status IMergingTransformBase::prepareInitializeInputs() /// (e.g. with optimized 'ORDER BY primary_key LIMIT n' and small 'n') /// we won't have to read any chunks anymore; auto chunk = input.pull(limit_hint != 0); - if (limit_hint && chunk.getNumRows() < limit_hint) + if ((limit_hint && chunk.getNumRows() < limit_hint) || always_read_till_end) input.setNeeded(); if (!chunk.hasRows()) @@ -164,6 +168,21 @@ IProcessor::Status IMergingTransformBase::prepare() if (is_port_full) return Status::PortFull; + if (always_read_till_end) + { + for (auto & input : inputs) + { + if (!input.isFinished()) + { + input.setNeeded(); + if (input.hasData()) + std::ignore = input.pull(); + + return Status::NeedData; + } + } + } + for (auto & input : inputs) input.close(); diff --git a/src/Processors/Merges/IMergingTransform.h b/src/Processors/Merges/IMergingTransform.h index 1d223179f8a..c218f622870 100644 --- a/src/Processors/Merges/IMergingTransform.h +++ b/src/Processors/Merges/IMergingTransform.h @@ -17,13 +17,15 @@ public: const Block & input_header, const Block & output_header, bool have_all_inputs_, - UInt64 limit_hint_); + UInt64 limit_hint_, + bool always_read_till_end_); IMergingTransformBase( const Blocks & input_headers, const Block & output_header, bool have_all_inputs_, - UInt64 limit_hint_); + UInt64 limit_hint_, + bool always_read_till_end_); OutputPort & getOutputPort() { return outputs.front(); } @@ -67,6 +69,7 @@ private: std::atomic have_all_inputs; bool is_initialized = false; UInt64 limit_hint = 0; + bool always_read_till_end = false; IProcessor::Status prepareInitializeInputs(); }; @@ -83,8 +86,9 @@ public: const Block & output_header, bool have_all_inputs_, UInt64 limit_hint_, + bool always_read_till_end_, Args && ... args) - : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_, limit_hint_) + : IMergingTransformBase(num_inputs, input_header, output_header, have_all_inputs_, limit_hint_, always_read_till_end_) , algorithm(std::forward(args) ...) { } @@ -95,9 +99,10 @@ public: const Block & output_header, bool have_all_inputs_, UInt64 limit_hint_, + bool always_read_till_end_, bool empty_chunk_on_finish_, Args && ... args) - : IMergingTransformBase(input_headers, output_header, have_all_inputs_, limit_hint_) + : IMergingTransformBase(input_headers, output_header, have_all_inputs_, limit_hint_, always_read_till_end_) , empty_chunk_on_finish(empty_chunk_on_finish_) , algorithm(std::forward(args) ...) 
{ diff --git a/src/Processors/Merges/MergingSortedTransform.cpp b/src/Processors/Merges/MergingSortedTransform.cpp index 12cbeecff8d..2e5eda9b54b 100644 --- a/src/Processors/Merges/MergingSortedTransform.cpp +++ b/src/Processors/Merges/MergingSortedTransform.cpp @@ -14,6 +14,7 @@ MergingSortedTransform::MergingSortedTransform( size_t max_block_size, SortingQueueStrategy sorting_queue_strategy, UInt64 limit_, + bool always_read_till_end_, WriteBuffer * out_row_sources_buf_, bool quiet_, bool use_average_block_sizes, @@ -24,6 +25,7 @@ MergingSortedTransform::MergingSortedTransform( header, have_all_inputs_, limit_, + always_read_till_end_, header, num_inputs, description_, diff --git a/src/Processors/Merges/MergingSortedTransform.h b/src/Processors/Merges/MergingSortedTransform.h index 50586177c6d..3042550d5d5 100644 --- a/src/Processors/Merges/MergingSortedTransform.h +++ b/src/Processors/Merges/MergingSortedTransform.h @@ -18,6 +18,7 @@ public: size_t max_block_size, SortingQueueStrategy sorting_queue_strategy, UInt64 limit_ = 0, + bool always_read_till_end_ = false, WriteBuffer * out_row_sources_buf_ = nullptr, bool quiet_ = false, bool use_average_block_sizes = false, diff --git a/src/Processors/Merges/ReplacingSortedTransform.h b/src/Processors/Merges/ReplacingSortedTransform.h index 8284a2c3a26..8289f102cb7 100644 --- a/src/Processors/Merges/ReplacingSortedTransform.h +++ b/src/Processors/Merges/ReplacingSortedTransform.h @@ -20,7 +20,7 @@ public: bool use_average_block_sizes = false, bool cleanup = false) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/SummingSortedTransform.h b/src/Processors/Merges/SummingSortedTransform.h index 0530ac2e96b..204224ecf06 100644 --- a/src/Processors/Merges/SummingSortedTransform.h +++ b/src/Processors/Merges/SummingSortedTransform.h @@ -19,7 +19,7 @@ public: const Names & partition_key_columns, size_t max_block_size) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, header, num_inputs, std::move(description_), diff --git a/src/Processors/Merges/VersionedCollapsingTransform.h b/src/Processors/Merges/VersionedCollapsingTransform.h index 5eced1cb58d..e7eb164f515 100644 --- a/src/Processors/Merges/VersionedCollapsingTransform.h +++ b/src/Processors/Merges/VersionedCollapsingTransform.h @@ -19,7 +19,7 @@ public: WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, header, num_inputs, std::move(description_), diff --git a/src/Processors/OffsetTransform.h b/src/Processors/OffsetTransform.h index d24440d68ea..79a7d15fe0b 100644 --- a/src/Processors/OffsetTransform.h +++ b/src/Processors/OffsetTransform.h @@ -45,7 +45,7 @@ public: InputPort & getInputPort() { return inputs.front(); } OutputPort & getOutputPort() { return outputs.front(); } - void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit_at_least.swap(counter); } + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override 
{ rows_before_limit_at_least.swap(counter); } }; } diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index 2bb29a0b6fe..9b9cc221ca8 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -55,6 +55,10 @@ std::unique_ptr createLocalPlan( auto query_plan = std::make_unique(); auto new_context = Context::createCopy(context); + /// Do not push down limit to local plan, as it will break `rows_before_limit_at_least` counter. + if (processed_stage == QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit) + processed_stage = QueryProcessingStage::WithMergeableStateAfterAggregation; + /// Do not apply AST optimizations, because query /// is already optimized and some optimizations /// can be applied only for non-distributed tables diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 0ab8e091e05..db44da5a0fc 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -98,11 +98,13 @@ SortingStep::SortingStep( const DataStream & input_stream, SortDescription sort_description_, size_t max_block_size_, - UInt64 limit_) + UInt64 limit_, + bool always_read_till_end_) : ITransformingStep(input_stream, input_stream.header, getTraits(limit_)) , type(Type::MergingSorted) , result_description(std::move(sort_description_)) , limit(limit_) + , always_read_till_end(always_read_till_end_) , sort_settings(max_block_size_) { sort_settings.max_block_size = max_block_size_; @@ -175,7 +177,8 @@ void SortingStep::mergingSorted(QueryPipelineBuilder & pipeline, const SortDescr result_sort_desc, sort_settings.max_block_size, SortingQueueStrategy::Batch, - limit_); + limit_, + always_read_till_end); pipeline.addTransform(std::move(transform)); } @@ -262,7 +265,13 @@ void SortingStep::fullSort( if (pipeline.getNumStreams() > 1) { auto transform = std::make_shared( - pipeline.getHeader(), pipeline.getNumStreams(), result_sort_desc, sort_settings.max_block_size, SortingQueueStrategy::Batch, limit_); + pipeline.getHeader(), + pipeline.getNumStreams(), + result_sort_desc, + sort_settings.max_block_size, + SortingQueueStrategy::Batch, + limit_, + always_read_till_end); pipeline.addTransform(std::move(transform)); } diff --git a/src/Processors/QueryPlan/SortingStep.h b/src/Processors/QueryPlan/SortingStep.h index eed1f26e110..371a24ac6f2 100644 --- a/src/Processors/QueryPlan/SortingStep.h +++ b/src/Processors/QueryPlan/SortingStep.h @@ -53,7 +53,9 @@ public: const DataStream & input_stream, SortDescription sort_description_, size_t max_block_size_, - UInt64 limit_ = 0); + UInt64 limit_ = 0, + bool always_read_till_end_ = false + ); String getName() const override { return "Sorting"; } @@ -100,6 +102,7 @@ private: SortDescription prefix_description; const SortDescription result_description; UInt64 limit; + bool always_read_till_end = false; Settings sort_settings; diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 525f332c2a3..6cdc2e95b9d 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -107,8 +107,13 @@ std::optional RemoteSource::tryGenerate() /// Get rows_before_limit result for remote query from ProfileInfo packet. 
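    /// (Editor's note, not part of the patch.) With the change below there are two paths:
    /// if the remote query applied a LIMIT, the exact value arrives in the ProfileInfo packet
    /// and is added to the shared counter; otherwise manually_add_rows_before_limit_counter is
    /// raised and the rows actually pulled by this source are added once the stream is
    /// exhausted (see the empty-block branch further down).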
query_executor->setProfileInfoCallback([this](const ProfileInfo & info) { - if (rows_before_limit && info.hasAppliedLimit()) - rows_before_limit->set(info.getRowsBeforeLimit()); + if (rows_before_limit) + { + if (info.hasAppliedLimit()) + rows_before_limit->add(info.getRowsBeforeLimit()); + else + manually_add_rows_before_limit_counter = true; /// Remote subquery doesn't contain a limit + } }); if (async_query_sending) @@ -162,11 +167,15 @@ std::optional RemoteSource::tryGenerate() if (!block) { + if (manually_add_rows_before_limit_counter) + rows_before_limit->add(rows); + query_executor->finish(); return {}; } UInt64 num_rows = block.rows(); + rows += num_rows; Chunk chunk(block.getColumns(), num_rows); if (add_aggregation_info) diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index cffa62aeca7..2e7be6fd731 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -3,7 +3,7 @@ #include #include #include -#include "Core/UUID.h" +#include #include namespace DB @@ -27,7 +27,7 @@ public: void connectToScheduler(InputPort & input_port); - void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit.swap(counter); } + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { rows_before_limit.swap(counter); } UUID getParallelReplicasGroupUUID(); @@ -56,6 +56,8 @@ private: bool is_async_state = false; UUID uuid; int fd = -1; + size_t rows = 0; + bool manually_add_rows_before_limit_counter = false; }; /// Totals source from RemoteQueryExecutor. diff --git a/src/Processors/Transforms/ColumnGathererTransform.cpp b/src/Processors/Transforms/ColumnGathererTransform.cpp index 2628bf7d6db..7c2b93faa91 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.cpp +++ b/src/Processors/Transforms/ColumnGathererTransform.cpp @@ -126,7 +126,7 @@ ColumnGathererTransform::ColumnGathererTransform( ReadBuffer & row_sources_buf_, size_t block_preferred_size_) : IMergingTransform( - num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, + num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false, num_inputs, row_sources_buf_, block_preferred_size_) , log(&Poco::Logger::get("ColumnGathererStream")) { diff --git a/src/Processors/Transforms/FinishSortingTransform.cpp b/src/Processors/Transforms/FinishSortingTransform.cpp index d8412eff588..05fddc35e15 100644 --- a/src/Processors/Transforms/FinishSortingTransform.cpp +++ b/src/Processors/Transforms/FinishSortingTransform.cpp @@ -109,6 +109,7 @@ void FinishSortingTransform::generate() generated_prefix = true; } + // TODO: Here we should also consider LIMIT optimization. 
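    // (Editor's note, not part of the patch.) Related: elsewhere in this diff MergingSortedTransform
    // gains an always_read_till_end_ flag, driven by the exact_rows_before_limit setting, so a merge
    // keeps draining its inputs even after limit_hint rows were produced and the reported
    // rows_before_limit stays exact (see IMergingTransformBase::prepare in this changeset).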
generated_chunk = merge_sorter->read(); if (!generated_chunk) diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index cf5b4be4239..abeef0f9a47 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -844,6 +844,7 @@ MergeJoinTransform::MergeJoinTransform( output_header, /* have_all_inputs_= */ true, limit_hint_, + /* always_read_till_end_= */ false, /* empty_chunk_on_finish_= */ true, table_join, input_headers, max_block_size) , log(&Poco::Logger::get("MergeJoinTransform")) diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp index efd9249066c..eebdd678a4b 100644 --- a/src/Processors/Transforms/MergeSortingTransform.cpp +++ b/src/Processors/Transforms/MergeSortingTransform.cpp @@ -187,6 +187,7 @@ void MergeSortingTransform::consume(Chunk chunk) max_merged_block_size, SortingQueueStrategy::Batch, limit, + /*always_read_till_end_=*/ false, nullptr, quiet, use_average_block_sizes, diff --git a/src/Processors/Transforms/PartialSortingTransform.h b/src/Processors/Transforms/PartialSortingTransform.h index 6dab4497fc7..8f25c93037f 100644 --- a/src/Processors/Transforms/PartialSortingTransform.h +++ b/src/Processors/Transforms/PartialSortingTransform.h @@ -20,7 +20,7 @@ public: String getName() const override { return "PartialSortingTransform"; } - void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { read_rows.swap(counter); } + void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) override { read_rows.swap(counter); } protected: void transform(Chunk & chunk) override; diff --git a/src/Processors/Transforms/TotalsHavingTransform.h b/src/Processors/Transforms/TotalsHavingTransform.h index 2567781771e..f252d683b9a 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.h +++ b/src/Processors/Transforms/TotalsHavingTransform.h @@ -42,6 +42,8 @@ public: Status prepare() override; void work() override; + bool hasFilter() const { return !filter_column_name.empty(); } + static Block transformHeader(Block block, const ActionsDAG * expression, const std::string & filter_column_name, bool remove_filter, bool final, const ColumnsMask & aggregates_mask); protected: diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index b7b18014f1f..f060f2f508f 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -129,50 +130,79 @@ static void checkCompleted(Processors & processors) static void initRowsBeforeLimit(IOutputFormat * output_format) { RowsBeforeLimitCounterPtr rows_before_limit_at_least; - - /// TODO: add setRowsBeforeLimitCounter as virtual method to IProcessor. 
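    /// (Editor's note, not part of the patch.) The removed TODO above is what this changeset
    /// implements: setRowsBeforeLimitCounter is now a virtual on IProcessor (see IProcessor.h
    /// earlier in the diff), so the ad-hoc lists of LimitTransform and RemoteSource pointers
    /// below are replaced by a single traversal that records per-limit candidate input ports.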
- std::vector limits; - std::vector remote_sources; - + std::vector processors; + std::map> limit_candidates; std::unordered_set visited; + bool has_limit = false; struct QueuedEntry { IProcessor * processor; - bool visited_limit; + LimitTransform * limit_processor; + ssize_t limit_input_port; }; std::queue queue; - queue.push({ output_format, false }); + queue.push({ output_format, nullptr, -1 }); visited.emplace(output_format); while (!queue.empty()) { auto * processor = queue.front().processor; - auto visited_limit = queue.front().visited_limit; + auto * limit_processor = queue.front().limit_processor; + auto limit_input_port = queue.front().limit_input_port; queue.pop(); - if (!visited_limit) + /// Set counter based on the following cases: + /// 1. Remote: Set counter on Remote + /// 2. Limit ... PartialSorting: Set counter on PartialSorting + /// 3. Limit ... TotalsHaving(with filter) ... Remote: Set counter on the input port of Limit + /// 4. Limit ... Remote: Set counter on Remote + /// 5. Limit ... : Set counter on the input port of Limit + + /// Case 1. + if (typeid_cast(processor) && !limit_processor) { - if (auto * limit = typeid_cast(processor)) + processors.emplace_back(processor); + continue; + } + + if (auto * limit = typeid_cast(processor)) + { + has_limit = true; + + /// Ignore child limits + if (limit_processor) + continue; + + limit_processor = limit; + limit_candidates[limit_processor] = {}; + } + else if (limit_processor) + { + /// Case 2. + if (typeid_cast(processor)) { - visited_limit = true; - limits.emplace_back(limit); + processors.emplace_back(processor); + limit_candidates[limit_processor].push_back(limit_input_port); + continue; } - if (auto * source = typeid_cast(processor)) - remote_sources.emplace_back(source); - } - else if (auto * sorting = typeid_cast(processor)) - { - if (!rows_before_limit_at_least) - rows_before_limit_at_least = std::make_shared(); + /// Case 3. + if (auto * having = typeid_cast(processor)) + { + if (having->hasFilter()) + continue; + } - sorting->setRowsBeforeLimitCounter(rows_before_limit_at_least); - - /// Don't go to children. Take rows_before_limit from last PartialSortingTransform. - continue; + /// Case 4. + if (typeid_cast(processor)) + { + processors.emplace_back(processor); + limit_candidates[limit_processor].push_back(limit_input_port); + continue; + } } /// Skip totals and extremes port for output format. 
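/// (Editor's sketch, not part of the patch.) The rewritten traversal above walks the pipeline
/// backwards from the output format and, following the case comments, decides where the shared
/// counter should live. For two common pipeline shapes this gives, roughly:
///
///     RemoteSource -> LimitTransform -> OutputFormat                    // case 4: counter on RemoteSource
///     PartialSorting -> Sorting -> LimitTransform -> OutputFormat       // case 2: counter on PartialSorting
///
/// When some of a LimitTransform's inputs still lack an upstream counter, case 5 in the next hunk
/// hands the counter to the limit itself and marks the already-covered ports with
/// setInputPortHasCounter() so their rows are not counted twice.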
@@ -180,37 +210,58 @@ static void initRowsBeforeLimit(IOutputFormat * output_format) { auto * child_processor = &format->getPort(IOutputFormat::PortKind::Main).getOutputPort().getProcessor(); if (visited.emplace(child_processor).second) - queue.push({ child_processor, visited_limit }); + queue.push({ child_processor, limit_processor, limit_input_port }); continue; } - for (auto & child_port : processor->getInputs()) + if (limit_processor == processor) { - auto * child_processor = &child_port.getOutputPort().getProcessor(); - if (visited.emplace(child_processor).second) - queue.push({ child_processor, visited_limit }); + ssize_t i = 0; + for (auto & child_port : processor->getInputs()) + { + auto * child_processor = &child_port.getOutputPort().getProcessor(); + if (visited.emplace(child_processor).second) + queue.push({ child_processor, limit_processor, i }); + ++i; + } + } + else + { + for (auto & child_port : processor->getInputs()) + { + auto * child_processor = &child_port.getOutputPort().getProcessor(); + if (visited.emplace(child_processor).second) + queue.push({ child_processor, limit_processor, limit_input_port }); + } } } - if (!rows_before_limit_at_least && (!limits.empty() || !remote_sources.empty())) + /// Case 5. + for (auto && [limit, ports] : limit_candidates) { - rows_before_limit_at_least = std::make_shared(); - - for (auto & limit : limits) - limit->setRowsBeforeLimitCounter(rows_before_limit_at_least); - - for (auto & source : remote_sources) - source->setRowsBeforeLimitCounter(rows_before_limit_at_least); + /// If there are some input ports which don't have the counter, add it to LimitTransform. + if (ports.size() < limit->getInputs().size()) + { + processors.push_back(limit); + for (auto port : ports) + limit->setInputPortHasCounter(port); + } } - /// If there is a limit, then enable rows_before_limit_at_least - /// It is needed when zero rows is read, but we still want rows_before_limit_at_least in result. - if (!limits.empty()) - rows_before_limit_at_least->add(0); + if (!processors.empty()) + { + rows_before_limit_at_least = std::make_shared(); + for (auto & processor : processors) + processor->setRowsBeforeLimitCounter(rows_before_limit_at_least); + + /// If there is a limit, then enable rows_before_limit_at_least + /// It is needed when zero rows is read, but we still want rows_before_limit_at_least in result. 
+ if (has_limit) + rows_before_limit_at_least->add(0); - if (rows_before_limit_at_least) output_format->setRowsBeforeLimitCounter(rows_before_limit_at_least); + } } diff --git a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp index 2fa5873544f..d968dae3ff8 100644 --- a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp +++ b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp @@ -83,7 +83,7 @@ TEST(MergingSortedTest, SimpleBlockSizeTest) EXPECT_EQ(pipe.numOutputPorts(), 3); auto transform = std::make_shared(pipe.getHeader(), pipe.numOutputPorts(), sort_description, - DEFAULT_MERGE_BLOCK_SIZE, SortingQueueStrategy::Batch, 0, nullptr, false, true); + DEFAULT_MERGE_BLOCK_SIZE, SortingQueueStrategy::Batch, 0, false, nullptr, false, true); pipe.addTransform(std::move(transform)); @@ -125,7 +125,7 @@ TEST(MergingSortedTest, MoreInterestingBlockSizes) EXPECT_EQ(pipe.numOutputPorts(), 3); auto transform = std::make_shared(pipe.getHeader(), pipe.numOutputPorts(), sort_description, - DEFAULT_MERGE_BLOCK_SIZE, SortingQueueStrategy::Batch, 0, nullptr, false, true); + DEFAULT_MERGE_BLOCK_SIZE, SortingQueueStrategy::Batch, 0, false, nullptr, false, true); pipe.addTransform(std::move(transform)); diff --git a/src/Server/HTTP/HTTPServerRequest.cpp b/src/Server/HTTP/HTTPServerRequest.cpp index b66c6c6a62e..891ac39c931 100644 --- a/src/Server/HTTP/HTTPServerRequest.cpp +++ b/src/Server/HTTP/HTTPServerRequest.cpp @@ -63,11 +63,9 @@ HTTPServerRequest::HTTPServerRequest(HTTPContextPtr context, HTTPServerResponse } else if (getMethod() != HTTPRequest::HTTP_GET && getMethod() != HTTPRequest::HTTP_HEAD && getMethod() != HTTPRequest::HTTP_DELETE) { - /// That check for has_body may be false-negative in rare cases, but it's okay - bool has_body = in->hasPendingData(); stream = std::move(in); - if (!startsWith(getContentType(), "multipart/form-data") && has_body) - LOG_WARNING(&Poco::Logger::get("HTTPServerRequest"), "Got an HTTP request with no content length " + if (!startsWith(getContentType(), "multipart/form-data")) + LOG_WARNING(LogFrequencyLimiter(&Poco::Logger::get("HTTPServerRequest"), 10), "Got an HTTP request with no content length " "and no chunked/multipart encoding, it may be impossible to distinguish graceful EOF from abnormal connection loss"); } else diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 73d3aebe0d0..11b938cd722 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -58,6 +58,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int TIMEOUT_EXCEEDED; extern const int TOO_LARGE_DISTRIBUTED_DEPTH; + extern const int ABORTED; } static Block adoptBlock(const Block & header, const Block & block, Poco::Logger * log) @@ -295,6 +296,10 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si auto thread_group = CurrentThread::getGroup(); return [this, thread_group, &job, ¤t_block, num_shards]() { + /// Avoid Logical error: 'Pipeline for PushingPipelineExecutor was finished before all data was inserted' (whatever it means) + if (isCancelled()) + throw Exception(ErrorCodes::ABORTED, "Writing job was cancelled"); + SCOPE_EXIT_SAFE( if (thread_group) CurrentThread::detachFromGroupIfNotDetached(); diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 9d9d8420e2c..d1dfa96b87c 100644 --- 
a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -929,7 +929,16 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() { case MergeTreeData::MergingParams::Ordinary: merged_transform = std::make_shared( - header, pipes.size(), sort_description, merge_block_size, SortingQueueStrategy::Default, 0, ctx->rows_sources_write_buf.get(), true, ctx->blocks_are_granules_size); + header, + pipes.size(), + sort_description, + merge_block_size, + SortingQueueStrategy::Default, + /* limit_= */0, + /* always_read_till_end_= */false, + ctx->rows_sources_write_buf.get(), + true, + ctx->blocks_are_granules_size); break; case MergeTreeData::MergingParams::Collapsing: diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 8eafc54cb4c..0882ff5a0bc 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -76,50 +76,52 @@ std::unique_lock ReplicatedMergeTreePartCheckThread::pausePartsCheck void ReplicatedMergeTreePartCheckThread::cancelRemovedPartsCheck(const MergeTreePartInfo & drop_range_info) { - Strings removed_names; + Strings parts_to_remove; { std::lock_guard lock(parts_mutex); - removed_names.reserve(parts_queue.size()); /// Avoid memory limit in the middle - for (auto it = parts_queue.begin(); it != parts_queue.end();) - { - if (drop_range_info.contains(MergeTreePartInfo::fromPartName(it->first, storage.format_version))) - { - /// Remove part from the queue to avoid part resurrection - /// if we will check it and enqueue fetch after DROP/REPLACE execution. - removed_names.push_back(it->first); - parts_set.erase(it->first); - it = parts_queue.erase(it); - } - else - { - ++it; - } - } + for (const auto & elem : parts_queue) + if (drop_range_info.contains(MergeTreePartInfo::fromPartName(elem.first, storage.format_version))) + parts_to_remove.push_back(elem.first); } - /// This filtering is not necessary - auto new_end = std::remove_if(removed_names.begin(), removed_names.end(), [this](const String & part_name) + /// We have to remove parts that were not removed by removePartAndEnqueueFetch + LOG_INFO(log, "Removing broken parts from ZooKeeper: {}", fmt::join(parts_to_remove, ", ")); + storage.removePartsFromZooKeeperWithRetries(parts_to_remove); /// May throw + + /// Now we can remove parts from the check queue. + /// It's not atomic (because it's bad idea to hold the mutex while removing something from zk with retries), + /// but the check thread is currently paused, and no new parts in drop_range_info can by enqueued + /// while the corresponding DROP_RANGE/REPLACE_RANGE exists, so it should be okay. We will recheck it just in case. 
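    /// (Editor's note, not part of the patch.) Net effect of this rewrite: the parts covered by
    /// drop_range_info are first collected under parts_mutex, then removed from ZooKeeper with
    /// retries (an exception now propagates instead of the previous LOG_FATAL plus std::terminate),
    /// and only then is the check queue re-locked, cross-checked for consistency and pruned.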
+ + StringSet removed_parts; + for (auto & part : parts_to_remove) + removed_parts.emplace(std::move(part)); + size_t count = 0; + + std::lock_guard lock(parts_mutex); + for (const auto & elem : parts_queue) { - auto part = storage.getPartIfExists(part_name, {MergeTreeDataPartState::Active, MergeTreeDataPartState::Outdated, MergeTreeDataPartState::Deleting}); - /// The rest of parts will be removed normally - return part && !part->outdated_because_broken; + bool is_removed = removed_parts.contains(elem.first); + bool should_have_been_removed = drop_range_info.contains(MergeTreePartInfo::fromPartName(elem.first, storage.format_version)); + if (is_removed != should_have_been_removed) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Inconsistent parts_queue: name={}, is_removed={}, should_have_been_removed={}", + elem.first, is_removed, should_have_been_removed); + count += is_removed; + } + + if (count != parts_to_remove.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected number of parts to remove from parts_queue: should be {}, got {}", + parts_to_remove.size(), count); + + auto new_end = std::remove_if(parts_queue.begin(), parts_queue.end(), [&removed_parts] (const auto & elem) + { + return removed_parts.contains(elem.first); }); - removed_names.erase(new_end, removed_names.end()); - if (removed_names.empty()) - return; - try - { - /// We have to remove parts that were not removed by removePartAndEnqueueFetch - LOG_INFO(log, "Removing broken parts from ZooKeeper: {}", fmt::join(removed_names, ", ")); - storage.removePartsFromZooKeeperWithRetries(removed_names, /* max_retries */ 100); - } - catch (...) - { - /// It's highly unlikely to happen on normal use cases. And if it happens it's easier to restart and reinitialize - LOG_FATAL(log, "Failed to remove parts [{}] from ZooKeeper: {}", fmt::join(removed_names, ", "), getCurrentExceptionMessage(/* with_stacktrace = */ true)); - std::terminate(); - } + parts_queue.erase(new_end, parts_queue.end()); + + for (const auto & elem : removed_parts) + parts_set.erase(elem); } size_t ReplicatedMergeTreePartCheckThread::size() const diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 3b115b68364..4d164ae2b12 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -49,6 +49,7 @@ StorageS3Cluster::StorageS3Cluster( ContextPtr context_, bool structure_argument_was_provided_) : IStorageCluster(table_id_) + , log(&Poco::Logger::get("StorageS3Cluster (" + table_id_.table_name + ")")) , s3_configuration{configuration_} , cluster_name(configuration_.cluster_name) , format_name(configuration_.format) @@ -156,6 +157,7 @@ Pipe StorageS3Cluster::read( processed_stage, extension); + remote_query_executor->setLogger(log); pipes.emplace_back(std::make_shared(remote_query_executor, add_agg_info, false, false)); } } diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index e55382c66b2..98a0bde260e 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -48,6 +48,7 @@ public: ClusterPtr getCluster(ContextPtr context) const override; private: + Poco::Logger * log; StorageS3::Configuration s3_configuration; String cluster_name; String format_name; diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index d60a9e6afd1..64b64896f66 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -141,7 +141,6 @@ def prepare_tests_results_for_clickhouse( report_url: str, check_name: str, ) -> 
List[dict]: - pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master" base_ref = "master" head_ref = "master" diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 192d216614e..f2b1105b3b0 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -96,7 +96,6 @@ def get_images_dict(repo_path: str, image_file_path: str) -> ImagesDict: def get_changed_docker_images( pr_info: PRInfo, images_dict: ImagesDict ) -> Set[DockerImage]: - if not images_dict: return set() diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index c6fe6cd5fb5..c2d279f7fec 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -51,7 +51,6 @@ def find_previous_release( for release in releases: if release.version < server_version: - # Check if the artifact exists on GitHub. # It can be not true for a short period of time # after creating a tag for a new release before uploading the packages. diff --git a/tests/ci/report.py b/tests/ci/report.py index 947fb33d905..ddee035d26f 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -473,7 +473,7 @@ def create_build_html_report( commit_url: str, ) -> str: rows = "" - for (build_result, build_log_url, artifact_urls) in zip( + for build_result, build_log_url, artifact_urls in zip( build_results, build_logs_urls, artifact_urls_list ): row = "" diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 5e151e6c098..953b55dbf63 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -6,6 +6,7 @@ import os import argparse import logging import time +import random def get_options(i, upgrade_check): @@ -43,6 +44,10 @@ def get_options(i, upgrade_check): client_options.append("join_algorithm='auto'") client_options.append("max_rows_in_join=1000") + if i > 0 and random.random() < 1 / 3: + client_options.append("allow_experimental_query_cache=1") + client_options.append("use_query_cache=1") + if i % 5 == 1: client_options.append("memory_tracker_fault_probability=0.001") diff --git a/tests/clickhouse-test b/tests/clickhouse-test index cc9098b7fb4..a355c2f8e73 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -77,7 +77,7 @@ def trim_for_log(s): return s lines = s.splitlines() if len(lines) > 10000: - separator = "-" * 40 + str(len(lines) - 10000) + " lines are hidden" + "-" * 40 + separator = "-" * 40 + str(len(lines) - 10000) + " lines are hidden" + "-" * 40 return "\n".join(lines[:5000] + [] + [separator] + [] + lines[-5000:]) else: return "\n".join(lines) @@ -95,7 +95,13 @@ class HTTPError(Exception): # Helpers to execute queries via HTTP interface. 
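# (Editor's note, not part of the patch.) The hunks below are almost entirely black-style
# reformatting: helper signatures are split one parameter per line but keep the same names and
# defaults, so an existing call such as
#
#     clickhouse_execute(args, "SELECT 1", timeout=30, retry_error_codes=True)
#
# is unaffected.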
def clickhouse_execute_http( - base_args, query, timeout=30, settings=None, default_format=None, max_http_retries=5, retry_error_codes=False + base_args, + query, + timeout=30, + settings=None, + default_format=None, + max_http_retries=5, + retry_error_codes=False, ): if args.secure: client = http.client.HTTPSConnection( @@ -146,12 +152,36 @@ def clickhouse_execute_http( return data -def clickhouse_execute(base_args, query, timeout=30, settings=None, max_http_retries=5, retry_error_codes=False): - return clickhouse_execute_http(base_args, query, timeout, settings, max_http_retries=max_http_retries, retry_error_codes=retry_error_codes).strip() + +def clickhouse_execute( + base_args, + query, + timeout=30, + settings=None, + max_http_retries=5, + retry_error_codes=False, +): + return clickhouse_execute_http( + base_args, + query, + timeout, + settings, + max_http_retries=max_http_retries, + retry_error_codes=retry_error_codes, + ).strip() -def clickhouse_execute_json(base_args, query, timeout=60, settings=None, max_http_retries=5): - data = clickhouse_execute_http(base_args, query, timeout, settings, "JSONEachRow", max_http_retries=max_http_retries) +def clickhouse_execute_json( + base_args, query, timeout=60, settings=None, max_http_retries=5 +): + data = clickhouse_execute_http( + base_args, + query, + timeout, + settings, + "JSONEachRow", + max_http_retries=max_http_retries, + ) if not data: return None rows = [] @@ -648,7 +678,9 @@ class TestCase: clickhouse_execute( args, - "CREATE DATABASE IF NOT EXISTS " + database + get_db_engine(testcase_args, database), + "CREATE DATABASE IF NOT EXISTS " + + database + + get_db_engine(testcase_args, database), settings=get_create_database_settings(args, testcase_args), ) @@ -831,7 +863,8 @@ class TestCase: # TODO: remove checking "no-upgrade-check" after 23.1 elif args.upgrade_check and ( - "no-upgrade-check" in tags or "no-upgrade-check" in tags): + "no-upgrade-check" in tags or "no-upgrade-check" in tags + ): return FailureReason.NO_UPGRADE_CHECK elif tags and ("no-s3-storage" in tags) and args.s3_storage: @@ -1051,7 +1084,11 @@ class TestCase: @staticmethod def send_test_name_failed(suite: str, case: str): pid = os.getpid() - clickhouse_execute(args, f"SELECT 'Running test {suite}/{case} from pid={pid}'", retry_error_codes=True) + clickhouse_execute( + args, + f"SELECT 'Running test {suite}/{case} from pid={pid}'", + retry_error_codes=True, + ) def run_single_test( self, server_logs_level, client_options @@ -2217,6 +2254,7 @@ def find_binary(name): raise Exception(f"{name} was not found in PATH") + def find_clickhouse_command(binary, command): symlink = binary + "-" + command if os.access(symlink, os.X_OK): @@ -2225,6 +2263,7 @@ def find_clickhouse_command(binary, command): # To avoid requiring symlinks (in case you download binary from CI) return binary + " " + command + def get_additional_client_options(args): if args.client_option: return " ".join("--" + option for option in args.client_option) @@ -2566,7 +2605,9 @@ if __name__ == "__main__": "WARNING: --extract_from_config option is deprecated and will be removed the the future", file=sys.stderr, ) - args.extract_from_config = find_clickhouse_command(args.binary, "extract-from-config") + args.extract_from_config = find_clickhouse_command( + args.binary, "extract-from-config" + ) if args.configclient: args.client += " --config-file=" + args.configclient diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index dc5ada81995..a9a996e0a5f 100644 --- 
a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -63,6 +63,7 @@ DEFAULT_ENV_NAME = ".env" SANITIZER_SIGN = "==================" + # to create docker-compose env file def _create_env_file(path, variables): logging.debug(f"Env {variables} stored in {path}") @@ -1454,7 +1455,6 @@ class ClickHouseCluster: config_root_name="clickhouse", extra_configs=[], ) -> "ClickHouseInstance": - """Add an instance to the cluster. name - the name of the instance directory and the value of the 'instance' macro in ClickHouse. @@ -3089,7 +3089,6 @@ class ClickHouseInstance: config_root_name="clickhouse", extra_configs=[], ): - self.name = name self.base_cmd = cluster.base_cmd self.docker_id = cluster.get_instance_docker_id(self.name) diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index e408c9beec1..471aa2bdc2e 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -216,7 +216,6 @@ class _NetworkManager: container_exit_timeout=60, docker_api_version=os.environ.get("DOCKER_API_VERSION"), ): - self.container_expire_timeout = container_expire_timeout self.container_exit_timeout = container_exit_timeout @@ -232,7 +231,6 @@ class _NetworkManager: def _ensure_container(self): if self._container is None or self._container_expire_time <= time.time(): - for i in range(5): if self._container is not None: try: diff --git a/tests/integration/helpers/pytest_xdist_logging_to_separate_files.py b/tests/integration/helpers/pytest_xdist_logging_to_separate_files.py index d424ad58fa4..370aa23a014 100644 --- a/tests/integration/helpers/pytest_xdist_logging_to_separate_files.py +++ b/tests/integration/helpers/pytest_xdist_logging_to_separate_files.py @@ -1,6 +1,7 @@ import logging import os.path + # Makes the parallel workers of pytest-xdist to log to separate files. # Without this function all workers will log to the same log file # and mix everything together making it much more difficult for troubleshooting. 
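The pytest_xdist_logging_to_separate_files helper above only picks up a blank line in this diff; the idea its comment describes — routing each pytest-xdist worker's log output to its own file — can be sketched roughly as follows. This is an illustrative sketch that relies on pytest-xdist's standard PYTEST_XDIST_WORKER environment variable; the function name and log-file layout are assumptions, not the repository's actual implementation.

import logging
import os


def setup_worker_logging(log_dir="."):
    # pytest-xdist exports the worker id (gw0, gw1, ...) into each worker process;
    # fall back to "master" when the tests run without xdist.
    worker = os.environ.get("PYTEST_XDIST_WORKER", "master")
    handler = logging.FileHandler(os.path.join(log_dir, f"integration_{worker}.log"))
    handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
    )
    root = logging.getLogger()
    root.setLevel(logging.DEBUG)
    # each worker now writes to its own file instead of interleaving into a shared one
    root.addHandler(handler)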
diff --git a/tests/integration/runner b/tests/integration/runner index c1b3178faa4..f658bac412b 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -243,11 +243,18 @@ if __name__ == "__main__": ) parser.add_argument( - "--no-random", action="store", dest="no_random", help="Disable tests order randomization" + "--no-random", + action="store", + dest="no_random", + help="Disable tests order randomization", ) parser.add_argument( - "--pre-pull", action="store_true", default=False, dest="pre_pull", help="Pull images for docker_compose before all other actions" + "--pre-pull", + action="store_true", + default=False, + dest="pre_pull", + help="Pull images for docker_compose before all other actions", ) parser.add_argument( @@ -306,7 +313,6 @@ if __name__ == "__main__": # if not args.no_random: # rand_args += f"--random-seed={os.getpid()}" - net = "" if args.network: net = "--net={}".format(args.network) @@ -416,8 +422,11 @@ if __name__ == "__main__": name=CONTAINER_NAME, ) - cmd = cmd_base + " " + args.command - cmd_pre_pull = cmd_base + " find /compose -name docker_compose_*.yml -exec docker-compose -f '{}' pull \;" + cmd = cmd_base + " " + args.command + cmd_pre_pull = ( + cmd_base + + " find /compose -name docker_compose_*.yml -exec docker-compose -f '{}' pull \;" + ) containers = subprocess.check_output( f"docker ps --all --quiet --filter name={CONTAINER_NAME} --format={{{{.ID}}}}", diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index 9dd0a1c40ef..9dcb036cdf9 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -9,14 +9,13 @@ from helpers.test_tools import TSV, assert_eq_with_retry cluster = ClickHouseCluster(__file__) -num_nodes = 4 -ddl_task_timeout = 640 +num_nodes = 10 def generate_cluster_def(): path = os.path.join( os.path.dirname(os.path.realpath(__file__)), - "./_gen/cluster_for_disallow_concurrency_test.xml", + "./_gen/cluster_for_concurrency_test.xml", ) os.makedirs(os.path.dirname(path), exist_ok=True) with open(path, "w") as f: @@ -86,7 +85,7 @@ def drop_after_test(): node0.query( "DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY", settings={ - "distributed_ddl_task_timeout": ddl_task_timeout, + "distributed_ddl_task_timeout": 360, }, ) @@ -101,7 +100,6 @@ def new_backup_name(): def create_and_fill_table(): - node0.query("SET mutations_sync=2") node0.query( "CREATE TABLE tbl ON CLUSTER 'cluster' (" "x UInt64" @@ -109,10 +107,7 @@ def create_and_fill_table(): "ORDER BY x" ) for i in range(num_nodes): - nodes[i].query(f"INSERT INTO tbl SELECT number FROM numbers(100000000)") - nodes[i].query( - f"INSERT INTO tbl SELECT number+100000000 FROM numbers(100000000)" - ) + nodes[i].query(f"INSERT INTO tbl SELECT number FROM numbers(40000000)") # All the tests have concurrent backup/restores with same backup names @@ -143,8 +138,6 @@ def test_concurrent_backups_on_same_node(): nodes[0], f"SELECT status FROM system.backups WHERE status == 'BACKUP_CREATED' AND id = '{id}'", "BACKUP_CREATED", - retry_count=100, - sleep_time=1, ) # This restore part is added to confirm creating an internal backup & restore work @@ -152,11 +145,10 @@ def test_concurrent_backups_on_same_node(): nodes[0].query( f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY", settings={ - "distributed_ddl_task_timeout": ddl_task_timeout, + 
"distributed_ddl_task_timeout": 360, }, ) nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") - nodes[0].query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl") def test_concurrent_backups_on_different_nodes(): @@ -181,8 +173,6 @@ def test_concurrent_backups_on_different_nodes(): nodes[1], f"SELECT status FROM system.backups WHERE status == 'BACKUP_CREATED' AND id = '{id}'", "BACKUP_CREATED", - retry_count=100, - sleep_time=1, ) @@ -206,14 +196,12 @@ def test_concurrent_restores_on_same_node(): nodes[0], f"SELECT status FROM system.backups WHERE status == 'BACKUP_CREATED' AND id = '{id}'", "BACKUP_CREATED", - retry_count=100, - sleep_time=1, ) nodes[0].query( f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY", settings={ - "distributed_ddl_task_timeout": ddl_task_timeout, + "distributed_ddl_task_timeout": 360, }, ) restore_id = ( @@ -237,46 +225,44 @@ def test_concurrent_restores_on_different_node(): backup_name = new_backup_name() id = ( - nodes[1] + nodes[0] .query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name} ASYNC") .split("\t")[0] ) assert_eq_with_retry( - nodes[1], + nodes[0], f"SELECT status FROM system.backups WHERE status == 'CREATING_BACKUP' AND id = '{id}'", "CREATING_BACKUP", ) assert_eq_with_retry( - nodes[1], + nodes[0], f"SELECT status FROM system.backups WHERE status == 'BACKUP_CREATED' AND id = '{id}'", "BACKUP_CREATED", - retry_count=100, - sleep_time=1, ) - nodes[1].query( + nodes[0].query( f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY", settings={ - "distributed_ddl_task_timeout": ddl_task_timeout, + "distributed_ddl_task_timeout": 360, }, ) restore_id = ( - nodes[1] + nodes[0] .query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC") .split("\t")[0] ) assert_eq_with_retry( - nodes[1], - f"SELECT status FROM system.backups WHERE status == 'RESTORING' AND id == '{restore_id}'", + nodes[0], + f"SELECT status FROM system.backups WHERE status == 'RESTORING'", "RESTORING", ) - assert "Concurrent restores not supported" in nodes[0].query_and_get_error( + assert "Concurrent restores not supported" in nodes[1].query_and_get_error( f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" ) assert_eq_with_retry( - nodes[1], + nodes[0], f"SELECT status FROM system.backups WHERE status == 'RESTORED' AND id == '{restore_id}'", "RESTORED", ) diff --git a/tests/integration/test_backward_compatibility/test_detach_part_wrong_partition_id.py b/tests/integration/test_backward_compatibility/test_detach_part_wrong_partition_id.py index 02fccfae4e5..a6f7a8653da 100644 --- a/tests/integration/test_backward_compatibility/test_detach_part_wrong_partition_id.py +++ b/tests/integration/test_backward_compatibility/test_detach_part_wrong_partition_id.py @@ -24,7 +24,6 @@ def start_cluster(): def test_detach_part_wrong_partition_id(start_cluster): - # Here we create table with partition by UUID. 
node_21_6.query( "create table tab (id UUID, value UInt32) engine = MergeTree PARTITION BY (id) order by tuple()" diff --git a/tests/integration/test_cluster_copier/test_three_nodes.py b/tests/integration/test_cluster_copier/test_three_nodes.py index 31d6c0448f4..e7d07757adb 100644 --- a/tests/integration/test_cluster_copier/test_three_nodes.py +++ b/tests/integration/test_cluster_copier/test_three_nodes.py @@ -19,7 +19,6 @@ cluster = ClickHouseCluster(__file__) def started_cluster(): global cluster try: - for name in ["first", "second", "third"]: cluster.add_instance( name, diff --git a/tests/integration/test_cluster_copier/test_two_nodes.py b/tests/integration/test_cluster_copier/test_two_nodes.py index 10ab7d03b00..2b6fcf6cac2 100644 --- a/tests/integration/test_cluster_copier/test_two_nodes.py +++ b/tests/integration/test_cluster_copier/test_two_nodes.py @@ -19,7 +19,6 @@ cluster = ClickHouseCluster(__file__) def started_cluster(): global cluster try: - for name in ["first_of_two", "second_of_two"]: instance = cluster.add_instance( name, diff --git a/tests/integration/test_composable_protocols/test.py b/tests/integration/test_composable_protocols/test.py index bc87fea5296..df74cfffa54 100644 --- a/tests/integration/test_composable_protocols/test.py +++ b/tests/integration/test_composable_protocols/test.py @@ -63,7 +63,6 @@ def netcat(hostname, port, content): def test_connections(): - client = Client(server.ip_address, 9000, command=cluster.client_bin_path) assert client.query("SELECT 1") == "1\n" diff --git a/tests/integration/test_create_query_constraints/test.py b/tests/integration/test_create_query_constraints/test.py index 8df043fd24b..33c41b4f161 100644 --- a/tests/integration/test_create_query_constraints/test.py +++ b/tests/integration/test_create_query_constraints/test.py @@ -25,7 +25,6 @@ def start_cluster(): def test_create_query_const_constraints(): - instance.query("CREATE USER u_const SETTINGS max_threads = 1 CONST") instance.query("GRANT ALL ON *.* TO u_const") @@ -57,7 +56,6 @@ def test_create_query_const_constraints(): def test_create_query_minmax_constraints(): - instance.query("CREATE USER u_minmax SETTINGS max_threads = 4 MIN 2 MAX 6") instance.query("GRANT ALL ON *.* TO u_minmax") diff --git a/tests/integration/test_dictionaries_all_layouts_separate_sources/common.py b/tests/integration/test_dictionaries_all_layouts_separate_sources/common.py index b38e81b0227..01addae2542 100644 --- a/tests/integration/test_dictionaries_all_layouts_separate_sources/common.py +++ b/tests/integration/test_dictionaries_all_layouts_separate_sources/common.py @@ -348,7 +348,6 @@ class RangedLayoutTester(BaseLayoutTester): self.layouts = LAYOUTS_RANGED def execute(self, layout_name, node): - if layout_name not in self.layout_to_dictionary: raise RuntimeError("Source doesn't support layout: {}".format(layout_name)) diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index 027ef8feed0..2428c53854e 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -7,7 +7,6 @@ import pytest def started_cluster(): global cluster try: - cluster = ClickHouseCluster(__file__) cluster.add_instance( "disks_app_test", main_configs=["config.xml"], with_minio=True diff --git a/tests/integration/test_distributed_ddl_parallel/test.py b/tests/integration/test_distributed_ddl_parallel/test.py index 6ebfe472e09..eb98dd3e230 100644 --- a/tests/integration/test_distributed_ddl_parallel/test.py +++ 
b/tests/integration/test_distributed_ddl_parallel/test.py @@ -10,6 +10,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) + # By default the exceptions that was throwed in threads will be ignored # (they will not mark the test as failed, only printed to stderr). # diff --git a/tests/integration/test_fetch_memory_usage/test.py b/tests/integration/test_fetch_memory_usage/test.py index a4371140150..7591cc0e8a9 100644 --- a/tests/integration/test_fetch_memory_usage/test.py +++ b/tests/integration/test_fetch_memory_usage/test.py @@ -18,7 +18,6 @@ def started_cluster(): def test_huge_column(started_cluster): - if ( node.is_built_with_thread_sanitizer() or node.is_built_with_memory_sanitizer() diff --git a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py index b8bafb3d0c1..fe69d72c1c7 100644 --- a/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py +++ b/tests/integration/test_host_regexp_multiple_ptr_records_concurrent/scripts/stress_test.py @@ -13,7 +13,6 @@ number_of_iterations = 100 def perform_request(): - buffer = BytesIO() crl = pycurl.Curl() crl.setopt(pycurl.INTERFACE, client_ip) diff --git a/tests/integration/test_jbod_balancer/test.py b/tests/integration/test_jbod_balancer/test.py index e746698611a..df34a075d5a 100644 --- a/tests/integration/test_jbod_balancer/test.py +++ b/tests/integration/test_jbod_balancer/test.py @@ -45,7 +45,6 @@ def start_cluster(): def check_balance(node, table): - partitions = node.query( """ WITH diff --git a/tests/integration/test_keeper_and_access_storage/test.py b/tests/integration/test_keeper_and_access_storage/test.py index 6ec307f7082..0314825b6b7 100644 --- a/tests/integration/test_keeper_and_access_storage/test.py +++ b/tests/integration/test_keeper_and_access_storage/test.py @@ -10,6 +10,7 @@ node1 = cluster.add_instance( "node1", main_configs=["configs/keeper.xml"], stay_alive=True ) + # test that server is able to start @pytest.fixture(scope="module") def started_cluster(): diff --git a/tests/integration/test_keeper_back_to_back/test.py b/tests/integration/test_keeper_back_to_back/test.py index 73fface02b4..b737ac284d2 100644 --- a/tests/integration/test_keeper_back_to_back/test.py +++ b/tests/integration/test_keeper_back_to_back/test.py @@ -546,7 +546,6 @@ def test_random_requests(started_cluster): def test_end_of_session(started_cluster): - fake_zk1 = None fake_zk2 = None genuine_zk1 = None @@ -685,6 +684,7 @@ def test_concurrent_watches(started_cluster): nonlocal watches_created nonlocal all_paths_created fake_zk.ensure_path(global_path + "/" + str(i)) + # new function each time def dumb_watch(event): nonlocal dumb_watch_triggered_counter diff --git a/tests/integration/test_keeper_persistent_log/test.py b/tests/integration/test_keeper_persistent_log/test.py index 70cc14fe26d..4164ffb33d3 100644 --- a/tests/integration/test_keeper_persistent_log/test.py +++ b/tests/integration/test_keeper_persistent_log/test.py @@ -163,7 +163,6 @@ def test_state_duplicate_restart(started_cluster): # http://zookeeper-user.578899.n2.nabble.com/Why-are-ephemeral-nodes-written-to-disk-tp7583403p7583418.html def test_ephemeral_after_restart(started_cluster): - try: node_zk = None node_zk2 = None diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index 063421bf922..de5a9416119 100644 
--- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -114,7 +114,6 @@ def start_clickhouse(): def copy_zookeeper_data(make_zk_snapshots): - if make_zk_snapshots: # force zookeeper to create snapshot generate_zk_snapshot() else: diff --git a/tests/integration/test_merge_tree_load_parts/test.py b/tests/integration/test_merge_tree_load_parts/test.py index 777b6f14fc6..dfbe00c8e28 100644 --- a/tests/integration/test_merge_tree_load_parts/test.py +++ b/tests/integration/test_merge_tree_load_parts/test.py @@ -148,17 +148,17 @@ def test_merge_tree_load_parts_corrupted(started_cluster): node1.query("SYSTEM WAIT LOADING PARTS mt_load_parts_2") def check_parts_loading(node, partition, loaded, failed, skipped): - for (min_block, max_block) in loaded: + for min_block, max_block in loaded: part_name = f"{partition}_{min_block}_{max_block}" assert node.contains_in_log(f"Loading Active part {part_name}") assert node.contains_in_log(f"Finished loading Active part {part_name}") - for (min_block, max_block) in failed: + for min_block, max_block in failed: part_name = f"{partition}_{min_block}_{max_block}" assert node.contains_in_log(f"Loading Active part {part_name}") assert not node.contains_in_log(f"Finished loading Active part {part_name}") - for (min_block, max_block) in skipped: + for min_block, max_block in skipped: part_name = f"{partition}_{min_block}_{max_block}" assert not node.contains_in_log(f"Loading Active part {part_name}") assert not node.contains_in_log(f"Finished loading Active part {part_name}") diff --git a/tests/integration/test_merge_tree_s3_failover/s3_endpoint/endpoint.py b/tests/integration/test_merge_tree_s3_failover/s3_endpoint/endpoint.py index b6567dfebc5..4613fdb850b 100644 --- a/tests/integration/test_merge_tree_s3_failover/s3_endpoint/endpoint.py +++ b/tests/integration/test_merge_tree_s3_failover/s3_endpoint/endpoint.py @@ -42,7 +42,6 @@ def delete(_bucket): @route("/<_bucket>/<_path:path>", ["GET", "POST", "PUT", "DELETE"]) def server(_bucket, _path): - # It's delete query for failed part if _path.endswith("delete"): response.set_header("Location", "http://minio1:9001/" + _bucket + "/" + _path) diff --git a/tests/integration/test_merge_tree_settings_constraints/test.py b/tests/integration/test_merge_tree_settings_constraints/test.py index 0bb0179108d..be6e2a31873 100644 --- a/tests/integration/test_merge_tree_settings_constraints/test.py +++ b/tests/integration/test_merge_tree_settings_constraints/test.py @@ -20,7 +20,6 @@ def start_cluster(): def test_merge_tree_settings_constraints(): - assert "Setting storage_policy should not be changed" in instance.query_and_get_error( f"CREATE TABLE wrong_table (number Int64) engine = MergeTree() ORDER BY number SETTINGS storage_policy = 'secret_policy'" ) diff --git a/tests/integration/test_old_parts_finally_removed/test.py b/tests/integration/test_old_parts_finally_removed/test.py index 108b72c5ccd..5347d433419 100644 --- a/tests/integration/test_old_parts_finally_removed/test.py +++ b/tests/integration/test_old_parts_finally_removed/test.py @@ -63,7 +63,6 @@ def test_part_finally_removed(started_cluster): ) for i in range(60): - if ( node1.query( "SELECT count() from system.parts WHERE table = 'drop_outdated_part'" diff --git a/tests/integration/test_partition/test.py b/tests/integration/test_partition/test.py index ae4393fc6f6..a34141c6189 100644 --- a/tests/integration/test_partition/test.py +++ b/tests/integration/test_partition/test.py @@ -528,7 
+528,9 @@ def test_make_clone_in_detached(started_cluster): ["cp", "-r", path + "all_0_0_0", path + "detached/broken_all_0_0_0"] ) assert_eq_with_retry(instance, "select * from clone_in_detached", "\n") - assert ["broken_all_0_0_0",] == sorted( + assert [ + "broken_all_0_0_0", + ] == sorted( instance.exec_in_container(["ls", path + "detached/"]).strip().split("\n") ) diff --git a/tests/integration/test_password_constraints/test.py b/tests/integration/test_password_constraints/test.py index e3628861b28..9cdff51caa1 100644 --- a/tests/integration/test_password_constraints/test.py +++ b/tests/integration/test_password_constraints/test.py @@ -17,7 +17,6 @@ def start_cluster(): def test_complexity_rules(start_cluster): - error_message = "DB::Exception: Invalid password. The password should: be at least 12 characters long, contain at least 1 numeric character, contain at least 1 lowercase character, contain at least 1 uppercase character, contain at least 1 special character" assert error_message in node.query_and_get_error( "CREATE USER u_1 IDENTIFIED WITH plaintext_password BY ''" diff --git a/tests/integration/test_profile_events_s3/test.py b/tests/integration/test_profile_events_s3/test.py index b5e095514cb..5c56b5c05cb 100644 --- a/tests/integration/test_profile_events_s3/test.py +++ b/tests/integration/test_profile_events_s3/test.py @@ -37,6 +37,8 @@ init_list = { "ReadBufferFromS3InitMicroseconds": 0, "ReadBufferFromS3RequestsErrors": 0, "WriteBufferFromS3Bytes": 0, + "WriteBufferFromS3Microseconds": 0, + "WriteBufferFromS3RequestsErrors": 0, "S3ReadMicroseconds": 0, "S3ReadRequestsCount": 0, "S3ReadRequestsErrorsTotal": 0, diff --git a/tests/integration/test_read_only_table/test.py b/tests/integration/test_read_only_table/test.py index 914c6a99508..df084f9dbbd 100644 --- a/tests/integration/test_read_only_table/test.py +++ b/tests/integration/test_read_only_table/test.py @@ -49,7 +49,6 @@ def start_cluster(): def test_restart_zookeeper(start_cluster): - for table_id in range(NUM_TABLES): node1.query( f"INSERT INTO test_table_{table_id} VALUES (1), (2), (3), (4), (5);" diff --git a/tests/integration/test_reload_auxiliary_zookeepers/test.py b/tests/integration/test_reload_auxiliary_zookeepers/test.py index bb1455333fc..476c5dee99e 100644 --- a/tests/integration/test_reload_auxiliary_zookeepers/test.py +++ b/tests/integration/test_reload_auxiliary_zookeepers/test.py @@ -20,7 +20,6 @@ def start_cluster(): def test_reload_auxiliary_zookeepers(start_cluster): - node.query( "CREATE TABLE simple (date Date, id UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/0/simple', 'node') ORDER BY tuple() PARTITION BY date;" ) diff --git a/tests/integration/test_s3_aws_sdk_has_slightly_unreliable_behaviour/s3_endpoint/endpoint.py b/tests/integration/test_s3_aws_sdk_has_slightly_unreliable_behaviour/s3_endpoint/endpoint.py index d6a732cc681..1d33ca02f86 100644 --- a/tests/integration/test_s3_aws_sdk_has_slightly_unreliable_behaviour/s3_endpoint/endpoint.py +++ b/tests/integration/test_s3_aws_sdk_has_slightly_unreliable_behaviour/s3_endpoint/endpoint.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from bottle import request, route, run, response + # Handle for MultipleObjectsDelete. 
@route("/<_bucket>", ["POST"]) def delete(_bucket): diff --git a/tests/integration/test_s3_with_proxy/test.py b/tests/integration/test_s3_with_proxy/test.py index 1102d190a87..1af040c3c30 100644 --- a/tests/integration/test_s3_with_proxy/test.py +++ b/tests/integration/test_s3_with_proxy/test.py @@ -5,6 +5,7 @@ import time import pytest from helpers.cluster import ClickHouseCluster + # Runs simple proxy resolver in python env container. def run_resolver(cluster): container_id = cluster.get_container_id("resolver") diff --git a/tests/integration/test_sharding_key_from_default_column/test.py b/tests/integration/test_sharding_key_from_default_column/test.py index 1ecf96305a4..5bce3ee4169 100644 --- a/tests/integration/test_sharding_key_from_default_column/test.py +++ b/tests/integration/test_sharding_key_from_default_column/test.py @@ -49,14 +49,14 @@ def test_default_column(): "INSERT INTO TABLE dist (x) VALUES (1), (2), (3), (4)", settings=settings ) node1.query("SYSTEM FLUSH DISTRIBUTED dist") - assert node1.query("SELECT x, y, z FROM local") == TSV( + assert node1.query("SELECT x, y, z FROM local ORDER BY x") == TSV( [[2, 102, 104], [4, 104, 108]] ) - assert node2.query("SELECT x, y, z FROM local") == TSV( + assert node2.query("SELECT x, y, z FROM local ORDER BY x") == TSV( [[1, 101, 102], [3, 103, 106]] ) - assert node1.query("SELECT x, y, z FROM dist") == TSV( - [[2, 102, 104], [4, 104, 108], [1, 101, 102], [3, 103, 106]] + assert node1.query("SELECT x, y, z FROM dist ORDER BY x") == TSV( + [[1, 101, 102], [2, 102, 104], [3, 103, 106], [4, 104, 108]] ) # INSERT INTO TABLE dist (x, y) @@ -66,12 +66,12 @@ def test_default_column(): settings=settings, ) node1.query("SYSTEM FLUSH DISTRIBUTED dist") - assert node1.query("SELECT x, y, z FROM local") == TSV([[2, 22, 24]]) - assert node2.query("SELECT x, y, z FROM local") == TSV( + assert node1.query("SELECT x, y, z FROM local ORDER BY x") == TSV([[2, 22, 24]]) + assert node2.query("SELECT x, y, z FROM local ORDER BY x") == TSV( [[1, 11, 12], [3, 33, 36]] ) - assert node1.query("SELECT x, y, z FROM dist") == TSV( - [[2, 22, 24], [1, 11, 12], [3, 33, 36]] + assert node1.query("SELECT x, y, z FROM dist ORDER BY x") == TSV( + [[1, 11, 12], [2, 22, 24], [3, 33, 36]] ) @@ -96,14 +96,14 @@ def test_materialized_column_allow_insert_materialized(): "INSERT INTO TABLE dist (x) VALUES (1), (2), (3), (4)", settings=settings ) node1.query("SYSTEM FLUSH DISTRIBUTED dist") - assert node1.query("SELECT x, y, z FROM local") == TSV( + assert node1.query("SELECT x, y, z FROM local ORDER BY x") == TSV( [[2, 102, 104], [4, 104, 108]] ) - assert node2.query("SELECT x, y, z FROM local") == TSV( + assert node2.query("SELECT x, y, z FROM local ORDER BY x") == TSV( [[1, 101, 102], [3, 103, 106]] ) - assert node1.query("SELECT x, y, z FROM dist") == TSV( - [[2, 102, 104], [4, 104, 108], [1, 101, 102], [3, 103, 106]] + assert node1.query("SELECT x, y, z FROM dist ORDER BY x") == TSV( + [[1, 101, 102], [2, 102, 104], [3, 103, 106], [4, 104, 108]] ) # INSERT INTO TABLE dist (x, y) @@ -113,12 +113,12 @@ def test_materialized_column_allow_insert_materialized(): settings=settings, ) node1.query("SYSTEM FLUSH DISTRIBUTED dist") - assert node1.query("SELECT x, y, z FROM local") == TSV([[2, 22, 24]]) - assert node2.query("SELECT x, y, z FROM local") == TSV( + assert node1.query("SELECT x, y, z FROM local ORDER BY x") == TSV([[2, 22, 24]]) + assert node2.query("SELECT x, y, z FROM local ORDER BY x") == TSV( [[1, 11, 12], [3, 33, 36]] ) - assert node1.query("SELECT x, y, z FROM 
dist") == TSV( - [[2, 22, 24], [1, 11, 12], [3, 33, 36]] + assert node1.query("SELECT x, y, z FROM dist ORDER BY x") == TSV( + [[1, 11, 12], [2, 22, 24], [3, 33, 36]] ) @@ -143,14 +143,14 @@ def test_materialized_column_disallow_insert_materialized(): "INSERT INTO TABLE dist (x) VALUES (1), (2), (3), (4)", settings=settings ) node1.query("SYSTEM FLUSH DISTRIBUTED dist") - assert node1.query("SELECT x, y, z FROM local") == TSV( + assert node1.query("SELECT x, y, z FROM local ORDER BY x") == TSV( [[2, 202, -200], [4, 204, -200]] ) - assert node2.query("SELECT x, y, z FROM local") == TSV( + assert node2.query("SELECT x, y, z FROM local ORDER BY x") == TSV( [[1, 201, -200], [3, 203, -200]] ) - assert node1.query("SELECT x, y, z FROM dist") == TSV( - [[2, 202, -200], [4, 204, -200], [1, 201, -200], [3, 203, -200]] + assert node1.query("SELECT x, y, z FROM dist ORDER BY x") == TSV( + [[1, 201, -200], [2, 202, -200], [3, 203, -200], [4, 204, -200]] ) # INSERT INTO TABLE dist (x, y) @@ -183,12 +183,12 @@ def test_materialized_column_disallow_insert_materialized_different_shards(): "INSERT INTO TABLE dist (x) VALUES (1), (2), (3), (4)", settings=settings ) node1.query("SYSTEM FLUSH DISTRIBUTED dist") - assert node1.query("SELECT x, y, z FROM local") == TSV( + assert node1.query("SELECT x, y, z FROM local ORDER BY x") == TSV( [[1, 201, -200], [3, 203, -200]] ) - assert node2.query("SELECT x, y, z FROM local") == TSV( + assert node2.query("SELECT x, y, z FROM local ORDER BY x") == TSV( [[2, 202, -200], [4, 204, -200]] ) - assert node1.query("SELECT x, y, z FROM dist") == TSV( - [[1, 201, -200], [3, 203, -200], [2, 202, -200], [4, 204, -200]] + assert node1.query("SELECT x, y, z FROM dist ORDER BY x") == TSV( + [[1, 201, -200], [2, 202, -200], [3, 203, -200], [4, 204, -200]] ) diff --git a/tests/integration/test_ssl_cert_authentication/test.py b/tests/integration/test_ssl_cert_authentication/test.py index 7c62ca0d8b6..b3570b6e281 100644 --- a/tests/integration/test_ssl_cert_authentication/test.py +++ b/tests/integration/test_ssl_cert_authentication/test.py @@ -87,7 +87,6 @@ config = """ def execute_query_native(node, query, user, cert_name): - config_path = f"{SCRIPT_DIR}/configs/client.xml" formatted = config.format( diff --git a/tests/integration/test_storage_kafka/kafka_pb2.py b/tests/integration/test_storage_kafka/kafka_pb2.py index 7de1363bbf1..3e47af6c1e0 100644 --- a/tests/integration/test_storage_kafka/kafka_pb2.py +++ b/tests/integration/test_storage_kafka/kafka_pb2.py @@ -21,7 +21,6 @@ _builder.BuildTopDescriptorsAndMessages( DESCRIPTOR, "clickhouse_path.format_schemas.kafka_pb2", globals() ) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None _KEYVALUEPAIR._serialized_start = 46 _KEYVALUEPAIR._serialized_end = 88 diff --git a/tests/integration/test_storage_kafka/message_with_repeated_pb2.py b/tests/integration/test_storage_kafka/message_with_repeated_pb2.py index 4d1a23c0b43..3715a9bea04 100644 --- a/tests/integration/test_storage_kafka/message_with_repeated_pb2.py +++ b/tests/integration/test_storage_kafka/message_with_repeated_pb2.py @@ -21,7 +21,6 @@ _builder.BuildTopDescriptorsAndMessages( DESCRIPTOR, "clickhouse_path.format_schemas.message_with_repeated_pb2", globals() ) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None DESCRIPTOR._serialized_options = b"H\001" _MESSAGE._serialized_start = 62 diff --git a/tests/integration/test_storage_kafka/social_pb2.py b/tests/integration/test_storage_kafka/social_pb2.py index 830ade81d33..f91a7bd0539 
100644 --- a/tests/integration/test_storage_kafka/social_pb2.py +++ b/tests/integration/test_storage_kafka/social_pb2.py @@ -21,7 +21,6 @@ _builder.BuildTopDescriptorsAndMessages( DESCRIPTOR, "clickhouse_path.format_schemas.social_pb2", globals() ) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None _USER._serialized_start = 47 _USER._serialized_end = 90 diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 51952ac1eb7..3a4fa6c6bfe 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -121,7 +121,7 @@ def kafka_create_topic( def kafka_delete_topic(admin_client, topic, max_retries=50): result = admin_client.delete_topics([topic]) - for (topic, e) in result.topic_error_codes: + for topic, e in result.topic_error_codes: if e == 0: logging.debug(f"Topic {topic} deleted") else: @@ -917,9 +917,7 @@ def describe_consumer_group(kafka_cluster, name): member_info["client_id"] = client_id member_info["client_host"] = client_host member_topics_assignment = [] - for (topic, partitions) in MemberAssignment.decode( - member_assignment - ).assignment: + for topic, partitions in MemberAssignment.decode(member_assignment).assignment: member_topics_assignment.append({"topic": topic, "partitions": partitions}) member_info["assignment"] = member_topics_assignment res.append(member_info) @@ -1537,7 +1535,6 @@ def test_kafka_protobuf_no_delimiter(kafka_cluster): def test_kafka_materialized_view(kafka_cluster): - instance.query( """ DROP TABLE IF EXISTS test.view; @@ -2315,7 +2312,6 @@ def test_kafka_virtual_columns2(kafka_cluster): def test_kafka_produce_key_timestamp(kafka_cluster): - admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -2444,7 +2440,6 @@ def test_kafka_insert_avro(kafka_cluster): def test_kafka_produce_consume_avro(kafka_cluster): - admin_client = KafkaAdminClient( bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port) ) @@ -4031,7 +4026,6 @@ def test_kafka_predefined_configuration(kafka_cluster): # https://github.com/ClickHouse/ClickHouse/issues/26643 def test_issue26643(kafka_cluster): - # for backporting: # admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") admin_client = KafkaAdminClient( @@ -4313,7 +4307,6 @@ def test_row_based_formats(kafka_cluster): "RowBinaryWithNamesAndTypes", "MsgPack", ]: - print(format_name) kafka_create_topic(admin_client, format_name) @@ -4438,7 +4431,6 @@ def test_block_based_formats_2(kafka_cluster): "ORC", "JSONCompactColumns", ]: - kafka_create_topic(admin_client, format_name) instance.query( diff --git a/tests/integration/test_storage_nats/nats_pb2.py b/tests/integration/test_storage_nats/nats_pb2.py index 4330ff57950..e9e5cb72363 100644 --- a/tests/integration/test_storage_nats/nats_pb2.py +++ b/tests/integration/test_storage_nats/nats_pb2.py @@ -31,7 +31,6 @@ ProtoKeyValue = _reflection.GeneratedProtocolMessageType( _sym_db.RegisterMessage(ProtoKeyValue) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None _PROTOKEYVALUE._serialized_start = 45 _PROTOKEYVALUE._serialized_end = 88 diff --git a/tests/integration/test_storage_postgresql_replica/test.py b/tests/integration/test_storage_postgresql_replica/test.py index 5df8b9029e6..8666d7ae58c 100644 --- a/tests/integration/test_storage_postgresql_replica/test.py +++ b/tests/integration/test_storage_postgresql_replica/test.py @@ -706,7 +706,6 @@ def 
test_abrupt_connection_loss_while_heavy_replication(started_cluster): def test_abrupt_server_restart_while_heavy_replication(started_cluster): - # FIXME (kssenii) temporary disabled if instance.is_built_with_sanitizer(): pytest.skip("Temporary disabled (FIXME)") diff --git a/tests/integration/test_storage_rabbitmq/rabbitmq_pb2.py b/tests/integration/test_storage_rabbitmq/rabbitmq_pb2.py index e017b4e66c2..a5845652eef 100644 --- a/tests/integration/test_storage_rabbitmq/rabbitmq_pb2.py +++ b/tests/integration/test_storage_rabbitmq/rabbitmq_pb2.py @@ -21,7 +21,6 @@ _builder.BuildTopDescriptorsAndMessages( DESCRIPTOR, "clickhouse_path.format_schemas.rabbitmq_pb2", globals() ) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None _KEYVALUEPROTO._serialized_start = 49 _KEYVALUEPROTO._serialized_end = 92 diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 2e54f21787a..53b6c4109ef 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -2864,7 +2864,6 @@ def test_rabbitmq_predefined_configuration(rabbitmq_cluster): def test_rabbitmq_msgpack(rabbitmq_cluster): - instance.query( """ drop table if exists rabbit_in; @@ -2908,7 +2907,6 @@ def test_rabbitmq_msgpack(rabbitmq_cluster): def test_rabbitmq_address(rabbitmq_cluster): - instance2.query( """ drop table if exists rabbit_in; @@ -3243,7 +3241,6 @@ def test_block_based_formats_2(rabbitmq_cluster): "ORC", "JSONCompactColumns", ]: - print(format_name) instance.query( diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 8b20727a7b5..4d493d9526b 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -18,6 +18,7 @@ MINIO_INTERNAL_PORT = 9001 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + # Creates S3 bucket for tests and allows anonymous read-write access to it. def prepare_s3_bucket(started_cluster): # Allows read-write access for bucket without authorization. diff --git a/tests/integration/test_storage_s3/test_invalid_env_credentials.py b/tests/integration/test_storage_s3/test_invalid_env_credentials.py index 2f5d9349904..aa6479a2ed3 100644 --- a/tests/integration/test_storage_s3/test_invalid_env_credentials.py +++ b/tests/integration/test_storage_s3/test_invalid_env_credentials.py @@ -11,6 +11,7 @@ MINIO_INTERNAL_PORT = 9001 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + # Creates S3 bucket for tests and allows anonymous read-write access to it. def prepare_s3_bucket(started_cluster): # Allows read-write access for bucket without authorization. 
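The prepare_s3_bucket() helpers touched above only gain a formatting blank line; the behaviour their comment describes — create a test bucket and allow anonymous read-write access — amounts to applying an S3-style bucket policy. Below is an illustrative sketch using the MinIO Python SDK with made-up endpoint and credential values; it is not the repository's actual helper.

import json

from minio import Minio


def make_public_bucket(endpoint="minio1:9001", bucket="root"):
    # hypothetical endpoint and credentials, chosen only for the sketch
    client = Minio(endpoint, access_key="minio", secret_key="minio123", secure=False)
    if not client.bucket_exists(bucket):
        client.make_bucket(bucket)
    policy = {
        "Version": "2012-10-17",
        "Statement": [
            {
                # anonymous clients may read, write and delete objects in the bucket
                "Effect": "Allow",
                "Principal": {"AWS": ["*"]},
                "Action": ["s3:GetObject", "s3:PutObject", "s3:DeleteObject"],
                "Resource": [f"arn:aws:s3:::{bucket}/*"],
            },
            {
                # ...and list the bucket itself
                "Effect": "Allow",
                "Principal": {"AWS": ["*"]},
                "Action": ["s3:GetBucketLocation", "s3:ListBucket"],
                "Resource": [f"arn:aws:s3:::{bucket}"],
            },
        ],
    }
    client.set_bucket_policy(bucket, json.dumps(policy))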
diff --git a/tests/integration/test_system_merges/test.py b/tests/integration/test_system_merges/test.py index 0a469bd7bbd..ff303afe19e 100644 --- a/tests/integration/test_system_merges/test.py +++ b/tests/integration/test_system_merges/test.py @@ -171,7 +171,6 @@ def test_mutation_simple(started_cluster, replicated): starting_block = 0 if replicated else 1 try: - for node in nodes: node.query( f"create table {name} (a Int64) engine={engine} order by tuple()" diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index 99978cbf6dc..89824293320 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -1863,7 +1863,7 @@ def test_ttl_move_if_exists(started_cluster, name, dest_type): ) ) - for (node, policy) in zip( + for node, policy in zip( [node1, node2], ["only_jbod_1", "small_jbod_with_external"] ): node.query( diff --git a/tests/integration/test_zero_copy_fetch/test.py b/tests/integration/test_zero_copy_fetch/test.py index b71752528d3..9b9aa5e0da7 100644 --- a/tests/integration/test_zero_copy_fetch/test.py +++ b/tests/integration/test_zero_copy_fetch/test.py @@ -16,7 +16,6 @@ cluster = ClickHouseCluster(__file__) @pytest.fixture(scope="module") def started_cluster(): try: - cluster.add_instance( "node1", main_configs=["configs/storage_conf.xml"], diff --git a/tests/queries/0_stateless/00386_long_in_pk.python b/tests/queries/0_stateless/00386_long_in_pk.python index e33bb254c60..c7b04102dc5 100644 --- a/tests/queries/0_stateless/00386_long_in_pk.python +++ b/tests/queries/0_stateless/00386_long_in_pk.python @@ -1,57 +1,72 @@ #!/usr/bin/env python3 + def gen_queries(): - create_template = 'create table tab_00386 (a Int8, b String, c Tuple(Int8), d Tuple(Tuple(Int8)), e Tuple(Int8, String), f Tuple(Tuple(Int8, String))) engine = MergeTree order by ({}) partition by {}' - drop_query = 'drop table if exists tab_00386' - values = ('1', "'a'", 'tuple(1)', 'tuple(tuple(1))', "(1, 'a')", "tuple((1, 'a'))") + create_template = "create table tab_00386 (a Int8, b String, c Tuple(Int8), d Tuple(Tuple(Int8)), e Tuple(Int8, String), f Tuple(Tuple(Int8, String))) engine = MergeTree order by ({}) partition by {}" + drop_query = "drop table if exists tab_00386" + values = ("1", "'a'", "tuple(1)", "tuple(tuple(1))", "(1, 'a')", "tuple((1, 'a'))") insert_query = "insert into tab_00386 values (1, 'a', tuple(1), tuple(tuple(1)), (1, 'a'), tuple((1, 'a')))" - columns = tuple('a b c d'.split()) - order_by_columns = tuple('a b c'.split()) - partition_by_columns = tuple(' tuple() a'.split()) + columns = tuple("a b c d".split()) + order_by_columns = tuple("a b c".split()) + partition_by_columns = tuple(" tuple() a".split()) for partition in partition_by_columns: for key_mask in range(1, 1 << len(order_by_columns)): - key = ','.join(order_by_columns[i] for i in range(len(order_by_columns)) if (1 << i) & key_mask != 0) + key = ",".join( + order_by_columns[i] + for i in range(len(order_by_columns)) + if (1 << i) & key_mask != 0 + ) create_query = create_template.format(key, partition) for q in (drop_query, create_query, insert_query): yield q for column, value in zip(columns, values): - yield 'select {} in {} from tab_00386'.format(column, value) - yield 'select {} in tuple({}) from tab_00386'.format(column, value) - yield 'select {} in (select {} from tab_00386) from tab_00386'.format(column, column) + yield "select {} in {} from tab_00386".format(column, value) + yield "select {} in tuple({}) from tab_00386".format(column, value) + 
yield "select {} in (select {} from tab_00386) from tab_00386".format( + column, column + ) for i in range(len(columns)): for j in range(i, len(columns)): - yield 'select ({}, {}) in tuple({}, {}) from tab_00386'.format(columns[i], columns[j], values[i], values[j]) - yield 'select ({}, {}) in (select {}, {} from tab_00386) from tab_00386'.format(columns[i], columns[j], columns[i], columns[j]) - yield 'select ({}, {}) in (select ({}, {}) from tab_00386) from tab_00386'.format(columns[i], columns[j], columns[i], columns[j]) + yield "select ({}, {}) in tuple({}, {}) from tab_00386".format( + columns[i], columns[j], values[i], values[j] + ) + yield "select ({}, {}) in (select {}, {} from tab_00386) from tab_00386".format( + columns[i], columns[j], columns[i], columns[j] + ) + yield "select ({}, {}) in (select ({}, {}) from tab_00386) from tab_00386".format( + columns[i], columns[j], columns[i], columns[j] + ) yield "select e in (1, 'a') from tab_00386" yield "select f in tuple((1, 'a')) from tab_00386" yield "select f in tuple(tuple((1, 'a'))) from tab_00386" - yield 'select e in (select a, b from tab_00386) from tab_00386' - yield 'select e in (select (a, b) from tab_00386) from tab_00386' - yield 'select f in (select tuple((a, b)) from tab_00386) from tab_00386' - yield 'select tuple(f) in (select tuple(tuple((a, b))) from tab_00386) from tab_00386' + yield "select e in (select a, b from tab_00386) from tab_00386" + yield "select e in (select (a, b) from tab_00386) from tab_00386" + yield "select f in (select tuple((a, b)) from tab_00386) from tab_00386" + yield "select tuple(f) in (select tuple(tuple((a, b))) from tab_00386) from tab_00386" + import requests import os + def main(): - url = os.environ['CLICKHOUSE_URL'] + url = os.environ["CLICKHOUSE_URL"] for q in gen_queries(): resp = requests.post(url, data=q) - if resp.status_code != 200 or resp.text.strip() not in ('1', ''): - print('Query:', q) - print('Code:', resp.status_code) + if resp.status_code != 200 or resp.text.strip() not in ("1", ""): + print("Query:", q) + print("Code:", resp.status_code) print(resp.text) break - requests.post(url, data='drop table tab_00386') + requests.post(url, data="drop table tab_00386") + if __name__ == "__main__": main() - diff --git a/tests/queries/0_stateless/00411_long_accurate_number_comparison.python b/tests/queries/0_stateless/00411_long_accurate_number_comparison.python index 3c8a8f2ea25..183a2637d36 100644 --- a/tests/queries/0_stateless/00411_long_accurate_number_comparison.python +++ b/tests/queries/0_stateless/00411_long_accurate_number_comparison.python @@ -2,8 +2,20 @@ import os, itertools, urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse, sys + def get_ch_answer(query): - return urllib.request.urlopen(os.environ.get('CLICKHOUSE_URL', 'http://localhost:' + os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') ), data=query.encode()).read().decode() + return ( + urllib.request.urlopen( + os.environ.get( + "CLICKHOUSE_URL", + "http://localhost:" + os.environ.get("CLICKHOUSE_PORT_HTTP", "8123"), + ), + data=query.encode(), + ) + .read() + .decode() + ) + def check_answers(query, answer): ch_answer = get_ch_answer(query) @@ -13,36 +25,34 @@ def check_answers(query, answer): print("Fetched answer :", ch_answer) exit(-1) + def get_values(): values = [0, 1, -1] for bits in [8, 16, 32, 64]: values += [2**bits, 2**bits - 1] - values += [2**(bits-1) - 1, 2**(bits-1), 2**(bits-1) + 1] - values += [-2**(bits-1) - 1, -2**(bits-1), -2**(bits-1) + 1] + values += [2 ** 
(bits - 1) - 1, 2 ** (bits - 1), 2 ** (bits - 1) + 1] + values += [-(2 ** (bits - 1)) - 1, -(2 ** (bits - 1)), -(2 ** (bits - 1)) + 1] return values + def is_valid_integer(x): - return -2**63 <= x and x <= 2**64-1 + return -(2**63) <= x and x <= 2**64 - 1 -TEST_WITH_CASTING=True -GENERATE_TEST_FILES=False +TEST_WITH_CASTING = True +GENERATE_TEST_FILES = False TYPES = { - "UInt8" : { "bits" : 8, "sign" : False, "float" : False }, - "Int8" : { "bits" : 8, "sign" : True, "float" : False }, - - "UInt16": { "bits" : 16, "sign" : False, "float" : False }, - "Int16" : { "bits" : 16, "sign" : True, "float" : False }, - - "UInt32": { "bits" : 32, "sign" : False, "float" : False }, - "Int32" : { "bits" : 32, "sign" : True, "float" : False }, - - "UInt64": { "bits" : 64, "sign" : False, "float" : False }, - "Int64" : { "bits" : 64, "sign" : True, "float" : False } - - #"Float32" : { "bits" : 32, "sign" : True, "float" : True }, - #"Float64" : { "bits" : 64, "sign" : True, "float" : True } + "UInt8": {"bits": 8, "sign": False, "float": False}, + "Int8": {"bits": 8, "sign": True, "float": False}, + "UInt16": {"bits": 16, "sign": False, "float": False}, + "Int16": {"bits": 16, "sign": True, "float": False}, + "UInt32": {"bits": 32, "sign": False, "float": False}, + "Int32": {"bits": 32, "sign": True, "float": False}, + "UInt64": {"bits": 64, "sign": False, "float": False}, + "Int64": {"bits": 64, "sign": True, "float": False} + # "Float32" : { "bits" : 32, "sign" : True, "float" : True }, + # "Float64" : { "bits" : 64, "sign" : True, "float" : True } } @@ -55,14 +65,18 @@ def inside_range(value, type_name): return True if signed: - return -2**(bits-1) <= value and value <= 2**(bits-1) - 1 + return -(2 ** (bits - 1)) <= value and value <= 2 ** (bits - 1) - 1 else: return 0 <= value and value <= 2**bits - 1 def test_operators(v1, v2, v1_passed, v2_passed): - query_str = "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2},\t".format(v1=v1_passed, v2=v2_passed) - query_str += "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2} ".format(v1=v2_passed, v2=v1_passed) + query_str = "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2},\t".format( + v1=v1_passed, v2=v2_passed + ) + query_str += "{v1} = {v2}, {v1} != {v2}, {v1} < {v2}, {v1} <= {v2}, {v1} > {v2}, {v1} >= {v2} ".format( + v1=v2_passed, v2=v1_passed + ) answers = [v1 == v2, v1 != v2, v1 < v2, v1 <= v2, v1 > v2, v1 >= v2] answers += [v2 == v1, v2 != v1, v2 < v1, v2 <= v1, v2 > v1, v2 >= v1] @@ -74,6 +88,7 @@ def test_operators(v1, v2, v1_passed, v2_passed): VALUES = [x for x in get_values() if is_valid_integer(x)] + def test_pair(v1, v2): query = "SELECT {}, {}, ".format(v1, v2) answers = "{}\t{}\t".format(v1, v2) @@ -87,19 +102,58 @@ def test_pair(v1, v2): if inside_range(v1, t1): for t2 in TYPES.keys(): if inside_range(v2, t2): - q, a = test_operators(v1, v2, 'to{}({})'.format(t1, v1), 'to{}({})'.format(t2, v2)) - query += ', ' + q + q, a = test_operators( + v1, v2, "to{}({})".format(t1, v1), "to{}({})".format(t2, v2) + ) + query += ", " + q answers += "\t" + a check_answers(query, answers) return query, answers -VALUES_INT = [0, -1, 1, 2**64-1, 2**63, -2**63, 2**63-1, 2**51, 2**52, 2**53-1, 2**53, 2**53+1, 2**53+2, -2**53+1, -2**53, -2**53-1, -2**53-2, 2*52, -2**52] -VALUES_FLOAT = [float(x) for x in VALUES_INT + [-0.5, 0.5, -1.5, 1.5, 2**53, 2**51 - 0.5, 2**51 + 0.5, 2**60, -2**60, -2**63 - 10000, 2**63 + 10000]] +VALUES_INT = [ + 0, + -1, + 1, + 2**64 - 1, + 
2**63, + -(2**63), + 2**63 - 1, + 2**51, + 2**52, + 2**53 - 1, + 2**53, + 2**53 + 1, + 2**53 + 2, + -(2**53) + 1, + -(2**53), + -(2**53) - 1, + -(2**53) - 2, + 2 * 52, + -(2**52), +] +VALUES_FLOAT = [ + float(x) + for x in VALUES_INT + + [ + -0.5, + 0.5, + -1.5, + 1.5, + 2**53, + 2**51 - 0.5, + 2**51 + 0.5, + 2**60, + -(2**60), + -(2**63) - 10000, + 2**63 + 10000, + ] +] + def test_float_pair(i, f): - f_str = ("%.9f" % f) + f_str = "%.9f" % f query = "SELECT '{}', '{}', ".format(i, f_str) answers = "{}\t{}\t".format(i, f_str) @@ -110,8 +164,8 @@ def test_float_pair(i, f): if TEST_WITH_CASTING: for t1 in TYPES.keys(): if inside_range(i, t1): - q, a = test_operators(i, f, 'to{}({})'.format(t1, i), f_str) - query += ', ' + q + q, a = test_operators(i, f, "to{}({})".format(t1, i), f_str) + query += ", " + q answers += "\t" + a check_answers(query, answers) @@ -120,23 +174,27 @@ def test_float_pair(i, f): def main(): if GENERATE_TEST_FILES: - base_name = '00411_accurate_number_comparison' - sql_file = open(base_name + '.sql', 'wt') - ref_file = open(base_name + '.reference', 'wt') + base_name = "00411_accurate_number_comparison" + sql_file = open(base_name + ".sql", "wt") + ref_file = open(base_name + ".reference", "wt") num_int_tests = len(list(itertools.combinations(VALUES, 2))) num_parts = 4 for part in range(0, num_parts): - if 'int' + str(part + 1) in sys.argv[1:]: - for (v1, v2) in itertools.islice(itertools.combinations(VALUES, 2), part * num_int_tests // num_parts, (part + 1) * num_int_tests // num_parts): + if "int" + str(part + 1) in sys.argv[1:]: + for v1, v2 in itertools.islice( + itertools.combinations(VALUES, 2), + part * num_int_tests // num_parts, + (part + 1) * num_int_tests // num_parts, + ): q, a = test_pair(v1, v2) if GENERATE_TEST_FILES: sql_file.write(q + ";\n") ref_file.write(a + "\n") - if 'float' in sys.argv[1:]: - for (i, f) in itertools.product(VALUES_INT, VALUES_FLOAT): + if "float" in sys.argv[1:]: + for i, f in itertools.product(VALUES_INT, VALUES_FLOAT): q, a = test_float_pair(i, f) if GENERATE_TEST_FILES: sql_file.write(q + ";\n") diff --git a/tests/queries/0_stateless/00646_url_engine.python b/tests/queries/0_stateless/00646_url_engine.python index 5f3b7546dd5..0a26f8039c2 100644 --- a/tests/queries/0_stateless/00646_url_engine.python +++ b/tests/queries/0_stateless/00646_url_engine.python @@ -12,6 +12,7 @@ import subprocess from io import StringIO from http.server import BaseHTTPRequestHandler, HTTPServer + def is_ipv6(host): try: socket.inet_aton(host) @@ -19,6 +20,7 @@ def is_ipv6(host): except: return True + def get_local_port(host, ipv6): if ipv6: family = socket.AF_INET6 @@ -29,8 +31,9 @@ def get_local_port(host, ipv6): fd.bind((host, 0)) return fd.getsockname()[1] -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') -CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') + +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123") ##################################################################################### # This test starts an HTTP server and serves data to clickhouse url-engine based table. @@ -39,27 +42,42 @@ CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') ##################################################################################### # IP-address of this host accessible from the outside world. 
Get the first one -HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0] +HTTP_SERVER_HOST = ( + subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0] +) IS_IPV6 = is_ipv6(HTTP_SERVER_HOST) HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6) # IP address and port of the HTTP server started from this script. HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT) if IS_IPV6: - HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}" + + "/" + ) else: - HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/" + ) + +CSV_DATA = os.path.join( + tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()) +) -CSV_DATA = os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())) def get_ch_answer(query): host = CLICKHOUSE_HOST if IS_IPV6: - host = f'[{host}]' + host = f"[{host}]" - url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP)) + url = os.environ.get( + "CLICKHOUSE_URL", + "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP), + ) return urllib.request.urlopen(url, data=query.encode()).read().decode() + def check_answers(query, answer): ch_answer = get_ch_answer(query) if ch_answer.strip() != answer.strip(): @@ -68,18 +86,19 @@ def check_answers(query, answer): print("Fetched answer :", ch_answer, file=sys.stderr) raise Exception("Fail on query") + class CSVHTTPServer(BaseHTTPRequestHandler): def _set_headers(self): self.send_response(200) - self.send_header('Content-type', 'text/csv') + self.send_header("Content-type", "text/csv") self.end_headers() def do_GET(self): self._set_headers() - with open(CSV_DATA, 'r') as fl: - reader = csv.reader(fl, delimiter=',') + with open(CSV_DATA, "r") as fl: + reader = csv.reader(fl, delimiter=",") for row in reader: - self.wfile.write((', '.join(row) + '\n').encode()) + self.wfile.write((", ".join(row) + "\n").encode()) return def do_HEAD(self): @@ -87,33 +106,33 @@ class CSVHTTPServer(BaseHTTPRequestHandler): return def read_chunk(self): - msg = '' + msg = "" while True: sym = self.rfile.read(1) - if sym == '': + if sym == "": break - msg += sym.decode('utf-8') - if msg.endswith('\r\n'): + msg += sym.decode("utf-8") + if msg.endswith("\r\n"): break length = int(msg[:-2], 16) if length == 0: - return '' + return "" content = self.rfile.read(length) - self.rfile.read(2) # read sep \r\n - return content.decode('utf-8') + self.rfile.read(2) # read sep \r\n + return content.decode("utf-8") def do_POST(self): - data = '' + data = "" while True: chunk = self.read_chunk() if not chunk: break data += chunk with StringIO(data) as fl: - reader = csv.reader(fl, delimiter=',') - with open(CSV_DATA, 'a') as d: + reader = csv.reader(fl, delimiter=",") + with open(CSV_DATA, "a") as d: for row in reader: - d.write(','.join(row) + '\n') + d.write(",".join(row) + "\n") self._set_headers() self.wfile.write(b"ok") @@ -124,6 +143,7 @@ class CSVHTTPServer(BaseHTTPRequestHandler): class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 + def start_server(): if IS_IPV6: httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, CSVHTTPServer) @@ -133,49 +153,76 @@ def 
start_server(): t = threading.Thread(target=httpd.serve_forever) return t, httpd + # test section -def test_select(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests=[], answers=[], test_data=""): - with open(CSV_DATA, 'w') as f: # clear file - f.write('') + +def test_select( + table_name="", + schema="str String,numuint UInt32,numint Int32,double Float64", + requests=[], + answers=[], + test_data="", +): + with open(CSV_DATA, "w") as f: # clear file + f.write("") if test_data: - with open(CSV_DATA, 'w') as f: + with open(CSV_DATA, "w") as f: f.write(test_data + "\n") if table_name: get_ch_answer("drop table if exists {}".format(table_name)) - get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR)) + get_ch_answer( + "create table {} ({}) engine=URL('{}', 'CSV')".format( + table_name, schema, HTTP_SERVER_URL_STR + ) + ) for i in range(len(requests)): tbl = table_name if not tbl: - tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + tbl = "url('{addr}', 'CSV', '{schema}')".format( + addr=HTTP_SERVER_URL_STR, schema=schema + ) check_answers(requests[i].format(tbl=tbl), answers[i]) if table_name: get_ch_answer("drop table if exists {}".format(table_name)) -def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests_insert=[], requests_select=[], answers=[]): - with open(CSV_DATA, 'w') as f: # flush test file - f.write('') +def test_insert( + table_name="", + schema="str String,numuint UInt32,numint Int32,double Float64", + requests_insert=[], + requests_select=[], + answers=[], +): + with open(CSV_DATA, "w") as f: # flush test file + f.write("") if table_name: get_ch_answer("drop table if exists {}".format(table_name)) - get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR)) + get_ch_answer( + "create table {} ({}) engine=URL('{}', 'CSV')".format( + table_name, schema, HTTP_SERVER_URL_STR + ) + ) for req in requests_insert: tbl = table_name if not tbl: - tbl = "table function url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + tbl = "table function url('{addr}', 'CSV', '{schema}')".format( + addr=HTTP_SERVER_URL_STR, schema=schema + ) get_ch_answer(req.format(tbl=tbl)) - for i in range(len(requests_select)): tbl = table_name if not tbl: - tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + tbl = "url('{addr}', 'CSV', '{schema}')".format( + addr=HTTP_SERVER_URL_STR, schema=schema + ) check_answers(requests_select[i].format(tbl=tbl), answers[i]) if table_name: @@ -185,9 +232,11 @@ def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,do def main(): test_data = "Hello,2,-2,7.7\nWorld,2,-5,8.8" select_only_requests = { - "select str,numuint,numint,double from {tbl}" : test_data.replace(',', '\t'), - "select numuint, count(*) from {tbl} group by numuint" : "2\t2", - "select str,numuint,numint,double from {tbl} limit 1": test_data.split("\n")[0].replace(',', '\t'), + "select str,numuint,numint,double from {tbl}": test_data.replace(",", "\t"), + "select numuint, count(*) from {tbl} group by numuint": "2\t2", + "select str,numuint,numint,double from {tbl} limit 1": test_data.split("\n")[ + 0 + ].replace(",", "\t"), } insert_requests = [ @@ -196,21 +245,41 @@ def main(): ] select_requests = { - "select distinct numuint from {tbl} order by numuint": '\n'.join([str(i) for i in 
range(11)]), - "select count(*) from {tbl}": '12', - 'select double, count(*) from {tbl} group by double order by double': "7.7\t2\n9.9\t10" + "select distinct numuint from {tbl} order by numuint": "\n".join( + [str(i) for i in range(11)] + ), + "select count(*) from {tbl}": "12", + "select double, count(*) from {tbl} group by double order by double": "7.7\t2\n9.9\t10", } t, httpd = start_server() t.start() # test table with url engine - test_select(table_name="test_table_select", requests=list(select_only_requests.keys()), answers=list(select_only_requests.values()), test_data=test_data) + test_select( + table_name="test_table_select", + requests=list(select_only_requests.keys()), + answers=list(select_only_requests.values()), + test_data=test_data, + ) # test table function url - test_select(requests=list(select_only_requests.keys()), answers=list(select_only_requests.values()), test_data=test_data) - #test insert into table with url engine - test_insert(table_name="test_table_insert", requests_insert=insert_requests, requests_select=list(select_requests.keys()), answers=list(select_requests.values())) - #test insert into table function url - test_insert(requests_insert=insert_requests, requests_select=list(select_requests.keys()), answers=list(select_requests.values())) + test_select( + requests=list(select_only_requests.keys()), + answers=list(select_only_requests.values()), + test_data=test_data, + ) + # test insert into table with url engine + test_insert( + table_name="test_table_insert", + requests_insert=insert_requests, + requests_select=list(select_requests.keys()), + answers=list(select_requests.values()), + ) + # test insert into table function url + test_insert( + requests_insert=insert_requests, + requests_select=list(select_requests.keys()), + answers=list(select_requests.values()), + ) httpd.shutdown() t.join() diff --git a/tests/queries/0_stateless/00990_hasToken.python b/tests/queries/0_stateless/00990_hasToken.python index 7d3775adc9d..e9bc514474a 100644 --- a/tests/queries/0_stateless/00990_hasToken.python +++ b/tests/queries/0_stateless/00990_hasToken.python @@ -12,35 +12,46 @@ HAYSTACKS = [ NEEDLE = "needle" -HAY_RE = re.compile(r'\bhay\b', re.IGNORECASE) -NEEDLE_RE = re.compile(r'\bneedle\b', re.IGNORECASE) +HAY_RE = re.compile(r"\bhay\b", re.IGNORECASE) +NEEDLE_RE = re.compile(r"\bneedle\b", re.IGNORECASE) + def replace_follow_case(replacement): def func(match): g = match.group() - if g.islower(): return replacement.lower() - if g.istitle(): return replacement.title() - if g.isupper(): return replacement.upper() + if g.islower(): + return replacement.lower() + if g.istitle(): + return replacement.title() + if g.isupper(): + return replacement.upper() return replacement + return func + def replace_separators(query, new_sep): - SEP_RE = re.compile('\\s+') + SEP_RE = re.compile("\\s+") result = SEP_RE.sub(new_sep, query) return result -def enlarge_haystack(query, times, separator=''): - return HAY_RE.sub(replace_follow_case(('hay' + separator) * times), query) + +def enlarge_haystack(query, times, separator=""): + return HAY_RE.sub(replace_follow_case(("hay" + separator) * times), query) + def small_needle(query): - return NEEDLE_RE.sub(replace_follow_case('n'), query) + return NEEDLE_RE.sub(replace_follow_case("n"), query) + def remove_needle(query): - return NEEDLE_RE.sub('', query) + return NEEDLE_RE.sub("", query) + def replace_needle(query, new_needle): return NEEDLE_RE.sub(new_needle, query) + # with str.lower, str.uppert, str.title and such def 
transform_needle(query, string_transformation_func): def replace_with_transformation(match): @@ -49,19 +60,21 @@ def transform_needle(query, string_transformation_func): return NEEDLE_RE.sub(replace_with_transformation, query) -def create_cases(case_sensitive_func, case_insensitive_func, table_row_template, table_query_template, const_query_template): + +def create_cases( + case_sensitive_func, + case_insensitive_func, + table_row_template, + table_query_template, + const_query_template, +): const_queries = [] table_rows = [] table_queries = set() def add_case(func, haystack, needle, match): match = int(match) - args = dict( - func = func, - haystack = haystack, - needle = needle, - match = match - ) + args = dict(func=func, haystack=haystack, needle=needle, match=match) const_queries.append(const_query_template.substitute(args)) table_queries.add(table_query_template.substitute(args)) table_rows.append(table_row_template.substitute(args)) @@ -69,14 +82,28 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template, def add_case_sensitive(haystack, needle, match): add_case(case_sensitive_func, haystack, needle, match) if match: - add_case(case_sensitive_func, transform_needle(haystack, str.swapcase), transform_needle(needle, str.swapcase), match) + add_case( + case_sensitive_func, + transform_needle(haystack, str.swapcase), + transform_needle(needle, str.swapcase), + match, + ) def add_case_insensitive(haystack, needle, match): add_case(case_insensitive_func, haystack, needle, match) if match: - add_case(case_insensitive_func, transform_needle(haystack, str.swapcase), needle, match) - add_case(case_insensitive_func, haystack, transform_needle(needle, str.swapcase), match) - + add_case( + case_insensitive_func, + transform_needle(haystack, str.swapcase), + needle, + match, + ) + add_case( + case_insensitive_func, + haystack, + transform_needle(needle, str.swapcase), + match, + ) # Negative cases add_case_sensitive(remove_needle(HAYSTACKS[0]), NEEDLE, False) @@ -85,7 +112,7 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template, for haystack in HAYSTACKS: add_case_sensitive(transform_needle(haystack, str.swapcase), NEEDLE, False) - sep = '' + sep = "" h = replace_separators(haystack, sep) add_case_sensitive(h, NEEDLE, False) @@ -102,8 +129,7 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template, add_case_sensitive(haystack, NEEDLE, True) add_case_insensitive(haystack, NEEDLE, True) - - for sep in list(''' ,'''): + for sep in list(""" ,"""): h = replace_separators(haystack, sep) add_case_sensitive(h, NEEDLE, True) add_case_sensitive(small_needle(h), small_needle(NEEDLE), True) @@ -114,32 +140,43 @@ def create_cases(case_sensitive_func, case_insensitive_func, table_row_template, add_case_insensitive(enlarge_haystack(h, 200, sep), NEEDLE, True) # case insesitivity works only on ASCII strings - add_case_sensitive(replace_needle(h, 'иголка'), replace_needle(NEEDLE, 'иголка'), True) - add_case_sensitive(replace_needle(h, '指针'), replace_needle(NEEDLE, '指针'), True) + add_case_sensitive( + replace_needle(h, "иголка"), replace_needle(NEEDLE, "иголка"), True + ) + add_case_sensitive( + replace_needle(h, "指针"), replace_needle(NEEDLE, "指针"), True + ) - for sep in list('''~!@$%^&*()-=+|]}[{";:/?.><\t''') + [r'\\\\']: + for sep in list("""~!@$%^&*()-=+|]}[{";:/?.><\t""") + [r"\\\\"]: h = replace_separators(HAYSTACKS[0], sep) add_case(case_sensitive_func, h, NEEDLE, True) return table_rows, table_queries, const_queries -def 
main(): +def main(): def query(x): print(x) - CONST_QUERY = Template("""SELECT ${func}('${haystack}', '${needle}'), ' expecting ', ${match};""") - TABLE_QUERY = Template("""WITH '${needle}' as n + CONST_QUERY = Template( + """SELECT ${func}('${haystack}', '${needle}'), ' expecting ', ${match};""" + ) + TABLE_QUERY = Template( + """WITH '${needle}' as n SELECT haystack, needle, ${func}(haystack, n) as result FROM ht - WHERE func = '${func}' AND needle = n AND result != match;""") + WHERE func = '${func}' AND needle = n AND result != match;""" + ) TABLE_ROW = Template("""('${haystack}', '${needle}', ${match}, '${func}')""") - rows, table_queries, const_queries = create_cases('hasToken', 'hasTokenCaseInsensitive', TABLE_ROW, TABLE_QUERY, CONST_QUERY) + rows, table_queries, const_queries = create_cases( + "hasToken", "hasTokenCaseInsensitive", TABLE_ROW, TABLE_QUERY, CONST_QUERY + ) for q in const_queries: query(q) - query("""DROP TABLE IF EXISTS ht; + query( + """DROP TABLE IF EXISTS ht; CREATE TABLE IF NOT EXISTS ht ( @@ -150,11 +187,15 @@ def main(): ) ENGINE MergeTree() ORDER BY haystack; -INSERT INTO ht VALUES {values};""".format(values=", ".join(rows))) +INSERT INTO ht VALUES {values};""".format( + values=", ".join(rows) + ) + ) for q in sorted(table_queries): query(q) query("""DROP TABLE ht""") -if __name__ == '__main__': + +if __name__ == "__main__": main() diff --git a/tests/queries/0_stateless/00991_live_view_watch_event_live.python b/tests/queries/0_stateless/00991_live_view_watch_event_live.python index 901d388ec01..9b7a3300c15 100644 --- a/tests/queries/0_stateless/00991_live_view_watch_event_live.python +++ b/tests/queries/0_stateless/00991_live_view_watch_event_live.python @@ -8,28 +8,32 @@ import sys import signal -CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT') -CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL') -CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL') +CLICKHOUSE_CLIENT = os.environ.get("CLICKHOUSE_CLIENT") +CLICKHOUSE_CURL = os.environ.get("CLICKHOUSE_CURL") +CLICKHOUSE_URL = os.environ.get("CLICKHOUSE_URL") def send_query(query): cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] + cmd += ["--query", query] # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout + return subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ).stdout def send_query_in_process_group(query): cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] + cmd += ["--query", query] # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid) + return subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, preexec_fn=os.setsid + ) def read_lines_and_push_to_queue(pipe, queue): try: - for line in iter(pipe.readline, ''): + for line in iter(pipe.readline, ""): line = line.strip() print(line) sys.stdout.flush() @@ -41,41 +45,44 @@ def read_lines_and_push_to_queue(pipe, queue): def test(): - send_query('DROP TABLE IF EXISTS test.lv').read() - send_query('DROP TABLE IF EXISTS test.mt').read() - send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read() - send_query('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt').read() + send_query("DROP TABLE IF EXISTS test.lv").read() + send_query("DROP TABLE IF EXISTS test.mt").read() + send_query( + "CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()" + ).read() + send_query("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt").read() 
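# An illustrative sketch of the pattern used just below: the streaming output of a
# long-running client is drained by a reader thread into a queue.Queue, so the test
# can fetch lines with a timeout instead of blocking on readline(). The "echo"
# command here is only a stand-in for the WATCH client.
import queue
import subprocess
import threading

def stream_lines(cmd, out_queue):
    # Read the child's stdout line by line and hand each stripped line to the queue.
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True)
    for line in iter(proc.stdout.readline, ""):
        out_queue.put(line.strip())
    proc.wait()
    out_queue.put(None)  # end-of-stream marker

q_demo = queue.Queue()
reader = threading.Thread(target=stream_lines, args=(["echo", "hello"], q_demo), daemon=True)
reader.start()
assert q_demo.get(timeout=10) == "hello"
assert q_demo.get(timeout=10) is None
reader.join()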
q = queue.Queue() - p = send_query_in_process_group('WATCH test.lv') + p = send_query_in_process_group("WATCH test.lv") thread = threading.Thread(target=read_lines_and_push_to_queue, args=(p.stdout, q)) thread.start() line = q.get() print(line) - assert (line == '0\t1') + assert line == "0\t1" - send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read() + send_query("INSERT INTO test.mt VALUES (1),(2),(3)").read() line = q.get() print(line) - assert (line == '6\t2') + assert line == "6\t2" - send_query('INSERT INTO test.mt VALUES (4),(5),(6)').read() + send_query("INSERT INTO test.mt VALUES (4),(5),(6)").read() line = q.get() print(line) - assert (line == '21\t3') + assert line == "21\t3" # Send Ctrl+C to client. os.killpg(os.getpgid(p.pid), signal.SIGINT) # This insert shouldn't affect lv. - send_query('INSERT INTO test.mt VALUES (7),(8),(9)').read() + send_query("INSERT INTO test.mt VALUES (7),(8),(9)").read() line = q.get() print(line) - assert (line is None) + assert line is None - send_query('DROP TABLE if exists test.lv').read() - send_query('DROP TABLE if exists test.lv').read() + send_query("DROP TABLE if exists test.lv").read() + send_query("DROP TABLE if exists test.lv").read() thread.join() + test() diff --git a/tests/queries/0_stateless/00991_live_view_watch_http.python b/tests/queries/0_stateless/00991_live_view_watch_http.python index d5a1e6e8ed9..72c07b27d82 100755 --- a/tests/queries/0_stateless/00991_live_view_watch_http.python +++ b/tests/queries/0_stateless/00991_live_view_watch_http.python @@ -7,26 +7,30 @@ import os import sys -CLICKHOUSE_CLIENT = os.environ.get('CLICKHOUSE_CLIENT') -CLICKHOUSE_CURL = os.environ.get('CLICKHOUSE_CURL') -CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL') +CLICKHOUSE_CLIENT = os.environ.get("CLICKHOUSE_CLIENT") +CLICKHOUSE_CURL = os.environ.get("CLICKHOUSE_CURL") +CLICKHOUSE_URL = os.environ.get("CLICKHOUSE_URL") def send_query(query): cmd = list(CLICKHOUSE_CLIENT.split()) - cmd += ['--query', query] + cmd += ["--query", query] # print(cmd) - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout + return subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ).stdout def send_http_query(query): - cmd = list(CLICKHOUSE_CURL.split()) # list(['curl', '-sSN', '--max-time', '10']) - cmd += ['-sSN', CLICKHOUSE_URL, '-d', query] - return subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT).stdout + cmd = list(CLICKHOUSE_CURL.split()) # list(['curl', '-sSN', '--max-time', '10']) + cmd += ["-sSN", CLICKHOUSE_URL, "-d", query] + return subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ).stdout def read_lines_and_push_to_queue(pipe, queue): - for line in iter(pipe.readline, ''): + for line in iter(pipe.readline, ""): line = line.strip() print(line) sys.stdout.flush() @@ -36,28 +40,31 @@ def read_lines_and_push_to_queue(pipe, queue): def test(): - send_query('DROP TABLE IF EXISTS test.lv').read() - send_query('DROP TABLE IF EXISTS test.mt').read() - send_query('CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()').read() - send_query('CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt').read() + send_query("DROP TABLE IF EXISTS test.lv").read() + send_query("DROP TABLE IF EXISTS test.mt").read() + send_query( + "CREATE TABLE test.mt (a Int32) Engine=MergeTree order by tuple()" + ).read() + send_query("CREATE LIVE VIEW test.lv AS SELECT sum(a) FROM test.mt").read() q = queue.Queue() - pipe = send_http_query('WATCH test.lv') + pipe = 
send_http_query("WATCH test.lv") thread = threading.Thread(target=read_lines_and_push_to_queue, args=(pipe, q)) thread.start() line = q.get() print(line) - assert (line == '0\t1') + assert line == "0\t1" - send_query('INSERT INTO test.mt VALUES (1),(2),(3)').read() + send_query("INSERT INTO test.mt VALUES (1),(2),(3)").read() line = q.get() print(line) - assert (line == '6\t2') + assert line == "6\t2" - send_query('DROP TABLE if exists test.lv').read() - send_query('DROP TABLE if exists test.lv').read() + send_query("DROP TABLE if exists test.lv").read() + send_query("DROP TABLE if exists test.lv").read() thread.join() + test() diff --git a/tests/queries/0_stateless/01558_ttest_scipy.python b/tests/queries/0_stateless/01558_ttest_scipy.python index 4d913d4292f..75e1c2701b2 100644 --- a/tests/queries/0_stateless/01558_ttest_scipy.python +++ b/tests/queries/0_stateless/01558_ttest_scipy.python @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 import os import sys from scipy import stats @@ -6,70 +6,86 @@ import pandas as pd import numpy as np CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient + def test_and_check(name, a, b, t_stat, p_value, precision=1e-2): client = ClickHouseClient() client.query("DROP TABLE IF EXISTS ttest;") - client.query("CREATE TABLE ttest (left Float64, right UInt8) ENGINE = Memory;"); - client.query("INSERT INTO ttest VALUES {};".format(", ".join(['({},{})'.format(i, 0) for i in a]))) - client.query("INSERT INTO ttest VALUES {};".format(", ".join(['({},{})'.format(j, 1) for j in b]))) + client.query("CREATE TABLE ttest (left Float64, right UInt8) ENGINE = Memory;") + client.query( + "INSERT INTO ttest VALUES {};".format( + ", ".join(["({},{})".format(i, 0) for i in a]) + ) + ) + client.query( + "INSERT INTO ttest VALUES {};".format( + ", ".join(["({},{})".format(j, 1) for j in b]) + ) + ) real = client.query_return_df( - "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + - "roundBankers({}(left, right).2, 16) as p_value ".format(name) + - "FROM ttest FORMAT TabSeparatedWithNames;") - real_t_stat = real['t_stat'][0] - real_p_value = real['p_value'][0] - assert(abs(real_t_stat - np.float64(t_stat)) < precision), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) - assert(abs(real_p_value - np.float64(p_value)) < precision), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) + "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + + "roundBankers({}(left, right).2, 16) as p_value ".format(name) + + "FROM ttest FORMAT TabSeparatedWithNames;" + ) + real_t_stat = real["t_stat"][0] + real_p_value = real["p_value"][0] + assert ( + abs(real_t_stat - np.float64(t_stat)) < precision + ), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) + assert ( + abs(real_p_value - np.float64(p_value)) < precision + ), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) client.query("DROP TABLE IF EXISTS ttest;") def test_student(): - rvs1 = np.round(stats.norm.rvs(loc=1, scale=5,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=10, scale=5,size=500), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=10, scale=5, size=500), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True) 
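    # Note: scipy's ttest_ind with equal_var=True runs the classic Student's t-test
    # (pooled variance), matching studentTTest; equal_var=False, used in test_welch()
    # below, switches scipy to Welch's t-test to match welchTTest. The remaining cases
    # only vary the location, scale and sample sizes of the generated normal samples.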
test_and_check("studentTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True) test_and_check("studentTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=2, scale=10,size=512), 2) - rvs2 = np.round(stats.norm.rvs(loc=5, scale=20,size=1024), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + rvs1 = np.round(stats.norm.rvs(loc=2, scale=10, size=512), 2) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=20, size=1024), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True) test_and_check("studentTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=1024), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=10,size=512), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = True) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=10, size=1024), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=10, size=512), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=True) test_and_check("studentTTest", rvs1, rvs2, s, p) + def test_welch(): - rvs1 = np.round(stats.norm.rvs(loc=1, scale=15,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=10, scale=5,size=500), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) + rvs1 = np.round(stats.norm.rvs(loc=1, scale=15, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=10, scale=5, size=500), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False) test_and_check("welchTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=7,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=3,size=500), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=7, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=3, size=500), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False) test_and_check("welchTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=1024), 2) - rvs2 = np.round(stats.norm.rvs(loc=5, scale=1,size=512), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=10, size=1024), 2) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=1, size=512), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False) test_and_check("welchTTest", rvs1, rvs2, s, p) - rvs1 = np.round(stats.norm.rvs(loc=5, scale=10,size=512), 2) - rvs2 = np.round(stats.norm.rvs(loc=5, scale=10,size=1024), 2) - s, p = stats.ttest_ind(rvs1, rvs2, equal_var = False) + rvs1 = np.round(stats.norm.rvs(loc=5, scale=10, size=512), 2) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=10, size=1024), 2) + s, p = stats.ttest_ind(rvs1, rvs2, equal_var=False) test_and_check("welchTTest", rvs1, rvs2, s, p) + if __name__ == "__main__": test_student() test_welch() - print("Ok.") \ No newline at end of file + print("Ok.") diff --git a/tests/queries/0_stateless/01561_mann_whitney_scipy.python b/tests/queries/0_stateless/01561_mann_whitney_scipy.python index 7958e8bbaf1..4713120287d 100644 --- a/tests/queries/0_stateless/01561_mann_whitney_scipy.python +++ b/tests/queries/0_stateless/01561_mann_whitney_scipy.python @@ -6,7 +6,7 @@ import pandas as pd import numpy as np CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, 
os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient @@ -14,40 +14,51 @@ from pure_http_client import ClickHouseClient def test_and_check(name, a, b, t_stat, p_value): client = ClickHouseClient() client.query("DROP TABLE IF EXISTS mann_whitney;") - client.query("CREATE TABLE mann_whitney (left Float64, right UInt8) ENGINE = Memory;"); - client.query("INSERT INTO mann_whitney VALUES {};".format(", ".join(['({},{}), ({},{})'.format(i, 0, j, 1) for i,j in zip(a, b)]))) + client.query( + "CREATE TABLE mann_whitney (left Float64, right UInt8) ENGINE = Memory;" + ) + client.query( + "INSERT INTO mann_whitney VALUES {};".format( + ", ".join(["({},{}), ({},{})".format(i, 0, j, 1) for i, j in zip(a, b)]) + ) + ) real = client.query_return_df( - "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + - "roundBankers({}(left, right).2, 16) as p_value ".format(name) + - "FROM mann_whitney FORMAT TabSeparatedWithNames;") - real_t_stat = real['t_stat'][0] - real_p_value = real['p_value'][0] - assert(abs(real_t_stat - np.float64(t_stat) < 1e-2)), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) - assert(abs(real_p_value - np.float64(p_value)) < 1e-2), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) + "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + + "roundBankers({}(left, right).2, 16) as p_value ".format(name) + + "FROM mann_whitney FORMAT TabSeparatedWithNames;" + ) + real_t_stat = real["t_stat"][0] + real_p_value = real["p_value"][0] + assert abs( + real_t_stat - np.float64(t_stat) < 1e-2 + ), "clickhouse_t_stat {}, scipy_t_stat {}".format(real_t_stat, t_stat) + assert ( + abs(real_p_value - np.float64(p_value)) < 1e-2 + ), "clickhouse_p_value {}, scipy_p_value {}".format(real_p_value, p_value) client.query("DROP TABLE IF EXISTS mann_whitney;") def test_mann_whitney(): - rvs1 = np.round(stats.norm.rvs(loc=1, scale=5,size=500), 5) - rvs2 = np.round(stats.expon.rvs(scale=0.2,size=500), 5) - s, p = stats.mannwhitneyu(rvs1, rvs2, alternative='two-sided') + rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=500), 5) + rvs2 = np.round(stats.expon.rvs(scale=0.2, size=500), 5) + s, p = stats.mannwhitneyu(rvs1, rvs2, alternative="two-sided") test_and_check("mannWhitneyUTest", rvs1, rvs2, s, p) test_and_check("mannWhitneyUTest('two-sided')", rvs1, rvs2, s, p) equal = np.round(stats.cauchy.rvs(scale=5, size=500), 5) - s, p = stats.mannwhitneyu(equal, equal, alternative='two-sided') + s, p = stats.mannwhitneyu(equal, equal, alternative="two-sided") test_and_check("mannWhitneyUTest('two-sided')", equal, equal, s, p) - s, p = stats.mannwhitneyu(equal, equal, alternative='less', use_continuity=False) + s, p = stats.mannwhitneyu(equal, equal, alternative="less", use_continuity=False) test_and_check("mannWhitneyUTest('less', 0)", equal, equal, s, p) - - rvs1 = np.round(stats.cauchy.rvs(scale=10,size=65536), 5) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=10,size=65536), 5) - s, p = stats.mannwhitneyu(rvs1, rvs2, alternative='greater') + rvs1 = np.round(stats.cauchy.rvs(scale=10, size=65536), 5) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=10, size=65536), 5) + s, p = stats.mannwhitneyu(rvs1, rvs2, alternative="greater") test_and_check("mannWhitneyUTest('greater')", rvs1, rvs2, s, p) + if __name__ == "__main__": test_mann_whitney() - print("Ok.") \ No newline at end of file + print("Ok.") diff --git a/tests/queries/0_stateless/01626_cnf_fuzz_long.python b/tests/queries/0_stateless/01626_cnf_fuzz_long.python 
index 10c12d14182..de9e4a21dbb 100644 --- a/tests/queries/0_stateless/01626_cnf_fuzz_long.python +++ b/tests/queries/0_stateless/01626_cnf_fuzz_long.python @@ -4,14 +4,18 @@ from random import randint, choices import sys CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient client = ClickHouseClient() N = 10 -create_query = "CREATE TABLE t_cnf_fuzz(" + ", ".join([f"c{i} UInt8" for i in range(N)]) + ") ENGINE = Memory" +create_query = ( + "CREATE TABLE t_cnf_fuzz(" + + ", ".join([f"c{i} UInt8" for i in range(N)]) + + ") ENGINE = Memory" +) client.query("DROP TABLE IF EXISTS t_cnf_fuzz") client.query(create_query) @@ -35,6 +39,7 @@ client.query(insert_query) MAX_CLAUSES = 10 MAX_ATOMS = 5 + def generate_dnf(): clauses = [] num_clauses = randint(1, MAX_CLAUSES) @@ -42,12 +47,17 @@ def generate_dnf(): num_atoms = randint(1, MAX_ATOMS) atom_ids = choices(range(N), k=num_atoms) negates = choices([0, 1], k=num_atoms) - atoms = [f"(NOT c{i})" if neg else f"c{i}" for (i, neg) in zip(atom_ids, negates)] + atoms = [ + f"(NOT c{i})" if neg else f"c{i}" for (i, neg) in zip(atom_ids, negates) + ] clauses.append("(" + " AND ".join(atoms) + ")") return " OR ".join(clauses) -select_query = "SELECT count() FROM t_cnf_fuzz WHERE {} SETTINGS convert_query_to_cnf = {}" + +select_query = ( + "SELECT count() FROM t_cnf_fuzz WHERE {} SETTINGS convert_query_to_cnf = {}" +) fail_report = """ Failed query: '{}'. diff --git a/tests/queries/0_stateless/01654_test_writer_block_sequence.python b/tests/queries/0_stateless/01654_test_writer_block_sequence.python index e80cc273076..bc4e3da9ed5 100644 --- a/tests/queries/0_stateless/01654_test_writer_block_sequence.python +++ b/tests/queries/0_stateless/01654_test_writer_block_sequence.python @@ -5,15 +5,20 @@ import random import string CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient + def get_random_string(length): - return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(length)) + return "".join( + random.choice(string.ascii_uppercase + string.digits) for _ in range(length) + ) + client = ClickHouseClient() + def insert_block(table_name, block_granularity_rows, block_rows): global client block_data = [] @@ -25,9 +30,12 @@ def insert_block(table_name, block_granularity_rows, block_rows): values_row = ", ".join("(1, '" + row + "')" for row in block_data) client.query("INSERT INTO {} VALUES {}".format(table_name, values_row)) + try: client.query("DROP TABLE IF EXISTS t") - client.query("CREATE TABLE t (v UInt8, data String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0") + client.query( + "CREATE TABLE t (v UInt8, data String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0" + ) client.query("SYSTEM STOP MERGES t") @@ -53,6 +61,10 @@ try: client.query("SYSTEM START MERGES t") client.query("OPTIMIZE TABLE t FINAL") - print(client.query_return_df("SELECT COUNT() as C FROM t FORMAT TabSeparatedWithNames")['C'][0]) + print( + client.query_return_df( + "SELECT COUNT() as C FROM t FORMAT TabSeparatedWithNames" + )["C"][0] + ) finally: client.query("DROP TABLE IF EXISTS t") diff --git a/tests/queries/0_stateless/01854_HTTP_dict_decompression.python 
b/tests/queries/0_stateless/01854_HTTP_dict_decompression.python index 4f6878665aa..7d98a24e83e 100644 --- a/tests/queries/0_stateless/01854_HTTP_dict_decompression.python +++ b/tests/queries/0_stateless/01854_HTTP_dict_decompression.python @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from http.server import SimpleHTTPRequestHandler,HTTPServer +from http.server import SimpleHTTPRequestHandler, HTTPServer import socket import csv import sys @@ -21,6 +21,7 @@ def is_ipv6(host): except: return True + def get_local_port(host, ipv6): if ipv6: family = socket.AF_INET6 @@ -31,8 +32,9 @@ def get_local_port(host, ipv6): fd.bind((host, 0)) return fd.getsockname()[1] -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', 'localhost') -CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') + +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "localhost") +CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123") ##################################################################################### # This test starts an HTTP server and serves data to clickhouse url-engine based table. @@ -42,16 +44,24 @@ CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') ##################################################################################### # IP-address of this host accessible from the outside world. Get the first one -HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0] +HTTP_SERVER_HOST = ( + subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0] +) IS_IPV6 = is_ipv6(HTTP_SERVER_HOST) HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6) # IP address and port of the HTTP server started from this script. HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT) if IS_IPV6: - HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}" + + "/" + ) else: - HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/" + ) # Because we need to check the content of file.csv we can create this content and avoid reading csv CSV_DATA = "Hello, 1\nWorld, 2\nThis, 152\nis, 9283\ntesting, 2313213\ndata, 555\n" @@ -59,19 +69,24 @@ CSV_DATA = "Hello, 1\nWorld, 2\nThis, 152\nis, 9283\ntesting, 2313213\ndata, 555 # Choose compression method # (Will change during test, need to check standard data sending, to make sure that nothing broke) -COMPRESS_METHOD = 'none' -ADDING_ENDING = '' -ENDINGS = ['.gz', '.xz'] +COMPRESS_METHOD = "none" +ADDING_ENDING = "" +ENDINGS = [".gz", ".xz"] SEND_ENCODING = True + def get_ch_answer(query): host = CLICKHOUSE_HOST if IS_IPV6: - host = f'[{host}]' + host = f"[{host}]" - url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP)) + url = os.environ.get( + "CLICKHOUSE_URL", + "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP), + ) return urllib.request.urlopen(url, data=query.encode()).read().decode() + def check_answers(query, answer): ch_answer = get_ch_answer(query) if ch_answer.strip() != answer.strip(): @@ -80,18 +95,19 @@ def check_answers(query, answer): print("Fetched answer :", ch_answer, file=sys.stderr) raise Exception("Fail on query") + # Server with head method which is useful 
for debuging by hands class HttpProcessor(SimpleHTTPRequestHandler): def _set_headers(self): self.send_response(200) if SEND_ENCODING: - self.send_header('Content-Encoding', COMPRESS_METHOD) - if COMPRESS_METHOD == 'none': - self.send_header('Content-Length', len(CSV_DATA.encode())) + self.send_header("Content-Encoding", COMPRESS_METHOD) + if COMPRESS_METHOD == "none": + self.send_header("Content-Length", len(CSV_DATA.encode())) else: self.compress_data() - self.send_header('Content-Length', len(self.data)) - self.send_header('Content-Type', 'text/csv') + self.send_header("Content-Length", len(self.data)) + self.send_header("Content-Type", "text/csv") self.end_headers() def do_HEAD(self): @@ -99,18 +115,17 @@ class HttpProcessor(SimpleHTTPRequestHandler): return def compress_data(self): - if COMPRESS_METHOD == 'gzip': + if COMPRESS_METHOD == "gzip": self.data = gzip.compress((CSV_DATA).encode()) - elif COMPRESS_METHOD == 'lzma': + elif COMPRESS_METHOD == "lzma": self.data = lzma.compress((CSV_DATA).encode()) else: - self.data = 'WRONG CONVERSATION'.encode() - + self.data = "WRONG CONVERSATION".encode() def do_GET(self): self._set_headers() - if COMPRESS_METHOD == 'none': + if COMPRESS_METHOD == "none": self.wfile.write(CSV_DATA.encode()) else: self.wfile.write(self.data) @@ -119,9 +134,11 @@ class HttpProcessor(SimpleHTTPRequestHandler): def log_message(self, format, *args): return + class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 + def start_server(requests_amount): if IS_IPV6: httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor) @@ -135,52 +152,60 @@ def start_server(requests_amount): t = threading.Thread(target=real_func) return t + ##################################################################### # Testing area. ##################################################################### -def test_select(dict_name="", schema="word String, counter UInt32", requests=[], answers=[], test_data=""): + +def test_select( + dict_name="", + schema="word String, counter UInt32", + requests=[], + answers=[], + test_data="", +): global ADDING_ENDING global SEND_ENCODING global COMPRESS_METHOD for i in range(len(requests)): if i > 2: - ADDING_ENDING = ENDINGS[i-3] + ADDING_ENDING = ENDINGS[i - 3] SEND_ENCODING = False if dict_name: get_ch_answer("drop dictionary if exists {}".format(dict_name)) - get_ch_answer('''CREATE DICTIONARY {} ({}) + get_ch_answer( + """CREATE DICTIONARY {} ({}) PRIMARY KEY word SOURCE(HTTP(url '{}' format 'CSV')) LAYOUT(complex_key_hashed()) - LIFETIME(0)'''.format(dict_name, schema, HTTP_SERVER_URL_STR + '/test.csv' + ADDING_ENDING)) + LIFETIME(0)""".format( + dict_name, schema, HTTP_SERVER_URL_STR + "/test.csv" + ADDING_ENDING + ) + ) COMPRESS_METHOD = requests[i] print(i, COMPRESS_METHOD, ADDING_ENDING, SEND_ENCODING) check_answers("SELECT * FROM {} ORDER BY word".format(dict_name), answers[i]) + def main(): # first three for encoding, second three for url - insert_requests = [ - 'none', - 'gzip', - 'lzma', - 'gzip', - 'lzma' - ] + insert_requests = ["none", "gzip", "lzma", "gzip", "lzma"] # This answers got experemently in non compressed mode and they are correct - answers = ['''Hello 1\nThis 152\nWorld 2\ndata 555\nis 9283\ntesting 2313213'''] * 5 + answers = ["""Hello 1\nThis 152\nWorld 2\ndata 555\nis 9283\ntesting 2313213"""] * 5 t = start_server(len(insert_requests)) t.start() - test_select(dict_name="test_table_select", requests=insert_requests, answers=answers) + test_select( + dict_name="test_table_select", requests=insert_requests, 
answers=answers + ) t.join() print("PASSED") - if __name__ == "__main__": try: main() @@ -191,5 +216,3 @@ if __name__ == "__main__": sys.stderr.flush() os._exit(1) - - diff --git a/tests/queries/0_stateless/01913_exact_rows_before_limit_full.reference b/tests/queries/0_stateless/01913_exact_rows_before_limit_full.reference new file mode 100644 index 00000000000..a0f4560ca1c --- /dev/null +++ b/tests/queries/0_stateless/01913_exact_rows_before_limit_full.reference @@ -0,0 +1,162 @@ +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 10000 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 10 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [12] + ], + + "rows": 1, + + "rows_before_limit_at_least": 3 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 20 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 60 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 40 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 60 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 40 +} +{ + "meta": + [ + { + "name": "i", + "type": "Int32" + } + ], + + "data": + [ + [0] + ], + + "rows": 1, + + "rows_before_limit_at_least": 20 +} diff --git a/tests/queries/0_stateless/01913_exact_rows_before_limit_full.sql b/tests/queries/0_stateless/01913_exact_rows_before_limit_full.sql new file mode 100644 index 00000000000..84f97090169 --- /dev/null +++ b/tests/queries/0_stateless/01913_exact_rows_before_limit_full.sql @@ -0,0 +1,29 @@ +-- Tags: no-parallel, no-random-merge-tree-settings + +drop table if exists test; + +create table test (i int) engine MergeTree order by tuple(); + +insert into test select arrayJoin(range(10000)); + +set exact_rows_before_limit = 1, output_format_write_statistics = 0, max_block_size = 100; + +select * from test limit 1 FORMAT JSONCompact; + +select * from test where i < 10 group by i limit 1 FORMAT JSONCompact; + +select * from test group by i having i in (10, 11, 12) limit 1 FORMAT JSONCompact; + +select * from test where i < 20 order by i limit 1 FORMAT JSONCompact; + +set prefer_localhost_replica = 0; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 30 limit 1 FORMAT JSONCompact; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 20 order by i limit 1 FORMAT JSONCompact; + +set prefer_localhost_replica = 1; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 30 limit 1 FORMAT JSONCompact; +select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 20 order by i limit 1 FORMAT JSONCompact; + +select * from (select * from cluster(test_cluster_two_shards, currentDatabase(), test) where i < 10) limit 1 FORMAT JSONCompact; + +drop table if exists test; diff --git a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference index 
7f73a8c6554..b9a7d17e955 100644 --- a/tests/queries/0_stateless/01951_distributed_push_down_limit.reference +++ b/tests/queries/0_stateless/01951_distributed_push_down_limit.reference @@ -13,8 +13,7 @@ Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) Sorting (Merge sorted streams after aggregation stage for ORDER BY) Union - Limit (preliminary LIMIT (with OFFSET)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY))))) - ReadFromStorage (SystemNumbers) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Convert VIEW subquery result to VIEW table structure + (Materialize constants after VIEW subquery + (Projection + Before ORDER BY))))) + ReadFromStorage (SystemNumbers) ReadFromRemote (Read from remote replica) diff --git a/tests/queries/0_stateless/02010_lc_native.python b/tests/queries/0_stateless/02010_lc_native.python index e6d6f9e1317..a197d32a3b9 100755 --- a/tests/queries/0_stateless/02010_lc_native.python +++ b/tests/queries/0_stateless/02010_lc_native.python @@ -5,13 +5,13 @@ import socket import os import uuid -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') -CLICKHOUSE_PORT = int(os.environ.get('CLICKHOUSE_PORT_TCP', '900000')) -CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'default') +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) +CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") + def writeVarUInt(x, ba): for _ in range(0, 9): - byte = x & 0x7F if x > 0x7F: byte |= 0x80 @@ -24,12 +24,12 @@ def writeVarUInt(x, ba): def writeStringBinary(s, ba): - b = bytes(s, 'utf-8') + b = bytes(s, "utf-8") writeVarUInt(len(s), ba) ba.extend(b) -def readStrict(s, size = 1): +def readStrict(s, size=1): res = bytearray() while size: cur = s.recv(size) @@ -48,18 +48,23 @@ def readUInt(s, size=1): val += res[i] << (i * 8) return val + def readUInt8(s): return readUInt(s) + def readUInt16(s): return readUInt(s, 2) + def readUInt32(s): return readUInt(s, 4) + def readUInt64(s): return readUInt(s, 8) + def readVarUInt(s): x = 0 for i in range(9): @@ -75,25 +80,25 @@ def readVarUInt(s): def readStringBinary(s): size = readVarUInt(s) s = readStrict(s, size) - return s.decode('utf-8') + return s.decode("utf-8") def sendHello(s): ba = bytearray() - writeVarUInt(0, ba) # Hello - writeStringBinary('simple native protocol', ba) + writeVarUInt(0, ba) # Hello + writeStringBinary("simple native protocol", ba) writeVarUInt(21, ba) writeVarUInt(9, ba) writeVarUInt(54449, ba) - writeStringBinary('default', ba) # database - writeStringBinary('default', ba) # user - writeStringBinary('', ba) # pwd + writeStringBinary("default", ba) # database + writeStringBinary("default", ba) # user + writeStringBinary("", ba) # pwd s.sendall(ba) def receiveHello(s): p_type = readVarUInt(s) - assert (p_type == 0) # Hello + assert p_type == 0 # Hello server_name = readStringBinary(s) # print("Server name: ", server_name) server_version_major = readVarUInt(s) @@ -111,78 +116,79 @@ def receiveHello(s): def serializeClientInfo(ba, query_id): - writeStringBinary('default', ba) # initial_user - writeStringBinary(query_id, ba) # initial_query_id - writeStringBinary('127.0.0.1:9000', ba) # initial_address - ba.extend([0] * 8) # initial_query_start_time_microseconds - ba.append(1) # TCP - writeStringBinary('os_user', ba) # 
os_user - writeStringBinary('client_hostname', ba) # client_hostname - writeStringBinary('client_name', ba) # client_name + writeStringBinary("default", ba) # initial_user + writeStringBinary(query_id, ba) # initial_query_id + writeStringBinary("127.0.0.1:9000", ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary("os_user", ba) # os_user + writeStringBinary("client_hostname", ba) # client_hostname + writeStringBinary("client_name", ba) # client_name writeVarUInt(21, ba) writeVarUInt(9, ba) writeVarUInt(54449, ba) - writeStringBinary('', ba) # quota_key - writeVarUInt(0, ba) # distributed_depth - writeVarUInt(1, ba) # client_version_patch - ba.append(0) # No telemetry + writeStringBinary("", ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry def sendQuery(s, query): ba = bytearray() query_id = uuid.uuid4().hex - writeVarUInt(1, ba) # query + writeVarUInt(1, ba) # query writeStringBinary(query_id, ba) - ba.append(1) # INITIAL_QUERY + ba.append(1) # INITIAL_QUERY # client info serializeClientInfo(ba, query_id) - writeStringBinary('', ba) # No settings - writeStringBinary('', ba) # No interserver secret - writeVarUInt(2, ba) # Stage - Complete - ba.append(0) # No compression - writeStringBinary(query, ba) # query, finally + writeStringBinary("", ba) # No settings + writeStringBinary("", ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally s.sendall(ba) def serializeBlockInfo(ba): - writeVarUInt(1, ba) # 1 - ba.append(0) # is_overflows - writeVarUInt(2, ba) # 2 - writeVarUInt(0, ba) # 0 - ba.extend([0] * 4) # bucket_num + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num def sendEmptyBlock(s): ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(0, ba) # rows - writeVarUInt(0, ba) # columns + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # columns s.sendall(ba) def assertPacket(packet, expected): - assert(packet == expected), packet + assert packet == expected, packet + def readHeader(s): packet_type = readVarUInt(s) - if packet_type == 2: # Exception + if packet_type == 2: # Exception raise RuntimeError(readException(s)) - assertPacket(packet_type, 1) # Data + assertPacket(packet_type, 1) # Data - readStringBinary(s) # external table name + readStringBinary(s) # external table name # BlockInfo - assertPacket(readVarUInt(s), 1) # 1 - assertPacket(readUInt8(s), 0) # is_overflows - assertPacket(readVarUInt(s), 2) # 2 - assertPacket(readUInt32(s), 4294967295) # bucket_num - assertPacket(readVarUInt(s), 0) # 0 - columns = readVarUInt(s) # rows - rows = readVarUInt(s) # columns + assertPacket(readVarUInt(s), 1) # 1 + assertPacket(readUInt8(s), 0) # is_overflows + assertPacket(readVarUInt(s), 2) # 2 + assertPacket(readUInt32(s), 4294967295) # bucket_num + assertPacket(readVarUInt(s), 0) # 0 + columns = readVarUInt(s) # rows + rows = readVarUInt(s) # columns print("Rows {} Columns {}".format(rows, columns)) for _ in range(columns): col_name = readStringBinary(s) @@ -194,9 +200,9 @@ def readException(s): code = readUInt32(s) name = readStringBinary(s) text = readStringBinary(s) - readStringBinary(s) # trace - assertPacket(readUInt8(s), 0) # has_nested 
- return "code {}: {}".format(code, text.replace('DB::Exception:', '')) + readStringBinary(s) # trace + assertPacket(readUInt8(s), 0) # has_nested + return "code {}: {}".format(code, text.replace("DB::Exception:", "")) def insertValidLowCardinalityRow(): @@ -205,7 +211,12 @@ def insertValidLowCardinalityRow(): s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) + sendQuery( + s, + "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( + CLICKHOUSE_DATABASE + ), + ) # external tables sendEmptyBlock(s) @@ -213,25 +224,27 @@ def insertValidLowCardinalityRow(): # Data ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(1, ba) # rows - writeVarUInt(1, ba) # columns - writeStringBinary('x', ba) - writeStringBinary('LowCardinality(String)', ba) - ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys - ba.extend([3, 2] + [0] * 6) # indexes type: UInt64 [3], with additional keys [2] - ba.extend([1] + [0] * 7) # num_keys in dict - writeStringBinary('hello', ba) # key - ba.extend([1] + [0] * 7) # num_indexes - ba.extend([0] * 8) # UInt64 index (0 for 'hello') + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary("x", ba) + writeStringBinary("LowCardinality(String)", ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend( + [3, 2] + [0] * 6 + ) # indexes type: UInt64 [3], with additional keys [2] + ba.extend([1] + [0] * 7) # num_keys in dict + writeStringBinary("hello", ba) # key + ba.extend([1] + [0] * 7) # num_indexes + ba.extend([0] * 8) # UInt64 index (0 for 'hello') s.sendall(ba) # Fin block sendEmptyBlock(s) - assertPacket(readVarUInt(s), 5) # End of stream + assertPacket(readVarUInt(s), 5) # End of stream s.close() @@ -241,7 +254,12 @@ def insertLowCardinalityRowWithIndexOverflow(): s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) + sendQuery( + s, + "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( + CLICKHOUSE_DATABASE + ), + ) # external tables sendEmptyBlock(s) @@ -249,19 +267,21 @@ def insertLowCardinalityRowWithIndexOverflow(): # Data ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(1, ba) # rows - writeVarUInt(1, ba) # columns - writeStringBinary('x', ba) - writeStringBinary('LowCardinality(String)', ba) - ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys - ba.extend([3, 2] + [0] * 6) # indexes type: UInt64 [3], with additional keys [2] - ba.extend([1] + [0] * 7) # num_keys in dict - writeStringBinary('hello', ba) # key - ba.extend([1] + [0] * 7) # num_indexes - ba.extend([0] * 7 + [1]) # UInt64 index (overflow) + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary("x", ba) + writeStringBinary("LowCardinality(String)", ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend( + [3, 2] + [0] * 6 + ) # indexes type: UInt64 [3], with additional keys [2] + ba.extend([1] + [0] * 7) # num_keys in dict + writeStringBinary("hello", ba) # key + ba.extend([1] + 
[0] * 7) # num_indexes + ba.extend([0] * 7 + [1]) # UInt64 index (overflow) s.sendall(ba) assertPacket(readVarUInt(s), 2) @@ -275,7 +295,12 @@ def insertLowCardinalityRowWithIncorrectDictType(): s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) + sendQuery( + s, + "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( + CLICKHOUSE_DATABASE + ), + ) # external tables sendEmptyBlock(s) @@ -283,32 +308,40 @@ def insertLowCardinalityRowWithIncorrectDictType(): # Data ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(1, ba) # rows - writeVarUInt(1, ba) # columns - writeStringBinary('x', ba) - writeStringBinary('LowCardinality(String)', ba) - ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys - ba.extend([3, 3] + [0] * 6) # indexes type: UInt64 [3], with global dict and add keys [1 + 2] - ba.extend([1] + [0] * 7) # num_keys in dict - writeStringBinary('hello', ba) # key - ba.extend([1] + [0] * 7) # num_indexes - ba.extend([0] * 8) # UInt64 index (overflow) + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary("x", ba) + writeStringBinary("LowCardinality(String)", ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend( + [3, 3] + [0] * 6 + ) # indexes type: UInt64 [3], with global dict and add keys [1 + 2] + ba.extend([1] + [0] * 7) # num_keys in dict + writeStringBinary("hello", ba) # key + ba.extend([1] + [0] * 7) # num_indexes + ba.extend([0] * 8) # UInt64 index (overflow) s.sendall(ba) assertPacket(readVarUInt(s), 2) print(readException(s)) s.close() + def insertLowCardinalityRowWithIncorrectAdditionalKeys(): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.settimeout(30) s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) sendHello(s) receiveHello(s) - sendQuery(s, 'insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV'.format(CLICKHOUSE_DATABASE)) + sendQuery( + s, + "insert into {}.tab settings input_format_defaults_for_omitted_fields=0 format TSV".format( + CLICKHOUSE_DATABASE + ), + ) # external tables sendEmptyBlock(s) @@ -316,30 +349,34 @@ def insertLowCardinalityRowWithIncorrectAdditionalKeys(): # Data ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(1, ba) # rows - writeVarUInt(1, ba) # columns - writeStringBinary('x', ba) - writeStringBinary('LowCardinality(String)', ba) - ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys - ba.extend([3, 0] + [0] * 6) # indexes type: UInt64 [3], with NO additional keys [0] - ba.extend([1] + [0] * 7) # num_keys in dict - writeStringBinary('hello', ba) # key - ba.extend([1] + [0] * 7) # num_indexes - ba.extend([0] * 8) # UInt64 index (0 for 'hello') + writeVarUInt(1, ba) # rows + writeVarUInt(1, ba) # columns + writeStringBinary("x", ba) + writeStringBinary("LowCardinality(String)", ba) + ba.extend([1] + [0] * 7) # SharedDictionariesWithAdditionalKeys + ba.extend( + [3, 0] + [0] * 6 + ) # indexes type: UInt64 [3], with NO additional keys [0] + ba.extend([1] + [0] * 7) # num_keys in dict + writeStringBinary("hello", ba) # key + ba.extend([1] + [0] * 7) # num_indexes + ba.extend([0] * 8) # UInt64 index (0 for 'hello') 
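        # Reading the byte layouts and inline comments above, the four insert* helpers
        # in this file all frame the LowCardinality column body the same way: a UInt64
        # keys-serialization version (1 = SharedDictionariesWithAdditionalKeys), a
        # UInt64 index-type word whose low byte picks the index width (3 = UInt64) and
        # whose next byte carries the flags (1 = needs global dictionary, 2 = has
        # additional keys), then the dictionary size and its keys, then the number of
        # indexes and the indexes themselves. Each of the three negative cases corrupts
        # exactly one of those fields and expects an Exception packet from the server
        # instead of End-of-stream.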
s.sendall(ba) assertPacket(readVarUInt(s), 2) print(readException(s)) s.close() + def main(): insertValidLowCardinalityRow() insertLowCardinalityRowWithIndexOverflow() insertLowCardinalityRowWithIncorrectDictType() insertLowCardinalityRowWithIncorrectAdditionalKeys() + if __name__ == "__main__": main() diff --git a/tests/queries/0_stateless/02126_url_auth.python b/tests/queries/0_stateless/02126_url_auth.python index 57b16fb413e..9b2e68a017d 100644 --- a/tests/queries/0_stateless/02126_url_auth.python +++ b/tests/queries/0_stateless/02126_url_auth.python @@ -12,6 +12,7 @@ import subprocess from io import StringIO from http.server import BaseHTTPRequestHandler, HTTPServer + def is_ipv6(host): try: socket.inet_aton(host) @@ -19,6 +20,7 @@ def is_ipv6(host): except: return True + def get_local_port(host, ipv6): if ipv6: family = socket.AF_INET6 @@ -29,8 +31,9 @@ def get_local_port(host, ipv6): fd.bind((host, 0)) return fd.getsockname()[1] -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') -CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') + +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123") ##################################################################################### # This test starts an HTTP server and serves data to clickhouse url-engine based table. @@ -39,27 +42,42 @@ CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') ##################################################################################### # IP-address of this host accessible from the outside world. Get the first one -HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0] +HTTP_SERVER_HOST = ( + subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0] +) IS_IPV6 = is_ipv6(HTTP_SERVER_HOST) HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6) # IP address and port of the HTTP server started from this script. 
HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT) if IS_IPV6: - HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}" + + "/" + ) else: - HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/" + ) + +CSV_DATA = os.path.join( + tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()) +) -CSV_DATA = os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())) def get_ch_answer(query): host = CLICKHOUSE_HOST if IS_IPV6: - host = f'[{host}]' + host = f"[{host}]" - url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP)) + url = os.environ.get( + "CLICKHOUSE_URL", + "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP), + ) return urllib.request.urlopen(url, data=query.encode()).read().decode() + def check_answers(query, answer): ch_answer = get_ch_answer(query) if ch_answer.strip() != answer.strip(): @@ -68,15 +86,16 @@ def check_answers(query, answer): print("Fetched answer :", ch_answer, file=sys.stderr) raise Exception("Fail on query") + class CSVHTTPServer(BaseHTTPRequestHandler): def _set_headers(self): self.send_response(200) - self.send_header('Content-type', 'text/csv') + self.send_header("Content-type", "text/csv") self.end_headers() def do_GET(self): self._set_headers() - self.wfile.write(('hello, world').encode()) + self.wfile.write(("hello, world").encode()) # with open(CSV_DATA, 'r') as fl: # reader = csv.reader(fl, delimiter=',') # for row in reader: @@ -84,33 +103,33 @@ class CSVHTTPServer(BaseHTTPRequestHandler): return def read_chunk(self): - msg = '' + msg = "" while True: sym = self.rfile.read(1) - if sym == '': + if sym == "": break - msg += sym.decode('utf-8') - if msg.endswith('\r\n'): + msg += sym.decode("utf-8") + if msg.endswith("\r\n"): break length = int(msg[:-2], 16) if length == 0: - return '' + return "" content = self.rfile.read(length) - self.rfile.read(2) # read sep \r\n - return content.decode('utf-8') + self.rfile.read(2) # read sep \r\n + return content.decode("utf-8") def do_POST(self): - data = '' + data = "" while True: chunk = self.read_chunk() if not chunk: break data += chunk with StringIO(data) as fl: - reader = csv.reader(fl, delimiter=',') - with open(CSV_DATA, 'a') as d: + reader = csv.reader(fl, delimiter=",") + with open(CSV_DATA, "a") as d: for row in reader: - d.write(','.join(row) + '\n') + d.write(",".join(row) + "\n") self._set_headers() self.wfile.write(b"ok") @@ -121,6 +140,7 @@ class CSVHTTPServer(BaseHTTPRequestHandler): class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 + def start_server(): if IS_IPV6: httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, CSVHTTPServer) @@ -130,57 +150,87 @@ def start_server(): t = threading.Thread(target=httpd.serve_forever) return t, httpd + # test section -def test_select(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests=[], answers=[], test_data=""): - with open(CSV_DATA, 'w') as f: # clear file - f.write('') + +def test_select( + table_name="", + schema="str String,numuint UInt32,numint Int32,double Float64", + requests=[], + answers=[], + test_data="", +): + with open(CSV_DATA, "w") as f: # clear file + f.write("") 
if test_data: - with open(CSV_DATA, 'w') as f: + with open(CSV_DATA, "w") as f: f.write(test_data + "\n") if table_name: get_ch_answer("drop table if exists {}".format(table_name)) - get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR)) + get_ch_answer( + "create table {} ({}) engine=URL('{}', 'CSV')".format( + table_name, schema, HTTP_SERVER_URL_STR + ) + ) for i in range(len(requests)): tbl = table_name if not tbl: - tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + tbl = "url('{addr}', 'CSV', '{schema}')".format( + addr=HTTP_SERVER_URL_STR, schema=schema + ) check_answers(requests[i].format(tbl=tbl), answers[i]) if table_name: get_ch_answer("drop table if exists {}".format(table_name)) -def test_insert(table_name="", schema="str String,numuint UInt32,numint Int32,double Float64", requests_insert=[], requests_select=[], answers=[]): - with open(CSV_DATA, 'w') as f: # flush test file - f.write('') + +def test_insert( + table_name="", + schema="str String,numuint UInt32,numint Int32,double Float64", + requests_insert=[], + requests_select=[], + answers=[], +): + with open(CSV_DATA, "w") as f: # flush test file + f.write("") if table_name: get_ch_answer("drop table if exists {}".format(table_name)) - get_ch_answer("create table {} ({}) engine=URL('{}', 'CSV')".format(table_name, schema, HTTP_SERVER_URL_STR)) + get_ch_answer( + "create table {} ({}) engine=URL('{}', 'CSV')".format( + table_name, schema, HTTP_SERVER_URL_STR + ) + ) for req in requests_insert: tbl = table_name if not tbl: - tbl = "table function url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + tbl = "table function url('{addr}', 'CSV', '{schema}')".format( + addr=HTTP_SERVER_URL_STR, schema=schema + ) get_ch_answer(req.format(tbl=tbl)) - for i in range(len(requests_select)): tbl = table_name if not tbl: - tbl = "url('{addr}', 'CSV', '{schema}')".format(addr=HTTP_SERVER_URL_STR, schema=schema) + tbl = "url('{addr}', 'CSV', '{schema}')".format( + addr=HTTP_SERVER_URL_STR, schema=schema + ) check_answers(requests_select[i].format(tbl=tbl), answers[i]) if table_name: get_ch_answer("drop table if exists {}".format(table_name)) + def test_select_url_engine(requests=[], answers=[], test_data=""): for i in range(len(requests)): check_answers(requests[i], answers[i]) + def main(): test_data = "Hello,2,-2,7.7\nWorld,2,-5,8.8" """ @@ -203,19 +253,29 @@ def main(): """ if IS_IPV6: - query = "select * from url('http://guest:guest@" + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/', 'RawBLOB', 'a String')" + query = ( + "select * from url('http://guest:guest@" + + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}" + + "/', 'RawBLOB', 'a String')" + ) else: - query = "select * from url('http://guest:guest@" + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/', 'RawBLOB', 'a String')" - - + query = ( + "select * from url('http://guest:guest@" + + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + + "/', 'RawBLOB', 'a String')" + ) select_requests_url_auth = { - query : 'hello, world', + query: "hello, world", } t, httpd = start_server() t.start() - test_select(requests=list(select_requests_url_auth.keys()), answers=list(select_requests_url_auth.values()), test_data=test_data) + test_select( + requests=list(select_requests_url_auth.keys()), + answers=list(select_requests_url_auth.values()), + test_data=test_data, + ) httpd.shutdown() t.join() 
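    # A note on what this check exercises, as far as the stub server goes: the url()
    # table function is given inline credentials (http://guest:guest@host:port/) and
    # the test only asserts that 'hello, world' comes back. CSVHTTPServer.do_GET
    # answers every GET without inspecting the userinfo, so the case shows that a
    # credentialed URL is accepted and fetched, not that authentication is enforced.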
print("PASSED") diff --git a/tests/queries/0_stateless/02158_proportions_ztest_cmp.python b/tests/queries/0_stateless/02158_proportions_ztest_cmp.python index d622004db28..0555f8c36ec 100644 --- a/tests/queries/0_stateless/02158_proportions_ztest_cmp.python +++ b/tests/queries/0_stateless/02158_proportions_ztest_cmp.python @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 import os import sys from math import sqrt, nan @@ -8,7 +8,7 @@ import pandas as pd import numpy as np CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient @@ -25,7 +25,7 @@ def twosample_proportion_ztest(s1, s2, t1, t2, alpha): return nan, nan, nan, nan z_stat = (p1 - p2) / se - one_side = 1 - stats.norm.cdf(abs(z_stat)) + one_side = 1 - stats.norm.cdf(abs(z_stat)) p_value = one_side * 2 z = stats.norm.ppf(1 - 0.5 * alpha) @@ -38,71 +38,171 @@ def twosample_proportion_ztest(s1, s2, t1, t2, alpha): def test_and_check(name, z_stat, p_value, ci_lower, ci_upper, precision=1e-2): client = ClickHouseClient() real = client.query_return_df( - "SELECT roundBankers({}.1, 16) as z_stat, ".format(name) + - "roundBankers({}.2, 16) as p_value, ".format(name) + - "roundBankers({}.3, 16) as ci_lower, ".format(name) + - "roundBankers({}.4, 16) as ci_upper ".format(name) + - "FORMAT TabSeparatedWithNames;") - real_z_stat = real['z_stat'][0] - real_p_value = real['p_value'][0] - real_ci_lower = real['ci_lower'][0] - real_ci_upper = real['ci_upper'][0] - assert((np.isnan(real_z_stat) and np.isnan(z_stat)) or abs(real_z_stat - np.float64(z_stat)) < precision), "clickhouse_z_stat {}, py_z_stat {}".format(real_z_stat, z_stat) - assert((np.isnan(real_p_value) and np.isnan(p_value)) or abs(real_p_value - np.float64(p_value)) < precision), "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value) - assert((np.isnan(real_ci_lower) and np.isnan(ci_lower)) or abs(real_ci_lower - np.float64(ci_lower)) < precision), "clickhouse_ci_lower {}, py_ci_lower {}".format(real_ci_lower, ci_lower) - assert((np.isnan(real_ci_upper) and np.isnan(ci_upper)) or abs(real_ci_upper - np.float64(ci_upper)) < precision), "clickhouse_ci_upper {}, py_ci_upper {}".format(real_ci_upper, ci_upper) + "SELECT roundBankers({}.1, 16) as z_stat, ".format(name) + + "roundBankers({}.2, 16) as p_value, ".format(name) + + "roundBankers({}.3, 16) as ci_lower, ".format(name) + + "roundBankers({}.4, 16) as ci_upper ".format(name) + + "FORMAT TabSeparatedWithNames;" + ) + real_z_stat = real["z_stat"][0] + real_p_value = real["p_value"][0] + real_ci_lower = real["ci_lower"][0] + real_ci_upper = real["ci_upper"][0] + assert (np.isnan(real_z_stat) and np.isnan(z_stat)) or abs( + real_z_stat - np.float64(z_stat) + ) < precision, "clickhouse_z_stat {}, py_z_stat {}".format(real_z_stat, z_stat) + assert (np.isnan(real_p_value) and np.isnan(p_value)) or abs( + real_p_value - np.float64(p_value) + ) < precision, "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value) + assert (np.isnan(real_ci_lower) and np.isnan(ci_lower)) or abs( + real_ci_lower - np.float64(ci_lower) + ) < precision, "clickhouse_ci_lower {}, py_ci_lower {}".format( + real_ci_lower, ci_lower + ) + assert (np.isnan(real_ci_upper) and np.isnan(ci_upper)) or abs( + real_ci_upper - np.float64(ci_upper) + ) < precision, "clickhouse_ci_upper {}, py_ci_upper {}".format( + real_ci_upper, ci_upper + ) def test_mean_ztest(): counts = [0, 0] nobs = 
[0, 0] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(10, 10, 10, 10, 0.05) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + 10, 10, 10, 10, 0.05 + ) counts = [10, 10] nobs = [10, 10] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(10, 10, 10, 10, 0.05) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + 10, 10, 10, 10, 0.05 + ) counts = [16, 16] nobs = [16, 18] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) counts = [10, 20] nobs = [30, 40] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) counts = [20, 10] nobs = [40, 30] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) - counts = [randrange(10,20), randrange(10,20)] - nobs = [randrange(counts[0] + 1, counts[0] * 2), randrange(counts[1], counts[1] * 2)] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, 
%d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) + counts = [randrange(10, 20), randrange(10, 20)] + nobs = [ + randrange(counts[0] + 1, counts[0] * 2), + randrange(counts[1], counts[1] * 2), + ] + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) - counts = [randrange(1,100), randrange(1,200)] + counts = [randrange(1, 100), randrange(1, 200)] nobs = [randrange(counts[0], counts[0] * 2), randrange(counts[1], counts[1] * 3)] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) - counts = [randrange(1,200), randrange(1,100)] + counts = [randrange(1, 200), randrange(1, 100)] nobs = [randrange(counts[0], counts[0] * 3), randrange(counts[1], counts[1] * 2)] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) - counts = [randrange(1,1000), randrange(1,1000)] + counts = [randrange(1, 1000), randrange(1, 1000)] nobs = [randrange(counts[0], counts[0] * 2), randrange(counts[1], counts[1] * 2)] - z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest(counts[0], counts[1], nobs[0], nobs[1], 0.05) - test_and_check("proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" % (counts[0], counts[1], nobs[0], nobs[1]), z_stat, p_value, ci_lower, ci_upper) + z_stat, p_value, ci_lower, ci_upper = twosample_proportion_ztest( + counts[0], counts[1], nobs[0], nobs[1], 0.05 + ) + test_and_check( + "proportionsZTest(%d, %d, %d, %d, 0.95, 'unpooled')" + % (counts[0], counts[1], nobs[0], nobs[1]), + z_stat, + p_value, + ci_lower, + ci_upper, + ) if __name__ == "__main__": test_mean_ztest() print("Ok.") - diff --git a/tests/queries/0_stateless/02158_ztest_cmp.python b/tests/queries/0_stateless/02158_ztest_cmp.python index 8fc22d78e74..9591a150337 100644 --- a/tests/queries/0_stateless/02158_ztest_cmp.python +++ b/tests/queries/0_stateless/02158_ztest_cmp.python @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python3 import os import sys from statistics import variance @@ -7,7 +7,7 @@ import pandas as pd import numpy as np CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient @@ -30,46 +30,95 @@ def twosample_mean_ztest(rvs1, rvs2, alpha=0.05): def test_and_check(name, a, b, t_stat, 
p_value, ci_low, ci_high, precision=1e-2): client = ClickHouseClient() client.query("DROP TABLE IF EXISTS ztest;") - client.query("CREATE TABLE ztest (left Float64, right UInt8) ENGINE = Memory;"); - client.query("INSERT INTO ztest VALUES {};".format(", ".join(['({},{})'.format(i, 0) for i in a]))) - client.query("INSERT INTO ztest VALUES {};".format(", ".join(['({},{})'.format(j, 1) for j in b]))) + client.query("CREATE TABLE ztest (left Float64, right UInt8) ENGINE = Memory;") + client.query( + "INSERT INTO ztest VALUES {};".format( + ", ".join(["({},{})".format(i, 0) for i in a]) + ) + ) + client.query( + "INSERT INTO ztest VALUES {};".format( + ", ".join(["({},{})".format(j, 1) for j in b]) + ) + ) real = client.query_return_df( - "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + - "roundBankers({}(left, right).2, 16) as p_value, ".format(name) + - "roundBankers({}(left, right).3, 16) as ci_low, ".format(name) + - "roundBankers({}(left, right).4, 16) as ci_high ".format(name) + - "FROM ztest FORMAT TabSeparatedWithNames;") - real_t_stat = real['t_stat'][0] - real_p_value = real['p_value'][0] - real_ci_low = real['ci_low'][0] - real_ci_high = real['ci_high'][0] - assert(abs(real_t_stat - np.float64(t_stat)) < precision), "clickhouse_t_stat {}, py_t_stat {}".format(real_t_stat, t_stat) - assert(abs(real_p_value - np.float64(p_value)) < precision), "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value) - assert(abs(real_ci_low - np.float64(ci_low)) < precision), "clickhouse_ci_low {}, py_ci_low {}".format(real_ci_low, ci_low) - assert(abs(real_ci_high - np.float64(ci_high)) < precision), "clickhouse_ci_high {}, py_ci_high {}".format(real_ci_high, ci_high) + "SELECT roundBankers({}(left, right).1, 16) as t_stat, ".format(name) + + "roundBankers({}(left, right).2, 16) as p_value, ".format(name) + + "roundBankers({}(left, right).3, 16) as ci_low, ".format(name) + + "roundBankers({}(left, right).4, 16) as ci_high ".format(name) + + "FROM ztest FORMAT TabSeparatedWithNames;" + ) + real_t_stat = real["t_stat"][0] + real_p_value = real["p_value"][0] + real_ci_low = real["ci_low"][0] + real_ci_high = real["ci_high"][0] + assert ( + abs(real_t_stat - np.float64(t_stat)) < precision + ), "clickhouse_t_stat {}, py_t_stat {}".format(real_t_stat, t_stat) + assert ( + abs(real_p_value - np.float64(p_value)) < precision + ), "clickhouse_p_value {}, py_p_value {}".format(real_p_value, p_value) + assert ( + abs(real_ci_low - np.float64(ci_low)) < precision + ), "clickhouse_ci_low {}, py_ci_low {}".format(real_ci_low, ci_low) + assert ( + abs(real_ci_high - np.float64(ci_high)) < precision + ), "clickhouse_ci_high {}, py_ci_high {}".format(real_ci_high, ci_high) client.query("DROP TABLE IF EXISTS ztest;") def test_mean_ztest(): - rvs1 = np.round(stats.norm.rvs(loc=1, scale=5,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=10, scale=5,size=500), 2) + rvs1 = np.round(stats.norm.rvs(loc=1, scale=5, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=10, scale=5, size=500), 2) s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2) - test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch) + test_and_check( + "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), + rvs1, + rvs2, + s, + p, + cl, + ch, + ) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=5,size=500), 2) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=5, size=500), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, 
scale=5, size=500), 2) s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2) - test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch) + test_and_check( + "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), + rvs1, + rvs2, + s, + p, + cl, + ch, + ) - rvs1 = np.round(stats.norm.rvs(loc=2, scale=10,size=512), 2) - rvs2 = np.round(stats.norm.rvs(loc=5, scale=20,size=1024), 2) + rvs1 = np.round(stats.norm.rvs(loc=2, scale=10, size=512), 2) + rvs2 = np.round(stats.norm.rvs(loc=5, scale=20, size=1024), 2) s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2) - test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch) + test_and_check( + "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), + rvs1, + rvs2, + s, + p, + cl, + ch, + ) - rvs1 = np.round(stats.norm.rvs(loc=0, scale=10,size=1024), 2) - rvs2 = np.round(stats.norm.rvs(loc=0, scale=10,size=512), 2) + rvs1 = np.round(stats.norm.rvs(loc=0, scale=10, size=1024), 2) + rvs2 = np.round(stats.norm.rvs(loc=0, scale=10, size=512), 2) s, p, cl, ch = twosample_mean_ztest(rvs1, rvs2) - test_and_check("meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), rvs1, rvs2, s, p, cl, ch) + test_and_check( + "meanZTest(%f, %f, 0.95)" % (variance(rvs1), variance(rvs2)), + rvs1, + rvs2, + s, + p, + cl, + ch, + ) if __name__ == "__main__": diff --git a/tests/queries/0_stateless/02187_async_inserts_all_formats.python b/tests/queries/0_stateless/02187_async_inserts_all_formats.python index 65a323ef9db..fa555c78f8b 100644 --- a/tests/queries/0_stateless/02187_async_inserts_all_formats.python +++ b/tests/queries/0_stateless/02187_async_inserts_all_formats.python @@ -3,47 +3,71 @@ import os import sys CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) -CLICKHOUSE_URL = os.environ.get('CLICKHOUSE_URL') -CLICKHOUSE_TMP = os.environ.get('CLICKHOUSE_TMP') +CLICKHOUSE_URL = os.environ.get("CLICKHOUSE_URL") +CLICKHOUSE_TMP = os.environ.get("CLICKHOUSE_TMP") from pure_http_client import ClickHouseClient client = ClickHouseClient() + def run_test(data_format, gen_data_template, settings): print(data_format) client.query("TRUNCATE TABLE t_async_insert") expected = client.query(gen_data_template.format("TSV")).strip() - data = client.query(gen_data_template.format(data_format), settings=settings,binary_result=True) + data = client.query( + gen_data_template.format(data_format), settings=settings, binary_result=True + ) insert_query = "INSERT INTO t_async_insert FORMAT {}".format(data_format) client.query_with_data(insert_query, data, settings=settings) result = client.query("SELECT * FROM t_async_insert FORMAT TSV").strip() if result != expected: - print("Failed for format {}.\nExpected:\n{}\nGot:\n{}\n".format(data_format, expected, result)) + print( + "Failed for format {}.\nExpected:\n{}\nGot:\n{}\n".format( + data_format, expected, result + ) + ) exit(1) -formats = client.query("SELECT name FROM system.formats WHERE is_input AND is_output \ - AND name NOT IN ('CapnProto', 'RawBLOB', 'Template', 'ProtobufSingle', 'LineAsString', 'Protobuf', 'ProtobufList') ORDER BY name").strip().split('\n') + +formats = ( + client.query( + "SELECT name FROM system.formats WHERE is_input AND is_output \ + AND name NOT IN ('CapnProto', 'RawBLOB', 'Template', 'ProtobufSingle', 'LineAsString', 'Protobuf', 'ProtobufList') ORDER BY name" + ) + .strip() + .split("\n") +) # Generic 
formats client.query("DROP TABLE IF EXISTS t_async_insert") -client.query("CREATE TABLE t_async_insert (id UInt64, s String, arr Array(UInt64)) ENGINE = Memory") +client.query( + "CREATE TABLE t_async_insert (id UInt64, s String, arr Array(UInt64)) ENGINE = Memory" +) gen_data_query = "SELECT number AS id, toString(number) AS s, range(number) AS arr FROM numbers(10) FORMAT {}" for data_format in formats: - run_test(data_format, gen_data_query, settings={"async_insert": 1, "wait_for_async_insert": 1}) + run_test( + data_format, + gen_data_query, + settings={"async_insert": 1, "wait_for_async_insert": 1}, + ) # LineAsString client.query("DROP TABLE IF EXISTS t_async_insert") client.query("CREATE TABLE t_async_insert (s String) ENGINE = Memory") gen_data_query = "SELECT toString(number) AS s FROM numbers(10) FORMAT {}" -run_test('LineAsString', gen_data_query, settings={"async_insert": 1, "wait_for_async_insert": 1}) +run_test( + "LineAsString", + gen_data_query, + settings={"async_insert": 1, "wait_for_async_insert": 1}, +) # TODO: add CapnProto and Protobuf diff --git a/tests/queries/0_stateless/02205_HTTP_user_agent.python b/tests/queries/0_stateless/02205_HTTP_user_agent.python index 0d3a563c094..5787ae186ab 100644 --- a/tests/queries/0_stateless/02205_HTTP_user_agent.python +++ b/tests/queries/0_stateless/02205_HTTP_user_agent.python @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from http.server import SimpleHTTPRequestHandler,HTTPServer +from http.server import SimpleHTTPRequestHandler, HTTPServer import socket import sys import threading @@ -17,6 +17,7 @@ def is_ipv6(host): except: return True + def get_local_port(host, ipv6): if ipv6: family = socket.AF_INET6 @@ -27,20 +28,19 @@ def get_local_port(host, ipv6): fd.bind((host, 0)) return fd.getsockname()[1] -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', 'localhost') -CLICKHOUSE_PORT_HTTP = os.environ.get('CLICKHOUSE_PORT_HTTP', '8123') + +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "localhost") +CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123") # Server returns this JSON response. -SERVER_JSON_RESPONSE = \ -'''{ +SERVER_JSON_RESPONSE = """{ "login": "ClickHouse", "id": 54801242, "name": "ClickHouse", "company": null -}''' +}""" -EXPECTED_ANSWER = \ -'''{\\n\\t"login": "ClickHouse",\\n\\t"id": 54801242,\\n\\t"name": "ClickHouse",\\n\\t"company": null\\n}''' +EXPECTED_ANSWER = """{\\n\\t"login": "ClickHouse",\\n\\t"id": 54801242,\\n\\t"name": "ClickHouse",\\n\\t"company": null\\n}""" ##################################################################################### # This test starts an HTTP server and serves data to clickhouse url-engine based table. @@ -51,26 +51,38 @@ EXPECTED_ANSWER = \ ##################################################################################### # IP-address of this host accessible from the outside world. Get the first one -HTTP_SERVER_HOST = subprocess.check_output(['hostname', '-i']).decode('utf-8').strip().split()[0] +HTTP_SERVER_HOST = ( + subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0] +) IS_IPV6 = is_ipv6(HTTP_SERVER_HOST) HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6) # IP address and port of the HTTP server started from this script. 
HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT) if IS_IPV6: - HTTP_SERVER_URL_STR = 'http://' + f'[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}" + + "/" + ) else: - HTTP_SERVER_URL_STR = 'http://' + f'{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}' + "/" + HTTP_SERVER_URL_STR = ( + "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/" + ) def get_ch_answer(query): host = CLICKHOUSE_HOST if IS_IPV6: - host = f'[{host}]' + host = f"[{host}]" - url = os.environ.get('CLICKHOUSE_URL', 'http://{host}:{port}'.format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP)) + url = os.environ.get( + "CLICKHOUSE_URL", + "http://{host}:{port}".format(host=CLICKHOUSE_HOST, port=CLICKHOUSE_PORT_HTTP), + ) return urllib.request.urlopen(url, data=query.encode()).read().decode() + def check_answers(query, answer): ch_answer = get_ch_answer(query) if ch_answer.strip() != answer.strip(): @@ -79,16 +91,17 @@ def check_answers(query, answer): print("Fetched answer :", ch_answer, file=sys.stderr) raise Exception("Fail on query") + # Server with check for User-Agent headers. class HttpProcessor(SimpleHTTPRequestHandler): def _set_headers(self): - user_agent = self.headers.get('User-Agent') - if user_agent and user_agent.startswith('ClickHouse/'): + user_agent = self.headers.get("User-Agent") + if user_agent and user_agent.startswith("ClickHouse/"): self.send_response(200) else: self.send_response(403) - self.send_header('Content-Type', 'text/csv') + self.send_header("Content-Type", "text/csv") self.end_headers() def do_GET(self): @@ -98,9 +111,11 @@ class HttpProcessor(SimpleHTTPRequestHandler): def log_message(self, format, *args): return + class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 + def start_server(requests_amount): if IS_IPV6: httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor) @@ -114,15 +129,18 @@ def start_server(requests_amount): t = threading.Thread(target=real_func) return t + ##################################################################### # Testing area. 
##################################################################### + def test_select(): global HTTP_SERVER_URL_STR - query = 'SELECT * FROM url(\'{}\',\'JSONAsString\');'.format(HTTP_SERVER_URL_STR) + query = "SELECT * FROM url('{}','JSONAsString');".format(HTTP_SERVER_URL_STR) check_answers(query, EXPECTED_ANSWER) + def main(): # HEAD + GET t = start_server(3) @@ -131,6 +149,7 @@ def main(): t.join() print("PASSED") + if __name__ == "__main__": try: main() @@ -141,4 +160,3 @@ if __name__ == "__main__": sys.stderr.flush() os._exit(1) - diff --git a/tests/queries/0_stateless/02233_HTTP_ranged.python b/tests/queries/0_stateless/02233_HTTP_ranged.python index e74d494edf5..66ef3304098 100644 --- a/tests/queries/0_stateless/02233_HTTP_ranged.python +++ b/tests/queries/0_stateless/02233_HTTP_ranged.python @@ -122,7 +122,7 @@ class HttpProcessor(BaseHTTPRequestHandler): get_call_num = 0 responses_to_get = [] - def send_head(self, from_get = False): + def send_head(self, from_get=False): if self.headers["Range"] and HttpProcessor.allow_range: try: self.range = parse_byte_range(self.headers["Range"]) @@ -146,7 +146,9 @@ class HttpProcessor(BaseHTTPRequestHandler): self.send_error(416, "Requested Range Not Satisfiable") return None - retry_range_request = first != 0 and from_get is True and len(HttpProcessor.responses_to_get) > 0 + retry_range_request = ( + first != 0 and from_get is True and len(HttpProcessor.responses_to_get) > 0 + ) if retry_range_request: code = HttpProcessor.responses_to_get.pop() if code not in HttpProcessor.responses: @@ -244,7 +246,9 @@ def run_test(allow_range, settings, check_retries=False): raise Exception("HTTP Range was not used when supported") if check_retries and len(HttpProcessor.responses_to_get) > 0: - raise Exception("Expected to get http response 500, which had to be retried, but 200 ok returned and then retried") + raise Exception( + "Expected to get http response 500, which had to be retried, but 200 ok returned and then retried" + ) if retries_num > 0: expected_get_call_num += retries_num - 1 @@ -263,7 +267,7 @@ def run_test(allow_range, settings, check_retries=False): def main(): - settings = {"max_download_buffer_size" : 20} + settings = {"max_download_buffer_size": 20} # Test Accept-Ranges=False run_test(allow_range=False, settings=settings) @@ -271,7 +275,7 @@ def main(): run_test(allow_range=True, settings=settings) # Test Accept-Ranges=True, parallel download is used - settings = {"max_download_buffer_size" : 10} + settings = {"max_download_buffer_size": 10} run_test(allow_range=True, settings=settings) # Test Accept-Ranges=True, parallel download is not used, diff --git a/tests/queries/0_stateless/02294_anova_cmp.python b/tests/queries/0_stateless/02294_anova_cmp.python index 7597b3712d1..2212a887b2f 100644 --- a/tests/queries/0_stateless/02294_anova_cmp.python +++ b/tests/queries/0_stateless/02294_anova_cmp.python @@ -7,7 +7,7 @@ import pandas as pd import numpy as np CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient @@ -22,15 +22,22 @@ def test_and_check(rvs, n_groups, f_stat, p_value, precision=1e-2): client.query("DROP TABLE IF EXISTS anova;") client.query("CREATE TABLE anova (left Float64, right UInt64) ENGINE = Memory;") for group in range(n_groups): - client.query(f'''INSERT INTO anova VALUES {", ".join([f'({i},{group})' for i in rvs[group]])};''') + client.query( + f"""INSERT INTO anova 
VALUES {", ".join([f'({i},{group})' for i in rvs[group]])};""" + ) real = client.query_return_df( - '''SELECT roundBankers(a.1, 16) as f_stat, roundBankers(a.2, 16) as p_value FROM (SELECT anova(left, right) as a FROM anova) FORMAT TabSeparatedWithNames;''') + """SELECT roundBankers(a.1, 16) as f_stat, roundBankers(a.2, 16) as p_value FROM (SELECT anova(left, right) as a FROM anova) FORMAT TabSeparatedWithNames;""" + ) - real_f_stat = real['f_stat'][0] - real_p_value = real['p_value'][0] - assert(abs(real_f_stat - np.float64(f_stat)) < precision), f"clickhouse_f_stat {real_f_stat}, py_f_stat {f_stat}" - assert(abs(real_p_value - np.float64(p_value)) < precision), f"clickhouse_p_value {real_p_value}, py_p_value {p_value}" + real_f_stat = real["f_stat"][0] + real_p_value = real["p_value"][0] + assert ( + abs(real_f_stat - np.float64(f_stat)) < precision + ), f"clickhouse_f_stat {real_f_stat}, py_f_stat {f_stat}" + assert ( + abs(real_p_value - np.float64(p_value)) < precision + ), f"clickhouse_p_value {real_p_value}, py_p_value {p_value}" client.query("DROP TABLE IF EXISTS anova;") diff --git a/tests/queries/0_stateless/02346_read_in_order_fixed_prefix.python b/tests/queries/0_stateless/02346_read_in_order_fixed_prefix.python index 399533480a9..7f52daeb408 100644 --- a/tests/queries/0_stateless/02346_read_in_order_fixed_prefix.python +++ b/tests/queries/0_stateless/02346_read_in_order_fixed_prefix.python @@ -123,10 +123,14 @@ Uses FinishSortingTransform: {} for query in queries: check_query(query["where"], query["order_by"], query["optimize"], False) - check_query(query["where"], query["order_by"] + ["e"], query["optimize"], query["optimize"]) + check_query( + query["where"], query["order_by"] + ["e"], query["optimize"], query["optimize"] + ) where_columns = [f"bitNot({col})" for col in query["where"]] check_query(where_columns, query["order_by"], query["optimize"], False) - check_query(where_columns, query["order_by"] + ["e"], query["optimize"], query["optimize"]) + check_query( + where_columns, query["order_by"] + ["e"], query["optimize"], query["optimize"] + ) print("OK") diff --git a/tests/queries/0_stateless/02403_big_http_chunk_size.python b/tests/queries/0_stateless/02403_big_http_chunk_size.python index 4e2e97e487b..4d2f01db55b 100644 --- a/tests/queries/0_stateless/02403_big_http_chunk_size.python +++ b/tests/queries/0_stateless/02403_big_http_chunk_size.python @@ -8,8 +8,8 @@ TRANSFER_ENCODING_HEADER = "Transfer-Encoding" def main(): - host = os.environ['CLICKHOUSE_HOST'] - port = int(os.environ['CLICKHOUSE_PORT_HTTP']) + host = os.environ["CLICKHOUSE_HOST"] + port = int(os.environ["CLICKHOUSE_PORT_HTTP"]) sock = socket(AF_INET, SOCK_STREAM) sock.connect((host, port)) @@ -47,4 +47,3 @@ def main(): if __name__ == "__main__": main() - diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 571a3c3afb5..4ae98bda16d 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -513,6 +513,7 @@ nullIf nullIn nullInIgnoreSet or +parseDateTime parseDateTime32BestEffort parseDateTime32BestEffortOrNull parseDateTime32BestEffortOrZero @@ -528,6 +529,7 @@ parseDateTimeBestEffortOrZero parseDateTimeBestEffortUS parseDateTimeBestEffortUSOrNull parseDateTimeBestEffortUSOrZero +parseDateTimeInJodaSyntax parseTimeDelta partitionId path diff --git 
a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python index c638b3d2040..696eb01ff7e 100644 --- a/tests/queries/0_stateless/02458_insert_select_progress_tcp.python +++ b/tests/queries/0_stateless/02458_insert_select_progress_tcp.python @@ -5,13 +5,13 @@ import os import uuid import json -CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') -CLICKHOUSE_PORT = int(os.environ.get('CLICKHOUSE_PORT_TCP', '900000')) -CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'default') +CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "127.0.0.1") +CLICKHOUSE_PORT = int(os.environ.get("CLICKHOUSE_PORT_TCP", "900000")) +CLICKHOUSE_DATABASE = os.environ.get("CLICKHOUSE_DATABASE", "default") + def writeVarUInt(x, ba): for _ in range(0, 9): - byte = x & 0x7F if x > 0x7F: byte |= 0x80 @@ -24,12 +24,12 @@ def writeVarUInt(x, ba): def writeStringBinary(s, ba): - b = bytes(s, 'utf-8') + b = bytes(s, "utf-8") writeVarUInt(len(s), ba) ba.extend(b) -def readStrict(s, size = 1): +def readStrict(s, size=1): res = bytearray() while size: cur = s.recv(size) @@ -48,18 +48,23 @@ def readUInt(s, size=1): val += res[i] << (i * 8) return val + def readUInt8(s): return readUInt(s) + def readUInt16(s): return readUInt(s, 2) + def readUInt32(s): return readUInt(s, 4) + def readUInt64(s): return readUInt(s, 8) + def readVarUInt(s): x = 0 for i in range(9): @@ -75,25 +80,25 @@ def readVarUInt(s): def readStringBinary(s): size = readVarUInt(s) s = readStrict(s, size) - return s.decode('utf-8') + return s.decode("utf-8") def sendHello(s): ba = bytearray() - writeVarUInt(0, ba) # Hello - writeStringBinary('simple native protocol', ba) + writeVarUInt(0, ba) # Hello + writeStringBinary("simple native protocol", ba) writeVarUInt(21, ba) writeVarUInt(9, ba) writeVarUInt(54449, ba) - writeStringBinary(CLICKHOUSE_DATABASE, ba) # database - writeStringBinary('default', ba) # user - writeStringBinary('', ba) # pwd + writeStringBinary(CLICKHOUSE_DATABASE, ba) # database + writeStringBinary("default", ba) # user + writeStringBinary("", ba) # pwd s.sendall(ba) def receiveHello(s): p_type = readVarUInt(s) - assert (p_type == 0) # Hello + assert p_type == 0 # Hello server_name = readStringBinary(s) # print("Server name: ", server_name) server_version_major = readVarUInt(s) @@ -111,65 +116,65 @@ def receiveHello(s): def serializeClientInfo(ba, query_id): - writeStringBinary('default', ba) # initial_user - writeStringBinary(query_id, ba) # initial_query_id - writeStringBinary('127.0.0.1:9000', ba) # initial_address - ba.extend([0] * 8) # initial_query_start_time_microseconds - ba.append(1) # TCP - writeStringBinary('os_user', ba) # os_user - writeStringBinary('client_hostname', ba) # client_hostname - writeStringBinary('client_name', ba) # client_name + writeStringBinary("default", ba) # initial_user + writeStringBinary(query_id, ba) # initial_query_id + writeStringBinary("127.0.0.1:9000", ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary("os_user", ba) # os_user + writeStringBinary("client_hostname", ba) # client_hostname + writeStringBinary("client_name", ba) # client_name writeVarUInt(21, ba) writeVarUInt(9, ba) writeVarUInt(54449, ba) - writeStringBinary('', ba) # quota_key - writeVarUInt(0, ba) # distributed_depth - writeVarUInt(1, ba) # client_version_patch - ba.append(0) # No telemetry + writeStringBinary("", ba) # quota_key + writeVarUInt(0, ba) # 
distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry def sendQuery(s, query): ba = bytearray() query_id = uuid.uuid4().hex - writeVarUInt(1, ba) # query + writeVarUInt(1, ba) # query writeStringBinary(query_id, ba) - ba.append(1) # INITIAL_QUERY + ba.append(1) # INITIAL_QUERY # client info serializeClientInfo(ba, query_id) - writeStringBinary('', ba) # No settings - writeStringBinary('', ba) # No interserver secret - writeVarUInt(2, ba) # Stage - Complete - ba.append(0) # No compression - writeStringBinary(query, ba) # query, finally + writeStringBinary("", ba) # No settings + writeStringBinary("", ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally s.sendall(ba) def serializeBlockInfo(ba): - writeVarUInt(1, ba) # 1 - ba.append(0) # is_overflows - writeVarUInt(2, ba) # 2 - writeVarUInt(0, ba) # 0 - ba.extend([0] * 4) # bucket_num + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num def sendEmptyBlock(s): ba = bytearray() - writeVarUInt(2, ba) # Data - writeStringBinary('', ba) + writeVarUInt(2, ba) # Data + writeStringBinary("", ba) serializeBlockInfo(ba) - writeVarUInt(0, ba) # rows - writeVarUInt(0, ba) # columns + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # columns s.sendall(ba) def assertPacket(packet, expected): - assert(packet == expected), packet + assert packet == expected, packet -class Progress(): +class Progress: def __init__(self): # NOTE: this is done in ctor to initialize __dict__ self.read_rows = 0 @@ -198,11 +203,12 @@ class Progress(): def __bool__(self): return ( - self.read_rows > 0 or - self.read_bytes > 0 or - self.total_rows_to_read > 0 or - self.written_rows > 0 or - self.written_bytes > 0) + self.read_rows > 0 + or self.read_bytes > 0 + or self.total_rows_to_read > 0 + or self.written_rows > 0 + or self.written_bytes > 0 + ) def readProgress(s): @@ -219,13 +225,14 @@ def readProgress(s): progress.readPacket(s) return progress + def readException(s): code = readUInt32(s) name = readStringBinary(s) text = readStringBinary(s) - readStringBinary(s) # trace - assertPacket(readUInt8(s), 0) # has_nested - return "code {}: {}".format(code, text.replace('DB::Exception:', '')) + readStringBinary(s) # trace + assertPacket(readUInt8(s), 0) # has_nested + return "code {}: {}".format(code, text.replace("DB::Exception:", "")) def main(): @@ -236,7 +243,10 @@ def main(): receiveHello(s) # For 1 second sleep and 1000ms of interactive_delay we definitelly should have non zero progress packet. # NOTE: interactive_delay=0 cannot be used since in this case CompletedPipelineExecutor will not call cancelled callback. 
- sendQuery(s, "insert into function null('_ Int') select sleep(1) from numbers(2) settings max_block_size=1, interactive_delay=1000") + sendQuery( + s, + "insert into function null('_ Int') select sleep(1) from numbers(2) settings max_block_size=1, interactive_delay=1000", + ) # external tables sendEmptyBlock(s) diff --git a/tests/queries/0_stateless/02473_multistep_prewhere.python b/tests/queries/0_stateless/02473_multistep_prewhere.python index 37a7280dac2..a942568233c 100644 --- a/tests/queries/0_stateless/02473_multistep_prewhere.python +++ b/tests/queries/0_stateless/02473_multistep_prewhere.python @@ -4,18 +4,19 @@ import os import sys CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient class Tester: - ''' + """ - Creates test table - Deletes the specified range of rows - Masks another range using row-level policy - Runs some read queries and checks that the results - ''' + """ + def __init__(self, session, url, index_granularity, total_rows): self.session = session self.url = url @@ -25,10 +26,10 @@ class Tester: self.repro_queries = [] def report_error(self): - print('Repro steps:', '\n\n\t'.join(self.repro_queries)) + print("Repro steps:", "\n\n\t".join(self.repro_queries)) exit(1) - def query(self, query_text, include_in_repro_steps = True, expected_data = None): + def query(self, query_text, include_in_repro_steps=True, expected_data=None): self.repro_queries.append(query_text) resp = self.session.post(self.url, data=query_text) if resp.status_code != 200: @@ -36,113 +37,187 @@ class Tester: error = resp.text[0:40] if error not in self.reported_errors: self.reported_errors.add(error) - print('Code:', resp.status_code) - print('Result:', resp.text) + print("Code:", resp.status_code) + print("Result:", resp.text) self.report_error() result = resp.text # Check that the result is as expected - if ((not expected_data is None) and (int(result) != len(expected_data))): - print('Expected {} rows, got {}'.format(len(expected_data), result)) - print('Expected data:' + str(expected_data)) + if (not expected_data is None) and (int(result) != len(expected_data)): + print("Expected {} rows, got {}".format(len(expected_data), result)) + print("Expected data:" + str(expected_data)) self.report_error() if not include_in_repro_steps: self.repro_queries.pop() - - def check_data(self, all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end): + def check_data( + self, + all_data, + delete_range_start, + delete_range_end, + row_level_policy_range_start, + row_level_policy_range_end, + ): all_data_after_delete = all_data[ - ~((all_data.a == 0) & - (all_data.b > delete_range_start) & - (all_data.b <= delete_range_end))] + ~( + (all_data.a == 0) + & (all_data.b > delete_range_start) + & (all_data.b <= delete_range_end) + ) + ] all_data_after_row_policy = all_data_after_delete[ - (all_data_after_delete.b <= row_level_policy_range_start) | - (all_data_after_delete.b > row_level_policy_range_end)] + (all_data_after_delete.b <= row_level_policy_range_start) + | (all_data_after_delete.b > row_level_policy_range_end) + ] - for to_select in ['count()', 'sum(d)']: # Test reading with and without column with default value - self.query('SELECT {} FROM tab_02473;'.format(to_select), False, all_data_after_row_policy) + for to_select in [ + "count()", + "sum(d)", + ]: # Test reading with and without column with 
default value + self.query( + "SELECT {} FROM tab_02473;".format(to_select), + False, + all_data_after_row_policy, + ) delta = 10 for query_range_start in [0, delta]: - for query_range_end in [self.total_rows - delta]: #, self.total_rows]: + for query_range_end in [self.total_rows - delta]: # , self.total_rows]: expected = all_data_after_row_policy[ - (all_data_after_row_policy.a == 0) & - (all_data_after_row_policy.b > query_range_start) & - (all_data_after_row_policy.b <= query_range_end)] - self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;'.format( - to_select, query_range_start, query_range_end), False, expected) + (all_data_after_row_policy.a == 0) + & (all_data_after_row_policy.b > query_range_start) + & (all_data_after_row_policy.b <= query_range_end) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;".format( + to_select, query_range_start, query_range_end + ), + False, + expected, + ) expected = all_data_after_row_policy[ - (all_data_after_row_policy.a == 0) & - (all_data_after_row_policy.c > query_range_start) & - (all_data_after_row_policy.c <= query_range_end)] - self.query('SELECT {} from tab_02473 PREWHERE c > {} AND c <= {} WHERE a == 0;'.format( - to_select, query_range_start, query_range_end), False, expected) + (all_data_after_row_policy.a == 0) + & (all_data_after_row_policy.c > query_range_start) + & (all_data_after_row_policy.c <= query_range_end) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE c > {} AND c <= {} WHERE a == 0;".format( + to_select, query_range_start, query_range_end + ), + False, + expected, + ) expected = all_data_after_row_policy[ - (all_data_after_row_policy.a == 0) & - ((all_data_after_row_policy.c <= query_range_start) | - (all_data_after_row_policy.c > query_range_end))] - self.query('SELECT {} from tab_02473 PREWHERE c <= {} OR c > {} WHERE a == 0;'.format( - to_select, query_range_start, query_range_end), False, expected) + (all_data_after_row_policy.a == 0) + & ( + (all_data_after_row_policy.c <= query_range_start) + | (all_data_after_row_policy.c > query_range_end) + ) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE c <= {} OR c > {} WHERE a == 0;".format( + to_select, query_range_start, query_range_end + ), + False, + expected, + ) - - def run_test(self, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end): + def run_test( + self, + delete_range_start, + delete_range_end, + row_level_policy_range_start, + row_level_policy_range_end, + ): self.repro_queries = [] - self.query(''' + self.query( + """ CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, PRIMARY KEY (a)) ENGINE = MergeTree() ORDER BY (a, b) - SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};'''.format(self.index_granularity)) + SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};""".format( + self.index_granularity + ) + ) - self.query('INSERT INTO tab_02473 select 0, number+1, number+1 FROM numbers({});'.format(self.total_rows)) + self.query( + "INSERT INTO tab_02473 select 0, number+1, number+1 FROM numbers({});".format( + self.total_rows + ) + ) client = ClickHouseClient() - all_data = client.query_return_df("SELECT a, b, c, 1 as d FROM tab_02473 FORMAT TabSeparatedWithNames;") + all_data = client.query_return_df( + "SELECT a, b, c, 1 as d FROM tab_02473 FORMAT TabSeparatedWithNames;" + ) - self.query('OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;') + self.query("OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;") # 
After all data has been written add a column with default value - self.query('ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1;') + self.query("ALTER TABLE tab_02473 ADD COLUMN d Int64 DEFAULT 1;") self.check_data(all_data, -100, -100, -100, -100) - self.query('DELETE FROM tab_02473 WHERE a = 0 AND b > {} AND b <= {};'.format( - delete_range_start, delete_range_end)) + self.query( + "DELETE FROM tab_02473 WHERE a = 0 AND b > {} AND b <= {};".format( + delete_range_start, delete_range_end + ) + ) self.check_data(all_data, delete_range_start, delete_range_end, -100, -100) - self.query('CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= {} OR b > {} TO default;'.format( - row_level_policy_range_start, row_level_policy_range_end)) + self.query( + "CREATE ROW POLICY policy_tab_02473 ON tab_02473 FOR SELECT USING b <= {} OR b > {} TO default;".format( + row_level_policy_range_start, row_level_policy_range_end + ) + ) - self.check_data(all_data, delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end) + self.check_data( + all_data, + delete_range_start, + delete_range_end, + row_level_policy_range_start, + row_level_policy_range_end, + ) - self.query('DROP POLICY policy_tab_02473 ON tab_02473;') - - self.query('DROP TABLE tab_02473;') + self.query("DROP POLICY policy_tab_02473 ON tab_02473;") + self.query("DROP TABLE tab_02473;") def main(): # Set mutations to synchronous mode and enable lightweight DELETE's - url = os.environ['CLICKHOUSE_URL'] + '&max_threads=1' + url = os.environ["CLICKHOUSE_URL"] + "&max_threads=1" - default_index_granularity = 10; + default_index_granularity = 10 total_rows = 8 * default_index_granularity step = default_index_granularity session = requests.Session() - for index_granularity in [default_index_granularity-1, default_index_granularity]: # [default_index_granularity-1, default_index_granularity+1, default_index_granularity]: + for index_granularity in [ + default_index_granularity - 1, + default_index_granularity, + ]: # [default_index_granularity-1, default_index_granularity+1, default_index_granularity]: tester = Tester(session, url, index_granularity, total_rows) # Test combinations of ranges of various size masked by lightweight DELETES # along with ranges of various size masked by row-level policies for delete_range_start in range(0, total_rows, 3 * step): - for delete_range_end in range(delete_range_start + 3 * step, total_rows, 2 * step): + for delete_range_end in range( + delete_range_start + 3 * step, total_rows, 2 * step + ): for row_level_policy_range_start in range(0, total_rows, 3 * step): - for row_level_policy_range_end in range(row_level_policy_range_start + 3 * step, total_rows, 2 * step): - tester.run_test(delete_range_start, delete_range_end, row_level_policy_range_start, row_level_policy_range_end) + for row_level_policy_range_end in range( + row_level_policy_range_start + 3 * step, total_rows, 2 * step + ): + tester.run_test( + delete_range_start, + delete_range_end, + row_level_policy_range_start, + row_level_policy_range_end, + ) if __name__ == "__main__": main() - diff --git a/tests/queries/0_stateless/02473_multistep_split_prewhere.python b/tests/queries/0_stateless/02473_multistep_split_prewhere.python index 41d8a746e11..19444994fd2 100644 --- a/tests/queries/0_stateless/02473_multistep_split_prewhere.python +++ b/tests/queries/0_stateless/02473_multistep_split_prewhere.python @@ -4,16 +4,17 @@ import os import sys CURDIR = os.path.dirname(os.path.realpath(__file__)) 
-sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient class Tester: - ''' + """ - Creates test table with multiple integer columns - Runs read queries with multiple range conditions on different columns in PREWHERE and check that the result is correct - ''' + """ + def __init__(self, session, url, index_granularity, total_rows): self.session = session self.url = url @@ -23,10 +24,10 @@ class Tester: self.repro_queries = [] def report_error(self): - print('Repro steps:', '\n\n\t'.join(self.repro_queries)) + print("Repro steps:", "\n\n\t".join(self.repro_queries)) exit(1) - def query(self, query_text, include_in_repro_steps = True, expected_data = None): + def query(self, query_text, include_in_repro_steps=True, expected_data=None): self.repro_queries.append(query_text) resp = self.session.post(self.url, data=query_text) if resp.status_code != 200: @@ -34,98 +35,150 @@ class Tester: error = resp.text[0:40] if error not in self.reported_errors: self.reported_errors.add(error) - print('Code:', resp.status_code) - print('Result:', resp.text) + print("Code:", resp.status_code) + print("Result:", resp.text) self.report_error() result = resp.text # Check that the result is as expected - if ((not expected_data is None) and (int(result) != len(expected_data))): - print('Expected {} rows, got {}'.format(len(expected_data), result)) - print('Expected data:' + str(expected_data)) + if (not expected_data is None) and (int(result) != len(expected_data)): + print("Expected {} rows, got {}".format(len(expected_data), result)) + print("Expected data:" + str(expected_data)) self.report_error() if not include_in_repro_steps: self.repro_queries.pop() - - def check_data(self, all_data, c_range_start, c_range_end, d_range_start, d_range_end): - for to_select in ['count()', 'sum(e)']: # Test reading with and without column with default value - self.query('SELECT {} FROM tab_02473;'.format(to_select), False, all_data) + def check_data( + self, all_data, c_range_start, c_range_end, d_range_start, d_range_end + ): + for to_select in [ + "count()", + "sum(e)", + ]: # Test reading with and without column with default value + self.query("SELECT {} FROM tab_02473;".format(to_select), False, all_data) delta = 10 for b_range_start in [0, delta]: - for b_range_end in [self.total_rows - delta]: #, self.total_rows]: + for b_range_end in [self.total_rows - delta]: # , self.total_rows]: expected = all_data[ - (all_data.a == 0) & - (all_data.b > b_range_start) & - (all_data.b <= b_range_end)] - self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;'.format( - to_select, b_range_start, b_range_end), False, expected) + (all_data.a == 0) + & (all_data.b > b_range_start) + & (all_data.b <= b_range_end) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} WHERE a == 0;".format( + to_select, b_range_start, b_range_end + ), + False, + expected, + ) expected = all_data[ - (all_data.a == 0) & - (all_data.b > b_range_start) & - (all_data.b <= b_range_end) & - (all_data.c > c_range_start) & - (all_data.c <= c_range_end)] - self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} WHERE a == 0;'.format( - to_select, b_range_start, b_range_end, c_range_start, c_range_end), False, expected) + (all_data.a == 0) + & (all_data.b > b_range_start) + & (all_data.b <= b_range_end) + & (all_data.c > c_range_start) + & (all_data.c <= c_range_end) + ] + self.query( + "SELECT {} 
from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} WHERE a == 0;".format( + to_select, + b_range_start, + b_range_end, + c_range_start, + c_range_end, + ), + False, + expected, + ) expected = all_data[ - (all_data.a == 0) & - (all_data.b > b_range_start) & - (all_data.b <= b_range_end) & - (all_data.c > c_range_start) & - (all_data.c <= c_range_end) & - (all_data.d > d_range_start) & - (all_data.d <= d_range_end)] - self.query('SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} AND d > {} AND d <= {} WHERE a == 0;'.format( - to_select, b_range_start, b_range_end, c_range_start, c_range_end, d_range_start, d_range_end), False, expected) - + (all_data.a == 0) + & (all_data.b > b_range_start) + & (all_data.b <= b_range_end) + & (all_data.c > c_range_start) + & (all_data.c <= c_range_end) + & (all_data.d > d_range_start) + & (all_data.d <= d_range_end) + ] + self.query( + "SELECT {} from tab_02473 PREWHERE b > {} AND b <= {} AND c > {} AND c <= {} AND d > {} AND d <= {} WHERE a == 0;".format( + to_select, + b_range_start, + b_range_end, + c_range_start, + c_range_end, + d_range_start, + d_range_end, + ), + False, + expected, + ) def run_test(self, c_range_start, c_range_end, d_range_start, d_range_end): self.repro_queries = [] - self.query(''' + self.query( + """ CREATE TABLE tab_02473 (a Int8, b Int32, c Int32, d Int32, PRIMARY KEY (a)) ENGINE = MergeTree() ORDER BY (a, b) - SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};'''.format(self.index_granularity)) + SETTINGS min_bytes_for_wide_part = 0, index_granularity = {};""".format( + self.index_granularity + ) + ) - self.query('INSERT INTO tab_02473 select 0, number+1, number+1, number+1 FROM numbers({});'.format(self.total_rows)) + self.query( + "INSERT INTO tab_02473 select 0, number+1, number+1, number+1 FROM numbers({});".format( + self.total_rows + ) + ) client = ClickHouseClient() - all_data = client.query_return_df("SELECT a, b, c, d, 1 as e FROM tab_02473 FORMAT TabSeparatedWithNames;") + all_data = client.query_return_df( + "SELECT a, b, c, d, 1 as e FROM tab_02473 FORMAT TabSeparatedWithNames;" + ) - self.query('OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;') + self.query("OPTIMIZE TABLE tab_02473 FINAL SETTINGS mutations_sync=2;") # After all data has been written add a column with default value - self.query('ALTER TABLE tab_02473 ADD COLUMN e Int64 DEFAULT 1;') + self.query("ALTER TABLE tab_02473 ADD COLUMN e Int64 DEFAULT 1;") - self.check_data(all_data, c_range_start, c_range_end, d_range_start, d_range_end) - - self.query('DROP TABLE tab_02473;') + self.check_data( + all_data, c_range_start, c_range_end, d_range_start, d_range_end + ) + self.query("DROP TABLE tab_02473;") def main(): # Enable multiple prewhere read steps - url = os.environ['CLICKHOUSE_URL'] + '&enable_multiple_prewhere_read_steps=1&move_all_conditions_to_prewhere=0&max_threads=1' + url = ( + os.environ["CLICKHOUSE_URL"] + + "&enable_multiple_prewhere_read_steps=1&move_all_conditions_to_prewhere=0&max_threads=1" + ) - default_index_granularity = 10; + default_index_granularity = 10 total_rows = 8 * default_index_granularity step = default_index_granularity session = requests.Session() - for index_granularity in [default_index_granularity-1, default_index_granularity]: + for index_granularity in [default_index_granularity - 1, default_index_granularity]: tester = Tester(session, url, index_granularity, total_rows) # Test combinations of ranges of columns c and d for c_range_start in range(0, 
total_rows, int(2.3 * step)): - for c_range_end in range(c_range_start + 3 * step, total_rows, int(2.1 * step)): - for d_range_start in range(int(0.5 * step), total_rows, int(2.7 * step)): - for d_range_end in range(d_range_start + 3 * step, total_rows, int(2.2 * step)): - tester.run_test(c_range_start, c_range_end, d_range_start, d_range_end) + for c_range_end in range( + c_range_start + 3 * step, total_rows, int(2.1 * step) + ): + for d_range_start in range( + int(0.5 * step), total_rows, int(2.7 * step) + ): + for d_range_end in range( + d_range_start + 3 * step, total_rows, int(2.2 * step) + ): + tester.run_test( + c_range_start, c_range_end, d_range_start, d_range_end + ) if __name__ == "__main__": main() - diff --git a/tests/queries/0_stateless/02481_async_insert_dedup.python b/tests/queries/0_stateless/02481_async_insert_dedup.python index 0cea7301ce5..1be2b673b73 100644 --- a/tests/queries/0_stateless/02481_async_insert_dedup.python +++ b/tests/queries/0_stateless/02481_async_insert_dedup.python @@ -8,7 +8,7 @@ import time from threading import Thread CURDIR = os.path.dirname(os.path.realpath(__file__)) -sys.path.insert(0, os.path.join(CURDIR, 'helpers')) +sys.path.insert(0, os.path.join(CURDIR, "helpers")) from pure_http_client import ClickHouseClient @@ -16,29 +16,39 @@ client = ClickHouseClient() # test table without partition client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part NO DELAY") -client.query(''' +client.query( + """ CREATE TABLE t_async_insert_dedup_no_part ( KeyID UInt32 ) Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/t_async_insert_dedup', '{replica}') ORDER BY (KeyID) -''') +""" +) -client.query("insert into t_async_insert_dedup_no_part values (1), (2), (3), (4), (5)", settings = {"async_insert": 1, "wait_for_async_insert": 1, "insert_keeper_fault_injection_probability": 0}) +client.query( + "insert into t_async_insert_dedup_no_part values (1), (2), (3), (4), (5)", + settings={ + "async_insert": 1, + "wait_for_async_insert": 1, + "insert_keeper_fault_injection_probability": 0, + }, +) result = client.query("select count(*) from t_async_insert_dedup_no_part") print(result, flush=True) client.query("DROP TABLE IF EXISTS t_async_insert_dedup_no_part NO DELAY") + # generate data and push to queue def generate_data(q, total_number): old_data = [] max_chunk_size = 30 - partitions = ['2022-11-11 10:10:10', '2022-12-12 10:10:10'] + partitions = ["2022-11-11 10:10:10", "2022-12-12 10:10:10"] last_number = 0 while True: - dup_simulate = random.randint(0,3) + dup_simulate = random.randint(0, 3) # insert old data randomly. 25% of them are dup. if dup_simulate == 0: - last_idx = len(old_data)-1 + last_idx = len(old_data) - 1 if last_idx < 0: continue idx = last_idx - random.randint(0, 50) @@ -53,7 +63,7 @@ def generate_data(q, total_number): end = start + chunk_size if end > total_number: end = total_number - for i in range(start, end+1): + for i in range(start, end + 1): partition = partitions[random.randint(0, 1)] insert_stmt += "('{}', {}),".format(partition, i) insert_stmt = insert_stmt[:-1] @@ -65,33 +75,46 @@ def generate_data(q, total_number): # wait all the tasks is done. 
q.join() + def fetch_and_insert_data(q, client): while True: insert = q.get() - client.query(insert, settings = {"async_insert": 1, "async_insert_deduplicate": 1, "wait_for_async_insert": 0, "async_insert_busy_timeout_ms": 1500, "insert_keeper_fault_injection_probability": 0}) + client.query( + insert, + settings={ + "async_insert": 1, + "async_insert_deduplicate": 1, + "wait_for_async_insert": 0, + "async_insert_busy_timeout_ms": 1500, + "insert_keeper_fault_injection_probability": 0, + }, + ) q.task_done() sleep_time = random.randint(50, 500) - time.sleep(sleep_time/1000.0) + time.sleep(sleep_time / 1000.0) + # main process client.query("DROP TABLE IF EXISTS t_async_insert_dedup NO DELAY") -client.query(''' +client.query( + """ CREATE TABLE t_async_insert_dedup ( EventDate DateTime, KeyID UInt32 ) Engine = ReplicatedMergeTree('/clickhouse/tables/{shard}/{database}/t_async_insert_dedup', '{replica}') PARTITION BY toYYYYMM(EventDate) ORDER BY (KeyID, EventDate) SETTINGS use_async_block_ids_cache = 1 -''') +""" +) q = queue.Queue(100) total_number = 10000 -gen = Thread(target = generate_data, args = [q, total_number]) +gen = Thread(target=generate_data, args=[q, total_number]) gen.start() for i in range(3): - insert = Thread(target = fetch_and_insert_data, args = [q, client]) + insert = Thread(target=fetch_and_insert_data, args=[q, client]) insert.start() gen.join() @@ -109,7 +132,7 @@ while True: errMsg = f"the size of result is {len(result)}. we expect {total_number}." else: for i in range(total_number): - expect = str(i+1) + expect = str(i + 1) real = result[i] if expect != real: err = True @@ -117,7 +140,7 @@ while True: break # retry several times to get stable results. if err and retry >= 5: - print (errMsg, flush=True) + print(errMsg, flush=True) elif err: retry += 1 continue @@ -125,11 +148,15 @@ while True: print(len(result), flush=True) break -result = client.query("SELECT value FROM system.metrics where metric = 'AsyncInsertCacheSize'") +result = client.query( + "SELECT value FROM system.metrics where metric = 'AsyncInsertCacheSize'" +) result = int(result.split()[0]) if result <= 0: raise Exception(f"AsyncInsertCacheSize should > 0, but got {result}") -result = client.query("SELECT value FROM system.events where event = 'AsyncInsertCacheHits'") +result = client.query( + "SELECT value FROM system.events where event = 'AsyncInsertCacheHits'" +) result = int(result.split()[0]) if result <= 0: raise Exception(f"AsyncInsertCacheHits should > 0, but got {result}") diff --git a/tests/queries/0_stateless/02668_parse_datetime.reference b/tests/queries/0_stateless/02668_parse_datetime.reference new file mode 100644 index 00000000000..b893c1bc6e9 --- /dev/null +++ b/tests/queries/0_stateless/02668_parse_datetime.reference @@ -0,0 +1,199 @@ +-- { echoOn } +-- year +select parseDateTime('2020', '%Y', 'UTC') = toDateTime('2020-01-01', 'UTC'); +1 +-- month +select parseDateTime('02', '%m', 'UTC') = toDateTime('2000-02-01', 'UTC'); +1 +select parseDateTime('07', '%m', 'UTC') = toDateTime('2000-07-01', 'UTC'); +1 +select parseDateTime('11-', '%m-', 'UTC') = toDateTime('2000-11-01', 'UTC'); +1 +select parseDateTime('00', '%m'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%m'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('12345', '%m'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('02', '%c', 'UTC') = toDateTime('2000-02-01', 'UTC'); +1 +select parseDateTime('07', '%c', 'UTC') = toDateTime('2000-07-01', 'UTC'); +1 +select 
parseDateTime('11-', '%c-', 'UTC') = toDateTime('2000-11-01', 'UTC'); +1 +select parseDateTime('00', '%c'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%c'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('12345', '%c'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('jun', '%b', 'UTC') = toDateTime('2000-06-01', 'UTC'); +1 +select parseDateTime('JUN', '%b', 'UTC') = toDateTime('2000-06-01', 'UTC'); +1 +select parseDateTime('abc', '%b'); -- { serverError CANNOT_PARSE_DATETIME } +-- day of month +select parseDateTime('07', '%d', 'UTC') = toDateTime('2000-01-07', 'UTC'); +1 +select parseDateTime('01', '%d', 'UTC') = toDateTime('2000-01-01', 'UTC'); +1 +select parseDateTime('/11', '/%d', 'UTC') = toDateTime('2000-01-11', 'UTC'); +1 +select parseDateTime('00', '%d'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('32', '%d'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('12345', '%d'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('02-31', '%m-%d'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('04-31', '%m-%d'); -- { serverError CANNOT_PARSE_DATETIME } +-- The last one is chosen if multiple months of year if supplied +select parseDateTime('01 31 20 02', '%m %d %d %m', 'UTC') = toDateTime('2000-02-20', 'UTC'); +1 +select parseDateTime('02 31 20 04', '%m %d %d %m', 'UTC') = toDateTime('2000-04-20', 'UTC'); +1 +select parseDateTime('02 31 01', '%m %d %m', 'UTC') = toDateTime('2000-01-31', 'UTC'); +1 +select parseDateTime('2000-02-29', '%Y-%m-%d', 'UTC') = toDateTime('2000-02-29', 'UTC'); +1 +select parseDateTime('2001-02-29', '%Y-%m-%d'); -- { serverError CANNOT_PARSE_DATETIME } +-- day of year +select parseDateTime('001', '%j', 'UTC') = toDateTime('2000-01-01', 'UTC'); +1 +select parseDateTime('007', '%j', 'UTC') = toDateTime('2000-01-07', 'UTC'); +1 +select parseDateTime('/031/', '/%j/', 'UTC') = toDateTime('2000-01-31', 'UTC'); +1 +select parseDateTime('032', '%j', 'UTC') = toDateTime('2000-02-01', 'UTC'); +1 +select parseDateTime('060', '%j', 'UTC') = toDateTime('2000-02-29', 'UTC'); +1 +select parseDateTime('365', '%j', 'UTC') = toDateTime('2000-12-30', 'UTC'); +1 +select parseDateTime('366', '%j', 'UTC') = toDateTime('2000-12-31', 'UTC'); +1 +select parseDateTime('1980 001', '%Y %j', 'UTC') = toDateTime('1980-01-01', 'UTC'); +1 +select parseDateTime('1980 007', '%Y %j', 'UTC') = toDateTime('1980-01-07', 'UTC'); +1 +select parseDateTime('1980 /007', '%Y /%j', 'UTC') = toDateTime('1980-01-07', 'UTC'); +1 +select parseDateTime('1980 /031/', '%Y /%j/', 'UTC') = toDateTime('1980-01-31', 'UTC'); +1 +select parseDateTime('1980 032', '%Y %j', 'UTC') = toDateTime('1980-02-01', 'UTC'); +1 +select parseDateTime('1980 060', '%Y %j', 'UTC') = toDateTime('1980-02-29', 'UTC'); +1 +select parseDateTime('1980 366', '%Y %j', 'UTC') = toDateTime('1980-12-31', 'UTC'); +1 +select parseDateTime('1981 366', '%Y %j'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('367', '%j'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('000', '%j'); -- { serverError CANNOT_PARSE_DATETIME } +-- The last one is chosen if multiple day of years are supplied. 
+select parseDateTime('2000 366 2001', '%Y %j %Y'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('2001 366 2000', '%Y %j %Y', 'UTC') = toDateTime('2000-12-31', 'UTC'); +1 +-- hour of day +select parseDateTime('07', '%H', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('23', '%H', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +1 +select parseDateTime('00', '%H', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('10', '%H', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +select parseDateTime('24', '%H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('1234567', '%H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('07', '%k', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('23', '%k', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +1 +select parseDateTime('00', '%k', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('10', '%k', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +select parseDateTime('24', '%k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('1234567', '%k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- hour of half day +select parseDateTime('07', '%h', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('12', '%h', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('01', '%h', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTime('10', '%h', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +select parseDateTime('00', '%h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('07', '%I', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('12', '%I', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('01', '%I', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTime('10', '%I', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +select parseDateTime('00', '%I', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%I', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%I', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('07', '%l', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('12', '%l', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('01', '%l', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTime('10', '%l', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +select parseDateTime('00', '%l', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%l', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%l', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- half of day +select parseDateTime('07 PM', '%H %p', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('07 AM', '%H %p', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('07 pm', '%H %p', 'UTC') = toDateTime('1970-01-01 07:00:00', 
'UTC'); +1 +select parseDateTime('07 am', '%H %p', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTime('00 AM', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('00 PM', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('00 am', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('00 pm', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('01 PM', '%h %p', 'UTC') = toDateTime('1970-01-01 13:00:00', 'UTC'); +1 +select parseDateTime('01 AM', '%h %p', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTime('06 PM', '%h %p', 'UTC') = toDateTime('1970-01-01 18:00:00', 'UTC'); +1 +select parseDateTime('06 AM', '%h %p', 'UTC') = toDateTime('1970-01-01 06:00:00', 'UTC'); +1 +select parseDateTime('12 PM', '%h %p', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); +1 +select parseDateTime('12 AM', '%h %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +-- minute +select parseDateTime('08', '%i', 'UTC') = toDateTime('1970-01-01 00:08:00', 'UTC'); +1 +select parseDateTime('59', '%i', 'UTC') = toDateTime('1970-01-01 00:59:00', 'UTC'); +1 +select parseDateTime('00/', '%i/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('60', '%i', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%i', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%i', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- second +select parseDateTime('09', '%s', 'UTC') = toDateTime('1970-01-01 00:00:09', 'UTC'); +1 +select parseDateTime('58', '%s', 'UTC') = toDateTime('1970-01-01 00:00:58', 'UTC'); +1 +select parseDateTime('00/', '%s/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTime('60', '%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- mixed YMD format +select parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s', 'UTC') = toDateTime('2021-01-04 23:00:00', 'UTC'); +1 +select parseDateTime('2019-07-03 11:04:10', '%Y-%m-%d %H:%i:%s', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +1 +select parseDateTime('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +1 diff --git a/tests/queries/0_stateless/02668_parse_datetime.sql b/tests/queries/0_stateless/02668_parse_datetime.sql new file mode 100644 index 00000000000..86e8877eedc --- /dev/null +++ b/tests/queries/0_stateless/02668_parse_datetime.sql @@ -0,0 +1,135 @@ +-- { echoOn } +-- year +select parseDateTime('2020', '%Y', 'UTC') = toDateTime('2020-01-01', 'UTC'); + +-- month +select parseDateTime('02', '%m', 'UTC') = toDateTime('2000-02-01', 'UTC'); +select parseDateTime('07', '%m', 'UTC') = toDateTime('2000-07-01', 'UTC'); +select parseDateTime('11-', '%m-', 'UTC') = toDateTime('2000-11-01', 'UTC'); +select parseDateTime('00', '%m'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%m'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('12345', '%m'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('02', '%c', 'UTC') = toDateTime('2000-02-01', 'UTC'); +select parseDateTime('07', '%c', 'UTC') = toDateTime('2000-07-01', 'UTC'); +select parseDateTime('11-', '%c-', 'UTC') = toDateTime('2000-11-01', 'UTC'); 
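The MySQL-style specifiers exercised in this file map onto the usual strptime-like fields (%Y year, %m/%c numeric month, %b month name, %d day of month, %j day of year, %H/%k hour 0-23, %h/%I/%l hour 1-12, %p AM/PM, %i minute, %s second), and validation happens on the server, so out-of-range values such as month 00 or 13 fail with CANNOT_PARSE_DATETIME rather than wrapping. A minimal sketch of spot-checking one of these assertions from Python over the HTTP interface; the localhost:8123 endpoint and the small check() helper are assumptions for illustration only — the stateless tests themselves take the URL from the CLICKHOUSE_URL environment variable.

import requests

# Assumed endpoint of a locally running ClickHouse server.
CLICKHOUSE_HTTP = "http://localhost:8123"

def check(query):
    # Run a single query over the HTTP interface and return the trimmed body.
    resp = requests.get(CLICKHOUSE_HTTP, params={"query": query})
    resp.raise_for_status()
    return resp.text.strip()

# Passing case: '%c' consumes a numeric month, trailing literal '-' must match.
print(check("select parseDateTime('11-', '%c-', 'UTC') = toDateTime('2000-11-01', 'UTC')"))  # expected: 1

# Failing case: month 13 is rejected server-side with CANNOT_PARSE_DATETIME,
# which surfaces here as a non-2xx HTTP status.
try:
    check("select parseDateTime('13', '%c')")
except requests.HTTPError as e:
    print("rejected as expected:", e.response.status_code)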
+select parseDateTime('00', '%c'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%c'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('12345', '%c'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('jun', '%b', 'UTC') = toDateTime('2000-06-01', 'UTC'); +select parseDateTime('JUN', '%b', 'UTC') = toDateTime('2000-06-01', 'UTC'); +select parseDateTime('abc', '%b'); -- { serverError CANNOT_PARSE_DATETIME } + +-- day of month +select parseDateTime('07', '%d', 'UTC') = toDateTime('2000-01-07', 'UTC'); +select parseDateTime('01', '%d', 'UTC') = toDateTime('2000-01-01', 'UTC'); +select parseDateTime('/11', '/%d', 'UTC') = toDateTime('2000-01-11', 'UTC'); +select parseDateTime('00', '%d'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('32', '%d'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('12345', '%d'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('02-31', '%m-%d'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('04-31', '%m-%d'); -- { serverError CANNOT_PARSE_DATETIME } +-- The last one is chosen if multiple months of year if supplied +select parseDateTime('01 31 20 02', '%m %d %d %m', 'UTC') = toDateTime('2000-02-20', 'UTC'); +select parseDateTime('02 31 20 04', '%m %d %d %m', 'UTC') = toDateTime('2000-04-20', 'UTC'); +select parseDateTime('02 31 01', '%m %d %m', 'UTC') = toDateTime('2000-01-31', 'UTC'); +select parseDateTime('2000-02-29', '%Y-%m-%d', 'UTC') = toDateTime('2000-02-29', 'UTC'); +select parseDateTime('2001-02-29', '%Y-%m-%d'); -- { serverError CANNOT_PARSE_DATETIME } + +-- day of year +select parseDateTime('001', '%j', 'UTC') = toDateTime('2000-01-01', 'UTC'); +select parseDateTime('007', '%j', 'UTC') = toDateTime('2000-01-07', 'UTC'); +select parseDateTime('/031/', '/%j/', 'UTC') = toDateTime('2000-01-31', 'UTC'); +select parseDateTime('032', '%j', 'UTC') = toDateTime('2000-02-01', 'UTC'); +select parseDateTime('060', '%j', 'UTC') = toDateTime('2000-02-29', 'UTC'); +select parseDateTime('365', '%j', 'UTC') = toDateTime('2000-12-30', 'UTC'); +select parseDateTime('366', '%j', 'UTC') = toDateTime('2000-12-31', 'UTC'); +select parseDateTime('1980 001', '%Y %j', 'UTC') = toDateTime('1980-01-01', 'UTC'); +select parseDateTime('1980 007', '%Y %j', 'UTC') = toDateTime('1980-01-07', 'UTC'); +select parseDateTime('1980 /007', '%Y /%j', 'UTC') = toDateTime('1980-01-07', 'UTC'); +select parseDateTime('1980 /031/', '%Y /%j/', 'UTC') = toDateTime('1980-01-31', 'UTC'); +select parseDateTime('1980 032', '%Y %j', 'UTC') = toDateTime('1980-02-01', 'UTC'); +select parseDateTime('1980 060', '%Y %j', 'UTC') = toDateTime('1980-02-29', 'UTC'); +select parseDateTime('1980 366', '%Y %j', 'UTC') = toDateTime('1980-12-31', 'UTC'); +select parseDateTime('1981 366', '%Y %j'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('367', '%j'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('000', '%j'); -- { serverError CANNOT_PARSE_DATETIME } +-- The last one is chosen if multiple day of years are supplied. 
+select parseDateTime('2000 366 2001', '%Y %j %Y'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('2001 366 2000', '%Y %j %Y', 'UTC') = toDateTime('2000-12-31', 'UTC'); + +-- hour of day +select parseDateTime('07', '%H', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('23', '%H', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +select parseDateTime('00', '%H', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('10', '%H', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +select parseDateTime('24', '%H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('1234567', '%H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('07', '%k', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('23', '%k', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +select parseDateTime('00', '%k', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('10', '%k', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +select parseDateTime('24', '%k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('1234567', '%k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- hour of half day +select parseDateTime('07', '%h', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('12', '%h', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('01', '%h', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +select parseDateTime('10', '%h', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +select parseDateTime('00', '%h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('07', '%I', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('12', '%I', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('01', '%I', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +select parseDateTime('10', '%I', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +select parseDateTime('00', '%I', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%I', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%I', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('07', '%l', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('12', '%l', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('01', '%l', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +select parseDateTime('10', '%l', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +select parseDateTime('00', '%l', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('13', '%l', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%l', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- half of day +select parseDateTime('07 PM', '%H %p', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('07 AM', '%H %p', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('07 pm', '%H %p', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('07 am', '%H %p', 'UTC') = 
toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTime('00 AM', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('00 PM', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('00 am', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('00 pm', '%H %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('01 PM', '%h %p', 'UTC') = toDateTime('1970-01-01 13:00:00', 'UTC'); +select parseDateTime('01 AM', '%h %p', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +select parseDateTime('06 PM', '%h %p', 'UTC') = toDateTime('1970-01-01 18:00:00', 'UTC'); +select parseDateTime('06 AM', '%h %p', 'UTC') = toDateTime('1970-01-01 06:00:00', 'UTC'); +select parseDateTime('12 PM', '%h %p', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); +select parseDateTime('12 AM', '%h %p', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); + +-- minute +select parseDateTime('08', '%i', 'UTC') = toDateTime('1970-01-01 00:08:00', 'UTC'); +select parseDateTime('59', '%i', 'UTC') = toDateTime('1970-01-01 00:59:00', 'UTC'); +select parseDateTime('00/', '%i/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('60', '%i', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%i', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%i', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- second +select parseDateTime('09', '%s', 'UTC') = toDateTime('1970-01-01 00:00:09', 'UTC'); +select parseDateTime('58', '%s', 'UTC') = toDateTime('1970-01-01 00:00:58', 'UTC'); +select parseDateTime('00/', '%s/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTime('60', '%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('-1', '%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime('123456789', '%s', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- mixed YMD format +select parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s', 'UTC') = toDateTime('2021-01-04 23:00:00', 'UTC'); +select parseDateTime('2019-07-03 11:04:10', '%Y-%m-%d %H:%i:%s', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); +select parseDateTime('10:04:11 03-07-2019', '%s:%i:%H %d-%m-%Y', 'UTC') = toDateTime('2019-07-03 11:04:10', 'UTC'); + +-- { echoOff } \ No newline at end of file diff --git a/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.reference b/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.reference new file mode 100644 index 00000000000..e8cc31944c3 --- /dev/null +++ b/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.reference @@ -0,0 +1,346 @@ +-- { echoOn } +-- empty +select parseDateTimeInJodaSyntax(' ', ' ', 'UTC') = toDateTime('1970-01-01', 'UTC'); +1 +-- era +select parseDateTimeInJodaSyntax('AD 1999', 'G YYYY') = toDateTime('1999-01-01'); +1 +select parseDateTimeInJodaSyntax('ad 1999', 'G YYYY') = toDateTime('1999-01-01'); +1 +select parseDateTimeInJodaSyntax('Ad 1999', 'G YYYY') = toDateTime('1999-01-01'); +1 +select parseDateTimeInJodaSyntax('AD 1999', 'G YYYY') = toDateTime('1999-01-01'); +1 +select parseDateTimeInJodaSyntax('AD 1999', 'G yyyy') = toDateTime('1999-01-01'); +1 +select parseDateTimeInJodaSyntax('AD 1999 2000', 'G YYYY yyyy') = toDateTime('2000-01-01'); +1 +select parseDateTimeInJodaSyntax('AD 1999 2000', 'G yyyy YYYY') = toDateTime('2000-01-01'); +1 +select parseDateTimeInJodaSyntax('AD 1999', 'G Y'); -- { 
serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('AD 1999', 'G YY'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('AD 1999', 'G YYY'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('BC', 'G'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('AB', 'G'); -- { serverError CANNOT_PARSE_DATETIME } +-- year of era +select parseDateTimeInJodaSyntax('2106', 'YYYY', 'UTC') = toDateTime('2106-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1970', 'YYYY', 'UTC') = toDateTime('1970-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1969', 'YYYY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('2107', 'YYYY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('+1999', 'YYYY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('12', 'YY', 'UTC') = toDateTime('2012-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('69', 'YY', 'UTC') = toDateTime('2069-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('70', 'YY', 'UTC') = toDateTime('1970-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('99', 'YY', 'UTC') = toDateTime('1999-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('01', 'YY', 'UTC') = toDateTime('2001-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1', 'YY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('99 98 97', 'YY YY YY', 'UTC') = toDateTime('1997-01-01', 'UTC'); +1 +-- year +select parseDateTimeInJodaSyntax('12', 'yy', 'UTC') = toDateTime('2012-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('69', 'yy', 'UTC') = toDateTime('2069-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('70', 'yy', 'UTC') = toDateTime('1970-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('99', 'yy', 'UTC') = toDateTime('1999-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('+99', 'yy', 'UTC') = toDateTime('1999-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('+99 02', 'yy MM', 'UTC') = toDateTime('1999-02-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10 +10', 'MM yy', 'UTC') = toDateTime('2010-10-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10+2001', 'MMyyyy', 'UTC') = toDateTime('2001-10-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('+200110', 'yyyyMM', 'UTC') = toDateTime('2001-10-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1970', 'yyyy', 'UTC') = toDateTime('1970-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('2106', 'yyyy', 'UTC') = toDateTime('2106-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1969', 'yyyy', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('2107', 'yyyy', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- week year +select parseDateTimeInJodaSyntax('2106', 'xxxx', 'UTC') = toDateTime('2106-01-04', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1971', 'xxxx', 'UTC') = toDateTime('1971-01-04', 'UTC'); +1 +select parseDateTimeInJodaSyntax('2025', 'xxxx', 'UTC') = toDateTime('2024-12-30', 'UTC'); +1 +select parseDateTimeInJodaSyntax('12', 'xx', 'UTC') = toDateTime('2012-01-02', 'UTC'); +1 +select parseDateTimeInJodaSyntax('69', 'xx', 'UTC') = toDateTime('2068-12-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('99', 'xx', 'UTC') = toDateTime('1999-01-04', 'UTC'); +1 +select parseDateTimeInJodaSyntax('01', 'xx', 'UTC') = toDateTime('2001-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('+10', 'xx', 'UTC') = toDateTime('2010-01-04', 
'UTC'); +1 +select parseDateTimeInJodaSyntax('+99 01', 'xx ww', 'UTC') = toDateTime('1999-01-04', 'UTC'); +1 +select parseDateTimeInJodaSyntax('+99 02', 'xx ww', 'UTC') = toDateTime('1999-01-11', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10 +10', 'ww xx', 'UTC') = toDateTime('2010-03-08', 'UTC'); +1 +select parseDateTimeInJodaSyntax('2+10', 'wwxx', 'UTC') = toDateTime('2010-01-11', 'UTC'); +1 +select parseDateTimeInJodaSyntax('+102', 'xxM', 'UTC') = toDateTime('2010-02-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('+20102', 'xxxxM', 'UTC') = toDateTime('2010-02-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1970', 'xxxx', 'UTC'); -- { serverError VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE } +select parseDateTimeInJodaSyntax('1969', 'xxxx', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('2107', 'xxxx', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- century of era +select parseDateTimeInJodaSyntax('20', 'CC', 'UTC') = toDateTime('2000-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('21', 'CC', 'UTC') = toDateTime('2100-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('19', 'CC', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('22', 'CC', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- month +select parseDateTimeInJodaSyntax('1', 'M', 'UTC') = toDateTime('2000-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax(' 7', ' MM', 'UTC') = toDateTime('2000-07-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('11', 'M', 'UTC') = toDateTime('2000-11-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10-', 'M-', 'UTC') = toDateTime('2000-10-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('-12-', '-M-', 'UTC') = toDateTime('2000-12-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0', 'M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('13', 'M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('12345', 'M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +--- Ensure MMM and MMMM specifiers consume both short- and long-form month names +select parseDateTimeInJodaSyntax('Aug', 'MMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('AuG', 'MMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('august', 'MMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('Aug', 'MMMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('AuG', 'MMMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('august', 'MMMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +1 +--- invalid month names +select parseDateTimeInJodaSyntax('Decembr', 'MMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('Decembr', 'MMMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('Decemberary', 'MMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('Decemberary', 'MMMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('asdf', 'MMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('asdf', 'MMMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- day of month +select parseDateTimeInJodaSyntax('1', 'd', 'UTC') = toDateTime('2000-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 ', 'dd ', 'UTC') = toDateTime('2000-01-07', 'UTC'); +1 +select 
parseDateTimeInJodaSyntax('/11', '/dd', 'UTC') = toDateTime('2000-01-11', 'UTC'); +1 +select parseDateTimeInJodaSyntax('/31/', '/d/', 'UTC') = toDateTime('2000-01-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0', 'd', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('32', 'd', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('12345', 'd', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('02-31', 'M-d', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('04-31', 'M-d', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- The last one is chosen if multiple day of months are supplied. +select parseDateTimeInJodaSyntax('2 31 1', 'M d M', 'UTC') = toDateTime('2000-01-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1 31 20 2', 'M d d M', 'UTC') = toDateTime('2000-02-20', 'UTC'); +1 +select parseDateTimeInJodaSyntax('2 31 20 4', 'M d d M', 'UTC') = toDateTime('2000-04-20', 'UTC'); +1 +--- Leap year +select parseDateTimeInJodaSyntax('2020-02-29', 'YYYY-M-d', 'UTC') = toDateTime('2020-02-29', 'UTC'); +1 +select parseDateTimeInJodaSyntax('2001-02-29', 'YYYY-M-d', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- day of year +select parseDateTimeInJodaSyntax('1', 'D', 'UTC') = toDateTime('2000-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 ', 'DD ', 'UTC') = toDateTime('2000-01-07', 'UTC'); +1 +select parseDateTimeInJodaSyntax('/11', '/DD', 'UTC') = toDateTime('2000-01-11', 'UTC'); +1 +select parseDateTimeInJodaSyntax('/31/', '/DDD/', 'UTC') = toDateTime('2000-01-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('32', 'D', 'UTC') = toDateTime('2000-02-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('60', 'D', 'UTC') = toDateTime('2000-02-29', 'UTC'); +1 +select parseDateTimeInJodaSyntax('365', 'D', 'UTC') = toDateTime('2000-12-30', 'UTC'); +1 +select parseDateTimeInJodaSyntax('366', 'D', 'UTC') = toDateTime('2000-12-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 1', 'yyyy D', 'UTC') = toDateTime('1999-01-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 7 ', 'yyyy DD ', 'UTC') = toDateTime('1999-01-07', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 /11', 'yyyy /DD', 'UTC') = toDateTime('1999-01-11', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 /31/', 'yyyy /DD/', 'UTC') = toDateTime('1999-01-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 32', 'yyyy D', 'UTC') = toDateTime('1999-02-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 60', 'yyyy D', 'UTC') = toDateTime('1999-03-01', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 365', 'yyyy D', 'UTC') = toDateTime('1999-12-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1999 366', 'yyyy D', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +--- Ensure all days of year are checked against final selected year +select parseDateTimeInJodaSyntax('2001 366 2000', 'yyyy D yyyy', 'UTC') = toDateTime('2000-12-31', 'UTC'); +1 +select parseDateTimeInJodaSyntax('2000 366 2001', 'yyyy D yyyy', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('0', 'D', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('367', 'D', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- hour of day +select parseDateTimeInJodaSyntax('7', 'H', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('23', 'HH', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +1 +select 
parseDateTimeInJodaSyntax('0', 'HHH', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10', 'HHHHHHHH', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +--- invalid hour od day +select parseDateTimeInJodaSyntax('24', 'H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 'H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- clock hour of day +select parseDateTimeInJodaSyntax('7', 'k', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('24', 'kk', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1', 'kkk', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10', 'kkkkkkkk', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +-- invalid clock hour of day +select parseDateTimeInJodaSyntax('25', 'k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('0', 'k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- hour of half day +select parseDateTimeInJodaSyntax('7', 'K', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('11', 'KK', 'UTC') = toDateTime('1970-01-01 11:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0', 'KKK', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10', 'KKKKKKKK', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +-- invalid hour of half day +select parseDateTimeInJodaSyntax('12', 'K', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 'K', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'K', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- clock hour of half day +select parseDateTimeInJodaSyntax('7', 'h', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('12', 'hh', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1', 'hhh', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('10', 'hhhhhhhh', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +1 +-- invalid clock hour of half day +select parseDateTimeInJodaSyntax('13', 'h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('0', 'h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- half of day +--- Half of day has no effect if hour or clockhour of day is provided hour of day tests +select parseDateTimeInJodaSyntax('7 PM', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 AM', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 pm', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 am', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0 PM', 'H a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0 AM', 'H a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0 pm', 'H a', 'UTC') = 
toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0 am', 'H a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 PM', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 AM', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 pm', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('7 am', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('24 PM', 'k a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('24 AM', 'k a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('24 pm', 'k a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('24 am', 'k a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +-- Half of day has effect if hour or clockhour of halfday is provided +select parseDateTimeInJodaSyntax('0 PM', 'K a', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0 AM', 'K a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('6 PM', 'K a', 'UTC') = toDateTime('1970-01-01 18:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('6 AM', 'K a', 'UTC') = toDateTime('1970-01-01 06:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('11 PM', 'K a', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('11 AM', 'K a', 'UTC') = toDateTime('1970-01-01 11:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1 PM', 'h a', 'UTC') = toDateTime('1970-01-01 13:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1 AM', 'h a', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('6 PM', 'h a', 'UTC') = toDateTime('1970-01-01 18:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('6 AM', 'h a', 'UTC') = toDateTime('1970-01-01 06:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('12 PM', 'h a', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('12 AM', 'h a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +-- time gives precendent to most recent time specifier +select parseDateTimeInJodaSyntax('0 1 AM', 'H h a', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('12 1 PM', 'H h a', 'UTC') = toDateTime('1970-01-01 13:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1 AM 0', 'h a H', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('1 AM 12', 'h a H', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); +1 +-- minute +select parseDateTimeInJodaSyntax('8', 'm', 'UTC') = toDateTime('1970-01-01 00:08:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('59', 'mm', 'UTC') = toDateTime('1970-01-01 00:59:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0/', 'mmm/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('60', 'm', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 'm', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'm', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- second +select parseDateTimeInJodaSyntax('9', 's', 'UTC') = toDateTime('1970-01-01 00:00:09', 'UTC'); +1 +select parseDateTimeInJodaSyntax('58', 'ss', 'UTC') = 
toDateTime('1970-01-01 00:00:58', 'UTC'); +1 +select parseDateTimeInJodaSyntax('0/', 's/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +1 +select parseDateTimeInJodaSyntax('60', 's', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 's', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 's', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } diff --git a/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.sql b/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.sql new file mode 100644 index 00000000000..99426a8c8d8 --- /dev/null +++ b/tests/queries/0_stateless/02668_parse_datetime_in_joda_syntax.sql @@ -0,0 +1,233 @@ +-- { echoOn } +-- empty +select parseDateTimeInJodaSyntax(' ', ' ', 'UTC') = toDateTime('1970-01-01', 'UTC'); + +-- era +select parseDateTimeInJodaSyntax('AD 1999', 'G YYYY') = toDateTime('1999-01-01'); +select parseDateTimeInJodaSyntax('ad 1999', 'G YYYY') = toDateTime('1999-01-01'); +select parseDateTimeInJodaSyntax('Ad 1999', 'G YYYY') = toDateTime('1999-01-01'); +select parseDateTimeInJodaSyntax('AD 1999', 'G YYYY') = toDateTime('1999-01-01'); +select parseDateTimeInJodaSyntax('AD 1999', 'G yyyy') = toDateTime('1999-01-01'); +select parseDateTimeInJodaSyntax('AD 1999 2000', 'G YYYY yyyy') = toDateTime('2000-01-01'); +select parseDateTimeInJodaSyntax('AD 1999 2000', 'G yyyy YYYY') = toDateTime('2000-01-01'); +select parseDateTimeInJodaSyntax('AD 1999', 'G Y'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('AD 1999', 'G YY'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('AD 1999', 'G YYY'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('BC', 'G'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('AB', 'G'); -- { serverError CANNOT_PARSE_DATETIME } + +-- year of era +select parseDateTimeInJodaSyntax('2106', 'YYYY', 'UTC') = toDateTime('2106-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('1970', 'YYYY', 'UTC') = toDateTime('1970-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('1969', 'YYYY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('2107', 'YYYY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('+1999', 'YYYY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +select parseDateTimeInJodaSyntax('12', 'YY', 'UTC') = toDateTime('2012-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('69', 'YY', 'UTC') = toDateTime('2069-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('70', 'YY', 'UTC') = toDateTime('1970-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('99', 'YY', 'UTC') = toDateTime('1999-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('01', 'YY', 'UTC') = toDateTime('2001-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('1', 'YY', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +select parseDateTimeInJodaSyntax('99 98 97', 'YY YY YY', 'UTC') = toDateTime('1997-01-01', 'UTC'); + +-- year +select parseDateTimeInJodaSyntax('12', 'yy', 'UTC') = toDateTime('2012-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('69', 'yy', 'UTC') = toDateTime('2069-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('70', 'yy', 'UTC') = toDateTime('1970-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('99', 'yy', 'UTC') = toDateTime('1999-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('+99', 'yy', 'UTC') = toDateTime('1999-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('+99 02', 
'yy MM', 'UTC') = toDateTime('1999-02-01', 'UTC'); +select parseDateTimeInJodaSyntax('10 +10', 'MM yy', 'UTC') = toDateTime('2010-10-01', 'UTC'); +select parseDateTimeInJodaSyntax('10+2001', 'MMyyyy', 'UTC') = toDateTime('2001-10-01', 'UTC'); +select parseDateTimeInJodaSyntax('+200110', 'yyyyMM', 'UTC') = toDateTime('2001-10-01', 'UTC'); +select parseDateTimeInJodaSyntax('1970', 'yyyy', 'UTC') = toDateTime('1970-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('2106', 'yyyy', 'UTC') = toDateTime('2106-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('1969', 'yyyy', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('2107', 'yyyy', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- week year +select parseDateTimeInJodaSyntax('2106', 'xxxx', 'UTC') = toDateTime('2106-01-04', 'UTC'); +select parseDateTimeInJodaSyntax('1971', 'xxxx', 'UTC') = toDateTime('1971-01-04', 'UTC'); +select parseDateTimeInJodaSyntax('2025', 'xxxx', 'UTC') = toDateTime('2024-12-30', 'UTC'); +select parseDateTimeInJodaSyntax('12', 'xx', 'UTC') = toDateTime('2012-01-02', 'UTC'); +select parseDateTimeInJodaSyntax('69', 'xx', 'UTC') = toDateTime('2068-12-31', 'UTC'); +select parseDateTimeInJodaSyntax('99', 'xx', 'UTC') = toDateTime('1999-01-04', 'UTC'); +select parseDateTimeInJodaSyntax('01', 'xx', 'UTC') = toDateTime('2001-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('+10', 'xx', 'UTC') = toDateTime('2010-01-04', 'UTC'); +select parseDateTimeInJodaSyntax('+99 01', 'xx ww', 'UTC') = toDateTime('1999-01-04', 'UTC'); +select parseDateTimeInJodaSyntax('+99 02', 'xx ww', 'UTC') = toDateTime('1999-01-11', 'UTC'); +select parseDateTimeInJodaSyntax('10 +10', 'ww xx', 'UTC') = toDateTime('2010-03-08', 'UTC'); +select parseDateTimeInJodaSyntax('2+10', 'wwxx', 'UTC') = toDateTime('2010-01-11', 'UTC'); +select parseDateTimeInJodaSyntax('+102', 'xxM', 'UTC') = toDateTime('2010-02-01', 'UTC'); +select parseDateTimeInJodaSyntax('+20102', 'xxxxM', 'UTC') = toDateTime('2010-02-01', 'UTC'); +select parseDateTimeInJodaSyntax('1970', 'xxxx', 'UTC'); -- { serverError VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE } +select parseDateTimeInJodaSyntax('1969', 'xxxx', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('2107', 'xxxx', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- century of era +select parseDateTimeInJodaSyntax('20', 'CC', 'UTC') = toDateTime('2000-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('21', 'CC', 'UTC') = toDateTime('2100-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('19', 'CC', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('22', 'CC', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- month +select parseDateTimeInJodaSyntax('1', 'M', 'UTC') = toDateTime('2000-01-01', 'UTC'); +select parseDateTimeInJodaSyntax(' 7', ' MM', 'UTC') = toDateTime('2000-07-01', 'UTC'); +select parseDateTimeInJodaSyntax('11', 'M', 'UTC') = toDateTime('2000-11-01', 'UTC'); +select parseDateTimeInJodaSyntax('10-', 'M-', 'UTC') = toDateTime('2000-10-01', 'UTC'); +select parseDateTimeInJodaSyntax('-12-', '-M-', 'UTC') = toDateTime('2000-12-01', 'UTC'); +select parseDateTimeInJodaSyntax('0', 'M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('13', 'M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('12345', 'M', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +--- Ensure MMM and MMMM specifiers consume both short- and long-form month names +select 
parseDateTimeInJodaSyntax('Aug', 'MMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +select parseDateTimeInJodaSyntax('AuG', 'MMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +select parseDateTimeInJodaSyntax('august', 'MMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +select parseDateTimeInJodaSyntax('Aug', 'MMMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +select parseDateTimeInJodaSyntax('AuG', 'MMMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +select parseDateTimeInJodaSyntax('august', 'MMMM', 'UTC') = toDateTime('2000-08-01', 'UTC'); +--- invalid month names +select parseDateTimeInJodaSyntax('Decembr', 'MMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('Decembr', 'MMMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('Decemberary', 'MMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('Decemberary', 'MMMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('asdf', 'MMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('asdf', 'MMMM', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- day of month +select parseDateTimeInJodaSyntax('1', 'd', 'UTC') = toDateTime('2000-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('7 ', 'dd ', 'UTC') = toDateTime('2000-01-07', 'UTC'); +select parseDateTimeInJodaSyntax('/11', '/dd', 'UTC') = toDateTime('2000-01-11', 'UTC'); +select parseDateTimeInJodaSyntax('/31/', '/d/', 'UTC') = toDateTime('2000-01-31', 'UTC'); +select parseDateTimeInJodaSyntax('0', 'd', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('32', 'd', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('12345', 'd', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('02-31', 'M-d', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('04-31', 'M-d', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +-- The last one is chosen if multiple day of months are supplied. 
+select parseDateTimeInJodaSyntax('2 31 1', 'M d M', 'UTC') = toDateTime('2000-01-31', 'UTC'); +select parseDateTimeInJodaSyntax('1 31 20 2', 'M d d M', 'UTC') = toDateTime('2000-02-20', 'UTC'); +select parseDateTimeInJodaSyntax('2 31 20 4', 'M d d M', 'UTC') = toDateTime('2000-04-20', 'UTC'); +--- Leap year +select parseDateTimeInJodaSyntax('2020-02-29', 'YYYY-M-d', 'UTC') = toDateTime('2020-02-29', 'UTC'); +select parseDateTimeInJodaSyntax('2001-02-29', 'YYYY-M-d', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- day of year +select parseDateTimeInJodaSyntax('1', 'D', 'UTC') = toDateTime('2000-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('7 ', 'DD ', 'UTC') = toDateTime('2000-01-07', 'UTC'); +select parseDateTimeInJodaSyntax('/11', '/DD', 'UTC') = toDateTime('2000-01-11', 'UTC'); +select parseDateTimeInJodaSyntax('/31/', '/DDD/', 'UTC') = toDateTime('2000-01-31', 'UTC'); +select parseDateTimeInJodaSyntax('32', 'D', 'UTC') = toDateTime('2000-02-01', 'UTC'); +select parseDateTimeInJodaSyntax('60', 'D', 'UTC') = toDateTime('2000-02-29', 'UTC'); +select parseDateTimeInJodaSyntax('365', 'D', 'UTC') = toDateTime('2000-12-30', 'UTC'); +select parseDateTimeInJodaSyntax('366', 'D', 'UTC') = toDateTime('2000-12-31', 'UTC'); +select parseDateTimeInJodaSyntax('1999 1', 'yyyy D', 'UTC') = toDateTime('1999-01-01', 'UTC'); +select parseDateTimeInJodaSyntax('1999 7 ', 'yyyy DD ', 'UTC') = toDateTime('1999-01-07', 'UTC'); +select parseDateTimeInJodaSyntax('1999 /11', 'yyyy /DD', 'UTC') = toDateTime('1999-01-11', 'UTC'); +select parseDateTimeInJodaSyntax('1999 /31/', 'yyyy /DD/', 'UTC') = toDateTime('1999-01-31', 'UTC'); +select parseDateTimeInJodaSyntax('1999 32', 'yyyy D', 'UTC') = toDateTime('1999-02-01', 'UTC'); +select parseDateTimeInJodaSyntax('1999 60', 'yyyy D', 'UTC') = toDateTime('1999-03-01', 'UTC'); +select parseDateTimeInJodaSyntax('1999 365', 'yyyy D', 'UTC') = toDateTime('1999-12-31', 'UTC'); +select parseDateTimeInJodaSyntax('1999 366', 'yyyy D', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +--- Ensure all days of year are checked against final selected year +select parseDateTimeInJodaSyntax('2001 366 2000', 'yyyy D yyyy', 'UTC') = toDateTime('2000-12-31', 'UTC'); +select parseDateTimeInJodaSyntax('2000 366 2001', 'yyyy D yyyy', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('0', 'D', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('367', 'D', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- hour of day +select parseDateTimeInJodaSyntax('7', 'H', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('23', 'HH', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('0', 'HHH', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('10', 'HHHHHHHH', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +--- invalid hour od day +select parseDateTimeInJodaSyntax('24', 'H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 'H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'H', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- clock hour of day +select parseDateTimeInJodaSyntax('7', 'k', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('24', 'kk', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('1', 'kkk', 'UTC') = toDateTime('1970-01-01 01:00:00', 
'UTC'); +select parseDateTimeInJodaSyntax('10', 'kkkkkkkk', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +-- invalid clock hour of day +select parseDateTimeInJodaSyntax('25', 'k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('0', 'k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'k', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- hour of half day +select parseDateTimeInJodaSyntax('7', 'K', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('11', 'KK', 'UTC') = toDateTime('1970-01-01 11:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('0', 'KKK', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('10', 'KKKKKKKK', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +-- invalid hour of half day +select parseDateTimeInJodaSyntax('12', 'K', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 'K', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'K', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- clock hour of half day +select parseDateTimeInJodaSyntax('7', 'h', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('12', 'hh', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('1', 'hhh', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('10', 'hhhhhhhh', 'UTC') = toDateTime('1970-01-01 10:00:00', 'UTC'); +-- invalid clock hour of half day +select parseDateTimeInJodaSyntax('13', 'h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('0', 'h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'h', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- half of day +--- Half of day has no effect if hour or clockhour of day is provided hour of day tests +select parseDateTimeInJodaSyntax('7 PM', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('7 AM', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('7 pm', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('7 am', 'H a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('0 PM', 'H a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('0 AM', 'H a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('0 pm', 'H a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('0 am', 'H a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('7 PM', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('7 AM', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('7 pm', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('7 am', 'k a', 'UTC') = toDateTime('1970-01-01 07:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('24 PM', 'k a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('24 AM', 'k a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('24 pm', 'k a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('24 
+-- Half of day has an effect if hour or clockhour of halfday is provided +select parseDateTimeInJodaSyntax('0 PM', 'K a', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('0 AM', 'K a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('6 PM', 'K a', 'UTC') = toDateTime('1970-01-01 18:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('6 AM', 'K a', 'UTC') = toDateTime('1970-01-01 06:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('11 PM', 'K a', 'UTC') = toDateTime('1970-01-01 23:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('11 AM', 'K a', 'UTC') = toDateTime('1970-01-01 11:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('1 PM', 'h a', 'UTC') = toDateTime('1970-01-01 13:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('1 AM', 'h a', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('6 PM', 'h a', 'UTC') = toDateTime('1970-01-01 18:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('6 AM', 'h a', 'UTC') = toDateTime('1970-01-01 06:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('12 PM', 'h a', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('12 AM', 'h a', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +-- time gives precedence to the most recent time specifier +select parseDateTimeInJodaSyntax('0 1 AM', 'H h a', 'UTC') = toDateTime('1970-01-01 01:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('12 1 PM', 'H h a', 'UTC') = toDateTime('1970-01-01 13:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('1 AM 0', 'h a H', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('1 AM 12', 'h a H', 'UTC') = toDateTime('1970-01-01 12:00:00', 'UTC'); + +-- minute +select parseDateTimeInJodaSyntax('8', 'm', 'UTC') = toDateTime('1970-01-01 00:08:00', 'UTC'); +select parseDateTimeInJodaSyntax('59', 'mm', 'UTC') = toDateTime('1970-01-01 00:59:00', 'UTC'); +select parseDateTimeInJodaSyntax('0/', 'mmm/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('60', 'm', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 'm', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 'm', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- second +select parseDateTimeInJodaSyntax('9', 's', 'UTC') = toDateTime('1970-01-01 00:00:09', 'UTC'); +select parseDateTimeInJodaSyntax('58', 'ss', 'UTC') = toDateTime('1970-01-01 00:00:58', 'UTC'); +select parseDateTimeInJodaSyntax('0/', 's/', 'UTC') = toDateTime('1970-01-01 00:00:00', 'UTC'); +select parseDateTimeInJodaSyntax('60', 's', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('-1', 's', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTimeInJodaSyntax('123456789', 's', 'UTC'); -- { serverError CANNOT_PARSE_DATETIME } + +-- { echoOff } diff --git a/utils/changelog-simple/format-changelog.py b/utils/changelog-simple/format-changelog.py index d5e1518270e..01f2694dd0f 100755 --- a/utils/changelog-simple/format-changelog.py +++ b/utils/changelog-simple/format-changelog.py @@ -20,6 +20,7 @@ parser.add_argument( ) args = parser.parse_args() + # This function mirrors the PR description checks in ClickhousePullRequestTrigger. # Returns False if the PR should not be mentioned in the changelog.
def parse_one_pull_request(item): diff --git a/utils/check-style/check-black b/utils/check-style/check-black index 141dcd1b406..33c463d1668 100755 --- a/utils/check-style/check-black +++ b/utils/check-style/check-black @@ -4,10 +4,22 @@ set -e # We check only our code, that's why we skip contrib GIT_ROOT=$(git rev-parse --show-cdup) -GIT_ROOT=${GIT_ROOT:-.} +GIT_ROOT=${GIT_ROOT:-./} tmp=$(mktemp) -# Find all *.py files in the repo except the contrib directory -find_cmd=(find "$GIT_ROOT" -name '*.py' -not -path "$GIT_ROOT/contrib/*") + +# Find all *.py, *.python files and executable files without extension +# that are determined as python scripts by 'file' util +# in the repo except the contrib directory. +find_cmd=( + find "$GIT_ROOT" -type f -not -path "${GIT_ROOT}contrib/*" + \( + -name '*.py' -or -name "*.python" -or + \( + -executable -not -name "*.*" -exec sh -c 'file {} | grep -q "Python script"' \; + \) + \) +) + if ! "${find_cmd[@]}" -exec black --check --diff {} + 1>"$tmp" 2>&1; then # Show the result only if some files need formatting cat "$tmp" @@ -16,4 +28,5 @@ if ! "${find_cmd[@]}" -exec black --check --diff {} + 1>"$tmp" 2>&1; then # Automatically add changed files to stage "${find_cmd[@]}" -exec git add -u {} + 1>/dev/null 2>&1 fi + rm "$tmp" diff --git a/utils/clickhouse-diagnostics/clickhouse-diagnostics b/utils/clickhouse-diagnostics/clickhouse-diagnostics index cf65e4efbfb..5cacbf1d4d4 100755 --- a/utils/clickhouse-diagnostics/clickhouse-diagnostics +++ b/utils/clickhouse-diagnostics/clickhouse-diagnostics @@ -19,9 +19,9 @@ import tenacity import xmltodict import yaml -SELECT_VERSION = r'SELECT version()' +SELECT_VERSION = r"SELECT version()" -SELECT_UPTIME = r''' +SELECT_UPTIME = r""" {% if version_ge('21.3') -%} SELECT formatReadableTimeDelta(uptime()) {% else -%} @@ -29,18 +29,18 @@ SELECT toString(floor(uptime() / 3600 / 24)) || ' days ' || toString(floor(uptime() % (24 * 3600) / 3600, 1)) || ' hours' {% endif -%} -''' +""" SELECT_SYSTEM_TABLES = "SELECT name FROM system.tables WHERE database = 'system'" -SELECT_DATABASE_ENGINES = r'''SELECT +SELECT_DATABASE_ENGINES = r"""SELECT engine, count() "count" FROM system.databases GROUP BY engine -''' +""" -SELECT_DATABASES = r'''SELECT +SELECT_DATABASES = r"""SELECT name, engine, tables, @@ -62,17 +62,17 @@ LEFT JOIN ) AS db_stats ON db.name = db_stats.database ORDER BY bytes_on_disk DESC LIMIT 10 -''' +""" -SELECT_TABLE_ENGINES = r'''SELECT +SELECT_TABLE_ENGINES = r"""SELECT engine, count() "count" FROM system.tables WHERE database != 'system' GROUP BY engine -''' +""" -SELECT_DICTIONARIES = r'''SELECT +SELECT_DICTIONARIES = r"""SELECT source, type, status, @@ -80,13 +80,13 @@ SELECT_DICTIONARIES = r'''SELECT FROM system.dictionaries GROUP BY source, type, status ORDER BY status DESC, source -''' +""" SELECT_ACCESS = "SHOW ACCESS" SELECT_QUOTA_USAGE = "SHOW QUOTA" -SELECT_REPLICAS = r'''SELECT +SELECT_REPLICAS = r"""SELECT database, table, is_leader, @@ -98,9 +98,9 @@ SELECT_REPLICAS = r'''SELECT FROM system.replicas ORDER BY absolute_delay DESC LIMIT 10 -''' +""" -SELECT_REPLICATION_QUEUE = r'''SELECT +SELECT_REPLICATION_QUEUE = r"""SELECT database, table, replica_name, @@ -121,9 +121,9 @@ SELECT_REPLICATION_QUEUE = r'''SELECT FROM system.replication_queue ORDER BY create_time ASC LIMIT 20 -''' +""" -SELECT_REPLICATED_FETCHES = r'''SELECT +SELECT_REPLICATED_FETCHES = r"""SELECT database, table, round(elapsed, 1) "elapsed", @@ -140,9 +140,9 @@ SELECT_REPLICATED_FETCHES = r'''SELECT to_detached, thread_id FROM 
system.replicated_fetches -''' +""" -SELECT_PARTS_PER_TABLE = r'''SELECT +SELECT_PARTS_PER_TABLE = r"""SELECT database, table, count() "partitions", @@ -162,9 +162,9 @@ FROM GROUP BY database, table ORDER BY max_parts_per_partition DESC LIMIT 10 -''' +""" -SELECT_MERGES = r'''SELECT +SELECT_MERGES = r"""SELECT database, table, round(elapsed, 1) "elapsed", @@ -187,9 +187,9 @@ SELECT_MERGES = r'''SELECT formatReadableSize(memory_usage) "memory_usage" {% endif -%} FROM system.merges -''' +""" -SELECT_MUTATIONS = r'''SELECT +SELECT_MUTATIONS = r"""SELECT database, table, mutation_id, @@ -206,9 +206,9 @@ SELECT_MUTATIONS = r'''SELECT FROM system.mutations WHERE NOT is_done ORDER BY create_time DESC -''' +""" -SELECT_RECENT_DATA_PARTS = r'''SELECT +SELECT_RECENT_DATA_PARTS = r"""SELECT database, table, engine, @@ -242,9 +242,9 @@ SELECT_RECENT_DATA_PARTS = r'''SELECT FROM system.parts WHERE modification_time > now() - INTERVAL 3 MINUTE ORDER BY modification_time DESC -''' +""" -SELECT_DETACHED_DATA_PARTS = r'''SELECT +SELECT_DETACHED_DATA_PARTS = r"""SELECT database, table, partition_id, @@ -255,9 +255,9 @@ SELECT_DETACHED_DATA_PARTS = r'''SELECT max_block_number, level FROM system.detached_parts -''' +""" -SELECT_PROCESSES = r'''SELECT +SELECT_PROCESSES = r"""SELECT elapsed, query_id, {% if normalize_queries -%} @@ -285,9 +285,9 @@ SELECT_PROCESSES = r'''SELECT {% endif -%} FROM system.processes ORDER BY elapsed DESC -''' +""" -SELECT_TOP_QUERIES_BY_DURATION = r'''SELECT +SELECT_TOP_QUERIES_BY_DURATION = r"""SELECT type, query_start_time, query_duration_ms, @@ -339,9 +339,9 @@ WHERE type != 'QueryStart' AND event_time >= now() - INTERVAL 1 DAY ORDER BY query_duration_ms DESC LIMIT 10 -''' +""" -SELECT_TOP_QUERIES_BY_MEMORY_USAGE = r'''SELECT +SELECT_TOP_QUERIES_BY_MEMORY_USAGE = r"""SELECT type, query_start_time, query_duration_ms, @@ -393,9 +393,9 @@ WHERE type != 'QueryStart' AND event_time >= now() - INTERVAL 1 DAY ORDER BY memory_usage DESC LIMIT 10 -''' +""" -SELECT_FAILED_QUERIES = r'''SELECT +SELECT_FAILED_QUERIES = r"""SELECT type, query_start_time, query_duration_ms, @@ -448,9 +448,9 @@ WHERE type != 'QueryStart' AND exception != '' ORDER BY query_start_time DESC LIMIT 10 -''' +""" -SELECT_STACK_TRACES = r'''SELECT +SELECT_STACK_TRACES = r"""SELECT '\n' || arrayStringConcat( arrayMap( x, @@ -459,9 +459,9 @@ SELECT_STACK_TRACES = r'''SELECT arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') AS trace FROM system.stack_trace -''' +""" -SELECT_CRASH_LOG = r'''SELECT +SELECT_CRASH_LOG = r"""SELECT event_time, signal, thread_id, @@ -470,7 +470,7 @@ SELECT_CRASH_LOG = r'''SELECT version FROM system.crash_log ORDER BY event_time DESC -''' +""" def retry(exception_types, max_attempts=5, max_interval=5): @@ -481,7 +481,8 @@ def retry(exception_types, max_attempts=5, max_interval=5): retry=tenacity.retry_if_exception_type(exception_types), wait=tenacity.wait_random_exponential(multiplier=0.5, max=max_interval), stop=tenacity.stop_after_attempt(max_attempts), - reraise=True) + reraise=True, + ) class ClickhouseError(Exception): @@ -502,9 +503,9 @@ class ClickhouseClient: def __init__(self, *, host="localhost", port=8123, user="default", password): self._session = requests.Session() if user: - self._session.headers['X-ClickHouse-User'] = user - self._session.headers['X-ClickHouse-Key'] = password - self._url = f'http://{host}:{port}' + self._session.headers["X-ClickHouse-User"] = user + self._session.headers["X-ClickHouse-Key"] = password + self._url = f"http://{host}:{port}" self._timeout 
= 60 self._ch_version = None @@ -516,7 +517,16 @@ class ClickhouseClient: return self._ch_version @retry(requests.exceptions.ConnectionError) - def query(self, query, query_args=None, format=None, post_data=None, timeout=None, echo=False, dry_run=False): + def query( + self, + query, + query_args=None, + format=None, + post_data=None, + timeout=None, + echo=False, + dry_run=False, + ): """ Execute query. """ @@ -524,28 +534,30 @@ class ClickhouseClient: query = self.render_query(query, **query_args) if format: - query += f' FORMAT {format}' + query += f" FORMAT {format}" if timeout is None: timeout = self._timeout if echo: - print(sqlparse.format(query, reindent=True), '\n') + print(sqlparse.format(query, reindent=True), "\n") if dry_run: return None try: - response = self._session.post(self._url, - params={ - 'query': query, - }, - json=post_data, - timeout=timeout) + response = self._session.post( + self._url, + params={ + "query": query, + }, + json=post_data, + timeout=timeout, + ) response.raise_for_status() - if format in ('JSON', 'JSONCompact'): + if format in ("JSON", "JSONCompact"): return response.json() return response.text.strip() @@ -555,7 +567,9 @@ class ClickhouseClient: def render_query(self, query, **kwargs): env = jinja2.Environment() - env.globals['version_ge'] = lambda version: version_ge(self.clickhouse_version, version) + env.globals["version_ge"] = lambda version: version_ge( + self.clickhouse_version, version + ) template = env.from_string(query) return template.render(kwargs) @@ -578,11 +592,13 @@ class ClickhouseConfig: @classmethod def load(cls): - return ClickhouseConfig(cls._load_config('/var/lib/clickhouse/preprocessed_configs/config.xml')) + return ClickhouseConfig( + cls._load_config("/var/lib/clickhouse/preprocessed_configs/config.xml") + ) @staticmethod def _load_config(config_path): - with open(config_path, 'r') as file: + with open(config_path, "r") as file: return xmltodict.parse(file.read()) @classmethod @@ -591,8 +607,8 @@ class ClickhouseConfig: for key, value in list(config.items()): if isinstance(value, MutableMapping): cls._mask_secrets(config[key]) - elif key in ('password', 'secret_access_key', 'header', 'identity'): - config[key] = '*****' + elif key in ("password", "secret_access_key", "header", "identity"): + config[key] = "*****" class DiagnosticsData: @@ -603,53 +619,53 @@ class DiagnosticsData: def __init__(self, args): self.args = args self.host = args.host - self._sections = [{'section': None, 'data': {}}] + self._sections = [{"section": None, "data": {}}] def add_string(self, name, value, section=None): self._section(section)[name] = { - 'type': 'string', - 'value': value, + "type": "string", + "value": value, } def add_xml_document(self, name, document, section=None): self._section(section)[name] = { - 'type': 'xml', - 'value': document, + "type": "xml", + "value": document, } def add_query(self, name, query, result, section=None): self._section(section)[name] = { - 'type': 'query', - 'query': query, - 'result': result, + "type": "query", + "query": query, + "result": result, } def add_command(self, name, command, result, section=None): self._section(section)[name] = { - 'type': 'command', - 'command': command, - 'result': result, + "type": "command", + "command": command, + "result": result, } def dump(self, format): - if format.startswith('json'): + if format.startswith("json"): result = self._dump_json() - elif format.startswith('yaml'): + elif format.startswith("yaml"): result = self._dump_yaml() else: result = self._dump_wiki() - 
if format.endswith('.gz'): - compressor = gzip.GzipFile(mode='wb', fileobj=sys.stdout.buffer) + if format.endswith(".gz"): + compressor = gzip.GzipFile(mode="wb", fileobj=sys.stdout.buffer) compressor.write(result.encode()) else: print(result) def _section(self, name=None): - if self._sections[-1]['section'] != name: - self._sections.append({'section': name, 'data': {}}) + if self._sections[-1]["section"] != name: + self._sections.append({"section": name, "data": {}}) - return self._sections[-1]['data'] + return self._sections[-1]["data"] def _dump_json(self): """ @@ -669,85 +685,85 @@ class DiagnosticsData: """ def _write_title(buffer, value): - buffer.write(f'### {value}\n') + buffer.write(f"### {value}\n") def _write_subtitle(buffer, value): - buffer.write(f'#### {value}\n') + buffer.write(f"#### {value}\n") def _write_string_item(buffer, name, item): - value = item['value'] - if value != '': - value = f'**{value}**' - buffer.write(f'{name}: {value}\n') + value = item["value"] + if value != "": + value = f"**{value}**" + buffer.write(f"{name}: {value}\n") def _write_xml_item(buffer, section_name, name, item): if section_name: - buffer.write(f'##### {name}\n') + buffer.write(f"##### {name}\n") else: _write_subtitle(buffer, name) - _write_result(buffer, item['value'], format='XML') + _write_result(buffer, item["value"], format="XML") def _write_query_item(buffer, section_name, name, item): if section_name: - buffer.write(f'##### {name}\n') + buffer.write(f"##### {name}\n") else: _write_subtitle(buffer, name) - _write_query(buffer, item['query']) - _write_result(buffer, item['result']) + _write_query(buffer, item["query"]) + _write_result(buffer, item["result"]) def _write_command_item(buffer, section_name, name, item): if section_name: - buffer.write(f'##### {name}\n') + buffer.write(f"##### {name}\n") else: _write_subtitle(buffer, name) - _write_command(buffer, item['command']) - _write_result(buffer, item['result']) + _write_command(buffer, item["command"]) + _write_result(buffer, item["result"]) def _write_unknown_item(buffer, section_name, name, item): if section_name: - buffer.write(f'**{name}**\n') + buffer.write(f"**{name}**\n") else: _write_subtitle(buffer, name) json.dump(item, buffer, indent=2) def _write_query(buffer, query): - buffer.write('**query**\n') - buffer.write('```sql\n') + buffer.write("**query**\n") + buffer.write("```sql\n") buffer.write(query) - buffer.write('\n```\n') + buffer.write("\n```\n") def _write_command(buffer, command): - buffer.write('**command**\n') - buffer.write('```\n') + buffer.write("**command**\n") + buffer.write("```\n") buffer.write(command) - buffer.write('\n```\n') + buffer.write("\n```\n") def _write_result(buffer, result, format=None): - buffer.write('**result**\n') - buffer.write(f'```{format}\n' if format else '```\n') + buffer.write("**result**\n") + buffer.write(f"```{format}\n" if format else "```\n") buffer.write(result) - buffer.write('\n```\n') + buffer.write("\n```\n") buffer = io.StringIO() - _write_title(buffer, f'Diagnostics data for host {self.host}') + _write_title(buffer, f"Diagnostics data for host {self.host}") for section in self._sections: - section_name = section['section'] + section_name = section["section"] if section_name: _write_subtitle(buffer, section_name) - for name, item in section['data'].items(): - if item['type'] == 'string': + for name, item in section["data"].items(): + if item["type"] == "string": _write_string_item(buffer, name, item) - elif item['type'] == 'query': + elif item["type"] == "query": 
_write_query_item(buffer, section_name, name, item) - elif item['type'] == 'command': + elif item["type"] == "command": _write_command_item(buffer, section_name, name, item) - elif item['type'] == 'xml': + elif item["type"] == "xml": _write_xml_item(buffer, section_name, name, item) else: _write_unknown_item(buffer, section_name, name, item) @@ -760,126 +776,196 @@ def main(): Program entry point. """ args = parse_args() - timestamp = datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S') - client = ClickhouseClient(host=args.host, port=args.port, user=args.user, password=args.password) + timestamp = datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M:%S") + client = ClickhouseClient( + host=args.host, port=args.port, user=args.user, password=args.password + ) ch_config = ClickhouseConfig.load() version = client.clickhouse_version - system_tables = [row[0] for row in execute_query(client, SELECT_SYSTEM_TABLES, format='JSONCompact')['data']] + system_tables = [ + row[0] + for row in execute_query(client, SELECT_SYSTEM_TABLES, format="JSONCompact")[ + "data" + ] + ] diagnostics = DiagnosticsData(args) - diagnostics.add_string('Version', version) - diagnostics.add_string('Timestamp', timestamp) - diagnostics.add_string('Uptime', execute_query(client, SELECT_UPTIME)) + diagnostics.add_string("Version", version) + diagnostics.add_string("Timestamp", timestamp) + diagnostics.add_string("Uptime", execute_query(client, SELECT_UPTIME)) - diagnostics.add_xml_document('ClickHouse configuration', ch_config.dump()) + diagnostics.add_xml_document("ClickHouse configuration", ch_config.dump()) - if version_ge(version, '20.8'): - add_query(diagnostics, 'Access configuration', - client=client, - query=SELECT_ACCESS, - format='TSVRaw') - add_query(diagnostics, 'Quotas', - client=client, - query=SELECT_QUOTA_USAGE, - format='Vertical') + if version_ge(version, "20.8"): + add_query( + diagnostics, + "Access configuration", + client=client, + query=SELECT_ACCESS, + format="TSVRaw", + ) + add_query( + diagnostics, + "Quotas", + client=client, + query=SELECT_QUOTA_USAGE, + format="Vertical", + ) - add_query(diagnostics, 'Database engines', - client=client, - query=SELECT_DATABASE_ENGINES, - format='PrettyCompactNoEscapes', - section='Schema') - add_query(diagnostics, 'Databases (top 10 by size)', - client=client, - query=SELECT_DATABASES, - format='PrettyCompactNoEscapes', - section='Schema') - add_query(diagnostics, 'Table engines', - client=client, - query=SELECT_TABLE_ENGINES, - format='PrettyCompactNoEscapes', - section='Schema') - add_query(diagnostics, 'Dictionaries', - client=client, - query=SELECT_DICTIONARIES, - format='PrettyCompactNoEscapes', - section='Schema') + add_query( + diagnostics, + "Database engines", + client=client, + query=SELECT_DATABASE_ENGINES, + format="PrettyCompactNoEscapes", + section="Schema", + ) + add_query( + diagnostics, + "Databases (top 10 by size)", + client=client, + query=SELECT_DATABASES, + format="PrettyCompactNoEscapes", + section="Schema", + ) + add_query( + diagnostics, + "Table engines", + client=client, + query=SELECT_TABLE_ENGINES, + format="PrettyCompactNoEscapes", + section="Schema", + ) + add_query( + diagnostics, + "Dictionaries", + client=client, + query=SELECT_DICTIONARIES, + format="PrettyCompactNoEscapes", + section="Schema", + ) - add_query(diagnostics, 'Replicated tables (top 10 by absolute delay)', - client=client, - query=SELECT_REPLICAS, - format='PrettyCompactNoEscapes', - section='Replication') - add_query(diagnostics, 'Replication queue (top 20 oldest 
tasks)', - client=client, - query=SELECT_REPLICATION_QUEUE, - format='Vertical', - section='Replication') - if version_ge(version, '21.3'): - add_query(diagnostics, 'Replicated fetches', - client=client, - query=SELECT_REPLICATED_FETCHES, - format='Vertical', - section='Replication') + add_query( + diagnostics, + "Replicated tables (top 10 by absolute delay)", + client=client, + query=SELECT_REPLICAS, + format="PrettyCompactNoEscapes", + section="Replication", + ) + add_query( + diagnostics, + "Replication queue (top 20 oldest tasks)", + client=client, + query=SELECT_REPLICATION_QUEUE, + format="Vertical", + section="Replication", + ) + if version_ge(version, "21.3"): + add_query( + diagnostics, + "Replicated fetches", + client=client, + query=SELECT_REPLICATED_FETCHES, + format="Vertical", + section="Replication", + ) - add_query(diagnostics, 'Top 10 tables by max parts per partition', - client=client, - query=SELECT_PARTS_PER_TABLE, - format='PrettyCompactNoEscapes') - add_query(diagnostics, 'Merges in progress', - client=client, - query=SELECT_MERGES, - format='Vertical') - add_query(diagnostics, 'Mutations in progress', - client=client, - query=SELECT_MUTATIONS, - format='Vertical') - add_query(diagnostics, 'Recent data parts (modification time within last 3 minutes)', - client=client, - query=SELECT_RECENT_DATA_PARTS, - format='Vertical') + add_query( + diagnostics, + "Top 10 tables by max parts per partition", + client=client, + query=SELECT_PARTS_PER_TABLE, + format="PrettyCompactNoEscapes", + ) + add_query( + diagnostics, + "Merges in progress", + client=client, + query=SELECT_MERGES, + format="Vertical", + ) + add_query( + diagnostics, + "Mutations in progress", + client=client, + query=SELECT_MUTATIONS, + format="Vertical", + ) + add_query( + diagnostics, + "Recent data parts (modification time within last 3 minutes)", + client=client, + query=SELECT_RECENT_DATA_PARTS, + format="Vertical", + ) - add_query(diagnostics, 'system.detached_parts', - client=client, - query=SELECT_DETACHED_DATA_PARTS, - format='PrettyCompactNoEscapes', - section='Detached data') - add_command(diagnostics, 'Disk space usage', - command='du -sh -L -c /var/lib/clickhouse/data/*/*/detached/* | sort -rsh', - section='Detached data') + add_query( + diagnostics, + "system.detached_parts", + client=client, + query=SELECT_DETACHED_DATA_PARTS, + format="PrettyCompactNoEscapes", + section="Detached data", + ) + add_command( + diagnostics, + "Disk space usage", + command="du -sh -L -c /var/lib/clickhouse/data/*/*/detached/* | sort -rsh", + section="Detached data", + ) - add_query(diagnostics, 'Queries in progress (process list)', - client=client, - query=SELECT_PROCESSES, - format='Vertical', - section='Queries') - add_query(diagnostics, 'Top 10 queries by duration', - client=client, - query=SELECT_TOP_QUERIES_BY_DURATION, - format='Vertical', - section='Queries') - add_query(diagnostics, 'Top 10 queries by memory usage', - client=client, - query=SELECT_TOP_QUERIES_BY_MEMORY_USAGE, - format='Vertical', - section='Queries') - add_query(diagnostics, 'Last 10 failed queries', - client=client, - query=SELECT_FAILED_QUERIES, - format='Vertical', - section='Queries') + add_query( + diagnostics, + "Queries in progress (process list)", + client=client, + query=SELECT_PROCESSES, + format="Vertical", + section="Queries", + ) + add_query( + diagnostics, + "Top 10 queries by duration", + client=client, + query=SELECT_TOP_QUERIES_BY_DURATION, + format="Vertical", + section="Queries", + ) + add_query( + diagnostics, + "Top 10 
queries by memory usage", + client=client, + query=SELECT_TOP_QUERIES_BY_MEMORY_USAGE, + format="Vertical", + section="Queries", + ) + add_query( + diagnostics, + "Last 10 failed queries", + client=client, + query=SELECT_FAILED_QUERIES, + format="Vertical", + section="Queries", + ) - add_query(diagnostics, 'Stack traces', - client=client, - query=SELECT_STACK_TRACES, - format='Vertical') + add_query( + diagnostics, + "Stack traces", + client=client, + query=SELECT_STACK_TRACES, + format="Vertical", + ) - if 'crash_log' in system_tables: - add_query(diagnostics, 'Crash log', - client=client, - query=SELECT_CRASH_LOG, - format='Vertical') + if "crash_log" in system_tables: + add_query( + diagnostics, + "Crash log", + client=client, + query=SELECT_CRASH_LOG, + format="Vertical", + ) - add_command(diagnostics, 'uname', 'uname -a') + add_command(diagnostics, "uname", "uname -a") diagnostics.dump(args.format) @@ -889,29 +975,34 @@ def parse_args(): Parse command-line arguments. """ parser = argparse.ArgumentParser() - parser.add_argument('--format', - choices=['json', 'yaml', 'json.gz', 'yaml.gz', 'wiki', 'wiki.gz'], - default='wiki') - parser.add_argument('--normalize-queries', - action='store_true', - default=False) - parser.add_argument('--host', dest="host", help="clickhouse host") - parser.add_argument('--port', dest="port", default=8123, help="clickhouse http port") - parser.add_argument('--user', dest="user", default="default", help="clickhouse user") - parser.add_argument('--password', dest="password", help="clickhouse password") + parser.add_argument( + "--format", + choices=["json", "yaml", "json.gz", "yaml.gz", "wiki", "wiki.gz"], + default="wiki", + ) + parser.add_argument("--normalize-queries", action="store_true", default=False) + parser.add_argument("--host", dest="host", help="clickhouse host") + parser.add_argument( + "--port", dest="port", default=8123, help="clickhouse http port" + ) + parser.add_argument( + "--user", dest="user", default="default", help="clickhouse user" + ) + parser.add_argument("--password", dest="password", help="clickhouse password") return parser.parse_args() def add_query(diagnostics, name, client, query, format, section=None): query_args = { - 'normalize_queries': diagnostics.args.normalize_queries, + "normalize_queries": diagnostics.args.normalize_queries, } query = client.render_query(query, **query_args) diagnostics.add_query( name=name, query=query, result=execute_query(client, query, render_query=False, format=format), - section=section) + section=section, + ) def execute_query(client, query, render_query=True, format=None): @@ -926,14 +1017,18 @@ def execute_query(client, query, render_query=True, format=None): def add_command(diagnostics, name, command, section=None): diagnostics.add_command( - name=name, - command=command, - result=execute_command(command), - section=section) + name=name, command=command, result=execute_command(command), section=section + ) def execute_command(command, input=None): - proc = subprocess.Popen(command, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + proc = subprocess.Popen( + command, + shell=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) if isinstance(input, str): input = input.encode() @@ -941,7 +1036,7 @@ def execute_command(command, input=None): stdout, stderr = proc.communicate(input=input) if proc.returncode: - return f'failed with exit code {proc.returncode}\n{stderr.decode()}' + return f"failed with exit code 
{proc.returncode}\n{stderr.decode()}" return stdout.decode() @@ -957,8 +1052,8 @@ def parse_version(version): """ Parse version string. """ - return [int(x) for x in version.strip().split('.') if x.isnumeric()] + return [int(x) for x in version.strip().split(".") if x.isnumeric()] -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/utils/keeper-overload/keeper-overload.py b/utils/keeper-overload/keeper-overload.py index bdb4563c713..0a059b10588 100755 --- a/utils/keeper-overload/keeper-overload.py +++ b/utils/keeper-overload/keeper-overload.py @@ -166,7 +166,7 @@ def main(args): keeper_bench_path = args.keeper_bench_path keepers = [] - for (port, server_id) in zip(PORTS, SERVER_IDS): + for port, server_id in zip(PORTS, SERVER_IDS): keepers.append( Keeper( keeper_binary_path, server_id, port, workdir, args.with_thread_fuzzer diff --git a/utils/s3tools/s3uploader b/utils/s3tools/s3uploader index 33db76f57f4..d53661614c0 100755 --- a/utils/s3tools/s3uploader +++ b/utils/s3tools/s3uploader @@ -28,39 +28,48 @@ class S3API(object): bucket = self.connection.get_bucket(bucket_name) key = bucket.initiate_multipart_upload(s3_path) logging.info("Will upload to s3 path %s", s3_path) - chunksize = 1024 * 1024 * 1024 # 1 GB + chunksize = 1024 * 1024 * 1024 # 1 GB filesize = os.stat(file_path).st_size logging.info("File size is %s", filesize) chunkcount = int(math.ceil(filesize / chunksize)) def call_back(x, y): print("Uploaded {}/{} bytes".format(x, y)) + try: for i in range(chunkcount + 1): logging.info("Uploading chunk %s of %s", i, chunkcount + 1) offset = chunksize * i bytes_size = min(chunksize, filesize - offset) - with open(file_path, 'r') as fp: + with open(file_path, "r") as fp: fp.seek(offset) - key.upload_part_from_file(fp=fp, part_num=i+1, - size=bytes_size, cb=call_back, - num_cb=100) + key.upload_part_from_file( + fp=fp, part_num=i + 1, size=bytes_size, cb=call_back, num_cb=100 + ) key.complete_upload() except Exception as ex: key.cancel_upload() raise ex logging.info("Contents were set") return "https://{bucket}.{mds_url}/{path}".format( - bucket=bucket_name, mds_url=self.mds_url, path=s3_path) + bucket=bucket_name, mds_url=self.mds_url, path=s3_path + ) def set_file_contents(self, bucket, local_file_path, s3_file_path): key = Key(bucket) key.key = s3_file_path file_size = os.stat(local_file_path).st_size - logging.info("Uploading file `%s` to `%s`. Size is %s", local_file_path, s3_file_path, file_size) + logging.info( + "Uploading file `%s` to `%s`. 
Size is %s", + local_file_path, + s3_file_path, + file_size, + ) + def call_back(x, y): print("Uploaded {}/{} bytes".format(x, y)) + key.set_contents_from_filename(local_file_path, cb=call_back) def upload_data_for_static_files_disk(self, bucket_name, directory_path, s3_path): @@ -74,12 +83,14 @@ class S3API(object): path = root.split(os.sep) for file in files: local_file_path = os.path.join(root, file) - s3_file = local_file_path[len(directory_path) + 1:] + s3_file = local_file_path[len(directory_path) + 1 :] s3_file_path = os.path.join(s3_path, s3_file) self.set_file_contents(bucket, local_file_path, s3_file_path) logging.info("Uploading finished") - return "https://{bucket}.{mds_url}/{path}".format(bucket=bucket_name, mds_url=self.mds_url, path=s3_path) + return "https://{bucket}.{mds_url}/{path}".format( + bucket=bucket_name, mds_url=self.mds_url, path=s3_path + ) def list_bucket_keys(self, bucket_name): bucket = self.connection.get_bucket(bucket_name) @@ -91,100 +102,119 @@ class S3API(object): bucket.get_all_keys() for obj in bucket.get_all_keys(): if obj.key.startswith(folder_path): - print('Removing ' + obj.key) + print("Removing " + obj.key) obj.delete() -def make_tar_file_for_table(clickhouse_data_path, db_name, table_name, - tmp_prefix): - - relative_data_path = os.path.join('data', db_name, table_name) - relative_meta_path = os.path.join('metadata', db_name, table_name + '.sql') +def make_tar_file_for_table(clickhouse_data_path, db_name, table_name, tmp_prefix): + relative_data_path = os.path.join("data", db_name, table_name) + relative_meta_path = os.path.join("metadata", db_name, table_name + ".sql") path_to_data = os.path.join(clickhouse_data_path, relative_data_path) path_to_metadata = os.path.join(clickhouse_data_path, relative_meta_path) - temporary_file_name = tmp_prefix + '/{tname}.tar'.format(tname=table_name) + temporary_file_name = tmp_prefix + "/{tname}.tar".format(tname=table_name) with tarfile.open(temporary_file_name, "w") as bundle: bundle.add(path_to_data, arcname=relative_data_path) bundle.add(path_to_metadata, arcname=relative_meta_path) return temporary_file_name -USAGE_EXAMPLES = ''' +USAGE_EXAMPLES = """ examples: \t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --clickhouse-data-path /opt/clickhouse/ --table-name default.some_tbl --bucket-name some-bucket \t./s3uploader --dataset-name some_ds --access-key-id XXX --secret-access-key YYY --file-path some_ds.tsv.xz --bucket-name some-bucket --s3-path /path/to/ -''' +""" if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") parser = argparse.ArgumentParser( description="Simple tool for uploading datasets to clickhouse S3", - usage='%(prog)s [options] {}'.format(USAGE_EXAMPLES)) - parser.add_argument('--s3-api-url', default='s3.amazonaws.com') - parser.add_argument('--s3-common-url', default='s3.amazonaws.com') - parser.add_argument('--bucket-name', default='clickhouse-datasets') - parser.add_argument('--dataset-name', required=True, - help='Name of dataset, will be used in uploaded path') - parser.add_argument('--access-key-id', required=True) - parser.add_argument('--secret-access-key', required=True) - parser.add_argument('--clickhouse-data-path', - default='/var/lib/clickhouse/', - help='Path to clickhouse database on filesystem') - parser.add_argument('--s3-path', help='Path in s3, where to upload file') - parser.add_argument('--tmp-prefix', 
default='/tmp', - help='Prefix to store temporary downloaded file') + usage="%(prog)s [options] {}".format(USAGE_EXAMPLES), + ) + parser.add_argument("--s3-api-url", default="s3.amazonaws.com") + parser.add_argument("--s3-common-url", default="s3.amazonaws.com") + parser.add_argument("--bucket-name", default="clickhouse-datasets") + parser.add_argument( + "--dataset-name", + required=True, + help="Name of dataset, will be used in uploaded path", + ) + parser.add_argument("--access-key-id", required=True) + parser.add_argument("--secret-access-key", required=True) + parser.add_argument( + "--clickhouse-data-path", + default="/var/lib/clickhouse/", + help="Path to clickhouse database on filesystem", + ) + parser.add_argument("--s3-path", help="Path in s3, where to upload file") + parser.add_argument( + "--tmp-prefix", default="/tmp", help="Prefix to store temporary downloaded file" + ) data_group = parser.add_mutually_exclusive_group(required=True) - table_name_argument = data_group.add_argument('--table-name', - help='Name of table with database, if you are uploading partitions') - data_group.add_argument('--file-path', - help='Name of file, if you are uploading') - data_group.add_argument('--directory-path', help='Path to directory with files to upload') - data_group.add_argument('--list-directory', help='List s3 directory by --directory-path') - data_group.add_argument('--remove-directory', help='Remove s3 directory by --directory-path') + table_name_argument = data_group.add_argument( + "--table-name", + help="Name of table with database, if you are uploading partitions", + ) + data_group.add_argument("--file-path", help="Name of file, if you are uploading") + data_group.add_argument( + "--directory-path", help="Path to directory with files to upload" + ) + data_group.add_argument( + "--list-directory", help="List s3 directory by --directory-path" + ) + data_group.add_argument( + "--remove-directory", help="Remove s3 directory by --directory-path" + ) args = parser.parse_args() if args.table_name is not None and args.clickhouse_data_path is None: - raise argparse.ArgumentError(table_name_argument, - "You should specify --clickhouse-data-path to upload --table") + raise argparse.ArgumentError( + table_name_argument, + "You should specify --clickhouse-data-path to upload --table", + ) s3_conn = S3API( - args.access_key_id, args.secret_access_key, - args.s3_api_url, args.s3_common_url) + args.access_key_id, args.secret_access_key, args.s3_api_url, args.s3_common_url + ) - file_path = '' + file_path = "" directory_path = args.directory_path s3_path = args.s3_path if args.list_directory: s3_conn.list_bucket_keys(args.bucket_name) elif args.remove_directory: - print('Removing s3 path: ' + args.remove_directory) + print("Removing s3 path: " + args.remove_directory) s3_conn.remove_folder_from_bucket(args.bucket_name, args.remove_directory) elif args.directory_path is not None: - url = s3_conn.upload_data_for_static_files_disk(args.bucket_name, directory_path, s3_path) + url = s3_conn.upload_data_for_static_files_disk( + args.bucket_name, directory_path, s3_path + ) logging.info("Data uploaded: %s", url) else: - if args.table_name is not None: - if '.' not in args.table_name: - db_name = 'default' + if "." 
not in args.table_name: + db_name = "default" else: - db_name, table_name = args.table_name.split('.') + db_name, table_name = args.table_name.split(".") file_path = make_tar_file_for_table( - args.clickhouse_data_path, db_name, table_name, args.tmp_prefix) + args.clickhouse_data_path, db_name, table_name, args.tmp_prefix + ) else: file_path = args.file_path - if 'tsv' in file_path: + if "tsv" in file_path: s3_path = os.path.join( - args.dataset_name, 'tsv', os.path.basename(file_path)) + args.dataset_name, "tsv", os.path.basename(file_path) + ) if args.table_name is not None: s3_path = os.path.join( - args.dataset_name, 'partitions', os.path.basename(file_path)) + args.dataset_name, "partitions", os.path.basename(file_path) + ) elif args.s3_path is not None: s3_path = os.path.join( - args.dataset_name, args.s3_path, os.path.basename(file_path)) + args.dataset_name, args.s3_path, os.path.basename(file_path) + ) else: raise Exception("Don't know s3-path to upload") diff --git a/utils/test_history/test-history b/utils/test_history/test-history index fdd6c36e9dc..5f031af1d3a 100755 --- a/utils/test_history/test-history +++ b/utils/test_history/test-history @@ -11,13 +11,14 @@ from termcolor import colored import sys COLORMAP = { - "success": colored("success", 'green'), - "failure": colored("failure", 'red'), - "error": colored("error", 'red'), - "pending": colored("pending", 'yellow'), - "not run": colored("not run", 'white'), + "success": colored("success", "green"), + "failure": colored("failure", "red"), + "error": colored("error", "red"), + "pending": colored("pending", "yellow"), + "not run": colored("not run", "white"), } + def _filter_statuses(statuses): """ Squash statuses to latest state @@ -69,7 +70,7 @@ if __name__ == "__main__": date_since = datetime.datetime.strptime(args.since, "%Y-%m-%d %H:%M:%S") gh = Github(args.token) - repo = gh.get_repo('ClickHouse/ClickHouse') + repo = gh.get_repo("ClickHouse/ClickHouse") commits = get_commits(repo, date_since) longest_header = [] @@ -101,6 +102,6 @@ if __name__ == "__main__": result_data.append(current_result) if sys.stdout.isatty(): - longest_header = [colored(h, 'white', attrs=['bold']) for h in longest_header] + longest_header = [colored(h, "white", attrs=["bold"]) for h in longest_header] print(tabulate.tabulate(result_data, headers=longest_header, tablefmt="grid"))
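
A note on the parseDateTimeInJodaSyntax tests added above: the expected results follow a simple rule for the hour fields. 'H' (hour of day, 0-23) and 'k' (clock hour of day, 1-24, with 24 wrapping to 0) ignore the AM/PM marker, 'K' (hour of half day, 0-11) and 'h' (clock hour of half day, 1-12, with 12 acting as 0) combine with it, and when several hour fields appear, the one parsed last wins. The sketch below is only a plain-Python model of that rule for illustration, not ClickHouse's parser; the helper name and the assumption that values are already range-checked are mine.

# Illustrative model of the hour / half-of-day behaviour the tests above encode
# (not ClickHouse's implementation; values are assumed to be range-checked already).
def resolve_hour(hour_fields, am_pm=None):
    """hour_fields: (specifier, value) pairs in parse order, specifier in 'H', 'k', 'K', 'h'.
    Returns the resulting hour in 0..23."""
    spec, value = hour_fields[-1]              # the most recently parsed field wins
    if spec == "H":                            # hour of day: AM/PM has no effect
        return value
    if spec == "k":                            # clock hour of day: 24 wraps to 0
        return value % 24
    offset = 12 if (am_pm or "").upper() == "PM" else 0
    if spec == "K":                            # hour of half day: 0..11 plus AM/PM offset
        return value + offset
    if spec == "h":                            # clock hour of half day: 12 acts as 0
        return value % 12 + offset
    raise ValueError(f"unknown hour specifier {spec!r}")

# Mirrors e.g. parseDateTimeInJodaSyntax('12 1 PM', 'H h a') -> 13:00
assert resolve_hour([("H", 12), ("h", 1)], "PM") == 13
assert resolve_hour([("h", 1), ("H", 12)], "AM") == 12   # '1 AM 12', 'h a H' -> 12:00
assert resolve_hour([("K", 0)], "pm") == 12              # '0 PM', 'K a' -> 12:00
assert resolve_hour([("k", 24)], "PM") == 0              # '24 PM', 'k a' -> 00:00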
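
The check-black change widens file discovery so that, besides *.py and *.python, extensionless executables that the file utility classifies as Python scripts are formatted as well. Below is a rough, stdlib-only Python equivalent of that selection logic; the helper names are hypothetical, and it keeps the shell script's reliance on the external file command.

import os
import subprocess

def looks_like_python(path):
    # Same criteria as the find expression in check-black:
    # *.py / *.python by name, otherwise an executable without an extension
    # whose `file` output mentions "Python script".
    name = os.path.basename(path)
    if name.endswith((".py", ".python")):
        return True
    if "." in name or not os.access(path, os.X_OK):
        return False
    out = subprocess.run(["file", path], capture_output=True, text=True).stdout
    return "Python script" in out

def find_python_files(git_root):
    for root, dirs, files in os.walk(git_root):
        if root == git_root and "contrib" in dirs:
            dirs.remove("contrib")             # only top-level contrib is skipped, as in the script
        for f in files:
            path = os.path.join(root, f)
            if looks_like_python(path):
                yield path

Running black --check --diff over the yielded paths corresponds to what the find ... -exec black pipeline in the script does.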
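
clickhouse-diagnostics reaches the server over plain HTTP: credentials travel in the X-ClickHouse-User / X-ClickHouse-Key headers, the SQL is posted in the query parameter, and the @retry decorator wraps the call in tenacity's random exponential backoff. The condensed sketch below shows only that transport; host, port and credentials are placeholder defaults, it is not the script's full ClickhouseClient, and it always appends FORMAT JSONCompact for brevity.

import requests
import tenacity

@tenacity.retry(
    retry=tenacity.retry_if_exception_type(requests.exceptions.ConnectionError),
    wait=tenacity.wait_random_exponential(multiplier=0.5, max=5),
    stop=tenacity.stop_after_attempt(5),
    reraise=True,
)
def run_query(sql, host="localhost", port=8123, user="default", password=""):
    # Same transport as ClickhouseClient.query: POST with the SQL in the `query` parameter.
    response = requests.post(
        f"http://{host}:{port}",
        params={"query": sql + " FORMAT JSONCompact"},
        headers={"X-ClickHouse-User": user, "X-ClickHouse-Key": password},
        timeout=60,
    )
    response.raise_for_status()
    return response.json()

# e.g. run_query("SELECT version()") resembles how SELECT_VERSION is executed above.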
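
Several diagnostics sections above are gated on version_ge(version, "20.8") or version_ge(version, "21.3"), and parse_version near the end of the script reduces the version string to its numeric components. version_ge itself does not appear in this diff, so the comparison below is only an assumed implementation that is consistent with how it is called.

def parse_version(version):
    # As in clickhouse-diagnostics: keep only the numeric components.
    return [int(x) for x in version.strip().split(".") if x.isnumeric()]

def version_ge(version, comparand):
    # Assumed semantics: element-wise comparison of the parsed component lists.
    return parse_version(version) >= parse_version(comparand)

assert version_ge("21.3.2.5", "20.8")
assert not version_ge("20.7.4.11", "20.8")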
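
The multipart upload in s3uploader cuts the file into 1 GB parts: part i + 1 starts at offset chunksize * i and is min(chunksize, filesize - offset) bytes long. The helper below isolates only that arithmetic; the boto upload calls are omitted, and the early break on a non-positive size is an addition here, since the original loop's final iteration computes an empty (or negative) trailing size.

import math

CHUNK_SIZE = 1024 * 1024 * 1024  # 1 GB, as in S3API

def multipart_plan(filesize, chunksize=CHUNK_SIZE):
    """Yield (part_num, offset, size) tuples the way the multipart upload iterates."""
    chunkcount = int(math.ceil(filesize / chunksize))
    for i in range(chunkcount + 1):
        offset = chunksize * i
        size = min(chunksize, filesize - offset)
        if size <= 0:              # skip the empty trailing iteration
            break
        yield i + 1, offset, size

# A 2.5 GB file becomes three parts of 1 GB, 1 GB and 0.5 GB:
assert [s for _, _, s in multipart_plan(5 * CHUNK_SIZE // 2)] == [
    CHUNK_SIZE, CHUNK_SIZE, CHUNK_SIZE // 2
]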