diff --git a/.gitmodules b/.gitmodules index 67f181414e5..8bf4f6e8975 100644 --- a/.gitmodules +++ b/.gitmodules @@ -35,10 +35,9 @@ [submodule "contrib/unixodbc"] path = contrib/unixodbc url = https://github.com/ClickHouse/UnixODBC -[submodule "contrib/protobuf"] - path = contrib/protobuf - url = https://github.com/ClickHouse/protobuf - branch = v3.13.0.1 +[submodule "contrib/google-protobuf"] + path = contrib/google-protobuf + url = https://github.com/ClickHouse/google-protobuf.git [submodule "contrib/boost"] path = contrib/boost url = https://github.com/ClickHouse/boost diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 020fe1e1c5a..4a4ff9982ea 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -88,7 +88,7 @@ add_contrib (thrift-cmake thrift) # parquet/arrow/orc add_contrib (arrow-cmake arrow) # requires: snappy, thrift, double-conversion add_contrib (avro-cmake avro) # requires: snappy -add_contrib (protobuf-cmake protobuf) +add_contrib (google-protobuf-cmake google-protobuf) add_contrib (openldap-cmake openldap) add_contrib (grpc-cmake grpc) add_contrib (msgpack-c-cmake msgpack-c) @@ -156,7 +156,7 @@ add_contrib (libgsasl-cmake libgsasl) # requires krb5 add_contrib (librdkafka-cmake librdkafka) # requires: libgsasl add_contrib (nats-io-cmake nats-io) add_contrib (isa-l-cmake isa-l) -add_contrib (libhdfs3-cmake libhdfs3) # requires: protobuf, krb5, isa-l +add_contrib (libhdfs3-cmake libhdfs3) # requires: google-protobuf, krb5, isa-l add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift/avro/arrow/libhdfs3 add_contrib (cppkafka-cmake cppkafka) add_contrib (libpqxx-cmake libpqxx) diff --git a/contrib/google-protobuf b/contrib/google-protobuf new file mode 160000 index 00000000000..c47efe2d8f6 --- /dev/null +++ b/contrib/google-protobuf @@ -0,0 +1 @@ +Subproject commit c47efe2d8f6a60022b49ecd6cc23660687c8598f diff --git a/contrib/protobuf-cmake/CMakeLists.txt b/contrib/google-protobuf-cmake/CMakeLists.txt similarity index 97% rename from contrib/protobuf-cmake/CMakeLists.txt rename to contrib/google-protobuf-cmake/CMakeLists.txt index 5e22136fc1f..8afb86b25dd 100644 --- a/contrib/protobuf-cmake/CMakeLists.txt +++ b/contrib/google-protobuf-cmake/CMakeLists.txt @@ -5,7 +5,7 @@ if(NOT ENABLE_PROTOBUF) return() endif() -set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/protobuf/src") +set(Protobuf_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf/src") if(OS_FREEBSD AND SANITIZE STREQUAL "address") # ../contrib/protobuf/src/google/protobuf/arena_impl.h:45:10: fatal error: 'sanitizer/asan_interface.h' file not found # #include @@ -17,8 +17,8 @@ if(OS_FREEBSD AND SANITIZE STREQUAL "address") endif() endif() -set(protobuf_source_dir "${ClickHouse_SOURCE_DIR}/contrib/protobuf") -set(protobuf_binary_dir "${ClickHouse_BINARY_DIR}/contrib/protobuf") +set(protobuf_source_dir "${ClickHouse_SOURCE_DIR}/contrib/google-protobuf") +set(protobuf_binary_dir "${ClickHouse_BINARY_DIR}/contrib/google-protobuf") add_definitions(-DGOOGLE_PROTOBUF_CMAKE_BUILD) @@ -35,7 +35,6 @@ set(libprotobuf_lite_files ${protobuf_source_dir}/src/google/protobuf/arena.cc ${protobuf_source_dir}/src/google/protobuf/arenastring.cc ${protobuf_source_dir}/src/google/protobuf/extension_set.cc - ${protobuf_source_dir}/src/google/protobuf/field_access_listener.cc ${protobuf_source_dir}/src/google/protobuf/generated_enum_util.cc ${protobuf_source_dir}/src/google/protobuf/generated_message_table_driven_lite.cc 
${protobuf_source_dir}/src/google/protobuf/generated_message_util.cc @@ -86,6 +85,7 @@ set(libprotobuf_files ${protobuf_source_dir}/src/google/protobuf/empty.pb.cc ${protobuf_source_dir}/src/google/protobuf/extension_set_heavy.cc ${protobuf_source_dir}/src/google/protobuf/field_mask.pb.cc + ${protobuf_source_dir}/src/google/protobuf/generated_message_bases.cc ${protobuf_source_dir}/src/google/protobuf/generated_message_reflection.cc ${protobuf_source_dir}/src/google/protobuf/generated_message_table_driven.cc ${protobuf_source_dir}/src/google/protobuf/io/gzip_stream.cc @@ -316,7 +316,7 @@ else () add_dependencies(protoc "${PROTOC_BUILD_DIR}/protoc") endif () -include("${ClickHouse_SOURCE_DIR}/contrib/protobuf-cmake/protobuf_generate.cmake") +include("${ClickHouse_SOURCE_DIR}/contrib/google-protobuf-cmake/protobuf_generate.cmake") add_library(_protobuf INTERFACE) target_link_libraries(_protobuf INTERFACE _libprotobuf) diff --git a/contrib/protobuf-cmake/protobuf_generate.cmake b/contrib/google-protobuf-cmake/protobuf_generate.cmake similarity index 100% rename from contrib/protobuf-cmake/protobuf_generate.cmake rename to contrib/google-protobuf-cmake/protobuf_generate.cmake diff --git a/contrib/protobuf b/contrib/protobuf deleted file mode 160000 index 6bb70196c53..00000000000 --- a/contrib/protobuf +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 6bb70196c5360268d9f021bb7936fb0b551724c2 diff --git a/docker/test/unit/run.sh b/docker/test/unit/run.sh index abc35fa40d2..a4784466e27 100644 --- a/docker/test/unit/run.sh +++ b/docker/test/unit/run.sh @@ -3,5 +3,5 @@ set -x service zookeeper start && sleep 7 && /usr/share/zookeeper/bin/zkCli.sh -server localhost:2181 -create create /clickhouse_test ''; -gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt +timeout 40m gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt ./process_unit_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index 1f1c996d4bf..f7cc52e622e 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -119,7 +119,7 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree]( The data of TIME type in MySQL is converted to microseconds in ClickHouse. -Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws exception "Unhandled data type" and stops replication. +Other types are not supported. If MySQL table contains a column of such type, ClickHouse throws an exception and stops replication. ## Specifics and Recommendations {#specifics-and-recommendations} diff --git a/docs/en/engines/database-engines/materialized-postgresql.md b/docs/en/engines/database-engines/materialized-postgresql.md index 08e9f998626..33d75dc9582 100644 --- a/docs/en/engines/database-engines/materialized-postgresql.md +++ b/docs/en/engines/database-engines/materialized-postgresql.md @@ -55,7 +55,7 @@ ATTACH TABLE postgres_database.new_table; ``` :::warning -Before version 22.1, adding a table to replication left an unremoved temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). 
If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1. +Before version 22.1, adding a table to replication left a non-removed temporary replication slot (named `{db_name}_ch_replication_slot_tmp`). If attaching tables in ClickHouse version before 22.1, make sure to delete it manually (`SELECT pg_drop_replication_slot('{db_name}_ch_replication_slot_tmp')`). Otherwise disk usage will grow. This issue is fixed in 22.1. ::: ## Dynamically removing tables from replication {#dynamically-removing-table-from-replication} @@ -257,7 +257,7 @@ Please note that this should be used only if it is actually needed. If there is 1. [CREATE PUBLICATION](https://postgrespro.ru/docs/postgresql/14/sql-createpublication) -- create query privilege. -2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privelege. +2. [CREATE_REPLICATION_SLOT](https://postgrespro.ru/docs/postgrespro/10/protocol-replication#PROTOCOL-REPLICATION-CREATE-SLOT) -- replication privilege. 3. [pg_drop_replication_slot](https://postgrespro.ru/docs/postgrespro/9.5/functions-admin#functions-replication) -- replication privilege or superuser. diff --git a/docs/en/engines/database-engines/sqlite.md b/docs/en/engines/database-engines/sqlite.md index fc2a6525a68..0fa3c0fff58 100644 --- a/docs/en/engines/database-engines/sqlite.md +++ b/docs/en/engines/database-engines/sqlite.md @@ -30,7 +30,7 @@ Allows to connect to [SQLite](https://www.sqlite.org/index.html) database and pe ## Specifics and Recommendations {#specifics-and-recommendations} -SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multitasked. +SQLite stores the entire database (definitions, tables, indices, and the data itself) as a single cross-platform file on a host machine. During writing SQLite locks the entire database file, therefore write operations are performed sequentially. Read operations can be multi-tasked. SQLite does not require service management (such as startup scripts) or access control based on `GRANT` and passwords. Access control is handled by means of file-system permissions given to the database file itself. 
## Usage Example {#usage-example} diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index b9db0fae68f..08cd88826e5 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -156,7 +156,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us | rpc\_client\_connect\_timeout | 600 * 1000 | | rpc\_client\_read\_timeout | 3600 * 1000 | | rpc\_client\_write\_timeout | 3600 * 1000 | -| rpc\_client\_socekt\_linger\_timeout | -1 | +| rpc\_client\_socket\_linger\_timeout | -1 | | rpc\_client\_connect\_retry | 10 | | rpc\_client\_timeout | 3600 * 1000 | | dfs\_default\_replica | 3 | @@ -176,7 +176,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us | output\_write\_timeout | 3600 * 1000 | | output\_close\_timeout | 3600 * 1000 | | output\_packetpool\_size | 1024 | -| output\_heeartbeat\_interval | 10 * 1000 | +| output\_heartbeat\_interval | 10 * 1000 | | dfs\_client\_failover\_max\_attempts | 15 | | dfs\_client\_read\_shortcircuit\_streams\_cache\_size | 256 | | dfs\_client\_socketcache\_expiryMsec | 3000 | diff --git a/docs/en/engines/table-engines/integrations/hive.md b/docs/en/engines/table-engines/integrations/hive.md index adcb73605bb..5d10e417ae3 100644 --- a/docs/en/engines/table-engines/integrations/hive.md +++ b/docs/en/engines/table-engines/integrations/hive.md @@ -6,7 +6,7 @@ sidebar_label: Hive # Hive -The Hive engine allows you to perform `SELECT` quries on HDFS Hive table. Currently it supports input formats as below: +The Hive engine allows you to perform `SELECT` queries on HDFS Hive table. Currently it supports input formats as below: - Text: only supports simple scalar column types except `binary` diff --git a/docs/en/engines/table-engines/integrations/nats.md b/docs/en/engines/table-engines/integrations/nats.md index 7f09c516d6f..a82d74e0d95 100644 --- a/docs/en/engines/table-engines/integrations/nats.md +++ b/docs/en/engines/table-engines/integrations/nats.md @@ -10,7 +10,7 @@ This engine allows integrating ClickHouse with [NATS](https://nats.io/). `NATS` lets you: -- Publish or subcribe to message subjects. +- Publish or subscribe to message subjects. - Process new messages as they become available. ## Creating a Table {#table_engine-redisstreams-creating-a-table} @@ -46,7 +46,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Required parameters: - `nats_url` – host:port (for example, `localhost:5672`).. -- `nats_subjects` – List of subject for NATS table to subscribe/publsh to. Supports wildcard subjects like `foo.*.bar` or `baz.>` +- `nats_subjects` – List of subject for NATS table to subscribe/publish to. Supports wildcard subjects like `foo.*.bar` or `baz.>` - `nats_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section. 
Optional parameters: diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index f27d4d48f75..51b3048706f 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -57,7 +57,7 @@ or via config (since version 21.11): ``` -Some parameters can be overriden by key value arguments: +Some parameters can be overridden by key value arguments: ``` sql SELECT * FROM postgresql(postgres1, schema='schema1', table='table1'); ``` diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 595bc0c344f..f2eaacd92a5 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -23,7 +23,7 @@ CREATE TABLE s3_engine_table (name String, value UInt32) - `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed. - `format` — The [format](../../../interfaces/formats.md#formats) of the file. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). -- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension. +- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will auto-detect compression by file extension. ### PARTITION BY @@ -140,8 +140,8 @@ The following settings can be set before query execution or placed into configur - `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). - `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. - `s3_upload_part_size_multiply_factor` - Multiply `s3_min_upload_part_size` by this factor each time `s3_multiply_parts_count_threshold` parts were uploaded from a single write to S3. Default values is `2`. -- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3 `s3_min_upload_part_size multiplied` by `s3_upload_part_size_multiply_factor`. DEfault value us `500`. -- `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurenly for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each inflight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enought, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file. +- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3 `s3_min_upload_part_size multiplied` by `s3_upload_part_size_multiply_factor`. Default value us `500`. 
+- `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. Its number should be limited. The value `0` means unlimited. Default value is `20`. Each in-flight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts and more when file is big enough, see `upload_part_size_multiply_factor`. With default settings one uploaded file consumes not more than `320Mb` for a file which is less than `8G`. The consumption is greater for a larger file. Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration. diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md index f841f157376..03617a1a709 100644 --- a/docs/en/engines/table-engines/mergetree-family/annindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md @@ -78,7 +78,7 @@ ENGINE = MergeTree ORDER BY id; ``` -With greater `GRANULARITY` indexes remember the data structure better. The `GRANULARITY` indicates how many granules will be used to construct the index. The more data is provided for the index, the more of it can be handled by one index and the more chances that with the right hyperparameters the index will remember the data structure better. But some indexes can't be built if they don't have enough data, so this granule will always participate in the query. For more information, see the description of indexes. +With greater `GRANULARITY` indexes remember the data structure better. The `GRANULARITY` indicates how many granules will be used to construct the index. The more data is provided for the index, the more of it can be handled by one index and the more chances that with the right hyper parameters the index will remember the data structure better. But some indexes can't be built if they don't have enough data, so this granule will always participate in the query. For more information, see the description of indexes. As the indexes are built only during insertions into table, `INSERT` and `OPTIMIZE` queries are slower than for ordinary table. At this stage indexes remember all the information about the given data. ANNIndexes should be used if you have immutable or rarely changed data and many read requests. @@ -135,7 +135,7 @@ ORDER BY id; Annoy supports `L2Distance` and `cosineDistance`. -In the `SELECT` in the settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time tradeoff between better accuracy and speed. +In the `SELECT` in the settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time trade-off between better accuracy and speed. 
__Example__: ``` sql diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index edb320a2507..7e564b23676 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -165,7 +165,7 @@ Performance of such a query heavily depends on the table layout. Because of that The key factors for a good performance: -- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will underutilize the machine +- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will under-utilize the machine - partitions shouldn't be too small, so batch processing won't degenerate into row-by-row processing - partitions should be comparable in size, so all threads will do roughly the same amount of work diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 79ced0b6ce5..07f706af91d 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -779,7 +779,7 @@ Disks, volumes and storage policies should be declared inside the ` @@ -1224,7 +1224,7 @@ Limit parameters (mainly for internal usage): * `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage. * `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage. * `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated. -* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurenly for one object. +* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. Other parameters: * `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks//`. diff --git a/docs/en/engines/table-engines/special/executable.md b/docs/en/engines/table-engines/special/executable.md index 25049d7b46e..d976beeab6c 100644 --- a/docs/en/engines/table-engines/special/executable.md +++ b/docs/en/engines/table-engines/special/executable.md @@ -65,7 +65,7 @@ if __name__ == "__main__": main() ``` -The following `my_executable_table` is built from the output of `my_script.py`, which will generate 10 random strings everytime you run a `SELECT` from `my_executable_table`: +The following `my_executable_table` is built from the output of `my_script.py`, which will generate 10 random strings every time you run a `SELECT` from `my_executable_table`: ```sql CREATE TABLE my_executable_table ( @@ -223,4 +223,4 @@ SETTINGS pool_size = 4; ``` -ClickHouse will maintain 4 processes on-demand when your client queries the `sentiment_pooled` table. \ No newline at end of file +ClickHouse will maintain 4 processes on-demand when your client queries the `sentiment_pooled` table. 
diff --git a/docs/en/engines/table-engines/special/keepermap.md b/docs/en/engines/table-engines/special/keepermap.md index a1c7009b712..6ce56adbae9 100644 --- a/docs/en/engines/table-engines/special/keepermap.md +++ b/docs/en/engines/table-engines/special/keepermap.md @@ -72,7 +72,7 @@ Additionally, number of keys will have a soft limit of 4 for the number of keys. If multiple tables are created on the same ZooKeeper path, the values are persisted until there exists at least 1 table using it. As a result, it is possible to use `ON CLUSTER` clause when creating the table and sharing the data from multiple ClickHouse instances. -Of course, it's possible to manually run `CREATE TABLE` with same path on nonrelated ClickHouse instances to have same data sharing effect. +Of course, it's possible to manually run `CREATE TABLE` with same path on unrelated ClickHouse instances to have same data sharing effect. ## Supported operations {#table_engine-KeeperMap-supported-operations} diff --git a/docs/en/getting-started/example-datasets/amazon-reviews.md b/docs/en/getting-started/example-datasets/amazon-reviews.md index f35806aa66f..75e4549cb78 100644 --- a/docs/en/getting-started/example-datasets/amazon-reviews.md +++ b/docs/en/getting-started/example-datasets/amazon-reviews.md @@ -87,7 +87,7 @@ ORDER BY (marketplace, review_date, product_category); 3. We are now ready to insert the data into ClickHouse. Before we do, check out the [list of files in the dataset](https://s3.amazonaws.com/amazon-reviews-pds/tsv/index.txt) and decide which ones you want to include. -4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of mulitple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`: +4. We will insert all of the US reviews - which is about 151M rows. The following `INSERT` command uses the `s3Cluster` table function, which allows the processing of multiple S3 files in parallel using all the nodes of your cluster. We also use a wildcard to insert any file that starts with the name `https://s3.amazonaws.com/amazon-reviews-pds/tsv/amazon_reviews_us_`: ```sql INSERT INTO amazon_reviews @@ -473,4 +473,4 @@ It runs quite a bit faster - which means the cache is helping us out here: └────────────┴───────────────────────────────────────────────────────────────────────┴────────────────────┴───────┘ 50 rows in set. Elapsed: 33.954 sec. Processed 150.96 million rows, 68.95 GB (4.45 million rows/s., 2.03 GB/s.) -``` \ No newline at end of file +``` diff --git a/docs/en/getting-started/example-datasets/cell-towers.md b/docs/en/getting-started/example-datasets/cell-towers.md index 048eecb285b..a84eb5d561f 100644 --- a/docs/en/getting-started/example-datasets/cell-towers.md +++ b/docs/en/getting-started/example-datasets/cell-towers.md @@ -317,7 +317,7 @@ To build a Superset dashboard using the OpenCelliD dataset you should: Make sure that you set **SSL** on when connecting to ClickHouse Cloud or other ClickHouse systems that enforce the use of SSL. 
::: - ![Add ClickHouse as a Superset datasource](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png) + ![Add ClickHouse as a Superset data source](@site/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png) ### Add the table **cell_towers** as a Superset **dataset** @@ -364,5 +364,5 @@ The data is also available for interactive queries in the [Playground](https://p This [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=) will populate the username and even the query for you. -Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the hostname and port number). +Although you cannot create tables in the Playground, you can run all of the queries and even use Superset (adjust the host name and port number). ::: diff --git a/docs/en/getting-started/example-datasets/github.md b/docs/en/getting-started/example-datasets/github.md index 02965ed5e33..9ed8782e512 100644 --- a/docs/en/getting-started/example-datasets/github.md +++ b/docs/en/getting-started/example-datasets/github.md @@ -806,7 +806,7 @@ FROM 31 rows in set. Elapsed: 0.043 sec. Processed 7.54 million rows, 40.53 MB (176.71 million rows/s., 950.40 MB/s.) ``` -Maybe a little more near the end of the month, but overall we keep a good even distribution. Again this is unrealiable due to the filtering of the docs filter during data insertion. +Maybe a little more near the end of the month, but overall we keep a good even distribution. Again this is unreliable due to the filtering of the docs filter during data insertion. ## Authors with the most diverse impact @@ -940,7 +940,7 @@ LIMIT 10 10 rows in set. Elapsed: 0.106 sec. Processed 798.15 thousand rows, 13.97 MB (7.51 million rows/s., 131.41 MB/s.) ``` -This makes sense because Alexey has been responsible for maintaining the Change log. But what if we use the basename of the file to identify his popular files - this allows for renames and should focus on code contributions. +This makes sense because Alexey has been responsible for maintaining the Change log. But what if we use the base name of the file to identify his popular files - this allows for renames and should focus on code contributions. 
[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBiYXNlLAogICAgY291bnQoKSBBUyBjCkZST00gZ2l0X2NsaWNraG91c2UuZmlsZV9jaGFuZ2VzCldIRVJFIChhdXRob3IgPSAnQWxleGV5IE1pbG92aWRvdicpIEFORCAoZmlsZV9leHRlbnNpb24gSU4gKCdoJywgJ2NwcCcsICdzcWwnKSkKR1JPVVAgQlkgYmFzZW5hbWUocGF0aCkgQVMgYmFzZQpPUkRFUiBCWSBjIERFU0MKTElNSVQgMTA=) diff --git a/docs/en/getting-started/example-datasets/opensky.md b/docs/en/getting-started/example-datasets/opensky.md index 7093a2df04f..df28809495c 100644 --- a/docs/en/getting-started/example-datasets/opensky.md +++ b/docs/en/getting-started/example-datasets/opensky.md @@ -9,7 +9,7 @@ The data in this dataset is derived and cleaned from the full OpenSky dataset to Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd -Martin Strohmeier, Xavier Olive, Jannis Lübbe, Matthias Schäfer, and Vincent Lenders +Martin Strohmeier, Xavier Olive, Jannis Luebbe, Matthias Schaefer, and Vincent Lenders "Crowdsourced air traffic data from the OpenSky Network 2019–2020" Earth System Science Data 13(2), 2021 https://doi.org/10.5194/essd-13-357-2021 diff --git a/docs/en/getting-started/example-datasets/reddit-comments.md b/docs/en/getting-started/example-datasets/reddit-comments.md index 02961a07730..4ede8297e13 100644 --- a/docs/en/getting-started/example-datasets/reddit-comments.md +++ b/docs/en/getting-started/example-datasets/reddit-comments.md @@ -542,7 +542,7 @@ LIMIT 10; 10 rows in set. Elapsed: 5.956 sec. Processed 14.69 billion rows, 126.19 GB (2.47 billion rows/s., 21.19 GB/s.) ``` -11. Let's see which subreddits had the biggest increase in commnents from 2018 to 2019: +11. Let's see which subreddits had the biggest increase in comments from 2018 to 2019: ```sql SELECT @@ -718,4 +718,3 @@ ORDER BY quarter ASC; └────────────┴────────────┴───────────┴──────────┘ 70 rows in set. Elapsed: 325.835 sec. Processed 14.69 billion rows, 2.57 TB (45.08 million rows/s., 7.87 GB/s.) -``` \ No newline at end of file diff --git a/docs/en/getting-started/example-datasets/youtube-dislikes.md b/docs/en/getting-started/example-datasets/youtube-dislikes.md index 5f4ef696b8b..e24c6e5a6dc 100644 --- a/docs/en/getting-started/example-datasets/youtube-dislikes.md +++ b/docs/en/getting-started/example-datasets/youtube-dislikes.md @@ -22,7 +22,7 @@ The steps below will easily work on a local install of ClickHouse too. The only ## Step-by-step instructions -1. Let's see what the data looks like. The `s3cluster` table function returns a table, so we can `DESCRIBE` the reult: +1. Let's see what the data looks like. The `s3cluster` table function returns a table, so we can `DESCRIBE` the result: ```sql DESCRIBE s3Cluster( @@ -322,7 +322,7 @@ ORDER BY month ASC; A spike of uploaders [around covid is noticeable](https://www.theverge.com/2020/3/27/21197642/youtube-with-me-style-videos-views-coronavirus-cook-workout-study-home-beauty). -### More subtitiles over time and when +### More subtitles over time and when With advances in speech recognition, it’s easier than ever to create subtitles for video with youtube adding auto-captioning in late 2009 - was the jump then? 
@@ -484,4 +484,4 @@ ARRAY JOIN │ 20th │ 16 │ │ 10th │ 6 │ └────────────┴─────────┘ -``` \ No newline at end of file +``` diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 82be1c10dcc..f93ab264511 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -275,9 +275,9 @@ Type: UInt64 Default: 1000 -## max_concurrent_insert_queries +## max_concurrent_queries -Limit on total number of concurrent insert queries. Zero means Unlimited. +Limit on total number of concurrently executed queries. Zero means Unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also max_concurrent_insert_queries, max_concurrent_select_queries, max_concurrent_queries_for_all_users. Zero means unlimited. :::note These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. @@ -287,9 +287,9 @@ Type: UInt64 Default: 0 -## max_concurrent_queries +## max_concurrent_insert_queries -Limit on total number of concurrently executed queries. Zero means Unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also max_concurrent_insert_queries, max_concurrent_select_queries, max_concurrent_queries_for_all_users. Zero means unlimited. +Limit on total number of concurrent insert queries. Zero means Unlimited. :::note These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. @@ -1277,49 +1277,6 @@ For more information, see the section [Creating replicated tables](../../engines ``` - -## max_concurrent_queries_for_user {#max-concurrent-queries-for-user} - -The maximum number of simultaneously processed queries related to MergeTree table per user. - -Possible values: - -- Positive integer. -- 0 — No limit. - -Default value: `0`. - -**Example** - -``` xml -<max_concurrent_queries_for_user>5</max_concurrent_queries_for_user> -``` - -## max_concurrent_queries_for_all_users {#max-concurrent-queries-for-all-users} - -Throw exception if the value of this setting is less or equal than the current number of simultaneously processed queries. - -Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users and database administrator can set it to 100 for itself to run queries for investigation even when the server is overloaded. - -Modifying the setting for one query or user does not affect other queries. - -Possible values: - -- Positive integer. - -- 0 — No limit. - -Default value: `0`. - -**Example** - -``` xml -<max_concurrent_queries_for_all_users>99</max_concurrent_queries_for_all_users> -``` - -**See Also** - -- [max_concurrent_queries](#max-concurrent-queries) - ## max_open_files {#max-open-files} The maximum number of open files. @@ -1947,7 +1904,7 @@ Config fields: - `regexp` - RE2 compatible regular expression (mandatory) - `replace` - substitution string for sensitive data (optional, by default - six asterisks) -The masking rules are applied to the whole query (to prevent leaks of sensitive data from malformed / non-parsable queries). +The masking rules are applied to the whole query (to prevent leaks of sensitive data from malformed / non-parseable queries). `system.events` table have counter `QueryMaskingRulesMatch` which have an overall number of query masking rules matches. 
diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 3b87b829c92..65038d3a256 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1182,7 +1182,7 @@ Possible values: - `bin` - as 16-bytes binary. - `str` - as a string of 36 bytes. -- `ext` - as extention with ExtType = 2. +- `ext` - as extension with ExtType = 2. Default value: `ext`. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 01aec76b6d3..6c951739d41 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -646,6 +646,48 @@ Used for the same purpose as `max_block_size`, but it sets the recommended block However, the block size cannot be more than `max_block_size` rows. By default: 1,000,000. It only works when reading from MergeTree engines. +## max_concurrent_queries_for_user {#max-concurrent-queries-for-user} + +The maximum number of simultaneously processed queries related to MergeTree table per user. + +Possible values: + +- Positive integer. +- 0 — No limit. + +Default value: `0`. + +**Example** + +``` xml +<max_concurrent_queries_for_user>5</max_concurrent_queries_for_user> +``` + +## max_concurrent_queries_for_all_users {#max-concurrent-queries-for-all-users} + +Throw exception if the value of this setting is less or equal than the current number of simultaneously processed queries. + +Example: `max_concurrent_queries_for_all_users` can be set to 99 for all users and database administrator can set it to 100 for itself to run queries for investigation even when the server is overloaded. + +Modifying the setting for one query or user does not affect other queries. + +Possible values: + +- Positive integer. +- 0 — No limit. + +Default value: `0`. + +**Example** + +``` xml +<max_concurrent_queries_for_all_users>99</max_concurrent_queries_for_all_users> +``` + +**See Also** + +- [max_concurrent_queries](/docs/en/operations/server-configuration-parameters/settings.md/#max_concurrent_queries) + ## merge_tree_min_rows_for_concurrent_read {#setting-merge-tree-min-rows-for-concurrent-read} If the number of rows to be read from a file of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `merge_tree_min_rows_for_concurrent_read` then ClickHouse tries to perform a concurrent reading from this file on several threads. @@ -1050,6 +1092,12 @@ Timeouts in seconds on the socket used for communicating with the client. Default value: 10, 300, 300. +## handshake_timeout_ms {#handshake-timeout-ms} + +Timeout in milliseconds for receiving Hello packet from replicas during handshake. + +Default value: 10000. + ## cancel_http_readonly_queries_on_client_close {#cancel-http-readonly-queries-on-client-close} Cancels HTTP read-only queries (e.g. SELECT) when a client closes the connection without waiting for the response. @@ -1107,7 +1155,7 @@ Default value: `0`. Could be used for throttling speed when replicating the data to add or replace new nodes. :::note -60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8). +60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8). ::: ## max_replicated_sends_network_bandwidth_for_server {#max_replicated_sends_network_bandwidth_for_server} @@ -1128,7 +1176,7 @@ Default value: `0`. Could be used for throttling speed when replicating the data to add or replace new nodes. :::note -60000000 bytes/s approximatly corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8). 
+60000000 bytes/s approximately corresponds to 457 Mbps (60000000 / 1024 / 1024 * 8). ::: ## connect_timeout_with_failover_ms {#connect-timeout-with-failover-ms} @@ -2030,7 +2078,7 @@ FORMAT PrettyCompactMonoBlock ## distributed_push_down_limit {#distributed-push-down-limit} -Enables or disables [LIMIT](#limit) applying on each shard separatelly. +Enables or disables [LIMIT](#limit) applying on each shard separately. This will allow to avoid: - Sending extra rows over network; @@ -2431,7 +2479,7 @@ Default value: 0. ## allow_introspection_functions {#settings-allow_introspection_functions} -Enables or disables [introspections functions](../../sql-reference/functions/introspection.md) for query profiling. +Enables or disables [introspection functions](../../sql-reference/functions/introspection.md) for query profiling. Possible values: @@ -3492,7 +3540,7 @@ Default value: `0`. ## database_replicated_initial_query_timeout_sec {#database_replicated_initial_query_timeout_sec} -Sets how long initial DDL query should wait for Replicated database to precess previous DDL queue entries in seconds. +Sets how long initial DDL query should wait for Replicated database to process previous DDL queue entries in seconds. Possible values: diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md index ccdc2d8c742..2915b053458 100644 --- a/docs/en/operations/system-tables/columns.md +++ b/docs/en/operations/system-tables/columns.md @@ -28,7 +28,7 @@ The `system.columns` table contains the following columns (the column type is sh - `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression. - `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name. - `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned. -- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned. +- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bit width for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned. - `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned. - `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. 
Otherwise, the `NULL` value is returned. - `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned. diff --git a/docs/en/operations/system-tables/dropped_tables.md b/docs/en/operations/system-tables/dropped_tables.md index 144c03109ac..e2a09094c87 100644 --- a/docs/en/operations/system-tables/dropped_tables.md +++ b/docs/en/operations/system-tables/dropped_tables.md @@ -12,7 +12,7 @@ Columns: - `table` ([String](../../sql-reference/data-types/string.md)) — Table name. - `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid. - `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name. -- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of table's metadata file in metadate_dropped directory. +- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of table's metadata file in metadata_dropped directory. - `table_dropped_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the next attempt to remove table's data is scheduled on. Usually it's the table when the table was dropped plus `database_atomic_delay_before_drop_table_sec` **Example** diff --git a/docs/en/operations/system-tables/information_schema.md b/docs/en/operations/system-tables/information_schema.md index 07e9a9e2f58..35fd3a753b5 100644 --- a/docs/en/operations/system-tables/information_schema.md +++ b/docs/en/operations/system-tables/information_schema.md @@ -43,7 +43,7 @@ Columns: - `data_type` ([String](../../sql-reference/data-types/string.md)) — Column type. - `character_maximum_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned. - `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned. -- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned. +- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bit width for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned. - `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned. 
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned. - `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned. diff --git a/docs/en/operations/system-tables/licenses.md b/docs/en/operations/system-tables/licenses.md index 9296e78c797..0f09d559d8b 100644 --- a/docs/en/operations/system-tables/licenses.md +++ b/docs/en/operations/system-tables/licenses.md @@ -3,7 +3,7 @@ slug: /en/operations/system-tables/licenses --- # licenses -Сontains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources. +Contains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources. Columns: diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index e61c6ed2ba4..9159d1e9284 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -100,7 +100,7 @@ Columns: - `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl). :::note -The `move_ttl_info.expression` array is kept mostly for backward compatibility, now the simpliest way to check `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields. +The `move_ttl_info.expression` array is kept mostly for backward compatibility, now the simplest way to check `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields. ::: - `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl). diff --git a/docs/en/operations/system-tables/quotas.md b/docs/en/operations/system-tables/quotas.md index a9748a2b464..a72a306d6b7 100644 --- a/docs/en/operations/system-tables/quotas.md +++ b/docs/en/operations/system-tables/quotas.md @@ -14,8 +14,8 @@ Columns: - `['user_name']` — Connections with the same user name share the same quota. - `['ip_address']` — Connections from the same IP share the same quota. - `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota_key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header. - - `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `user_name`. - - `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. 
If a key isn’t provided by a client, the qouta is tracked for `ip_address`. + - `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the quota is tracked for `user_name`. + - `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the quota is tracked for `ip_address`. - `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Time interval lengths in seconds. - `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows which users the quota is applied to. Values: - `0` — The quota applies to users specify in the `apply_to_list`. diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index 82e9fa206ea..e4461e14236 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -50,7 +50,7 @@ Columns: - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) - [Distributed](../../engines/table-engines/special/distributed.md#distributed) -- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underying `Buffer` table). +- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underlying `Buffer` table). - `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `NULL` (does not includes any underlying storage). diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md index a5aae422be7..89d54adc30d 100644 --- a/docs/en/operations/system-tables/trace_log.md +++ b/docs/en/operations/system-tables/trace_log.md @@ -43,7 +43,7 @@ Columns: - `event` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) - For trace type `ProfileEvent` is the name of updated profile event, for other trace types is an empty string. -- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of incremnt of profile event, for other trace types is 0. +- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of increment of profile event, for other trace types is 0. **Example** diff --git a/docs/en/operations/system-tables/zookeeper_log.md b/docs/en/operations/system-tables/zookeeper_log.md index 970ed192a48..b7cc4e22cd6 100644 --- a/docs/en/operations/system-tables/zookeeper_log.md +++ b/docs/en/operations/system-tables/zookeeper_log.md @@ -33,7 +33,7 @@ Columns with request response parameters: - `zxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — ZooKeeper transaction ID. 
The serial number issued by the ZooKeeper server in response to a successfully executed request (`0` if the request was not executed/returned an error/the client does not know whether the request was executed). - `error` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — Error code. Can have many values, here are just some of them: - - `ZOK` — The request was executed seccessfully. + - `ZOK` — The request was executed successfully. - `ZCONNECTIONLOSS` — The connection was lost. - `ZOPERATIONTIMEOUT` — The request execution timeout has expired. - `ZSESSIONEXPIRED` — The session has expired. @@ -43,7 +43,7 @@ Columns with request response parameters: - `path_created` ([String](../../sql-reference/data-types/string.md)) — The path to the created ZooKeeper node (for responses to the `CREATE` request), may differ from the `path` if the node is created as a `sequential`. - `stat_czxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that caused this ZooKeeper node to be created. - `stat_mzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that last modified this ZooKeeper node. -- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The transaction ID of the change that last modified childern of this ZooKeeper node. +- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The transaction ID of the change that last modified children of this ZooKeeper node. - `stat_version` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the data of this ZooKeeper node. - `stat_cversion` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the children of this ZooKeeper node. - `stat_dataLength` ([Int32](../../sql-reference/data-types/int-uint.md)) — The length of the data field of this ZooKeeper node. diff --git a/docs/en/operations/utilities/clickhouse-keeper-client.md b/docs/en/operations/utilities/clickhouse-keeper-client.md new file mode 100644 index 00000000000..77f816fe428 --- /dev/null +++ b/docs/en/operations/utilities/clickhouse-keeper-client.md @@ -0,0 +1,53 @@ +--- +slug: /en/operations/utilities/clickhouse-keeper-client +sidebar_label: clickhouse-keeper-client +--- + +# clickhouse-keeper-client + +A client application to interact with clickhouse-keeper by its native protocol. + +## Keys {#clickhouse-keeper-client} + +- `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode. +- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. +- `-p N`, `--port=N` — Server port. Default value: 2181 +- `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s. +- `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s. +- `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s. +- `--history-file=FILE_PATH` — Set path of history file. Default value: `~/.keeper-client-history`. +- `--help` — Shows the help message. 
+ +## Example {#clickhouse-keeper-client-example} + +```bash +./clickhouse-keeper-client -h localhost:2181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30 +Connected to ZooKeeper at [::1]:2181 with session_id 137 +/ :) ls +keeper foo bar +/ :) cd keeper +/keeper :) ls +api_version +/keeper :) cd api_version +/keeper/api_version :) ls + +/keeper/api_version :) cd xyz +Path /keeper/api_version/xyz does not exists +/keeper/api_version :) cd ../../ +/ :) ls +keeper foo bar +/ :) get keeper/api_version +2 +``` + +## Commands {#clickhouse-keeper-client-commands} + +- `ls [path]` -- Lists the nodes for the given path (default: cwd) +- `cd [path]` -- Change the working path (default `.`) +- `set <path> <value> [version]` -- Updates the node's value. Only update if version matches (default: -1) +- `create <path> <value>` -- Creates new node +- `get <path>` -- Returns the node's value +- `remove <path>` -- Remove the node +- `rmr <path>` -- Recursively deletes path. Confirmation required +- `flwc <command>` -- Executes four-letter-word command +- `help` -- Prints this message diff --git a/docs/en/operations/utilities/clickhouse-obfuscator.md b/docs/en/operations/utilities/clickhouse-obfuscator.md index 077489ba76e..ad51e9c7776 100644 --- a/docs/en/operations/utilities/clickhouse-obfuscator.md +++ b/docs/en/operations/utilities/clickhouse-obfuscator.md @@ -24,7 +24,7 @@ It is designed to retain the following properties of data: Most of the properties above are viable for performance testing: -reading data, filtering, aggregatio, and sorting will work at almost the same speed +reading data, filtering, aggregation, and sorting will work at almost the same speed as on original data due to saved cardinalities, magnitudes, compression ratios, etc. It works in a deterministic fashion: you define a seed value and the transformation is determined by input data and by seed. diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 1b20f74d466..5fbbf8f723c 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -356,7 +356,7 @@ Type: `UInt8`. Let’s consider an example of calculating the `retention` function to determine site traffic. -**1.** Сreate a table to illustrate an example. +**1.** Create a table to illustrate an example. ``` sql CREATE TABLE retention_test(date Date, uid Int32) ENGINE = Memory; diff --git a/docs/en/sql-reference/aggregate-functions/reference/boundrat.md b/docs/en/sql-reference/aggregate-functions/reference/boundrat.md new file mode 100644 index 00000000000..f3907af8030 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/boundrat.md @@ -0,0 +1,44 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/boundingRatio +sidebar_position: 2 +title: boundingRatio +--- + +Aggregate function that calculates the slope between the leftmost and rightmost points across a group of values. + +Example: + +Sample data: +```sql +SELECT + number, + number * 1.5 +FROM numbers(10) +``` +```response +┌─number─┬─multiply(number, 1.5)─┐ +│ 0 │ 0 │ +│ 1 │ 1.5 │ +│ 2 │ 3 │ +│ 3 │ 4.5 │ +│ 4 │ 6 │ +│ 5 │ 7.5 │ +│ 6 │ 9 │ +│ 7 │ 10.5 │ +│ 8 │ 12 │ +│ 9 │ 13.5 │ +└────────┴───────────────────────┘ +``` + +The boundingRatio() function returns the slope of the line between the leftmost and rightmost points, in the above data these points are `(0,0)` and `(9,13.5)`. 
+ +```sql +SELECT boundingRatio(number, number * 1.5) +FROM numbers(10) +``` +```response +┌─boundingRatio(number, multiply(number, 1.5))─┐ +│ 1.5 │ +└──────────────────────────────────────────────┘ +``` + diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md index f412724ea08..e9e2c367610 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md @@ -5,7 +5,7 @@ sidebar_position: 351 # cramersV -[Cramér's V](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V) (sometimes referred to as Cramér's phi) is a measure of association between two columns in a table. The result of the `cramersV` function ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. It may be viewed as the association between two variables as a percentage of their maximum possible variation. +[Cramer's V](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V) (sometimes referred to as Cramer's phi) is a measure of association between two columns in a table. The result of the `cramersV` function ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. It may be viewed as the association between two variables as a percentage of their maximum possible variation. **Syntax** @@ -69,4 +69,4 @@ Result: ┌─────cramersV(a, b)─┐ │ 0.8944271909999159 │ └────────────────────┘ -``` \ No newline at end of file +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md index 8e577efbc4d..f5ad3a8a937 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md @@ -6,7 +6,7 @@ sidebar_position: 352 # cramersVBiasCorrected -Cramér's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramér's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction). +Cramer's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramer's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction). diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md index a8203c6b3f4..75041ace7a3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md @@ -6,7 +6,7 @@ sidebar_title: exponentialMovingAverage ## exponentialMovingAverage -Сalculates the exponential moving average of values for the determined time. 
+Calculates the exponential moving average of values for the determined time. **Syntax** @@ -27,7 +27,7 @@ Each `value` corresponds to the determinate `timeunit`. The half-life `x` is the **Returned values** -- Returnes an [exponentially smoothed moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) of the values for the past `x` time at the latest point of time. +- Returns an [exponentially smoothed moving average](https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average) of the values for the past `x` time at the latest point of time. Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64). diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md index 5f57407a419..3d833555a43 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md @@ -5,7 +5,7 @@ sidebar_position: 125 # groupBitAnd -Applies bitwise `AND` for series of numbers. +Applies bit-wise `AND` for series of numbers. ``` sql groupBitAnd(expr) diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md index 59be69540b0..138ee998405 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md @@ -5,7 +5,7 @@ sidebar_position: 126 # groupBitOr -Applies bitwise `OR` for series of numbers. +Applies bit-wise `OR` for series of numbers. ``` sql groupBitOr(expr) diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md index b00876a2fdf..168335a010c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md @@ -5,7 +5,7 @@ sidebar_position: 127 # groupBitXor -Applies bitwise `XOR` for series of numbers. +Applies bit-wise `XOR` for series of numbers. 
``` sql groupBitXor(expr) diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index 50208352f38..17ef494e9ad 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -9,74 +9,75 @@ toc_hidden: true Standard aggregate functions: -- [count](../../../sql-reference/aggregate-functions/reference/count.md) -- [min](../../../sql-reference/aggregate-functions/reference/min.md) -- [max](../../../sql-reference/aggregate-functions/reference/max.md) -- [sum](../../../sql-reference/aggregate-functions/reference/sum.md) -- [avg](../../../sql-reference/aggregate-functions/reference/avg.md) -- [any](../../../sql-reference/aggregate-functions/reference/any.md) -- [stddevPop](../../../sql-reference/aggregate-functions/reference/stddevpop.md) -- [stddevSamp](../../../sql-reference/aggregate-functions/reference/stddevsamp.md) -- [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md) -- [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md) -- [covarPop](../../../sql-reference/aggregate-functions/reference/covarpop.md) -- [covarSamp](../../../sql-reference/aggregate-functions/reference/covarsamp.md) +- [count](/docs/en/sql-reference/aggregate-functions/reference/count.md) +- [min](/docs/en/sql-reference/aggregate-functions/reference/min.md) +- [max](/docs/en/sql-reference/aggregate-functions/reference/max.md) +- [sum](/docs/en/sql-reference/aggregate-functions/reference/sum.md) +- [avg](/docs/en/sql-reference/aggregate-functions/reference/avg.md) +- [any](/docs/en/sql-reference/aggregate-functions/reference/any.md) +- [stddevPop](/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md) +- [stddevSamp](/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md) +- [varPop](/docs/en/sql-reference/aggregate-functions/reference/varpop.md) +- [varSamp](/docs/en/sql-reference/aggregate-functions/reference/varsamp.md) +- [covarPop](/docs/en/sql-reference/aggregate-functions/reference/covarpop.md) +- [covarSamp](/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md) ClickHouse-specific aggregate functions: -- [anyHeavy](../../../sql-reference/aggregate-functions/reference/anyheavy.md) -- [anyLast](../../../sql-reference/aggregate-functions/reference/anylast.md) -- [first_value](../../../sql-reference/aggregate-functions/reference/first_value.md) -- [last_value](../../../sql-reference/aggregate-functions/reference/last_value.md) -- [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md) -- [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md) -- [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md) -- [topK](../../../sql-reference/aggregate-functions/reference/topk.md) -- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md) -- [groupArray](../../../sql-reference/aggregate-functions/reference/grouparray.md) -- [groupArrayLast](../../../sql-reference/aggregate-functions/reference/grouparraylast.md) -- [groupUniqArray](../../../sql-reference/aggregate-functions/reference/groupuniqarray.md) -- [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md) -- [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md) -- [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md) -- 
[groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md) -- [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md) -- [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md) -- [groupBitmap](../../../sql-reference/aggregate-functions/reference/groupbitmap.md) -- [groupBitmapAnd](../../../sql-reference/aggregate-functions/reference/groupbitmapand.md) -- [groupBitmapOr](../../../sql-reference/aggregate-functions/reference/groupbitmapor.md) -- [groupBitmapXor](../../../sql-reference/aggregate-functions/reference/groupbitmapxor.md) -- [sumWithOverflow](../../../sql-reference/aggregate-functions/reference/sumwithoverflow.md) -- [sumMap](../../../sql-reference/aggregate-functions/reference/summap.md) -- [minMap](../../../sql-reference/aggregate-functions/reference/minmap.md) -- [maxMap](../../../sql-reference/aggregate-functions/reference/maxmap.md) -- [skewSamp](../../../sql-reference/aggregate-functions/reference/skewsamp.md) -- [skewPop](../../../sql-reference/aggregate-functions/reference/skewpop.md) -- [kurtSamp](../../../sql-reference/aggregate-functions/reference/kurtsamp.md) -- [kurtPop](../../../sql-reference/aggregate-functions/reference/kurtpop.md) -- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md) -- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md) -- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md) -- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md) -- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md) -- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md) -- [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md) -- [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md) -- [quantileExactLow](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactlow) -- [quantileExactHigh](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexacthigh) -- [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md) -- [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md) -- [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md) -- [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md) -- [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) -- [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md) -- [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16) -- [quantileBFloat16Weighted](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted) -- [simpleLinearRegression](../../../sql-reference/aggregate-functions/reference/simplelinearregression.md) -- [stochasticLinearRegression](../../../sql-reference/aggregate-functions/reference/stochasticlinearregression.md) -- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md) -- [categoricalInformationValue](../../../sql-reference/aggregate-functions/reference/categoricalinformationvalue.md) +- 
[anyHeavy](/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md) +- [anyLast](/docs/en/sql-reference/aggregate-functions/reference/anylast.md) +- [boundingRatio](/docs/en/sql-reference/aggregate-functions/reference/boundrat.md) +- [first_value](/docs/en/sql-reference/aggregate-functions/reference/first_value.md) +- [last_value](/docs/en/sql-reference/aggregate-functions/reference/last_value.md) +- [argMin](/docs/en/sql-reference/aggregate-functions/reference/argmin.md) +- [argMax](/docs/en/sql-reference/aggregate-functions/reference/argmax.md) +- [avgWeighted](/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md) +- [topK](/docs/en/sql-reference/aggregate-functions/reference/topk.md) +- [topKWeighted](/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md) +- [groupArray](/docs/en/sql-reference/aggregate-functions/reference/grouparray.md) +- [groupArrayLast](/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md) +- [groupUniqArray](/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md) +- [groupArrayInsertAt](/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md) +- [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md) +- [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md) +- [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md) +- [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md) +- [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md) +- [groupBitmap](/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md) +- [groupBitmapAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md) +- [groupBitmapOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md) +- [groupBitmapXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md) +- [sumWithOverflow](/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md) +- [sumMap](/docs/en/sql-reference/aggregate-functions/reference/summap.md) +- [minMap](/docs/en/sql-reference/aggregate-functions/reference/minmap.md) +- [maxMap](/docs/en/sql-reference/aggregate-functions/reference/maxmap.md) +- [skewSamp](/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md) +- [skewPop](/docs/en/sql-reference/aggregate-functions/reference/skewpop.md) +- [kurtSamp](/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md) +- [kurtPop](/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md) +- [uniq](/docs/en/sql-reference/aggregate-functions/reference/uniq.md) +- [uniqExact](/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md) +- [uniqCombined](/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md) +- [uniqCombined64](/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md) +- [uniqHLL12](/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md) +- [uniqTheta](/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md) +- [quantile](/docs/en/sql-reference/aggregate-functions/reference/quantile.md) +- [quantiles](/docs/en/sql-reference/aggregate-functions/reference/quantiles.md) +- [quantileExact](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md) +- [quantileExactLow](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactlow) +- 
[quantileExactHigh](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md#quantileexacthigh) +- [quantileExactWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md) +- [quantileTiming](/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md) +- [quantileTimingWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md) +- [quantileDeterministic](/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md) +- [quantileTDigest](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md) +- [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md) +- [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16) +- [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted) +- [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md) +- [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md) +- [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md) +- [categoricalInformationValue](/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md) - [contingency](./contingency.md) - [cramersV](./cramersv.md) - [cramersVBiasCorrected](./cramersvbiascorrected.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md b/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md index 3da9645181e..d159eec7ce6 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kolmogorovsmirnovtest.md @@ -30,11 +30,11 @@ Samples must belong to continuous, one-dimensional probability distributions. The null hypothesis is that samples come from the same distribution, e.g. F(x) = G(x) for all x. And the alternative is that the distributions are not identical. - `'greater'` - The null hypothesis is that values in the first sample are *stohastically smaller* than those in the second one, + The null hypothesis is that values in the first sample are *stochastically smaller* than those in the second one, e.g. the CDF of first distribution lies above and hence to the left of that for the second one. Which in fact means that F(x) >= G(x) for all x. And the alternative in this case is that F(x) < G(x) for at least one x. - `'less'`. - The null hypothesis is that values in the first sample are *stohastically greater* than those in the second one, + The null hypothesis is that values in the first sample are *stochastically greater* than those in the second one, e.g. the CDF of first distribution lies below and hence to the right of that for the second one. Which in fact means that F(x) <= G(x) for all x. And the alternative in this case is that F(x) > G(x) for at least one x. - `computation_method` — the method used to compute p-value. (Optional, default: `'auto'`.) [String](../../../sql-reference/data-types/string.md). 
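
To make the `'greater'` / `'less'` alternatives and the `computation_method` parameter above concrete, here is a hedged sketch of a call; it assumes the usual `(sample_data, sample_index)` argument pair and generates two purely illustrative samples with `randNormal`:

```sql
-- Sketch: compare two generated samples; the 0/1 value of num selects the sample.
SELECT kolmogorovSmirnovTest('greater', 'auto')(value, num)
FROM
(
    SELECT randNormal(0, 1) AS value, 0 AS num FROM numbers(1000)
    UNION ALL
    SELECT randNormal(0.5, 1) AS value, 1 AS num FROM numbers(1000)
);
```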
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md index 7b9addbbdde..b3e21e0e69e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md @@ -14,7 +14,7 @@ The result depends on the order of running the query, and is nondeterministic. When using multiple `quantile*` functions with different levels in a query, the internal states are not combined (that is, the query works less efficiently than it could). In this case, use the [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) function. :::note -Using `quantileTDigestWeighted` [is not recommended for tiny data sets](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) and can lead to significat error. In this case, consider possibility of using [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) instead. +Using `quantileTDigestWeighted` [is not recommended for tiny data sets](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) and can lead to significant error. In this case, consider possibility of using [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) instead. ::: **Syntax** diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md index 9481172c25b..f7615d90790 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md @@ -18,7 +18,7 @@ stochasticLinearRegression(1.0, 1.0, 10, 'SGD') 1. `learning rate` is the coefficient on step length, when gradient descent step is performed. Too big learning rate may cause infinite weights of the model. Default is `0.00001`. 2. `l2 regularization coefficient` which may help to prevent overfitting. Default is `0.1`. 3. `mini-batch size` sets the number of elements, which gradients will be computed and summed to perform one step of gradient descent. Pure stochastic descent uses one element, however having small batches(about 10 elements) make gradient steps more stable. Default is `15`. -4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require little bit more computations and memory, however they happen to be useful in terms of speed of convergance and stability of stochastic gradient methods. +4. `method for updating weights`, they are: `Adam` (by default), `SGD`, `Momentum`, `Nesterov`. `Momentum` and `Nesterov` require little bit more computations and memory, however they happen to be useful in terms of speed of convergence and stability of stochastic gradient methods. ### Usage diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index 059c6acdb9e..0da273e01ad 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -22,7 +22,7 @@ Resolution: 1 second. The point in time is saved as a [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time), regardless of the time zone or daylight saving time. 
The time zone affects how the values of the `DateTime` type values are displayed in text format and how the values specified as strings are parsed (‘2020-01-01 05:00:01’). -Timezone agnostic unix timestamp is stored in tables, and the timezone is used to transform it to text format or back during data import/export or to make calendar calculations on the values (example: `toDate`, `toHour` functions et cetera). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata. +Timezone agnostic Unix timestamp is stored in tables, and the timezone is used to transform it to text format or back during data import/export or to make calendar calculations on the values (example: `toDate`, `toHour` functions etc.). The time zone is not stored in the rows of the table (or in resultset), but is stored in the column metadata. A list of supported time zones can be found in the [IANA Time Zone Database](https://www.iana.org/time-zones) and also can be queried by `SELECT * FROM system.time_zones`. [The list](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) is also available at Wikipedia. @@ -30,7 +30,7 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn’t explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter. -ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionaly you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function. +ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function. When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting. @@ -120,9 +120,9 @@ FROM dt As timezone conversion only changes the metadata, the operation has no computation cost. -## Limitations on timezones support +## Limitations on time zones support -Some timezones may not be supported completely. There are a few cases: +Some time zones may not be supported completely. There are a few cases: If the offset from UTC is not a multiple of 15 minutes, the calculation of hours and minutes can be incorrect. For example, the time zone in Monrovia, Liberia has offset UTC -0:44:30 before 7 Jan 1972. If you are doing calculations on the historical time in Monrovia timezone, the time processing functions may give incorrect results. The results after 7 Jan 1972 will be correct nevertheless. 
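
As the excerpt above stresses, the stored Unix timestamp never changes; only the text rendering and calendar arithmetic depend on the time zone. A minimal illustrative sketch:

```sql
-- The same instant rendered in two time zones; the underlying Unix timestamp is identical.
SELECT
    toDateTime(1546300800, 'Europe/Amsterdam') AS amsterdam,
    toTimeZone(amsterdam, 'UTC') AS utc,
    toUnixTimestamp(amsterdam) = toUnixTimestamp(utc) AS same_instant;
```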
diff --git a/docs/en/sql-reference/data-types/index.md b/docs/en/sql-reference/data-types/index.md index 88663968e50..508307a0543 100644 --- a/docs/en/sql-reference/data-types/index.md +++ b/docs/en/sql-reference/data-types/index.md @@ -27,7 +27,7 @@ ClickHouse data types include: - **Aggregation function types**: use [`SimpleAggregateFunction`](./simpleaggregatefunction.md) and [`AggregateFunction`](./aggregatefunction.md) for storing the intermediate status of aggregate function results - **Nested data structures**: A [`Nested` data structure](./nested-data-structures/index.md) is like a table inside a cell - **Tuples**: A [`Tuple` of elements](./tuple.md), each having an individual type. -- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column gettings its default value for the data type) +- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column settings its default value for the data type) - **IP addresses**: use [`IPv4`](./domains/ipv4.md) and [`IPv6`](./domains/ipv6.md) to efficiently store IP addresses - **Geo types**: for [geographical data](./geo.md), including `Point`, `Ring`, `Polygon` and `MultiPolygon` - **Special data types**: including [`Expression`](./special-data-types/expression.md), [`Set`](./special-data-types/set.md), [`Nothing`](./special-data-types/nothing.md) and [`Interval`](./special-data-types/interval.md) diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index f7b4be64851..43e9300c1ae 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -247,7 +247,7 @@ LAYOUT(FLAT(INITIAL_ARRAY_SIZE 50000 MAX_ARRAY_SIZE 5000000)) ### hashed -The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers In practice, the number of keys can reach tens of millions of items. +The dictionary is completely stored in memory in the form of a hash table. The dictionary can contain any number of elements with any identifiers. In practice, the number of keys can reach tens of millions of items. The dictionary key has the [UInt64](../../sql-reference/data-types/int-uint.md) type. @@ -984,7 +984,7 @@ SOURCE(ODBC(... invalidate_query 'SELECT update_time FROM dictionary_source wher ... ``` -For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronious and asynchronous updates are supported. +For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionaries both synchronous and asynchronous updates are supported. It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration, value of the previous update time in seconds will be added to the data request. Depends on source type (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, or ODBC) different logic will be applied to `update_field` before request data from an external source. @@ -1243,8 +1243,8 @@ Setting fields: - `password` – Password required for the authentication. - `headers` – All custom HTTP headers entries used for the HTTP request. Optional parameter. - `header` – Single HTTP header entry. -- `name` – Identifiant name used for the header send on the request. 
-- `value` – Value set for a specific identifiant name. +- `name` – Identifier name used for the header send on the request. +- `value` – Value set for a specific identifier name. When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`) remote hosts for HTTP dictionaries are checked against the contents of `remote_url_allow_hosts` section from config to prevent database users to access arbitrary HTTP server. diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 3e70f94a0d2..bdd1445c990 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -140,7 +140,7 @@ range([start, ] end [, step]) **Implementation details** -- All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments's. +- All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments. - An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting. **Examples** @@ -1236,7 +1236,7 @@ arrayAUC(arr_scores, arr_labels) **Arguments** - `arr_scores` — scores prediction model gives. -- `arr_labels` — labels of samples, usually 1 for positive sample and 0 for negtive sample. +- `arr_labels` — labels of samples, usually 1 for positive sample and 0 for negative sample. **Returned value** diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 5b342fe4f24..3c07fe8bcbe 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -226,7 +226,7 @@ Result: Returns result of [logical conjuction](https://en.wikipedia.org/wiki/Logical_conjunction) (AND operator) of all bits at given positions. The countdown starts from 0 from the right to the left. -The conjuction for bitwise operations: +The conjuction for bit-wise operations: 0 AND 0 = 0 @@ -291,7 +291,7 @@ Result: Returns result of [logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) (OR operator) of all bits at given positions. The countdown starts from 0 from the right to the left. -The disjunction for bitwise operations: +The disjunction for bit-wise operations: 0 OR 0 = 0 diff --git a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md index 660af6912b0..67affb88a53 100644 --- a/docs/en/sql-reference/functions/distance-functions.md +++ b/docs/en/sql-reference/functions/distance-functions.md @@ -487,7 +487,7 @@ cosineDistance(vector1, vector2) **Returned value** -- Cosine of the angle between two vectors substracted from one. +- Cosine of the angle between two vectors subtracted from one. Type: [Float](../../sql-reference/data-types/float.md). 
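
As a quick sanity check of the `cosineDistance` definition above (one minus the cosine of the angle between the vectors), collinear vectors should give a distance of about 0 and orthogonal vectors a distance of 1; the expected results are noted in the comments rather than quoted from the patch:

```sql
SELECT
    cosineDistance((1, 2, 3), (2, 4, 6)) AS collinear,  -- expected ≈ 0
    cosineDistance((1, 0), (0, 1)) AS orthogonal;       -- expected 1
```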
diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index 1224b7bc92b..00c9ef376d3 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -31,9 +31,9 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad]) **Arguments** - `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). -- `plaintext` — Text thats need to be encrypted. [String](../../sql-reference/data-types/string.md#string). +- `plaintext` — Text that need to be encrypted. [String](../../sql-reference/data-types/string.md#string). - `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string). +- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string). - `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string). **Returned value** @@ -165,7 +165,7 @@ Received exception from server (version 22.6.1): Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-ofb', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123'). ``` -While `aes_encrypt_mysql` produces MySQL-compatitalbe output: +While `aes_encrypt_mysql` produces MySQL-compatible output: Query: @@ -233,7 +233,7 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad]) - `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). - `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). - `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string). +- `iv` — Initialization vector. Required for `-gcm` modes, Optional for others. [String](../../sql-reference/data-types/string.md#string). - `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string). **Returned value** @@ -364,7 +364,7 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) - `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). - `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). - `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string). -- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string). +- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string). **Returned value** diff --git a/docs/en/sql-reference/functions/files.md b/docs/en/sql-reference/functions/files.md index 5cd2d8e0a74..73d72aa50e5 100644 --- a/docs/en/sql-reference/functions/files.md +++ b/docs/en/sql-reference/functions/files.md @@ -6,7 +6,7 @@ sidebar_label: Files ## file -Reads file as string and loads the data into the specified column. The actual file content is not interpreted. 
+Reads a file as string and loads the data into the specified column. The file content is not interpreted. Also see table function [file](../table-functions/file.md). @@ -18,15 +18,13 @@ file(path[, default]) **Arguments** -- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports the following wildcards: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings. -- `default` — The value that will be returned in the case the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal). +- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings. +- `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal). **Example** Inserting data from files a.txt and b.txt into a table as strings: -Query: - ``` sql INSERT INTO table SELECT file('a.txt'), file('b.txt'); ``` diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 0b7df54b776..6f82fedaab7 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -8,7 +8,7 @@ sidebar_label: Nullable ## isNull -Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal). +Returns whether the argument is [NULL](../../sql-reference/syntax.md#null-literal). ``` sql isNull(x) @@ -18,7 +18,7 @@ Alias: `ISNULL`. **Arguments** -- `x` — A value with a non-compound data type. +- `x` — A value of non-compound data type. **Returned value** @@ -27,7 +27,7 @@ Alias: `ISNULL`. **Example** -Input table +Table: ``` text ┌─x─┬────y─┐ @@ -36,12 +36,14 @@ Input table └───┴──────┘ ``` -Query +Query: ``` sql SELECT x FROM t_null WHERE isNull(y); ``` +Result: + ``` text ┌─x─┐ │ 1 │ @@ -50,7 +52,7 @@ SELECT x FROM t_null WHERE isNull(y); ## isNotNull -Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal). +Returns whether the argument is not [NULL](../../sql-reference/syntax.md#null-literal). ``` sql isNotNull(x) @@ -58,16 +60,16 @@ isNotNull(x) **Arguments:** -- `x` — A value with a non-compound data type. +- `x` — A value of non-compound data type. **Returned value** -- `0` if `x` is `NULL`. - `1` if `x` is not `NULL`. +- `0` if `x` is `NULL`. **Example** -Input table +Table: ``` text ┌─x─┬────y─┐ @@ -76,12 +78,14 @@ Input table └───┴──────┘ ``` -Query +Query: ``` sql SELECT x FROM t_null WHERE isNotNull(y); ``` +Result: + ``` text ┌─x─┐ │ 2 │ @@ -90,7 +94,7 @@ SELECT x FROM t_null WHERE isNotNull(y); ## coalesce -Checks from left to right whether `NULL` arguments were passed and returns the first non-`NULL` argument. +Returns the leftmost non-`NULL` argument. ``` sql coalesce(x,...) @@ -98,11 +102,11 @@ coalesce(x,...) **Arguments:** -- Any number of parameters of a non-compound type. All parameters must be compatible by data type. +- Any number of parameters of non-compound type. All parameters must be of mutually compatible data types. 
**Returned values** -- The first non-`NULL` argument. +- The first non-`NULL` argument - `NULL`, if all arguments are `NULL`. **Example** @@ -110,10 +114,10 @@ coalesce(x,...) Consider a list of contacts that may specify multiple ways to contact a customer. ``` text -┌─name─────┬─mail─┬─phone─────┬──icq─┐ -│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │ -│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ -└──────────┴──────┴───────────┴──────┘ +┌─name─────┬─mail─┬─phone─────┬──telegram─┐ +│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │ +│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +└──────────┴──────┴───────────┴───────────┘ ``` The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32`, so it needs to be converted to `String`. @@ -121,22 +125,22 @@ The `mail` and `phone` fields are of type String, but the `icq` field is `UInt32 Get the first available contact method for the customer from the contact list: ``` sql -SELECT name, coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook; +SELECT name, coalesce(mail, phone, CAST(telegram,'Nullable(String)')) FROM aBook; ``` ``` text -┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐ -│ client 1 │ 123-45-67 │ -│ client 2 │ ᴺᵁᴸᴸ │ -└──────────┴──────────────────────────────────────────────────────┘ +┌─name─────┬─coalesce(mail, phone, CAST(telegram, 'Nullable(String)'))─┐ +│ client 1 │ 123-45-67 │ +│ client 2 │ ᴺᵁᴸᴸ │ +└──────────┴───────────────────────────────────────────────────────────┘ ``` ## ifNull -Returns an alternative value if the main argument is `NULL`. +Returns an alternative value if the argument is `NULL`. ``` sql -ifNull(x,alt) +ifNull(x, alt) ``` **Arguments:** @@ -146,25 +150,33 @@ ifNull(x,alt) **Returned values** -- The value `x`, if `x` is not `NULL`. -- The value `alt`, if `x` is `NULL`. +- `x` if `x` is not `NULL`. +- `alt` if `x` is `NULL`. **Example** +Query: + ``` sql SELECT ifNull('a', 'b'); ``` +Result: + ``` text ┌─ifNull('a', 'b')─┐ │ a │ └──────────────────┘ ``` +Query: + ``` sql SELECT ifNull(NULL, 'b'); ``` +Result: + ``` text ┌─ifNull(NULL, 'b')─┐ │ b │ @@ -173,7 +185,7 @@ SELECT ifNull(NULL, 'b'); ## nullIf -Returns `NULL` if the arguments are equal. +Returns `NULL` if both arguments are equal. ``` sql nullIf(x, y) @@ -181,29 +193,37 @@ nullIf(x, y) **Arguments:** -`x`, `y` — Values for comparison. They must be compatible types, or ClickHouse will generate an exception. +`x`, `y` — Values to compare. Must be of compatible types. **Returned values** -- `NULL`, if the arguments are equal. -- The `x` value, if the arguments are not equal. +- `NULL` if the arguments are equal. +- `x` if the arguments are not equal. **Example** +Query: + ``` sql SELECT nullIf(1, 1); ``` +Result: + ``` text ┌─nullIf(1, 1)─┐ │ ᴺᵁᴸᴸ │ └──────────────┘ ``` +Query: + ``` sql SELECT nullIf(1, 2); ``` +Result: + ``` text ┌─nullIf(1, 2)─┐ │ 1 │ @@ -212,7 +232,7 @@ SELECT nullIf(1, 2); ## assumeNotNull -Results in an equivalent non-`Nullable` value for a [Nullable](../../sql-reference/data-types/nullable.md) type. In case the original value is `NULL` the result is undetermined. See also `ifNull` and `coalesce` functions. +Returns the corresponding non-`Nullable` value for a value of [Nullable](../../sql-reference/data-types/nullable.md) type. If the original value is `NULL`, an arbitrary result can be returned. See also functions `ifNull` and `coalesce`. ``` sql assumeNotNull(x) @@ -224,36 +244,29 @@ assumeNotNull(x) **Returned values** -- The original value from the non-`Nullable` type, if it is not `NULL`. 
-- Implementation specific result if the original value was `NULL`. +- The input value as non-`Nullable` type, if it is not `NULL`. +- An arbitrary value, if the input value is `NULL`. **Example** -Consider the `t_null` table. - -``` sql -SHOW CREATE TABLE t_null; -``` +Table: ``` text -┌─statement─────────────────────────────────────────────────────────────────┐ -│ CREATE TABLE default.t_null ( x Int8, y Nullable(Int8)) ENGINE = TinyLog │ -└───────────────────────────────────────────────────────────────────────────┘ -``` -``` text ┌─x─┬────y─┐ │ 1 │ ᴺᵁᴸᴸ │ │ 2 │ 3 │ └───┴──────┘ ``` -Apply the `assumeNotNull` function to the `y` column. +Query: ``` sql -SELECT assumeNotNull(y) FROM t_null; +SELECT assumeNotNull(y) FROM table; ``` +Result: + ``` text ┌─assumeNotNull(y)─┐ │ 0 │ @@ -261,10 +274,14 @@ SELECT assumeNotNull(y) FROM t_null; └──────────────────┘ ``` +Query: + ``` sql SELECT toTypeName(assumeNotNull(y)) FROM t_null; ``` +Result: + ``` text ┌─toTypeName(assumeNotNull(y))─┐ │ Int8 │ @@ -282,28 +299,36 @@ toNullable(x) **Arguments:** -- `x` — The value of any non-compound type. +- `x` — A value of non-compound type. **Returned value** -- The input value with a `Nullable` type. +- The input value but of `Nullable` type. **Example** +Query: + ``` sql SELECT toTypeName(10); ``` +Result: + ``` text ┌─toTypeName(10)─┐ │ UInt8 │ └────────────────┘ ``` +Query: + ``` sql SELECT toTypeName(toNullable(10)); ``` +Result: + ``` text ┌─toTypeName(toNullable(10))─┐ │ Nullable(UInt8) │ diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 1f695a13598..29486c58e6a 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -12,7 +12,7 @@ A latitude and longitude pair can be transformed to a 64-bit H3 index, identifyi The H3 index is used primarily for bucketing locations and other geospatial manipulations. -The full description of the H3 system is available at [the Uber Engeneering site](https://eng.uber.com/h3/). +The full description of the H3 system is available at [the Uber Engineering site](https://eng.uber.com/h3/). ## h3IsValid diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md index 63fe5ca8530..f4702eff44b 100644 --- a/docs/en/sql-reference/functions/geo/s2.md +++ b/docs/en/sql-reference/functions/geo/s2.md @@ -249,7 +249,7 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point) **Returned values** - `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — Hight S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md). +- `s2PointHigh` — Height S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md). 
**Example** diff --git a/docs/en/sql-reference/functions/geo/svg.md b/docs/en/sql-reference/functions/geo/svg.md new file mode 100644 index 00000000000..9081ac71338 --- /dev/null +++ b/docs/en/sql-reference/functions/geo/svg.md @@ -0,0 +1,52 @@ +--- +slug: /en/sql-reference/functions/geo/svg +sidebar_label: SVG +title: "Functions for Generating SVG images from Geo data" +--- + +## Syntax + +``` sql +SVG(geometry,[style]) +``` + +### Parameters + +- `geometry` — Geo data +- `style` — Optional style name + +### Returned value + +- The SVG representation of the geometry: + - SVG circle + - SVG polygon + - SVG path + +Type: String + +## Examples + +### Circle +```sql +SELECT SVG((0., 0.)) +``` +```response + +``` + +### Polygon +```sql +SELECT SVG([(0., 0.), (10, 0), (10, 10), (0, 10)]) +``` +```response + +``` + +### Path +```sql +SELECT SVG([[(0., 0.), (10, 0), (10, 10), (0, 10)], [(4., 4.), (5, 4), (5, 5), (4, 5)]]) +``` +```response + +``` + diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 663b8468a94..68a45810404 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -697,7 +697,7 @@ SELECT murmurHash2_64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23: ## gccMurmurHash -Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value using the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191). It is portable between CLang and GCC builds. +Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value using the same hash seed as [gcc](https://github.com/gcc-mirror/gcc/blob/41d6b10e96a1de98e90a7c0378437c3255814b16/libstdc%2B%2B-v3/include/bits/functional_hash.h#L191). It is portable between Clang and GCC builds. **Syntax** @@ -1161,7 +1161,7 @@ wordShingleSimHashUTF8(string[, shinglesize]) **Arguments** - `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optinal. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md index f5a1a6aac12..138b804a575 100644 --- a/docs/en/sql-reference/functions/logical-functions.md +++ b/docs/en/sql-reference/functions/logical-functions.md @@ -12,7 +12,9 @@ Zero as an argument is considered `false`, non-zero values are considered `true` ## and -Calculates the logical conjunction between two or more values. +Calculates the logical conjunction of two or more values. + +Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `(val_1 AND val_2 AND ... AND val_{i-1})` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT and(number = 2, intDiv(1, number)) FROM numbers(5)`. **Syntax** @@ -20,9 +22,7 @@ Calculates the logical conjunction between two or more values. and(val1, val2...) 
``` -Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `(val_1 AND val_2 AND ... AND val_{i-1})` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT and(number = 2, intDiv(1, number)) FROM numbers(5)`. - -Alias: The [AND Operator](../../sql-reference/operators/index.md#logical-and-operator). +Alias: The [AND operator](../../sql-reference/operators/index.md#logical-and-operator). **Arguments** @@ -30,8 +30,8 @@ Alias: The [AND Operator](../../sql-reference/operators/index.md#logical-and-ope **Returned value** -- `0`, if there at least one argument evaluates to `false`, -- `NULL`, if no argumetn evaluates to `false` and at least one argument is `NULL`, +- `0`, if at least one argument evaluates to `false`, +- `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`, - `1`, otherwise. Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)). @@ -66,7 +66,9 @@ Result: ## or -Calculates the logical disjunction between two or more values. +Calculates the logical disjunction of two or more values. + +Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `((NOT val_1) AND (NOT val_2) AND ... AND (NOT val_{i-1}))` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT or(number = 0, intDiv(1, number) != 0) FROM numbers(5)`. **Syntax** @@ -74,9 +76,7 @@ Calculates the logical disjunction between two or more values. or(val1, val2...) ``` -Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `((NOT val_1) AND (NOT val_2) AND ... AND (NOT val_{i-1}))` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT or(number = 0, intDiv(1, number) != 0) FROM numbers(5)`. - -Alias: The [OR Operator](../../sql-reference/operators/index.md#logical-or-operator). +Alias: The [OR operator](../../sql-reference/operators/index.md#logical-or-operator). **Arguments** @@ -120,7 +120,7 @@ Result: ## not -Calculates logical negation of a value. +Calculates the logical negation of a value. **Syntax** @@ -128,7 +128,7 @@ Calculates logical negation of a value. not(val); ``` -Alias: The [Negation Operator](../../sql-reference/operators/index.md#logical-negation-operator). +Alias: The [Negation operator](../../sql-reference/operators/index.md#logical-negation-operator). **Arguments** @@ -158,7 +158,7 @@ Result: ## xor -Calculates the logical exclusive disjunction between two or more values. For more than two values the function first xor-s the first two values, then xor-s the result with the third value etc. +Calculates the logical exclusive disjunction of two or more values. For more than two input values, the function first xor-s the first two values, then xor-s the result with the third value etc. 
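
A small sketch of the left-to-right folding of `xor` described above; with three arguments the first two are combined first and the intermediate result is then combined with the third:

```sql
-- (1 XOR 0) = 1, then (1 XOR 1) = 0
SELECT xor(1, 0, 1) AS result;
```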
**Syntax** diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 9851378d4fd..9eab2274210 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -52,7 +52,7 @@ Alias: `ln(x)` ## exp2 -Returns 2 to the power of the given argumetn +Returns 2 to the power of the given argument **Syntax** @@ -82,7 +82,7 @@ log2(x) ## exp10 -Returns 10 to the power of the given argumetn +Returns 10 to the power of the given argument. **Syntax** diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index efe1a77c285..26dcccfd42e 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -8,12 +8,12 @@ sidebar_label: Other ## hostName() -Returns a string with the name of the host that this function was performed on. For distributed processing, this is the name of the remote server host, if the function is performed on a remote server. -If it is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard. Otherwise it produces a constant value. +Returns the name of the host on which this function was executed. If the function executes on a remote server (distributed processing), the remote server name is returned. +If the function executes in the context of a distributed table, it generates a normal column with values relevant to each shard. Otherwise it produces a constant value. ## getMacro -Gets a named value from the [macros](../../operations/server-configuration-parameters/settings.md#macros) section of the server configuration. +Returns a named value from the [macros](../../operations/server-configuration-parameters/settings.md#macros) section of the server configuration. **Syntax** @@ -23,7 +23,7 @@ getMacro(name); **Arguments** -- `name` — Name to retrieve from the `macros` section. [String](../../sql-reference/data-types/string.md#string). +- `name` — Macro name to retrieve from the `` section. [String](../../sql-reference/data-types/string.md#string). **Returned value** @@ -33,7 +33,7 @@ Type: [String](../../sql-reference/data-types/string.md). **Example** -The example `macros` section in the server configuration file: +Example `` section in the server configuration file: ``` xml @@ -55,7 +55,7 @@ Result: └──────────────────┘ ``` -An alternative way to get the same value: +The same value can be retrieved as follows: ``` sql SELECT * FROM system.macros @@ -70,7 +70,7 @@ WHERE macro = 'test'; ## FQDN -Returns the fully qualified domain name. +Returns the fully qualified domain name of the ClickHouse server. **Syntax** @@ -88,8 +88,6 @@ Type: `String`. **Example** -Query: - ``` sql SELECT FQDN(); ``` @@ -104,52 +102,61 @@ Result: ## basename -Extracts the trailing part of a string after the last slash or backslash. This function if often used to extract the filename from a path. +Extracts the tail of a string following its last slash or backslash. This function if often used to extract the filename from a path. ``` sql -basename( expr ) +basename(expr) ``` **Arguments** -- `expr` — Expression resulting in a [String](../../sql-reference/data-types/string.md) type value. All the backslashes must be escaped in the resulting value. +- `expr` — A value of type [String](../../sql-reference/data-types/string.md). Backslashes must be escaped. 
**Returned Value** A string that contains: -- The trailing part of a string after the last slash or backslash. - - If the input string contains a path ending with slash or backslash, for example, `/` or `c:\`, the function returns an empty string. - +- The tail of the input string after its last slash or backslash. If the input string ends with a slash or backslash (e.g. `/` or `c:\`), the function returns an empty string. - The original string if there are no slashes or backslashes. **Example** +Query: + ``` sql SELECT 'some/long/path/to/file' AS a, basename(a) ``` +Result: + ``` text ┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐ │ some\long\path\to\file │ file │ └────────────────────────┴────────────────────────────────────────┘ ``` +Query: + ``` sql SELECT 'some\\long\\path\\to\\file' AS a, basename(a) ``` +Result: + ``` text ┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐ │ some\long\path\to\file │ file │ └────────────────────────┴────────────────────────────────────────┘ ``` +Query: + ``` sql SELECT 'some-file-name' AS a, basename(a) ``` +Result: + ``` text ┌─a──────────────┬─basename('some-file-name')─┐ │ some-file-name │ some-file-name │ @@ -159,7 +166,7 @@ SELECT 'some-file-name' AS a, basename(a) ## visibleWidth(x) Calculates the approximate width when outputting values to the console in text format (tab-separated). -This function is used by the system for implementing Pretty formats. +This function is used by the system to implement Pretty formats. `NULL` is represented as a string corresponding to `NULL` in `Pretty` formats. @@ -175,18 +182,18 @@ SELECT visibleWidth(NULL) ## toTypeName(x) -Returns a string containing the type name of the passed argument. +Returns the type name of the passed argument. -If `NULL` is passed to the function as input, then it returns the `Nullable(Nothing)` type, which corresponds to an internal `NULL` representation in ClickHouse. +If `NULL` is passed, then the function returns type `Nullable(Nothing)`, which corresponds to ClickHouse's internal `NULL` representation. ## blockSize() -Gets the size of the block. -In ClickHouse, queries are always run on blocks (sets of column parts). This function allows getting the size of the block that you called it for. +In ClickHouse, queries are processed in blocks (chunks). +This function returns the size (row count) of the block the function is called on. ## byteSize -Returns estimation of uncompressed byte size of its arguments in memory. +Returns an estimation of uncompressed byte size of its arguments in memory. **Syntax** @@ -206,7 +213,7 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** -For [String](../../sql-reference/data-types/string.md) arguments the funtion returns the string length + 9 (terminating zero + length). +For [String](../../sql-reference/data-types/string.md) arguments, the function returns the string length + 9 (terminating zero + length). Query: @@ -265,7 +272,7 @@ byteSize(Float32): 4 byteSize(Float64): 8 ``` -If the function takes multiple arguments, it returns their combined byte size. +If the function has multiple arguments, the function accumulates their byte sizes. Query: @@ -283,30 +290,30 @@ Result: ## materialize(x) -Turns a constant into a full column containing just one value. -In ClickHouse, full columns and constants are represented differently in memory. Functions work differently for constant arguments and normal arguments (different code is executed), although the result is almost always the same. 
This function is for debugging this behavior. +Turns a constant into a full column containing a single value. +Full columns and constants are represented differently in memory. Functions usually execute different code for normal and constant arguments, although the result should typically be the same. This function can be used to debug this behavior. ## ignore(…) -Accepts any arguments, including `NULL`. Always returns 0. -However, the argument is still evaluated. This can be used for benchmarks. +Accepts any arguments, including `NULL` and does nothing. Always returns 0. +The argument is internally still evaluated. Useful e.g. for benchmarks. ## sleep(seconds) -Sleeps ‘seconds’ seconds on each data block. You can specify an integer or a floating-point number. +Sleeps ‘seconds’ seconds for each data block. The sleep time can be specified as integer or as floating-point number. ## sleepEachRow(seconds) -Sleeps ‘seconds’ seconds on each row. You can specify an integer or a floating-point number. +Sleeps ‘seconds’ seconds for each row. The sleep time can be specified as integer or as floating-point number. ## currentDatabase() Returns the name of the current database. -You can use this function in table engine parameters in a CREATE TABLE query where you need to specify the database. +Useful in table engine parameters of `CREATE TABLE` queries where you need to specify the database. ## currentUser() -Returns the login of current user. Login of user, that initiated query, will be returned in case distributed query. +Returns the name of the current user. In case of a distributed query, the name of the user who initiated the query is returned. ``` sql SELECT currentUser(); @@ -316,15 +323,13 @@ Alias: `user()`, `USER()`. **Returned values** -- Login of current user. -- Login of user that initiated query in case of distributed query. +- The name of the current user. +- In distributed queries, the login of the user who initiated the query. Type: `String`. **Example** -Query: - ``` sql SELECT currentUser(); ``` @@ -339,11 +344,11 @@ Result: ## isConstant -Checks whether the argument is a constant expression. +Returns whether the argument is a constant expression. -A constant expression means an expression whose resulting value is known at the query analysis (i.e. before execution). For example, expressions over [literals](../../sql-reference/syntax.md#literals) are constant expressions. +A constant expression is an expression whose result is known during query analysis, i.e. before execution. For example, expressions over [literals](../../sql-reference/syntax.md#literals) are constant expressions. -The function is intended for development, debugging and demonstration. +This function is mostly intended for development, debugging and demonstration. **Syntax** @@ -357,8 +362,8 @@ isConstant(x) **Returned values** -- `1` — `x` is constant. -- `0` — `x` is non-constant. +- `1` if `x` is constant. +- `0` if `x` is non-constant. Type: [UInt8](../../sql-reference/data-types/int-uint.md). @@ -408,23 +413,25 @@ Result: ## isFinite(x) -Accepts Float32 and Float64 and returns UInt8 equal to 1 if the argument is not infinite and not a NaN, otherwise 0. +Returns 1 if the Float32 or Float64 argument not infinite and not a NaN, otherwise this function returns 0. ## isInfinite(x) -Accepts Float32 and Float64 and returns UInt8 equal to 1 if the argument is infinite, otherwise 0. Note that 0 is returned for a NaN. +Returns 1 if the Float32 or Float64 argument is infinite, otherwise this function returns 0. 
Note that 0 is returned for a NaN. ## ifNotFinite -Checks whether floating point value is finite. +Checks whether a floating point value is finite. **Syntax** - ifNotFinite(x,y) +``` sql +ifNotFinite(x,y) +``` **Arguments** -- `x` — Value to be checked for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). +- `x` — Value to check for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). - `y` — Fallback value. Type: [Float\*](../../sql-reference/data-types/float.md). **Returned value** @@ -444,23 +451,23 @@ Result: │ inf │ 42 │ └─────────┴───────────────────────────────┘ -You can get similar result by using [ternary operator](../../sql-reference/functions/conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`. +You can get a similar result by using the [ternary operator](../../sql-reference/functions/conditional-functions.md#ternary-operator): `isFinite(x) ? x : y`. ## isNaN(x) -Accepts Float32 and Float64 and returns UInt8 equal to 1 if the argument is a NaN, otherwise 0. +Returns 1 if the Float32 or Float64 argument is NaN, otherwise this function returns 0. ## hasColumnInTable(\[‘hostname’\[, ‘username’\[, ‘password’\]\],\] ‘database’, ‘table’, ‘column’) -Accepts constant strings: database name, table name, and column name. Returns a UInt8 constant expression equal to 1 if there is a column, otherwise 0. If the hostname parameter is set, the test will run on a remote server. -The function throws an exception if the table does not exist. +Given the database name, the table name, and the column name as constant strings, returns 1 if the given column exists, otherwise 0. If parameter `hostname` is given, the check is performed on a remote server. +If the table does not exist, an exception is thrown. For elements in a nested data structure, the function checks for the existence of a column. For the nested data structure itself, the function returns 0. ## bar -Allows building a unicode-art diagram. +Builds a bar chart. -`bar(x, min, max, width)` draws a band with a width proportional to `(x - min)` and equal to `width` characters when `x = max`. +`bar(x, min, max, width)` draws a band with width proportional to `(x - min)` and equal to `width` characters when `x = max`. **Arguments** @@ -520,23 +527,23 @@ There are two variations of this function: `x` – What to transform. -`array_from` – Constant array of values for converting. +`array_from` – Constant array of values to convert. `array_to` – Constant array of values to convert the values in ‘from’ to. `default` – Which value to use if ‘x’ is not equal to any of the values in ‘from’. -`array_from` and `array_to` – Arrays of the same size. +`array_from` and `array_to` must have the same number of elements. -Types: +Signature: + +For `x` equal to one of the elements in `array_from`, the function returns the corresponding element in `array_to`, i.e. the one at the same array index. Otherwise, it returns `default`. If multiple matching elements exist in `array_from`, an arbitrary corresponding element from `array_to` is returned. `transform(T, Array(T), Array(U), U) -> U` `T` and `U` can be numeric, string, or Date or DateTime types. -Where the same letter is indicated (T or U), for numeric types these might not be matching types, but types that have a common type. -For example, the first argument can have the Int64 type, while the second has the Array(UInt16) type.
- -If the ‘x’ value is equal to one of the elements in the ‘array_from’ array, it returns the existing element (that is numbered the same) from the ‘array_to’ array. Otherwise, it returns ‘default’. If there are multiple matching elements in ‘array_from’, it returns one of the matches. +The same letter (T or U) means that types must be mutually compatible and not necessarily equal. +For example, the first argument could have type `Int64`, while the second argument could have type `Array(UInt16)`. Example: @@ -560,12 +567,7 @@ ORDER BY c DESC ### transform(x, array_from, array_to) -Differs from the first variation in that the ‘default’ argument is omitted. -If the ‘x’ value is equal to one of the elements in the ‘array_from’ array, it returns the matching element (that is numbered the same) from the ‘array_to’ array. Otherwise, it returns ‘x’. - -Types: - -`transform(T, Array(T), Array(T)) -> T` +Similar to the other variation but has no ‘default’ argument. In case no match can be found, `x` is returned. Example: @@ -595,7 +597,7 @@ LIMIT 10 ## formatReadableDecimalSize(x) -Accepts the size (number of bytes). Returns a rounded size with a suffix (KB, MB, etc.) as a string. +Given a size (number of bytes), this function returns a readable, rounded size with suffix (KB, MB, etc.) as string. Example: @@ -616,7 +618,7 @@ SELECT ## formatReadableSize(x) -Accepts the size (number of bytes). Returns a rounded size with a suffix (KiB, MiB, etc.) as a string. +Given a size (number of bytes), this function returns a readable, rounded size with suffix (KiB, MiB, etc.) as string. Example: @@ -637,9 +639,7 @@ SELECT ## formatReadableQuantity(x) -Accepts the number. Returns a rounded number with a suffix (thousand, million, billion, etc.) as a string. - -It is useful for reading big numbers by human. +Given a number, this function returns a rounded number with suffix (thousand, million, billion, etc.) as string. Example: @@ -660,7 +660,7 @@ SELECT ## formatReadableTimeDelta -Accepts the time delta in seconds. Returns a time delta with (year, month, day, hour, minute, second) as a string. +Given a time interval (delta) in seconds, this function returns a time delta with year/month/day/hour/minute/second as string. **Syntax** @@ -670,8 +670,8 @@ formatReadableTimeDelta(column[, maximum_unit]) **Arguments** -- `column` — A column with numeric time delta. -- `maximum_unit` — Optional. Maximum unit to show. Acceptable values seconds, minutes, hours, days, months, years. +- `column` — A column with a numeric time delta. +- `maximum_unit` — Optional. Maximum unit to show. Acceptable values `seconds`, `minutes`, `hours`, `days`, `months`, `years`. Example: @@ -746,33 +746,32 @@ SELECT parseTimeDelta('1yr2mo') ## least(a, b) -Returns the smallest value from a and b. +Returns the smaller value of a and b. ## greatest(a, b) -Returns the largest value of a and b. +Returns the larger value of a and b. ## uptime() Returns the server’s uptime in seconds. -If it is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard. Otherwise it produces a constant value. +If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. ## version() -Returns the version of the server as a string. -If it is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard. Otherwise it produces a constant value. 
+Returns the server version as a string. +If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. ## buildId() Returns the build ID generated by a compiler for the running ClickHouse server binary. -If it is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard. Otherwise it produces a constant value. +If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. - -## blockNumber +## blockNumber() Returns the sequence number of the data block where the row is located. -## rowNumberInBlock +## rowNumberInBlock() Returns the ordinal number of the row in the data block. Different data blocks are always recalculated. @@ -782,7 +781,7 @@ Returns the ordinal number of the row in the data block. This function only cons ## neighbor -The window function that provides access to a row at a specified offset which comes before or after the current row of a given column. +The window function that provides access to a row at a specified offset before or after the current row of a given column. **Syntax** @@ -792,23 +791,23 @@ neighbor(column, offset[, default_value]) The result of the function depends on the affected data blocks and the order of data in the block. -:::tip -It can reach the neighbor rows only inside the currently processed data block. +:::note +Only returns neighbor inside the currently processed data block. ::: -The rows order used during the calculation of `neighbor` can differ from the order of rows returned to the user. -To prevent that you can make a subquery with [ORDER BY](../../sql-reference/statements/select/order-by.md) and call the function from outside the subquery. +The order of rows during calculation of `neighbor()` can differ from the order of rows returned to the user. +To prevent that you can create a subquery with [ORDER BY](../../sql-reference/statements/select/order-by.md) and call the function from outside the subquery. **Arguments** - `column` — A column name or scalar expression. -- `offset` — The number of rows forwards or backwards from the current row of `column`. [Int64](../../sql-reference/data-types/int-uint.md). -- `default_value` — Optional. The value to be returned if offset goes beyond the scope of the block. Type of data blocks affected. +- `offset` — The number of rows to look before or ahead of the current row in `column`. [Int64](../../sql-reference/data-types/int-uint.md). +- `default_value` — Optional. The returned value if offset is beyond the block boundaries. Type of data blocks affected. **Returned values** -- Value for `column` in `offset` distance from current row if `offset` value is not outside block bounds. -- Default value for `column` if `offset` value is outside block bounds. If `default_value` is given, then it will be used. +- Value of `column` with `offset` distance from current row, if `offset` is not outside the block boundaries. +- The default value of `column` or `default_value` (if given), if `offset` is outside the block boundaries. Type: type of data blocks affected or default value type. @@ -899,17 +898,17 @@ Result: ## runningDifference(x) -Calculates the difference between successive row values ​​in the data block. -Returns 0 for the first row and the difference from the previous row for each subsequent row. 
+Calculates the difference between two consecutive row values in the data block. +Returns 0 for the first row, and for subsequent rows the difference to the previous row. -:::tip -It can reach the previous row only inside the currently processed data block. +:::note +Only returns differences inside the currently processed data block. ::: The result of the function depends on the affected data blocks and the order of data in the block. -The rows order used during the calculation of `runningDifference` can differ from the order of rows returned to the user. -To prevent that you can make a subquery with [ORDER BY](../../sql-reference/statements/select/order-by.md) and call the function from outside the subquery. +The order of rows during calculation of `runningDifference()` can differ from the order of rows returned to the user. +To prevent that you can create a subquery with [ORDER BY](../../sql-reference/statements/select/order-by.md) and call the function from outside the subquery. Example: @@ -940,7 +939,7 @@ FROM └─────────┴─────────────────────┴───────┘ ``` -Please note - block size affects the result. With each new block, the `runningDifference` state is reset. +Please note that the block size affects the result. The internal state of `runningDifference` state is reset for each new block. ``` sql SELECT @@ -977,7 +976,7 @@ WHERE diff != 1 ## runningDifferenceStartingWithFirstValue -Same as for [runningDifference](./other-functions.md#other_functions-runningdifference), the difference is the value of the first row, returned the value of the first row, and each subsequent row returns the difference from the previous row. +Same as [runningDifference](./other-functions.md#other_functions-runningdifference), but returns the value of the first row as the value on the first row. ## runningConcurrency @@ -1039,7 +1038,7 @@ Result: ## MACNumToString(num) -Accepts a UInt64 number. Interprets it as a MAC address in big endian. Returns a string containing the corresponding MAC address in the format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form). +Interprets a UInt64 number as a MAC address in big endian format. Returns the corresponding MAC address in format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form) as string. ## MACStringToNum(s) @@ -1047,11 +1046,12 @@ The inverse function of MACNumToString. If the MAC address has an invalid format ## MACStringToOUI(s) -Accepts a MAC address in the format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form). Returns the first three octets as a UInt64 number. If the MAC address has an invalid format, it returns 0. +Given a MAC address in format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form), returns the first three octets as a UInt64 number. If the MAC address has an invalid format, it returns 0. ## getSizeOfEnumType Returns the number of fields in [Enum](../../sql-reference/data-types/enum.md). +An exception is thrown if the type is not `Enum`. ``` sql getSizeOfEnumType(value) @@ -1064,7 +1064,6 @@ getSizeOfEnumType(value) **Returned values** - The number of fields with `Enum` input values. -- An exception is thrown if the type is not `Enum`. **Example** @@ -1080,7 +1079,7 @@ SELECT getSizeOfEnumType( CAST('a' AS Enum8('a' = 1, 'b' = 2) ) ) AS x ## blockSerializedSize -Returns size on disk (without taking into account compression). +Returns the size on disk without considering compression. 
``` sql blockSerializedSize(value[, value[, ...]]) @@ -1092,7 +1091,7 @@ blockSerializedSize(value[, value[, ...]]) **Returned values** -- The number of bytes that will be written to disk for block of values (without compression). +- The number of bytes that will be written to disk for block of values without compression. **Example** @@ -1112,7 +1111,7 @@ Result: ## toColumnTypeName -Returns the name of the class that represents the data type of the column in RAM. +Returns the internal name of the data type that represents the value. ``` sql toColumnTypeName(value) @@ -1124,31 +1123,39 @@ toColumnTypeName(value) **Returned values** -- A string with the name of the class that is used for representing the `value` data type in RAM. +- The internal data type name used to represent `value`. -**Example of the difference between`toTypeName ' and ' toColumnTypeName`** +**Example** + +Difference between `toTypeName ' and ' toColumnTypeName`: ``` sql SELECT toTypeName(CAST('2018-01-01 01:02:03' AS DateTime)) ``` +Result: + ``` text ┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ │ DateTime │ └─────────────────────────────────────────────────────┘ ``` +Query: + ``` sql SELECT toColumnTypeName(CAST('2018-01-01 01:02:03' AS DateTime)) ``` +Result: + ``` text ┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ │ Const(UInt32) │ └───────────────────────────────────────────────────────────┘ ``` -The example shows that the `DateTime` data type is stored in memory as `Const(UInt32)`. +The example shows that the `DateTime` data type is internally stored as `Const(UInt32)`. ## dumpColumnStructure @@ -1164,7 +1171,7 @@ dumpColumnStructure(value) **Returned values** -- A string describing the structure that is used for representing the `value` data type in RAM. +- A description of the column structure used for representing `value`. **Example** @@ -1180,7 +1187,7 @@ SELECT dumpColumnStructure(CAST('2018-01-01 01:02:03', 'DateTime')) ## defaultValueOfArgumentType -Outputs the default value for the data type. +Returns the default value for the given data type. Does not include default values for custom columns set by the user. @@ -1200,20 +1207,28 @@ defaultValueOfArgumentType(expression) **Example** +Query: + ``` sql SELECT defaultValueOfArgumentType( CAST(1 AS Int8) ) ``` +Result: + ``` text ┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐ │ 0 │ └─────────────────────────────────────────────┘ ``` +Query: + ``` sql SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) ) ``` +Result: + ``` text ┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐ │ ᴺᵁᴸᴸ │ @@ -1222,7 +1237,7 @@ SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) ) ## defaultValueOfTypeName -Outputs the default value for given type name. +Returns the default value for the given type name. Does not include default values for custom columns set by the user. @@ -1242,20 +1257,28 @@ defaultValueOfTypeName(type) **Example** +Query: + ``` sql SELECT defaultValueOfTypeName('Int8') ``` +Result: + ``` text ┌─defaultValueOfTypeName('Int8')─┐ │ 0 │ └────────────────────────────────┘ ``` +Query: + ``` sql SELECT defaultValueOfTypeName('Nullable(Int8)') ``` +Result: + ``` text ┌─defaultValueOfTypeName('Nullable(Int8)')─┐ │ ᴺᵁᴸᴸ │ @@ -1263,9 +1286,10 @@ SELECT defaultValueOfTypeName('Nullable(Int8)') ``` ## indexHint -The function is intended for debugging and introspection purposes. The function ignores it's argument and always returns 1. Arguments are not even evaluated. 
-But for the purpose of index analysis, the argument of this function is analyzed as if it was present directly without being wrapped inside `indexHint` function. This allows to select data in index ranges by the corresponding condition but without further filtering by this condition. The index in ClickHouse is sparse and using `indexHint` will yield more data than specifying the same condition directly. +This function is intended for debugging and introspection. It ignores its argument and always returns 1. The arguments are not evaluated. + +But during index analysis, the argument of this function is assumed to be not wrapped in `indexHint`. This allows to select data in index ranges by the corresponding condition but without further filtering by this condition. The index in ClickHouse is sparse and using `indexHint` will yield more data than specifying the same condition directly. **Syntax** @@ -1275,13 +1299,13 @@ SELECT * FROM table WHERE indexHint() **Returned value** -1. Type: [Uint8](https://clickhouse.com/docs/en/data_types/int_uint/#diapazony-uint). +Type: [Uint8](https://clickhouse.com/docs/en/data_types/int_uint/#diapazony-uint). **Example** Here is the example of test data from the table [ontime](../../getting-started/example-datasets/ontime.md). -Input table: +Table: ```sql SELECT count() FROM ontime @@ -1295,9 +1319,7 @@ SELECT count() FROM ontime The table has indexes on the fields `(FlightDate, (Year, FlightDate))`. -Create a query, where the index is not used. - -Query: +Create a query which does not use the index: ```sql SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k @@ -1318,15 +1340,13 @@ Result: └────────────┴─────────┘ ``` -To apply the index, select a specific date. - -Query: +To apply the index, select a specific date: ```sql SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k ``` -By using the index, ClickHouse processed a significantly smaller number of rows (`Processed 32.74 thousand rows`). +ClickHouse now uses the index to process a significantly smaller number of rows (`Processed 32.74 thousand rows`). Result: @@ -1336,7 +1356,7 @@ Result: └────────────┴─────────┘ ``` -Now wrap the expression `k = '2017-09-15'` into `indexHint` function. +Now wrap the expression `k = '2017-09-15'` in function `indexHint`: Query: @@ -1350,9 +1370,9 @@ GROUP BY k ORDER BY k ASC ``` -ClickHouse used the index in the same way as the previous time (`Processed 32.74 thousand rows`). +ClickHouse used the index the same way as previously (`Processed 32.74 thousand rows`). The expression `k = '2017-09-15'` was not used when generating the result. -In examle the `indexHint` function allows to see adjacent dates. +In example, the `indexHint` function allows to see adjacent dates. Result: @@ -1369,7 +1389,7 @@ Result: Creates an array with a single value. -Used for internal implementation of [arrayJoin](../../sql-reference/functions/array-join.md#functions_arrayjoin). +Used for the internal implementation of [arrayJoin](../../sql-reference/functions/array-join.md#functions_arrayjoin). ``` sql SELECT replicate(x, arr); @@ -1377,12 +1397,12 @@ SELECT replicate(x, arr); **Arguments:** -- `arr` — Original array. ClickHouse creates a new array of the same length as the original and fills it with the value `x`. -- `x` — The value that the resulting array will be filled with. +- `arr` — An array. +- `x` — The value to fill the result array with. **Returned value** -An array filled with the value `x`. 
+An array of the lame length as `arr` filled with value `x`. Type: `Array`. @@ -1404,7 +1424,7 @@ Result: ## filesystemAvailable -Returns amount of remaining space on the filesystem where the files of the databases located. It is always smaller than total free space ([filesystemFree](#filesystemfree)) because some space is reserved for OS. +Returns the amount of free space in the filesystem hosting the database persistence. The returned value is always smaller than total free space ([filesystemFree](#filesystemfree)) because some space is reserved for the operating system. **Syntax** @@ -1423,20 +1443,20 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: ``` sql -SELECT formatReadableSize(filesystemAvailable()) AS "Available space", toTypeName(filesystemAvailable()) AS "Type"; +SELECT formatReadableSize(filesystemAvailable()) AS "Available space"; ``` Result: ``` text -┌─Available space─┬─Type───┐ -│ 30.75 GiB │ UInt64 │ -└─────────────────┴────────┘ +┌─Available space─┐ +│ 30.75 GiB │ +└─────────────────┘ ``` ## filesystemFree -Returns total amount of the free space on the filesystem where the files of the databases located. See also `filesystemAvailable` +Returns the total amount of the free space on the filesystem hosting the database persistence. See also `filesystemAvailable` **Syntax** @@ -1446,7 +1466,7 @@ filesystemFree() **Returned value** -- Amount of free space in bytes. +- The amount of free space in bytes. Type: [UInt64](../../sql-reference/data-types/int-uint.md). @@ -1455,20 +1475,20 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: ``` sql -SELECT formatReadableSize(filesystemFree()) AS "Free space", toTypeName(filesystemFree()) AS "Type"; +SELECT formatReadableSize(filesystemFree()) AS "Free space"; ``` Result: ``` text -┌─Free space─┬─Type───┐ -│ 32.39 GiB │ UInt64 │ -└────────────┴────────┘ +┌─Free space─┐ +│ 32.39 GiB │ +└────────────┘ ``` ## filesystemCapacity -Returns the capacity of the filesystem in bytes. For evaluation, the [path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-path) to the data directory must be configured. +Returns the capacity of the filesystem in bytes. Needs the [path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-path) to the data directory to be configured. **Syntax** @@ -1478,7 +1498,7 @@ filesystemCapacity() **Returned value** -- Capacity information of the filesystem in bytes. +- Capacity of the filesystem in bytes. Type: [UInt64](../../sql-reference/data-types/int-uint.md). @@ -1487,20 +1507,20 @@ Type: [UInt64](../../sql-reference/data-types/int-uint.md). Query: ``` sql -SELECT formatReadableSize(filesystemCapacity()) AS "Capacity", toTypeName(filesystemCapacity()) AS "Type" +SELECT formatReadableSize(filesystemCapacity()) AS "Capacity"; ``` Result: ``` text -┌─Capacity──┬─Type───┐ -│ 39.32 GiB │ UInt64 │ -└───────────┴────────┘ +┌─Capacity──┐ +│ 39.32 GiB │ +└───────────┘ ``` ## initializeAggregation -Calculates result of aggregate function based on single value. It is intended to use this function to initialize aggregate functions with combinator [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state). You can create states of aggregate functions and insert them to columns of type [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction) or use initialized aggregates as default values. 
+Calculates the result of an aggregate function based on a single value. This function can be used to initialize aggregate functions with combinator [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state). You can create states of aggregate functions and insert them to columns of type [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction) or use initialized aggregates as default values. **Syntax** @@ -1539,6 +1559,7 @@ Query: ```sql SELECT finalizeAggregation(state), toTypeName(state) FROM (SELECT initializeAggregation('sumState', number % 3) AS state FROM numbers(5)); ``` + Result: ```text @@ -1568,11 +1589,12 @@ INSERT INTO metrics VALUES (0, initializeAggregation('sumState', toUInt64(42))) ``` **See Also** + - [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce) ## finalizeAggregation -Takes state of aggregate function. Returns result of aggregation (or finalized state when using[-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) combinator). +Given a state of aggregate function, this function returns the result of aggregation (or finalized state when using a [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) combinator). **Syntax** @@ -1667,15 +1689,16 @@ Result: ``` **See Also** + - [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce) - [initializeAggregation](#initializeaggregation) ## runningAccumulate -Accumulates states of an aggregate function for each row of a data block. +Accumulates the states of an aggregate function for each row of a data block. -:::tip -The state is reset for each new data block. +:::note +The state is reset for each new block of data. ::: **Syntax** @@ -1726,10 +1749,10 @@ The subquery generates `sumState` for every number from `0` to `9`. `sumState` r The whole query does the following: -1. For the first row, `runningAccumulate` takes `sumState(0)` and returns `0`. -2. For the second row, the function merges `sumState(0)` and `sumState(1)` resulting in `sumState(0 + 1)`, and returns `1` as a result. -3. For the third row, the function merges `sumState(0 + 1)` and `sumState(2)` resulting in `sumState(0 + 1 + 2)`, and returns `3` as a result. -4. The actions are repeated until the block ends. +1. For the first row, `runningAccumulate` takes `sumState(0)` and returns `0`. +2. For the second row, the function merges `sumState(0)` and `sumState(1)` resulting in `sumState(0 + 1)`, and returns `1` as a result. +3. For the third row, the function merges `sumState(0 + 1)` and `sumState(2)` resulting in `sumState(0 + 1 + 2)`, and returns `3` as a result. +4. The actions are repeated until the block ends. The following example shows the `groupping` parameter usage: @@ -1780,7 +1803,7 @@ As you can see, `runningAccumulate` merges states for each group of rows separat The function lets you extract data from the table the same way as from a [dictionary](../../sql-reference/dictionaries/index.md). -Gets data from [Join](../../engines/table-engines/special/join.md#creating-a-table) tables using the specified join key. +Gets the data from [Join](../../engines/table-engines/special/join.md#creating-a-table) tables using the specified join key. Only supports tables created with the `ENGINE = Join(ANY, LEFT, )` statement. 
@@ -1792,13 +1815,13 @@ joinGet(join_storage_table_name, `value_column`, join_keys) **Arguments** -- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicates where search is performed. The identifier is searched in the default database (see parameter `default_database` in the config file). To override the default database, use the `USE db_name` or specify the database and the table through the separator `db_name.db_table`, see the example. +- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed. The identifier is searched in the default database (see setting `default_database` in the config file). To override the default database, use `USE db_name` or specify the database and the table through the separator `db_name.db_table` as in the example. - `value_column` — name of the column of the table that contains required data. - `join_keys` — list of keys. **Returned value** -Returns list of values corresponded to list of keys. +Returns a list of values corresponded to list of keys. If certain does not exist in source table then `0` or `null` will be returned based on [join_use_nulls](../../operations/settings/settings.md#join_use_nulls) setting. @@ -1825,7 +1848,7 @@ INSERT INTO db_test.id_val VALUES (1,11)(2,12)(4,13) Query: ``` sql -SELECT joinGet(db_test.id_val,'val',toUInt32(number)) from numbers(4) SETTINGS join_use_nulls = 1 +SELECT joinGet(db_test.id_val, 'val', toUInt32(number)) from numbers(4) SETTINGS join_use_nulls = 1 ``` Result: @@ -1845,7 +1868,7 @@ Result: This function is not available in ClickHouse Cloud. ::: -Evaluate external catboost model. [CatBoost](https://catboost.ai) is an open-source gradient boosting library developed by Yandex for machine learing. +Evaluate an external catboost model. [CatBoost](https://catboost.ai) is an open-source gradient boosting library developed by Yandex for machine learning. Accepts a path to a catboost model and model arguments (features). Returns Float64. ``` sql @@ -1886,16 +1909,24 @@ See [Training and applying models](https://catboost.ai/docs/features/training.ht ## throwIf(x\[, message\[, error_code\]\]) -Throw an exception if the argument is non zero. -`message` - is an optional parameter: a constant string providing a custom error message -`error_code` - is an optional parameter: a constant integer providing a custom error code +Throw an exception if argument `x` is true. + +**Arguments** + +- `x` - the condition to check. +- `message` - a constant string providing a custom error message. Optional. +- `error_code` - A constant integer providing a custom error code. Optional. To use the `error_code` argument, configuration parameter `allow_custom_error_code_in_throwif` must be enabled. +**Example** + ``` sql SELECT throwIf(number = 3, 'Too many') FROM numbers(10); ``` +Result: + ``` text ↙ Progress: 0.00 rows, 0.00 B (0.00 rows/s., 0.00 B/s.) Received exception from server (version 19.14.1): Code: 395. DB::Exception: Received from localhost:9000. DB::Exception: Too many. @@ -1903,7 +1934,7 @@ Code: 395. DB::Exception: Received from localhost:9000. DB::Exception: Too many. ## identity -Returns the same value that was used as its argument. Used for debugging and testing, allows to cancel using index, and get the query performance of a full scan. When query is analyzed for possible use of index, the analyzer does not look inside `identity` functions. Also constant folding is not applied too. 
+Returns its argument. Intended for debugging and testing. Allows to cancel using index, and get the query performance of a full scan. When the query is analyzed for possible use of an index, the analyzer ignores everything in `identity` functions. Also disables constant folding. **Syntax** @@ -1916,7 +1947,7 @@ identity(x) Query: ``` sql -SELECT identity(42) +SELECT identity(42); ``` Result: @@ -1927,164 +1958,6 @@ Result: └──────────────┘ ``` -## randomPrintableASCII - -Generates a string with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. - -**Syntax** - -``` sql -randomPrintableASCII(length) -``` - -**Arguments** - -- `length` — Resulting string length. Positive integer. - - If you pass `length < 0`, behavior of the function is undefined. - -**Returned value** - -- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. - -Type: [String](../../sql-reference/data-types/string.md) - -**Example** - -``` sql -SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers LIMIT 3 -``` - -``` text -┌─number─┬─str────────────────────────────┬─length(randomPrintableASCII(30))─┐ -│ 0 │ SuiCOSTvC0csfABSw=UcSzp2.`rv8x │ 30 │ -│ 1 │ 1Ag NlJ &RCN:*>HVPG;PE-nO"SUFD │ 30 │ -│ 2 │ /"+<"wUTh:=LjJ Vm!c&hI*m#XTfzz │ 30 │ -└────────┴────────────────────────────────┴──────────────────────────────────┘ -``` - -## randomString - -Generates a binary string of the specified length filled with random bytes (including zero bytes). - -**Syntax** - -``` sql -randomString(length) -``` - -**Arguments** - -- `length` — String length. Positive integer. - -**Returned value** - -- String filled with random bytes. - -Type: [String](../../sql-reference/data-types/string.md). - -**Example** - -Query: - -``` sql -SELECT randomString(30) AS str, length(str) AS len FROM numbers(2) FORMAT Vertical; -``` - -Result: - -``` text -Row 1: -────── -str: 3 G : pT ?w тi k aV f6 -len: 30 - -Row 2: -────── -str: 9 ,] ^ ) ]?? 8 -len: 30 -``` - -**See Also** - -- [generateRandom](../../sql-reference/table-functions/generate.md#generaterandom) -- [randomPrintableASCII](../../sql-reference/functions/other-functions.md#randomascii) - - -## randomFixedString - -Generates a binary string of the specified length filled with random bytes (including zero bytes). - -**Syntax** - -``` sql -randomFixedString(length); -``` - -**Arguments** - -- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). - -**Returned value(s)** - -- String filled with random bytes. - -Type: [FixedString](../../sql-reference/data-types/fixedstring.md). - -**Example** - -Query: - -```sql -SELECT randomFixedString(13) as rnd, toTypeName(rnd) -``` - -Result: - -```text -┌─rnd──────┬─toTypeName(randomFixedString(13))─┐ -│ j▒h㋖HɨZ'▒ │ FixedString(13) │ -└──────────┴───────────────────────────────────┘ - -``` - -## randomStringUTF8 - -Generates a random string of a specified length. Result string contains valid UTF-8 code points. The value of code points may be outside of the range of assigned Unicode. - -**Syntax** - -``` sql -randomStringUTF8(length); -``` - -**Arguments** - -- `length` — Required length of the resulting string in code points. [UInt64](../../sql-reference/data-types/int-uint.md). - -**Returned value(s)** - -- UTF-8 random string. - -Type: [String](../../sql-reference/data-types/string.md). 
- -**Example** - -Query: - -```sql -SELECT randomStringUTF8(13) -``` - -Result: - -```text -┌─randomStringUTF8(13)─┐ -│ 𘤗𙉝д兠庇󡅴󱱎󦐪􂕌𔊹𓰛 │ -└──────────────────────┘ - -``` - ## getSetting Returns the current value of a [custom setting](../../operations/settings/index.md#custom_settings). @@ -2101,7 +1974,7 @@ getSetting('custom_setting'); **Returned value** -- The setting current value. +- The setting's current value. **Example** @@ -2110,7 +1983,7 @@ SET custom_a = 123; SELECT getSetting('custom_a'); ``` -**Result** +Result: ``` 123 @@ -2122,7 +1995,7 @@ SELECT getSetting('custom_a'); ## isDecimalOverflow -Checks whether the [Decimal](../../sql-reference/data-types/decimal.md) value is out of its (or specified) precision. +Checks whether the [Decimal](../../sql-reference/data-types/decimal.md) value is outside its precision or outside the specified precision. **Syntax** @@ -2133,11 +2006,11 @@ isDecimalOverflow(d, [p]) **Arguments** - `d` — value. [Decimal](../../sql-reference/data-types/decimal.md). -- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. Using of this paratemer could be helpful for data extraction to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). +- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. This parameter can be helpful to migrate data from/to another database or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). **Returned values** -- `1` — Decimal value has more digits then it's precision allow, +- `1` — Decimal value has more digits then allowed by its precision, - `0` — Decimal value satisfies the specified precision. **Example** @@ -2159,7 +2032,7 @@ Result: ## countDigits -Returns number of decimal digits you need to represent the value. +Returns number of decimal digits need to represent a value. **Syntax** @@ -2199,9 +2072,7 @@ Result: ## errorCodeToName -**Returned value** - -- Variable name for the error code. +Returns the textual name of an error code. Type: [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md). @@ -2220,7 +2091,7 @@ UNSUPPORTED_METHOD ## tcpPort Returns [native interface](../../interfaces/tcp.md) TCP port number listened by this server. -If it is executed in the context of a distributed table, then it generates a normal column, otherwise it produces a constant value. +If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. **Syntax** @@ -2310,7 +2181,7 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere ## currentRoles -Returns the names of the roles which are current for the current user. The current roles can be changed by the [SET ROLE](../../sql-reference/statements/set-role.md#set-role-statement) statement. If the `SET ROLE` statement was not used, the function `currentRoles` returns the same as `defaultRoles`. +Returns the roles assigned to the current user. The roles can be changed by the [SET ROLE](../../sql-reference/statements/set-role.md#set-role-statement) statement. If no `SET ROLE` statement was not, the function `currentRoles` returns the same as `defaultRoles`. **Syntax** @@ -2320,7 +2191,7 @@ currentRoles() **Returned value** -- List of the current roles for the current user. +- A list of the current roles for the current user. 
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). @@ -2342,7 +2213,7 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere ## defaultRoles -Returns the names of the roles which are enabled by default for the current user when he logins. Initially these are all roles granted to the current user (see [GRANT](../../sql-reference/statements/grant.md#grant-select)), but that can be changed with the [SET DEFAULT ROLE](../../sql-reference/statements/set-role.md#set-default-role-statement) statement. +Returns the roles which are enabled by default for the current user when he logs in. Initially these are all roles granted to the current user (see [GRANT](../../sql-reference/statements/grant.md#grant-select)), but that can be changed with the [SET DEFAULT ROLE](../../sql-reference/statements/set-role.md#set-default-role-statement) statement. **Syntax** @@ -2358,7 +2229,7 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere ## getServerPort -Returns the number of the server port. When the port is not used by the server, throws an exception. +Returns the server port number. When the port is not used by the server, throws an exception. **Syntax** @@ -2407,7 +2278,7 @@ Result: Returns the ID of the current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `query_id`. -In contrast to [initialQueryID](#initial-query-id) function, `queryID` can return different results on different shards (see example). +In contrast to [initialQueryID](#initial-query-id) function, `queryID` can return different results on different shards (see the example). **Syntax** @@ -2477,7 +2348,7 @@ Result: ## shardNum -Returns the index of a shard which processes a part of data for a distributed query. Indices are started from `1`. +Returns the index of a shard which processes a part of data in a distributed query. Indices are started from `1`. If a query is not distributed then constant value `0` is returned. **Syntax** @@ -2609,3 +2480,75 @@ Result: │ 286 │ └──────────────────────────┘ ``` + +## generateRandomStructure + +Generates random table structure in a format `column1_name column1_type, column2_name column2_type, ...`. + +**Syntax** + +``` sql +generateRandomStructure([number_of_columns, seed]) +``` + +**Arguments** + +- `number_of_columns` — The desired number of columns in the result table structure. If set to 0 or `Null`, the number of columns will be random from 1 to 128. Default value: `Null`. +- `seed` - Random seed to produce stable results. If seed is not specified or set to `Null`, it is randomly generated. + +All arguments must be constant. + +**Returned value** + +- Randomly generated table structure. + +Type: [String](../../sql-reference/data-types/string.md). 
+ +**Examples** + +Query: + +``` sql +SELECT generateRandomStructure() +``` + +Result: + +``` text +┌─generateRandomStructure()─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ c1 Decimal32(5), c2 Date, c3 Tuple(LowCardinality(String), Int128, UInt64, UInt16, UInt8, IPv6), c4 Array(UInt128), c5 UInt32, c6 IPv4, c7 Decimal256(64), c8 Decimal128(3), c9 UInt256, c10 UInt64, c11 DateTime │ +└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT generateRandomStructure(1) +``` + +Result: + +``` text +┌─generateRandomStructure(1)─┐ +│ c1 Map(UInt256, UInt16) │ +└────────────────────────────┘ +``` + +Query: + +``` sql +SELECT generateRandomStructure(NULL, 33) +``` + +Result: + +``` text +┌─generateRandomStructure(NULL, 33)─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ c1 DateTime, c2 Enum8('c2V0' = 0, 'c2V1' = 1, 'c2V2' = 2, 'c2V3' = 3), c3 LowCardinality(Nullable(FixedString(30))), c4 Int16, c5 Enum8('c5V0' = 0, 'c5V1' = 1, 'c5V2' = 2, 'c5V3' = 3), c6 Nullable(UInt8), c7 String, c8 Nested(e1 IPv4, e2 UInt8, e3 UInt16, e4 UInt16, e5 Int32, e6 Map(Date, Decimal256(70))) │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**Note**: the maximum nesting depth of complex types (Array, Tuple, Map, Nested) is limited to 16. + +This function can be used together with [generateRandom](../../sql-reference/table-functions/generate.md) to generate completely random tables. + diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index e90d537fb74..e593d9458f0 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -31,7 +31,7 @@ Uses a linear congruential generator. ## randCanonical -Returns a Float64 value, evenly distributed in [0, 1). +Returns a random Float64 value, evenly distributed in interval [0, 1). ## randConstant @@ -54,11 +54,9 @@ Result: └────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘ ``` -# Functions for Generating Random Numbers based on a Distribution - ## randUniform -Returns a Float64 drawn uniformly from the interval between `min` and `max` ([continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution)). +Returns a random Float64 drawn uniformly from interval [`min`, `max`) ([continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution)). **Syntax** @@ -68,8 +66,8 @@ randUniform(min, max) **Arguments** -- `min` - `Float64` - min value of the range, -- `max` - `Float64` - max value of the range. +- `min` - `Float64` - left boundary of the range, +- `max` - `Float64` - right boundary of the range. 
**Returned value** @@ -97,7 +95,7 @@ Result: ## randNormal -Returns a Float64 drawn from a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution). +Returns a random Float64 drawn from a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution). **Syntax** @@ -108,7 +106,7 @@ randNormal(mean, variance) **Arguments** - `mean` - `Float64` - mean value of distribution, -- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance). +- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance) of the distribution. **Returned value** @@ -136,7 +134,7 @@ Result: ## randLogNormal -Returns a Float64 drawn from a [log-normal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution). +Returns a random Float64 drawn from a [log-normal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution). **Syntax** @@ -147,7 +145,7 @@ randLogNormal(mean, variance) **Arguments** - `mean` - `Float64` - mean value of distribution, -- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance). +- `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance) of the distribution. **Returned value** @@ -175,7 +173,7 @@ Result: ## randBinomial -Returns a UInt64 drawn from a [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution). +Returns a random UInt64 drawn from a [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution). **Syntax** @@ -186,7 +184,7 @@ randBinomial(experiments, probability) **Arguments** - `experiments` - `UInt64` - number of experiments, -- `probability` - `Float64` - probability of success in each experiment (values in `0...1` range only). +- `probability` - `Float64` - probability of success in each experiment, a value between 0 and 1. **Returned value** @@ -214,7 +212,7 @@ Result: ## randNegativeBinomial -Returns a UInt64 drawn from a [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution). +Returns a random UInt64 drawn from a [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution). **Syntax** @@ -225,7 +223,7 @@ randNegativeBinomial(experiments, probability) **Arguments** - `experiments` - `UInt64` - number of experiments, -- `probability` - `Float64` - probability of failure in each experiment (values in `0...1` range only). +- `probability` - `Float64` - probability of failure in each experiment, a value between 0 and 1. **Returned value** @@ -253,7 +251,7 @@ Result: ## randPoisson -Returns a UInt64 drawn from a [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution). +Returns a random UInt64 drawn from a [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution). **Syntax** @@ -291,7 +289,7 @@ Result: ## randBernoulli -Returns a UInt64 drawn from a [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution). +Returns a random UInt64 drawn from a [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution). **Syntax** @@ -301,7 +299,7 @@ randBernoulli(probability) **Arguments** -- `probability` - `Float64` - probability of success (values in `0...1` range only). +- `probability` - `Float64` - probability of success, a value between 0 and 1. **Returned value** @@ -329,7 +327,7 @@ Result: ## randExponential -Returns a Float64 drawn from a [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution). 
+Returns a random Float64 drawn from a [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution). **Syntax** @@ -367,7 +365,7 @@ Result: ## randChiSquared -Returns a Float64 drawn from a [Chi-square distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution) - a distribution of a sum of the squares of k independent standard normal random variables. +Returns a random Float64 drawn from a [Chi-square distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution) - a distribution of a sum of the squares of k independent standard normal random variables. **Syntax** @@ -405,7 +403,7 @@ Result: ## randStudentT -Returns a Float64 drawn from a [Student's t-distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution). +Returns a random Float64 drawn from a [Student's t-distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution). **Syntax** @@ -443,7 +441,7 @@ Result: ## randFisherF -Returns a Float64 drawn from a [F-distribution](https://en.wikipedia.org/wiki/F-distribution). +Returns a random Float64 drawn from a [F-distribution](https://en.wikipedia.org/wiki/F-distribution). **Syntax** @@ -480,47 +478,160 @@ Result: └─────────────────────┘ ``` -# Functions for Generating Random Strings - ## randomString -Returns a random String of specified `length`. Not all characters may be printable. +Generates a string of the specified length filled with random bytes (including zero bytes). Not all characters may be printable. **Syntax** -```sql +``` sql randomString(length) ``` +**Arguments** + +- `length` — String length in bytes. Positive integer. + +**Returned value** + +- String filled with random bytes. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +``` sql +SELECT randomString(30) AS str, length(str) AS len FROM numbers(2) FORMAT Vertical; +``` + +Result: + +``` text +Row 1: +────── +str: 3 G : pT ?w тi k aV f6 +len: 30 + +Row 2: +────── +str: 9 ,] ^ ) ]?? 8 +len: 30 +``` + ## randomFixedString -Like `randomString` but returns a FixedString. - -## randomPrintableASCII - -Returns a random String of specified `length`. All characters are printable. +Generates a binary string of the specified length filled with random bytes (including zero bytes). Not all characters may be printable. **Syntax** +``` sql +randomFixedString(length); +``` + +**Arguments** + +- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). + +**Returned value(s)** + +- String filled with random bytes. + +Type: [FixedString](../../sql-reference/data-types/fixedstring.md). + +**Example** + +Query: + ```sql +SELECT randomFixedString(13) as rnd, toTypeName(rnd) +``` + +Result: + +```text +┌─rnd──────┬─toTypeName(randomFixedString(13))─┐ +│ j▒h㋖HɨZ'▒ │ FixedString(13) │ +└──────────┴───────────────────────────────────┘ +``` + +## randomPrintableASCII + +Generates a string with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) characters. All characters are printable. +If you pass `length < 0`, the behavior of the function is undefined. + +**Syntax** + +``` sql randomPrintableASCII(length) ``` +**Arguments** + +- `length` — String length in bytes. Positive integer. + +**Returned value** + +- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. 
+ +Type: [String](../../sql-reference/data-types/string.md) + +**Example** + +``` sql +SELECT number, randomPrintableASCII(30) as str, length(str) FROM system.numbers LIMIT 3 +``` + +``` text +┌─number─┬─str────────────────────────────┬─length(randomPrintableASCII(30))─┐ +│ 0 │ SuiCOSTvC0csfABSw=UcSzp2.`rv8x │ 30 │ +│ 1 │ 1Ag NlJ &RCN:*>HVPG;PE-nO"SUFD │ 30 │ +│ 2 │ /"+<"wUTh:=LjJ Vm!c&hI*m#XTfzz │ 30 │ +└────────┴────────────────────────────────┴──────────────────────────────────┘ +``` + ## randomStringUTF8 -Returns a random String containing `length` many UTF8 codepoints. Not all characters may be printable +Generates a random string of a specified length. Result string contains valid UTF-8 code points. The value of code points may be outside of the range of assigned Unicode. **Syntax** +``` sql +randomStringUTF8(length); +``` + +**Arguments** + +- `length` — Length of the string in code points. [UInt64](../../sql-reference/data-types/int-uint.md). + +**Returned value(s)** + +- UTF-8 random string. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + ```sql -randomStringUTF8(length) +SELECT randomStringUTF8(13) +``` + +Result: + +```text +┌─randomStringUTF8(13)─┐ +│ 𘤗𙉝д兠庇󡅴󱱎󦐪􂕌𔊹𓰛 │ +└──────────────────────┘ ``` ## fuzzBits **Syntax** -Inverts the bits of String or FixedString `s`, each with probability `prob`. +Flips the bits of String or FixedString `s`, each with probability `prob`. **Syntax** @@ -529,8 +640,8 @@ fuzzBits(s, prob) ``` **Arguments** -- `s` - `String` or `FixedString` -- `prob` - constant `Float32/64` +- `s` - `String` or `FixedString`, +- `prob` - constant `Float32/64` between 0.0 and 1.0. **Returned value** diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 8662d08431c..5175bbf0615 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -393,7 +393,7 @@ Reverses a sequence of Unicode code points in a string. Assumes that the string ## format -Format the `pattern` string with the strings listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`. Anything not contained in braces is considered literal text and copied verbatim into the output. Literal brace character can be escaped by two braces: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitely given monotonically increasing numbers). +Format the `pattern` string with the strings listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`. Anything not contained in braces is considered literal text and copied verbatim into the output. Literal brace character can be escaped by two braces: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitly given monotonically increasing numbers). 
**Syntax** diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 56c527d734e..74d5d747193 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -6,7 +6,7 @@ sidebar_label: Replacing in Strings # Functions for Replacing in Strings -[General strings functions](string-functions.md) and [functions for searchin in strings](string-search-functions.md) are described separately. +[General strings functions](string-functions.md) and [functions for searching in strings](string-search-functions.md) are described separately. ## replaceOne diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index c7c66cc771f..f6c99b168ac 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -793,7 +793,7 @@ toDecimalString(number, scale) **Returned value** - Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale). - The number is rounded up or down according to common arithmetics in case requested scale is smaller than original number's scale. + The number is rounded up or down according to common arithmetic in case requested scale is smaller than original number's scale. **Example** diff --git a/docs/en/sql-reference/functions/udf.md b/docs/en/sql-reference/functions/udf.md index a58c1364780..9c6b1b0c66b 100644 --- a/docs/en/sql-reference/functions/udf.md +++ b/docs/en/sql-reference/functions/udf.md @@ -19,7 +19,7 @@ A function configuration contains the following settings: - `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number. - `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command. - `return_type` - the type of a returned value. -- `return_name` - name of retuned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`. +- `return_name` - name of returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`. - `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created. - `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`. - `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`. 
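For context on the `udf.md` hunk above: once an executable UDF is declared with these settings, it is called from SQL like any built-in function; the `format`, `argument`, and `return_name` settings only affect how rows are serialized to and from the external command, not the call site. A minimal usage sketch (not part of this patch), assuming a function registered under the hypothetical name `test_function_sum` that takes two UInt64 arguments:

```sql
-- `test_function_sum` is a hypothetical executable UDF, assumed to be declared
-- in the user-defined-function XML configuration described above.
SELECT test_function_sum(number, number + 1) AS s
FROM numbers(3);
```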
diff --git a/docs/en/sql-reference/operators/in.md b/docs/en/sql-reference/operators/in.md index 8a8c86624d2..bfad16f8365 100644 --- a/docs/en/sql-reference/operators/in.md +++ b/docs/en/sql-reference/operators/in.md @@ -222,7 +222,7 @@ It also makes sense to specify a local table in the `GLOBAL IN` clause, in case ### Distributed Subqueries and max_rows_in_set -You can use [`max_rows_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) and [`max_bytes_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) to control how much data is tranferred during distributed queries. +You can use [`max_rows_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) and [`max_bytes_in_set`](../../operations/settings/query-complexity.md#max-rows-in-set) to control how much data is transferred during distributed queries. This is specially important if the `global in` query returns a large amount of data. Consider the following sql - ```sql diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index c8ed2627e2b..120e464e009 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -24,7 +24,7 @@ For tuple negation: [tupleNegate](../../sql-reference/functions/tuple-functions. `a * b` – The `multiply (a, b)` function. -For multiplying tuple by number: [tupleMultiplyByNumber](../../sql-reference/functions/tuple-functions.md#tuplemultiplybynumber), for scalar profuct: [dotProduct](../../sql-reference/functions/tuple-functions.md#dotproduct). +For multiplying tuple by number: [tupleMultiplyByNumber](../../sql-reference/functions/tuple-functions.md#tuplemultiplybynumber), for scalar product: [dotProduct](../../sql-reference/functions/tuple-functions.md#dotproduct). `a / b` – The `divide(a, b)` function. 
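To make the operator-to-function mapping in the `index.md` hunk above concrete, here is a small illustration (not part of this patch) of the two tuple functions it references; the literal values are arbitrary:

```sql
-- tupleMultiplyByNumber scales each tuple element by the number;
-- dotProduct computes the scalar product of two tuples.
SELECT
    tupleMultiplyByNumber((1, 2), 3) AS scaled,  -- (3, 6)
    dotProduct((1, 2), (3, 4)) AS scalar_product -- 1*3 + 2*4 = 11
```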
diff --git a/docs/en/sql-reference/statements/alter/quota.md b/docs/en/sql-reference/statements/alter/quota.md index 74a184c1479..d41e2ff0f61 100644 --- a/docs/en/sql-reference/statements/alter/quota.md +++ b/docs/en/sql-reference/statements/alter/quota.md @@ -32,7 +32,7 @@ Limit the maximum number of queries for the current user with 123 queries in 15 ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER; ``` -For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters: +For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quarters: ``` sql ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default; diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md index 7c31f93fff7..c69285171ab 100644 --- a/docs/en/sql-reference/statements/create/quota.md +++ b/docs/en/sql-reference/statements/create/quota.md @@ -32,7 +32,7 @@ Limit the maximum number of queries for the current user with 123 queries in 15 CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER; ``` -For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters: +For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quarters: ``` sql CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default; diff --git a/docs/en/sql-reference/statements/explain.md b/docs/en/sql-reference/statements/explain.md index befe85ab4a4..9afc7099108 100644 --- a/docs/en/sql-reference/statements/explain.md +++ b/docs/en/sql-reference/statements/explain.md @@ -127,7 +127,7 @@ CROSS JOIN system.numbers AS c Settings: -- `run_passes` — Run all query tree passes before dumping the query tree. Defaul: `1`. +- `run_passes` — Run all query tree passes before dumping the query tree. Default: `1`. - `dump_passes` — Dump information about used passes before dumping the query tree. Default: `0`. - `passes` — Specifies how many passes to run. If set to `-1`, runs all the passes. Default: `-1`. @@ -475,5 +475,5 @@ Result: ``` :::note -The validation is not complete, so a successfull query does not guarantee that the override would not cause issues. +The validation is not complete, so a successful query does not guarantee that the override would not cause issues. ::: diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md index 4ca8e8287c0..a4f449ad321 100644 --- a/docs/en/sql-reference/statements/select/from.md +++ b/docs/en/sql-reference/statements/select/from.md @@ -34,7 +34,7 @@ Queries that use `FINAL` are executed slightly slower than similar queries that - Data is merged during query execution. - Queries with `FINAL` read primary key columns in addition to the columns specified in the query. 
-**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine have’t happened yet and deal with it by applying aggregation (for example, to discard duplicates). +**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven’t happened yet and deal with it by applying aggregation (for example, to discard duplicates). `FINAL` can be applied automatically using [FINAL](../../../operations/settings/settings.md#final) setting to all tables in a query using a session or a user profile. diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index 1da6c1d8468..53bdc9041a1 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -289,7 +289,7 @@ When `FROM const_expr` not defined sequence of filling use minimal `expr` field When `TO const_expr` not defined sequence of filling use maximum `expr` field value from `ORDER BY`. When `STEP const_numeric_expr` defined then `const_numeric_expr` interprets `as is` for numeric types, as `days` for Date type, as `seconds` for DateTime type. It also supports [INTERVAL](https://clickhouse.com/docs/en/sql-reference/data-types/special-data-types/interval/) data type representing time and date intervals. When `STEP const_numeric_expr` omitted then sequence of filling use `1.0` for numeric type, `1 day` for Date type and `1 second` for DateTime type. -`INTERPOLATE` can be applied to columns not participating in `ORDER BY WITH FILL`. Such columns are filled based on previous fields values by applying `expr`. If `expr` is not present will repeate previous value. Omitted list will result in including all allowed columns. +`INTERPOLATE` can be applied to columns not participating in `ORDER BY WITH FILL`. Such columns are filled based on the previous fields' values by applying `expr`. If `expr` is not present, the previous value will be repeated. Omitted list will result in including all allowed columns. Example of a query without `WITH FILL`: diff --git a/docs/en/sql-reference/table-functions/generate.md b/docs/en/sql-reference/table-functions/generate.md index bfc114daa72..724f6d4a1f2 100644 --- a/docs/en/sql-reference/table-functions/generate.md +++ b/docs/en/sql-reference/table-functions/generate.md @@ -11,7 +11,7 @@ Allows to populate test tables with data. Not all types are supported.
``` sql -generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]]) +generateRandom(['name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]]]) ``` **Arguments** @@ -53,5 +53,49 @@ SELECT * FROM random; └──────────────────────────────┴──────────────┴────────────────────────────────────────────────────────────────────┘ ``` +In combination with [generateRandomStructure](../../sql-reference/functions/other-functions.md#generateRandomStructure): + +```sql +SELECT * FROM generateRandom(generateRandomStructure(4, 101), 101) LIMIT 3; +``` + +```text +┌──────────────────c1─┬──────────────────c2─┬─c3─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─c4──────────────────────────────────────┐ +│ 1996-04-15 06:40:05 │ 33954608387.2844801 │ ['232.78.216.176','9.244.59.211','211.21.80.152','44.49.94.109','165.77.195.182','68.167.134.239','212.13.24.185','1.197.255.35','192.55.131.232'] │ 45d9:2b52:ab6:1c59:185b:515:c5b6:b781 │ +│ 2063-01-13 01:22:27 │ 36155064970.9514454 │ ['176.140.188.101'] │ c65a:2626:41df:8dee:ec99:f68d:c6dd:6b30 │ +│ 2090-02-28 14:50:56 │ 3864327452.3901373 │ ['155.114.30.32'] │ 57e9:5229:93ab:fbf3:aae7:e0e4:d1eb:86b │ +└─────────────────────┴─────────────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────────────────────────────┘ +``` + +With missing `structure` argument (in this case the structure is random): + +```sql +SELECT * FROM generateRandom() LIMIT 3; +``` + +```text +┌───c1─┬─────────c2─┬─────────────────────c3─┬──────────────────────c4─┬─c5───────┐ +│ -128 │ 317300854 │ 2030-08-16 08:22:20.65 │ 1994-08-16 12:08:56.745 │ R0qgiC46 │ +│ 40 │ -744906827 │ 2059-04-16 06:31:36.98 │ 1975-07-16 16:28:43.893 │ PuH4M*MZ │ +│ -55 │ 698652232 │ 2052-08-04 20:13:39.68 │ 1998-09-20 03:48:29.279 │ │ +└──────┴────────────┴────────────────────────┴─────────────────────────┴──────────┘ +``` + +With random seed both for random structure and random data: + +```sql +SELECT * FROM generateRandom(11) LIMIT 3; +``` + +```text +┌───────────────────────────────────────c1─┬─────────────────────────────────────────────────────────────────────────────c2─┬─────────────────────────────────────────────────────────────────────────────c3─┬─────────c4─┬─────────────────────────────────────────────────────────────────────────────c5─┬──────────────────────c6─┬─c7──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─c8──────────────────────────────────────┬─────────c9─┐ +│ -77422512305044606600216318673365695785 │ 636812099959807642229.503817849012019401335326013846687285151335352272727523 │ -34944452809785978175157829109276115789694605299387223845886143311647505037529 │ 544473976 │ 111220388331710079615337037674887514156741572807049614590010583571763691328563 │ 22016.22623506465 │ {'2052-01-31 20:25:33':4306400876908509081044405485378623663,'1993-04-16 15:58:49':164367354809499452887861212674772770279,'2101-08-19 03:07:18':-60676948945963385477105077735447194811,'2039-12-22 22:31:39':-59227773536703059515222628111999932330} │ a7b2:8f58:4d07:6707:4189:80cf:92f5:902d │ 1950-07-14 │ +│ 
-159940486888657488786004075627859832441 │ 629206527868163085099.8195700356331771569105231840157308480121506729741348442 │ -53203761250367440823323469081755775164053964440214841464405368882783634063735 │ 2187136525 │ 94881662451116595672491944222189810087991610568040618106057495823910493624275 │ 1.3095786748458954e-104 │ {} │ a051:e3da:2e0a:c69:7835:aed6:e8b:3817 │ 1943-03-25 │ +│ -5239084224358020595591895205940528518 │ -529937657954363597180.1709207212648004850138812370209091520162977548101577846 │ 47490343304582536176125359129223180987770215457970451211489086575421345731671 │ 1637451978 │ 101899445785010192893461828129714741298630410942962837910400961787305271699002 │ 2.4344456058391296e223 │ {'2013-12-22 17:42:43':80271108282641375975566414544777036006,'2041-03-08 10:28:17':169706054082247533128707458270535852845,'1986-08-31 23:07:38':-54371542820364299444195390357730624136,'2094-04-23 21:26:50':7944954483303909347454597499139023465} │ 1293:a726:e899:9bfc:8c6f:2aa1:22c9:b635 │ 1924-11-20 │ +└──────────────────────────────────────────┴────────────────────────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────┴─────────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────────────────────────────┴────────────┘ +``` + +**Note:** `generateRandom(generateRandomStructure(), [random seed], max_string_length, max_array_length)` with large enough `max_array_length` can generate really huge output due to possible big nesting depth (up to 16) of complex types (`Array`, `Tuple`, `Map`, `Nested`). + ## Related content - Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) diff --git a/docs/en/sql-reference/table-functions/urlCluster.md b/docs/en/sql-reference/table-functions/urlCluster.md index 8f19632c433..07d3f4a7362 100644 --- a/docs/en/sql-reference/table-functions/urlCluster.md +++ b/docs/en/sql-reference/table-functions/urlCluster.md @@ -6,7 +6,7 @@ sidebar_label: urlCluster # urlCluster Table Function -Allows processing files from URL in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterics in URL file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished. +Allows processing files from URL in parallel from many nodes in a specified cluster. On initiator it creates a connection to all nodes in the cluster, discloses asterisk in URL file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished. **Syntax** @@ -59,4 +59,4 @@ Character `|` inside patterns is used to specify failover addresses. 
They are iterated in the same order as listed in the pattern. **See Also** - [HDFS engine](../../engines/table-engines/special/url.md) -- [URL table function](../../sql-reference/table-functions/url.md) \ No newline at end of file +- [URL table function](../../sql-reference/table-functions/url.md) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 7ee2102c14d..a8f494a5afc 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -21,7 +21,7 @@ ClickHouse supports the standard grammar for defining windows and window functio | `lag/lead(value, offset)` | Not supported. Workarounds: | | | 1) replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` | | | 2) use `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | -| ntile(buckets) | Supported. Specify window like, (partition by x order by y rows between unbounded preceding and unounded following). | +| ntile(buckets) | Supported. Specify window like, (partition by x order by y rows between unbounded preceding and unbounded following). | ## ClickHouse-specific Window Functions @@ -39,7 +39,7 @@ The computed value is the following for each row: The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097). -All GitHub issues related to window funtions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag. +All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag. ### Tests diff --git a/docs/ru/faq/integration/json-import.md b/docs/ru/faq/integration/json-import.md index bc65b5a614a..a3c89aed429 100644 --- a/docs/ru/faq/integration/json-import.md +++ b/docs/ru/faq/integration/json-import.md @@ -19,7 +19,7 @@ $ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test При помощи [интефейса CLI](../../interfaces/cli.md): ``` bash -$ echo '{"foo":"bar"}' | clickhouse-client ---query="INSERT INTO test FORMAT JSONEachRow" +$ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSONEachRow" ``` Чтобы не вставлять данные вручную, используйте одну из [готовых библиотек](../../interfaces/index.md). @@ -31,4 +31,4 @@ $ echo '{"foo":"bar"}' | clickhouse-client ---query="INSERT INTO test FORMAT JS :::note "Примечание" В HTTP-интерфейсе настройки передаются через параметры `GET` запроса, в `CLI` interface — как дополнительные аргументы командной строки, начинающиеся с `--`.
- ::: \ No newline at end of file + ::: diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 47017a94cb5..c00d1f5349f 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -55,6 +55,8 @@ option (ENABLE_CLICKHOUSE_KEEPER "ClickHouse alternative to ZooKeeper" ${ENABLE_ option (ENABLE_CLICKHOUSE_KEEPER_CONVERTER "Util allows to convert ZooKeeper logs and snapshots into clickhouse-keeper snapshot" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_KEEPER_CLIENT "ClickHouse Keeper Client" ${ENABLE_CLICKHOUSE_ALL}) + option (ENABLE_CLICKHOUSE_SU "A tool similar to 'su'" ${ENABLE_CLICKHOUSE_ALL}) option (ENABLE_CLICKHOUSE_DISKS "A tool to manage disks" ${ENABLE_CLICKHOUSE_ALL}) @@ -169,6 +171,13 @@ else() message(STATUS "ClickHouse keeper-converter mode: OFF") endif() +if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) + message(STATUS "ClickHouse keeper-client mode: ON") +else() + message(STATUS "ClickHouse keeper-client mode: OFF") +endif() + + if (ENABLE_CLICKHOUSE_DISKS) message(STATUS "Clickhouse disks mode: ON") else() @@ -237,6 +246,10 @@ if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) add_subdirectory (keeper-converter) endif() +if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) + add_subdirectory (keeper-client) +endif() + if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) add_subdirectory (odbc-bridge) endif () @@ -301,6 +314,9 @@ endif() if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) clickhouse_target_link_split_lib(clickhouse keeper-converter) endif() +if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) + clickhouse_target_link_split_lib(clickhouse keeper-client) +endif() if (ENABLE_CLICKHOUSE_INSTALL) clickhouse_target_link_split_lib(clickhouse install) endif () @@ -392,6 +408,11 @@ if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-converter" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter) endif () +if (ENABLE_CLICKHOUSE_KEEPER_CLIENT) + add_custom_target (clickhouse-keeper-client ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper-client DEPENDS clickhouse) + install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper-client" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-client) +endif () if (ENABLE_CLICKHOUSE_DISKS) add_custom_target (clickhouse-disks ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-disks DEPENDS clickhouse) install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-disks" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) diff --git a/programs/config_tools.h.in b/programs/config_tools.h.in index 30444e8c84e..65ef3ca762b 100644 --- a/programs/config_tools.h.in +++ b/programs/config_tools.h.in @@ -17,6 +17,7 @@ #cmakedefine01 ENABLE_CLICKHOUSE_ODBC_BRIDGE #cmakedefine01 ENABLE_CLICKHOUSE_LIBRARY_BRIDGE #cmakedefine01 ENABLE_CLICKHOUSE_KEEPER +#cmakedefine01 ENABLE_CLICKHOUSE_KEEPER_CLIENT #cmakedefine01 ENABLE_CLICKHOUSE_KEEPER_CONVERTER #cmakedefine01 ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER #cmakedefine01 ENABLE_CLICKHOUSE_SU diff --git a/programs/keeper-client/CMakeLists.txt b/programs/keeper-client/CMakeLists.txt new file mode 100644 index 00000000000..f54892fe559 --- /dev/null +++ b/programs/keeper-client/CMakeLists.txt @@ -0,0 +1,9 @@ +set (CLICKHOUSE_KEEPER_CLIENT_SOURCES KeeperClient.cpp Parser.cpp Commands.cpp) + +set (CLICKHOUSE_KEEPER_CLIENT_LINK + PRIVATE + boost::program_options + dbms +) + +clickhouse_program_add(keeper-client) diff --git 
a/programs/keeper-client/Commands.cpp b/programs/keeper-client/Commands.cpp new file mode 100644 index 00000000000..05928a0d20b --- /dev/null +++ b/programs/keeper-client/Commands.cpp @@ -0,0 +1,196 @@ + +#include "Commands.h" +#include "KeeperClient.h" + + +namespace DB +{ + +bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String arg; + if (!parseKeeperPath(pos, expected, arg)) + return true; + + node->args.push_back(std::move(arg)); + return true; +} + +void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + String path; + if (!query->args.empty()) + path = client->getAbsolutePath(query->args[0].safeGet()); + else + path = client->cwd; + + for (const auto & child : client->zookeeper->getChildren(path)) + std::cout << child << " "; + std::cout << "\n"; +} + +bool CDCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String arg; + if (!parseKeeperPath(pos, expected, arg)) + return true; + + node->args.push_back(std::move(arg)); + return true; +} + +void CDCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + if (query->args.empty()) + return; + + auto new_path = client->getAbsolutePath(query->args[0].safeGet()); + if (!client->zookeeper->exists(new_path)) + std::cerr << "Path " << new_path << " does not exists\n"; + else + client->cwd = new_path; +} + +bool SetCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String arg; + if (!parseKeeperPath(pos, expected, arg)) + return false; + node->args.push_back(std::move(arg)); + + if (!parseKeeperArg(pos, expected, arg)) + return false; + node->args.push_back(std::move(arg)); + + ASTPtr version; + if (ParserNumber{}.parse(pos, version, expected)) + node->args.push_back(version->as().value); + + return true; +} + +void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + if (query->args.size() == 2) + client->zookeeper->set(client->getAbsolutePath(query->args[0].safeGet()), query->args[1].safeGet()); + else + client->zookeeper->set( + client->getAbsolutePath(query->args[0].safeGet()), + query->args[1].safeGet(), + static_cast(query->args[2].safeGet())); +} + +bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String arg; + if (!parseKeeperPath(pos, expected, arg)) + return false; + node->args.push_back(std::move(arg)); + + if (!parseKeeperArg(pos, expected, arg)) + return false; + node->args.push_back(std::move(arg)); + + int mode = zkutil::CreateMode::Persistent; + + if (ParserKeyword{"PERSISTENT"}.ignore(pos, expected)) + mode = zkutil::CreateMode::Persistent; + else if (ParserKeyword{"EPHEMERAL"}.ignore(pos, expected)) + mode = zkutil::CreateMode::Ephemeral; + else if (ParserKeyword{"EPHEMERAL SEQUENTIAL"}.ignore(pos, expected)) + mode = zkutil::CreateMode::EphemeralSequential; + else if (ParserKeyword{"PERSISTENT SEQUENTIAL"}.ignore(pos, expected)) + mode = zkutil::CreateMode::PersistentSequential; + + node->args.push_back(mode); + + return true; +} + +void CreateCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + client->zookeeper->create( + client->getAbsolutePath(query->args[0].safeGet()), + query->args[1].safeGet(), + static_cast(query->args[2].safeGet())); +} + +bool GetCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String arg; + if (!parseKeeperPath(pos, expected, arg)) + return false; + 
node->args.push_back(std::move(arg)); + + return true; +} + +void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + std::cout << client->zookeeper->get(client->getAbsolutePath(query->args[0].safeGet())) << "\n"; +} + +bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String arg; + if (!parseKeeperPath(pos, expected, arg)) + return false; + node->args.push_back(std::move(arg)); + + return true; +} + +void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + client->zookeeper->remove(client->getAbsolutePath(query->args[0].safeGet())); +} + +bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + String arg; + if (!parseKeeperPath(pos, expected, arg)) + return false; + node->args.push_back(std::move(arg)); + + return true; +} + +void RMRCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + String path = client->getAbsolutePath(query->args[0].safeGet()); + client->askConfirmation("You are going to recursively delete path " + path, + [client, path]{ client->zookeeper->removeRecursive(path); }); +} + +bool HelpCommand::parse(IParser::Pos & /* pos */, std::shared_ptr & /* node */, Expected & /* expected */) const +{ + return true; +} + +void HelpCommand::execute(const ASTKeeperQuery * /* query */, KeeperClient * /* client */) const +{ + for (const auto & pair : KeeperClient::commands) + std::cout << pair.second->getHelpMessage() << "\n"; +} + +bool FourLetterWordCommand::parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const +{ + expected.add(pos, "four-letter-word command"); + if (pos->type != TokenType::BareWord) + return false; + + String cmd(pos->begin, pos->end); + if (cmd.size() != 4) + return false; + + ++pos; + node->args.push_back(std::move(cmd)); + return true; +} + +void FourLetterWordCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const +{ + std::cout << client->executeFourLetterCommand(query->args[0].safeGet()) << "\n"; +} + +} diff --git a/programs/keeper-client/Commands.h b/programs/keeper-client/Commands.h new file mode 100644 index 00000000000..e4debd53e42 --- /dev/null +++ b/programs/keeper-client/Commands.h @@ -0,0 +1,131 @@ +#pragma once + +#include "Parser.h" + +namespace DB +{ + +class KeeperClient; + +class IKeeperClientCommand +{ +public: + static const String name; + + virtual bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const = 0; + + virtual void execute(const ASTKeeperQuery * query, KeeperClient * client) const = 0; + + virtual String getHelpMessage() const = 0; + + virtual String getName() const = 0; + + virtual ~IKeeperClientCommand() = default; +}; + +using Command = std::shared_ptr; + + +class LSCommand : public IKeeperClientCommand +{ + String getName() const override { return "ls"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "ls [path] -- Lists the nodes for the given path (default: cwd)"; } +}; + +class CDCommand : public IKeeperClientCommand +{ + String getName() const override { return "cd"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { 
return "cd [path] -- Change the working path (default `.`)"; } +}; + +class SetCommand : public IKeeperClientCommand +{ + String getName() const override { return "set"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override + { + return "set [version] -- Updates the node's value. Only update if version matches (default: -1)"; + } +}; + +class CreateCommand : public IKeeperClientCommand +{ + String getName() const override { return "create"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "create -- Creates new node"; } +}; + +class GetCommand : public IKeeperClientCommand +{ + String getName() const override { return "get"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "get -- Returns the node's value"; } +}; + +class RMCommand : public IKeeperClientCommand +{ + String getName() const override { return "rm"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "remove -- Remove the node"; } +}; + +class RMRCommand : public IKeeperClientCommand +{ + String getName() const override { return "rmr"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "rmr -- Recursively deletes path. Confirmation required"; } +}; + +class HelpCommand : public IKeeperClientCommand +{ + String getName() const override { return "help"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "help -- Prints this message"; } +}; + +class FourLetterWordCommand : public IKeeperClientCommand +{ + String getName() const override { return "flwc"; } + + bool parse(IParser::Pos & pos, std::shared_ptr & node, Expected & expected) const override; + + void execute(const ASTKeeperQuery * query, KeeperClient * client) const override; + + String getHelpMessage() const override { return "flwc -- Executes four-letter-word command"; } +}; + +} diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp new file mode 100644 index 00000000000..f38da1b72aa --- /dev/null +++ b/programs/keeper-client/KeeperClient.cpp @@ -0,0 +1,343 @@ +#include "KeeperClient.h" +#include "Commands.h" +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +String KeeperClient::executeFourLetterCommand(const String & command) +{ + /// We need to create a new socket every time because ZooKeeper forcefully shuts down the connection after a four-letter-word command. 
+ Poco::Net::StreamSocket socket; + socket.connect(Poco::Net::SocketAddress{zk_args.hosts[0]}, zk_args.connection_timeout_ms * 1000); + + socket.setReceiveTimeout(zk_args.operation_timeout_ms * 1000); + socket.setSendTimeout(zk_args.operation_timeout_ms * 1000); + socket.setNoDelay(true); + + ReadBufferFromPocoSocket in(socket); + WriteBufferFromPocoSocket out(socket); + + out.write(command.data(), command.size()); + out.next(); + + String result; + readStringUntilEOF(result, in); + in.next(); + return result; +} + +std::vector KeeperClient::getCompletions(const String & prefix) const +{ + Tokens tokens(prefix.data(), prefix.data() + prefix.size(), 0, false); + IParser::Pos pos(tokens, 0); + + if (pos->type != TokenType::BareWord) + return registered_commands_and_four_letter_words; + + ++pos; + if (pos->isEnd()) + return registered_commands_and_four_letter_words; + + ParserToken{TokenType::Whitespace}.ignore(pos); + + std::vector result; + String string_path; + Expected expected; + if (!parseKeeperPath(pos, expected, string_path)) + string_path = cwd; + + if (!pos->isEnd()) + return result; + + fs::path path = string_path; + String parent_path; + if (string_path.ends_with("/")) + parent_path = getAbsolutePath(string_path); + else + parent_path = getAbsolutePath(path.parent_path()); + + try + { + for (const auto & child : zookeeper->getChildren(parent_path)) + result.push_back(child); + } + catch (Coordination::Exception &) {} + + std::sort(result.begin(), result.end()); + + return result; +} + +void KeeperClient::askConfirmation(const String & prompt, std::function && callback) +{ + std::cout << prompt << " Continue?\n"; + need_confirmation = true; + confirmation_callback = callback; +} + +fs::path KeeperClient::getAbsolutePath(const String & relative) const +{ + String result; + if (relative.starts_with('/')) + result = fs::weakly_canonical(relative); + else + result = fs::weakly_canonical(cwd / relative); + + if (result.ends_with('/') && result.size() > 1) + result.pop_back(); + + return result; +} + +void KeeperClient::loadCommands(std::vector && new_commands) +{ + for (const auto & command : new_commands) + { + String name = command->getName(); + commands.insert({name, command}); + registered_commands_and_four_letter_words.push_back(std::move(name)); + } + + for (const auto & command : four_letter_word_commands) + registered_commands_and_four_letter_words.push_back(command); + + std::sort(registered_commands_and_four_letter_words.begin(), registered_commands_and_four_letter_words.end()); +} + +void KeeperClient::defineOptions(Poco::Util::OptionSet & options) +{ + Poco::Util::Application::defineOptions(options); + + options.addOption( + Poco::Util::Option("help", "", "show help and exit") + .binding("help")); + + options.addOption( + Poco::Util::Option("host", "h", "server hostname. default `localhost`") + .argument("host") + .binding("host")); + + options.addOption( + Poco::Util::Option("port", "p", "server port. default `2181`") + .argument("port") + .binding("port")); + + options.addOption( + Poco::Util::Option("query", "q", "will execute given query, then exit.") + .argument("query") + .binding("query")); + + options.addOption( + Poco::Util::Option("connection-timeout", "", "set connection timeout in seconds. default 10s.") + .argument("connection-timeout") + .binding("connection-timeout")); + + options.addOption( + Poco::Util::Option("session-timeout", "", "set session timeout in seconds. 
default 10s.") + .argument("session-timeout") + .binding("session-timeout")); + + options.addOption( + Poco::Util::Option("operation-timeout", "", "set operation timeout in seconds. default 10s.") + .argument("operation-timeout") + .binding("operation-timeout")); + + options.addOption( + Poco::Util::Option("history-file", "", "set path of history file. default `~/.keeper-client-history`") + .argument("history-file") + .binding("history-file")); + + options.addOption( + Poco::Util::Option("log-level", "", "set log level") + .argument("log-level") + .binding("log-level")); +} + +void KeeperClient::initialize(Poco::Util::Application & /* self */) +{ + suggest.setCompletionsCallback( + [&](const String & prefix, size_t /* prefix_length */) { return getCompletions(prefix); }); + + loadCommands({ + std::make_shared(), + std::make_shared(), + std::make_shared(), + std::make_shared(), + std::make_shared(), + std::make_shared(), + std::make_shared(), + std::make_shared(), + std::make_shared(), + }); + + String home_path; + const char * home_path_cstr = getenv("HOME"); // NOLINT(concurrency-mt-unsafe) + if (home_path_cstr) + home_path = home_path_cstr; + + if (config().has("history-file")) + history_file = config().getString("history-file"); + else + history_file = home_path + "/.keeper-client-history"; + + if (!history_file.empty() && !fs::exists(history_file)) + { + try + { + FS::createFile(history_file); + } + catch (const ErrnoException & e) + { + if (e.getErrno() != EEXIST) + throw; + } + } + + Poco::Logger::root().setLevel(config().getString("log-level", "error")); + + EventNotifier::init(); +} + +void KeeperClient::executeQuery(const String & query) +{ + std::vector queries; + boost::algorithm::split(queries, query, boost::is_any_of(";")); + + for (const auto & query_text : queries) + { + if (!query_text.empty()) + processQueryText(query_text); + } +} + +bool KeeperClient::processQueryText(const String & text) +{ + if (exit_strings.find(text) != exit_strings.end()) + return false; + + try + { + if (need_confirmation) + { + need_confirmation = false; + if (text.size() == 1 && (text == "y" || text == "Y")) + confirmation_callback(); + return true; + } + + KeeperParser parser; + String message; + const char * begin = text.data(); + ASTPtr res = tryParseQuery(parser, begin, begin + text.size(), message, true, "", false, 0, 0, false); + + if (!res) + { + std::cerr << message << "\n"; + return true; + } + + auto * query = res->as(); + + auto command = KeeperClient::commands.find(query->command); + command->second->execute(query, this); + } + catch (Coordination::Exception & err) + { + std::cerr << err.message() << "\n"; + } + return true; +} + +void KeeperClient::runInteractive() +{ + + LineReader::Patterns query_extenders = {"\\"}; + LineReader::Patterns query_delimiters = {}; + + ReplxxLineReader lr(suggest, history_file, false, query_extenders, query_delimiters, {}); + lr.enableBracketedPaste(); + + while (true) + { + String prompt; + if (need_confirmation) + prompt = "[y/n] "; + else + prompt = cwd.string() + " :) "; + + auto input = lr.readLine(prompt, ":-] "); + if (input.empty()) + break; + + if (!processQueryText(input)) + break; + } +} + +int KeeperClient::main(const std::vector & /* args */) +{ + if (config().hasOption("help")) + { + Poco::Util::HelpFormatter help_formatter(KeeperClient::options()); + auto header_str = fmt::format("{} [OPTION]\n", commandName()); + help_formatter.setHeader(header_str); + help_formatter.format(std::cout); + return 0; + } + + auto host = 
config().getString("host", "localhost"); + auto port = config().getString("port", "2181"); + zk_args.hosts = {host + ":" + port}; + zk_args.connection_timeout_ms = config().getInt("connection-timeout", 10) * 1000; + zk_args.session_timeout_ms = config().getInt("session-timeout", 10) * 1000; + zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000; + zookeeper = std::make_unique(zk_args); + + if (config().has("query")) + executeQuery(config().getString("query")); + else + runInteractive(); + + return 0; +} + +} + + +int mainEntryClickHouseKeeperClient(int argc, char ** argv) +{ + try + { + DB::KeeperClient client; + client.init(argc, argv); + return client.run(); + } + catch (const DB::Exception & e) + { + std::cerr << DB::getExceptionMessage(e, false) << std::endl; + return 1; + } + catch (const boost::program_options::error & e) + { + std::cerr << "Bad arguments: " << e.what() << std::endl; + return DB::ErrorCodes::BAD_ARGUMENTS; + } + catch (...) + { + std::cerr << DB::getCurrentExceptionMessage(true) << std::endl; + return 1; + } +} diff --git a/programs/keeper-client/KeeperClient.h b/programs/keeper-client/KeeperClient.h new file mode 100644 index 00000000000..e7fa5cdc5fe --- /dev/null +++ b/programs/keeper-client/KeeperClient.h @@ -0,0 +1,69 @@ +#pragma once + +#include "Parser.h" +#include "Commands.h" +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace fs = std::filesystem; + +namespace DB +{ + +static const NameSet four_letter_word_commands + { + "ruok", "mntr", "srvr", "stat", "srst", "conf", + "cons", "crst", "envi", "dirs", "isro", "wchs", + "wchc", "wchp", "dump", "csnp", "lgif", "rqld", + }; + +class KeeperClient: public Poco::Util::Application +{ +public: + KeeperClient() = default; + + void initialize(Poco::Util::Application & self) override; + + int main(const std::vector & args) override; + + void defineOptions(Poco::Util::OptionSet & options) override; + + fs::path getAbsolutePath(const String & relative) const; + + void askConfirmation(const String & prompt, std::function && callback); + + String executeFourLetterCommand(const String & command); + + zkutil::ZooKeeperPtr zookeeper; + std::filesystem::path cwd = "/"; + std::function confirmation_callback; + + inline static std::map commands; + +protected: + void runInteractive(); + bool processQueryText(const String & text); + void executeQuery(const String & query); + + void loadCommands(std::vector && new_commands); + + std::vector getCompletions(const String & prefix) const; + + String history_file; + LineReader::Suggest suggest; + + zkutil::ZooKeeperArgs zk_args; + + bool need_confirmation = false; + + std::vector registered_commands_and_four_letter_words; +}; + +} diff --git a/programs/keeper-client/Parser.cpp b/programs/keeper-client/Parser.cpp new file mode 100644 index 00000000000..0f3fc39704e --- /dev/null +++ b/programs/keeper-client/Parser.cpp @@ -0,0 +1,94 @@ +#include "Parser.h" +#include "KeeperClient.h" + + +namespace DB +{ + +bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result) +{ + expected.add(pos, getTokenName(TokenType::BareWord)); + + if (pos->type == TokenType::BareWord) + { + result = String(pos->begin, pos->end); + ++pos; + ParserToken{TokenType::Whitespace}.ignore(pos); + return true; + } + + bool status = parseIdentifierOrStringLiteral(pos, expected, result); + ParserToken{TokenType::Whitespace}.ignore(pos); + return status; +} + +bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path) 
+{ + expected.add(pos, "path"); + + if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral) + return parseIdentifierOrStringLiteral(pos, expected, path); + + String result; + while (pos->type == TokenType::BareWord || pos->type == TokenType::Slash || pos->type == TokenType::Dot) + { + result.append(pos->begin, pos->end); + ++pos; + } + ParserToken{TokenType::Whitespace}.ignore(pos); + + if (result.empty()) + return false; + + path = result; + return true; +} + +bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + auto query = std::make_shared(); + + for (const auto & pair : KeeperClient::commands) + expected.add(pos, pair.first.data()); + + for (const auto & flwc : four_letter_word_commands) + expected.add(pos, flwc.data()); + + if (pos->type != TokenType::BareWord) + return false; + + String command_name(pos->begin, pos->end); + Command command; + + auto iter = KeeperClient::commands.find(command_name); + if (iter == KeeperClient::commands.end()) + { + if (command_name.size() == 4) + { + /// Treat it like four-letter command + /// Since keeper server can potentially have different version we don't want to match this command with embedded list + command = std::make_shared(); + command_name = command->getName(); + /// We also don't move the position, so the command will be parsed as an argument + } + else + return false; + } + else + { + command = iter->second; + ++pos; + ParserToken{TokenType::Whitespace}.ignore(pos); + } + + query->command = command_name; + if (!command->parse(pos, query, expected)) + return false; + + ParserToken{TokenType::Whitespace}.ignore(pos); + + node = query; + return true; +} + +} diff --git a/programs/keeper-client/Parser.h b/programs/keeper-client/Parser.h new file mode 100644 index 00000000000..57ee6ce4a18 --- /dev/null +++ b/programs/keeper-client/Parser.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include +#include +#include +#include + + +namespace DB +{ + +bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result); + +bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path); + + +class ASTKeeperQuery : public IAST +{ +public: + String getID(char) const override { return "KeeperQuery"; } + ASTPtr clone() const override { return std::make_shared(*this); } + + String command; + std::vector args; +}; + +class KeeperParser : public IParserBase +{ +protected: + const char * getName() const override { return "Keeper client query"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 944a60d4e4c..96c1ca261b5 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -138,7 +138,7 @@ void LocalServer::initialize(Poco::Util::Application & self) OutdatedPartsLoadingThreadPool::initialize( config().getUInt("max_outdated_parts_loading_thread_pool_size", 16), 0, // We don't need any threads one all the parts will be loaded - config().getUInt("outdated_part_loading_thread_pool_queue_size", 10000)); + config().getUInt("max_outdated_parts_loading_thread_pool_size", 16)); } diff --git a/programs/main.cpp b/programs/main.cpp index 83e64b8c932..9a3ad47a86e 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -62,6 +62,9 @@ int mainEntryClickHouseKeeper(int argc, char ** argv); #if ENABLE_CLICKHOUSE_KEEPER_CONVERTER int mainEntryClickHouseKeeperConverter(int argc, char ** argv); #endif +#if ENABLE_CLICKHOUSE_KEEPER_CLIENT +int 
mainEntryClickHouseKeeperClient(int argc, char ** argv); +#endif #if ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv); #endif @@ -133,6 +136,9 @@ std::pair clickhouse_applications[] = #if ENABLE_CLICKHOUSE_KEEPER_CONVERTER {"keeper-converter", mainEntryClickHouseKeeperConverter}, #endif +#if ENABLE_CLICKHOUSE_KEEPER_CLIENT + {"keeper-client", mainEntryClickHouseKeeperClient}, +#endif #if ENABLE_CLICKHOUSE_INSTALL {"install", mainEntryClickHouseInstall}, {"start", mainEntryClickHouseStart}, diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 03ada89e86f..9eb3e6c9ebc 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -696,7 +696,7 @@ try OutdatedPartsLoadingThreadPool::initialize( server_settings.max_outdated_parts_loading_thread_pool_size, 0, // We don't need any threads one all the parts will be loaded - server_settings.outdated_part_loading_thread_pool_queue_size); + server_settings.max_outdated_parts_loading_thread_pool_size); /// Initialize global local cache for remote filesystem. if (config().has("local_cache_for_remote_fs")) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index faddfe4e323..1de047b634f 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -90,14 +90,6 @@ namespace CurrentMetrics namespace DB { -static const NameSet exit_strings -{ - "exit", "quit", "logout", "учше", "йгше", "дщпщге", - "exit;", "quit;", "logout;", "учшеж", "йгшеж", "дщпщгеж", - "q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй" -}; - - namespace ErrorCodes { extern const int BAD_ARGUMENTS; diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 8d56c7eb051..ee4567a0922 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -24,6 +24,14 @@ namespace po = boost::program_options; namespace DB { + +static const NameSet exit_strings +{ + "exit", "quit", "logout", "учше", "йгше", "дщпщге", + "exit;", "quit;", "logout;", "учшеж", "йгшеж", "дщпщгеж", + "q", "й", "\\q", "\\Q", "\\й", "\\Й", ":q", "Жй" +}; + namespace ErrorCodes { extern const int NOT_IMPLEMENTED; diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index e328d0c4e43..b579645dcc9 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -190,7 +190,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts) connected = true; sendHello(); - receiveHello(); + receiveHello(timeouts.handshake_timeout); if (server_revision >= DBMS_MIN_PROTOCOL_VERSION_WITH_ADDENDUM) sendAddendum(); @@ -321,8 +321,10 @@ void Connection::sendAddendum() } -void Connection::receiveHello() +void Connection::receiveHello(const Poco::Timespan & handshake_timeout) { + TimeoutSetter timeout_setter(*socket, socket->getSendTimeout(), handshake_timeout); + /// Receive hello packet. UInt64 packet_type = 0; @@ -375,6 +377,10 @@ void Connection::receiveHello() receiveException()->rethrow(); else { + /// Reset timeout_setter before disconnect, + /// because after disconnect socket will be invalid. + timeout_setter.reset(); + /// Close connection, to not stay in unsynchronised state. 
disconnect(); throwUnexpectedPacket(packet_type, "Hello or Exception"); diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 77dbe5e3398..cb3f2507cb9 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -256,7 +256,7 @@ private: void connect(const ConnectionTimeouts & timeouts); void sendHello(); void sendAddendum(); - void receiveHello(); + void receiveHello(const Poco::Timespan & handshake_timeout); #if USE_SSL void sendClusterNameAndSalt(); diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 2031036eb58..c47d217d432 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -67,7 +67,8 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati Poco::Timespan(config.getInt("connect_timeout", DBMS_DEFAULT_CONNECT_TIMEOUT_SEC), 0), Poco::Timespan(config.getInt("send_timeout", DBMS_DEFAULT_SEND_TIMEOUT_SEC), 0), Poco::Timespan(config.getInt("receive_timeout", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), 0), - Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0)); + Poco::Timespan(config.getInt("tcp_keep_alive_timeout", 0), 0), + Poco::Timespan(config.getInt("handshake_timeout_ms", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC * 1000), 0)); timeouts.sync_request_timeout = Poco::Timespan(config.getInt("sync_request_timeout", DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC), 0); } diff --git a/src/Client/LineReader.cpp b/src/Client/LineReader.cpp index 04b387c9f7d..82dbe03e5d3 100644 --- a/src/Client/LineReader.cpp +++ b/src/Client/LineReader.cpp @@ -81,18 +81,36 @@ replxx::Replxx::completions_t LineReader::Suggest::getCompletions(const String & std::lock_guard lock(mutex); + Words to_search; + bool no_case = false; /// Only perform case sensitive completion when the prefix string contains any uppercase characters if (std::none_of(prefix.begin(), prefix.end(), [](char32_t x) { return iswupper(static_cast(x)); })) + { + to_search = words_no_case; + no_case = true; + } + else + to_search = words; + + if (custom_completions_callback) + { + auto new_words = custom_completions_callback(prefix, prefix_length); + assert(std::is_sorted(new_words.begin(), new_words.end())); + addNewWords(to_search, new_words, std::less{}); + } + + if (no_case) range = std::equal_range( - words_no_case.begin(), words_no_case.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched) + to_search.begin(), to_search.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched) { return strncasecmp(s.data(), prefix_searched.data(), prefix_length) < 0; }); else - range = std::equal_range(words.begin(), words.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched) - { - return strncmp(s.data(), prefix_searched.data(), prefix_length) < 0; - }); + range = std::equal_range( + to_search.begin(), to_search.end(), last_word, [prefix_length](std::string_view s, std::string_view prefix_searched) + { + return strncmp(s.data(), prefix_searched.data(), prefix_length) < 0; + }); return replxx::Replxx::completions_t(range.first, range.second); } diff --git a/src/Client/LineReader.h b/src/Client/LineReader.h index df64a3a85a9..ff297dd03b4 100644 --- a/src/Client/LineReader.h +++ b/src/Client/LineReader.h @@ -18,15 +18,20 @@ public: struct Suggest { using Words = std::vector; + using Callback = std::function; /// Get vector for the matched range of words if any. 
replxx::Replxx::completions_t getCompletions(const String & prefix, size_t prefix_length); void addWords(Words && new_words); + void setCompletionsCallback(Callback && callback) { custom_completions_callback = callback; } + private: Words words TSA_GUARDED_BY(mutex); Words words_no_case TSA_GUARDED_BY(mutex); + Callback custom_completions_callback = nullptr; + std::mutex mutex; }; diff --git a/src/Common/OpenTelemetryTraceContext.cpp b/src/Common/OpenTelemetryTraceContext.cpp index a6af27ec32b..3afc96816bd 100644 --- a/src/Common/OpenTelemetryTraceContext.cpp +++ b/src/Common/OpenTelemetryTraceContext.cpp @@ -124,7 +124,7 @@ SpanHolder::SpanHolder(std::string_view _operation_name, SpanKind _kind) this->start_time_us = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); - /// Add new initialization here + this->addAttribute("clickhouse.thread_id", getThreadId()); } catch (...) { diff --git a/src/Common/TaskStatsInfoGetter.cpp b/src/Common/TaskStatsInfoGetter.cpp index 25030ee9670..867a50c8cce 100644 --- a/src/Common/TaskStatsInfoGetter.cpp +++ b/src/Common/TaskStatsInfoGetter.cpp @@ -9,6 +9,7 @@ #include "hasLinuxCapability.h" #include +#include #include #include @@ -202,10 +203,12 @@ bool checkPermissionsImpl() /// Check that we can successfully initialize TaskStatsInfoGetter. /// It will ask about family id through Netlink. /// On some LXC containers we have capability but we still cannot use Netlink. + /// There is an evidence that Linux fedora-riscv 6.1.22 gives something strange instead of the expected result. try { - TaskStatsInfoGetter(); + ::taskstats stats{}; + TaskStatsInfoGetter().getStat(stats, static_cast(getThreadId())); } catch (const Exception & e) { diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 600dfc56d2b..061959d9f1f 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -37,7 +37,6 @@ class QueryThreadLog; class TasksStatsCounters; struct RUsageCounters; struct PerfEventsCounters; -class TaskStatsInfoGetter; class InternalTextLogsQueue; struct ViewRuntimeData; class QueryViewsLog; diff --git a/src/Coordination/KeeperStateMachine.cpp b/src/Coordination/KeeperStateMachine.cpp index 6635c74149a..f787cc8722e 100644 --- a/src/Coordination/KeeperStateMachine.cpp +++ b/src/Coordination/KeeperStateMachine.cpp @@ -272,7 +272,8 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req } catch (...) { - rollbackRequest(request_for_session, true); + tryLogCurrentException(__PRETTY_FUNCTION__); + rollbackRequestNoLock(request_for_session, true); throw; } @@ -411,6 +412,14 @@ void KeeperStateMachine::rollbackRequest(const KeeperStorage::RequestForSession storage->rollbackRequest(request_for_session.zxid, allow_missing); } +void KeeperStateMachine::rollbackRequestNoLock(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing) +{ + if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) + return; + + storage->rollbackRequest(request_for_session.zxid, allow_missing); +} + nuraft::ptr KeeperStateMachine::last_snapshot() { /// Just return the latest snapshot. 
diff --git a/src/Coordination/KeeperStateMachine.h b/src/Coordination/KeeperStateMachine.h index fbd97fd8631..f6d81d23056 100644 --- a/src/Coordination/KeeperStateMachine.h +++ b/src/Coordination/KeeperStateMachine.h @@ -68,6 +68,8 @@ public: // (can happen in case of exception during preprocessing) void rollbackRequest(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing); + void rollbackRequestNoLock(const KeeperStorage::RequestForSession & request_for_session, bool allow_missing); + uint64_t last_commit_index() override { return last_committed_idx; } /// Apply preliminarily saved (save_logical_snp_obj) snapshot to our state. diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 511b8d68f6d..cb43d62ecd1 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -22,7 +22,6 @@ namespace DB M(UInt64, max_io_thread_pool_free_size, 0, "Max free size for IO thread pool.", 0) \ M(UInt64, io_thread_pool_queue_size, 10000, "Queue size for IO thread pool.", 0) \ M(UInt64, max_outdated_parts_loading_thread_pool_size, 32, "The maximum number of threads that would be used for loading outdated data parts on startup", 0) \ - M(UInt64, outdated_part_loading_thread_pool_queue_size, 10000, "Queue size for parts loading thread pool.", 0) \ M(UInt64, max_replicated_fetches_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated fetches. Zero means unlimited.", 0) \ M(UInt64, max_replicated_sends_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for replicated sends. Zero means unlimited.", 0) \ M(UInt64, max_remote_read_network_bandwidth_for_server, 0, "The maximum speed of data exchange over the network in bytes per second for read. Zero means unlimited.", 0) \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 607be1522db..db65cd0f178 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -55,6 +55,7 @@ class IColumn; M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, "The maximum number of bytes of a query string parsed by the SQL parser. Data in the VALUES clause of INSERT queries is processed by a separate stream parser (that consumes O(1) RAM) and not affected by this restriction.", 0) \ M(UInt64, interactive_delay, 100000, "The interval in microseconds to check if the request is cancelled, and to send progress info.", 0) \ M(Seconds, connect_timeout, DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, "Connection timeout if there are no replicas.", 0) \ + M(Milliseconds, handshake_timeout_ms, 10000, "Timeout for receiving HELLO packet from replicas.", 0) \ M(Milliseconds, connect_timeout_with_failover_ms, 1000, "Connection timeout for selecting first healthy replica.", 0) \ M(Milliseconds, connect_timeout_with_failover_secure_ms, 1000, "Connection timeout for selecting first healthy replica (for secure connections).", 0) \ M(Seconds, receive_timeout, DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, "Timeout for receiving data from network, in seconds. If no bytes were received in this interval, exception is thrown. 
If you set this setting on client, the 'send_timeout' for the socket will be also set on the corresponding connection end on the server.", 0) \ @@ -135,7 +136,7 @@ class IColumn; M(Bool, allow_suspicious_indices, false, "Reject primary/secondary indexes and sorting keys with identical expressions", 0) \ M(Bool, compile_expressions, true, "Compile some scalar functions and operators to native code.", 0) \ M(UInt64, min_count_to_compile_expression, 3, "The number of identical expressions before they are JIT-compiled", 0) \ - M(Bool, compile_aggregate_expressions, false, "Compile aggregate functions to native code. This feature has a bug and should not be used.", 0) \ + M(Bool, compile_aggregate_expressions, true, "Compile aggregate functions to native code.", 0) \ M(UInt64, min_count_to_compile_aggregate_expression, 3, "The number of identical aggregate expressions before they are JIT-compiled", 0) \ M(Bool, compile_sort_description, true, "Compile sort description to native code.", 0) \ M(UInt64, min_count_to_compile_sort_description, 3, "The number of identical sort descriptions before they are JIT-compiled", 0) \ @@ -143,7 +144,7 @@ class IColumn; M(UInt64, group_by_two_level_threshold_bytes, 50000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. Two-level aggregation is used when at least one of the thresholds is triggered.", 0) \ M(Bool, distributed_aggregation_memory_efficient, true, "Is the memory-saving mode of distributed aggregation enabled.", 0) \ M(UInt64, aggregation_memory_efficient_merge_threads, 0, "Number of threads to use for merge intermediate aggregation results in memory efficient mode. When bigger, then more memory is consumed. 0 means - same as 'max_threads'.", 0) \ - M(Bool, enable_memory_bound_merging_of_aggregation_results, false, "Enable memory bound merging strategy for aggregation. Set it to true only if all nodes of your clusters have versions >= 22.12.", 0) \ + M(Bool, enable_memory_bound_merging_of_aggregation_results, true, "Enable memory bound merging strategy for aggregation.", 0) \ M(Bool, enable_positional_arguments, true, "Enable positional arguments in ORDER BY, GROUP BY and LIMIT BY", 0) \ M(Bool, enable_extended_results_for_datetime_functions, false, "Enable date functions like toLastDayOfMonth return Date32 results (instead of Date results) for Date32/DateTime64 arguments.", 0) \ \ @@ -650,6 +651,8 @@ class IColumn; M(UInt64, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \ M(UInt64, merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \ M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead of read with ignore.", 0) \ + M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) \ + M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, "Whether to use constant size tasks for reading from a remote table.", 0) \ \ M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background.
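The two merge_tree_*_for_remote_reading settings added above hint at the reading strategy: instead of one task per mark range, a remote read is cut into tasks of a fixed byte size so that each prefetch is worth its latency. A rough, hypothetical sketch of such splitting (illustrative only; the real MergeTree reader works on marks and granules, not raw byte offsets):

#include <algorithm>
#include <cstddef>
#include <vector>

struct ReadTask { size_t offset; size_t size; };

/// Split `total_bytes` into tasks of `min_bytes_per_task` bytes each (the last one may be smaller);
/// the parameter name mirrors merge_tree_min_bytes_per_task_for_remote_reading.
std::vector<ReadTask> splitIntoConstSizeTasks(size_t total_bytes, size_t min_bytes_per_task)
{
    min_bytes_per_task = std::max<size_t>(min_bytes_per_task, 1);
    std::vector<ReadTask> tasks;
    for (size_t offset = 0; offset < total_bytes; offset += min_bytes_per_task)
        tasks.push_back({offset, std::min(min_bytes_per_task, total_bytes - offset)});
    return tasks;
}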
Zero means asynchronous mode is disabled", 0) \ M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \ @@ -832,7 +835,6 @@ class IColumn; M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \ M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \ M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \ - /* TODO: Consider unifying this with https://github.com/ClickHouse/ClickHouse/issues/38755 */ \ M(Bool, input_format_parquet_preserve_order, false, "Avoid reordering rows when reading from Parquet files. Usually makes it much slower.", 0) \ M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \ M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \ diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index f029ac6ba27..9ee803c4235 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -71,7 +71,7 @@ std::string extractTableName(const std::string & nested_name) } -Block flatten(const Block & block) +static Block flattenImpl(const Block & block, bool flatten_named_tuple) { Block res; @@ -114,7 +114,7 @@ Block flatten(const Block & block) else res.insert(elem); } - else if (const DataTypeTuple * type_tuple = typeid_cast(elem.type.get())) + else if (const DataTypeTuple * type_tuple = typeid_cast(elem.type.get()); type_tuple && flatten_named_tuple) { if (type_tuple->haveExplicitNames()) { @@ -143,6 +143,17 @@ Block flatten(const Block & block) return res; } +Block flatten(const Block & block) +{ + return flattenImpl(block, true); +} + + +Block flattenArrayOfTuples(const Block & block) +{ + return flattenImpl(block, false); +} + namespace { diff --git a/src/DataTypes/NestedUtils.h b/src/DataTypes/NestedUtils.h index 90fdd683493..e009ceb18fe 100644 --- a/src/DataTypes/NestedUtils.h +++ b/src/DataTypes/NestedUtils.h @@ -23,6 +23,9 @@ namespace Nested /// 2) For an Array with named Tuple element column, a Array(Tuple(x ..., y ..., ...)), replace it with multiple Array Columns, a.x ..., a.y ..., ... Block flatten(const Block & block); + /// Same as flatten but only for Array with named Tuple element column. + Block flattenArrayOfTuples(const Block & block); + /// Collect Array columns in a form of `column_name.element_name` to single Array(Tuple(...)) column. NamesAndTypesList collect(const NamesAndTypesList & names_and_types); diff --git a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp index 4aff69505a1..d75ec9f09e0 100644 --- a/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp @@ -42,18 +42,23 @@ namespace ErrorCodes extern const int ARGUMENT_OUT_OF_BOUND; } +static size_t chooseBufferSize(const ReadSettings & settings, size_t file_size) +{ + /// Buffers used for prefetch or pre-download better to have enough size, but not bigger than the whole file. 
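A quick worked example of the sizing rule implemented just below (the concrete numbers are assumptions for illustration, taking both prefetch_buffer_size and DBMS_DEFAULT_BUFFER_SIZE as 1 MiB):

static_assert(std::min(std::max(1048576UL, 1048576UL), 4096UL) == 4096UL);         // tiny file: buffer shrinks to the file size
static_assert(std::min(std::max(1048576UL, 1048576UL), 10UL << 30) == 1048576UL);  // big file: buffer stays at 1 MiB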
+ return std::min(std::max(settings.prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE), file_size); +} AsynchronousBoundedReadBuffer::AsynchronousBoundedReadBuffer( - ImplPtr impl_, - IAsynchronousReader & reader_, - const ReadSettings & settings_, - AsyncReadCountersPtr async_read_counters_, - FilesystemReadPrefetchesLogPtr prefetches_log_) - : ReadBufferFromFileBase(settings_.remote_fs_buffer_size, nullptr, 0) + ImplPtr impl_, + IAsynchronousReader & reader_, + const ReadSettings & settings_, + AsyncReadCountersPtr async_read_counters_, + FilesystemReadPrefetchesLogPtr prefetches_log_) + : ReadBufferFromFileBase(chooseBufferSize(settings_, impl_->getFileSize()), nullptr, 0) , impl(std::move(impl_)) , read_settings(settings_) , reader(reader_) - , prefetch_buffer(settings_.prefetch_buffer_size) + , prefetch_buffer(chooseBufferSize(settings_, impl->getFileSize())) , query_id(CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr ? CurrentThread::getQueryId() : "") , current_reader_id(getRandomASCIIString(8)) , log(&Poco::Logger::get("AsynchronousBoundedReadBuffer")) @@ -103,12 +108,10 @@ void AsynchronousBoundedReadBuffer::prefetch(Priority priority) if (!hasPendingDataToRead()) return; - last_prefetch_info.submit_time = std::chrono::duration_cast( - std::chrono::system_clock::now().time_since_epoch()).count(); + last_prefetch_info.submit_time = std::chrono::system_clock::now(); last_prefetch_info.priority = priority; - chassert(prefetch_buffer.size() == read_settings.prefetch_buffer_size - || prefetch_buffer.size() == read_settings.remote_fs_buffer_size); + chassert(prefetch_buffer.size() == chooseBufferSize(read_settings, impl->getFileSize())); prefetch_future = asyncReadInto(prefetch_buffer.data(), prefetch_buffer.size(), priority); ProfileEvents::increment(ProfileEvents::RemoteFSPrefetches); } @@ -187,7 +190,7 @@ bool AsynchronousBoundedReadBuffer::nextImpl() { ProfileEventTimeIncrement watch(ProfileEvents::SynchronousRemoteReadWaitMicroseconds); - chassert(memory.size() == read_settings.prefetch_buffer_size || memory.size() == read_settings.remote_fs_buffer_size); + chassert(memory.size() == chooseBufferSize(read_settings, impl->getFileSize())); std::tie(size, offset) = impl->readInto(memory.data(), memory.size(), file_offset_of_buffer_end, bytes_to_ignore); ProfileEvents::increment(ProfileEvents::RemoteFSUnprefetchedReads); diff --git a/src/Disks/IO/AsynchronousBoundedReadBuffer.h b/src/Disks/IO/AsynchronousBoundedReadBuffer.h index cafc5b6d691..c307bd8e214 100644 --- a/src/Disks/IO/AsynchronousBoundedReadBuffer.h +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.h @@ -1,11 +1,12 @@ #pragma once -#include "config.h" -#include +#include +#include #include +#include #include #include -#include +#include "config.h" namespace Poco { class Logger; } @@ -71,7 +72,7 @@ private: struct LastPrefetchInfo { - UInt64 submit_time = 0; + std::chrono::system_clock::time_point submit_time; Priority priority; }; LastPrefetchInfo last_prefetch_info; diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index a60f5dffa96..7c497baa450 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -124,7 +124,7 @@ void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size) else { CreateFileSegmentSettings create_settings(is_persistent ? 
FileSegmentKind::Persistent : FileSegmentKind::Regular); - file_segments = cache->getOrSet(cache_key, offset, size, create_settings); + file_segments = cache->getOrSet(cache_key, offset, size, file_size.value(), create_settings); } /** @@ -529,6 +529,9 @@ void CachedOnDiskReadBufferFromFile::predownload(FileSegment & file_segment) ProfileEvents::FileSegmentPredownloadMicroseconds, predownload_watch.elapsedMicroseconds()); }); + OpenTelemetry::SpanHolder span{ + fmt::format("CachedOnDiskReadBufferFromFile::predownload(key={}, size={})", file_segment.key().toString(), bytes_to_predownload)}; + if (bytes_to_predownload) { /// Consider this case. Some user needed segment [a, b] and downloaded it partially. @@ -795,6 +798,8 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() if (file_segments->empty()) return false; + const size_t original_buffer_size = internal_buffer.size(); + bool implementation_buffer_can_be_reused = false; SCOPE_EXIT({ try @@ -820,6 +825,9 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() } } + if (use_external_buffer && !internal_buffer.empty()) + internal_buffer.resize(original_buffer_size); + chassert(!file_segment.isDownloader()); } catch (...) @@ -846,6 +854,11 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() chassert(!internal_buffer.empty()); + /// We allocate buffers not less than 1M so that s3 requests will not be too small. But the same buffers (members of AsynchronousReadIndirectBufferFromRemoteFS) + /// are used for reading from files. Some of these readings are fairly small and their performance degrade when we use big buffers (up to ~20% for queries like Q23 from ClickBench). + if (use_external_buffer && read_type == ReadType::CACHED && settings.local_fs_buffer_size < internal_buffer.size()) + internal_buffer.resize(settings.local_fs_buffer_size); + // Pass a valid external buffer for implementation_buffer to read into. // We then take it back with another swap() after reading is done. // (If we get an exception in between, we'll be left with an invalid internal_buffer. That's ok, as long as diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index b7e598e2a87..988a445cfd0 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -79,7 +79,7 @@ std::future ThreadPoolRemoteFSReader::submit(Reques auto async_read_counters = remote_fs_fd->getReadCounters(); std::optional increment = async_read_counters ? 
std::optional(async_read_counters) : std::nullopt; - auto watch = std::make_unique(CLOCK_MONOTONIC); + auto watch = std::make_unique(CLOCK_REALTIME); Result result = remote_fs_fd->readInto(request.buf, request.size, request.offset, request.ignore); watch->stop(); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index dd6252b96f1..586e1bb7251 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -221,68 +221,16 @@ template FormatSettings getFormatSettings(ContextPtr context, const Se InputFormatPtr FormatFactory::getInput( const String & name, - ReadBuffer & buf, + ReadBuffer & _buf, const Block & sample, ContextPtr context, UInt64 max_block_size, - const std::optional & format_settings, - std::optional max_parsing_threads) const -{ - return getInputImpl( - name, - nullptr, - &buf, - sample, - context, - max_block_size, - /* is_remote_fs */ false, - CompressionMethod::None, - format_settings, - /* max_download_threads */ 1, - max_parsing_threads); -} - -InputFormatPtr FormatFactory::getInputRandomAccess( - const String & name, - SeekableReadBufferFactoryPtr buf_factory, - const Block & sample, - ContextPtr context, - UInt64 max_block_size, - bool is_remote_fs, - CompressionMethod compression, - const std::optional & format_settings, - std::optional max_download_threads, - std::optional max_parsing_threads) const -{ - return getInputImpl( - name, - std::move(buf_factory), - nullptr, - sample, - context, - max_block_size, - is_remote_fs, - compression, - format_settings, - max_download_threads, - max_parsing_threads); -} - -InputFormatPtr FormatFactory::getInputImpl( - const String & name, - // exactly one of the following two is nullptr - SeekableReadBufferFactoryPtr buf_factory, - ReadBuffer * _buf, - const Block & sample, - ContextPtr context, - UInt64 max_block_size, - bool is_remote_fs, - CompressionMethod compression, const std::optional & _format_settings, + std::optional _max_parsing_threads, std::optional _max_download_threads, - std::optional _max_parsing_threads) const + bool is_remote_fs, + CompressionMethod compression) const { - chassert((!_buf) != (!buf_factory)); const auto& creators = getCreators(name); if (!creators.input_creator && !creators.random_access_input_creator) throw Exception(ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT, "Format {} is not suitable for input", name); @@ -302,14 +250,12 @@ InputFormatPtr FormatFactory::getInputImpl( if (context->hasQueryContext() && settings.log_queries) context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Format, name); - // Prepare a read buffer. + // Add ParallelReadBuffer and decompression if needed. - std::unique_ptr owned_buf; - if (buf_factory) - owned_buf = prepareReadBuffer(buf_factory, compression, creators, format_settings, settings, max_download_threads); - auto * buf = owned_buf ? owned_buf.get() : _buf; + auto owned_buf = wrapReadBufferIfNeeded(_buf, compression, creators, format_settings, settings, is_remote_fs, max_download_threads); + auto & buf = owned_buf ? *owned_buf : _buf; - // Decide whether to use parallel ParallelParsingInputFormat. + // Decide whether to use ParallelParsingInputFormat. 
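With getInputRandomAccess() folded into getInput() above, a caller that previously built a SeekableReadBufferFactory now passes its ReadBuffer plus the new trailing arguments directly. A hedged sketch of such a call site (the surrounding variables are assumptions, not taken from this patch):

/// Hypothetical call site; `format_name`, `read_buffer`, `sample_block`, `context` and `max_block_size` are assumed to exist.
InputFormatPtr format = FormatFactory::instance().getInput(
    format_name,                              // e.g. "Parquet"
    *read_buffer,                             // random access is detected via isBufferWithFileSize()
    sample_block,
    context,
    max_block_size,
    /* format_settings */ std::nullopt,
    /* max_parsing_threads */ std::nullopt,
    /* max_download_threads */ std::nullopt,
    /* is_remote_fs */ true,                  // enables the ParallelReadBuffer path for remote sources
    CompressionMethod::Gzip);                 // parallel read happens before decompression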
bool parallel_parsing = max_parsing_threads > 1 && settings.input_format_parallel_parsing && creators.file_segmentation_engine && !creators.random_access_input_creator; @@ -322,7 +268,7 @@ InputFormatPtr FormatFactory::getInputImpl( { const auto & non_trivial_prefix_and_suffix_checker = creators.non_trivial_prefix_and_suffix_checker; /// Disable parallel parsing for input formats with non-trivial readPrefix() and readSuffix(). - if (non_trivial_prefix_and_suffix_checker && non_trivial_prefix_and_suffix_checker(*buf)) + if (non_trivial_prefix_and_suffix_checker && non_trivial_prefix_and_suffix_checker(buf)) parallel_parsing = false; } @@ -340,7 +286,7 @@ InputFormatPtr FormatFactory::getInputImpl( { return input_getter(input, sample, row_input_format_params, format_settings); }; ParallelParsingInputFormat::Params params{ - *buf, sample, parser_creator, creators.file_segmentation_engine, name, max_parsing_threads, + buf, sample, parser_creator, creators.file_segmentation_engine, name, max_parsing_threads, settings.min_chunk_bytes_for_parallel_parsing, max_block_size, context->getApplicationType() == Context::ApplicationType::SERVER}; format = std::make_shared(params); @@ -349,7 +295,6 @@ InputFormatPtr FormatFactory::getInputImpl( { format = creators.random_access_input_creator( buf, - std::move(buf_factory), sample, format_settings, context->getReadSettings(), @@ -359,7 +304,7 @@ InputFormatPtr FormatFactory::getInputImpl( } else { - format = creators.input_creator(*buf, sample, row_input_format_params, format_settings); + format = creators.input_creator(buf, sample, row_input_format_params, format_settings); } if (owned_buf) @@ -375,26 +320,28 @@ InputFormatPtr FormatFactory::getInputImpl( return format; } -std::unique_ptr FormatFactory::prepareReadBuffer( - SeekableReadBufferFactoryPtr & buf_factory, +std::unique_ptr FormatFactory::wrapReadBufferIfNeeded( + ReadBuffer & buf, CompressionMethod compression, const Creators & creators, const FormatSettings & format_settings, const Settings & settings, + bool is_remote_fs, size_t max_download_threads) const { std::unique_ptr res; - bool parallel_read = max_download_threads > 1 && buf_factory && format_settings.seekable_read; + bool parallel_read = is_remote_fs && max_download_threads > 1 && format_settings.seekable_read && isBufferWithFileSize(buf); if (creators.random_access_input_creator) parallel_read &= compression != CompressionMethod::None; + size_t file_size = 0; if (parallel_read) { try { - parallel_read = buf_factory->checkIfActuallySeekable() - && buf_factory->getFileSize() >= 2 * settings.max_download_buffer_size; + file_size = getFileSizeFromReadBuffer(buf); + parallel_read = file_size >= 2 * settings.max_download_buffer_size; } catch (const Poco::Exception & e) { @@ -415,23 +362,18 @@ std::unique_ptr FormatFactory::prepareReadBuffer( max_download_threads, settings.max_download_buffer_size); - res = std::make_unique( - std::move(buf_factory), - threadPoolCallbackRunner(IOThreadPool::get(), "ParallelRead"), - max_download_threads, - settings.max_download_buffer_size); + res = wrapInParallelReadBufferIfSupported( + buf, threadPoolCallbackRunner(IOThreadPool::get(), "ParallelRead"), + max_download_threads, settings.max_download_buffer_size, file_size); } if (compression != CompressionMethod::None) { if (!res) - res = buf_factory->getReader(); // NOLINT + res = wrapReadBufferReference(buf); res = wrapReadBufferWithCompressionMethod(std::move(res), compression, static_cast(settings.zstd_window_log_max)); } - if 
(!creators.random_access_input_creator && !res) - res = buf_factory->getReader(); - return res; } diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 677e34845d8..1d258beca8d 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -90,15 +90,11 @@ private: const FormatSettings & settings)>; // Incompatible with FileSegmentationEngine. - // When created using SeekableReadBufferFactoryPtr, the IInputFormat doesn't support - // resetParser() and setReadBuffer(). // // In future we may also want to pass some information about WHERE conditions (SelectQueryInfo?) // and get some information about projections (min/max/count per column per row group). using RandomAccessInputCreator = std::function & format_settings = std::nullopt, - std::optional max_parsing_threads = std::nullopt) const; - - // Format parser from a random-access source (factory of seekable read buffers). - // Parallelizes both parsing and reading when possible. - // Prefer this over getInput() when reading from random-access source like file or HTTP. - InputFormatPtr getInputRandomAccess( - const String & name, - SeekableReadBufferFactoryPtr buf_factory, - const Block & sample, - ContextPtr context, - UInt64 max_block_size, - bool is_remote_fs, - CompressionMethod compression, - // if nullopt, getFormatSettings(context) is used - const std::optional & format_settings = std::nullopt, + std::optional max_parsing_threads = std::nullopt, std::optional max_download_threads = std::nullopt, - std::optional max_parsing_threads = std::nullopt) const; + // affects things like buffer sizes and parallel reading + bool is_remote_fs = false, + // allows to do: buf -> parallel read -> decompression, + // because parallel read after decompression is not possible + CompressionMethod compression = CompressionMethod::None) const; /// Checks all preconditions. Returns ordinary format if parallel formatting cannot be done. OutputFormatPtr getOutputFormatParallelIfPossible( @@ -272,28 +260,14 @@ private: const Creators & getCreators(const String & name) const; - InputFormatPtr getInputImpl( - const String & name, - // exactly one of the following two is nullptr - SeekableReadBufferFactoryPtr buf_factory, - ReadBuffer * buf, - const Block & sample, - ContextPtr context, - UInt64 max_block_size, - bool is_remote_fs, - CompressionMethod compression, - const std::optional & format_settings, - std::optional max_download_threads, - std::optional max_parsing_threads) const; - - // Creates a ReadBuffer to give to an input format. - // Returns nullptr if we should give it the whole factory. - std::unique_ptr prepareReadBuffer( - SeekableReadBufferFactoryPtr & buf_factory, + // Creates a ReadBuffer to give to an input format. Returns nullptr if we should use `buf` directly. 
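The decision tree inside wrapReadBufferIfNeeded() (shown in FormatFactory.cpp above) boils down to a small predicate. The following is an editor's condensed sketch with the individual checks lifted into parameters; it is not the actual helper:

/// Parallel download only pays off for remote, seekable, size-known sources that are large enough.
/// For random-access formats it is only applied when the stream is compressed, presumably because
/// those formats issue their own range reads when the input is not compressed.
bool shouldWrapInParallelRead(
    bool is_remote_fs, size_t max_download_threads, bool format_supports_seekable_read,
    bool buffer_has_known_file_size, bool format_is_random_access, bool is_compressed,
    size_t file_size, size_t max_download_buffer_size)
{
    bool parallel = is_remote_fs && max_download_threads > 1
        && format_supports_seekable_read && buffer_has_known_file_size;
    if (format_is_random_access)
        parallel = parallel && is_compressed;
    return parallel && file_size >= 2 * max_download_buffer_size;
}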
+ std::unique_ptr wrapReadBufferIfNeeded( + ReadBuffer & buf, CompressionMethod compression, const Creators & creators, const FormatSettings & format_settings, const Settings & settings, + bool is_remote_fs, size_t max_download_threads) const; }; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index c88af650671..e332bd749a1 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -214,8 +214,6 @@ struct FormatSettings std::unordered_set skip_row_groups = {}; bool output_string_as_string = false; bool output_fixed_string_as_fixed_byte_array = true; - // TODO: This should probably be shared among all formats and with - // https://github.com/ClickHouse/ClickHouse/issues/38755 bool preserve_order = false; UInt64 max_block_size = 8192; ParquetVersion output_version; diff --git a/src/Functions/FunctionGenerateRandomStructure.cpp b/src/Functions/FunctionGenerateRandomStructure.cpp new file mode 100644 index 00000000000..f85b2596530 --- /dev/null +++ b/src/Functions/FunctionGenerateRandomStructure.cpp @@ -0,0 +1,446 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int BAD_ARGUMENTS; +} + +namespace +{ + const size_t MAX_NUMBER_OF_COLUMNS = 128; + const size_t MAX_TUPLE_ELEMENTS = 16; + const size_t MAX_DATETIME64_PRECISION = 9; + const size_t MAX_DECIMAL32_PRECISION = 9; + const size_t MAX_DECIMAL64_PRECISION = 18; + const size_t MAX_DECIMAL128_PRECISION = 38; + const size_t MAX_DECIMAL256_PRECISION = 76; + const size_t MAX_DEPTH = 16; + + constexpr std::array simple_types + { + TypeIndex::Int8, + TypeIndex::UInt8, + TypeIndex::Int16, + TypeIndex::UInt16, + TypeIndex::Int32, + TypeIndex::UInt32, + TypeIndex::Int64, + TypeIndex::UInt64, + TypeIndex::Int128, + TypeIndex::UInt128, + TypeIndex::Int256, + TypeIndex::UInt256, + TypeIndex::Float32, + TypeIndex::Float64, + TypeIndex::Decimal32, + TypeIndex::Decimal64, + TypeIndex::Decimal128, + TypeIndex::Decimal256, + TypeIndex::Date, + TypeIndex::Date32, + TypeIndex::DateTime, + TypeIndex::DateTime64, + TypeIndex::String, + TypeIndex::FixedString, + TypeIndex::Enum8, + TypeIndex::Enum16, + TypeIndex::IPv4, + TypeIndex::IPv6, + TypeIndex::UUID, + }; + + constexpr std::array complex_types + { + TypeIndex::Nullable, + TypeIndex::LowCardinality, + TypeIndex::Array, + TypeIndex::Tuple, + TypeIndex::Map, + }; + + constexpr std::array map_key_types + { + TypeIndex::Int8, + TypeIndex::UInt8, + TypeIndex::Int16, + TypeIndex::UInt16, + TypeIndex::Int32, + TypeIndex::UInt32, + TypeIndex::Int64, + TypeIndex::UInt64, + TypeIndex::Int128, + TypeIndex::UInt128, + TypeIndex::Int256, + TypeIndex::UInt256, + TypeIndex::Date, + TypeIndex::Date32, + TypeIndex::DateTime, + TypeIndex::String, + TypeIndex::FixedString, + TypeIndex::IPv4, + TypeIndex::Enum8, + TypeIndex::Enum16, + TypeIndex::UUID, + TypeIndex::LowCardinality, + }; + + constexpr std::array suspicious_lc_types + { + TypeIndex::Int8, + TypeIndex::UInt8, + TypeIndex::Int16, + TypeIndex::UInt16, + TypeIndex::Int32, + TypeIndex::UInt32, + TypeIndex::Int64, + TypeIndex::UInt64, + TypeIndex::Int128, + TypeIndex::UInt128, + TypeIndex::Int256, + TypeIndex::UInt256, + TypeIndex::Float32, + TypeIndex::Float64, + TypeIndex::Date, + TypeIndex::Date32, + TypeIndex::DateTime, + TypeIndex::String, + TypeIndex::FixedString, + 
TypeIndex::IPv4, + TypeIndex::IPv6, + TypeIndex::UUID, + }; + + template + constexpr auto getAllTypes() + { + constexpr size_t complex_types_size = complex_types.size() * allow_complex_types; + constexpr size_t result_size = simple_types.size() + complex_types_size; + std::array result; + size_t index = 0; + + for (size_t i = 0; i != simple_types.size(); ++i, ++index) + result[index] = simple_types[i]; + + for (size_t i = 0; i != complex_types_size; ++i, ++index) + result[index] = complex_types[i]; + + return result; + } + + size_t generateNumberOfColumns(pcg64 & rng) + { + return rng() % MAX_NUMBER_OF_COLUMNS + 1; + } + + void writeLowCardinalityNestedType(pcg64 & rng, WriteBuffer & buf, bool allow_suspicious_lc_types) + { + bool make_nullable = rng() % 2; + if (make_nullable) + writeCString("Nullable(", buf); + + if (allow_suspicious_lc_types) + { + TypeIndex type = suspicious_lc_types[rng() % suspicious_lc_types.size()]; + + if (type == TypeIndex::FixedString) + writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf); + else + writeString(magic_enum::enum_name(type), buf); + } + else + { + /// Support only String and FixedString. + if (rng() % 2) + writeCString("String", buf); + else + writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf); + } + + if (make_nullable) + writeChar(')', buf); + } + + void writeEnumValues(const String & column_name, pcg64 & rng, WriteBuffer & buf, ssize_t max_value) + { + /// Don't generate big enums, because it will lead to really big result + /// and slowness of this function, and it can lead to `Max query size exceeded` + /// while using this function with generateRandom. + size_t num_values = rng() % 16 + 1; + std::vector values(num_values); + + /// Generate random numbers from range [-(max_value + 1), max_value - num_values + 1]. + for (Int16 & x : values) + x = rng() % (2 * max_value + 3 - num_values) - max_value - 1; + /// Make all numbers unique. + std::sort(values.begin(), values.end()); + for (size_t i = 0; i < num_values; ++i) + values[i] += i; + std::shuffle(values.begin(), values.end(), rng); + for (size_t i = 0; i != num_values; ++i) + { + if (i != 0) + writeCString(", ", buf); + writeString("'" + column_name + "V" + std::to_string(i) + "' = " + std::to_string(values[i]), buf); + } + } + + void writeMapKeyType(const String & column_name, pcg64 & rng, WriteBuffer & buf) + { + TypeIndex type = map_key_types[rng() % map_key_types.size()]; + switch (type) + { + case TypeIndex::FixedString: + writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf); + break; + case TypeIndex::LowCardinality: + writeCString("LowCardinality(", buf); + /// Map key supports only String and FixedString inside LowCardinality. 
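The unique-value trick used by writeEnumValues() above is worth spelling out: values are drawn from a range shrunk by the number of items, sorted, and then the index is added, which makes them strictly increasing (hence distinct) while keeping them inside [-(max+1), max]. A standalone illustration (editor's sketch, hypothetical names):

#include <algorithm>
#include <cstdint>
#include <random>
#include <vector>

std::vector<int64_t> distinctRandomValues(std::mt19937_64 & rng, size_t n, int64_t max_value)
{
    std::vector<int64_t> values(n);
    const uint64_t range = static_cast<uint64_t>(2 * max_value + 3) - n;    /// draw from [-(max + 1), max - n + 1]
    for (auto & x : values)
        x = static_cast<int64_t>(rng() % range) - max_value - 1;

    std::sort(values.begin(), values.end());
    for (size_t i = 0; i != n; ++i)
        values[i] += static_cast<int64_t>(i);      /// now values[i] > values[i - 1], so all are distinct

    std::shuffle(values.begin(), values.end(), rng);   /// restore a random order
    return values;
}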
+ if (rng() % 2) + writeCString("String", buf); + else + writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf); + writeChar(')', buf); + break; + case TypeIndex::Enum8: + writeCString("Enum8(", buf); + writeEnumValues(column_name, rng, buf, INT8_MAX); + writeChar(')', buf); + break; + case TypeIndex::Enum16: + writeCString("Enum16(", buf); + writeEnumValues(column_name, rng, buf, INT16_MAX); + writeChar(')', buf); + break; + default: + writeString(magic_enum::enum_name(type), buf); + break; + } + } + + template + void writeRandomType(const String & column_name, pcg64 & rng, WriteBuffer & buf, bool allow_suspicious_lc_types, size_t depth = 0) + { + if (allow_complex_types && depth > MAX_DEPTH) + writeRandomType(column_name, rng, buf, depth); + + constexpr auto all_types = getAllTypes(); + auto type = all_types[rng() % all_types.size()]; + + switch (type) + { + case TypeIndex::UInt8: + if (rng() % 2) + writeCString("UInt8", buf); + else + writeCString("Bool", buf); + return; + case TypeIndex::FixedString: + writeString("FixedString(" + std::to_string(rng() % MAX_FIXEDSTRING_SIZE_WITHOUT_SUSPICIOUS + 1) + ")", buf); + return; + case TypeIndex::DateTime64: + writeString("DateTime64(" + std::to_string(rng() % MAX_DATETIME64_PRECISION + 1) + ")", buf); + return; + case TypeIndex::Decimal32: + writeString("Decimal32(" + std::to_string(rng() % MAX_DECIMAL32_PRECISION + 1) + ")", buf); + return; + case TypeIndex::Decimal64: + writeString("Decimal64(" + std::to_string(rng() % MAX_DECIMAL64_PRECISION + 1) + ")", buf); + return; + case TypeIndex::Decimal128: + writeString("Decimal128(" + std::to_string(rng() % MAX_DECIMAL128_PRECISION + 1) + ")", buf); + return; + case TypeIndex::Decimal256: + writeString("Decimal256(" + std::to_string(rng() % MAX_DECIMAL256_PRECISION + 1) + ")", buf); + return; + case TypeIndex::Enum8: + writeCString("Enum8(", buf); + writeEnumValues(column_name, rng, buf, INT8_MAX); + writeChar(')', buf); + return; + case TypeIndex::Enum16: + writeCString("Enum16(", buf); + writeEnumValues(column_name, rng, buf, INT16_MAX); + writeChar(')', buf); + return; + case TypeIndex::LowCardinality: + writeCString("LowCardinality(", buf); + writeLowCardinalityNestedType(rng, buf, allow_suspicious_lc_types); + writeChar(')', buf); + return; + case TypeIndex::Nullable: + { + writeCString("Nullable(", buf); + writeRandomType(column_name, rng, buf, allow_suspicious_lc_types, depth + 1); + writeChar(')', buf); + return; + } + case TypeIndex::Array: + { + writeCString("Array(", buf); + writeRandomType(column_name, rng, buf, allow_suspicious_lc_types, depth + 1); + writeChar(')', buf); + return; + } + case TypeIndex::Map: + { + writeCString("Map(", buf); + writeMapKeyType(column_name, rng, buf); + writeCString(", ", buf); + writeRandomType(column_name, rng, buf, allow_suspicious_lc_types, depth + 1); + writeChar(')', buf); + return; + } + case TypeIndex::Tuple: + { + size_t elements = rng() % MAX_TUPLE_ELEMENTS + 1; + bool generate_nested = rng() % 2; + bool generate_named_tuple = rng() % 2; + if (generate_nested) + writeCString("Nested(", buf); + else + writeCString("Tuple(", buf); + + for (size_t i = 0; i != elements; ++i) + { + if (i != 0) + writeCString(", ", buf); + + String element_name = "e" + std::to_string(i + 1); + if (generate_named_tuple || generate_nested) + { + writeString(element_name, buf); + writeChar(' ', buf); + } + writeRandomType(element_name, rng, buf, allow_suspicious_lc_types, depth + 1); + } + writeChar(')', buf); + 
return; + } + default: + writeString(magic_enum::enum_name(type), buf); + return; + } + } + + void writeRandomStructure(pcg64 & rng, size_t number_of_columns, WriteBuffer & buf, bool allow_suspicious_lc_types) + { + for (size_t i = 0; i != number_of_columns; ++i) + { + if (i != 0) + writeCString(", ", buf); + String column_name = "c" + std::to_string(i + 1); + writeString(column_name, buf); + writeChar(' ', buf); + writeRandomType(column_name, rng, buf, allow_suspicious_lc_types); + } + } +} + +DataTypePtr FunctionGenerateRandomStructure::getReturnTypeImpl(const DataTypes & arguments) const +{ + if (arguments.size() > 2) + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, expected from 0 to 2", + getName(), arguments.size()); + + + for (size_t i = 0; i != arguments.size(); ++i) + { + if (!isUnsignedInteger(arguments[i]) && !arguments[i]->onlyNull()) + { + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of the {} argument of function {}, expected unsigned integer or Null", + arguments[i]->getName(), + i + 1, + getName()); + } + } + + return std::make_shared(); +} + +ColumnPtr FunctionGenerateRandomStructure::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const +{ + size_t seed = randomSeed(); + size_t number_of_columns = 0; + + if (!arguments.empty() && !arguments[0].column->onlyNull()) + { + number_of_columns = arguments[0].column->getUInt(0); + if (number_of_columns > MAX_NUMBER_OF_COLUMNS) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Maximum allowed number of columns is {}, got {}", + MAX_NUMBER_OF_COLUMNS, + number_of_columns); + } + + if (arguments.size() > 1 && !arguments[1].column->onlyNull()) + seed = arguments[1].column->getUInt(0); + + pcg64 rng(seed); + if (number_of_columns == 0) + number_of_columns = generateNumberOfColumns(rng); + + auto col_res = ColumnString::create(); + auto & string_column = assert_cast(*col_res); + auto & chars = string_column.getChars(); + WriteBufferFromVector buf(chars); + writeRandomStructure(rng, number_of_columns, buf, allow_suspicious_lc_types); + buf.finalize(); + chars.push_back(0); + string_column.getOffsets().push_back(chars.size()); + return ColumnConst::create(std::move(col_res), input_rows_count); +} + +String FunctionGenerateRandomStructure::generateRandomStructure(size_t seed, const ContextPtr & context) +{ + pcg64 rng(seed); + size_t number_of_columns = generateNumberOfColumns(rng); + WriteBufferFromOwnString buf; + writeRandomStructure(rng, number_of_columns, buf, context->getSettingsRef().allow_suspicious_low_cardinality_types); + return buf.str(); +} + +REGISTER_FUNCTION(GenerateRandomStructure) +{ + factory.registerFunction(FunctionDocumentation + { + .description=R"( +Generates a random table structure. +This function takes 2 optional constant arguments: +the number of columns in the result structure (random by default) and random seed (random by default) +The maximum number of columns is 128. +The function returns a value of type String. 
+)", + .examples{ + {"random", "SELECT generateRandomStructure()", "c1 UInt32, c2 FixedString(25)"}, + {"with specified number of columns", "SELECT generateRandomStructure(3)", "c1 String, c2 Array(Int32), c3 LowCardinality(String)"}, + {"with specified seed", "SELECT generateRandomStructure(1, 42)", "c1 UInt128"}, + }, + .categories{"Random"} + }, + FunctionFactory::CaseSensitive); +} + +} diff --git a/src/Functions/FunctionGenerateRandomStructure.h b/src/Functions/FunctionGenerateRandomStructure.h new file mode 100644 index 00000000000..894096a6e07 --- /dev/null +++ b/src/Functions/FunctionGenerateRandomStructure.h @@ -0,0 +1,47 @@ +#pragma once + +#include +#include + +#include + +namespace DB +{ + +class FunctionGenerateRandomStructure : public IFunction +{ +public: + static constexpr auto name = "generateRandomStructure"; + + explicit FunctionGenerateRandomStructure(bool allow_suspicious_lc_types_) : allow_suspicious_lc_types(allow_suspicious_lc_types_) + { + } + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context->getSettingsRef().allow_suspicious_low_cardinality_types.value); + } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 0; } + + bool isVariadic() const override { return true; } + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return false; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 1}; } + bool useDefaultImplementationForConstants() const override { return false; } + bool useDefaultImplementationForNulls() const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override; + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override; + + static String generateRandomStructure(size_t seed, const ContextPtr & context); + +private: + bool allow_suspicious_lc_types; +}; + +} diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 3de757bfa3f..32e3fbbd4ea 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -1073,55 +1073,72 @@ private: size_t size = vec_from.size(); for (size_t i = 0; i < size; ++i) { - ToType h; + ToType hash; if constexpr (Impl::use_int_hash_for_pods) { if constexpr (std::is_same_v) - h = IntHash64Impl::apply(bit_cast(vec_from[i])); + hash = IntHash64Impl::apply(bit_cast(vec_from[i])); else - h = IntHash32Impl::apply(bit_cast(vec_from[i])); + hash = IntHash32Impl::apply(bit_cast(vec_from[i])); } else { if constexpr (std::is_same_v) - h = JavaHashImpl::apply(vec_from[i]); + hash = JavaHashImpl::apply(vec_from[i]); else { - FromType v = vec_from[i]; + FromType value = vec_from[i]; if constexpr (std::endian::native == std::endian::big) { - FromType tmp_v; - reverseMemcpy(&tmp_v, &v, sizeof(v)); - v = tmp_v; + FromType value_reversed; + reverseMemcpy(&value_reversed, &value, sizeof(value)); + value = value_reversed; } - h = apply(key, reinterpret_cast(&v), sizeof(v)); - } + hash = apply(key, reinterpret_cast(&value), sizeof(value)); + } } if constexpr (first) - vec_to[i] = h; + vec_to[i] = hash; else - vec_to[i] = combineHashes(key, vec_to[i], h); + vec_to[i] = combineHashes(key, vec_to[i], hash); } } else if (auto col_from_const = checkAndGetColumnConst(column)) { 
auto value = col_from_const->template getValue(); ToType hash; - if constexpr (std::is_same_v) - hash = IntHash64Impl::apply(bit_cast(value)); + + if constexpr (Impl::use_int_hash_for_pods) + { + if constexpr (std::is_same_v) + hash = IntHash64Impl::apply(bit_cast(value)); + else + hash = IntHash32Impl::apply(bit_cast(value)); + } else - hash = IntHash32Impl::apply(bit_cast(value)); + { + if constexpr (std::is_same_v) + hash = JavaHashImpl::apply(value); + else + { + if constexpr (std::endian::native == std::endian::big) + { + FromType value_reversed; + reverseMemcpy(&value_reversed, &value, sizeof(value)); + value = value_reversed; + } + hash = apply(key, reinterpret_cast(&value), sizeof(value)); + } + } size_t size = vec_to.size(); if constexpr (first) vec_to.assign(size, hash); else - { for (size_t i = 0; i < size; ++i) vec_to[i] = combineHashes(key, vec_to[i], hash); - } } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", @@ -1139,46 +1156,40 @@ private: size_t size = vec_from.size(); for (size_t i = 0; i < size; ++i) { - ToType h; + ToType hash; if constexpr (std::endian::native == std::endian::little) - { - h = apply(key, reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); - } + hash = apply(key, reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); else { char tmp_buffer[sizeof(vec_from[i])]; reverseMemcpy(tmp_buffer, &vec_from[i], sizeof(vec_from[i])); - h = apply(key, reinterpret_cast(tmp_buffer), sizeof(vec_from[i])); + hash = apply(key, reinterpret_cast(tmp_buffer), sizeof(vec_from[i])); } if constexpr (first) - vec_to[i] = h; + vec_to[i] = hash; else - vec_to[i] = combineHashes(key, vec_to[i], h); + vec_to[i] = combineHashes(key, vec_to[i], hash); } } else if (auto col_from_const = checkAndGetColumnConst(column)) { auto value = col_from_const->template getValue(); - ToType h; + ToType hash; if constexpr (std::endian::native == std::endian::little) - { - h = apply(key, reinterpret_cast(&value), sizeof(value)); - } + hash = apply(key, reinterpret_cast(&value), sizeof(value)); else { char tmp_buffer[sizeof(value)]; reverseMemcpy(tmp_buffer, &value, sizeof(value)); - h = apply(key, reinterpret_cast(tmp_buffer), sizeof(value)); + hash = apply(key, reinterpret_cast(tmp_buffer), sizeof(value)); } size_t size = vec_to.size(); if constexpr (first) - vec_to.assign(size, h); + vec_to.assign(size, hash); else - { for (size_t i = 0; i < size; ++i) - vec_to[i] = combineHashes(key, vec_to[i], h); - } + vec_to[i] = combineHashes(key, vec_to[i], hash); } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", @@ -1191,11 +1202,11 @@ private: for (size_t i = 0, size = column->size(); i < size; ++i) { StringRef bytes = column->getDataAt(i); - const ToType h = apply(key, bytes.data, bytes.size); + const ToType hash = apply(key, bytes.data, bytes.size); if constexpr (first) - vec_to[i] = h; + vec_to[i] = hash; else - vec_to[i] = combineHashes(key, vec_to[i], h); + vec_to[i] = combineHashes(key, vec_to[i], hash); } } @@ -1211,14 +1222,14 @@ private: ColumnString::Offset current_offset = 0; for (size_t i = 0; i < size; ++i) { - const ToType h = apply(key, + const ToType hash = apply(key, reinterpret_cast(&data[current_offset]), offsets[i] - current_offset - 1); if constexpr (first) - vec_to[i] = h; + vec_to[i] = hash; else - vec_to[i] = combineHashes(key, vec_to[i], h); + vec_to[i] = combineHashes(key, vec_to[i], hash); current_offset = offsets[i]; } @@ -1231,11 +1242,11 @@ private: for (size_t i = 0; i < size; 
++i) { - const ToType h = apply(key, reinterpret_cast(&data[i * n]), n); + const ToType hash = apply(key, reinterpret_cast(&data[i * n]), n); if constexpr (first) - vec_to[i] = h; + vec_to[i] = hash; else - vec_to[i] = combineHashes(key, vec_to[i], h); + vec_to[i] = combineHashes(key, vec_to[i], hash); } } else if (const ColumnConst * col_from_const = checkAndGetColumnConstStringOrFixedString(column)) @@ -1245,16 +1256,10 @@ private: const size_t size = vec_to.size(); if constexpr (first) - { vec_to.assign(size, hash); - } else - { for (size_t i = 0; i < size; ++i) - { vec_to[i] = combineHashes(key, vec_to[i], hash); - } - } } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", @@ -1283,16 +1288,16 @@ private: { ColumnArray::Offset next_offset = offsets[i]; - ToType h; + ToType hash; if constexpr (std::is_same_v) - h = IntHash64Impl::apply(next_offset - current_offset); + hash = IntHash64Impl::apply(next_offset - current_offset); else - h = IntHash32Impl::apply(next_offset - current_offset); + hash = IntHash32Impl::apply(next_offset - current_offset); if constexpr (first) - vec_to[i] = h; + vec_to[i] = hash; else - vec_to[i] = combineHashes(key, vec_to[i], h); + vec_to[i] = combineHashes(key, vec_to[i], hash); for (size_t j = current_offset; j < next_offset; ++j) vec_to[i] = combineHashes(key, vec_to[i], vec_temp[j]); diff --git a/src/IO/ConnectionTimeouts.cpp b/src/IO/ConnectionTimeouts.cpp index 401afb7baac..01fbaa4f817 100644 --- a/src/IO/ConnectionTimeouts.cpp +++ b/src/IO/ConnectionTimeouts.cpp @@ -17,22 +17,7 @@ ConnectionTimeouts::ConnectionTimeouts( , secure_connection_timeout(connection_timeout) , hedged_connection_timeout(receive_timeout_) , receive_data_timeout(receive_timeout_) -{ -} - -ConnectionTimeouts::ConnectionTimeouts( - Poco::Timespan connection_timeout_, - Poco::Timespan send_timeout_, - Poco::Timespan receive_timeout_, - Poco::Timespan tcp_keep_alive_timeout_) - : connection_timeout(connection_timeout_) - , send_timeout(send_timeout_) - , receive_timeout(receive_timeout_) - , tcp_keep_alive_timeout(tcp_keep_alive_timeout_) - , http_keep_alive_timeout(0) - , secure_connection_timeout(connection_timeout) - , hedged_connection_timeout(receive_timeout_) - , receive_data_timeout(receive_timeout_) + , handshake_timeout(receive_timeout_) { } @@ -41,7 +26,26 @@ ConnectionTimeouts::ConnectionTimeouts( Poco::Timespan send_timeout_, Poco::Timespan receive_timeout_, Poco::Timespan tcp_keep_alive_timeout_, - Poco::Timespan http_keep_alive_timeout_) + Poco::Timespan handshake_timeout_) + : connection_timeout(connection_timeout_) + , send_timeout(send_timeout_) + , receive_timeout(receive_timeout_) + , tcp_keep_alive_timeout(tcp_keep_alive_timeout_) + , http_keep_alive_timeout(0) + , secure_connection_timeout(connection_timeout) + , hedged_connection_timeout(receive_timeout_) + , receive_data_timeout(receive_timeout_) + , handshake_timeout(handshake_timeout_) +{ +} + +ConnectionTimeouts::ConnectionTimeouts( + Poco::Timespan connection_timeout_, + Poco::Timespan send_timeout_, + Poco::Timespan receive_timeout_, + Poco::Timespan tcp_keep_alive_timeout_, + Poco::Timespan http_keep_alive_timeout_, + Poco::Timespan handshake_timeout_) : connection_timeout(connection_timeout_) , send_timeout(send_timeout_) , receive_timeout(receive_timeout_) @@ -50,6 +54,7 @@ ConnectionTimeouts::ConnectionTimeouts( , secure_connection_timeout(connection_timeout) , hedged_connection_timeout(receive_timeout_) , receive_data_timeout(receive_timeout_) + , 
handshake_timeout(handshake_timeout_) { } @@ -60,16 +65,18 @@ ConnectionTimeouts::ConnectionTimeouts( Poco::Timespan tcp_keep_alive_timeout_, Poco::Timespan http_keep_alive_timeout_, Poco::Timespan secure_connection_timeout_, - Poco::Timespan receive_hello_timeout_, - Poco::Timespan receive_data_timeout_) + Poco::Timespan hedged_connection_timeout_, + Poco::Timespan receive_data_timeout_, + Poco::Timespan handshake_timeout_) : connection_timeout(connection_timeout_) , send_timeout(send_timeout_) , receive_timeout(receive_timeout_) , tcp_keep_alive_timeout(tcp_keep_alive_timeout_) , http_keep_alive_timeout(http_keep_alive_timeout_) , secure_connection_timeout(secure_connection_timeout_) - , hedged_connection_timeout(receive_hello_timeout_) + , hedged_connection_timeout(hedged_connection_timeout_) , receive_data_timeout(receive_data_timeout_) + , handshake_timeout(handshake_timeout_) { } @@ -90,13 +97,14 @@ ConnectionTimeouts ConnectionTimeouts::getSaturated(Poco::Timespan limit) const saturate(http_keep_alive_timeout, limit), saturate(secure_connection_timeout, limit), saturate(hedged_connection_timeout, limit), - saturate(receive_data_timeout, limit)); + saturate(receive_data_timeout, limit), + saturate(handshake_timeout, limit)); } /// Timeouts for the case when we have just single attempt to connect. ConnectionTimeouts ConnectionTimeouts::getTCPTimeoutsWithoutFailover(const Settings & settings) { - return ConnectionTimeouts(settings.connect_timeout, settings.send_timeout, settings.receive_timeout, settings.tcp_keep_alive_timeout); + return ConnectionTimeouts(settings.connect_timeout, settings.send_timeout, settings.receive_timeout, settings.tcp_keep_alive_timeout, settings.handshake_timeout_ms); } /// Timeouts for the case when we will try many addresses in a loop. 
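The handshake_timeout plumbed through ConnectionTimeouts above is presumably applied by temporarily lowering the socket receive timeout while the client waits for the server's Hello, then restoring the regular receive timeout. A hedged sketch of such a guard (ClickHouse has its own TimeoutSetter helper for this; the code below is an illustration with assumed names, not the actual Connection.cpp change):

#include <Poco/Net/StreamSocket.h>
#include <Poco/Timespan.h>

struct HandshakeTimeoutGuard
{
    Poco::Net::StreamSocket & socket;
    Poco::Timespan saved;

    HandshakeTimeoutGuard(Poco::Net::StreamSocket & socket_, const Poco::Timespan & handshake_timeout)
        : socket(socket_), saved(socket_.getReceiveTimeout())
    {
        socket.setReceiveTimeout(handshake_timeout);   /// only the Hello read uses the shorter timeout
    }

    ~HandshakeTimeoutGuard()
    {
        try
        {
            socket.setReceiveTimeout(saved);           /// restore receive_timeout for normal traffic
        }
        catch (...) {}                                 /// never throw from a destructor
    }
};

/// Usage inside something like receiveHello(handshake_timeout):
///     HandshakeTimeoutGuard guard(*socket, timeouts.handshake_timeout);
///     ... read and validate the Hello (or Exception) packet ...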
@@ -110,7 +118,8 @@ ConnectionTimeouts ConnectionTimeouts::getTCPTimeoutsWithFailover(const Settings 0, settings.connect_timeout_with_failover_secure_ms, settings.hedged_connection_timeout_ms, - settings.receive_data_timeout_ms); + settings.receive_data_timeout_ms, + settings.handshake_timeout_ms); } ConnectionTimeouts ConnectionTimeouts::getHTTPTimeouts(const Settings & settings, Poco::Timespan http_keep_alive_timeout) @@ -120,7 +129,8 @@ ConnectionTimeouts ConnectionTimeouts::getHTTPTimeouts(const Settings & settings settings.http_send_timeout, settings.http_receive_timeout, settings.tcp_keep_alive_timeout, - http_keep_alive_timeout); + http_keep_alive_timeout, + settings.http_receive_timeout); } } diff --git a/src/IO/ConnectionTimeouts.h b/src/IO/ConnectionTimeouts.h index 368288ee022..684af42827f 100644 --- a/src/IO/ConnectionTimeouts.h +++ b/src/IO/ConnectionTimeouts.h @@ -23,6 +23,9 @@ struct ConnectionTimeouts Poco::Timespan hedged_connection_timeout; Poco::Timespan receive_data_timeout; + /// Timeout for receiving HELLO packet + Poco::Timespan handshake_timeout; + /// Timeout for synchronous request-result protocol call (like Ping or TablesStatus) Poco::Timespan sync_request_timeout = Poco::Timespan(DBMS_DEFAULT_SYNC_REQUEST_TIMEOUT_SEC, 0); @@ -35,13 +38,15 @@ struct ConnectionTimeouts ConnectionTimeouts(Poco::Timespan connection_timeout_, Poco::Timespan send_timeout_, Poco::Timespan receive_timeout_, - Poco::Timespan tcp_keep_alive_timeout_); + Poco::Timespan tcp_keep_alive_timeout_, + Poco::Timespan handshake_timeout_); ConnectionTimeouts(Poco::Timespan connection_timeout_, Poco::Timespan send_timeout_, Poco::Timespan receive_timeout_, Poco::Timespan tcp_keep_alive_timeout_, - Poco::Timespan http_keep_alive_timeout_); + Poco::Timespan http_keep_alive_timeout_, + Poco::Timespan handshake_timeout_); ConnectionTimeouts(Poco::Timespan connection_timeout_, Poco::Timespan send_timeout_, @@ -49,8 +54,9 @@ struct ConnectionTimeouts Poco::Timespan tcp_keep_alive_timeout_, Poco::Timespan http_keep_alive_timeout_, Poco::Timespan secure_connection_timeout_, - Poco::Timespan receive_hello_timeout_, - Poco::Timespan receive_data_timeout_); + Poco::Timespan hedged_connection_timeout_, + Poco::Timespan receive_data_timeout_, + Poco::Timespan handshake_timeout_); static Poco::Timespan saturate(Poco::Timespan timespan, Poco::Timespan limit); ConnectionTimeouts getSaturated(Poco::Timespan limit) const; diff --git a/src/IO/MMapReadBufferFromFileDescriptor.cpp b/src/IO/MMapReadBufferFromFileDescriptor.cpp index c0eb73f8638..9b1c132cc01 100644 --- a/src/IO/MMapReadBufferFromFileDescriptor.cpp +++ b/src/IO/MMapReadBufferFromFileDescriptor.cpp @@ -91,4 +91,15 @@ size_t MMapReadBufferFromFileDescriptor::getFileSize() { return getSizeFromFileDescriptor(getFD(), getFileName()); } + +size_t MMapReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function &) +{ + if (offset >= mapped.getLength()) + return 0; + + n = std::min(n, mapped.getLength() - offset); + memcpy(to, mapped.getData() + offset, n); + return n; +} + } diff --git a/src/IO/MMapReadBufferFromFileDescriptor.h b/src/IO/MMapReadBufferFromFileDescriptor.h index 1a4bcd4f3ed..2a039e04971 100644 --- a/src/IO/MMapReadBufferFromFileDescriptor.h +++ b/src/IO/MMapReadBufferFromFileDescriptor.h @@ -39,6 +39,9 @@ public: int getFD() const; size_t getFileSize() override; + + size_t readBigAt(char * to, size_t n, size_t offset, const std::function &) override; + bool supportsReadAt() override { return true; } }; } diff 
--git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index fff02db1bd6..07240ab3a4f 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -13,51 +14,44 @@ namespace ErrorCodes } -// A subrange of the input, read by one SeekableReadBuffer. +// A subrange of the input, read by one thread. struct ParallelReadBuffer::ReadWorker { - ReadWorker(std::unique_ptr reader_, size_t offset_, size_t size) - : reader(std::move(reader_)), offset(offset_), bytes_left(size), range_end(offset + bytes_left) + ReadWorker(SeekableReadBuffer & input_, size_t offset, size_t size) + : input(input_), start_offset(offset), segment(size) { - assert(bytes_left); + chassert(size); + chassert(segment.size() == size); } - auto hasSegment() const { return current_segment_index < segments.size(); } + bool hasBytesToConsume() const { return bytes_produced > bytes_consumed; } + bool hasBytesToProduce() const { return bytes_produced < segment.size(); } - auto nextSegment() - { - assert(hasSegment()); - auto next_segment = std::move(segments[current_segment_index]); - ++current_segment_index; - offset += next_segment.size(); - return next_segment; - } + SeekableReadBuffer & input; + const size_t start_offset; // start of the segment - std::unique_ptr reader; - // Reader thread produces segments, nextImpl() consumes them. - std::vector> segments; // segments that were produced - size_t current_segment_index = 0; // first segment that's not consumed - bool finished{false}; // no more segments will be produced - size_t offset; // start of segments[current_segment_idx] - size_t bytes_left; // bytes left to produce above segments end - size_t range_end; // segments end + bytes_left, i.e. how far this worker will read - - // segments[current_segment_idx..end] range_end - // |-------------|--------------------------------------|------------| - // offset bytes_left + Memory<> segment; + /// Reader thread produces data, nextImpl() consumes it. + /// segment[bytes_consumed..bytes_produced-1] is data waiting to be picked up by nextImpl() + /// segment[bytes_produced..] 
needs to be read from the input ReadBuffer + size_t bytes_produced = 0; + size_t bytes_consumed = 0; std::atomic_bool cancel{false}; std::mutex worker_mutex; }; ParallelReadBuffer::ParallelReadBuffer( - std::unique_ptr reader_factory_, ThreadPoolCallbackRunner schedule_, size_t max_working_readers_, size_t range_step_) + SeekableReadBuffer & input_, ThreadPoolCallbackRunner schedule_, size_t max_working_readers_, size_t range_step_, size_t file_size_) : SeekableReadBuffer(nullptr, 0) , max_working_readers(max_working_readers_) , schedule(std::move(schedule_)) - , reader_factory(std::move(reader_factory_)) + , input(input_) + , file_size(file_size_) , range_step(std::max(1ul, range_step_)) { + LOG_TRACE(&Poco::Logger::get("ParallelReadBuffer"), "Parallel reading is used"); + try { addReaders(); @@ -71,22 +65,15 @@ ParallelReadBuffer::ParallelReadBuffer( bool ParallelReadBuffer::addReaderToPool() { - size_t file_size = reader_factory->getFileSize(); if (next_range_start >= file_size) return false; size_t range_start = next_range_start; size_t size = std::min(range_step, file_size - range_start); next_range_start += size; - auto reader = reader_factory->getReader(); - if (!reader) - { - return false; - } + auto worker = read_workers.emplace_back(std::make_shared(input, range_start, size)); - auto worker = read_workers.emplace_back(std::make_shared(std::move(reader), range_start, size)); - - ++active_working_reader; + ++active_working_readers; schedule([this, my_worker = std::move(worker)]() mutable { readerThreadFunction(std::move(my_worker)); }, Priority{}); return true; @@ -116,9 +103,9 @@ off_t ParallelReadBuffer::seek(off_t offset, int whence) } const auto offset_is_in_range - = [&](const auto & worker) { return static_cast(offset) >= worker->offset && static_cast(offset) < worker->range_end; }; + = [&](const auto & worker) { return static_cast(offset) >= worker->start_offset && static_cast(offset) < worker->start_offset + worker->segment.size(); }; - while (!read_workers.empty() && (offset < current_position || !offset_is_in_range(read_workers.front()))) + while (!read_workers.empty() && !offset_is_in_range(read_workers.front())) { read_workers.front()->cancel = true; read_workers.pop_front(); @@ -126,32 +113,31 @@ off_t ParallelReadBuffer::seek(off_t offset, int whence) if (!read_workers.empty()) { - auto & front_worker = read_workers.front(); - current_position = front_worker->offset; + auto & w = read_workers.front(); + size_t diff = static_cast(offset) - w->start_offset; while (true) { - std::unique_lock lock{front_worker->worker_mutex}; - next_condvar.wait(lock, [&] { return emergency_stop || front_worker->hasSegment(); }); + std::unique_lock lock{w->worker_mutex}; if (emergency_stop) handleEmergencyStop(); - auto next_segment = front_worker->nextSegment(); - current_position += next_segment.size(); - if (offset < current_position) + if (w->bytes_produced > diff) { - current_segment = std::move(next_segment); - working_buffer = internal_buffer = Buffer(current_segment.data(), current_segment.data() + current_segment.size()); - pos = working_buffer.end() - (current_position - offset); + working_buffer = internal_buffer = Buffer( + w->segment.data() + diff, w->segment.data() + w->bytes_produced); + w->bytes_consumed = w->bytes_produced; + current_position += w->start_offset + w->bytes_consumed; addReaders(); return offset; } + + next_condvar.wait_for(lock, std::chrono::seconds(10)); } } finishAndWait(); - all_completed = false; read_workers.clear(); next_range_start = offset; @@ 
-166,7 +152,7 @@ off_t ParallelReadBuffer::seek(off_t offset, int whence) size_t ParallelReadBuffer::getFileSize() { - return reader_factory->getFileSize(); + return file_size; } off_t ParallelReadBuffer::getPosition() @@ -174,17 +160,6 @@ off_t ParallelReadBuffer::getPosition() return current_position - available(); } -bool ParallelReadBuffer::currentWorkerReady() const -{ - assert(!read_workers.empty()); - return read_workers.front()->finished || read_workers.front()->hasSegment(); -} - -bool ParallelReadBuffer::currentWorkerCompleted() const -{ - return read_workers.front()->finished && !read_workers.front()->hasSegment(); -} - void ParallelReadBuffer::handleEmergencyStop() { // this can only be called from the main thread when there is an exception @@ -194,106 +169,99 @@ void ParallelReadBuffer::handleEmergencyStop() bool ParallelReadBuffer::nextImpl() { - if (all_completed) - return false; - while (true) { - std::unique_lock lock{read_workers.front()->worker_mutex}; - next_condvar.wait( - lock, - [this]() - { - /// Check if no more readers left or current reader can be processed - return emergency_stop || currentWorkerReady(); - }); - - bool worker_removed = false; - /// Remove completed units - while (currentWorkerCompleted() && !emergency_stop) - { - lock.unlock(); - read_workers.pop_front(); - worker_removed = true; - - if (read_workers.empty()) - break; - - lock = std::unique_lock{read_workers.front()->worker_mutex}; - } - - if (emergency_stop) - handleEmergencyStop(); - - if (worker_removed) - addReaders(); - /// All readers processed, stop if (read_workers.empty()) { - all_completed = true; + chassert(next_range_start >= file_size); return false; } - auto & front_worker = read_workers.front(); - /// Read data from first segment of the first reader - if (front_worker->hasSegment()) + auto * w = read_workers.front().get(); + + std::unique_lock lock{w->worker_mutex}; + + if (emergency_stop) + handleEmergencyStop(); // throws + + /// Read data from front reader + if (w->bytes_produced > w->bytes_consumed) { - current_segment = front_worker->nextSegment(); - if (currentWorkerCompleted()) - { - lock.unlock(); - read_workers.pop_front(); - all_completed = !addReaderToPool() && read_workers.empty(); - } - break; + chassert(w->start_offset + w->bytes_consumed == static_cast(current_position)); + + working_buffer = internal_buffer = Buffer( + w->segment.data() + w->bytes_consumed, w->segment.data() + w->bytes_produced); + current_position += working_buffer.size(); + w->bytes_consumed = w->bytes_produced; + + return true; } + + /// Front reader is done, remove it and add another + if (!w->hasBytesToProduce()) + { + lock.unlock(); + read_workers.pop_front(); + addReaders(); + + continue; + } + + /// Nothing to do right now, wait for something to change. + /// + /// The timeout is a workaround for a race condition. + /// emergency_stop is assigned while holding a *different* mutex from the one we're holding + /// (exception_mutex vs worker_mutex). So it's possible that our emergency_stop check (above) + /// happens before a onBackgroundException() call, but our wait(lock) happens after it. + /// Then the wait may get stuck forever. + /// + /// Note that using wait(lock, [&]{ return emergency_stop || ...; }) wouldn't help because + /// it does effectively the same "check, then wait" sequence. 
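+        ///
+        /// An illustrative interleaving of that race (hypothetical, for clarity only):
+        ///   main thread:   reads emergency_stop, sees false
+        ///   reader thread: onBackgroundException() sets emergency_stop = true and calls next_condvar.notify_all()
+        ///   main thread:   calls next_condvar.wait(lock), so the notification has already been missed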
+ /// + /// One possible proper fix would be to make onBackgroundException() lock all read_workers + /// mutexes too (not necessarily simultaneously - just locking+unlocking them one by one + /// between the emergency_stop change and the notify_all() would be enough), but then we + /// need another mutex to protect read_workers itself... + next_condvar.wait_for(lock, std::chrono::seconds(10)); } - working_buffer = internal_buffer = Buffer(current_segment.data(), current_segment.data() + current_segment.size()); - current_position += working_buffer.size(); - return true; + chassert(false); + return false; } void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker) { SCOPE_EXIT({ - if (active_working_reader.fetch_sub(1) == 1) - active_working_reader.notify_all(); + if (active_working_readers.fetch_sub(1) == 1) + active_working_readers.notify_all(); }); try { - read_worker->reader->setReadUntilPosition(read_worker->range_end); - read_worker->reader->seek(read_worker->offset, SEEK_SET); - - while (!emergency_stop && !read_worker->cancel) + auto on_progress = [&](size_t bytes_read) -> bool { - if (!read_worker->reader->next()) - throw Exception( - ErrorCodes::LOGICAL_ERROR, "Failed to read all the data from the reader, missing {} bytes", read_worker->bytes_left); - if (emergency_stop || read_worker->cancel) - break; + return true; - Buffer buffer = read_worker->reader->buffer(); - size_t bytes_to_copy = std::min(buffer.size(), read_worker->bytes_left); - Memory<> new_segment(bytes_to_copy); - memcpy(new_segment.data(), buffer.begin(), bytes_to_copy); - read_worker->reader->ignore(bytes_to_copy); - read_worker->bytes_left -= bytes_to_copy; - { - /// New data ready to be read - std::lock_guard lock(read_worker->worker_mutex); - read_worker->segments.emplace_back(std::move(new_segment)); - read_worker->finished = read_worker->bytes_left == 0; + std::lock_guard lock(read_worker->worker_mutex); + if (bytes_read <= read_worker->bytes_produced) + return false; + + bool need_notify = read_worker->bytes_produced == read_worker->bytes_consumed; + read_worker->bytes_produced = bytes_read; + if (need_notify) next_condvar.notify_all(); - } - if (read_worker->finished) - { - break; - } - } + return false; + }; + + size_t r = input.readBigAt(read_worker->segment.data(), read_worker->segment.size(), read_worker->start_offset); + + if (!on_progress(r) && r < read_worker->segment.size()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Failed to read all the data from the reader at offset {}, got {}/{} bytes", + read_worker->start_offset, r, read_worker->segment.size()); } catch (...) 
{ @@ -315,12 +283,24 @@ void ParallelReadBuffer::finishAndWait() { emergency_stop = true; - size_t active_readers = active_working_reader.load(); + size_t active_readers = active_working_readers.load(); while (active_readers != 0) { - active_working_reader.wait(active_readers); - active_readers = active_working_reader.load(); + active_working_readers.wait(active_readers); + active_readers = active_working_readers.load(); } } +std::unique_ptr wrapInParallelReadBufferIfSupported( + ReadBuffer & buf, ThreadPoolCallbackRunner schedule, size_t max_working_readers, + size_t range_step, size_t file_size) +{ + auto * seekable = dynamic_cast(&buf); + if (!seekable || !seekable->supportsReadAt()) + return nullptr; + + return std::make_unique( + *seekable, schedule, max_working_readers, range_step, file_size); +} + } diff --git a/src/IO/ParallelReadBuffer.h b/src/IO/ParallelReadBuffer.h index 70f925f9735..e76b40f77b7 100644 --- a/src/IO/ParallelReadBuffer.h +++ b/src/IO/ParallelReadBuffer.h @@ -10,18 +10,17 @@ namespace DB { /** - * Reads from multiple ReadBuffers in parallel. - * Preserves order of readers obtained from SeekableReadBufferFactory. + * Reads from multiple positions in a ReadBuffer in parallel. + * Then reassembles the data into one stream in the original order. * - * It consumes multiple readers and yields data from them in order as it passed. - * Each working reader save segments of data to internal queue. + * Each working reader reads its segment of data into a buffer. * - * ParallelReadBuffer in nextImpl method take first available segment from first reader in deque and fed it to user. - * When first reader finish reading, they will be removed from worker deque and data from next reader consumed. + * In nextImpl(), ParallelReadBuffer takes the first available segment from the first reader in the deque and delivers it to the user. + * When the first reader finishes reading, it is removed from the worker deque and data from the next reader is consumed. * * Number of working readers limited by max_working_readers.
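+ *
+ * A minimal usage sketch (illustrative only, not part of this change; `pool` is an existing
+ * ThreadPool, `seekable_in` a SeekableReadBuffer, `out` a WriteBuffer, and `file_size` the known
+ * size of the source):
+ *
+ *   auto runner = threadPoolCallbackRunner<void>(pool, "ParallelRead");
+ *   size_t max_working_readers = 4;
+ *   size_t range_step = 4 * 1024 * 1024;
+ *   if (auto parallel = wrapInParallelReadBufferIfSupported(*seekable_in, runner, max_working_readers, range_step, file_size))
+ *       copyData(*parallel, out);  // segments are fetched in parallel but delivered in the original order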
*/ -class ParallelReadBuffer : public SeekableReadBuffer +class ParallelReadBuffer : public SeekableReadBuffer, public WithFileSize { private: /// Blocks until data occurred in the first reader or this reader indicate finishing @@ -29,19 +28,19 @@ private: bool nextImpl() override; public: - ParallelReadBuffer(SeekableReadBufferFactoryPtr reader_factory_, ThreadPoolCallbackRunner schedule_, size_t max_working_readers, size_t range_step_); + ParallelReadBuffer(SeekableReadBuffer & input, ThreadPoolCallbackRunner schedule_, size_t max_working_readers, size_t range_step_, size_t file_size); ~ParallelReadBuffer() override { finishAndWait(); } off_t seek(off_t off, int whence) override; - size_t getFileSize(); + size_t getFileSize() override; off_t getPosition() override; - const SeekableReadBufferFactory & getReadBufferFactory() const { return *reader_factory; } - SeekableReadBufferFactory & getReadBufferFactory() { return *reader_factory; } + const SeekableReadBuffer & getReadBuffer() const { return input; } + SeekableReadBuffer & getReadBuffer() { return input; } private: - /// Reader in progress with a list of read segments + /// Reader in progress with a buffer for the segment struct ReadWorker; using ReadWorkerPtr = std::shared_ptr; @@ -55,28 +54,28 @@ private: void addReaders(); bool addReaderToPool(); - /// Process read_worker, read data and save into internal segments queue + /// Process read_worker, read data and save into the buffer void readerThreadFunction(ReadWorkerPtr read_worker); void onBackgroundException(); void finishAndWait(); - Memory<> current_segment; - size_t max_working_readers; - std::atomic_size_t active_working_reader{0}; + std::atomic_size_t active_working_readers{0}; ThreadPoolCallbackRunner schedule; - std::unique_ptr reader_factory; + SeekableReadBuffer & input; + size_t file_size; size_t range_step; size_t next_range_start{0}; /** * FIFO queue of readers. - * Each worker contains reader itself and downloaded segments. - * When reader read all available data it will be removed from - * deque and data from next reader will be consumed to user. + * Each worker contains a buffer for the downloaded segment. + * After all data for the segment is read and delivered to the user, the reader will be removed + * from deque and data from next reader will be delivered. + * After removing from deque, call addReaders(). */ std::deque read_workers; @@ -92,4 +91,10 @@ private: bool all_completed{false}; }; +/// If `buf` is a SeekableReadBuffer with supportsReadAt() == true, creates a ParallelReadBuffer +/// from it. 
Otherwise returns nullptr; +std::unique_ptr wrapInParallelReadBufferIfSupported( + ReadBuffer & buf, ThreadPoolCallbackRunner schedule, size_t max_working_readers, + size_t range_step, size_t file_size); + } diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index bf44d9d10da..67bc01279c3 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -50,30 +50,30 @@ std::string ReadBufferFromFileDescriptor::getFileName() const } -bool ReadBufferFromFileDescriptor::nextImpl() +size_t ReadBufferFromFileDescriptor::readImpl(char * to, size_t min_bytes, size_t max_bytes, size_t offset) { - /// If internal_buffer size is empty, then read() cannot be distinguished from EOF - assert(!internal_buffer.empty()); + chassert(min_bytes <= max_bytes); - /// This is a workaround of a read pass EOF bug in linux kernel with pread() - if (file_size.has_value() && file_offset_of_buffer_end >= *file_size) - return false; + /// This is a workaround of a read past EOF bug in linux kernel with pread() + if (file_size.has_value() && offset >= *file_size) + return 0; size_t bytes_read = 0; - while (!bytes_read) + while (bytes_read < min_bytes) { ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorRead); Stopwatch watch(profile_callback ? clock_type : CLOCK_MONOTONIC); ssize_t res = 0; + size_t to_read = max_bytes - bytes_read; { CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; if (use_pread) - res = ::pread(fd, internal_buffer.begin(), internal_buffer.size(), file_offset_of_buffer_end); + res = ::pread(fd, to + bytes_read, to_read, offset + bytes_read); else - res = ::read(fd, internal_buffer.begin(), internal_buffer.size()); + res = ::read(fd, to + bytes_read, to_read); } if (!res) break; @@ -102,18 +102,31 @@ bool ReadBufferFromFileDescriptor::nextImpl() if (profile_callback) { ProfileInfo info; - info.bytes_requested = internal_buffer.size(); + info.bytes_requested = to_read; info.bytes_read = res; info.nanoseconds = watch.elapsed(); profile_callback(info); } } + if (bytes_read) + ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read); + + return bytes_read; +} + + +bool ReadBufferFromFileDescriptor::nextImpl() +{ + /// If internal_buffer size is empty, then read() cannot be distinguished from EOF + assert(!internal_buffer.empty()); + + size_t bytes_read = readImpl(internal_buffer.begin(), 1, internal_buffer.size(), file_offset_of_buffer_end); + file_offset_of_buffer_end += bytes_read; if (bytes_read) { - ProfileEvents::increment(ProfileEvents::ReadBufferFromFileDescriptorReadBytes, bytes_read); working_buffer = internal_buffer; working_buffer.resize(bytes_read); } @@ -259,4 +272,17 @@ size_t ReadBufferFromFileDescriptor::getFileSize() return getSizeFromFileDescriptor(fd, getFileName()); } +bool ReadBufferFromFileDescriptor::checkIfActuallySeekable() +{ + struct stat stat; + auto res = ::fstat(fd, &stat); + return res == 0 && S_ISREG(stat.st_mode); +} + +size_t ReadBufferFromFileDescriptor::readBigAt(char * to, size_t n, size_t offset, const std::function &) +{ + chassert(use_pread); + return readImpl(to, n, n, offset); +} + } diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h index 10f140275bb..64340770cf2 100644 --- a/src/IO/ReadBufferFromFileDescriptor.h +++ b/src/IO/ReadBufferFromFileDescriptor.h @@ -30,6 +30,12 @@ protected: /// Name or some description of file. 
std::string getFileName() const override; + /// Does the read()/pread(), with all the metric increments, error handling, throttling, etc. + /// Doesn't seek (`offset` must match fd's position if !use_pread). + /// Stops after min_bytes or eof. Returns 0 if eof. + /// Thread safe. + size_t readImpl(char * to, size_t min_bytes, size_t max_bytes, size_t offset); + public: explicit ReadBufferFromFileDescriptor( int fd_, @@ -65,6 +71,11 @@ public: size_t getFileSize() override; + bool checkIfActuallySeekable() override; + + size_t readBigAt(char * to, size_t n, size_t offset, const std::function &) override; + bool supportsReadAt() override { return use_pread; } + private: /// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout. bool poll(size_t timeout_microseconds) const; diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 93e2c46b080..d1cb1ec9ab0 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -109,9 +109,12 @@ bool ReadBufferFromS3::nextImpl() } size_t sleep_time_with_backoff_milliseconds = 100; - for (size_t attempt = 0; attempt < request_settings.max_single_read_retries && !next_result; ++attempt) + for (size_t attempt = 0; !next_result; ++attempt) { - Stopwatch watch; + bool last_attempt = attempt + 1 >= request_settings.max_single_read_retries; + + ProfileEventTimeIncrement watch(ProfileEvents::ReadBufferFromS3Microseconds); + try { if (!impl) @@ -133,44 +136,11 @@ bool ReadBufferFromS3::nextImpl() /// Try to read a next portion of data. next_result = impl->next(); - watch.stop(); - ProfileEvents::increment(ProfileEvents::ReadBufferFromS3Microseconds, watch.elapsedMicroseconds()); break; } catch (Exception & e) { - watch.stop(); - ProfileEvents::increment(ProfileEvents::ReadBufferFromS3Microseconds, watch.elapsedMicroseconds()); - ProfileEvents::increment(ProfileEvents::ReadBufferFromS3RequestsErrors, 1); - - if (auto * s3_exception = dynamic_cast(&e)) - { - /// It doesn't make sense to retry Access Denied or No Such Key - if (!s3_exception->isRetryableError()) - { - s3_exception->addMessage("while reading key: {}, from bucket: {}", key, bucket); - throw; - } - } - - /// It doesn't make sense to retry allocator errors - if (e.code() == ErrorCodes::CANNOT_ALLOCATE_MEMORY) - { - tryLogCurrentException(log); - throw; - } - - LOG_DEBUG( - log, - "Caught exception while reading S3 object. Bucket: {}, Key: {}, Version: {}, Offset: {}, Attempt: {}, Message: {}", - bucket, - key, - version_id.empty() ? "Latest" : version_id, - getPosition(), - attempt, - e.message()); - - if (attempt + 1 == request_settings.max_single_read_retries) + if (!processException(e, getPosition(), attempt) || last_attempt) throw; /// Pause before next attempt. 
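/// A hedged usage sketch of the readBigAt()/supportsReadAt() API that the next hunk implements
/// for S3 (the `buf` reference, the destination buffer and the offset are illustrative assumptions,
/// not part of this patch; the progress callback returns true to cancel, false to keep reading):
///
///     std::vector<char> dest(1 << 20);
///     if (buf.supportsReadAt())
///     {
///         size_t bytes_read = buf.readBigAt(dest.data(), dest.size(), /* offset */ 0,
///             [](size_t produced) { return false; });
///     }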
@@ -197,6 +167,74 @@ bool ReadBufferFromS3::nextImpl() } +size_t ReadBufferFromS3::readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) +{ + if (n == 0) + return 0; + + size_t sleep_time_with_backoff_milliseconds = 100; + for (size_t attempt = 0;; ++attempt) + { + bool last_attempt = attempt + 1 >= request_settings.max_single_read_retries; + + ProfileEventTimeIncrement watch(ProfileEvents::ReadBufferFromS3Microseconds); + + try + { + auto result = sendRequest(range_begin, range_begin + n - 1); + std::istream & istr = result.GetBody(); + + size_t bytes = copyFromIStreamWithProgressCallback(istr, to, n, progress_callback); + + ProfileEvents::increment(ProfileEvents::ReadBufferFromS3Bytes, bytes); + + if (read_settings.remote_throttler) + read_settings.remote_throttler->add(bytes, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); + + return bytes; + } + catch (Poco::Exception & e) + { + if (!processException(e, range_begin, attempt) || last_attempt) + throw; + + sleepForMilliseconds(sleep_time_with_backoff_milliseconds); + sleep_time_with_backoff_milliseconds *= 2; + } + } +} + +bool ReadBufferFromS3::processException(Poco::Exception & e, size_t read_offset, size_t attempt) const +{ + ProfileEvents::increment(ProfileEvents::ReadBufferFromS3RequestsErrors, 1); + + LOG_DEBUG( + log, + "Caught exception while reading S3 object. Bucket: {}, Key: {}, Version: {}, Offset: {}, " + "Attempt: {}, Message: {}", + bucket, key, version_id.empty() ? "Latest" : version_id, read_offset, attempt, e.message()); + + if (auto * s3_exception = dynamic_cast(&e)) + { + /// It doesn't make sense to retry Access Denied or No Such Key + if (!s3_exception->isRetryableError()) + { + s3_exception->addMessage("while reading key: {}, from bucket: {}", key, bucket); + return false; + } + } + + /// It doesn't make sense to retry allocator errors + if (e.code() == ErrorCodes::CANNOT_ALLOCATE_MEMORY) + { + tryLogCurrentException(log); + return false; + } + + return true; +} + + off_t ReadBufferFromS3::seek(off_t offset_, int whence) { if (offset_ == getPosition() && whence == SEEK_SET) @@ -315,44 +353,40 @@ bool ReadBufferFromS3::atEndOfRequestedRangeGuess() std::unique_ptr ReadBufferFromS3::initialize() { - S3::GetObjectRequest req; - req.SetBucket(bucket); - req.SetKey(key); - if (!version_id.empty()) - { - req.SetVersionId(version_id); - } - /** * If remote_filesystem_read_method = 'threadpool', then for MergeTree family tables * exact byte ranges to read are always passed here. */ - if (read_until_position) - { - if (offset >= read_until_position) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1); + if (read_until_position && offset >= read_until_position) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read beyond right offset ({} > {})", offset, read_until_position - 1); - req.SetRange(fmt::format("bytes={}-{}", offset, read_until_position - 1)); - LOG_TEST( - log, - "Read S3 object. Bucket: {}, Key: {}, Version: {}, Range: {}-{}", - bucket, - key, - version_id.empty() ? "Latest" : version_id, - offset, - read_until_position - 1); - } - else + read_result = sendRequest(offset, read_until_position ? std::make_optional(read_until_position - 1) : std::nullopt); + + size_t buffer_size = use_external_buffer ? 
0 : read_settings.remote_fs_buffer_size; + return std::make_unique(read_result.GetBody(), buffer_size); +} + +Aws::S3::Model::GetObjectResult ReadBufferFromS3::sendRequest(size_t range_begin, std::optional range_end_incl) const +{ + S3::GetObjectRequest req; + req.SetBucket(bucket); + req.SetKey(key); + if (!version_id.empty()) + req.SetVersionId(version_id); + + if (range_end_incl) { - if (offset) - req.SetRange(fmt::format("bytes={}-", offset)); + req.SetRange(fmt::format("bytes={}-{}", range_begin, *range_end_incl)); LOG_TEST( - log, - "Read S3 object. Bucket: {}, Key: {}, Version: {}, Offset: {}", - bucket, - key, - version_id.empty() ? "Latest" : version_id, - offset); + log, "Read S3 object. Bucket: {}, Key: {}, Version: {}, Range: {}-{}", + bucket, key, version_id.empty() ? "Latest" : version_id, range_begin, *range_end_incl); + } + else if (range_begin) + { + req.SetRange(fmt::format("bytes={}-", range_begin)); + LOG_TEST( + log, "Read S3 object. Bucket: {}, Key: {}, Version: {}, Offset: {}", + bucket, key, version_id.empty() ? "Latest" : version_id, range_begin); } ProfileEvents::increment(ProfileEvents::S3GetObject); @@ -371,9 +405,7 @@ std::unique_ptr ReadBufferFromS3::initialize() { ResourceCost bytes_read = outcome.GetResult().GetContentLength(); read_settings.resource_link.adjust(estimated_cost, bytes_read); - size_t buffer_size = use_external_buffer ? 0 : read_settings.remote_fs_buffer_size; - read_result = outcome.GetResultWithOwnership(); - return std::make_unique(read_result.GetBody(), buffer_size); + return outcome.GetResultWithOwnership(); } else { @@ -383,21 +415,6 @@ std::unique_ptr ReadBufferFromS3::initialize() } } -std::unique_ptr ReadBufferS3Factory::getReader() -{ - return std::make_unique( - client_ptr, - bucket, - key, - version_id, - request_settings, - read_settings.adjustBufferSize(object_size)); -} - -size_t ReadBufferS3Factory::getFileSize() -{ - return object_size; -} } #endif diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index 52dd74bdd14..0f665861a1e 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -77,12 +77,22 @@ public: String getFileName() const override { return bucket + "/" + key; } + size_t readBigAt(char * to, size_t n, size_t range_begin, const std::function & progress_callback) override; + + bool supportsReadAt() override { return true; } + private: std::unique_ptr initialize(); - // If true, if we destroy impl now, no work was wasted. Just for metrics. + /// If true, if we destroy impl now, no work was wasted. Just for metrics. bool atEndOfRequestedRangeGuess(); + /// Call inside catch() block if GetObject fails. Bumps metrics, logs the error. + /// Returns true if the error looks retriable. 
+ bool processException(Poco::Exception & e, size_t read_offset, size_t attempt) const; + + Aws::S3::Model::GetObjectResult sendRequest(size_t range_begin, std::optional range_end_incl) const; + ReadSettings read_settings; bool use_external_buffer; @@ -92,43 +102,6 @@ private: bool restricted_seek; }; -/// Creates separate ReadBufferFromS3 for sequence of ranges of particular object -class ReadBufferS3Factory : public SeekableReadBufferFactory, public WithFileName -{ -public: - explicit ReadBufferS3Factory( - std::shared_ptr client_ptr_, - const String & bucket_, - const String & key_, - const String & version_id_, - size_t object_size_, - const S3Settings::RequestSettings & request_settings_, - const ReadSettings & read_settings_) - : client_ptr(client_ptr_) - , bucket(bucket_) - , key(key_) - , version_id(version_id_) - , read_settings(read_settings_) - , object_size(object_size_) - , request_settings(request_settings_) - {} - - std::unique_ptr getReader() override; - - size_t getFileSize() override; - - String getFileName() const override { return bucket + "/" + key; } - -private: - std::shared_ptr client_ptr; - const String bucket; - const String key; - const String version_id; - ReadSettings read_settings; - size_t object_size; - const S3Settings::RequestSettings request_settings; -}; - } #endif diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index 8c3ab704d2b..cf1159bfb4b 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -40,6 +40,12 @@ void UpdatableSession::updateSession(const Poco::URI & uri) throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, "Too many redirects while trying to access {}", initial_uri.toString()); } +template +typename UpdatableSession::SessionPtr UpdatableSession::createDetachedSession(const Poco::URI & uri) +{ + return session_factory->buildNewSession(uri); +} + template std::shared_ptr> UpdatableSession::clone(const Poco::URI & uri) { @@ -89,21 +95,11 @@ bool ReadWriteBufferFromHTTPBase::withPartialContent(const } template -size_t ReadWriteBufferFromHTTPBase::getRangeBegin() const { return read_range.begin.value_or(0); } +size_t ReadWriteBufferFromHTTPBase::getOffset() const { return read_range.begin.value_or(0) + offset_from_begin_pos; } template -size_t ReadWriteBufferFromHTTPBase::getOffset() const { return getRangeBegin() + offset_from_begin_pos; } - -template -std::istream * ReadWriteBufferFromHTTPBase::callImpl( - UpdatableSessionPtr & current_session, Poco::URI uri_, Poco::Net::HTTPResponse & response, - const std::string & method_, bool for_object_info) +void ReadWriteBufferFromHTTPBase::prepareRequest(Poco::Net::HTTPRequest & request, Poco::URI uri_, std::optional range) const { - // With empty path poco will send "POST HTTP/1.1" its bug. 
- if (uri_.getPath().empty()) - uri_.setPath("/"); - - Poco::Net::HTTPRequest request(method_, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); request.setHost(uri_.getHost()); // use original, not resolved host name in header if (out_stream_callback) @@ -111,16 +107,9 @@ std::istream * ReadWriteBufferFromHTTPBase::callImpl( else if (method == Poco::Net::HTTPRequest::HTTP_POST) request.setContentLength(0); /// No callback - no body - for (auto & [header, value] : http_header_entries) + for (const auto & [header, value] : http_header_entries) request.set(header, value); - std::optional range; - if (!for_object_info) - { - if (withPartialContent(read_range)) - range = HTTPRange{getOffset(), read_range.end}; - } - if (range) { String range_header_value; @@ -134,6 +123,25 @@ std::istream * ReadWriteBufferFromHTTPBase::callImpl( if (!credentials.getUsername().empty()) credentials.authenticate(request); +} + +template +std::istream * ReadWriteBufferFromHTTPBase::callImpl( + UpdatableSessionPtr & current_session, Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_, bool for_object_info) +{ + // With empty path poco will send "POST HTTP/1.1" its bug. + if (uri_.getPath().empty()) + uri_.setPath("/"); + + std::optional range; + if (!for_object_info) + { + if (withPartialContent(read_range)) + range = HTTPRange{getOffset(), read_range.end}; + } + + Poco::Net::HTTPRequest request(method_, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); + prepareRequest(request, uri_, range); LOG_TRACE(log, "Sending request to {}", uri_.toString()); @@ -176,6 +184,14 @@ size_t ReadWriteBufferFromHTTPBase::getFileSize() throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for: {}", uri.toString()); } +template +bool ReadWriteBufferFromHTTPBase::supportsReadAt() +{ + if (!file_info) + file_info = getFileInfo(); + return method == Poco::Net::HTTPRequest::HTTP_GET && file_info->seekable; +} + template bool ReadWriteBufferFromHTTPBase::checkIfActuallySeekable() { @@ -405,7 +421,7 @@ void ReadWriteBufferFromHTTPBase::initialize() { /// We could have range.begin == 0 and range.end != 0 in case of DiskWeb and failing to read with partial content /// will affect only performance, so a warning is enough. - LOG_WARNING(log, "Unable to read with range header: [{}, {}]", getRangeBegin(), *read_range.end); + LOG_WARNING(log, "Unable to read with range header: [{}, {}]", read_range.begin.value_or(0), *read_range.end); } } @@ -538,8 +554,8 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() throw; /** Retry request unconditionally if nothing has been read yet. - * Otherwise if it is GET method retry with range header. - */ + * Otherwise if it is GET method retry with range header. + */ bool can_retry_request = !offset_from_begin_pos || method == Poco::Net::HTTPRequest::HTTP_GET; if (!can_retry_request) throw; @@ -574,6 +590,83 @@ bool ReadWriteBufferFromHTTPBase::nextImpl() return true; } +template +size_t ReadWriteBufferFromHTTPBase::readBigAt(char * to, size_t n, size_t offset, const std::function & progress_callback) +{ + /// Caller must have checked supportsReadAt(). + /// This ensures we've sent at least one HTTP request and populated saved_uri_redirect. 
+ chassert(file_info && file_info->seekable); + + if (n == 0) + return 0; + + Poco::URI uri_ = saved_uri_redirect.value_or(uri); + if (uri_.getPath().empty()) + uri_.setPath("/"); + + size_t milliseconds_to_wait = settings.http_retry_initial_backoff_ms; + + for (size_t attempt = 0;; ++attempt) + { + bool last_attempt = attempt + 1 >= settings.http_max_tries; + + Poco::Net::HTTPRequest request(method, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); + prepareRequest(request, uri_, HTTPRange { .begin = offset, .end = offset + n - 1}); + + LOG_TRACE(log, "Sending request to {} for range [{}, {})", uri_.toString(), offset, offset + n); + + auto sess = session->createDetachedSession(uri_); + + Poco::Net::HTTPResponse response; + std::istream * result_istr; + + try + { + sess->sendRequest(request); + result_istr = receiveResponse(*sess, request, response, /*allow_redirects*/ false); + + if (response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT && + (offset != 0 || offset + n < *file_info->file_size)) + throw Exception( + ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, + "Expected 206 Partial Content, got {} when reading {} range [{}, {})", + toString(response.getStatus()), uri_.toString(), offset, offset + n); + + bool cancelled; + size_t r = copyFromIStreamWithProgressCallback(*result_istr, to, n, progress_callback, &cancelled); + + return r; + } + catch (const Poco::Exception & e) + { + sess->attachSessionData(e.message()); + + LOG_ERROR( + log, + "HTTP request (positioned) to `{}` with range [{}, {}) failed at try {}/{}: {}", + uri_.toString(), offset, offset + n, attempt + 1, settings.http_max_tries, + e.what()); + + /// Decide whether to retry. + + if (last_attempt) + throw; + + /// Too many open files - non-retryable. + if (e.code() == POCO_EMFILE) + throw; + + if (const auto * h = dynamic_cast(&e); + h && !isRetriableError(static_cast(h->getHTTPStatus()))) + throw; + + sleepForMilliseconds(milliseconds_to_wait); + milliseconds_to_wait = std::min(milliseconds_to_wait * 2, settings.http_retry_max_backoff_ms); + continue; + } + } +} + template off_t ReadWriteBufferFromHTTPBase::getPosition() { return getOffset() - available(); } @@ -793,75 +886,6 @@ ReadWriteBufferFromHTTP::ReadWriteBufferFromHTTP( skip_not_found_url_, file_info_) {} -RangedReadWriteBufferFromHTTPFactory::RangedReadWriteBufferFromHTTPFactory( - Poco::URI uri_, - std::string method_, - OutStreamCallback out_stream_callback_, - ConnectionTimeouts timeouts_, - const Poco::Net::HTTPBasicCredentials & credentials_, - UInt64 max_redirects_, - size_t buffer_size_, - ReadSettings settings_, - HTTPHeaderEntries http_header_entries_, - const RemoteHostFilter * remote_host_filter_, - bool delay_initialization_, - bool use_external_buffer_, - bool skip_not_found_url_) - : uri(uri_) - , method(std::move(method_)) - , out_stream_callback(out_stream_callback_) - , timeouts(std::move(timeouts_)) - , credentials(credentials_) - , max_redirects(max_redirects_) - , buffer_size(buffer_size_) - , settings(std::move(settings_)) - , http_header_entries(std::move(http_header_entries_)) - , remote_host_filter(remote_host_filter_) - , delay_initialization(delay_initialization_) - , use_external_buffer(use_external_buffer_) - , skip_not_found_url(skip_not_found_url_) {} - -std::unique_ptr RangedReadWriteBufferFromHTTPFactory::getReader() -{ - return std::make_unique( - uri, - method, - out_stream_callback, - timeouts, - credentials, - max_redirects, - buffer_size, - settings, - http_header_entries, - remote_host_filter, - 
delay_initialization, - use_external_buffer, - skip_not_found_url, - file_info); -} - -size_t RangedReadWriteBufferFromHTTPFactory::getFileSize() -{ - auto s = getFileInfo().file_size; - if (!s) - throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for: {}", uri.toString()); - return *s; -} - -bool RangedReadWriteBufferFromHTTPFactory::checkIfActuallySeekable() -{ - return getFileInfo().seekable; -} - -HTTPFileInfo RangedReadWriteBufferFromHTTPFactory::getFileInfo() -{ - if (!file_info) - file_info = static_cast(getReader().get())->getFileInfo(); - return *file_info; -} - -String RangedReadWriteBufferFromHTTPFactory::getFileName() const { return uri.toString(); } - PooledSessionFactory::PooledSessionFactory( const ConnectionTimeouts & timeouts_, size_t per_endpoint_pool_size_) @@ -891,6 +915,7 @@ PooledReadWriteBufferFromHTTP::PooledReadWriteBufferFromHTTP( out_stream_callback_, buffer_size_) {} + template class UpdatableSession; template class UpdatableSession; template class detail::ReadWriteBufferFromHTTPBase>>; diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index de1946ced48..2d2ae5fe724 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -42,6 +42,9 @@ public: void updateSession(const Poco::URI & uri); + /// Thread safe. + SessionPtr createDetachedSession(const Poco::URI & uri); + std::shared_ptr> clone(const Poco::URI & uri); private: @@ -110,14 +113,16 @@ namespace detail bool withPartialContent(const HTTPRange & range) const; - size_t getRangeBegin() const; - size_t getOffset() const; + void prepareRequest(Poco::Net::HTTPRequest & request, Poco::URI uri_, std::optional range) const; + std::istream * callImpl(UpdatableSessionPtr & current_session, Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_, bool for_object_info = false); size_t getFileSize() override; + bool supportsReadAt() override; + bool checkIfActuallySeekable() override; String getFileName() const override; @@ -171,6 +176,8 @@ namespace detail bool nextImpl() override; + size_t readBigAt(char * to, size_t n, size_t offset, const std::function & progress_callback) override; + off_t getPosition() override; off_t seek(off_t offset_, int whence) override; @@ -237,53 +244,6 @@ public: std::optional file_info_ = std::nullopt); }; -class RangedReadWriteBufferFromHTTPFactory : public SeekableReadBufferFactory, public WithFileName -{ - using OutStreamCallback = ReadWriteBufferFromHTTP::OutStreamCallback; - -public: - RangedReadWriteBufferFromHTTPFactory( - Poco::URI uri_, - std::string method_, - OutStreamCallback out_stream_callback_, - ConnectionTimeouts timeouts_, - const Poco::Net::HTTPBasicCredentials & credentials_, - UInt64 max_redirects_ = 0, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - ReadSettings settings_ = {}, - HTTPHeaderEntries http_header_entries_ = {}, - const RemoteHostFilter * remote_host_filter_ = nullptr, - bool delay_initialization_ = true, - bool use_external_buffer_ = false, - bool skip_not_found_url_ = false); - - std::unique_ptr getReader() override; - - size_t getFileSize() override; - - bool checkIfActuallySeekable() override; - - HTTPFileInfo getFileInfo(); - - String getFileName() const override; - -private: - Poco::URI uri; - std::string method; - OutStreamCallback out_stream_callback; - ConnectionTimeouts timeouts; - const Poco::Net::HTTPBasicCredentials & credentials; - UInt64 max_redirects; - size_t buffer_size; - ReadSettings settings; - HTTPHeaderEntries 
http_header_entries; - const RemoteHostFilter * remote_host_filter; - std::optional file_info; - bool delay_initialization; - bool use_external_buffer; - bool skip_not_found_url; -}; - class PooledSessionFactory { public: @@ -292,7 +252,9 @@ public: using SessionType = PooledHTTPSessionPtr; + /// Thread safe. SessionType buildNewSession(const Poco::URI & uri); + private: ConnectionTimeouts timeouts; size_t per_endpoint_pool_size; @@ -315,6 +277,7 @@ public: size_t max_connections_per_endpoint = DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT); }; + extern template class UpdatableSession; extern template class UpdatableSession; extern template class detail::ReadWriteBufferFromHTTPBase>>; diff --git a/src/IO/SeekableReadBuffer.cpp b/src/IO/SeekableReadBuffer.cpp index 99c43d6671b..b83e382db01 100644 --- a/src/IO/SeekableReadBuffer.cpp +++ b/src/IO/SeekableReadBuffer.cpp @@ -3,6 +3,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int CANNOT_READ_FROM_ISTREAM; +} namespace { @@ -60,4 +64,46 @@ std::unique_ptr wrapSeekableReadBufferPointer(SeekableReadBu return std::make_unique>(*ptr, SeekableReadBufferPtr{ptr}); } +size_t copyFromIStreamWithProgressCallback(std::istream & istr, char * to, size_t n, const std::function & progress_callback, bool * out_cancelled) +{ + const size_t chunk = DBMS_DEFAULT_BUFFER_SIZE; + if (out_cancelled) + *out_cancelled = false; + + size_t copied = 0; + while (copied < n) + { + size_t to_copy = std::min(chunk, n - copied); + istr.read(to + copied, to_copy); + size_t gcount = istr.gcount(); + + copied += gcount; + + bool cancelled = false; + if (gcount && progress_callback) + cancelled = progress_callback(copied); + + if (gcount != to_copy) + { + if (!istr.eof()) + throw Exception( + ErrorCodes::CANNOT_READ_FROM_ISTREAM, + "{} at offset {}", + istr.fail() ? "Cannot read from istream" : "Unexpected state of istream", + copied); + + break; + } + + if (cancelled) + { + if (out_cancelled != nullptr) + *out_cancelled = true; + break; + } + } + + return copied; +} + } diff --git a/src/IO/SeekableReadBuffer.h b/src/IO/SeekableReadBuffer.h index 736ab5bbc71..8ced9d752de 100644 --- a/src/IO/SeekableReadBuffer.h +++ b/src/IO/SeekableReadBuffer.h @@ -59,39 +59,41 @@ public: /// * Sometimes when we create such buffer we don't know in advance whether we'll need it to be /// seekable or not. So we don't want to pay the price for this check in advance. virtual bool checkIfActuallySeekable() { return true; } + + /// Unbuffered positional read. + /// Doesn't affect the buffer state (position, working_buffer, etc). + /// + /// `progress_callback` may be called periodically during the read, reporting that to[0..m-1] + /// has been filled. If it returns true, reading is stopped, and readBigAt() returns bytes read + /// so far. Called only from inside readBigAt(), from the same thread, with increasing m. + /// + /// Stops either after n bytes, or at end of file, or on exception. Returns number of bytes read. + /// If offset is past the end of file, may return 0 or throw exception. + /// + /// Caller needs to be careful: + /// * supportsReadAt() must be checked (called and return true) before calling readBigAt(). + /// Otherwise readBigAt() may crash. + /// * Thread safety: multiple readBigAt() calls may be performed in parallel. + /// But readBigAt() may not be called in parallel with any other methods + /// (e.g. next() or supportsReadAt()). + /// * Performance: there's no buffering. Each readBigAt() call typically translates into actual + /// IO operation (e.g. 
HTTP request). Don't use it for small adjacent reads. + virtual size_t readBigAt(char * /*to*/, size_t /*n*/, size_t /*offset*/, const std::function & /*progress_callback*/ = nullptr) + { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method readBigAt() not implemented"); } + + /// Checks if readBigAt() is allowed. May be slow, may throw (e.g. it may do an HTTP request or an fstat). + virtual bool supportsReadAt() { return false; } }; -/// Useful for reading in parallel. -/// The created read buffers may outlive the factory. -/// -/// There are 2 ways to use this: -/// (1) Never call seek() or getFileSize(), read the file sequentially. -/// For HTTP, this usually translates to just one HTTP request. -/// (2) Call checkIfActuallySeekable(), then: -/// a. If it returned false, go to (1). seek() and getFileSize() are not available (throw if called). -/// b. If it returned true, seek() and getFileSize() are available, knock yourself out. -/// For HTTP, checkIfActuallySeekable() sends a HEAD request and returns false if the web server -/// doesn't support ranges (or doesn't support HEAD requests). -class SeekableReadBufferFactory : public WithFileSize -{ -public: - ~SeekableReadBufferFactory() override = default; - - // We usually call setReadUntilPosition() and seek() on the returned buffer before reading. - // So it's recommended that the returned implementation be lazy, i.e. don't start reading - // before the first call to nextImpl(). - virtual std::unique_ptr getReader() = 0; - - virtual bool checkIfActuallySeekable() { return true; } -}; using SeekableReadBufferPtr = std::shared_ptr; -using SeekableReadBufferFactoryPtr = std::unique_ptr; - /// Wraps a reference to a SeekableReadBuffer into an unique pointer to SeekableReadBuffer. /// This function is like wrapReadBufferReference() but for SeekableReadBuffer. std::unique_ptr wrapSeekableReadBufferReference(SeekableReadBuffer & ref); std::unique_ptr wrapSeekableReadBufferPointer(SeekableReadBufferPtr ptr); +/// Helper for implementing readBigAt(). +size_t copyFromIStreamWithProgressCallback(std::istream & istr, char * to, size_t n, const std::function & progress_callback, bool * out_cancelled = nullptr); + } diff --git a/src/IO/TimeoutSetter.cpp b/src/IO/TimeoutSetter.cpp index ed21383ccd4..b8b7a814703 100644 --- a/src/IO/TimeoutSetter.cpp +++ b/src/IO/TimeoutSetter.cpp @@ -29,14 +29,12 @@ TimeoutSetter::TimeoutSetter(Poco::Net::StreamSocket & socket_, Poco::Timespan t TimeoutSetter::~TimeoutSetter() { + if (was_reset) + return; + try { - bool connected = socket.impl()->initialized(); - if (!connected) - return; - - socket.setSendTimeout(old_send_timeout); - socket.setReceiveTimeout(old_receive_timeout); + reset(); } catch (...) { @@ -44,4 +42,15 @@ TimeoutSetter::~TimeoutSetter() } } +void TimeoutSetter::reset() +{ + bool connected = socket.impl()->initialized(); + if (!connected) + return; + + socket.setSendTimeout(old_send_timeout); + socket.setReceiveTimeout(old_receive_timeout); + was_reset = true; +} + } diff --git a/src/IO/TimeoutSetter.h b/src/IO/TimeoutSetter.h index 31c37ea07af..3479986d7fe 100644 --- a/src/IO/TimeoutSetter.h +++ b/src/IO/TimeoutSetter.h @@ -6,7 +6,7 @@ namespace DB { -/// Temporarily overrides socket send/receive timeouts and reset them back into destructor +/// Temporarily overrides socket send/receive timeouts and reset them back into destructor (or manually by calling reset method) /// If "limit_max_timeout" is true, timeouts could be only decreased (maxed by previous value). 
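+///
+/// A brief usage sketch (illustrative only; the socket object and the five-second timeouts are
+/// assumptions, not part of this change):
+///
+///     TimeoutSetter timeout_setter(socket, Poco::Timespan(5, 0), Poco::Timespan(5, 0));
+///     /// ... do the send/receive that needs the temporary timeouts ...
+///     timeout_setter.reset();  /// restore the old timeouts now instead of waiting for the destructor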
struct TimeoutSetter { @@ -19,6 +19,9 @@ struct TimeoutSetter ~TimeoutSetter(); + /// Reset timeouts back. + void reset(); + Poco::Net::StreamSocket & socket; Poco::Timespan send_timeout; @@ -26,5 +29,6 @@ struct TimeoutSetter Poco::Timespan old_send_timeout; Poco::Timespan old_receive_timeout; + bool was_reset = false; }; } diff --git a/src/IO/WithFileName.cpp b/src/IO/WithFileName.cpp index 9d9f264c861..2383182f7e7 100644 --- a/src/IO/WithFileName.cpp +++ b/src/IO/WithFileName.cpp @@ -19,7 +19,7 @@ String getFileNameFromReadBuffer(const ReadBuffer & in) if (const auto * compressed = dynamic_cast(&in)) return getFileName(compressed->getWrappedReadBuffer()); else if (const auto * parallel = dynamic_cast(&in)) - return getFileName(parallel->getReadBufferFactory()); + return getFileName(parallel->getReadBuffer()); else if (const auto * peekable = dynamic_cast(&in)) return getFileNameFromReadBuffer(peekable->getSubBuffer()); else diff --git a/src/IO/WithFileSize.cpp b/src/IO/WithFileSize.cpp index f71690fcdee..28542db7a73 100644 --- a/src/IO/WithFileSize.cpp +++ b/src/IO/WithFileSize.cpp @@ -33,10 +33,6 @@ size_t getFileSizeFromReadBuffer(ReadBuffer & in) { return getFileSize(compressed->getWrappedReadBuffer()); } - else if (auto * parallel = dynamic_cast(&in)) - { - return getFileSize(parallel->getReadBufferFactory()); - } return getFileSize(in); } @@ -51,10 +47,6 @@ bool isBufferWithFileSize(const ReadBuffer & in) { return isBufferWithFileSize(compressed->getWrappedReadBuffer()); } - else if (const auto * parallel = dynamic_cast(&in)) - { - return dynamic_cast(¶llel->getReadBufferFactory()) != nullptr; - } return dynamic_cast(&in) != nullptr; } diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 462cf2674c3..6992c3ea4ac 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -195,18 +195,14 @@ void WriteBufferFromS3::finalizeImpl() if (request_settings.check_objects_after_upload) { - LOG_TRACE(log, "Checking object {} exists after upload", key); S3::checkObjectExists(*client_ptr, bucket, key, {}, request_settings, /* for_disk_s3= */ write_settings.for_object_storage, "Immediately after upload"); - LOG_TRACE(log, "Checking object {} has size as expected {}", key, total_size); size_t actual_size = S3::getObjectSize(*client_ptr, bucket, key, {}, request_settings, /* for_disk_s3= */ write_settings.for_object_storage); if (actual_size != total_size) throw Exception( ErrorCodes::S3_ERROR, "Object {} from bucket {} has unexpected size {} after upload, expected size {}, it's a bug in S3 or S3 API.", key, bucket, actual_size, total_size); - - LOG_TRACE(log, "Object {} exists after upload", key); } } @@ -292,8 +288,6 @@ void WriteBufferFromS3::reallocateFirstBuffer() WriteBuffer::set(memory.data() + hidden_size, memory.size() - hidden_size); chassert(offset() == 0); - - LOG_TRACE(log, "Reallocated first buffer with size {}. {}", memory.size(), getLogDetails()); } void WriteBufferFromS3::detachBuffer() @@ -316,8 +310,6 @@ void WriteBufferFromS3::allocateFirstBuffer() const auto size = std::min(size_t(DBMS_DEFAULT_BUFFER_SIZE), max_first_buffer); memory = Memory(size); WriteBuffer::set(memory.data(), memory.size()); - - LOG_TRACE(log, "Allocated first buffer with size {}. 
{}", memory.size(), getLogDetails()); } void WriteBufferFromS3::allocateBuffer() diff --git a/src/IO/WriteBufferFromS3TaskTracker.cpp b/src/IO/WriteBufferFromS3TaskTracker.cpp index f8ae9598a38..2790d71db3d 100644 --- a/src/IO/WriteBufferFromS3TaskTracker.cpp +++ b/src/IO/WriteBufferFromS3TaskTracker.cpp @@ -36,8 +36,6 @@ ThreadPoolCallbackRunner WriteBufferFromS3::TaskTracker::syncRunner() void WriteBufferFromS3::TaskTracker::waitAll() { - LOG_TEST(log, "waitAll, in queue {}", futures.size()); - /// Exceptions are propagated for (auto & future : futures) { @@ -51,8 +49,6 @@ void WriteBufferFromS3::TaskTracker::waitAll() void WriteBufferFromS3::TaskTracker::safeWaitAll() { - LOG_TEST(log, "safeWaitAll, wait in queue {}", futures.size()); - for (auto & future : futures) { if (future.valid()) @@ -76,7 +72,6 @@ void WriteBufferFromS3::TaskTracker::safeWaitAll() void WriteBufferFromS3::TaskTracker::waitIfAny() { - LOG_TEST(log, "waitIfAny, in queue {}", futures.size()); if (futures.empty()) return; @@ -101,8 +96,6 @@ void WriteBufferFromS3::TaskTracker::waitIfAny() watch.stop(); ProfileEvents::increment(ProfileEvents::WriteBufferFromS3WaitInflightLimitMicroseconds, watch.elapsedMicroseconds()); - - LOG_TEST(log, "waitIfAny ended, in queue {}", futures.size()); } void WriteBufferFromS3::TaskTracker::add(Callback && func) @@ -147,8 +140,6 @@ void WriteBufferFromS3::TaskTracker::waitTilInflightShrink() if (!max_tasks_inflight) return; - LOG_TEST(log, "waitTilInflightShrink, in queue {}", futures.size()); - Stopwatch watch; /// Alternative approach is to wait until at least futures.size() - max_tasks_inflight element are finished @@ -171,8 +162,6 @@ void WriteBufferFromS3::TaskTracker::waitTilInflightShrink() watch.stop(); ProfileEvents::increment(ProfileEvents::WriteBufferFromS3WaitInflightLimitMicroseconds, watch.elapsedMicroseconds()); - - LOG_TEST(log, "waitTilInflightShrink ended, in queue {}", futures.size()); } bool WriteBufferFromS3::TaskTracker::isAsync() const diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 65dca790183..ba160a31b73 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1,16 +1,35 @@ #include "FileCache.h" -#include +#include +#include +#include +#include +#include #include #include #include -#include -#include -#include -#include -#include -#include #include +#include +#include + +#include + + +namespace fs = std::filesystem; + +namespace +{ + +size_t roundDownToMultiple(size_t num, size_t multiple) +{ + return (num / multiple) * multiple; +} + +size_t roundUpToMultiple(size_t num, size_t multiple) +{ + return roundDownToMultiple(num + multiple - 1, multiple); +} +} namespace DB { @@ -26,6 +45,7 @@ FileCache::FileCache(const FileCacheSettings & settings) , delayed_cleanup_interval_ms(settings.delayed_cleanup_interval_ms) , log(&Poco::Logger::get("FileCache")) , metadata(settings.base_path) + , boundary_alignment(settings.boundary_alignment) { main_priority = std::make_unique(settings.max_size, settings.max_elements); @@ -385,15 +405,16 @@ FileSegmentsHolderPtr FileCache::set( return std::make_unique(std::move(file_segments)); } -FileSegmentsHolderPtr FileCache::getOrSet( - const Key & key, - size_t offset, - size_t size, - const CreateFileSegmentSettings & settings) +FileSegmentsHolderPtr +FileCache::getOrSet(const Key & key, size_t offset, size_t size, size_t file_size, const CreateFileSegmentSettings & settings) { assertInitialized(); - FileSegment::Range range(offset, offset + 
size - 1); + const auto aligned_offset = roundDownToMultiple(offset, boundary_alignment); + const auto aligned_end = std::min(roundUpToMultiple(offset + size, boundary_alignment), file_size); + const auto aligned_size = aligned_end - aligned_offset; + + FileSegment::Range range(aligned_offset, aligned_offset + aligned_size - 1); auto locked_key = metadata.lockKeyMetadata(key, CacheMetadata::KeyNotFoundPolicy::CREATE_EMPTY); @@ -401,8 +422,7 @@ FileSegmentsHolderPtr FileCache::getOrSet( auto file_segments = getImpl(*locked_key, range); if (file_segments.empty()) { - file_segments = splitRangeIntoFileSegments( - *locked_key, offset, size, FileSegment::State::EMPTY, settings); + file_segments = splitRangeIntoFileSegments(*locked_key, range.left, range.size(), FileSegment::State::EMPTY, settings); } else { @@ -410,6 +430,12 @@ FileSegmentsHolderPtr FileCache::getOrSet( *locked_key, file_segments, range, /* fill_with_detached */false, settings); } + while (!file_segments.empty() && file_segments.front()->range().right < offset) + file_segments.pop_front(); + + while (!file_segments.empty() && file_segments.back()->range().left >= offset + size) + file_segments.pop_back(); + chassert(!file_segments.empty()); return std::make_unique(std::move(file_segments)); } diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 2ceb6825a54..71fc1722844 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -67,7 +67,8 @@ public: * As long as pointers to returned file segments are held * it is guaranteed that these file segments are not removed from cache. */ - FileSegmentsHolderPtr getOrSet(const Key & key, size_t offset, size_t size, const CreateFileSegmentSettings & settings); + FileSegmentsHolderPtr + getOrSet(const Key & key, size_t offset, size_t size, size_t file_size, const CreateFileSegmentSettings & settings); /** * Segments in returned list are ordered in ascending order and represent a full contiguous @@ -179,6 +180,8 @@ private: void assertInitialized() const; + size_t boundary_alignment; + void assertCacheCorrectness(); void loadMetadata(); diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp index b6bf77cb306..1fe51bf5f3e 100644 --- a/src/Interpreters/Cache/FileCacheSettings.cpp +++ b/src/Interpreters/Cache/FileCacheSettings.cpp @@ -49,6 +49,8 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration & do_not_evict_index_and_mark_files = config.getUInt64(config_prefix + ".do_not_evict_index_and_mark_files", true); + boundary_alignment = config.getUInt64(config_prefix + ".boundary_alignment", DBMS_DEFAULT_BUFFER_SIZE); + delayed_cleanup_interval_ms = config.getUInt64(config_prefix + ".delayed_cleanup_interval_ms", FILECACHE_DELAYED_CLEANUP_INTERVAL_MS); } diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h index e316cc6d6fe..eeb2a02c131 100644 --- a/src/Interpreters/Cache/FileCacheSettings.h +++ b/src/Interpreters/Cache/FileCacheSettings.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -27,6 +28,8 @@ struct FileCacheSettings size_t bypass_cache_threashold = FILECACHE_BYPASS_THRESHOLD; size_t delayed_cleanup_interval_ms = FILECACHE_DELAYED_CLEANUP_INTERVAL_MS; + size_t boundary_alignment = DBMS_DEFAULT_BUFFER_SIZE; + void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); }; diff --git a/src/Interpreters/Cache/FileCache_fwd.h 
b/src/Interpreters/Cache/FileCache_fwd.h index afd8d86074e..dbb85fa0e7a 100644 --- a/src/Interpreters/Cache/FileCache_fwd.h +++ b/src/Interpreters/Cache/FileCache_fwd.h @@ -4,7 +4,7 @@ namespace DB { -static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 100 * 1024 * 1024; +static constexpr int FILECACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE = 8 * 1024 * 1024; static constexpr int FILECACHE_DEFAULT_MAX_ELEMENTS = 1024 * 1024; static constexpr int FILECACHE_DEFAULT_HITS_THRESHOLD = 0; static constexpr size_t FILECACHE_BYPASS_THRESHOLD = 256 * 1024 * 1024; diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 60228573666..9370b64b2d4 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -1,13 +1,14 @@ #include "FileSegment.h" -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include +#include +#include #include @@ -399,6 +400,8 @@ void FileSegment::write(const char * from, size_t size, size_t offset) FileSegment::State FileSegment::wait(size_t offset) { + OpenTelemetry::SpanHolder span{fmt::format("FileSegment::wait({})", key().toString())}; + auto lock = segment_guard.lock(); if (downloader_id.empty() || offset < getCurrentWriteOffset(true)) diff --git a/src/Interpreters/FilesystemReadPrefetchesLog.cpp b/src/Interpreters/FilesystemReadPrefetchesLog.cpp index 221cb11f09a..d1cc61b94ba 100644 --- a/src/Interpreters/FilesystemReadPrefetchesLog.cpp +++ b/src/Interpreters/FilesystemReadPrefetchesLog.cpp @@ -1,9 +1,9 @@ -#include #include #include #include #include #include +#include namespace DB @@ -39,12 +39,12 @@ void FilesystemReadPrefetchesLogElement::appendToBlock(MutableColumns & columns) columns[i++]->insert(path); columns[i++]->insert(offset); columns[i++]->insert(size); - columns[i++]->insert(prefetch_submit_time); + columns[i++]->insert(std::chrono::duration_cast(prefetch_submit_time.time_since_epoch()).count()); columns[i++]->insert(priority.value); if (execution_watch) { - columns[i++]->insert(execution_watch->getStart()); - columns[i++]->insert(execution_watch->getEnd()); + columns[i++]->insert(execution_watch->getStart() / 1000); + columns[i++]->insert(execution_watch->getEnd() / 1000); columns[i++]->insert(execution_watch->elapsedMicroseconds()); } else diff --git a/src/Interpreters/FilesystemReadPrefetchesLog.h b/src/Interpreters/FilesystemReadPrefetchesLog.h index cf36f513f5a..313c8ab5872 100644 --- a/src/Interpreters/FilesystemReadPrefetchesLog.h +++ b/src/Interpreters/FilesystemReadPrefetchesLog.h @@ -24,7 +24,7 @@ struct FilesystemReadPrefetchesLogElement String path; UInt64 offset; Int64 size; /// -1 means unknown - Decimal64 prefetch_submit_time{}; + std::chrono::system_clock::time_point prefetch_submit_time; std::optional execution_watch; Priority priority; FilesystemPrefetchState state; diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 146b57049a6..9306c9b99eb 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -543,13 +543,17 @@ namespace template struct Inserter { - static ALWAYS_INLINE void insertOne(const HashJoin & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, + static ALWAYS_INLINE bool insertOne(const HashJoin & join, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) { auto emplace_result = key_getter.emplaceKey(map, i, pool); if (emplace_result.isInserted() || 
join.anyTakeLastRow()) + { new (&emplace_result.getMapped()) typename Map::mapped_type(stored_block, i); + return true; + } + return false; } static ALWAYS_INLINE void insertAll(const HashJoin &, Map & map, KeyGetter & key_getter, Block * stored_block, size_t i, Arena & pool) @@ -582,7 +586,7 @@ namespace template size_t NO_INLINE insertFromBlockImplTypeCase( HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, - const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool) + const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) { [[maybe_unused]] constexpr bool mapped_one = std::is_same_v; constexpr bool is_asof_join = STRICTNESS == JoinStrictness::Asof; @@ -593,10 +597,18 @@ namespace auto key_getter = createKeyGetter(key_columns, key_sizes); + /// For ALL and ASOF join always insert values + is_inserted = !mapped_one || is_asof_join; + for (size_t i = 0; i < rows; ++i) { if (has_null_map && (*null_map)[i]) + { + /// nulls are not inserted into hash table, + /// keep them for RIGHT and FULL joins + is_inserted = true; continue; + } /// Check condition for right table from ON section if (join_mask && !(*join_mask)[i]) @@ -605,7 +617,7 @@ namespace if constexpr (is_asof_join) Inserter::insertAsof(join, map, key_getter, stored_block, i, pool, *asof_column); else if constexpr (mapped_one) - Inserter::insertOne(join, map, key_getter, stored_block, i, pool); + is_inserted |= Inserter::insertOne(join, map, key_getter, stored_block, i, pool); else Inserter::insertAll(join, map, key_getter, stored_block, i, pool); } @@ -616,32 +628,37 @@ namespace template size_t insertFromBlockImplType( HashJoin & join, Map & map, size_t rows, const ColumnRawPtrs & key_columns, - const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool) + const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) { if (null_map) return insertFromBlockImplTypeCase( - join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool); + join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); else return insertFromBlockImplTypeCase( - join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool); + join, map, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); } template size_t insertFromBlockImpl( HashJoin & join, HashJoin::Type type, Maps & maps, size_t rows, const ColumnRawPtrs & key_columns, - const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool) + const Sizes & key_sizes, Block * stored_block, ConstNullMapPtr null_map, UInt8ColumnDataPtr join_mask, Arena & pool, bool & is_inserted) { switch (type) { - case HashJoin::Type::EMPTY: return 0; - case HashJoin::Type::CROSS: return 0; /// Do nothing. We have already saved block, and it is enough. + case HashJoin::Type::EMPTY: + [[fallthrough]]; + case HashJoin::Type::CROSS: + /// Do nothing. 
We will only save block, and it is enough + is_inserted = true; + return 0; #define M(TYPE) \ case HashJoin::Type::TYPE: \ return insertFromBlockImplType>::Type>(\ - join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool); \ + join, *maps.TYPE, rows, key_columns, key_sizes, stored_block, null_map, join_mask, pool, is_inserted); \ break; + APPLY_FOR_JOIN_VARIANTS(M) #undef M } @@ -816,6 +833,7 @@ bool HashJoin::addJoinedBlock(const Block & source_block_, bool check_limits) } } + bool is_inserted = false; if (kind != JoinKind::Cross) { joinDispatch(kind, strictness, data->maps[onexpr_idx], [&](auto kind_, auto strictness_, auto & map) @@ -824,28 +842,35 @@ bool HashJoin::addJoinedBlock(const Block & source_block_, bool check_limits) *this, data->type, map, rows, key_columns, key_sizes[onexpr_idx], stored_block, null_map, /// If mask is false constant, rows are added to hashmap anyway. It's not a happy-flow, so this case is not optimized join_mask_col.getData(), - data->pool); + data->pool, is_inserted); if (multiple_disjuncts) used_flags.reinit(stored_block); - else + else if (is_inserted) /// Number of buckets + 1 value from zero storage used_flags.reinit(size + 1); }); } - if (!multiple_disjuncts && save_nullmap) + if (!multiple_disjuncts && save_nullmap && is_inserted) { data->blocks_nullmaps_allocated_size += null_map_holder->allocatedBytes(); data->blocks_nullmaps.emplace_back(stored_block, null_map_holder); } - if (!multiple_disjuncts && not_joined_map) + if (!multiple_disjuncts && not_joined_map && is_inserted) { data->blocks_nullmaps_allocated_size += not_joined_map->allocatedBytes(); data->blocks_nullmaps.emplace_back(stored_block, std::move(not_joined_map)); } + if (!multiple_disjuncts && !is_inserted) + { + LOG_TRACE(log, "Skipping inserting block with {} rows", rows); + data->blocks_allocated_size -= stored_block->allocatedBytes(); + data->blocks.pop_back(); + } + if (!check_limits) return true; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 6e13afa9f43..de2e2b9ad92 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -322,6 +322,8 @@ static std::tuple executeQueryImpl( /// This does not have impact on the final span logs, because these internal queries are issued by external queries, /// we still have enough span logs for the execution of external queries. std::shared_ptr query_span = internal ? nullptr : std::make_shared("query"); + if (query_span) + LOG_DEBUG(&Poco::Logger::get("executeQuery"), "Query span trace_id for opentelemetry log: {}", query_span->trace_id); auto query_start_time = std::chrono::system_clock::now(); diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index e26d412b35e..fe9e3a18024 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -192,6 +192,9 @@ TEST_F(FileCacheTest, get) settings.base_path = cache_base_path; settings.max_size = 30; settings.max_elements = 5; + settings.boundary_alignment = 1; + + const size_t file_size = -1; // the value doesn't really matter because boundary_alignment == 1. 
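Note on the boundary_alignment logic added to FileCache::getOrSet at the top of this section: the requested byte range is rounded outwards to the alignment boundary (capped by the file size), and segments that fall entirely outside the original request are later popped from the returned holder. A standalone sketch of that arithmetic, assuming the usual semantics of roundDownToMultiple/roundUpToMultiple (the lambdas below are local to the example, not ClickHouse helpers):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>

    int main()
    {
        const size_t boundary_alignment = 4096; // example value; the setting defaults to DBMS_DEFAULT_BUFFER_SIZE
        const size_t offset = 5000, size = 100, file_size = 100000;

        auto round_down = [](size_t x, size_t m) { return x / m * m; };
        auto round_up = [](size_t x, size_t m) { return (x + m - 1) / m * m; };

        const size_t aligned_offset = round_down(offset, boundary_alignment);                        // 4096
        const size_t aligned_end = std::min(round_up(offset + size, boundary_alignment), file_size); // 8192
        const size_t aligned_size = aligned_end - aligned_offset;                                    // 4096

        // The cache works with the aligned range [4096, 8191]; segments that end before `offset`
        // or start at/after `offset + size` are then trimmed from the returned holder.
        assert(aligned_offset <= offset && offset + size <= aligned_end);
        return 0;
    }
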
{ std::cerr << "Step 1\n"; @@ -200,7 +203,7 @@ TEST_F(FileCacheTest, get) auto key = cache.createKeyForPath("key1"); { - auto holder = cache.getOrSet(key, 0, 10, {}); /// Add range [0, 9] + auto holder = cache.getOrSet(key, 0, 10, file_size, {}); /// Add range [0, 9] assertEqual(holder, { Range(0, 9) }, { State::EMPTY }); download(holder->front()); assertEqual(holder, { Range(0, 9) }, { State::DOWNLOADED }); @@ -219,7 +222,7 @@ TEST_F(FileCacheTest, get) { /// Want range [5, 14], but [0, 9] already in cache, so only [10, 14] will be put in cache. - auto holder = cache.getOrSet(key, 5, 10, {}); + auto holder = cache.getOrSet(key, 5, 10, file_size, {}); assertEqual(holder, { Range(0, 9), Range(10, 14) }, { State::DOWNLOADED, State::EMPTY }); download(get(holder, 1)); assertEqual(holder, { Range(0, 9), Range(10, 14) }, { State::DOWNLOADED, State::DOWNLOADED }); @@ -238,20 +241,18 @@ TEST_F(FileCacheTest, get) /// Get [9, 9] { - auto holder = cache.getOrSet(key, 9, 1, {}); + auto holder = cache.getOrSet(key, 9, 1, file_size, {}); assertEqual(holder, { Range(0, 9) }, { State::DOWNLOADED }); increasePriority(holder); } assertEqual(cache.dumpQueue(), { Range(10, 14), Range(0, 9) }); /// Get [9, 10] - assertEqual(cache.getOrSet(key, 9, 2, {}), - { Range(0, 9), Range(10, 14) }, - { State::DOWNLOADED, State::DOWNLOADED }); + assertEqual(cache.getOrSet(key, 9, 2, file_size, {}), {Range(0, 9), Range(10, 14)}, {State::DOWNLOADED, State::DOWNLOADED}); /// Get [10, 10] { - auto holder = cache.getOrSet(key, 10, 1, {}); + auto holder = cache.getOrSet(key, 10, 1, file_size, {}); assertEqual(holder, { Range(10, 14) }, { State::DOWNLOADED }); increasePriority(holder); } @@ -264,19 +265,19 @@ TEST_F(FileCacheTest, get) std::cerr << "Step 4\n"; { - auto holder = cache.getOrSet(key, 17, 4, {}); + auto holder = cache.getOrSet(key, 17, 4, file_size, {}); download(holder); /// Get [17, 20] increasePriority(holder); } { - auto holder = cache.getOrSet(key, 24, 3, {}); + auto holder = cache.getOrSet(key, 24, 3, file_size, {}); download(holder); /// Get [24, 26] increasePriority(holder); } { - auto holder = cache.getOrSet(key, 27, 1, {}); + auto holder = cache.getOrSet(key, 27, 1, file_size, {}); download(holder); /// Get [27, 27] increasePriority(holder); } @@ -292,7 +293,7 @@ TEST_F(FileCacheTest, get) std::cerr << "Step 5\n"; { - auto holder = cache.getOrSet(key, 0, 26, {}); /// Get [0, 25] + auto holder = cache.getOrSet(key, 0, 26, file_size, {}); /// Get [0, 25] assertEqual(holder, { Range(0, 9), Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 23), Range(24, 26) }, { State::DOWNLOADED, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED }); @@ -309,12 +310,12 @@ TEST_F(FileCacheTest, get) /// Let's not invalidate pointers to returned segments from range [0, 25] and /// as max elements size is reached, next attempt to put something in cache should fail. /// This will also check that [27, 27] was indeed evicted. 
- auto holder2 = cache.getOrSet(key, 27, 1, {}); + auto holder2 = cache.getOrSet(key, 27, 1, file_size, {}); assertEqual(holder2, { Range(27, 27) }, { State::EMPTY }); assertDownloadFails(holder2->front()); assertEqual(holder2, { Range(27, 27) }, { State::DETACHED }); - auto holder3 = cache.getOrSet(key, 28, 3, {}); + auto holder3 = cache.getOrSet(key, 28, 3, file_size, {}); assertEqual(holder3, { Range(28, 30) }, { State::EMPTY }); assertDownloadFails(holder3->front()); assertEqual(holder3, { Range(28, 30) }, { State::DETACHED }); @@ -336,7 +337,7 @@ TEST_F(FileCacheTest, get) std::cerr << "Step 6\n"; { - auto holder = cache.getOrSet(key, 12, 10, {}); /// Get [12, 21] + auto holder = cache.getOrSet(key, 12, 10, file_size, {}); /// Get [12, 21] assertEqual(holder, { Range(10, 14), Range(15, 16), Range(17, 20), Range(21, 21) }, { State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::EMPTY }); @@ -357,7 +358,7 @@ TEST_F(FileCacheTest, get) std::cerr << "Step 7\n"; { - auto holder = cache.getOrSet(key, 23, 5, {}); /// Get [23, 27] + auto holder = cache.getOrSet(key, 23, 5, file_size, {}); /// Get [23, 27] assertEqual(holder, { Range(23, 23), Range(24, 26), Range(27, 27) }, { State::EMPTY, State::DOWNLOADED, State::EMPTY }); @@ -376,25 +377,25 @@ TEST_F(FileCacheTest, get) std::cerr << "Step 8\n"; { - auto holder = cache.getOrSet(key, 2, 3, {}); /// Get [2, 4] + auto holder = cache.getOrSet(key, 2, 3, file_size, {}); /// Get [2, 4] assertEqual(holder, { Range(2, 4) }, { State::EMPTY }); - auto holder2 = cache.getOrSet(key, 30, 2, {}); /// Get [30, 31] + auto holder2 = cache.getOrSet(key, 30, 2, file_size, {}); /// Get [30, 31] assertEqual(holder2, { Range(30, 31) }, { State::EMPTY }); download(get(holder, 0)); download(get(holder2, 0)); - auto holder3 = cache.getOrSet(key, 23, 1, {}); /// Get [23, 23] + auto holder3 = cache.getOrSet(key, 23, 1, file_size, {}); /// Get [23, 23] assertEqual(holder3, { Range(23, 23) }, { State::DOWNLOADED }); - auto holder4 = cache.getOrSet(key, 24, 3, {}); /// Get [24, 26] + auto holder4 = cache.getOrSet(key, 24, 3, file_size, {}); /// Get [24, 26] assertEqual(holder4, { Range(24, 26) }, { State::DOWNLOADED }); - auto holder5 = cache.getOrSet(key, 27, 1, {}); /// Get [27, 27] + auto holder5 = cache.getOrSet(key, 27, 1, file_size, {}); /// Get [27, 27] assertEqual(holder5, { Range(27, 27) }, { State::DOWNLOADED }); - auto holder6 = cache.getOrSet(key, 0, 40, {}); + auto holder6 = cache.getOrSet(key, 0, 40, file_size, {}); assertEqual(holder6, { Range(0, 1), Range(2, 4), Range(5, 22), Range(23, 23), Range(24, 26), Range(27, 27), Range(28, 29), Range(30, 31), Range(32, 39) }, { State::EMPTY, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::EMPTY, State::DOWNLOADED, State::EMPTY }); @@ -422,14 +423,14 @@ TEST_F(FileCacheTest, get) /// Get [2, 4] { - auto holder = cache.getOrSet(key, 2, 3, {}); + auto holder = cache.getOrSet(key, 2, 3, file_size, {}); assertEqual(holder, { Range(2, 4) }, { State::DOWNLOADED }); increasePriority(holder); } { - auto holder = cache.getOrSet(key, 25, 5, {}); /// Get [25, 29] + auto holder = cache.getOrSet(key, 25, 5, file_size, {}); /// Get [25, 29] assertEqual(holder, { Range(24, 26), Range(27, 27), Range(28, 29) }, { State::DOWNLOADED, State::DOWNLOADED, State::EMPTY }); @@ -451,7 +452,7 @@ TEST_F(FileCacheTest, get) chassert(&DB::CurrentThread::get() == &thread_status_1); DB::CurrentThread::QueryScope query_scope_holder_1(query_context_1); - auto holder2 = cache.getOrSet(key, 
25, 5, {}); /// Get [25, 29] once again. + auto holder2 = cache.getOrSet(key, 25, 5, file_size, {}); /// Get [25, 29] once again. assertEqual(holder2, { Range(24, 26), Range(27, 27), Range(28, 29) }, { State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADING }); @@ -494,7 +495,7 @@ TEST_F(FileCacheTest, get) /// state is changed not manually via segment->completeWithState(state) but from destructor of holder /// and notify_all() is also called from destructor of holder. - auto holder = cache.getOrSet(key, 3, 23, {}); /// Get [3, 25] + auto holder = cache.getOrSet(key, 3, 23, file_size, {}); /// Get [3, 25] assertEqual(holder, { Range(2, 4), Range(5, 23), Range(24, 26) }, { State::DOWNLOADED, State::EMPTY, State::DOWNLOADED }); @@ -516,7 +517,7 @@ TEST_F(FileCacheTest, get) chassert(&DB::CurrentThread::get() == &thread_status_1); DB::CurrentThread::QueryScope query_scope_holder_1(query_context_1); - auto holder2 = cache.getOrSet(key, 3, 23, {}); /// Get [3, 25] once again + auto holder2 = cache.getOrSet(key, 3, 23, file_size, {}); /// Get [3, 25] once again assertEqual(holder, { Range(2, 4), Range(5, 23), Range(24, 26) }, { State::DOWNLOADED, State::DOWNLOADING, State::DOWNLOADED }); @@ -560,9 +561,10 @@ TEST_F(FileCacheTest, get) auto key = cache2.createKeyForPath("key1"); /// Get [2, 29] - assertEqual(cache2.getOrSet(key, 2, 28, {}), - { Range(2, 4), Range(5, 23), Range(24, 26), Range(27, 27), Range(28, 29) }, - { State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED }); + assertEqual( + cache2.getOrSet(key, 2, 28, file_size, {}), + {Range(2, 4), Range(5, 23), Range(24, 26), Range(27, 27), Range(28, 29)}, + {State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED, State::DOWNLOADED}); } std::cerr << "Step 12\n"; @@ -578,9 +580,10 @@ TEST_F(FileCacheTest, get) auto key = cache2.createKeyForPath("key1"); /// Get [0, 24] - assertEqual(cache2.getOrSet(key, 0, 25, {}), - { Range(0, 9), Range(10, 19), Range(20, 24) }, - { State::EMPTY, State::EMPTY, State::EMPTY }); + assertEqual( + cache2.getOrSet(key, 0, 25, file_size, {}), + {Range(0, 9), Range(10, 19), Range(20, 24)}, + {State::EMPTY, State::EMPTY, State::EMPTY}); } std::cerr << "Step 13\n"; @@ -598,7 +601,7 @@ TEST_F(FileCacheTest, get) ASSERT_TRUE(!fs::exists(key_path)); ASSERT_TRUE(!fs::exists(fs::path(key_path).parent_path())); - download(cache.getOrSet(key, 0, 10, {})); + download(cache.getOrSet(key, 0, 10, file_size, {})); ASSERT_EQ(cache.getUsedCacheSize(), 10); ASSERT_TRUE(fs::exists(cache.getPathInLocalCache(key, 0, FileSegmentKind::Regular))); @@ -628,7 +631,7 @@ TEST_F(FileCacheTest, get) ASSERT_TRUE(!fs::exists(key_path)); ASSERT_TRUE(!fs::exists(fs::path(key_path).parent_path())); - download(cache.getOrSet(key, 0, 10, {})); + download(cache.getOrSet(key, 0, 10, file_size, {})); ASSERT_EQ(cache.getUsedCacheSize(), 10); ASSERT_TRUE(fs::exists(key_path)); @@ -756,7 +759,7 @@ TEST_F(FileCacheTest, temporaryData) auto tmp_data_scope = std::make_shared(nullptr, &file_cache, 0); - auto some_data_holder = file_cache.getOrSet(file_cache.createKeyForPath("some_data"), 0, 5_KiB, CreateFileSegmentSettings{}); + auto some_data_holder = file_cache.getOrSet(file_cache.createKeyForPath("some_data"), 0, 5_KiB, 5_KiB, CreateFileSegmentSettings{}); { ASSERT_EQ(some_data_holder->size(), 5); diff --git a/src/Interpreters/threadPoolCallbackRunner.h b/src/Interpreters/threadPoolCallbackRunner.h index 55c6a848b77..f7324bfafe6 100644 --- a/src/Interpreters/threadPoolCallbackRunner.h +++ 
b/src/Interpreters/threadPoolCallbackRunner.h @@ -13,7 +13,7 @@ namespace DB template > using ThreadPoolCallbackRunner = std::function(Callback &&, Priority)>; -/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()'. +/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrowOnError()'. template > ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name) { @@ -44,7 +44,7 @@ ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & auto future = task->get_future(); - my_pool->scheduleOrThrow([my_task = std::move(task)]{ (*my_task)(); }, priority); + my_pool->scheduleOrThrowOnError([my_task = std::move(task)]{ (*my_task)(); }, priority); return future; }; diff --git a/src/Parsers/TokenIterator.cpp b/src/Parsers/TokenIterator.cpp index 6633ddb9563..fa792e7c8b5 100644 --- a/src/Parsers/TokenIterator.cpp +++ b/src/Parsers/TokenIterator.cpp @@ -4,7 +4,7 @@ namespace DB { -Tokens::Tokens(const char * begin, const char * end, size_t max_query_size) +Tokens::Tokens(const char * begin, const char * end, size_t max_query_size, bool skip_insignificant) { Lexer lexer(begin, end, max_query_size); @@ -13,7 +13,7 @@ Tokens::Tokens(const char * begin, const char * end, size_t max_query_size) { Token token = lexer.nextToken(); stop = token.isEnd() || token.type == TokenType::ErrorMaxQuerySizeExceeded; - if (token.isSignificant()) + if (token.isSignificant() || (!skip_insignificant && !data.empty() && data.back().isSignificant())) data.emplace_back(std::move(token)); } while (!stop); } diff --git a/src/Parsers/TokenIterator.h b/src/Parsers/TokenIterator.h index c9ac61dfef9..192f2f55e6a 100644 --- a/src/Parsers/TokenIterator.h +++ b/src/Parsers/TokenIterator.h @@ -24,7 +24,7 @@ private: std::size_t last_accessed_index = 0; public: - Tokens(const char * begin, const char * end, size_t max_query_size = 0); + Tokens(const char * begin, const char * end, size_t max_query_size = 0, bool skip_insignificant = true); ALWAYS_INLINE inline const Token & operator[](size_t index) { diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 8d794409f78..dd9a6023b0b 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -233,10 +233,11 @@ ASTPtr tryParseQuery( const std::string & query_description, bool allow_multi_statements, size_t max_query_size, - size_t max_parser_depth) + size_t max_parser_depth, + bool skip_insignificant) { const char * query_begin = _out_query_end; - Tokens tokens(query_begin, all_queries_end, max_query_size); + Tokens tokens(query_begin, all_queries_end, max_query_size, skip_insignificant); /// NOTE: consider use UInt32 for max_parser_depth setting. IParser::Pos token_iterator(tokens, static_cast(max_parser_depth)); diff --git a/src/Parsers/parseQuery.h b/src/Parsers/parseQuery.h index cc077bbdab2..a087f145d2c 100644 --- a/src/Parsers/parseQuery.h +++ b/src/Parsers/parseQuery.h @@ -18,7 +18,8 @@ ASTPtr tryParseQuery( bool allow_multi_statements, /// If false, check for non-space characters after semicolon and set error message if any. size_t max_query_size, /// If (end - pos) > max_query_size and query is longer than max_query_size then throws "Max query size exceeded". /// Disabled if zero. Is used in order to check query size if buffer can contains data for INSERT query. - size_t max_parser_depth); + size_t max_parser_depth, + bool skip_insignificant = true); /// If true, lexer will skip all insignificant tokens (e.g. 
whitespaces) /// Parse query or throw an exception with error message. diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp index 54f3b76ff60..37505f94e98 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp @@ -146,45 +146,19 @@ arrow::Status ArrowInputStreamFromReadBuffer::Close() return arrow::Status(); } -RandomAccessFileFromManyReadBuffers::RandomAccessFileFromManyReadBuffers(SeekableReadBufferFactory & factory) : buf_factory(factory) {} +RandomAccessFileFromRandomAccessReadBuffer::RandomAccessFileFromRandomAccessReadBuffer(SeekableReadBuffer & in_, size_t file_size_) : in(in_), file_size(file_size_) {} -arrow::Result RandomAccessFileFromManyReadBuffers::GetSize() +arrow::Result RandomAccessFileFromRandomAccessReadBuffer::GetSize() { - return buf_factory.getFileSize(); + return file_size; } -arrow::Result RandomAccessFileFromManyReadBuffers::ReadAt(int64_t position, int64_t nbytes, void* out) +arrow::Result RandomAccessFileFromRandomAccessReadBuffer::ReadAt(int64_t position, int64_t nbytes, void* out) { - std::unique_lock lock(mutex); - if (free_bufs.empty()) - free_bufs.push_back(buf_factory.getReader()); - auto buf = std::move(free_bufs.back()); - free_bufs.pop_back(); - lock.unlock(); - - // To work well with this, ReadBuffer implementations need to respect setReadUntilPosition() and - // not read above it. We often do very small reads here. - // Also nice if they: - // * Make readBig() read directly into the provided memory, instead of copying from internal - // buffer. - // * Allocate the internal buffer (if any) lazily in first nextImpl() call. If all reads are - // tiny readBig() calls (as is typical here), it won't allocate an unnecessary 1 MB buffer. - - buf->seek(position, SEEK_SET); - buf->setReadUntilPosition(position + nbytes); - size_t bytes_read = buf->readBig(reinterpret_cast(out), nbytes); - - // Seeking to a position above a previous setReadUntilPosition() confuses some of the - // ReadBuffer implementations. So we reset it before next seek. 
- buf->setReadUntilEnd(); - - lock.lock(); - free_bufs.push_back(std::move(buf)); - - return static_cast(bytes_read); + return in.readBigAt(reinterpret_cast(out), nbytes, position); } -arrow::Result> RandomAccessFileFromManyReadBuffers::ReadAt(int64_t position, int64_t nbytes) +arrow::Result> RandomAccessFileFromRandomAccessReadBuffer::ReadAt(int64_t position, int64_t nbytes) { ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes)) ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, ReadAt(position, nbytes, buffer->mutable_data())) @@ -195,22 +169,23 @@ arrow::Result> RandomAccessFileFromManyReadBuffer return buffer; } -arrow::Future> RandomAccessFileFromManyReadBuffers::ReadAsync(const arrow::io::IOContext&, int64_t position, int64_t nbytes) +arrow::Future> RandomAccessFileFromRandomAccessReadBuffer::ReadAsync(const arrow::io::IOContext&, int64_t position, int64_t nbytes) { return arrow::Future>::MakeFinished(ReadAt(position, nbytes)); } -arrow::Status RandomAccessFileFromManyReadBuffers::Close() +arrow::Status RandomAccessFileFromRandomAccessReadBuffer::Close() { chassert(is_open); is_open = false; return arrow::Status::OK(); } -arrow::Status RandomAccessFileFromManyReadBuffers::Seek(int64_t) { return arrow::Status::NotImplemented(""); } -arrow::Result RandomAccessFileFromManyReadBuffers::Tell() const { return arrow::Status::NotImplemented(""); } -arrow::Result RandomAccessFileFromManyReadBuffers::Read(int64_t, void*) { return arrow::Status::NotImplemented(""); } -arrow::Result> RandomAccessFileFromManyReadBuffers::Read(int64_t) { return arrow::Status::NotImplemented(""); } +arrow::Status RandomAccessFileFromRandomAccessReadBuffer::Seek(int64_t) { return arrow::Status::NotImplemented(""); } +arrow::Result RandomAccessFileFromRandomAccessReadBuffer::Tell() const { return arrow::Status::NotImplemented(""); } +arrow::Result RandomAccessFileFromRandomAccessReadBuffer::Read(int64_t, void*) { return arrow::Status::NotImplemented(""); } +arrow::Result> RandomAccessFileFromRandomAccessReadBuffer::Read(int64_t) { return arrow::Status::NotImplemented(""); } + std::shared_ptr asArrowFile( ReadBuffer & in, @@ -220,19 +195,16 @@ std::shared_ptr asArrowFile( const std::string & magic_bytes, bool avoid_buffering) { - if (auto * fd_in = dynamic_cast(&in)) + bool has_file_size = isBufferWithFileSize(in); + auto * seekable_in = dynamic_cast(&in); + + if (has_file_size && seekable_in && settings.seekable_read) { - struct stat stat; - auto res = ::fstat(fd_in->getFD(), &stat); - // if fd is a regular file i.e. 
not stdin - if (res == 0 && S_ISREG(stat.st_mode)) - return std::make_shared(*fd_in, stat.st_size, avoid_buffering); - } - else if (auto * seekable_in = dynamic_cast(&in); - seekable_in && settings.seekable_read && isBufferWithFileSize(in) && - seekable_in->checkIfActuallySeekable()) - { - return std::make_shared(in, std::nullopt, avoid_buffering); + if (avoid_buffering && seekable_in->supportsReadAt()) + return std::make_shared(*seekable_in, getFileSizeFromReadBuffer(in)); + + if (seekable_in->checkIfActuallySeekable()) + return std::make_shared(*seekable_in, std::nullopt, avoid_buffering); } // fallback to loading the entire file in memory @@ -245,26 +217,16 @@ std::shared_ptr asArrowFileLoadIntoMemory( const std::string & format_name, const std::string & magic_bytes) { - std::string file_data; - { - PeekableReadBuffer buf(in); - std::string magic_bytes_from_data; - magic_bytes_from_data.resize(magic_bytes.size()); - bool read_magic_bytes = false; - try - { - PeekableReadBufferCheckpoint checkpoint(buf, true); - buf.readStrict(magic_bytes_from_data.data(), magic_bytes_from_data.size()); - read_magic_bytes = true; - } - catch (const Exception &) {} + std::string file_data(magic_bytes.size(), '\0'); - if (!read_magic_bytes || magic_bytes_from_data != magic_bytes) - throw Exception(ErrorCodes::INCORRECT_DATA, "Not a {} file", format_name); + /// Avoid loading the whole file if it doesn't seem to even be in the correct format. + size_t bytes_read = in.read(file_data.data(), magic_bytes.size()); + if (bytes_read < magic_bytes.size() || file_data != magic_bytes) + throw Exception(ErrorCodes::INCORRECT_DATA, "Not a {} file", format_name); - WriteBufferFromString file_buffer(file_data); - copyData(buf, file_buffer, is_cancelled); - } + WriteBufferFromString file_buffer(file_data, AppendModeTag{}); + copyData(in, file_buffer, is_cancelled); + file_buffer.finalize(); return std::make_shared(arrow::Buffer::FromString(std::move(file_data))); } diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.h b/src/Processors/Formats/Impl/ArrowBufferedStreams.h index 9307172cb11..f455bcdfb1a 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.h +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.h @@ -18,7 +18,6 @@ class ReadBuffer; class WriteBuffer; class SeekableReadBuffer; -class SeekableReadBufferFactory; struct FormatSettings; class ArrowBufferedOutputStream : public arrow::io::OutputStream @@ -78,19 +77,17 @@ private: ARROW_DISALLOW_COPY_AND_ASSIGN(RandomAccessFileFromSeekableReadBuffer); }; -// Thread-safe. -// Maintains a pool of SeekableReadBuffer-s. For each ReadAt(), takes a buffer, seeks it, and reads. -class RandomAccessFileFromManyReadBuffers : public arrow::io::RandomAccessFile +class RandomAccessFileFromRandomAccessReadBuffer : public arrow::io::RandomAccessFile { public: - explicit RandomAccessFileFromManyReadBuffers(SeekableReadBufferFactory & factory); + explicit RandomAccessFileFromRandomAccessReadBuffer(SeekableReadBuffer & in_, size_t file_size_); // These are thread safe. arrow::Result GetSize() override; arrow::Result ReadAt(int64_t position, int64_t nbytes, void* out) override; arrow::Result> ReadAt(int64_t position, int64_t nbytes) override; - arrow::Future> ReadAsync(const arrow::io::IOContext&, int64_t position, - int64_t nbytes) override; + arrow::Future> ReadAsync( + const arrow::io::IOContext&, int64_t position, int64_t nbytes) override; // These are not thread safe, and arrow shouldn't call them. Return NotImplemented error. 
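ReadAt() is the member that matters in this class: it is declared thread safe above because it now forwards to a single positional read (SeekableReadBuffer::readBigAt) instead of seeking a buffer taken from a pool. The model is essentially POSIX pread(); a minimal sketch of that model (plain POSIX, not ClickHouse's readBigAt implementation):

    #include <fcntl.h>
    #include <unistd.h>
    #include <cstddef>
    #include <cstdio>

    // A positional read keeps no shared cursor, so concurrent calls at different
    // offsets do not interfere - which is why no per-thread buffer pool is needed.
    size_t readAt(int fd, char * out, size_t n, size_t offset)
    {
        size_t bytes_read = 0;
        while (bytes_read < n)
        {
            ssize_t res = ::pread(fd, out + bytes_read, n - bytes_read, offset + bytes_read);
            if (res <= 0) // 0 is EOF; negative is an error (handling elided in this sketch)
                break;
            bytes_read += static_cast<size_t>(res);
        }
        return bytes_read;
    }

    int main()
    {
        int fd = ::open("/etc/hostname", O_RDONLY);
        if (fd < 0)
            return 1;
        char buf[16];
        std::printf("read %zu bytes\n", readAt(fd, buf, sizeof(buf), 0));
        ::close(fd);
        return 0;
    }
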
arrow::Status Seek(int64_t) override; @@ -102,13 +99,11 @@ public: bool closed() const override { return !is_open; } private: - SeekableReadBufferFactory & buf_factory; + SeekableReadBuffer & in; + size_t file_size; bool is_open = true; - std::mutex mutex; - std::vector> free_bufs; - - ARROW_DISALLOW_COPY_AND_ASSIGN(RandomAccessFileFromManyReadBuffers); + ARROW_DISALLOW_COPY_AND_ASSIGN(RandomAccessFileFromRandomAccessReadBuffer); }; class ArrowInputStreamFromReadBuffer : public arrow::io::InputStream diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 7f90c1197ce..2f3c68aa481 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -43,14 +43,12 @@ namespace ErrorCodes } while (false) ParquetBlockInputFormat::ParquetBlockInputFormat( - ReadBuffer * buf, - SeekableReadBufferFactoryPtr buf_factory_, + ReadBuffer & buf, const Block & header_, const FormatSettings & format_settings_, size_t max_decoding_threads_, size_t min_bytes_for_seek_) - : IInputFormat(header_, buf) - , buf_factory(std::move(buf_factory_)) + : IInputFormat(header_, &buf) , format_settings(format_settings_) , skip_row_groups(format_settings.parquet.skip_row_groups) , max_decoding_threads(max_decoding_threads_) @@ -71,17 +69,7 @@ void ParquetBlockInputFormat::initializeIfNeeded() // Create arrow file adapter. // TODO: Make the adapter do prefetching on IO threads, based on the full set of ranges that // we'll need to read (which we know in advance). Use max_download_threads for that. - if (buf_factory) - { - if (format_settings.seekable_read && buf_factory->checkIfActuallySeekable()) - arrow_file = std::make_shared(*buf_factory); - else - arrow_file = asArrowFileLoadIntoMemory(*buf_factory->getReader(), is_stopped, "Parquet", PARQUET_MAGIC_BYTES); - } - else - { - arrow_file = asArrowFile(*in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES, /* avoid_buffering */ true); - } + arrow_file = asArrowFile(*in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES, /* avoid_buffering */ true); if (is_stopped) return; @@ -388,7 +376,7 @@ ParquetSchemaReader::ParquetSchemaReader(ReadBuffer & in_, const FormatSettings NamesAndTypesList ParquetSchemaReader::readSchema() { std::atomic is_stopped{0}; - auto file = asArrowFile(in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES); + auto file = asArrowFile(in, format_settings, is_stopped, "Parquet", PARQUET_MAGIC_BYTES, /* avoid_buffering */ true); auto metadata = parquet::ReadMetaData(file); @@ -406,8 +394,7 @@ void registerInputFormatParquet(FormatFactory & factory) { factory.registerRandomAccessInputFormat( "Parquet", - [](ReadBuffer * buf, - SeekableReadBufferFactoryPtr buf_factory, + [](ReadBuffer & buf, const Block & sample, const FormatSettings & settings, const ReadSettings& read_settings, @@ -418,7 +405,6 @@ void registerInputFormatParquet(FormatFactory & factory) size_t min_bytes_for_seek = is_remote_fs ? 
read_settings.remote_read_min_bytes_for_seek : 8 * 1024; return std::make_shared( buf, - std::move(buf_factory), sample, settings, max_parsing_threads, diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index f17eee59414..ad7074547fc 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -15,7 +15,6 @@ namespace DB { class ArrowColumnToCHColumn; -class SeekableReadBufferFactory; // Parquet files contain a metadata block with the following information: // * list of columns, @@ -48,9 +47,7 @@ class ParquetBlockInputFormat : public IInputFormat { public: ParquetBlockInputFormat( - // exactly one of these two is nullptr - ReadBuffer * buf, - std::unique_ptr buf_factory, + ReadBuffer & buf, const Block & header, const FormatSettings & format_settings, size_t max_decoding_threads, @@ -234,7 +231,6 @@ private: }; }; - std::unique_ptr buf_factory; const FormatSettings format_settings; const std::unordered_set & skip_row_groups; size_t max_decoding_threads; diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index db29038999b..37bc894339f 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -272,7 +272,7 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes { /// If totals step has HAVING expression, skip it for now. /// TODO: - /// We can merge HAVING expression with current filter. + /// We can merge HAVING expression with current filer. /// Also, we can push down part of HAVING which depend only on aggregation keys. if (totals_having->getActions()) return 0; @@ -323,9 +323,9 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes { const auto & table_join = join ? join->getJoin()->getTableJoin() : filled_join->getJoin()->getTableJoin(); - /// Only inner, cross and left(/right) join are supported. Other types may generate default values for left table keys. + /// Only inner and left(/right) join are supported. Other types may generate default values for left table keys. /// So, if we push down a condition like `key != 0`, not all rows may be filtered. - if (table_join.kind() != JoinKind::Inner && table_join.kind() != JoinKind::Cross && table_join.kind() != kind) + if (table_join.kind() != JoinKind::Inner && table_join.kind() != kind) return 0; bool is_left = kind == JoinKind::Left; diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index b8ef2152a99..239a534ca93 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1334,6 +1334,10 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctions( arguments.push_back(const_arg); kind = FunctionWithOptionalConstArg::Kind::RIGHT_CONST; } + + /// If constant arg of binary operator is NULL, there will be no monotonicity. 
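Why a NULL constant rules out monotonicity: with SQL comparison semantics, a binary operator applied to NULL yields NULL, so a predicate like key < NULL is NULL over the whole key range and no range of predicate values can be derived from a range of key values, which is exactly what KeyCondition needs. An isolated illustration using std::optional in place of a Nullable value (not ClickHouse API):

    #include <cassert>
    #include <optional>

    // SQL-style "less than": any comparison involving NULL yields NULL.
    std::optional<bool> lessThan(std::optional<int> x, std::optional<int> c)
    {
        if (!x || !c)
            return std::nullopt;
        return *x < *c;
    }

    int main()
    {
        // With c = NULL the predicate is NULL for every key, so the wrapped function
        // is neither increasing nor decreasing and the index range cannot be narrowed.
        for (int key = -3; key <= 3; ++key)
            assert(!lessThan(key, std::nullopt).has_value());
        return 0;
    }
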
+ if (const_arg.column->isNullAt(0)) + return false; } else arguments.push_back({ nullptr, key_column_type, "" }); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 338a221e45e..32665429051 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4854,6 +4854,9 @@ void MergeTreeData::checkAlterPartitionIsPossible( void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition, ContextPtr local_context) { + if (!supportsReplication() && isStaticStorage()) + return; + DataPartsVector parts_to_remove; const auto * partition_ast = partition->as(); if (partition_ast && partition_ast->all) @@ -4874,6 +4877,9 @@ void MergeTreeData::checkPartitionCanBeDropped(const ASTPtr & partition, Context void MergeTreeData::checkPartCanBeDropped(const String & part_name) { + if (!supportsReplication() && isStaticStorage()) + return; + auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active}); if (!part) throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No part {} in committed state", part_name); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index fce7d989a2f..1c41de6fa19 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -865,7 +865,7 @@ public: DiskPtr tryGetDiskForDetachedPart(const String & part_name) const; DiskPtr getDiskForDetachedPart(const String & part_name) const; - bool storesDataOnDisk() const override { return true; } + bool storesDataOnDisk() const override { return !isStaticStorage(); } Strings getDataPaths() const override; /// Reserves space at least 1MB. diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 2b16ea43179..16b27c2c820 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1729,7 +1729,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( std::max(ranges[i].begin, index_mark * index_granularity), std::min(ranges[i].end, (index_mark + 1) * index_granularity)); - if (res.empty() || res.back().end - data_range.begin > min_marks_for_seek) + if (res.empty() || data_range.begin - res.back().end > min_marks_for_seek) res.push_back(data_range); else res.back().end = data_range.end; @@ -1829,7 +1829,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingMergedIndex( std::max(range.begin, index_mark * index_granularity), std::min(range.end, (index_mark + 1) * index_granularity)); - if (res.empty() || res.back().end - data_range.begin > min_marks_for_seek) + if (res.empty() || data_range.begin - res.back().end > min_marks_for_seek) res.push_back(data_range); else res.back().end = data_range.end; diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 0ffc91aca57..ba8c2c6385f 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -1,11 +1,11 @@ -#include -#include #include +#include +#include +#include #include #include #include #include -#include namespace ProfileEvents @@ -22,6 +22,14 @@ namespace ErrorCodes namespace DB { +size_t getApproxSizeOfPart(const IMergeTreeDataPart & part, const Names & columns_to_read) +{ + ColumnSize columns_size{}; + for (const auto & col_name : columns_to_read) + columns_size.add(part.getColumnSize(col_name)); + return columns_size.data_compressed; +} + 
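getApproxSizeOfPart() above sums the compressed size of only the columns the query will actually read (the PREWHERE columns when a PREWHERE is present). The constructor that follows uses those sizes to raise min_marks_for_concurrent_read for remote reads so that each task covers roughly merge_tree_min_bytes_per_task_for_remote_reading bytes. A worked example of that arithmetic with made-up numbers (the setting value below is illustrative, not necessarily the default):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>

    int main()
    {
        const size_t total_compressed_bytes = 1'000'000'000; // compressed bytes of the columns to read
        const size_t total_marks = 100'000;
        const size_t threads = 16;
        const size_t min_bytes_per_task = 8 * 1024 * 1024;   // merge_tree_min_bytes_per_task_for_remote_reading (example)

        const size_t avg_mark_bytes = std::max<size_t>(total_compressed_bytes / total_marks, 1); // 10'000
        // min() keeps the number of tasks from dropping so low that task stealing becomes impossible.
        const size_t heuristic_min_marks = std::min(total_marks / threads, min_bytes_per_task / avg_mark_bytes);

        std::cout << heuristic_min_marks << '\n'; // min(6250, 838) = 838 marks, i.e. ~8 MiB per task
        return 0;
    }
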
MergeTreeReadPool::MergeTreeReadPool( size_t threads_, size_t sum_marks_, @@ -45,16 +53,43 @@ MergeTreeReadPool::MergeTreeReadPool( , parts_ranges(std::move(parts_)) , predict_block_size_bytes(context_->getSettingsRef().preferred_block_size_bytes > 0) , do_not_steal_tasks(do_not_steal_tasks_) + , merge_tree_use_const_size_tasks_for_remote_reading(context_->getSettingsRef().merge_tree_use_const_size_tasks_for_remote_reading) , backoff_settings{context_->getSettingsRef()} , backoff_state{threads_} { /// parts don't contain duplicate MergeTreeDataPart's. const auto per_part_sum_marks = fillPerPartInfo( parts_ranges, storage_snapshot, is_part_on_remote_disk, - do_not_steal_tasks, predict_block_size_bytes, + predict_block_size_bytes, column_names, virtual_column_names, prewhere_info, actions_settings, reader_settings, per_part_params); + if (std::ranges::count(is_part_on_remote_disk, true)) + { + const auto & settings = context_->getSettingsRef(); + + size_t total_compressed_bytes = 0; + size_t total_marks = 0; + for (const auto & part : parts_ranges) + { + total_compressed_bytes += getApproxSizeOfPart( + *part.data_part, prewhere_info ? prewhere_info->prewhere_actions->getRequiredColumnsNames() : column_names_); + total_marks += part.getMarksCount(); + } + + if (total_marks) + { + const auto min_bytes_per_task = settings.merge_tree_min_bytes_per_task_for_remote_reading; + const auto avg_mark_bytes = std::max(total_compressed_bytes / total_marks, 1); + /// We're taking min here because number of tasks shouldn't be too low - it will make task stealing impossible. + const auto heuristic_min_marks = std::min(total_marks / threads_, min_bytes_per_task / avg_mark_bytes); + if (heuristic_min_marks > min_marks_for_concurrent_read) + { + min_marks_for_concurrent_read = heuristic_min_marks; + } + } + } + fillPerThreadInfo(threads_, sum_marks_, per_part_sum_marks, parts_ranges); } @@ -62,7 +97,6 @@ std::vector MergeTreeReadPool::fillPerPartInfo( const RangesInDataParts & parts, const StorageSnapshotPtr & storage_snapshot, std::vector & is_part_on_remote_disk, - bool & do_not_steal_tasks, bool & predict_block_size_bytes, const Names & column_names, const Names & virtual_column_names, @@ -84,7 +118,6 @@ std::vector MergeTreeReadPool::fillPerPartInfo( bool part_on_remote_disk = part.data_part->isStoredOnRemoteDisk(); is_part_on_remote_disk[i] = part_on_remote_disk; - do_not_steal_tasks |= part_on_remote_disk; /// Read marks for every data part. size_t sum_marks = 0; @@ -160,14 +193,13 @@ MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t thread) auto & marks_in_part = thread_tasks.sum_marks_in_parts.back(); size_t need_marks; - if (is_part_on_remote_disk[part_idx]) /// For better performance with remote disks + if (is_part_on_remote_disk[part_idx] && !merge_tree_use_const_size_tasks_for_remote_reading) need_marks = marks_in_part; else /// Get whole part to read if it is small enough. need_marks = std::min(marks_in_part, min_marks_for_concurrent_read); /// Do not leave too little rows in part for next time. 
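The check that follows (its threshold is halved to min_marks_for_concurrent_read / 2 in this patch) decides whether the tail of a part is absorbed into the current task instead of being left for a later one. A small sketch of the rule with concrete numbers; the helper is hypothetical and mirrors only the non-remote branch shown above:

    #include <algorithm>
    #include <cstddef>
    #include <iostream>

    // Hypothetical helper mirroring the leftover-absorption rule in getTask().
    size_t marksForTask(size_t marks_in_part, size_t min_marks_for_concurrent_read)
    {
        size_t need_marks = std::min(marks_in_part, min_marks_for_concurrent_read);
        // Take the whole part if what would remain is too little for a useful next task.
        if (marks_in_part > need_marks && marks_in_part - need_marks < min_marks_for_concurrent_read / 2)
            need_marks = marks_in_part;
        return need_marks;
    }

    int main()
    {
        std::cout << marksForTask(140, 100) << '\n'; // 140: the remaining 40 marks are below 50, take the whole part
        std::cout << marksForTask(170, 100) << '\n'; // 100: the remaining 70 marks are enough for another task
        return 0;
    }
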
- if (marks_in_part > need_marks && - marks_in_part - need_marks < min_marks_for_concurrent_read) + if (marks_in_part > need_marks && marks_in_part - need_marks < min_marks_for_concurrent_read / 2) need_marks = marks_in_part; MarkRanges ranges_to_get_from_part; @@ -300,6 +332,8 @@ void MergeTreeReadPool::fillPerThreadInfo( parts_queue.push(std::move(info.second)); } + LOG_DEBUG(log, "min_marks_for_concurrent_read={}", min_marks_for_concurrent_read); + const size_t min_marks_per_thread = (sum_marks - 1) / threads + 1; for (size_t i = 0; i < threads && !parts_queue.empty(); ++i) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index e621a02a9d6..21273904e00 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -94,7 +94,6 @@ public: const RangesInDataParts & parts, const StorageSnapshotPtr & storage_snapshot, std::vector & is_part_on_remote_disk, - bool & do_not_steal_tasks, bool & predict_block_size_bytes, const Names & column_names, const Names & virtual_column_names, @@ -119,6 +118,7 @@ private: RangesInDataParts parts_ranges; bool predict_block_size_bytes; bool do_not_steal_tasks; + bool merge_tree_use_const_size_tasks_for_remote_reading = false; std::vector per_part_params; std::vector is_part_on_remote_disk; @@ -189,7 +189,7 @@ public: , parts_ranges(std::move(parts_)) { MergeTreeReadPool::fillPerPartInfo( - parts_ranges, storage_snapshot, is_part_on_remote_disk, do_not_steal_tasks, + parts_ranges, storage_snapshot, is_part_on_remote_disk, predict_block_size_bytes, column_names, virtual_column_names, prewhere_info, actions_settings, reader_settings, per_part_params); @@ -226,7 +226,6 @@ private: const Names virtual_column_names; RangesInDataParts parts_ranges; - bool do_not_steal_tasks = false; bool predict_block_size_bytes = false; std::vector is_part_on_remote_disk; std::vector per_part_params; diff --git a/src/Storages/MergeTree/MergeTreeSource.cpp b/src/Storages/MergeTree/MergeTreeSource.cpp index 53695639769..b65f044a13b 100644 --- a/src/Storages/MergeTree/MergeTreeSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSource.cpp @@ -207,6 +207,7 @@ std::optional MergeTreeSource::tryGenerate() try { + OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"}; holder->setResult(algorithm->read()); } catch (...) 
@@ -221,6 +222,7 @@ std::optional MergeTreeSource::tryGenerate() } #endif + OpenTelemetry::SpanHolder span{"MergeTreeSource::tryGenerate()"}; return processReadResult(algorithm->read()); } diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 567c674ded9..e48d3187cb2 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -470,7 +470,7 @@ class GenerateSource : public ISource { public: GenerateSource(UInt64 block_size_, UInt64 max_array_length_, UInt64 max_string_length_, UInt64 random_seed_, Block block_header_, ContextPtr context_) - : ISource(Nested::flatten(prepareBlockToFill(block_header_))) + : ISource(Nested::flattenArrayOfTuples(prepareBlockToFill(block_header_))) , block_size(block_size_), max_array_length(max_array_length_), max_string_length(max_string_length_) , block_to_fill(std::move(block_header_)), rng(random_seed_), context(context_) {} @@ -485,7 +485,7 @@ protected: for (const auto & elem : block_to_fill) columns.emplace_back(fillColumnWithRandomData(elem.type, block_size, max_array_length, max_string_length, rng, context)); - columns = Nested::flatten(block_to_fill.cloneWithColumns(columns)).getColumns(); + columns = Nested::flattenArrayOfTuples(block_to_fill.cloneWithColumns(columns)).getColumns(); return {std::move(columns), block_size}; } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index a721dd30cd7..3da4724471d 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -114,7 +114,7 @@ StorageMergeTree::StorageMergeTree( loadDataParts(has_force_restore_data_flag); - if (!attach && !getDataPartsForInternalUsage().empty()) + if (!attach && !getDataPartsForInternalUsage().empty() && !isStaticStorage()) throw Exception(ErrorCodes::INCORRECT_DATA, "Data directory for table already containing data parts - probably " "it was unclean DROP table or manual intervention. 
" @@ -283,6 +283,9 @@ StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & met void StorageMergeTree::checkTableCanBeDropped() const { + if (!supportsReplication() && isStaticStorage()) + return; + auto table_id = getStorageID(); getContext()->checkTableCanBeDropped(table_id.database_name, table_id.table_name, getTotalActiveSizeInBytes()); } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2233feb7ee2..2d8aaec0f07 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -418,6 +418,7 @@ public: ASTPtr query_, const Block & virtual_header_, ContextPtr context_, + bool need_total_size, KeysWithInfo * read_keys_) : WithContext(context_) , bucket(bucket_) @@ -458,8 +459,13 @@ public: for (auto && key : all_keys) { - auto info = S3::getObjectInfo(client_, bucket, key, version_id_, request_settings_); - total_size += info.size; + std::optional info; + if (need_total_size) + { + info = S3::getObjectInfo(client_, bucket, key, version_id_, request_settings_); + total_size += info->size; + } + keys.emplace_back(std::move(key), std::move(info)); } @@ -501,10 +507,11 @@ StorageS3Source::KeysIterator::KeysIterator( ASTPtr query, const Block & virtual_header, ContextPtr context, + bool need_total_size, KeysWithInfo * read_keys) : pimpl(std::make_shared( client_, version_id_, keys_, bucket_, request_settings_, - query, virtual_header, context, read_keys)) + query, virtual_header, context, need_total_size, read_keys)) { } @@ -575,31 +582,11 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader() size_t object_size = info ? info->size : S3::getObjectSize(*client, bucket, current_key, version_id, request_settings); auto compression_method = chooseCompressionMethod(current_key, compression_hint); - InputFormatPtr input_format; - std::unique_ptr owned_read_buf; - - auto read_buf_or_factory = createS3ReadBuffer(current_key, object_size); - if (read_buf_or_factory.buf_factory) - { - input_format = FormatFactory::instance().getInputRandomAccess( - format, - std::move(read_buf_or_factory.buf_factory), - sample_block, - getContext(), - max_block_size, - /* is_remote_fs */ true, - compression_method, - format_settings); - } - else - { - owned_read_buf = wrapReadBufferWithCompressionMethod( - std::move(read_buf_or_factory.buf), - compression_method, - static_cast(getContext()->getSettingsRef().zstd_window_log_max)); - input_format = FormatFactory::instance().getInput( - format, *owned_read_buf, sample_block, getContext(), max_block_size, format_settings); - } + auto read_buf = createS3ReadBuffer(current_key, object_size); + auto input_format = FormatFactory::instance().getInput( + format, *read_buf, sample_block, getContext(), max_block_size, + format_settings, std::nullopt, std::nullopt, + /* is_remote_fs */ true, compression_method); QueryPipelineBuilder builder; builder.init(Pipe(input_format)); @@ -614,7 +601,7 @@ StorageS3Source::ReaderHolder StorageS3Source::createReader() auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); auto current_reader = std::make_unique(*pipeline); - return ReaderHolder{fs::path(bucket) / current_key, std::move(owned_read_buf), std::move(pipeline), std::move(current_reader)}; + return ReaderHolder{fs::path(bucket) / current_key, std::move(read_buf), std::move(pipeline), std::move(current_reader)}; } std::future StorageS3Source::createReaderAsync() @@ -622,7 +609,7 @@ std::future StorageS3Source::createReaderAsync() return create_reader_scheduler([this] { return 
createReader(); }, Priority{}); } -StorageS3Source::ReadBufferOrFactory StorageS3Source::createS3ReadBuffer(const String & key, size_t object_size) +std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & key, size_t object_size) { auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); read_settings.enable_filesystem_cache = false; @@ -635,12 +622,13 @@ StorageS3Source::ReadBufferOrFactory StorageS3Source::createS3ReadBuffer(const S if (object_too_small && read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { LOG_TRACE(log, "Downloading object of size {} from S3 with initial prefetch", object_size); - return {.buf = createAsyncS3ReadBuffer(key, read_settings, object_size)}; + return createAsyncS3ReadBuffer(key, read_settings, object_size); } - auto factory = std::make_unique( - client, bucket, key, version_id, object_size, request_settings, read_settings); - return {.buf_factory = std::move(factory)}; + return std::make_unique( + client, bucket, key, version_id, request_settings, read_settings, + /*use_external_buffer*/ false, /*offset_*/ 0, /*read_until_position_*/ 0, + /*restricted_seek_*/ false, object_size); } std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( @@ -980,6 +968,7 @@ std::shared_ptr StorageS3::createFileIterator( ContextPtr local_context, ASTPtr query, const Block & virtual_block, + bool need_total_size, KeysWithInfo * read_keys) { if (distributed_processing) @@ -998,7 +987,7 @@ std::shared_ptr StorageS3::createFileIterator( return std::make_shared( *configuration.client, configuration.url.version_id, configuration.keys, configuration.url.bucket, configuration.request_settings, query, - virtual_block, local_context, read_keys); + virtual_block, local_context, need_total_size, read_keys); } } @@ -1448,7 +1437,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( { KeysWithInfo read_keys; - auto file_iterator = createFileIterator(configuration, false, ctx, nullptr, {}, &read_keys); + auto file_iterator = createFileIterator(configuration, false, ctx, nullptr, {}, false, &read_keys); std::optional columns_from_cache; size_t prev_read_keys_size = read_keys.size(); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index bca804a7f53..a4c120b99a6 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -94,6 +94,7 @@ public: ASTPtr query, const Block & virtual_header, ContextPtr context, + bool need_total_size = true, KeysWithInfo * read_keys = nullptr); KeyWithInfo next() override; @@ -203,12 +204,6 @@ private: std::unique_ptr reader; }; - struct ReadBufferOrFactory - { - std::unique_ptr buf; - SeekableReadBufferFactoryPtr buf_factory; - }; - ReaderHolder reader; std::vector requested_virtual_columns; @@ -229,7 +224,7 @@ private: ReaderHolder createReader(); std::future createReaderAsync(); - ReadBufferOrFactory createS3ReadBuffer(const String & key, size_t object_size); + std::unique_ptr createS3ReadBuffer(const String & key, size_t object_size); std::unique_ptr createAsyncS3ReadBuffer(const String & key, const ReadSettings & read_settings, size_t object_size); }; @@ -348,6 +343,7 @@ private: ContextPtr local_context, ASTPtr query, const Block & virtual_block, + bool need_total_size = true, KeysWithInfo * read_keys = nullptr); static ColumnsDescription getTableStructureFromDataImpl( diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 4c5ed08e26e..efc44a069dd 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -248,7 +248,7 @@ 
StorageURLSource::StorageURLSource( throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty url list"); auto first_option = uri_options.begin(); - auto [actual_uri, buf_factory] = getFirstAvailableURIAndReadBuffer( + auto [actual_uri, buf] = getFirstAvailableURIAndReadBuffer( first_option, uri_options.end(), context, @@ -262,10 +262,11 @@ StorageURLSource::StorageURLSource( uri_options.size() == 1); curr_uri = actual_uri; + read_buf = std::move(buf); try { - total_size += buf_factory->getFileSize(); + total_size += getFileSizeFromReadBuffer(*read_buf); } catch (...) { @@ -273,16 +274,17 @@ StorageURLSource::StorageURLSource( } // TODO: Pass max_parsing_threads and max_download_threads adjusted for num_streams. - auto input_format = FormatFactory::instance().getInputRandomAccess( + auto input_format = FormatFactory::instance().getInput( format, - std::move(buf_factory), + *read_buf, sample_block, context, max_block_size, - /* is_remote_fs */ true, - compression_method, format_settings, - download_threads); + download_threads, + /*max_download_threads*/ std::nullopt, + /* is_remote_fs */ true, + compression_method); QueryPipelineBuilder builder; builder.init(Pipe(input_format)); @@ -348,7 +350,7 @@ Chunk StorageURLSource::generate() return {}; } -std::tuple StorageURLSource::getFirstAvailableURIAndReadBuffer( +std::tuple> StorageURLSource::getFirstAvailableURIAndReadBuffer( std::vector::const_iterator & option, const std::vector::const_iterator & end, ContextPtr context, @@ -376,40 +378,38 @@ std::tuple StorageURLSource::getFirstAv setCredentials(credentials, request_uri); const auto settings = context->getSettings(); - auto res = std::make_unique( - request_uri, - http_method, - callback, - timeouts, - credentials, - settings.max_http_get_redirects, - settings.max_read_buffer_size, - read_settings, - headers, - &context->getRemoteHostFilter(), - delay_initialization, - /* use_external_buffer */ false, - /* skip_url_not_found_error */ skip_url_not_found_error); - if (options > 1) + try { - // Send a HEAD request to check availability. - try - { - res->getFileInfo(); - } - catch (...) - { - if (first_exception_message.empty()) - first_exception_message = getCurrentExceptionMessage(false); + auto res = std::make_unique( + request_uri, + http_method, + callback, + timeouts, + credentials, + settings.max_http_get_redirects, + settings.max_read_buffer_size, + read_settings, + headers, + &context->getRemoteHostFilter(), + delay_initialization, + /* use_external_buffer */ false, + /* skip_url_not_found_error */ skip_url_not_found_error); - tryLogCurrentException(__PRETTY_FUNCTION__); - - continue; - } + return std::make_tuple(request_uri, std::move(res)); } + catch (...) 
+ { + if (options == 1) + throw; - return std::make_tuple(request_uri, std::move(res)); + if (first_exception_message.empty()) + first_exception_message = getCurrentExceptionMessage(false); + + tryLogCurrentException(__PRETTY_FUNCTION__); + + continue; + } } throw Exception(ErrorCodes::NETWORK_ERROR, "All uri ({}) options are unreachable: {}", options, first_exception_message); @@ -598,7 +598,7 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( if (it == urls_to_check.cend()) return nullptr; - auto [_, buf_factory] = StorageURLSource::getFirstAvailableURIAndReadBuffer( + auto [_, buf] = StorageURLSource::getFirstAvailableURIAndReadBuffer( it, urls_to_check.cend(), context, @@ -612,7 +612,7 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( false); ++it; return wrapReadBufferWithCompressionMethod( - buf_factory->getReader(), + std::move(buf), compression_method, static_cast(context->getSettingsRef().zstd_window_log_max)); }; diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index d53b72105e4..316b142aec0 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -183,7 +183,7 @@ public: static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); - static std::tuple getFirstAvailableURIAndReadBuffer( + static std::tuple> getFirstAvailableURIAndReadBuffer( std::vector::const_iterator & option, const std::vector::const_iterator & end, ContextPtr context, @@ -205,6 +205,7 @@ private: std::shared_ptr uri_iterator; Poco::URI curr_uri; + std::unique_ptr read_buf; std::unique_ptr pipeline; std::unique_ptr reader; diff --git a/src/TableFunctions/TableFunctionGenerateRandom.cpp b/src/TableFunctions/TableFunctionGenerateRandom.cpp index 5f1a13d8857..08059796660 100644 --- a/src/TableFunctions/TableFunctionGenerateRandom.cpp +++ b/src/TableFunctions/TableFunctionGenerateRandom.cpp @@ -1,18 +1,19 @@ -#include #include -#include #include #include #include #include -#include #include #include #include +#include #include +#include + +#include #include "registerTableFunctions.h" @@ -25,10 +26,9 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int LOGICAL_ERROR; - extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; } -void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, ContextPtr /*context*/) +void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, ContextPtr context) { ASTs & args_func = ast_function->children; @@ -40,10 +40,21 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co if (args.empty()) return; - if (args.size() > 4) + /// First, check if first argument is structure or seed. + const auto * first_arg_literal = args[0]->as(); + bool first_argument_is_structure = !first_arg_literal || first_arg_literal->value.getType() == Field::Types::String; + size_t max_args = first_argument_is_structure ? 4 : 3; + + if (args.size() > max_args) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Table function '{}' requires at most four arguments: " - " structure, [random_seed, max_string_length, max_array_length].", getName()); + "Table function '{}' requires at most four (or three if structure is missing) arguments: " + " [structure, random_seed, max_string_length, max_array_length].", getName()); + + if (first_argument_is_structure) + { + /// Allow constant expression for structure argument, it can be generated using generateRandomStructure function. 
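Taken together, the parsing changes above mean the table function (generateRandom) now accepts either a structure string as its first argument followed by [random_seed, max_string_length, max_array_length], or the same tail of arguments with the structure omitted, in which case every position shifts left by one. A compact sketch of that positional dispatch (simplified: it ignores the NULL-seed and constant-expression cases handled by the real parser, and all names are local to the example):

    #include <cstdint>
    #include <iostream>
    #include <optional>
    #include <string>
    #include <variant>
    #include <vector>

    // Each argument is either a string literal (the structure) or a number.
    using Arg = std::variant<std::string, uint64_t>;

    struct ParsedArgs
    {
        std::optional<std::string> structure;
        std::optional<uint64_t> seed, max_string_length, max_array_length;
    };

    ParsedArgs parse(const std::vector<Arg> & args)
    {
        ParsedArgs res;
        size_t i = 0;
        // The structure is present only when the first argument is a string.
        if (!args.empty() && std::holds_alternative<std::string>(args[0]))
            res.structure = std::get<std::string>(args[i++]);
        if (i < args.size()) res.seed = std::get<uint64_t>(args[i++]);
        if (i < args.size()) res.max_string_length = std::get<uint64_t>(args[i++]);
        if (i < args.size()) res.max_array_length = std::get<uint64_t>(args[i++]);
        return res;
    }

    int main()
    {
        auto a = parse({std::string("a UInt32, b String"), uint64_t{42}});
        auto b = parse({uint64_t{42}, uint64_t{10}}); // no structure: seed = 42, max_string_length = 10
        std::cout << *a.structure << " / " << *a.seed << " / " << *b.seed << " / " << *b.max_string_length << '\n';
        return 0;
    }
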
+ args[0] = evaluateConstantExpressionAsLiteral(args[0], context); + } // All the arguments must be literals. for (const auto & arg : args) @@ -51,26 +62,39 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co if (!arg->as()) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "All arguments of table function '{}' must be literals. " + "All arguments of table function '{}' except structure argument must be literals. " "Got '{}' instead", getName(), arg->formatForErrorMessage()); } } - /// Parsing first argument as table structure and creating a sample block - structure = checkAndGetLiteralArgument(args[0], "structure"); + size_t arg_index = 0; - if (args.size() >= 2) + if (first_argument_is_structure) { - const auto & literal = args[1]->as(); + /// Parsing first argument as table structure and creating a sample block + structure = checkAndGetLiteralArgument(args[arg_index], "structure"); + ++arg_index; + } + + if (args.size() >= arg_index + 1) + { + const auto & literal = args[arg_index]->as(); + ++arg_index; if (!literal.value.isNull()) random_seed = checkAndGetLiteralArgument(literal, "random_seed"); } - if (args.size() >= 3) - max_string_length = checkAndGetLiteralArgument(args[2], "max_string_length"); + if (args.size() >= arg_index + 1) + { + max_string_length = checkAndGetLiteralArgument(args[arg_index], "max_string_length"); + ++arg_index; + } - if (args.size() == 4) - max_array_length = checkAndGetLiteralArgument(args[3], "max_string_length"); + if (args.size() == arg_index + 1) + { + max_array_length = checkAndGetLiteralArgument(args[arg_index], "max_string_length"); + ++arg_index; + } } ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextPtr context) const @@ -78,11 +102,11 @@ ColumnsDescription TableFunctionGenerateRandom::getActualTableStructure(ContextP if (structure == "auto") { if (structure_hint.empty()) - throw Exception( - ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Table function '{}' was used without structure argument but structure could not be determined automatically. Please, " - "provide structure manually", - getName()); + { + auto random_structure = FunctionGenerateRandomStructure::generateRandomStructure(random_seed.value_or(randomSeed()), context); + return parseColumnsListFromString(random_structure, context); + } + return structure_hint; } diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index d36315151aa..07cdcc76c3a 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -71,11 +71,11 @@ This pull-request will be merged automatically as it reaches the mergeable state ### If the PR was closed and then reopened If it stuck, check {pr_url} for `{backport_created_label}` and delete it if \ -necessary. Manually merging will do nothing, since `{label_backports_created}` \ +necessary. Manually merging will do nothing, since `{backport_created_label}` \ prevents the original PR {pr_url} from being processed. If you want to recreate the PR: delete the `{label_cherrypick}` label and delete this branch. -You may also need to delete the `{label_backports_created}` label from the original PR. +You may also need to delete the `{backport_created_label}` label from the original PR. """ BACKPORT_DESCRIPTION = """This pull-request is a last step of an automated \ backporting. 
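[Editorial note on the TableFunctionGenerateRandom.cpp change above] The structure argument of generateRandom() is now optional: if the first argument is a non-string literal (e.g. a number) it is taken as the random seed, at most three arguments are accepted in that form, and when neither a structure nor a structure hint is available one is produced with generateRandomStructure(). The call forms below are consistent with the 02586_generate_random_structure.sql test added later in this patch:

    SELECT * FROM generateRandom(10000000) LIMIT 1;                            -- seed only, structure auto-generated
    SELECT * FROM generateRandom(10000000, 2, 2) LIMIT 1;                      -- seed, max_string_length, max_array_length
    SELECT * FROM generateRandom(generateRandomStructure(5, 42), 42) LIMIT 1;  -- structure from a constant expression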
diff --git a/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py b/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py index 94c788f8f91..d76c4eba409 100644 --- a/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py +++ b/tests/integration/test_backward_compatibility/test_memory_bound_aggregation.py @@ -74,7 +74,7 @@ def test_backward_compatability(start_cluster): from remote('node{1,2,3}', default, t) group by a limit 1 offset 12345 - settings optimize_aggregation_in_order = 1 + settings optimize_aggregation_in_order = 1, enable_memory_bound_merging_of_aggregation_results = 0 """ ) == "30\n" diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index fd71389f71a..719de5e8bef 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -10,16 +10,22 @@ def cluster(): try: cluster = ClickHouseCluster(__file__) cluster.add_instance( - "node1", main_configs=["configs/storage_conf.xml"], with_nginx=True + "node1", + main_configs=["configs/storage_conf.xml"], + with_nginx=True, ) cluster.add_instance( "node2", main_configs=["configs/storage_conf_web.xml"], with_nginx=True, stay_alive=True, + with_zookeeper=True, ) cluster.add_instance( - "node3", main_configs=["configs/storage_conf_web.xml"], with_nginx=True + "node3", + main_configs=["configs/storage_conf_web.xml"], + with_nginx=True, + with_zookeeper=True, ) cluster.add_instance( @@ -95,7 +101,7 @@ def test_usage(cluster, node_name): for i in range(3): node2.query( """ - ATTACH TABLE test{} UUID '{}' + CREATE TABLE test{} UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'; """.format( @@ -140,7 +146,7 @@ def test_incorrect_usage(cluster): global uuids node2.query( """ - ATTACH TABLE test0 UUID '{}' + CREATE TABLE test0 UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'; """.format( @@ -173,7 +179,7 @@ def test_cache(cluster, node_name): for i in range(3): node2.query( """ - ATTACH TABLE test{} UUID '{}' + CREATE TABLE test{} UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'cached_web'; """.format( @@ -238,7 +244,7 @@ def test_unavailable_server(cluster): global uuids node2.query( """ - ATTACH TABLE test0 UUID '{}' + CREATE TABLE test0 UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'; """.format( @@ -276,3 +282,35 @@ def test_unavailable_server(cluster): ) node2.start_clickhouse() node2.query("DROP TABLE test0 SYNC") + + +def test_replicated_database(cluster): + node1 = cluster.instances["node3"] + node1.query( + "CREATE DATABASE rdb ENGINE=Replicated('/test/rdb', 's1', 'r1')", + settings={"allow_experimental_database_replicated": 1}, + ) + + global uuids + node1.query( + """ + CREATE TABLE rdb.table0 UUID '{}' + (id Int32) ENGINE = MergeTree() ORDER BY id + SETTINGS storage_policy = 'web'; + """.format( + uuids[0] + ) + ) + + node2 = cluster.instances["node2"] + node2.query( + "CREATE DATABASE rdb ENGINE=Replicated('/test/rdb', 's1', 'r2')", + settings={"allow_experimental_database_replicated": 1}, + ) + node2.query("SYSTEM SYNC DATABASE REPLICA rdb") + + assert node1.query("SELECT count() FROM rdb.table0") == "5000000\n" + assert node2.query("SELECT count() FROM rdb.table0") == "5000000\n" + + node1.query("DROP DATABASE rdb SYNC") + node2.query("DROP DATABASE rdb SYNC") diff --git 
a/tests/integration/test_keeper_client/__init__.py b/tests/integration/test_keeper_client/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_client/configs/keeper_config.xml b/tests/integration/test_keeper_client/configs/keeper_config.xml new file mode 100644 index 00000000000..7e912283ac0 --- /dev/null +++ b/tests/integration/test_keeper_client/configs/keeper_config.xml @@ -0,0 +1,3 @@ + + + diff --git a/tests/integration/test_keeper_client/test.py b/tests/integration/test_keeper_client/test.py new file mode 100644 index 00000000000..00c7908eeed --- /dev/null +++ b/tests/integration/test_keeper_client/test.py @@ -0,0 +1,63 @@ +import pytest +from helpers.client import CommandRequest +from helpers.cluster import ClickHouseCluster + + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=["configs/keeper_config.xml"], + with_zookeeper=True, + stay_alive=True, +) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_base_commands(started_cluster): + _ = started_cluster + + command = CommandRequest( + [ + started_cluster.server_bin_path, + "keeper-client", + "--host", + str(cluster.get_instance_ip("zoo1")), + "--port", + str(cluster.zookeeper_port), + "-q", + "create test_create_zk_node1 testvalue1;create test_create_zk_node_2 testvalue2;get test_create_zk_node1;", + ], + stdin="", + ) + + assert command.get_answer() == "testvalue1\n" + + +def test_four_letter_word_commands(started_cluster): + _ = started_cluster + + command = CommandRequest( + [ + started_cluster.server_bin_path, + "keeper-client", + "--host", + str(cluster.get_instance_ip("zoo1")), + "--port", + str(cluster.zookeeper_port), + "-q", + "ruok", + ], + stdin="", + ) + + assert command.get_answer() == "imok\n" diff --git a/tests/integration/test_redirect_url_storage/test.py b/tests/integration/test_redirect_url_storage/test.py index 06ff78707d7..b2178655444 100644 --- a/tests/integration/test_redirect_url_storage/test.py +++ b/tests/integration/test_redirect_url_storage/test.py @@ -151,7 +151,7 @@ def test_url_reconnect(started_cluster): result = node1.query( "select sum(cityHash64(id)) from url('http://hdfs1:50075/webhdfs/v1/storage_big?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV', 'id Int32') settings http_max_tries = 10, http_retry_max_backoff_ms=1000" ) - assert (int(result), 6581218782194912115) + assert int(result) == 6581218782194912115 thread = threading.Thread(target=select) thread.start() @@ -161,5 +161,5 @@ def test_url_reconnect(started_cluster): thread.join() - assert (int(result), 6581218782194912115) + assert int(result) == 6581218782194912115 assert node1.contains_in_log("Timeout: connect timed out") diff --git a/tests/integration/test_reload_clusters_config/test.py b/tests/integration/test_reload_clusters_config/test.py index a52871890e9..cb003bbe04e 100644 --- a/tests/integration/test_reload_clusters_config/test.py +++ b/tests/integration/test_reload_clusters_config/test.py @@ -169,7 +169,9 @@ test_config3 = """ def send_repeated_query(table, count=5): for i in range(count): node.query_and_get_error( - "SELECT count() FROM {} SETTINGS receive_timeout=1".format(table) + "SELECT count() FROM {} SETTINGS receive_timeout=1, handshake_timeout_ms=1".format( + table + ) ) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index d9ac70f51ad..f983bd618e3 100644 --- 
a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1636,7 +1636,7 @@ def test_ast_auth_headers(started_cluster): filename = "test.csv" result = instance.query_and_get_error( - f"select count() from s3('http://resolver:8080/{bucket}/{filename}', 'CSV')" + f"select count() from s3('http://resolver:8080/{bucket}/{filename}', 'CSV', 'dummy String')" ) assert "HTTP response code: 403" in result diff --git a/tests/integration/test_system_clusters_actual_information/test.py b/tests/integration/test_system_clusters_actual_information/test.py index 0658d0c7576..e90a6cdeb3f 100644 --- a/tests/integration/test_system_clusters_actual_information/test.py +++ b/tests/integration/test_system_clusters_actual_information/test.py @@ -40,8 +40,8 @@ def test(started_cluster): cluster.pause_container("node_1") node.query("SYSTEM RELOAD CONFIG") - node.query_and_get_error( - "SELECT count() FROM distributed SETTINGS receive_timeout=1" + error = node.query_and_get_error( + "SELECT count() FROM distributed SETTINGS receive_timeout=1, handshake_timeout_ms=1" ) result = node.query( diff --git a/tests/queries/0_stateless/00534_functions_bad_arguments10.sh b/tests/queries/0_stateless/00534_functions_bad_arguments10.sh index 8525b63a989..b9733f92812 100755 --- a/tests/queries/0_stateless/00534_functions_bad_arguments10.sh +++ b/tests/queries/0_stateless/00534_functions_bad_arguments10.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-debug +# Tags: no-tsan, no-debug, no-msan # Tag no-tsan: Too long for TSan # shellcheck disable=SC2016 diff --git a/tests/queries/0_stateless/00534_functions_bad_arguments5.sh b/tests/queries/0_stateless/00534_functions_bad_arguments5.sh index a8b0ce77677..812ba9f97fa 100755 --- a/tests/queries/0_stateless/00534_functions_bad_arguments5.sh +++ b/tests/queries/0_stateless/00534_functions_bad_arguments5.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-debug, no-fasttest +# Tags: no-tsan, no-debug, no-fasttest, no-msan # Tag no-tsan: Too long for TSan # shellcheck disable=SC2016 diff --git a/tests/queries/0_stateless/00534_functions_bad_arguments6.sh b/tests/queries/0_stateless/00534_functions_bad_arguments6.sh index b0080c3b418..6626a6dfe55 100755 --- a/tests/queries/0_stateless/00534_functions_bad_arguments6.sh +++ b/tests/queries/0_stateless/00534_functions_bad_arguments6.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-debug +# Tags: no-tsan, no-debug, no-msan # Tag no-tsan: Too long for TSan # shellcheck disable=SC2016 diff --git a/tests/queries/0_stateless/00534_functions_bad_arguments9.sh b/tests/queries/0_stateless/00534_functions_bad_arguments9.sh index 2975643020b..c7659db8621 100755 --- a/tests/queries/0_stateless/00534_functions_bad_arguments9.sh +++ b/tests/queries/0_stateless/00534_functions_bad_arguments9.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-debug +# Tags: no-tsan, no-debug, no-msan # Tag no-tsan: Too long for TSan # shellcheck disable=SC2016 diff --git a/tests/queries/0_stateless/01256_negative_generate_random.sql b/tests/queries/0_stateless/01256_negative_generate_random.sql index 14f1d947108..7e05a394b8d 100644 --- a/tests/queries/0_stateless/01256_negative_generate_random.sql +++ b/tests/queries/0_stateless/01256_negative_generate_random.sql @@ -1,5 +1,4 @@ SELECT * FROM generateRandom('i8', 1, 10, 10); -- { serverError 62 } SELECT * FROM generateRandom; -- { serverError 60 } -SELECT * FROM generateRandom(); -- { serverError CANNOT_EXTRACT_TABLE_STRUCTURE } SELECT * FROM 
generateRandom('i8 UInt8', 1, 10, 10, 10, 10); -- { serverError 42 } SELECT * FROM generateRandom('', 1, 10, 10); -- { serverError 62 } diff --git a/tests/queries/0_stateless/01763_filter_push_down_bugs.reference b/tests/queries/0_stateless/01763_filter_push_down_bugs.reference index 7df35e2948d..5aa2e645509 100644 --- a/tests/queries/0_stateless/01763_filter_push_down_bugs.reference +++ b/tests/queries/0_stateless/01763_filter_push_down_bugs.reference @@ -6,22 +6,3 @@ String1_0 String2_0 String3_0 String4_0 1 String1_0 String2_0 String3_0 String4_0 1 1 [0,1,2] 1 -Expression ((Projection + Before ORDER BY)) - Filter (WHERE) - Join (JOIN FillRightFirst) - Filter (( + Before JOIN)) - ReadFromMergeTree (default.t1) - Indexes: - PrimaryKey - Keys: - id - Condition: (id in [101, 101]) - Parts: 1/1 - Granules: 1/1 - Expression ((Joined actions + (Rename joined columns + (Projection + Before ORDER BY)))) - ReadFromMergeTree (default.t2) - Indexes: - PrimaryKey - Condition: true - Parts: 1/1 - Granules: 1/1 diff --git a/tests/queries/0_stateless/01763_filter_push_down_bugs.sql b/tests/queries/0_stateless/01763_filter_push_down_bugs.sql index 2ee249b5ce7..1058bf75144 100644 --- a/tests/queries/0_stateless/01763_filter_push_down_bugs.sql +++ b/tests/queries/0_stateless/01763_filter_push_down_bugs.sql @@ -38,25 +38,6 @@ DROP TABLE IF EXISTS Test; select x, y from (select [0, 1, 2] as y, 1 as a, 2 as b) array join y as x where a = 1 and b = 2 and (x = 1 or x != 1) and x = 1; -DROP TABLE IF EXISTS t; create table t(a UInt8) engine=MergeTree order by a; insert into t select * from numbers(2); select a from t t1 join t t2 on t1.a = t2.a where t1.a; -DROP TABLE IF EXISTS t; - -DROP TABLE IF EXISTS t1; -DROP TABLE IF EXISTS t2; -CREATE TABLE t1 (id Int64, create_time DateTime) ENGINE = MergeTree ORDER BY id; -CREATE TABLE t2 (delete_time DateTime) ENGINE = MergeTree ORDER BY delete_time; - -insert into t1 values (101, '2023-05-28 00:00:00'), (102, '2023-05-28 00:00:00'); -insert into t2 values ('2023-05-31 00:00:00'); - -EXPLAIN indexes=1 SELECT id, delete_time FROM t1 - CROSS JOIN ( - SELECT delete_time - FROM t2 -) AS d WHERE create_time < delete_time AND id = 101; - -DROP TABLE IF EXISTS t1; -DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/01910_view_dictionary_check_refresh.reference b/tests/queries/0_stateless/01910_view_dictionary_check_refresh.reference new file mode 100644 index 00000000000..c1be003ebef --- /dev/null +++ b/tests/queries/0_stateless/01910_view_dictionary_check_refresh.reference @@ -0,0 +1,4 @@ +view 1 2022-10-20 first +dict 1 2022-10-20 first +view 1 2022-10-21 second +dict 1 2022-10-21 second diff --git a/tests/queries/0_stateless/01910_view_dictionary_check_refresh.sql b/tests/queries/0_stateless/01910_view_dictionary_check_refresh.sql new file mode 100644 index 00000000000..b36a378d827 --- /dev/null +++ b/tests/queries/0_stateless/01910_view_dictionary_check_refresh.sql @@ -0,0 +1,54 @@ +-- Tags: long + +DROP DICTIONARY IF EXISTS TestTblDict; +DROP VIEW IF EXISTS TestTbl_view; +DROP TABLE IF EXISTS TestTbl; + +CREATE TABLE TestTbl +( + `id` UInt16, + `dt` Date, + `val` String +) +ENGINE = MergeTree +PARTITION BY dt +ORDER BY (id); + +CREATE VIEW TestTbl_view +AS +SELECT * +FROM TestTbl +WHERE dt = ( SELECT max(dt) FROM TestTbl ); + +CREATE DICTIONARY IF NOT EXISTS TestTblDict +( + `id` UInt16, + `dt` Date, + `val` String +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE TestTbl_view DB currentDatabase())) +LIFETIME(1) +LAYOUT(COMPLEX_KEY_HASHED()); + +select 'view' 
src,* FROM TestTbl_view; +select 'dict' src,* FROM TestTblDict ; + +insert into TestTbl values(1, '2022-10-20', 'first'); + +SELECT sleep(3) from numbers(4) settings max_block_size= 1 format Null; + +select 'view' src,* FROM TestTbl_view; +select 'dict' src,* FROM TestTblDict ; + +insert into TestTbl values(1, '2022-10-21', 'second'); + +SELECT sleep(3) from numbers(4) settings max_block_size= 1 format Null; + +select 'view' src,* FROM TestTbl_view; +select 'dict' src,* FROM TestTblDict ; + +DROP DICTIONARY IF EXISTS TestTblDict; +DROP VIEW IF EXISTS TestTbl_view; +DROP TABLE IF EXISTS TestTbl; + diff --git a/tests/queries/0_stateless/02344_describe_cache.reference b/tests/queries/0_stateless/02344_describe_cache.reference index 7561b32bae1..a803ca1fab1 100644 --- a/tests/queries/0_stateless/02344_describe_cache.reference +++ b/tests/queries/0_stateless/02344_describe_cache.reference @@ -1,2 +1,2 @@ -134217728 1048576 104857600 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 0 +134217728 1048576 8388608 1 0 0 0 /var/lib/clickhouse/caches/s3_cache/ 0 134217728 1048576 104857600 0 0 0 0 /var/lib/clickhouse/caches/s3_cache_2/ 0 diff --git a/tests/queries/0_stateless/02435_rollback_cancelled_queries.reference b/tests/queries/0_stateless/02435_rollback_cancelled_queries.reference index 2d32c17ec7c..38ff81b2371 100644 --- a/tests/queries/0_stateless/02435_rollback_cancelled_queries.reference +++ b/tests/queries/0_stateless/02435_rollback_cancelled_queries.reference @@ -1,3 +1,2 @@ 1000000 0 -1 diff --git a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh index 8f8e8cc7ee0..776d1f850b0 100755 --- a/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh +++ b/tests/queries/0_stateless/02435_rollback_cancelled_queries.sh @@ -110,11 +110,12 @@ insert_data 1 $CLICKHOUSE_CLIENT --implicit_transaction=1 -q 'select throwIf(count() % 1000000 != 0 or count() = 0) from dedup_test' \ || $CLICKHOUSE_CLIENT -q "select name, rows, active, visible, creation_tid, creation_csn from system.parts where database=currentDatabase();" -# Ensure that thread_cancel actually did something -$CLICKHOUSE_CLIENT -q "select count() > 0 from system.text_log where event_date >= yesterday() and query_id like '$TEST_MARK%' and ( - message_format_string in ('Unexpected end of file while reading chunk header of HTTP chunked data', 'Unexpected EOF, got {} of {} bytes', - 'Query was cancelled or a client has unexpectedly dropped the connection') or - message like '%Connection reset by peer%' or message like '%Broken pipe, while writing to socket%')" +# Ensure that thread_cancel actually did something (useful when editing this test) +# We cannot check it in the CI, because sometimes it fails due to randomization +# $CLICKHOUSE_CLIENT -q "select count() > 0 from system.text_log where event_date >= yesterday() and query_id like '$TEST_MARK%' and ( +# message_format_string in ('Unexpected end of file while reading chunk header of HTTP chunked data', 'Unexpected EOF, got {} of {} bytes', +# 'Query was cancelled or a client has unexpectedly dropped the connection') or +# message like '%Connection reset by peer%' or message like '%Broken pipe, while writing to socket%')" wait_for_queries_to_finish 30 $CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=0 -q "drop table dedup_test" diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference index 3606b9a41db..ccc514e7ea2 
100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.reference +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -194,3 +194,6 @@ E28DBDE7FE22E41C 1 E28DBDE7FE22E41C 1 +Check bug with hashing of const integer values +11862823756610506724 +11862823756610506724 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql index 9c914f586f0..900b99f548a 100644 --- a/tests/queries/0_stateless/02534_keyed_siphash.sql +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -272,3 +272,12 @@ select hex(sipHash64()); SELECT hex(sipHash128()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128()) = '1CE422FEE7BD8DE20000000000000000'; select hex(sipHash64Keyed()); SELECT hex(sipHash128Keyed()) = hex(reverse(unhex('1CE422FEE7BD8DE20000000000000000'))) or hex(sipHash128Keyed()) = '1CE422FEE7BD8DE20000000000000000'; + +SELECT 'Check bug with hashing of const integer values'; +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (key Tuple(UInt64, UInt64), val UInt64) ENGINE=Memory; +INSERT INTO tab VALUES ((2, 2), 4); +-- these two statements must produce the same result +SELECT sipHash64Keyed(key, val) FROM tab; +SELECT sipHash64Keyed(key, 4::UInt64) FROM tab; +DROP TABLE tab; diff --git a/tests/queries/0_stateless/02586_generate_random_structure.reference b/tests/queries/0_stateless/02586_generate_random_structure.reference new file mode 100644 index 00000000000..d2929fb4564 --- /dev/null +++ b/tests/queries/0_stateless/02586_generate_random_structure.reference @@ -0,0 +1,19 @@ +c1 String, c2 UInt256, c3 String, c4 Decimal128(8), c5 UInt128 +String +Const(String) +` 90465455320735604871982424534384518837533904778028808627865442405232847164685 5& -303477100882544888461471906106.82821046 75820566154622566322847299106656624693 +c1 FixedString(125) +c2 IPv4 +c3.e1 Array(Enum16(\'e1V3\' = -24827, \'e1V14\' = -24479, \'e1V8\' = -22478, \'e1V10\' = -13735, \'e1V15\' = -12641, \'e1V11\' = -10191, \'e1V0\' = -8579, \'e1V7\' = -8104, \'e1V6\' = 712, \'e1V12\' = 5683, \'e1V13\' = 13678, \'e1V9\' = 19740, \'e1V5\' = 23066, \'e1V2\' = 23292, \'e1V4\' = 23736, \'e1V1\' = 31672)) +c3.e2 Array(Map(Int8, Int32)) +c3.e3 Array(Decimal(76, 64)) +c3.e4 Array(Int32) +c3.e5 Array(Nullable(Int64)) +c3.e6 Array(Int256) +c4 FixedString(183) +c5 IPv4 +c6 UInt256 +Tb#yV[>M*ܨ(OR8V1n)H}C\'I7tqnV)䳆qLPoRg<{3iH_m!q\'G 127.48.9.45 ['e1V10','e1V0','e1V10','e1V14','e1V10','e1V14'] [{-13:777622572,102:-1122882357,62:1647813163,-94:2094022166},{-32:1448633509},{},{},{34:1536340393,19:-2049677851,74:65643868,-46:-1990799930,97:-531041081,46:-2634833,14:1581632600,89:-771229823,-105:1238603584},{47:1458809010,109:1640682510,86:1945730198,85:1505847247,35:-35189402}] [153363749503.3642648494826450951141750747382772821825909005880434540971999557,79828591186.7378041015337066268618633118713347614941338787453473118807106292,81672688565.9633830721322966111551266731935181670389237071708068971548883315,573768486971.1812413548839655834002608768736215115033958693122764224003897029,-393925092368.4893467278351090742501814120269109477445490969167853713051140487,46027399426.0865278566391382610843315130162915324295037009704113636499519839] [755855942,1804001770,-78103159,-866181765,731736602,-79599206] [5253556148991564114,4681434929596395351,-7302160004580855709,-3686747220178471318,6288582051009949273,646864891160092871] 
[17035203905051045016266537043565487029724162173062647021612805252288722534904,-42105881403933504641593145676742477006499618886131028341247993701618141933523,45346626822580305846120377917274679004279343244238782744860626882886217433843,-3660165069803677989574889324494857545543653453780976182221584349306428201647,-23316760935816288837287058499520670431785615691220162210524162590241529297823,6184785563808848524970564618169964412151721224362412457508264894603779018817] ڡ|A"x>rwzZ:j8tZD"Tu2h!WIytPa|\'yofFO\0Ֆ6\fIrESacW<~e lT>P3})w%4@_2N"ІXp$^ҘͰ\04@n\b\r4H 16.177.117.209 7882774382721411359365561736453116698030365959050344381263687375357052837130 +Tb#yV[>M*ܨ(OR8V1n)H}C\'I7tqnV)䳆qLPoRg<{3iH_m!q\'G 127.48.9.45 ['e1V10','e1V0','e1V10','e1V14','e1V10','e1V14'] [{-13:777622572,102:-1122882357,62:1647813163,-94:2094022166},{-32:1448633509},{},{},{34:1536340393,19:-2049677851,74:65643868,-46:-1990799930,97:-531041081,46:-2634833,14:1581632600,89:-771229823,-105:1238603584},{47:1458809010,109:1640682510,86:1945730198,85:1505847247,35:-35189402}] [153363749503.3642648494826450951141750747382772821825909005880434540971999557,79828591186.7378041015337066268618633118713347614941338787453473118807106292,81672688565.9633830721322966111551266731935181670389237071708068971548883315,573768486971.1812413548839655834002608768736215115033958693122764224003897029,-393925092368.4893467278351090742501814120269109477445490969167853713051140487,46027399426.0865278566391382610843315130162915324295037009704113636499519839] [755855942,1804001770,-78103159,-866181765,731736602,-79599206] [5253556148991564114,4681434929596395351,-7302160004580855709,-3686747220178471318,6288582051009949273,646864891160092871] [17035203905051045016266537043565487029724162173062647021612805252288722534904,-42105881403933504641593145676742477006499618886131028341247993701618141933523,45346626822580305846120377917274679004279343244238782744860626882886217433843,-3660165069803677989574889324494857545543653453780976182221584349306428201647,-23316760935816288837287058499520670431785615691220162210524162590241529297823,6184785563808848524970564618169964412151721224362412457508264894603779018817] ڡ|A"x>rwzZ:j8tZD"Tu2h!WIytPa|\'yofFO\0Ֆ6\fIrESacW<~e lT>P3})w%4@_2N"ІXp$^ҘͰ\04@n\b\r4H 16.177.117.209 7882774382721411359365561736453116698030365959050344381263687375357052837130 +Tb#yV[>M*ܨ(OR8V1n)H}C\'I7tqnV)䳆qLPoRg<{3iH_m!q\'G 127.48.9.45 ['e1V10'] [{}] [825002272867.1157788721157301271303736024856710948164507982705676578804195475] [1865150610] [7514464811443271056] [33504961604882608369857530219353040639899064613284394558131808339620328539033] ڡ|A"x>rwzZ:j8tZD"Tu2h!WIytPa|\'yofFO\0Ֆ6\fIrESacW<~e lT>P3})w%4@_2N"ІXp$^ҘͰ\04@n\b\r4H 16.177.117.209 7882774382721411359365561736453116698030365959050344381263687375357052837130 +c1 LowCardinality(Nullable(UInt64)), c2 Date32, c3 LowCardinality(Nullable(Float64)), c4 Int256, c5 Date32 diff --git a/tests/queries/0_stateless/02586_generate_random_structure.sql b/tests/queries/0_stateless/02586_generate_random_structure.sql new file mode 100644 index 00000000000..e2e8409b35c --- /dev/null +++ b/tests/queries/0_stateless/02586_generate_random_structure.sql @@ -0,0 +1,20 @@ +select generateRandomStructure(5, 42); +select toTypeName(generateRandomStructure(5, 42)); +select toColumnTypeName(generateRandomStructure(5, 42)); +SELECT * FROM generateRandom(generateRandomStructure(5, 42), 42) LIMIT 1; + +select generateRandomStructure(5, 42, 42); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +select 
generateRandomStructure('5'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +select generateRandomStructure(5, '42'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +select generateRandomStructure(materialize(5), 42); -- {serverError ILLEGAL_COLUMN} +select generateRandomStructure(5, materialize(42)); -- {serverError ILLEGAL_COLUMN} + +desc generateRandom(10000000); +select * from generateRandom(10000000) limit 1; +select * from generateRandom(10000000, 2) limit 1; +select * from generateRandom(10000000, 2, 2) limit 1; +select * from generateRandom(10000000, 2, 2, 2) limit 1; -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} + +set allow_suspicious_low_cardinality_types=1; +select generateRandomStructure(5, 4); + diff --git a/tests/queries/0_stateless/02725_any_join_single_row.reference b/tests/queries/0_stateless/02725_any_join_single_row.reference new file mode 100644 index 00000000000..1e940bdc71e --- /dev/null +++ b/tests/queries/0_stateless/02725_any_join_single_row.reference @@ -0,0 +1,6 @@ +Join(ANY, LEFT, key) 0 1 +Join(ANY, LEFT, key) 1 1 +Join(ANY, LEFT, key) 1 1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02725_any_join_single_row.sql b/tests/queries/0_stateless/02725_any_join_single_row.sql new file mode 100644 index 00000000000..f7ddd2f402b --- /dev/null +++ b/tests/queries/0_stateless/02725_any_join_single_row.sql @@ -0,0 +1,41 @@ +DROP TABLE IF EXISTS join_test; +DROP TABLE IF EXISTS join_test_right; + +CREATE TABLE join_test ( `key` UInt64, `value` UInt64 ) ENGINE = Join(ANY, LEFT, key); + +-- Save table size before inserting any rows +CREATE TEMPORARY TABLE initial_table_size AS + SELECT engine_full, total_rows, total_bytes FROM system.tables WHERE (name = 'join_test') AND (database = currentDatabase()); + +-- Check that table size is less than 100K +SELECT engine_full, total_rows, total_bytes < 100_000 FROM initial_table_size; + +INSERT INTO join_test (key, value) SELECT 1, number FROM numbers(1); + +-- Save table size after inserting one row +CREATE TEMPORARY TABLE one_row_table_size AS + SELECT engine_full, total_rows, total_bytes FROM system.tables WHERE (name = 'join_test') AND (database = currentDatabase()); + +-- Check that table size is less than 2x after inserting one row +SELECT engine_full, total_rows, total_bytes < 2 * (SELECT total_bytes FROM initial_table_size) FROM one_row_table_size; + +-- Insert some more rows with the same key +INSERT INTO join_test (key, value) SELECT 1, number FROM numbers(1); +INSERT INTO join_test (key, value) SELECT 1, number FROM numbers(10_000); + +-- Check that rows with the same key are not duplicated +SELECT engine_full, total_rows, total_bytes == (SELECT total_bytes FROM one_row_table_size) FROM system.tables WHERE (name = 'join_test') AND (database = currentDatabase()); + +-- For RIGHT join we save all rows from the right table +CREATE TABLE join_test_right ( `key` UInt64, `value` UInt64 ) ENGINE = Join(ANY, RIGHT, key); + +INSERT INTO join_test_right (key, value) SELECT 1, number FROM numbers(1); +INSERT INTO join_test_right (key, value) SELECT 1, number FROM numbers(1); +INSERT INTO join_test_right (key, value) SELECT 1, number FROM numbers(1); +SELECT count() == 3 FROM (SELECT 1 as key) t1 ANY RIGHT JOIN join_test_right ON t1.key = join_test_right.key; +INSERT INTO join_test_right (key, value) SELECT 1, number FROM numbers(7); +SELECT count() == 10 FROM (SELECT 1 as key) t1 ANY RIGHT JOIN join_test_right ON t1.key = join_test_right.key; +SELECT count() == 10 FROM (SELECT 2 as key) t1 ANY RIGHT JOIN join_test_right ON t1.key = 
join_test_right.key; + +DROP TABLE IF EXISTS join_test; +DROP TABLE IF EXISTS join_test_right; diff --git a/tests/queries/0_stateless/02746_index_analysis_binary_operator_with_null.reference b/tests/queries/0_stateless/02746_index_analysis_binary_operator_with_null.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02746_index_analysis_binary_operator_with_null.sql b/tests/queries/0_stateless/02746_index_analysis_binary_operator_with_null.sql new file mode 100644 index 00000000000..f9613735bbf --- /dev/null +++ b/tests/queries/0_stateless/02746_index_analysis_binary_operator_with_null.sql @@ -0,0 +1,12 @@ +drop table if exists tab; + +create table tab (x DateTime) engine MergeTree order by x; + +SELECT toDateTime(65537, toDateTime(NULL), NULL) +FROM tab +WHERE ((x + CAST('1', 'Nullable(UInt8)')) <= 2) AND ((x + CAST('', 'Nullable(UInt8)')) <= 256) +ORDER BY + toDateTime(toDateTime(-2, NULL, NULL) + 100.0001, NULL, -2, NULL) DESC NULLS LAST, + x ASC NULLS LAST; + +drop table tab; diff --git a/tests/queries/0_stateless/02781_data_skipping_index_merge_tree_min_for_seek.reference b/tests/queries/0_stateless/02781_data_skipping_index_merge_tree_min_for_seek.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02781_data_skipping_index_merge_tree_min_for_seek.sql b/tests/queries/0_stateless/02781_data_skipping_index_merge_tree_min_for_seek.sql new file mode 100644 index 00000000000..4cebdde3dfe --- /dev/null +++ b/tests/queries/0_stateless/02781_data_skipping_index_merge_tree_min_for_seek.sql @@ -0,0 +1,22 @@ +-- Tags: no-random-merge-tree-settings, no-random-settings + +DROP TABLE IF EXISTS data; + +CREATE TABLE data +( + key Int, + v1 DateTime, + INDEX v1_index v1 TYPE minmax GRANULARITY 1 +) ENGINE=AggregatingMergeTree() +ORDER BY key +SETTINGS index_granularity=8192; + +SYSTEM STOP MERGES data; + +-- generate 50% of marks that cannot be skipped with v1_index +-- this will create a gap in marks +INSERT INTO data SELECT number, if(number/8192 % 2 == 0, now(), now() - INTERVAL 200 DAY) FROM numbers(1e6); +INSERT INTO data SELECT number+1e6, if(number/8192 % 2 == 0, now(), now() - INTERVAL 200 DAY) FROM numbers(1e6); + +SELECT * FROM data WHERE v1 >= now() - INTERVAL 180 DAY FORMAT Null SETTINGS max_threads=1, max_final_threads=1, force_data_skipping_indices='v1_index', merge_tree_min_rows_for_seek=0, max_rows_to_read=1999999; +SELECT * FROM data WHERE v1 >= now() - INTERVAL 180 DAY FORMAT Null SETTINGS max_threads=1, max_final_threads=1, force_data_skipping_indices='v1_index', merge_tree_min_rows_for_seek=1, max_rows_to_read=1999999; -- { serverError TOO_MANY_ROWS } diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index ded7a4643a9..0455556ae96 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1,69 +1,367 @@ personal_ws-1.1 en 543 AArch ACLs +ALTERs +AMPLab AMQP +ANNIndex +ANNIndexes +AORM +APIs ARMv ASLR +ASOF ASan Actian +ActionsMenu +ActiveRecord AddressSanitizer +AggregateFunction +Aggregatefunction +AggregatingMergeTree +AggregatorThreads +AggregatorThreadsActive +Akka +AlertManager +Alexey +AnyEvent AppleClang +ArrayJoin ArrowStream +AsyncInsertCacheSize +AsynchronousHeavyMetricsCalculationTimeSpent +AsynchronousHeavyMetricsUpdateInterval +AsynchronousInsert +AsynchronousInsertThreads +AsynchronousInsertThreadsActive 
+AsynchronousMetricsCalculationTimeSpent +AsynchronousMetricsUpdateInterval +AsynchronousReadWait +Authenticator +Authenticators +AutoML +Autocompletion AvroConfluent +BIGINT +BIGSERIAL +BORO BSON BSONEachRow +BackgroundBufferFlushSchedulePool +BackgroundBufferFlushSchedulePoolSize +BackgroundBufferFlushSchedulePoolTask +BackgroundCommonPoolSize +BackgroundCommonPoolTask +BackgroundDistributedSchedulePool +BackgroundDistributedSchedulePoolSize +BackgroundDistributedSchedulePoolTask +BackgroundFetchesPoolSize +BackgroundFetchesPoolTask +BackgroundMergesAndMutationsPoolSize +BackgroundMergesAndMutationsPoolTask +BackgroundMessageBrokerSchedulePoolSize +BackgroundMessageBrokerSchedulePoolTask +BackgroundMovePoolSize +BackgroundMovePoolTask +BackgroundProcessingPool +BackgroundSchedulePool +BackgroundSchedulePoolSize +BackgroundSchedulePoolTask +BackupsIO +BackupsIOThreads +BackupsIOThreadsActive +BackupsThreads +BackupsThreadsActive +BestEffort +BestEffortOrNull +BestEffortOrZero +BestEffortUS +BestEffortUSOrNull +BestEffortUSOrZero +Blazingly +BlockActiveTime +BlockDiscardBytes +BlockDiscardMerges +BlockDiscardOps +BlockDiscardTime +BlockInFlightOps +BlockQueueTime +BlockReadBytes +BlockReadMerges +BlockReadOps +BlockReadTime +BlockWriteBytes +BlockWriteMerges +BlockWriteOps +BlockWriteTime Bool +BrokenDistributedFilesToInsert +BuildID BuilderBinAarch BuilderBinAmd +Bytebase CCTOOLS +CDATA +CDFs +CDMA +CESU +CIDR +CIDRToRange +CLOB CLion +CMPLNT CMake CMakeLists +CODECS +COVID +CPUFrequencyMHz CPUs CSVWithNames CSVWithNamesAndTypes +CSVs +CTEs +CacheDetachedFileSegments +CacheDictionaries +CacheDictionary +CacheDictionaryThreads +CacheDictionaryThreadsActive +CacheDictionaryUpdateQueueBatches +CacheDictionaryUpdateQueueKeys +CacheFileSegments CamelCase +Cap'n +CapContains +CapUnion CapnProto +CatBoost +CellAreaM +CellAreaRads +CellsIntersect CentOS +CertificateHandler +Chadmin +ChannelID +Cidr +Ciphertext +CityHash +ClickCat ClickHouse ClickHouse's +ClickHouseClient +ClickHouseMigrator +ClickHouseNIO +ClickHouseVapor +ClickVisual ClickableSquare +CloudDetails CodeBlock CodeLLDB +Codecs +CollapsingMergeTree +Combinators Compat +CompiledExpressionCacheBytes +CompiledExpressionCacheCount +ComplexKeyCache +ComplexKeyDirect +ComplexKeyHashed Config ConnectionDetails +Const +ContextLockWait Contrib +Covid +Cramer's +Criteo +Crotty +Crowdsourced Ctrl +CurrentMetrics CustomSeparated CustomSeparatedWithNames CustomSeparatedWithNamesAndTypes +DBAs DBMSs +DBeaver +DDLWORKER +DDLWorker +DDLWorkerThreads +DDLWorkerThreadsActive +DECRYPT +DESC +DOGEFI +DataGrip +DataLens +DataTime +DataTypes +DatabaseCatalog +DatabaseCatalogThreads +DatabaseCatalogThreadsActive +DatabaseOnDisk +DatabaseOnDiskThreads +DatabaseOnDiskThreadsActive +DatabaseOrdinaryThreads +DatabaseOrdinaryThreadsActive DateTime DateTimes +DbCL +Decrypted +Deduplicate +Deduplication +DelayedInserts +DeliveryTag +DeltaLake +Denormalize +DestroyAggregatesThreads +DestroyAggregatesThreadsActive +DictCacheRequests +DiskAvailable +DiskObjectStorage +DiskObjectStorageAsyncThreads +DiskObjectStorageAsyncThreadsActive +DiskSpaceReservedForMerge +DiskTotal +DiskUnreserved +DiskUsed +DistributedFilesToInsert +DistributedSend DockerHub +DoubleDelta Doxygen +ECMA +Ecto +EdgeAngle +EdgeLengthKm +EdgeLengthM +EmbeddedRocksDB +Embeddings Encodings Enum +Enums Eoan +EphemeralNode +Ethereum +ExactEdgeLengthKm +ExactEdgeLengthM +ExactEdgeLengthRads +ExecutablePool +ExtType +ExternalDistributed +FFFD +FIPS +FOSDEM +FQDN +Failover +FarmHash +FilesystemCacheBytes 
+FilesystemCacheElements +FilesystemCacheFiles +FilesystemCacheReadBuffers +FilesystemCacheSize +FilesystemLogsPathAvailableBytes +FilesystemLogsPathAvailableINodes +FilesystemLogsPathTotalBytes +FilesystemLogsPathTotalINodes +FilesystemLogsPathUsedBytes +FilesystemLogsPathUsedINodes +FilesystemMainPathAvailableBytes +FilesystemMainPathAvailableINodes +FilesystemMainPathTotalBytes +FilesystemMainPathTotalINodes +FilesystemMainPathUsedBytes +FilesystemMainPathUsedINodes FixedString +Flink +ForEach FreeBSD Fuzzer Fuzzers +GTID GTest Gb +Gbit Gcc +GenerateRandom +GeoCoord +Geobases +Geohash +Geoid +GetBaseCell +GetDestinationIndexFromUnidirectionalEdge +GetFaces +GetIndexesFromUnidirectionalEdge +GetNeighbors +GetOriginIndexFromUnidirectionalEdge +GetPentagonIndexes +GetRes +GetResolution +GetUnidirectionalEdge +GetUnidirectionalEdgeBoundary +GetUnidirectionalEdgesFromHexagon +GitLab +GlobalThread +GlobalThreadActive +GoLand GoogleTest +Grafana +GraphQL +GraphiteMergeTree +Greenwald HDDs +HHMM +HMAC +HSTS +HTTPConnection +HTTPThreads +HashedDictionary +HashedDictionaryThreads +HashedDictionaryThreadsActive +Haversine Heredoc +HexAreaKm +HexAreaM +HexRing +Holistics Homebrew Homebrew's HorizontalDivide Hostname +HouseOps +Hudi +HyperLogLog +Hypot +IANA +IMDS +INFILE INSERTed +INSERTs +IOPrefetchThreads +IOPrefetchThreadsActive +IOThreads +IOThreadsActive +IOUringInFlightEvents +IOUringPendingEvents +IOWriterThreads +IOWriterThreadsActive +IPTrie IPv +Identifiant +Incrementing +IndexesAreNeighbors +InfluxDB +Instana IntN Integrations +IntelliJ +InterserverConnection +InterserverThreads +IsPentagon +IsResClassIII +IsValid +JBOD +JOINed +JOINs +JSONArrayLength JSONAsObject JSONAsString JSONColumns @@ -79,56 +377,296 @@ JSONCompactStringsEachRowWithNames JSONCompactStringsEachRowWithNamesAndTypes JSONEachRow JSONEachRowWithProgress +JSONExtract +JSONExtractArrayRaw +JSONExtractBool +JSONExtractFloat +JSONExtractInt +JSONExtractKeys +JSONExtractKeysAndValues +JSONExtractKeysAndValuesRaw +JSONExtractRaw +JSONExtractString +JSONExtractUInt +JSONHas +JSONLength JSONObjectEachRow JSONStrings JSONStringsEachRow JSONStringsEachRowWithProgress +JSONType JSONs Jaeger +Jannis +JavaHash Jemalloc Jepsen +JetBrains +Jitter +Joda +JumpConsistentHash +Jupyter KDevelop +KafkaAssignedPartitions +KafkaBackgroundReads +KafkaConsumers +KafkaConsumersInUse +KafkaConsumersWithAssignment +KafkaLibrdkafkaThreads +KafkaProducers +KafkaWrites +Kahan +KeeperAliveConnections +KeeperMap +KeeperOutstandingRequets +Kerberos +Khanna +KittenHouse +Klickhouse +Kolmogorov +Kubernetes +LDAP LGPL LLDB LLVM's LOCALTIME LOCALTIMESTAMP +LONGLONG LibFuzzer +LightHouse LineAsString +Linf +LinfDistance +LinfNorm +LinfNormalize LinksDeployment +Linq +LoadAverage +LocalThread +LocalThreadActive +LogQL +Logstash +LookML LowCardinality +LpDistance +LpNorm +LpNormalize +Luebbe +Lyft +MACNumToString +MACStringToNum +MACStringToOUI +MEDIUMINT MEMTABLE +MMapCacheCells +MMappedAllocBytes +MMappedAllocs +MMappedFileBytes +MMappedFiles +MSSQL MSan MVCC +MacBook MacOS +MarkCacheBytes +MarkCacheFiles +MarksLoaderThreads +MarksLoaderThreadsActive +MaterializedMySQL +MaterializedPostgreSQL +MaterializedView +MaxDDLEntryID +MaxMind +MaxPartCountForPartition +MaxPushedDDLEntryID +Mbps Memcheck +MemoryCode +MemoryDataAndStack +MemoryResident MemorySanitizer +MemoryShared +MemoryTracking +MemoryVirtual +MergeJoin +MergeState MergeTree +MergeTreeAllRangesAnnouncementsSent +MergeTreeBackgroundExecutor +MergeTreeBackgroundExecutorThreads 
+MergeTreeBackgroundExecutorThreadsActive +MergeTreeDataSelectExecutor +MergeTreeDataSelectExecutorThreads +MergeTreeDataSelectExecutorThreadsActive +MergeTreePartsCleanerThreads +MergeTreePartsCleanerThreadsActive +MergeTreePartsLoaderThreads +MergeTreePartsLoaderThreadsActive +MergeTreeReadTaskRequestsSent +MergeTreeSettings MessagePack +Metastore +MetroHash MiB +Milli +Milovidov +MinHash +MinIO +MinMax +MindsDB +Mongodb MsgPack +MultiPolygon Multiline +Multiqueries Multithreading +Multiword +MurmurHash +MySQLConnection MySQLDump +MySQLThreads +NATS +NCHAR NEKUDOTAYIM +NEWDATE +NEWDECIMAL +NFKC +NFKD NULLIF NVME +NVMe NYPD +NaNs +Nagios +Namenode +NamesAndTypesList +Nano +Nesterov +NetworkReceive +NetworkReceiveBytes +NetworkReceiveDrop +NetworkReceiveErrors +NetworkReceivePackets +NetworkSend +NetworkSendBytes +NetworkSendDrop +NetworkSendErrors +NetworkSendPackets +NodeJs NuRaft +NumHexagons +NumToString +NumToStringClassC +NumberOfDatabases +NumberOfDetachedByUserParts +NumberOfDetachedParts +NumberOfTables +OFNS OLAP OLTP +OSContextSwitches +OSGuestNiceTime +OSGuestNiceTimeCPU +OSGuestNiceTimeNormalized +OSGuestTime +OSGuestTimeCPU +OSGuestTimeNormalized +OSIOWaitTime +OSIOWaitTimeCPU +OSIOWaitTimeNormalized +OSIdleTime +OSIdleTimeCPU +OSIdleTimeNormalized +OSInterrupts +OSIrqTime +OSIrqTimeCPU +OSIrqTimeNormalized +OSMemoryAvailable +OSMemoryBuffers +OSMemoryCached +OSMemoryFreePlusCached +OSMemoryFreeWithoutCached +OSMemoryTotal +OSNiceTime +OSNiceTimeCPU +OSNiceTimeNormalized +OSOpenFiles +OSProcessesBlocked +OSProcessesCreated +OSProcessesRunning +OSSoftIrqTime +OSSoftIrqTimeCPU +OSSoftIrqTimeNormalized +OSStealTime +OSStealTimeCPU +OSStealTimeNormalized +OSSystemTime +OSSystemTimeCPU +OSSystemTimeNormalized +OSThreadsRunnable +OSThreadsTotal +OSUptime +OSUserTime +OSUserTimeCPU +OSUserTimeNormalized +OTLP +OUTFILE ObjectId Observability +Octonica Ok +OnTime +OpenCelliD +OpenFileForRead +OpenFileForWrite OpenSSL OpenSUSE +OpenSky OpenStack OpenTelemetry +OrDefault +OrNull +OrZero +OvercommitTracker PAAMAYIM +PCRE +PREWHERE +PROCESSLIST +PagerDuty +ParallelFormattingOutputFormatThreads +ParallelFormattingOutputFormatThreadsActive +ParallelParsingInputFormat +ParallelParsingInputFormatThreads +ParallelParsingInputFormatThreadsActive +Parametrized ParquetMetadata Parsers +PartMutation +Partitioner +PartsActive +PartsCommitted +PartsCompact +PartsDeleteOnDestroy +PartsDeleting +PartsInMemory +PartsOutdated +PartsPreActive +PartsPreCommitted +PartsTemporary +PartsWide +PendingAsyncInsert +Percona +PhpStorm +PlantUML +PointDistKm +PointDistM +PointDistRads +PostgreSQLConnection +PostgreSQLThreads Postgres +PostgresSQL Precompiled +Preprocess PrettyCompact PrettyCompactMonoBlock PrettyCompactNoEscapes @@ -141,16 +679,83 @@ PrettySpace PrettySpaceMonoBlock PrettySpaceNoEscapes PrettySpaceNoEscapesMonoBlock +Prewhere +PrivateKeyPassphraseHandler +ProfileEvents +Profiler +Proleptic +PromHouse +PromQL +Promql +Promtail Protobuf ProtobufSingle +ProxySQL +PyCharm QEMU QTCreator +Quantile QueryCacheHits QueryCacheMisses +QueryPreempted +QueryThread +QuoteMeta RBAC +RClickHouse +RHEL +ROLLUP +RWLock +RWLockActiveReaders +RWLockActiveWriters +RWLockWaitingReaders +RWLockWaitingWriters +RabbitMQ +RangeHashed RawBLOB +ReDoS +ReadTaskRequestsSent +ReadonlyReplica +RecipeNLG +Recompressing +Recompression +RectAdd +RectContains +RectIntersection +RectUnion RedHat +Redash +Reddit +Refactorings +ReferenceKeyed +RegexpTree +RemoteRead +ReplacingMergeTree +ReplicasMaxAbsoluteDelay 
+ReplicasMaxInsertsInQueue +ReplicasMaxMergesInQueue +ReplicasMaxQueueSize +ReplicasMaxRelativeDelay +ReplicasSumInsertsInQueue +ReplicasSumMergesInQueue +ReplicasSumQueueSize +ReplicatedAggregatingMergeTree +ReplicatedChecks +ReplicatedCollapsingMergeTree +ReplicatedFetch +ReplicatedGraphiteMergeTree ReplicatedMergeTree +ReplicatedReplacingMergeTree +ReplicatedSend +ReplicatedSummingMergeTree +ReplicatedVersionedCollapsingMergeTree +Resample +RestartReplicaThreads +RestartReplicaThreadsActive +RestoreThreads +RestoreThreadsActive +RoaringBitmap +RocksDB +Rollup RowBinary RowBinaryWithNames RowBinaryWithNamesAndTypes @@ -158,19 +763,93 @@ Runtime SATA SELECTs SERIALIZABLE +SIGTERM SIMD SLES +SLRU SMALLINT +SPNEGO +SQEs +SQLAlchemy +SQLConsoleDetail SQLInsert SQLSTATE +SSDCache +SSDComplexKeyCache +SSDs +SSLManager +SSRF SSSE +SaaS +Sanjeev +Sankey +Scalable +Scatterplot +Schaefer Schemas +Schwartzian +SeasClick +SeekTable SelfManaged +Sematext +SendExternalTables +SendScalars +ShareAlike +SimHash +Simhash +SimpleAggregateFunction +SimpleState +SipHash +Smirnov's +Smirnov'test +Soundex +SpanKind +Spearman's +StartTLS +StartTime +StartupSystemTables +StartupSystemTablesThreads +StartupSystemTablesThreadsActive Stateful +StorageBufferBytes +StorageBufferRows +StorageDistributed +StorageDistributedThreads +StorageDistributedThreadsActive +StorageHive +StorageHiveThreads +StorageHiveThreadsActive +StorageODBC +StorageS +StringToNum +StringToNumOrDefault +StringToNumOrNull +StripeLog +Stripelog +Strohmeier +Subcolumns +Subexpression Submodules Subqueries +Substrings +SummingMergeTree +SuperSet +Superset +SupersetDocker +SystemReplicasThreads +SystemReplicasThreadsActive +TABLUM +TCPConnection +TCPThreads +TINYINT +TKSV +TLSv +TPCH +TSDB TSVRaw +TSVs TSan +TThe TabItem TabSeparated TabSeparatedRaw @@ -178,166 +857,756 @@ TabSeparatedRawWithNames TabSeparatedRawWithNamesAndTypes TabSeparatedWithNames TabSeparatedWithNamesAndTypes +Tabix +TablesLoaderThreads +TablesLoaderThreadsActive +TablesToDropQueueSize TargetSpecific +Telegraf TemplateIgnoreSpaces +TemporaryFilesForAggregation +TemporaryFilesForJoin +TemporaryFilesForSort +TemporaryFilesUnknown Testflows Tgz +Theil's +ThreadPoolFSReaderThreads +ThreadPoolFSReaderThreadsActive +ThreadPoolRemoteFSReaderThreads +ThreadPoolRemoteFSReaderThreadsActive +ThreadsActive +ThreadsInOvercommitTracker +Timeunit +TinyLog +Tkachenko +ToCenterChild +ToChildren +ToGeo +ToGeoBoundary +ToIPv +ToParent +ToSnowflake +ToString Toolset +TopK +TotalBytesOfMergeTreeTables +TotalPartsOfMergeTreeTables +TotalRowsOfMergeTreeTables +TotalTemporaryFiles Tradeoff Transactional TwoColumnList UBSan +UDFs UInt UIntN +ULID +ULIDStringToDateTime +UMTS +UNDROP UPDATEs +URIs +URL's +URLHash +URLHierarchy +URLPathHierarchy +UUIDNumToString +UUIDStringToNum +UUIDs +UUid +Uber Uint +UncompressedCacheBytes +UncompressedCacheCells +UnidirectionalEdgeIsValid +UniqThetaSketch Updatable +Uppercased +Uptime +Uptrace +UserID Util +VARCHAR +VIEWs +Vadim Valgrind Vectorized +VersionInteger +VersionedCollapsingMergeTree VideoContainer ViewAllLink VirtualBox +WALs +Welch's Werror +Wether +WikiStat +WindowView +WithNames WithNamesAndTypes Woboq +WordNet WriteBuffer WriteBuffers XCode +XHTML +XORs +Xeon YAML +YAMLRegExpTree YYYY +Yandex Yasm +Zabbix Zipkin ZooKeeper ZooKeeper's +ZooKeeperRequest +ZooKeeperSession +ZooKeeperWatch +ZooKeepers aarch +accurateCast +accurateCastOrDefault +accurateCastOrNull +acos +acosh +activecube +activerecord +addDays +addHours +addMinutes +addMonths +addQuarters 
+addSeconds +addWeeks +addYears +addr +addressToLine +addressToLineWithInlines +addressToSymbol +adviced +aggregatefunction +aggregatingmergetree +aggregatio +aggretate +aiochclient allocator +alphaTokens +amplab analytics +anonymize anonymized ansi +anyHeavy +anyIf +anyLast +anyheavy +anylast +appendTrailingCharIfAbsent +argMax +argMin +argmax +argmin +arrayAUC +arrayAll +arrayAvg +arrayCompact +arrayConcat +arrayCount +arrayCumSum +arrayCumSumNonNegative +arrayDifference +arrayDistinct +arrayElement +arrayEnumerate +arrayEnumerateDense +arrayEnumerateUniq +arrayExists +arrayFill +arrayFilter +arrayFirst +arrayFirstIndex +arrayFlatten +arrayIntersect +arrayJoin +arrayLast +arrayLastIndex +arrayMap +arrayMax +arrayMin +arrayPartialReverseSort +arrayPartialSort +arrayPopBack +arrayPopFront +arrayProduct +arrayPushBack +arrayPushFront +arrayReduce +arrayReduceInRanges +arrayResize +arrayReverse +arrayReverseFill +arrayReverseSort +arrayReverseSplit +arraySlice +arraySort +arraySplit +arrayStringConcat +arraySum +arrayUniq +arrayWithConstant +arrayZip +ascii +asin +asinh +assumeNotNull async +asynch +atan +atanh atomicity +auth +authenticator +authenticators +autocompletion +autodetect +autodetected autogenerated autogeneration autostart +avgWeighted +avgweighted avro avx aws backend backoff backticks +balancer +basename +bcrypt benchmarking +bfloat +binlog +bitAnd +bitCount +bitHammingDistance +bitNot +bitOr +bitPositionsToArray +bitRotateLeft +bitRotateRight +bitShiftLeft +bitShiftRight +bitSlice +bitTest +bitTestAll +bitTestAny +bitXor +bitmapAnd +bitmapAndCardinality +bitmapAndnot +bitmapAndnotCardinality +bitmapBuild +bitmapCardinality +bitmapContains +bitmapHasAll +bitmapHasAny +bitmapMax +bitmapMin +bitmapOr +bitmapOrCardinality +bitmapSubsetInRange +bitmapSubsetLimit +bitmapToArray +bitmapTransform +bitmapXor +bitmapXorCardinality +bitmask +bitmaskToArray +bitmaskToList +bitov blake +blockNumber +blockSerializedSize blockSize +blockinfo +blockreader +blocksize +bool boolean bools boringssl +boundingRatio +bozerkins +broadcasted brotli bson bsoneachrow +buffersize +buildId buildable +builtins +byteSize +bytebase +bytesToCutForIPv +cLoki +caConfig +cacheSessions +cachesize camelCase capn capnproto +cardinalities cardinality +cartesian cassandra +casted +catboost +catboostEvaluate +categoricalInformationValue +categoricalinformationvalue +cathetus cbindgen +cbrt ccache cctz +ceil +centroid +certificateFile +cetera cfg +chadmin changelog changelogs charset charsets +chconn checkouting checksummed checksumming checksums +childern +chproxy +chunksize +cickhouse +cipherList +ciphertext +cityHash cityhash cli +clickcat clickhouse +clickhousedb +clickhousex +clickmate clickstream +clickvisual +clockhour +clusterAllReplicas cmake codebase codec +codecs +codepoint +codepoints +collapsingmergetree +combinator +combinators comparising +compressability +concat +concatAssumeInjective +concatWithSeparator +concatWithSeparatorAssumeInjective +cond +conf config configs +congruential +conjuction +conjuctive +const contrib +convertCharset coroutines +cosineDistance +countDigits +countEqual +countMatches +countSubstrings +covarPop +covarSamp +covariates +covarpop +covarsamp +covid cpp cppkafka cpu +cramersV +cramersVBiasCorrected +cramersv +cramersvbiascorrected +criteo crlf croaring cronjob +cryptocurrencies +cryptocurrency +cryptographic csv csvwithnames csvwithnamesandtypes +currentDatabase +currentProfiles +currentRoles +currentUser +customizable +customizations customseparated 
customseparatedwithnames customseparatedwithnamesandtypes +cutFragment +cutIPv +cutQueryString +cutQueryStringAndFragment +cutToFirstSignificantSubdomain +cutToFirstSignificantSubdomainCustom +cutToFirstSignificantSubdomainCustomWithWWW +cutToFirstSignificantSubdomainWithWWW +cutURLParameter +cutWWW cyrus datacenter +datacenters datafiles +datagrip +datalens +datanode dataset datasets +datasource +datatypes +dateName +dateTime +dateTimeToSnowflake datetime datetimes +dayofyear +dbal +dbeaver +dbgen dbms ddl deallocation +deallocations debian +decodeURLComponent +decodeURLFormComponent +decodeXMLComponent decompressor +decrypt +decrypted +decrypts +deduplicate +deduplicated +deduplicating +deduplication +defaultProfiles +defaultRoles +defaultValueOfArgumentType +defaultValueOfTypeName +deltaLake +deltaSum +deltaSumTimestamp +deltalake +deltasum +deltasumtimestamp +demangle +denormalize +denormalized denormalizing denormals deserialization deserialized +deserializing destructor destructors +detectCharset +detectLanguage +detectLanguageMixed +detectLanguageUnknown +determinator +deterministically +dictGet +dictGetChildren +dictGetDescendant +dictGetHierarchy +dictGetOrDefault +dictGetOrNull +dictGetUUID +dictHas +dictIsIn +disableProtocols +disjunction +disjunctions +displaySecretsInShowAndSelect +distro +divideDecimal dmesg +domainWithoutWWW dont +dotProduct +dplyr dragonbox +dropoff +dumpColumnStructure durations +ecto +embeddings +emptyArray +emptyArrayDate +emptyArrayDateTime +emptyArrayFloat +emptyArrayInt +emptyArrayString +emptyArrayToSingle +emptyArrayUInt +enabledProfiles +enabledRoles +encodeURLComponent +encodeURLFormComponent +encodeXMLComponent encodings +encryptions endian +endsWith enum +enum's +enums +erfc +errorCodeToName +evalMLMethod exFAT +expiryMsec +exponentialMovingAverage +exponentialmovingaverage +expr +exprN +extendedVerification +extractAll +extractAllGroups +extractAllGroupsHorizontal +extractAllGroupsVertical +extractKeyValuePairs +extractKeyValuePairsWithEscaping +extractTextFromHTML +extractURLParameter +extractURLParameterNames +extractURLParameters +failover +farmFingerprint +farmHash fastops fcoverage filesystem +filesystemAvailable +filesystemCapacity +filesystemFree filesystems +finalizeAggregation +fips +firstSignificantSubdomain +firstSignificantSubdomainCustom +fixedstring +flamegraph flatbuffers +flink +fluentd fmtlib +formatDateTime +formatDateTimeInJoda +formatDateTimeInJodaSyntax +formatReadableDecimalSize +formatReadableQuantity +formatReadableSize +formatReadableTimeDelta +formatRow +formatRowNoNewline +formated formatschema formatter +freezed +fromModifiedJulianDay +fromModifiedJulianDayOrNull +fromUnixTimestamp +fromUnixTimestampInJodaSyntax fsync +func +fuzzBits fuzzer fuzzers gRPC +gccMurmurHash gcem +generateRandom +generateRandomStructure +generateULID +generateUUIDv +geoDistance +geoToH +geoToS +geobase +geobases +geocode +geohash +geohashDecode +geohashEncode +geohashesInBox +geoip +geospatial +getMacro +getOSKernelVersion +getServerPort +getSetting +getSizeOfEnumType +getblockinfo +getevents github glibc +globalIn +globalNotIn +glushkovds +golang googletest +grafana +graphitemergetree +graphouse +graphql +greatCircleAngle +greatCircleDistance +greaterOrEquals +greenspace +groupArray +groupArrayInsertAt +groupArrayLast +groupArrayMovingAvg +groupArrayMovingSum +groupArraySample +groupBitAnd +groupBitOr +groupBitXor +groupBitmap +groupBitmapAnd +groupBitmapOr +groupBitmapXor +groupUniqArray +grouparray +grouparrayinsertat +grouparraylast 
+grouparraymovingavg +grouparraymovingsum +grouparraysample +groupbitand +groupbitmap +groupbitmapand +groupbitmapor +groupbitmapxor +groupbitor +groupbitxor +groupuniqarray grpc grpcio gtest +gtid +gzip +gzipped +hadoop +halfMD +halfday hardlinks +hasAll +hasAny +hasColumnInTable +hasSubstr +hasToken +hasTokenCaseInsensitive +hasTokenCaseInsensitiveOrNull +hasTokenOrNull +hashtables +haversine +hdbc hdfs +hdfsCluster heredoc heredocs +hiveHash +holistics homebrew +hopEnd +hopStart +horgh +hostName +hostname +hostnames +houseops +hsts +html http https +hudi hyperscan +hypot +hyvor +icosahedron icudata +idempotency +ifNotFinite +ifNull +iframe +ilike +incrementing +indexHint +indexOf +infi +initialQueryID +initializeAggregation +injective +innogames +inodes instantiation +intDiv +intDivOrZero +intExp +intHash integrational integrations interserver +intervalLengthSum +invalidCertificateHandler invariants +invertedindexes +isConstant +isDecimalOverflow +isFinite +isIPAddressInRange +isIPv +isInfinite +isNaN +isNotNull +isNull +isValidJSON +isValidUTF +iteratively +javaHash +javaHashUTF +jbod jdbc jemalloc +joinGet json jsonasstring jsoncolumns @@ -357,14 +1626,44 @@ jsonobjecteachrow jsonstrings jsonstringseachrow jsonstringseachrowwithprogress +jumpConsistentHash +kRing kafka +kafkaMurmurHash kafkacat +keepermap +kerberized +kerberos +kernal +keyspace +keytab +kittenhouse +kolmogorovSmirnovTest +kolmogorovsmirnovtest +kolya konsole +kurtPop +kurtSamp +kurtosis +kurtpop +kurtsamp laion +lang +laravel latencies +ldap +leftPad +leftPadUTF +lemmatization +lemmatize +lemmatized +lengthUTF +lessOrEquals lexicographically +lgamma libFuzzer libc +libcatboost libcpuid libcxx libcxxabi @@ -383,54 +1682,261 @@ libuv libvirt linearizability linearizable +linearized lineasstring linefeeds lineorder linux llvm +loadDefaultCAFile localhost +localread +logTrace +logagent +loghouse +london +lowcardinality +lowerUTF +lowercased +lzma macOS +mailrugo +mailto +makeDate +makeDateTime +mannWhitneyUTest +mannwhitneyutest +mapAdd +mapAll +mapApply +mapConcat +mapContains +mapContainsKeyLike +mapExists +mapExtractKeyLike +mapFilter +mapFromArrays +mapKeys +mapPopulateSeries +mapReverseSort +mapSort +mapSubtract +mapUpdate +mapValues +mappedfile mariadb +matcher +materializedview +maxIntersections +maxIntersectionsPosition +maxMap +maxintersections +maxintersectionsposition +maxmap +maxmind mdadm +meanZTest +meanztest +mebibytes +mergeable +mergetree +messageID +metacharacters +metasymbols +metrica +metroHash +mfedotov +minMap +mindsdb +minimalistic +mininum miniselect +minmap +minmax +mins +misconfiguration +mispredictions +mmap +mmapped +moduloOrZero +mongodb +monthName +moscow msgpack msgpk +multiFuzzyMatchAllIndices +multiFuzzyMatchAny +multiFuzzyMatchAnyIndex +multiIf +multiMatchAllIndices +multiMatchAny +multiMatchAnyIndex +multiSearchAllPositions +multiSearchAllPositionsUTF +multiSearchAny +multiSearchFirstIndex +multiSearchFirstPosition +multibyte multiline +multiplyDecimal +multipolygon +multisets multithread +multiword +munmap +murmurHash murmurhash +musqldump mutex +mydb +myfilter mysql mysqldump mysqljs +mytable +namedatabases +namenetworks +namenode +namepassword +nameprofile +namequota +namespaces natively +nats +nestjs +netloc +ngram +ngramDistance +ngramMinHash +ngramMinHashArg +ngramMinHashArgCaseInsensitive +ngramMinHashArgCaseInsensitiveUTF +ngramMinHashArgUTF +ngramMinHashCaseInsensitive +ngramMinHashCaseInsensitiveUTF +ngramMinHashUTF +ngramSearch +ngramSimHash +ngramSimHashCaseInsensitive 
+ngramSimHashCaseInsensitiveUTF +ngramSimHashUTF +ngrambf +ngrams +nonNegativeDerivative noop +normalizeQuery +normalizeUTF +normalizedQueryHash +notEmpty +notEquals +notILike +notIn +notLike +notretry +nowInBlock +ntile +nullIf nullability nullable +nullables num +numerics +nypd obfuscator +observability odbc ok +omclickhouse +onstraints +ontime +openSSL openSUSE openldap +opensky +openssl opentelemetry +outfile overcommit +overcommitted +overfitting +packetpool +packetsize +pageviews +pandahouse parallelization parallelize parallelized +params +parseDateTime +parseDateTimeBestEffort +parseDateTimeBestEffortOrNull +parseDateTimeBestEffortOrZero +parseDateTimeBestEffortUS +parseDateTimeBestEffortUSOrNull +parseDateTimeBestEffortUSOrZero +parseDateTimeInJodaSyntax +parseDateTimeInJodaSyntaxOrNull +parseDateTimeInJodaSyntaxOrZero +parseDateTimeOrNull +parseDateTimeOrZero +parseTimeDelta +parseable parsers +pathFull pclmulqdq +pcre performant +perl +persistency +phpclickhouse +pipelining +plaintext +plantuml poco +pointInEllipses +pointInPolygon +polygonAreaCartesian +polygonAreaSpherical +polygonConvexHullCartesian +polygonPerimeterCartesian +polygonPerimeterSpherical +polygonsDistanceCartesian +polygonsDistanceSpherical +polygonsEqualsCartesian +polygonsIntersectionCartesian +polygonsIntersectionSpherical +polygonsSymDifferenceCartesian +polygonsSymDifferenceSpherical +polygonsUnionCartesian +polygonsUnionSpherical +polygonsWithinCartesian +polygonsWithinSpherical popcnt +porthttps +positionCaseInsensitive +positionCaseInsensitiveUTF +positionUTF +positiveModulo postfix postfixes postgresql pre +pread +preallocate prebuild prebuilt preemptable +preferServerCiphers +prefertch +prefetch +prefetchsize preloaded +prepend +prepended +prepends +preprocess preprocessed +preprocessing preprocessor presentational prestable @@ -446,77 +1952,317 @@ prettyspace prettyspacemonoblock prettyspacenoescapes prettyspacenoescapesmonoblock +prewhere +privateKeyFile +privateKeyPassphraseHandler prlimit +procfs +profiler prometheus proto protobuf protobufsingle +proxied +pseudorandom +pseudorandomize psql ptrs +pushdown +pwrite py +qryn +quantile +quantileBFloat +quantileDeterministic +quantileExact +quantileExactExclusive +quantileExactHigh +quantileExactInclusive +quantileExactLow +quantileExactWeighted +quantileGK +quantileInterpolatedWeighted +quantileTDigest +quantileTDigestWeighted +quantileTiming +quantileTimingWeighted +quantilebfloat +quantiledeterministic +quantileexact +quantileexactweighted +quantiles +quantilesExactExclusive +quantilesExactInclusive +quantilesGK +quantilesTimingWeighted +quantiletdigest +quantiletdigestweighted +quantiletiming +quantiletimingweighted +quartile +queryID +queryString +queryStringAndFragment +rabbitmq +raduis +randBernoulli +randBinomial +randCanonical +randChiSquared +randConstant +randExponential +randFisherF +randLogNormal +randNegativeBinomial +randNormal +randPoisson +randStudentT +randUniform +randomFixedString +randomPrintableASCII +randomString +randomStringUTF +rankCorr rapidjson rawblob +readWKTMultiPolygon +readWKTPolygon readahead readline readme readonly +rebalance rebalanced +recency +recompress +recompressed +recompressing +recompression +reconnection +recurse +redash +reddit +redisstreams +refcounter +regexpExtract +regexpQuoteMeta +regionHierarchy +regionIn +regionToArea +regionToCity +regionToContinent +regionToCountry +regionToDistrict +regionToName +regionToPopulation +regionToTopContinent +reinitialization +reinitializing +reinterpretAs 
+reinterpretAsDate +reinterpretAsDateTime +reinterpretAsFixedString +reinterpretAsFloat +reinterpretAsInt +reinterpretAsString +reinterpretAsUInt +reinterpretAsUUID +remoteSecure +replaceAll +replaceOne +replaceRegexpAll +replaceRegexpOne +replacingmergetree +replicatable +replicatedmergetree replxx repo representable requestor +requireTLSv +resharding +reshards resultset +retentions rethrow +retransmit retriable +retuned +reverseDNSQuery +reverseUTF +rightPad +rightPadUTF risc riscv ro +roadmap rocksdb +rollup +roundAge +roundBankers +roundDown +roundDuration +roundToExp +routineley +rowNumberInAllBlocks rowNumberInBlock rowbinary rowbinarywithnames rowbinarywithnamesandtypes rsync +rsyslog runnable runningAccumulate +runningConcurrency +runningDifference +runningDifferenceStartingWithFirstValue runtime russian rw sasl +satisfiable +scala schemas +seekable +seektable +sequenceCount +sequenceMatch +sequenceNextNode +serverUUID +sessionCacheSize +sessionIdContext +sessionTimeout +seva +shardCount +shardNum +sharded +sharding +shortcircuit +shoutout simdjson +simpleLinearRegression +simpleaggregatefunction +simplelinearregression +simpod +singlepart +sinh +sipHash +siphash +skewPop +skewSamp +skewness +skewpop +skewsamp skippingerrors +sleepEachRow +snowflakeToDateTime +socketcache +soundex +sparkbar sparsehash +speedscope +splitByChar +splitByNonAlpha +splitByRegexp +splitByString +splitByWhitespace sql +sqlalchemy sqlinsert +sqlite +sqrt src +stacktrace stacktraces +startsWith statbox stateful +stddev +stddevPop +stddevSamp +stddevpop +stddevsamp stderr stdin stdout +stochasticLinearRegression +stochasticLogisticRegression +stochastically +stochasticlinearregression +stochasticlogisticregression +storages +storig +stringToH +stripelog strtod strtoll strtoull +struct structs +studentTTest +studentttest +subBitmap +subarray +subarrays +subcolumn +subcolumns subdirectories +subdirectory +subexpression subexpressions +subfolder +subinterval +subintervals +subkey +submatch submodule submodules +subnet +subnetwork subpattern subpatterns subqueries subquery +subranges +subreddits subseconds substring +substringUTF +substrings +subtitiles +subtractDays +subtractHours +subtractMinutes +subtractMonths +subtractQuarters +subtractSeconds +subtractWeeks +subtractYears subtree subtype sudo +sumCount +sumKahan +sumMap +sumMapFiltered +sumWithOverflow +sumcount +sumkahan +summap +summingmergetree +sumwithoverflow +superaggregates +supremum symlink symlinks syntaxes +syscall +syscalls +syslog +syslogd systemd +tabix +tablum tabseparated tabseparatedraw tabseparatedrawwithnames @@ -524,50 +2270,257 @@ tabseparatedrawwithnamesandtypes tabseparatedwithnames tabseparatedwithnamesandtypes tcp +tcpPort +tcpnodelay templateignorespaces +tgamma tgz th +theilsU +theilsu +themself +threadpool +throwIf +timeSlot +timeSlots +timeZone +timeZoneOf +timeZoneOffset +timezones +tinylog tmp +toColumnTypeName +toDate +toDateOrDefault +toDateOrNull +toDateOrZero +toDateTime +toDateTimeOrDefault +toDateTimeOrNull +toDateTimeOrZero +toDayOfMonth +toDayOfWeek +toDayOfYear +toDecimal +toDecimalString +toFixedString +toFloat +toHour +toIPv +toISOWeek +toISOYear +toInt +toInterval +toJSONString +toLastDayOfMonth +toLastDayOfWeek +toLowCardinality +toMinute +toModifiedJulianDay +toModifiedJulianDayOrNull +toMonday +toMonth +toNullable +toQuarter +toRelativeDayNum +toRelativeHourNum +toRelativeMinuteNum +toRelativeMonthNum +toRelativeQuarterNum +toRelativeSecondNum +toRelativeWeekNum +toRelativeYearNum +toSecond +toStartOfDay 
+toStartOfFifteenMinutes +toStartOfFiveMinutes +toStartOfHour +toStartOfISOYear +toStartOfInterval +toStartOfMinute +toStartOfMonth +toStartOfQuarter +toStartOfSecond +toStartOfTenMinutes +toStartOfWeek +toStartOfYear +toString +toStringCutToZero +toTime +toTimeZone +toType +toTypeName +toUInt +toUUID +toUUIDOrDefault +toUUIDOrNull +toUUIDOrZero +toUnixTimestamp +toValidUTF +toWeek +toYYYYMM +toYYYYMMDD +toYYYYMMDDhhmmss +toYear +toYearWeek +tokenbf tokenization +tokenized +tokenizer toml toolchain toolset +topK +topKWeighted +topLevelDomain +topk +topkweighted transactional transactionally +translateUTF +translocality +trie +trimBoth +trimLeft +trimRight +trunc +tryBase +tryDecrypt tskv tsv tui +tumbleEnd +tumbleStart +tupleDivide +tupleDivideByNumber +tupleElement +tupleHammingDistance +tupleMinus +tupleMultiply +tupleMultiplyByNumber +tupleNegate +tuplePlus +tupleToNameValuePairs turbostat txt +typename ubuntu uint +ulid unary +unbin +uncomment +undrop +unencoded unencrypted +unescaped +unescaping +unhex +unicode +unidimensional +uniq +uniqCombined +uniqExact +uniqHLL +uniqTheta +uniqThetaIntersect +uniqThetaNot +uniqThetaSketch +uniqThetaUnion +uniqUpTo +uniqcombined +uniqexact +uniqhll +uniqtheta +uniqthetasketch +unix +unixODBC unixodbc unoptimized +unparsed +unrealiable +unreplicated +unresolvable +unrounded +untracked +untrusted +untuple +uploaders +upperUTF +uptime +uptrace +uring url +urlCluster +urls userspace userver utils uuid +varPop +varSamp variadic varint +varpop +varsamp vectorized vectorscan +verificationDepth +verificationMode +versionedcollapsingmergetree +vhost +virtualized +visibleWidth +visitParam +visitParamExtractBool +visitParamExtractFloat +visitParamExtractInt +visitParamExtractRaw +visitParamExtractString +visitParamExtractUInt +visitParamHas wchc wchs webpage webserver +weekyear +welchTTest +welchttest wget +which's whitespace whitespaces +wikistat +windowFunnel +wordShingleMinHash +wordShingleMinHashArg +wordShingleMinHashArgCaseInsensitive +wordShingleMinHashArgCaseInsensitiveUTF +wordShingleMinHashArgUTF +wordShingleMinHashCaseInsensitive +wordShingleMinHashCaseInsensitiveUTF +wordShingleMinHashUTF +wordShingleSimHash +wordShingleSimHashCaseInsensitive +wordShingleSimHashCaseInsensitiveUTF +wordShingleSimHashUTF +wordshingleMinHash wrt xcode +xeus +xkcd +xlarge xml +xxHash xz +yandex +youtube zLib zLinux +zabbix zkcopy zlib +znode znodes +zookeeperSessionUptime zstd
diff --git a/utils/check-style/check-doc-aspell b/utils/check-style/check-doc-aspell
index d39769aa930..952dbd5b507 100755
--- a/utils/check-style/check-doc-aspell
+++ b/utils/check-style/check-doc-aspell
@@ -1,5 +1,8 @@
#!/usr/bin/env bash
+# force-enable double star globbing
+shopt -s globstar
+
# Perform spell checking on the docs
if [[ ${1:-} == "--help" ]] || [[ ${1:-} == "-h" ]]; then