diff --git a/.gitmodules b/.gitmodules index dbca3f3f6bc..f0984fec4db 100644 --- a/.gitmodules +++ b/.gitmodules @@ -267,7 +267,7 @@ url = https://github.com/ClickHouse/nats.c [submodule "contrib/vectorscan"] path = contrib/vectorscan - url = https://github.com/ClickHouse/vectorscan.git + url = https://github.com/VectorCamp/vectorscan.git [submodule "contrib/c-ares"] path = contrib/c-ares url = https://github.com/ClickHouse/c-ares diff --git a/README.md b/README.md index 4a619eb4fd3..bbedea364fc 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,6 @@ curl https://clickhouse.com/ | sh ## Upcoming Events * [**v23.5 Release Webinar**](https://clickhouse.com/company/events/v23-5-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-05) - May 31 - 23.5 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. -* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - May 16 * [**ClickHouse Meetup in Barcelona**](https://www.meetup.com/clickhouse-barcelona-user-group/events/292892669) - May 25 * [**ClickHouse Meetup in London**](https://www.meetup.com/clickhouse-london-user-group/events/292892824) - May 25 * [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/293426725/) - Jun 7 diff --git a/contrib/aws b/contrib/aws index ecccfc026a4..ca02358dcc7 160000 --- a/contrib/aws +++ b/contrib/aws @@ -1 +1 @@ -Subproject commit ecccfc026a42b30023289410a67024d561f4bf3e +Subproject commit ca02358dcc7ce3ab733dd4cbcc32734eecfa4ee3 diff --git a/contrib/aws-c-auth b/contrib/aws-c-auth index 30df6c407e2..97133a2b5db 160000 --- a/contrib/aws-c-auth +++ b/contrib/aws-c-auth @@ -1 +1 @@ -Subproject commit 30df6c407e2df43bd244e2c34c9b4a4b87372bfb +Subproject commit 97133a2b5dbca1ccdf88cd6f44f39d0531d27d12 diff --git a/contrib/aws-c-common b/contrib/aws-c-common index 324fd1d973c..45dcb2849c8 160000 --- a/contrib/aws-c-common +++ b/contrib/aws-c-common @@ -1 +1 @@ -Subproject commit 324fd1d973ccb25c813aa747bf1759cfde5121c5 +Subproject commit 45dcb2849c891dba2100b270b4676765c92949ff diff --git a/contrib/aws-c-event-stream b/contrib/aws-c-event-stream index 39bfa94a14b..2f9b60c42f9 160000 --- a/contrib/aws-c-event-stream +++ b/contrib/aws-c-event-stream @@ -1 +1 @@ -Subproject commit 39bfa94a14b7126bf0c1330286ef8db452d87e66 +Subproject commit 2f9b60c42f90840ec11822acda3d8cdfa97a773d diff --git a/contrib/aws-c-http b/contrib/aws-c-http index 2c5a2a7d555..dd344619879 160000 --- a/contrib/aws-c-http +++ b/contrib/aws-c-http @@ -1 +1 @@ -Subproject commit 2c5a2a7d5556600b9782ffa6c9d7e09964df1abc +Subproject commit dd34461987947672444d0bc872c5a733dfdb9711 diff --git a/contrib/aws-c-io b/contrib/aws-c-io index 5d32c453560..d58ed4f272b 160000 --- a/contrib/aws-c-io +++ b/contrib/aws-c-io @@ -1 +1 @@ -Subproject commit 5d32c453560d0823df521a686bf7fbacde7f9be3 +Subproject commit d58ed4f272b1cb4f89ac9196526ceebe5f2b0d89 diff --git a/contrib/aws-c-mqtt b/contrib/aws-c-mqtt index 882c689561a..33c3455cec8 160000 --- a/contrib/aws-c-mqtt +++ b/contrib/aws-c-mqtt @@ -1 +1 @@ -Subproject commit 882c689561a3db1466330ccfe3b63637e0a575d3 +Subproject commit 33c3455cec82b16feb940e12006cefd7b3ef4194 diff --git a/contrib/aws-c-s3 b/contrib/aws-c-s3 index a41255ece72..d7bfe602d69 160000 --- a/contrib/aws-c-s3 +++ b/contrib/aws-c-s3 @@ -1 +1 @@ -Subproject commit a41255ece72a7c887bba7f9d998ca3e14f4c8a1b +Subproject 
commit d7bfe602d6925948f1fff95784e3613cca6a3900 diff --git a/contrib/aws-c-sdkutils b/contrib/aws-c-sdkutils index 25bf5cf225f..208a701fa01 160000 --- a/contrib/aws-c-sdkutils +++ b/contrib/aws-c-sdkutils @@ -1 +1 @@ -Subproject commit 25bf5cf225f977c3accc6a05a0a7a181ef2a4a30 +Subproject commit 208a701fa01e99c7c8cc3dcebc8317da71362972 diff --git a/contrib/aws-checksums b/contrib/aws-checksums index 48e7c0e0147..ad53be196a2 160000 --- a/contrib/aws-checksums +++ b/contrib/aws-checksums @@ -1 +1 @@ -Subproject commit 48e7c0e01479232f225c8044d76c84e74192889d +Subproject commit ad53be196a25bbefa3700a01187fdce573a7d2d0 diff --git a/contrib/aws-cmake/CMakeLists.txt b/contrib/aws-cmake/CMakeLists.txt index 52533cd6483..950a0e06cd0 100644 --- a/contrib/aws-cmake/CMakeLists.txt +++ b/contrib/aws-cmake/CMakeLists.txt @@ -52,8 +52,8 @@ endif() # Directories. SET(AWS_SDK_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws") -SET(AWS_SDK_CORE_DIR "${AWS_SDK_DIR}/aws-cpp-sdk-core") -SET(AWS_SDK_S3_DIR "${AWS_SDK_DIR}/aws-cpp-sdk-s3") +SET(AWS_SDK_CORE_DIR "${AWS_SDK_DIR}/src/aws-cpp-sdk-core") +SET(AWS_SDK_S3_DIR "${AWS_SDK_DIR}/generated/src/aws-cpp-sdk-s3") SET(AWS_AUTH_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-auth") SET(AWS_CAL_DIR "${ClickHouse_SOURCE_DIR}/contrib/aws-c-cal") @@ -118,7 +118,7 @@ configure_file("${AWS_SDK_CORE_DIR}/include/aws/core/SDKConfig.h.in" list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MAJOR=1") list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_MINOR=10") list(APPEND AWS_PUBLIC_COMPILE_DEFS "-DAWS_SDK_VERSION_PATCH=36") - + list(APPEND AWS_SOURCES ${AWS_SDK_CORE_SRC} ${AWS_SDK_CORE_NET_SRC} ${AWS_SDK_CORE_PLATFORM_SRC}) list(APPEND AWS_PUBLIC_INCLUDES diff --git a/contrib/aws-crt-cpp b/contrib/aws-crt-cpp index ec0bea288f4..8a301b7e842 160000 --- a/contrib/aws-crt-cpp +++ b/contrib/aws-crt-cpp @@ -1 +1 @@ -Subproject commit ec0bea288f451d884c0d80d534bc5c66241c39a4 +Subproject commit 8a301b7e842f1daed478090c869207300972379f diff --git a/contrib/aws-s2n-tls b/contrib/aws-s2n-tls index 0f1ba9e5c4a..71f4794b758 160000 --- a/contrib/aws-s2n-tls +++ b/contrib/aws-s2n-tls @@ -1 +1 @@ -Subproject commit 0f1ba9e5c4a67cb3898de0c0b4f911d4194dc8de +Subproject commit 71f4794b7580cf780eb4aca77d69eded5d3c7bb4 diff --git a/contrib/libpqxx b/contrib/libpqxx index a4e83483927..bdd6540fb95 160000 --- a/contrib/libpqxx +++ b/contrib/libpqxx @@ -1 +1 @@ -Subproject commit a4e834839270a8c1f7ff1db351ba85afced3f0e2 +Subproject commit bdd6540fb95ff56c813691ceb5da5a3266cf235d diff --git a/contrib/sparse-checkout/update-aws.sh b/contrib/sparse-checkout/update-aws.sh index c8d4c5a89c2..f86acb54d95 100755 --- a/contrib/sparse-checkout/update-aws.sh +++ b/contrib/sparse-checkout/update-aws.sh @@ -5,8 +5,8 @@ echo "Using sparse checkout for aws" FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout echo '/*' > $FILES_TO_CHECKOUT echo '!/*/*' >> $FILES_TO_CHECKOUT -echo '/aws-cpp-sdk-core/*' >> $FILES_TO_CHECKOUT -echo '/aws-cpp-sdk-s3/*' >> $FILES_TO_CHECKOUT +echo '/src/aws-cpp-sdk-core/*' >> $FILES_TO_CHECKOUT +echo '/generated/src/aws-cpp-sdk-s3/*' >> $FILES_TO_CHECKOUT git config core.sparsecheckout true git checkout $1 diff --git a/contrib/vectorscan b/contrib/vectorscan index 1f4d448314e..38431d11178 160000 --- a/contrib/vectorscan +++ b/contrib/vectorscan @@ -1 +1 @@ -Subproject commit 1f4d448314e581473103187765e4c949d01b4259 +Subproject commit 38431d111781843741a781a57a6381a527d900a4 diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 
e509809c028..5d0a7b50741 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -132,6 +132,9 @@ function run_tests() ADDITIONAL_OPTIONS+=('--report-logs-stats') + clickhouse-test "00001_select_1" > /dev/null ||: + clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" ||: + set +e clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index 10ba597a33a..bd0c59a12cd 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -65,6 +65,9 @@ sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ > /etc/clickhouse-server/config.d/storage_conf.xml.tmp sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml +# it contains some new settings, but we can safely remove it +rm /etc/clickhouse-server/config.d/merge_tree.xml + start stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log @@ -94,6 +97,9 @@ sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \ > /etc/clickhouse-server/config.d/storage_conf.xml.tmp sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml +# it contains some new settings, but we can safely remove it +rm /etc/clickhouse-server/config.d/merge_tree.xml + start clickhouse-client --query="SELECT 'Server version: ', version()" diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 6d5395d46e3..f670d464006 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -177,11 +177,11 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va - `--user, -u` – The username. Default value: default. - `--password` – The password. Default value: empty string. - `--ask-password` - Prompt the user to enter a password. -- `--query, -q` – The query to process when using non-interactive mode. You must specify either `query` or `queries-file` option. -- `--queries-file` – file path with queries to execute. You must specify either `query` or `queries-file` option. -- `--database, -d` – Select the current default database. Default value: the current database from the server settings (‘default’ by default). -- `--multiline, -m` – If specified, allow multiline queries (do not send the query on Enter). +- `--query, -q` – The query to process when using non-interactive mode. Cannot be used simultaneously with `--queries-file`. +- `--queries-file` – file path with queries to execute. Cannot be used simultaneously with `--query`. - `--multiquery, -n` – If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`. +- `--multiline, -m` – If specified, allow multiline queries (do not send the query on Enter). +- `--database, -d` – Select the current default database. Default value: the current database from the server settings (‘default’ by default). - `--format, -f` – Use the specified default format to output the result. - `--vertical, -E` – If specified, use the [Vertical format](../interfaces/formats.md#vertical) by default to output the result. This is the same as `–format=Vertical`. 
In this format, each value is printed on a separate line, which is helpful when displaying wide tables. - `--time, -t` – If specified, print the query execution time to ‘stderr’ in non-interactive mode. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 2239084a429..50b114fcb00 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4220,3 +4220,12 @@ Possible values: - false — Disallow. Default value: `false`. + +## zstd_window_log_max + +Allows you to select the max window log of ZSTD (it will not be used for MergeTree family) + +Type: Int64 + +Default: 0 + diff --git a/docs/en/operations/system-tables/processors_profile_log.md b/docs/en/operations/system-tables/processors_profile_log.md index a6ff15642a1..5eedb5a5dae 100644 --- a/docs/en/operations/system-tables/processors_profile_log.md +++ b/docs/en/operations/system-tables/processors_profile_log.md @@ -5,16 +5,18 @@ This table contains profiling on processors level (that you can find in [`EXPLAI Columns: - `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened. -- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the event happened. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the event happened. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time with microseconds precision when the event happened. - `id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of processor - `parent_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Parent processors IDs +- `plan_step` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the query plan step which created this processor. The value is zero if the processor was not added from any step. +- `plan_group` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Group of the processor if it was created by query plan step. A group is a logical partitioning of processors added from the same query plan step. Group is used only for beautifying the result of EXPLAIN PIPELINE result. +- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). - `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query - `name` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Name of the processor. - `elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was executed. - `input_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting for data (from other processor). - `output_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting because output port was full. -- `plan_step` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the query plan step which created this processor. The value is zero if the processor was not added from any step. -- `plan_group` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Group of the processor if it was created by query plan step. A group is a logical partitioning of processors added from the same query plan step. Group is used only for beautifying the result of EXPLAIN PIPELINE result. 
- `input_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows consumed by processor. - `input_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes consumed by processor. - `output_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows generated by processor. diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index 1bcecfeb161..71e1452cef1 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -59,9 +59,10 @@ Columns: - `query_kind` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Type of the query. - `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the databases present in the query. - `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the tables present in the query. -- `views` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the (materialized or live) views present in the query. - `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the columns present in the query. +- `partitions` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the partitions present in the query. - `projections` ([String](../../sql-reference/data-types/string.md)) — Names of the projections used during the query execution. +- `views` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the (materialized or live) views present in the query. - `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception. - `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message. - `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string, if the query was completed successfully. diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index d6587602990..0443a80cf17 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -183,12 +183,12 @@ Arguments: - `-S`, `--structure` — table structure for input data. - `--input-format` — input format, `TSV` by default. - `-f`, `--file` — path to data, `stdin` by default. -- `-q`, `--query` — queries to execute with `;` as delimiter. You must specify either `query` or `queries-file` option. -- `--queries-file` - file path with queries to execute. You must specify either `query` or `queries-file` option. +- `-q`, `--query` — queries to execute with `;` as delimiter. Cannot be used simultaneously with `--queries-file`. +- `--queries-file` - file path with queries to execute. Cannot be used simultaneously with `--query`. +- `--multiquery, -n` – If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`. 
- `-N`, `--table` — table name where to put output data, `table` by default. - `--format`, `--output-format` — output format, `TSV` by default. - `-d`, `--database` — default database, `_local` by default. -- `--multiquery, -n` – If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`. - `--stacktrace` — whether to dump debug output in case of exception. - `--echo` — print query before execution. - `--verbose` — more details on query execution. diff --git a/docs/en/sql-reference/aggregate-functions/reference/greatest.md b/docs/en/sql-reference/aggregate-functions/reference/greatest.md deleted file mode 100644 index d5efea44790..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference/greatest.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -slug: /en/sql-reference/aggregate-functions/reference/greatest -title: greatest ---- - -Aggregate function that returns the greatest across a list of values. All of the list members must be of comparable types. - -Examples: - -```sql -SELECT - toTypeName(greatest(toUInt8(1), 2, toUInt8(3), 3.)), - greatest(1, 2, toUInt8(3), 3.) -``` -```response -┌─toTypeName(greatest(toUInt8(1), 2, toUInt8(3), 3.))─┬─greatest(1, 2, toUInt8(3), 3.)─┐ -│ Float64 │ 3 │ -└─────────────────────────────────────────────────────┴────────────────────────────────┘ -``` - -:::note -The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison. -::: - -```sql -SELECT greatest(['hello'], ['there'], ['world']) -``` -```response -┌─greatest(['hello'], ['there'], ['world'])─┐ -│ ['world'] │ -└───────────────────────────────────────────┘ -``` - -```sql -SELECT greatest(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3)) -``` -```response -┌─greatest(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐ -│ 2023-05-12 01:16:59.000 │ -└──---──────────────────────────────────────────────────────────────────────────┘ -``` - -:::note -The type returned is a DateTime64 as the DataTime32 must be promoted to 64 bit for the comparison. -::: - -Also see [least](/docs/en/sql-reference/aggregate-functions/reference/least.md). - diff --git a/docs/en/sql-reference/aggregate-functions/reference/last_value.md b/docs/en/sql-reference/aggregate-functions/reference/last_value.md index ebf002e6ae2..7b6e14e4a55 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/last_value.md +++ b/docs/en/sql-reference/aggregate-functions/reference/last_value.md @@ -3,7 +3,7 @@ slug: /en/sql-reference/aggregate-functions/reference/last_value sidebar_position: 8 --- -# first_value +# last_value Selects the last encountered value, similar to `anyLast`, but could accept NULL. diff --git a/docs/en/sql-reference/aggregate-functions/reference/least.md b/docs/en/sql-reference/aggregate-functions/reference/least.md deleted file mode 100644 index ae4b1d43182..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference/least.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -slug: /en/sql-reference/aggregate-functions/reference/least -title: least ---- - -Aggregate function that returns the least across a list of values. All of the list members must be of comparable types. - -Examples: - -```sql -SELECT - toTypeName(least(toUInt8(1), 2, toUInt8(3), 3.)), - least(1, 2, toUInt8(3), 3.) 
-``` -```response -┌─toTypeName(least(toUInt8(1), 2, toUInt8(3), 3.))─┬─least(1, 2, toUInt8(3), 3.)─┐ -│ Float64 │ 1 │ -└──────────────────────────────────────────────────┴─────────────────────────────┘ -``` - -:::note -The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison. -::: - -```sql -SELECT least(['hello'], ['there'], ['world']) -``` -```response -┌─least(['hello'], ['there'], ['world'])─┐ -│ ['hello'] │ -└────────────────────────────────────────┘ -``` - -```sql -SELECT least(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3)) -``` -```response -┌─least(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐ -│ 2023-05-12 01:16:59.000 │ -└────────────────────────────────────────────────────────────────────────────┘ -``` - -:::note -The type returned is a DateTime64 as the DataTime32 must be promoted to 64 bit for the comparison. -::: - -Also see [greatest](/docs/en/sql-reference/aggregate-functions/reference/greatest.md). - diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index f230cbae100..65f1a81f849 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -865,16 +865,34 @@ LIFETIME(3600); The key must have only one `String` type attribute that contains an allowed IP prefix. Other types are not supported yet. -For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys. The syntax is: +The syntax is: ``` sql -dictGetT('dict_name', 'attr_name', tuple(ip)) +dictGetT('dict_name', 'attr_name', ip) ``` The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6. For example: ``` sql -select dictGet('my_ip_trie_dictionary', 'asn', tuple(IPv6StringToNum('2001:db8::1'))) +SELECT dictGet('my_ip_trie_dictionary', 'cca2', toIPv4('202.79.32.10')) AS result; + +┌─result─┐ +│ NP │ +└────────┘ + + +SELECT dictGet('my_ip_trie_dictionary', 'asn', IPv6StringToNum('2001:db8::1')) AS result; + +┌─result─┐ +│ 65536 │ +└────────┘ + + +SELECT dictGet('my_ip_trie_dictionary', ('asn', 'cca2'), IPv6StringToNum('2001:db8::1')) AS result; + +┌─result───────┐ +│ (65536,'ZZ') │ +└──────────────┘ ``` Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned. diff --git a/docs/en/sql-reference/functions/conditional-functions.md b/docs/en/sql-reference/functions/conditional-functions.md index eb86a6e551a..eb4e98961f1 100644 --- a/docs/en/sql-reference/functions/conditional-functions.md +++ b/docs/en/sql-reference/functions/conditional-functions.md @@ -152,3 +152,85 @@ FROM LEFT_RIGHT │ 4 │ ᴺᵁᴸᴸ │ Both equal │ └──────┴───────┴──────────────────┘ ``` + +## greatest + +Returns the greatest across a list of values. All of the list members must be of comparable types. + +Examples: + +```sql +SELECT greatest(1, 2, toUInt8(3), 3.) result, toTypeName(result) type; +``` +```response +┌─result─┬─type────┐ +│ 3 │ Float64 │ +└────────┴─────────┘ +``` + +:::note +The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison. 
+::: + +```sql +SELECT greatest(['hello'], ['there'], ['world']) +``` +```response +┌─greatest(['hello'], ['there'], ['world'])─┐ +│ ['world'] │ +└───────────────────────────────────────────┘ +``` + +```sql +SELECT greatest(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3)) +``` +```response +┌─greatest(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐ +│ 2023-05-12 01:16:59.000 │ +└──---──────────────────────────────────────────────────────────────────────────┘ +``` + +:::note +The type returned is a DateTime64 as the DataTime32 must be promoted to 64 bit for the comparison. +::: + +## least + +Returns the least across a list of values. All of the list members must be of comparable types. + +Examples: + +```sql +SELECT least(1, 2, toUInt8(3), 3.) result, toTypeName(result) type; +``` +```response +┌─result─┬─type────┐ +│ 1 │ Float64 │ +└────────┴─────────┘ +``` + +:::note +The type returned is a Float64 as the UInt8 must be promoted to 64 bit for the comparison. +::: + +```sql +SELECT least(['hello'], ['there'], ['world']) +``` +```response +┌─least(['hello'], ['there'], ['world'])─┐ +│ ['hello'] │ +└────────────────────────────────────────┘ +``` + +```sql +SELECT least(toDateTime32(now() + toIntervalDay(1)), toDateTime64(now(), 3)) +``` +```response +┌─least(toDateTime32(plus(now(), toIntervalDay(1))), toDateTime64(now(), 3))─┐ +│ 2023-05-12 01:16:59.000 │ +└────────────────────────────────────────────────────────────────────────────┘ +``` + +:::note +The type returned is a DateTime64 as the DataTime32 must be promoted to 64 bit for the comparison. +::: diff --git a/docs/en/sql-reference/functions/geo/polygon.md b/docs/en/sql-reference/functions/geo/polygon.md new file mode 100644 index 00000000000..4a8653965c2 --- /dev/null +++ b/docs/en/sql-reference/functions/geo/polygon.md @@ -0,0 +1,396 @@ +--- +slug: /en/sql-reference/functions/geo/polygons +sidebar_label: Polygons +title: "Functions for Working with Polygons" +--- + +## readWKTMultiPolygon + +Converts a WKT (Well Known Text) MultiPolygon into a MultiPolygon type. + +### Example + +``` sql +SELECT + toTypeName(readWKTMultiPolygon('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))')) AS type, + readWKTMultiPolygon('MULTIPOLYGON(((2 0,10 0,10 10,0 10,2 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))') AS output FORMAT Markdown + +``` +| type | output | +|:-|:-| +| MultiPolygon | [[[(2,0),(10,0),(10,10),(0,10),(2,0)],[(4,4),(5,4),(5,5),(4,5),(4,4)]],[[(-10,-10),(-10,-9),(-9,10),(-10,-10)]]] | + + +### Input parameters + +String starting with `MULTIPOLYGON` + +### Returned value + +MultiPolygon + +## readWKTPolygon + +Converts a WKT (Well Known Text) MultiPolygon into a Polygon type. + +### Example + +``` sql +SELECT + toTypeName(readWKTPolygon('POLYGON((2 0,10 0,10 10,0 10,2 0))')) AS type, + readWKTPolygon('POLYGON((2 0,10 0,10 10,0 10,2 0))') AS output +FORMAT Markdown +``` +| type | output | +|:-|:-| +| Polygon | [[(2,0),(10,0),(10,10),(0,10),(2,0)]] | + +### Input parameters + +String starting with `POLYGON` + +### Returned value + +Polygon + +## polygonsWithinSpherical + +Returns true or false depending on whether or not one polygon lies completely inside another polygon. 
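The worked example below returns 0 (the two Brussels polygons do not contain one another). As a minimal, hypothetical illustration of the call shape only, using arbitrary (longitude, latitude) coordinates and the same triple-bracket nesting as the examples on this page, with the output omitted:

``` sql
-- Hypothetical rings: a small square and a much larger square that encloses it.
-- Returns UInt8: 1 when one polygon lies completely inside the other, 0 otherwise.
SELECT polygonsWithinSpherical(
    [[[(1., 1.), (1., 2.), (2., 2.), (2., 1.)]]],
    [[[(0., 0.), (0., 10.), (10., 10.), (10., 0.)]]]);
```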
Reference https://www.boost.org/doc/libs/1_62_0/libs/geometry/doc/html/geometry/reference/algorithms/within/within_2.html + +### Example + +``` sql +select polygonsWithinSpherical([[[(4.3613577, 50.8651821), (4.349556, 50.8535879), (4.3602419, 50.8435626), (4.3830299, 50.8428851), (4.3904543, 50.8564867), (4.3613148, 50.8651279)]]], [[[(4.346693, 50.858306), (4.367945, 50.852455), (4.366227, 50.840809), (4.344961, 50.833264), (4.338074, 50.848677), (4.346693, 50.858306)]]]); +``` +```response +0 +``` + +### Input parameters + +### Returned value + +UInt8, 0 for false, 1 for true + +## polygonsDistanceSpherical + +Calculates the minimal distance between two points where one point belongs to the first polygon and the second to another polygon. Spherical means that coordinates are interpreted as coordinates on a pure and ideal sphere, which is not true for the Earth. Using this type of coordinate system speeds up execution, but of course is not precise. + +### Example + +``` sql +SELECT polygonsDistanceSpherical([[[(0, 0), (0, 0.1), (0.1, 0.1), (0.1, 0)]]], [[[(10., 10.), (10., 40.), (40., 40.), (40., 10.), (10., 10.)]]]) +``` +```response +0.24372872211133834 +``` + +### Input parameters + +Two polygons + +### Returned value + +Float64 + +## polygonsDistanceCartesian + +Calculates distance between two polygons + +### Example + +``` sql +SELECT polygonsDistanceCartesian([[[(0, 0), (0, 0.1), (0.1, 0.1), (0.1, 0)]]], [[[(10., 10.), (10., 40.), (40., 40.), (40., 10.), (10., 10.)]]]) +``` +```response +14.000714267493642 +``` + +### Input parameters + +Two polygons + +### Returned value + +Float64 + +## polygonsEqualsCartesian + +Returns true if two polygons are equal + +### Example + +``` sql +SELECT polygonsEqualsCartesian([[[(1., 1.), (1., 4.), (4., 4.), (4., 1.)]]], [[[(1., 1.), (1., 4.), (4., 4.), (4., 1.), (1., 1.)]]]) +``` +```response +1 +``` + +### Input parameters + +Two polygons + +### Returned value + +UInt8, 0 for false, 1 for true + +## polygonsSymDifferenceSpherical + +Calculates the spatial set theoretic symmetric difference (XOR) between two polygons + +### Example + +``` sql +SELECT wkt(arraySort(polygonsSymDifferenceSpherical([[(50., 50.), (50., -50.), (-50., -50.), (-50., 50.), (50., 50.)], [(10., 10.), (10., 40.), (40., 40.), (40., 10.), (10., 10.)], [(-10., -10.), (-10., -40.), (-40., -40.), (-40., -10.), (-10., -10.)]], [[(-20., -20.), (-20., 20.), (20., 20.), (20., -20.), (-20., -20.)]]))); +``` +```response +MULTIPOLYGON(((-20 -10.3067,-10 -10,-10 -20.8791,-20 -20,-20 -10.3067)),((10 20.8791,20 20,20 10.3067,10 10,10 20.8791)),((50 50,50 -50,-50 -50,-50 50,50 50),(20 10.3067,40 10,40 40,10 40,10 20.8791,-20 20,-20 -10.3067,-40 -10,-40 -40,-10 -40,-10 -20.8791,20 -20,20 10.3067))) +``` + +### Input parameters + +Polygons + +### Returned value + +MultiPolygon + +## polygonsSymDifferenceCartesian + +The same as `polygonsSymDifferenceSpherical`, but the coordinates are in the Cartesian coordinate system; which is more close to the model of the real Earth. + +### Example + +``` sql +SELECT wkt(polygonsSymDifferenceCartesian([[[(0, 0), (0, 3), (1, 2.9), (2, 2.6), (2.6, 2), (2.9, 1), (3, 0), (0, 0)]]], [[[(1., 1.), (1., 4.), (4., 4.), (4., 1.), (1., 1.)]]])) +``` +```response +MULTIPOLYGON(((1 2.9,1 1,2.9 1,3 0,0 0,0 3,1 2.9)),((1 2.9,1 4,4 4,4 1,2.9 1,2.6 2,2 2.6,1 2.9))) +``` + +### Input parameters + +Polygons + +### Returned value + +MultiPolygon + +## polygonsIntersectionSpherical + +Calculates the intersection (AND) between polygons, coordinates are spherical. 
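Before the rounded Brussels example below, a simpler hypothetical sketch of the call shape may help; it uses arbitrary (longitude, latitude) coordinates with the same triple-bracket nesting as the other examples on this page, and the output is not shown:

``` sql
-- Two overlapping rectangles; the intersection is returned as a MultiPolygon,
-- which wkt() renders as Well Known Text.
SELECT wkt(polygonsIntersectionSpherical(
    [[[(0., 0.), (0., 10.), (10., 10.), (10., 0.)]]],
    [[[(5., 5.), (5., 15.), (15., 15.), (15., 5.)]]]));
```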
+ +### Example + +``` sql +SELECT wkt(arrayMap(a -> arrayMap(b -> arrayMap(c -> (round(c.1, 6), round(c.2, 6)), b), a), polygonsIntersectionSpherical([[[(4.3613577, 50.8651821), (4.349556, 50.8535879), (4.3602419, 50.8435626), (4.3830299, 50.8428851), (4.3904543, 50.8564867), (4.3613148, 50.8651279)]]], [[[(4.346693, 50.858306), (4.367945, 50.852455), (4.366227, 50.840809), (4.344961, 50.833264), (4.338074, 50.848677), (4.346693, 50.858306)]]]))) +``` +```response +MULTIPOLYGON(((4.3666 50.8434,4.36024 50.8436,4.34956 50.8536,4.35268 50.8567,4.36794 50.8525,4.3666 50.8434))) +``` + +### Input parameters + +Polygons + +### Returned value + +MultiPolygon + +## polygonsWithinCartesian + +Returns true if the second polygon is within the first polygon. + +### Example + +``` sql +SELECT polygonsWithinCartesian([[[(2., 2.), (2., 3.), (3., 3.), (3., 2.)]]], [[[(1., 1.), (1., 4.), (4., 4.), (4., 1.), (1., 1.)]]]) +``` +```response +1 +``` + +### Input parameters + +Two polygons + +### Returned value + +UInt8, 0 for false, 1 for true + +## polygonConvexHullCartesian + +Calculates a convex hull. [Reference](https://www.boost.org/doc/libs/1_61_0/libs/geometry/doc/html/geometry/reference/algorithms/convex_hull.html) + +Coordinates are in Cartesian coordinate system. + +### Example + +``` sql +SELECT wkt(polygonConvexHullCartesian([[[(0., 0.), (0., 5.), (5., 5.), (5., 0.), (2., 3.)]]])) +``` +```response +POLYGON((0 0,0 5,5 5,5 0,0 0)) +``` + +### Input parameters + +MultiPolygon + +### Returned value + +Polygon + +## polygonAreaSpherical + +Calculates the surface area of a polygon. + +### Example + +``` sql +SELECT round(polygonAreaSpherical([[[(4.346693, 50.858306), (4.367945, 50.852455), (4.366227, 50.840809), (4.344961, 50.833264), (4.338074, 50.848677), (4.346693, 50.858306)]]]), 14) +``` +```response +9.387704e-8 +``` + +### Input parameters + +Polygon + +### Returned value + +Float + +## polygonsUnionSpherical + +Calculates a union (OR). + +### Example + +``` sql +SELECT wkt(polygonsUnionSpherical([[[(4.3613577, 50.8651821), (4.349556, 50.8535879), (4.3602419, 50.8435626), (4.3830299, 50.8428851), (4.3904543, 50.8564867), (4.3613148, 50.8651279)]]], [[[(4.346693, 50.858306), (4.367945, 50.852455), (4.366227, 50.840809), (4.344961, 50.833264), (4.338074, 50.848677), (4.346693, 50.858306)]]])) +``` +```response +MULTIPOLYGON(((4.36661 50.8434,4.36623 50.8408,4.34496 50.8333,4.33807 50.8487,4.34669 50.8583,4.35268 50.8567,4.36136 50.8652,4.36131 50.8651,4.39045 50.8565,4.38303 50.8429,4.36661 50.8434))) +``` + +### Input parameters + +Polygons + +### Returned value + +MultiPolygon + +## polygonPerimeterSpherical + +Calculates the perimeter of the polygon. 
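The full example below traces the border of Zimbabwe; as a smaller, hypothetical sketch that mirrors the single-ring argument shape of that example, with arbitrary coordinates near the equator:

``` sql
-- A roughly 1-degree square ring; the result is a Float.
-- Judging by the other *Spherical functions on this page, it appears to be computed
-- on a unit sphere, i.e. expressed in radians rather than meters (assumption).
SELECT polygonPerimeterSpherical([(0., 0.), (0., 1.), (1., 1.), (1., 0.), (0., 0.)]);
```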
+ +### Example + +This is the polygon representing Zimbabwe: + + +``` +POLYGON((30.0107 -15.6462,30.0502 -15.6401,30.09 -15.6294,30.1301 -15.6237,30.1699 -15.6322,30.1956 -15.6491,30.2072 -15.6532,30.2231 -15.6497,30.231 -15.6447,30.2461 -15.6321,30.2549 -15.6289,30.2801 -15.6323,30.2962 -15.639,30.3281 -15.6524,30.3567 -15.6515,30.3963 -15.636,30.3977 -15.7168,30.3993 -15.812,30.4013 -15.9317,30.4026 -16.0012,30.5148 -16.0004,30.5866 -16,30.7497 -15.9989,30.8574 -15.9981,30.9019 -16.0071,30.9422 -16.0345,30.9583 -16.0511,30.9731 -16.062,30.9898 -16.0643,31.012 -16.0549,31.0237 -16.0452,31.0422 -16.0249,31.0569 -16.0176,31.0654 -16.0196,31.0733 -16.0255,31.0809 -16.0259,31.089 -16.0119,31.1141 -15.9969,31.1585 -16.0002,31.26 -16.0235,31.2789 -16.0303,31.2953 -16.0417,31.3096 -16.059,31.3284 -16.0928,31.3409 -16.1067,31.3603 -16.1169,31.3703 -16.1237,31.3746 -16.1329,31.3778 -16.1422,31.384 -16.1488,31.3877 -16.1496,31.3956 -16.1477,31.3996 -16.1473,31.4043 -16.1499,31.4041 -16.1545,31.4027 -16.1594,31.4046 -16.1623,31.4241 -16.1647,31.4457 -16.165,31.4657 -16.1677,31.4806 -16.178,31.5192 -16.1965,31.6861 -16.2072,31.7107 -16.2179,31.7382 -16.2398,31.7988 -16.3037,31.8181 -16.3196,31.8601 -16.3408,31.8719 -16.3504,31.8807 -16.368,31.8856 -16.4063,31.8944 -16.4215,31.9103 -16.4289,32.0141 -16.4449,32.2118 -16.4402,32.2905 -16.4518,32.3937 -16.4918,32.5521 -16.5534,32.6718 -16.5998,32.6831 -16.6099,32.6879 -16.6243,32.6886 -16.6473,32.6987 -16.6868,32.7252 -16.7064,32.7309 -16.7087,32.7313 -16.7088,32.7399 -16.7032,32.7538 -16.6979,32.7693 -16.6955,32.8007 -16.6973,32.862 -16.7105,32.8934 -16.7124,32.9096 -16.7081,32.9396 -16.6898,32.9562 -16.6831,32.9685 -16.6816,32.9616 -16.7103,32.9334 -16.8158,32.9162 -16.8479,32.9005 -16.8678,32.8288 -16.9351,32.8301 -16.9415,32.8868 -17.0382,32.9285 -17.1095,32.9541 -17.1672,32.9678 -17.2289,32.9691 -17.2661,32.9694 -17.2761,32.9732 -17.2979,32.9836 -17.3178,32.9924 -17.3247,33.0147 -17.3367,33.0216 -17.3456,33.0225 -17.3615,33.0163 -17.3772,33.0117 -17.384,32.9974 -17.405,32.9582 -17.4785,32.9517 -17.4862,32.943 -17.4916,32.9366 -17.4983,32.9367 -17.5094,32.9472 -17.5432,32.9517 -17.5514,32.9691 -17.5646,33.0066 -17.581,33.0204 -17.5986,33.0245 -17.6192,33.0206 -17.6385,33.0041 -17.6756,33.0002 -17.7139,33.0032 -17.7577,32.9991 -17.7943,32.9736 -17.8106,32.957 -17.818,32.9461 -17.8347,32.9397 -17.8555,32.9369 -17.875,32.9384 -17.8946,32.9503 -17.9226,32.9521 -17.9402,32.9481 -17.9533,32.9404 -17.96,32.9324 -17.9649,32.9274 -17.9729,32.929 -17.9823,32.9412 -17.9963,32.9403 -18.0048,32.9349 -18.0246,32.9371 -18.0471,32.9723 -18.1503,32.9755 -18.1833,32.9749 -18.1908,32.9659 -18.2122,32.9582 -18.2254,32.9523 -18.233,32.9505 -18.2413,32.955 -18.2563,32.9702 -18.2775,33.0169 -18.3137,33.035 -18.3329,33.0428 -18.352,33.0381 -18.3631,33.0092 -18.3839,32.9882 -18.4132,32.9854 -18.4125,32.9868 -18.4223,32.9995 -18.4367,33.003 -18.4469,32.9964 -18.4671,32.9786 -18.4801,32.9566 -18.4899,32.9371 -18.501,32.9193 -18.51,32.9003 -18.5153,32.8831 -18.5221,32.8707 -18.5358,32.8683 -18.5526,32.8717 -18.5732,32.8845 -18.609,32.9146 -18.6659,32.9223 -18.6932,32.9202 -18.7262,32.9133 -18.753,32.9025 -18.7745,32.8852 -18.7878,32.8589 -18.79,32.8179 -18.787,32.7876 -18.7913,32.6914 -18.8343,32.6899 -18.8432,32.6968 -18.8972,32.7032 -18.9119,32.7158 -18.9198,32.7051 -18.9275,32.6922 -18.9343,32.6825 -18.9427,32.6811 -18.955,32.6886 -18.9773,32.6903 -18.9882,32.6886 -19.001,32.6911 -19.0143,32.699 -19.0222,32.7103 -19.026,32.7239 -19.0266,32.786 -19.0177,32.8034 
-19.0196,32.8142 -19.0238,32.82 -19.0283,32.823 -19.0352,32.8253 -19.0468,32.8302 -19.0591,32.8381 -19.0669,32.8475 -19.0739,32.8559 -19.0837,32.8623 -19.1181,32.8332 -19.242,32.8322 -19.2667,32.8287 -19.2846,32.8207 -19.3013,32.8061 -19.3234,32.7688 -19.3636,32.7665 -19.3734,32.7685 -19.4028,32.7622 -19.4434,32.7634 -19.464,32.7739 -19.4759,32.7931 -19.4767,32.8113 -19.4745,32.8254 -19.4792,32.8322 -19.5009,32.8325 -19.5193,32.8254 -19.5916,32.8257 -19.6008,32.8282 -19.6106,32.8296 -19.6237,32.8254 -19.6333,32.8195 -19.642,32.8163 -19.6521,32.8196 -19.6743,32.831 -19.6852,32.8491 -19.6891,32.8722 -19.6902,32.8947 -19.6843,32.9246 -19.6553,32.9432 -19.6493,32.961 -19.6588,32.9624 -19.6791,32.9541 -19.7178,32.9624 -19.7354,32.9791 -19.7514,33.0006 -19.7643,33.0228 -19.7731,33.0328 -19.7842,33.0296 -19.8034,33.0229 -19.8269,33.0213 -19.8681,33.002 -19.927,32.9984 -20.0009,33.0044 -20.0243,33.0073 -20.032,32.9537 -20.0302,32.9401 -20.0415,32.9343 -20.0721,32.9265 -20.0865,32.9107 -20.0911,32.8944 -20.094,32.8853 -20.103,32.8779 -20.1517,32.8729 -20.1672,32.8593 -20.1909,32.8571 -20.2006,32.8583 -20.2075,32.8651 -20.2209,32.8656 -20.2289,32.8584 -20.2595,32.853 -20.2739,32.8452 -20.2867,32.8008 -20.3386,32.7359 -20.4142,32.7044 -20.4718,32.6718 -20.5318,32.6465 -20.558,32.6037 -20.5648,32.5565 -20.5593,32.5131 -20.5646,32.4816 -20.603,32.4711 -20.6455,32.4691 -20.6868,32.4835 -20.7942,32.4972 -20.8981,32.491 -20.9363,32.4677 -20.9802,32.4171 -21.0409,32.3398 -21.1341,32.3453 -21.1428,32.3599 -21.1514,32.3689 -21.163,32.3734 -21.1636,32.3777 -21.1634,32.3806 -21.1655,32.3805 -21.1722,32.3769 -21.1785,32.373 -21.184,32.3717 -21.1879,32.4446 -21.3047,32.4458 -21.309,32.4472 -21.3137,32.4085 -21.2903,32.373 -21.3279,32.3245 -21.3782,32.2722 -21.4325,32.2197 -21.4869,32.1673 -21.5413,32.1148 -21.5956,32.0624 -21.65,32.01 -21.7045,31.9576 -21.7588,31.9052 -21.8132,31.8527 -21.8676,31.8003 -21.922,31.7478 -21.9764,31.6955 -22.0307,31.6431 -22.0852,31.5907 -22.1396,31.5382 -22.1939,31.4858 -22.2483,31.4338 -22.302,31.3687 -22.345,31.2889 -22.3973,31.2656 -22.3655,31.2556 -22.358,31.2457 -22.3575,31.2296 -22.364,31.2215 -22.3649,31.2135 -22.3619,31.1979 -22.3526,31.1907 -22.3506,31.1837 -22.3456,31.1633 -22.3226,31.1526 -22.3164,31.1377 -22.3185,31.1045 -22.3334,31.097 -22.3349,31.0876 -22.3369,31.0703 -22.3337,31.0361 -22.3196,30.9272 -22.2957,30.8671 -22.2896,30.8379 -22.2823,30.8053 -22.2945,30.6939 -22.3028,30.6743 -22.3086,30.6474 -22.3264,30.6324 -22.3307,30.6256 -22.3286,30.6103 -22.3187,30.6011 -22.3164,30.5722 -22.3166,30.5074 -22.3096,30.4885 -22.3102,30.4692 -22.3151,30.4317 -22.3312,30.4127 -22.3369,30.3721 -22.3435,30.335 -22.3447,30.3008 -22.337,30.2693 -22.3164,30.2553 -22.3047,30.2404 -22.2962,30.2217 -22.2909,30.197 -22.2891,30.1527 -22.2948,30.1351 -22.2936,30.1111 -22.2823,30.0826 -22.2629,30.0679 -22.2571,30.0381 -22.2538,30.0359 -22.2506,30.0345 -22.2461,30.0155 -22.227,30.0053 -22.2223,29.9838 -22.2177,29.974 -22.214,29.9467 -22.1983,29.9321 -22.1944,29.896 -22.1914,29.8715 -22.1793,29.8373 -22.1724,29.7792 -22.1364,29.7589 -22.1309,29.6914 -22.1341,29.6796 -22.1383,29.6614 -22.1265,29.6411 -22.1292,29.604 -22.1451,29.5702 -22.142,29.551 -22.146,29.5425 -22.1625,29.5318 -22.1724,29.5069 -22.1701,29.4569 -22.1588,29.4361 -22.1631,29.3995 -22.1822,29.378 -22.1929,29.3633 -22.1923,29.3569 -22.1909,29.3501 -22.1867,29.2736 -22.1251,29.2673 -22.1158,29.2596 -22.0961,29.2541 -22.0871,29.2444 -22.0757,29.2393 -22.0726,29.1449 -22.0753,29.108 -22.0692,29.0708 -22.051,29.0405 
-22.0209,29.0216 -21.9828,29.0138 -21.9404,29.0179 -21.8981,29.0289 -21.8766,29.0454 -21.8526,29.0576 -21.8292,29.0553 -21.81,29.0387 -21.7979,28.9987 -21.786,28.9808 -21.7748,28.9519 -21.7683,28.891 -21.7649,28.8609 -21.7574,28.7142 -21.6935,28.6684 -21.68,28.6297 -21.6513,28.6157 -21.6471,28.5859 -21.6444,28.554 -21.6366,28.5429 -21.6383,28.5325 -21.6431,28.4973 -21.6515,28.4814 -21.6574,28.4646 -21.6603,28.4431 -21.6558,28.3618 -21.6163,28.3219 -21.6035,28.2849 -21.5969,28.1657 -21.5952,28.0908 -21.5813,28.0329 -21.5779,28.0166 -21.5729,28.0026 -21.5642,27.9904 -21.5519,27.9847 -21.5429,27.9757 -21.5226,27.9706 -21.5144,27.9637 -21.5105,27.9581 -21.5115,27.9532 -21.5105,27.9493 -21.5008,27.9544 -21.4878,27.9504 -21.482,27.9433 -21.4799,27.9399 -21.478,27.9419 -21.4685,27.9496 -21.4565,27.953 -21.4487,27.9502 -21.4383,27.9205 -21.3812,27.9042 -21.3647,27.8978 -21.3554,27.8962 -21.3479,27.8967 -21.3324,27.8944 -21.3243,27.885 -21.3102,27.8491 -21.2697,27.8236 -21.2317,27.7938 -21.1974,27.7244 -21.1497,27.7092 -21.1345,27.6748 -21.0901,27.6666 -21.0712,27.6668 -21.0538,27.679 -21.0007,27.6804 -20.9796,27.6727 -20.9235,27.6726 -20.9137,27.6751 -20.8913,27.6748 -20.8799,27.676 -20.8667,27.6818 -20.8576,27.689 -20.849,27.6944 -20.8377,27.7096 -20.7567,27.7073 -20.7167,27.6825 -20.6373,27.6904 -20.6015,27.7026 -20.5661,27.7056 -20.5267,27.6981 -20.5091,27.6838 -20.4961,27.666 -20.4891,27.6258 -20.4886,27.5909 -20.4733,27.5341 -20.483,27.4539 -20.4733,27.3407 -20.473,27.306 -20.4774,27.2684 -20.4958,27.284 -20.3515,27.266 -20.2342,27.2149 -20.1105,27.2018 -20.093,27.1837 -20.0823,27.1629 -20.0766,27.1419 -20.0733,27.1297 -20.0729,27.1198 -20.0739,27.1096 -20.0732,27.0973 -20.0689,27.0865 -20.0605,27.0692 -20.0374,27.0601 -20.0276,27.0267 -20.0101,26.9943 -20.0068,26.9611 -20.0072,26.9251 -20.0009,26.8119 -19.9464,26.7745 -19.9398,26.7508 -19.9396,26.731 -19.9359,26.7139 -19.9274,26.6986 -19.9125,26.6848 -19.8945,26.6772 -19.8868,26.6738 -19.8834,26.6594 -19.8757,26.6141 -19.8634,26.5956 -19.8556,26.5819 -19.8421,26.5748 -19.8195,26.5663 -19.8008,26.5493 -19.7841,26.5089 -19.7593,26.4897 -19.7519,26.4503 -19.7433,26.4319 -19.7365,26.4128 -19.7196,26.3852 -19.6791,26.3627 -19.6676,26.3323 -19.6624,26.3244 -19.6591,26.3122 -19.6514,26.3125 -19.6496,26.3191 -19.6463,26.3263 -19.6339,26.3335 -19.613,26.331 -19.605,26.3211 -19.592,26.3132 -19.5842,26.3035 -19.5773,26.2926 -19.5725,26.2391 -19.5715,26.1945 -19.5602,26.1555 -19.5372,26.1303 -19.5011,26.0344 -19.2437,26.0114 -19.1998,25.9811 -19.1618,25.9565 -19.1221,25.9486 -19.1033,25.9449 -19.0792,25.9481 -19.0587,25.9644 -19.0216,25.9678 -19.001,25.9674 -18.9999,25.9407 -18.9213,25.8153 -18.814,25.7795 -18.7388,25.7734 -18.6656,25.7619 -18.6303,25.7369 -18.6087,25.6983 -18.5902,25.6695 -18.566,25.6221 -18.5011,25.6084 -18.4877,25.5744 -18.4657,25.5085 -18.3991,25.4956 -18.3789,25.4905 -18.3655,25.4812 -18.3234,25.4732 -18.3034,25.4409 -18.2532,25.4088 -18.176,25.3875 -18.139,25.3574 -18.1158,25.3234 -18.0966,25.2964 -18.0686,25.255 -18.0011,25.2261 -17.9319,25.2194 -17.908,25.2194 -17.8798,25.2598 -17.7941,25.2667 -17.8009,25.2854 -17.8093,25.3159 -17.8321,25.3355 -17.8412,25.3453 -17.8426,25.3765 -17.8412,25.4095 -17.853,25.4203 -17.8549,25.4956 -17.8549,25.5007 -17.856,25.5102 -17.8612,25.5165 -17.8623,25.5221 -17.8601,25.5309 -17.851,25.5368 -17.8487,25.604 -17.8362,25.657 -17.8139,25.6814 -17.8115,25.6942 -17.8194,25.7064 -17.8299,25.7438 -17.8394,25.766 -17.8498,25.786 -17.8622,25.7947 -17.8727,25.8044 -17.8882,25.8497 -17.9067,25.8636 
-17.9238,25.8475 -17.9294,25.8462 -17.9437,25.8535 -17.96,25.8636 -17.9716,25.9245 -17.999,25.967 -18.0005,25.9785 -17.999,26.0337 -17.9716,26.0406 -17.9785,26.0466 -17.9663,26.0625 -17.9629,26.0812 -17.9624,26.0952 -17.9585,26.0962 -17.9546,26.0942 -17.9419,26.0952 -17.9381,26.1012 -17.9358,26.1186 -17.9316,26.1354 -17.9226,26.1586 -17.9183,26.1675 -17.9136,26.203 -17.8872,26.2119 -17.8828,26.2211 -17.8863,26.2282 -17.8947,26.2339 -17.904,26.2392 -17.9102,26.2483 -17.9134,26.2943 -17.9185,26.3038 -17.9228,26.312 -17.9284,26.3183 -17.9344,26.3255 -17.936,26.3627 -17.9306,26.4086 -17.939,26.4855 -17.9793,26.5271 -17.992,26.5536 -17.9965,26.5702 -18.0029,26.5834 -18.0132,26.5989 -18.03,26.6127 -18.0412,26.6288 -18.0492,26.6857 -18.0668,26.7 -18.0692,26.7119 -18.0658,26.7406 -18.0405,26.7536 -18.033,26.7697 -18.029,26.794 -18.0262,26.8883 -17.9846,26.912 -17.992,26.9487 -17.9689,26.9592 -17.9647,27.0063 -17.9627,27.0213 -17.9585,27.0485 -17.9443,27.0782 -17.917,27.1154 -17.8822,27.149 -17.8425,27.1465 -17.8189,27.1453 -17.7941,27.147 -17.7839,27.1571 -17.7693,27.4221 -17.5048,27.5243 -17.4151,27.5773 -17.3631,27.6045 -17.3128,27.6249 -17.2333,27.6412 -17.1985,27.7773 -17.0012,27.8169 -16.9596,27.8686 -16.9297,28.023 -16.8654,28.1139 -16.8276,28.2125 -16.7486,28.2801 -16.7065,28.6433 -16.5688,28.6907 -16.5603,28.7188 -16.5603,28.7328 -16.5581,28.7414 -16.5507,28.7611 -16.5323,28.7693 -16.5152,28.8089 -16.4863,28.8225 -16.4708,28.8291 -16.4346,28.8331 -16.4264,28.8572 -16.3882,28.857 -16.3655,28.8405 -16.3236,28.8368 -16.3063,28.8403 -16.2847,28.8642 -16.2312,28.8471 -16.2027,28.8525 -16.1628,28.8654 -16.1212,28.871 -16.0872,28.8685 -16.0822,28.8638 -16.0766,28.8593 -16.0696,28.8572 -16.0605,28.8603 -16.0494,28.8741 -16.0289,28.8772 -16.022,28.8989 -15.9955,28.9324 -15.9637,28.9469 -15.9572,28.9513 -15.9553,28.9728 -15.9514,29.0181 -15.9506,29.0423 -15.9463,29.0551 -15.9344,29.0763 -15.8954,29.0862 -15.8846,29.1022 -15.8709,29.1217 -15.8593,29.1419 -15.8545,29.151 -15.8488,29.1863 -15.8128,29.407 -15.7142,29.4221 -15.711,29.5085 -15.7036,29.5262 -15.6928,29.5634 -15.6621,29.5872 -15.6557,29.6086 -15.6584,29.628 -15.6636,29.6485 -15.6666,29.6728 -15.6633,29.73 -15.6447,29.7733 -15.6381,29.8143 -15.6197,29.8373 -15.6148,29.8818 -15.6188,29.9675 -15.6415,30.0107 -15.6462)) +``` + +``` sql +SELECT round(polygonPerimeterSpherical([(30.010654, -15.646227), (30.050238, -15.640129), (30.090029, -15.629381), (30.130129, -15.623696), (30.16992, -15.632171), (30.195552, -15.649121), (30.207231, -15.653152), (30.223147, -15.649741), (30.231002, -15.644677), (30.246091, -15.632068), (30.254876, -15.628864), (30.280094, -15.632275), (30.296196, -15.639042), (30.32805, -15.652428), (30.356679, -15.651498), (30.396263, -15.635995), (30.39771, -15.716817), (30.39926, -15.812005), (30.401327, -15.931688), (30.402568, -16.001244), (30.514809, -16.000418), (30.586587, -16.000004), (30.74973, -15.998867), (30.857424, -15.998144), (30.901865, -16.007136), (30.942173, -16.034524), (30.958296, -16.05106), (30.973075, -16.062016), (30.989767, -16.06429), (31.012039, -16.054885), (31.023718, -16.045169), (31.042218, -16.024912), (31.056895, -16.017574), (31.065421, -16.019641), (31.073328, -16.025532), (31.080872, -16.025946), (31.089037, -16.01189), (31.1141, -15.996904), (31.15849, -16.000211), (31.259983, -16.023465), (31.278897, -16.030287), (31.29533, -16.041655), (31.309592, -16.059019), (31.328351, -16.092815), (31.340908, -16.106664), (31.360339, -16.116896), (31.37026, -16.123718), (31.374601, -16.132916), 
(31.377754, -16.142218), (31.384006, -16.148832), (31.387727, -16.149556), (31.395582, -16.147695), (31.399613, -16.147282), (31.404315, -16.149866), (31.404057, -16.154517), (31.402713, -16.159374), (31.404574, -16.162268), (31.424107, -16.164749), (31.445708, -16.164955), (31.465655, -16.167746), (31.480641, -16.177978), (31.519192, -16.196478), (31.686107, -16.207227), (31.710705, -16.217872), (31.738197, -16.239783), (31.798761, -16.303655), (31.818088, -16.319571), (31.86005, -16.340759), (31.871935, -16.35037), (31.88072, -16.368044), (31.88563, -16.406284), (31.894363, -16.421477), (31.910279, -16.428919), (32.014149, -16.444938), (32.211759, -16.440184), (32.290463, -16.45176), (32.393661, -16.491757), (32.5521, -16.553355), (32.671783, -16.599761), (32.6831, -16.609889), (32.687906, -16.624255), (32.68863, -16.647303), (32.698655, -16.686784), (32.725217, -16.706421), (32.73095, -16.708656), (32.731314, -16.708798), (32.739893, -16.703217), (32.753845, -16.697946), (32.769348, -16.695466), (32.800664, -16.697326), (32.862004, -16.710452), (32.893372, -16.712415), (32.909598, -16.708075), (32.93957, -16.689781), (32.95621, -16.683063), (32.968509, -16.681615999999998), (32.961585, -16.710348), (32.933369, -16.815768), (32.916213, -16.847911), (32.900503, -16.867755), (32.828776, -16.935141), (32.83012, -16.941549), (32.886757, -17.038184), (32.928512, -17.109497), (32.954143, -17.167168), (32.967786, -17.22887), (32.96909, -17.266115), (32.969439, -17.276102), (32.973212, -17.297909), (32.983599, -17.317753), (32.992384, -17.324678), (33.014656, -17.336667), (33.021633, -17.345555), (33.022459, -17.361471), (33.016258, -17.377181), (33.011651, -17.383991), (32.997448, -17.404983), (32.958174, -17.478467), (32.951663, -17.486218), (32.942981, -17.491593), (32.936573, -17.498311), (32.936676, -17.509369), (32.947218, -17.543166), (32.951663, -17.551434), (32.969129, -17.56456), (33.006646, -17.580993), (33.020392, -17.598563), (33.024526, -17.619233), (33.020599, -17.638457), (33.004063, -17.675561), (33.000238, -17.713905), (33.003184, -17.757726), (32.999102, -17.794313), (32.973573, -17.810643), (32.957037, -17.817981), (32.946082, -17.834724), (32.939674, -17.855498), (32.936883, -17.875032), (32.938433, -17.894566), (32.950267, -17.922574), (32.952128, -17.940247), (32.948149, -17.95327), (32.940397, -17.959988), (32.932439, -17.964949), (32.927375, -17.972907), (32.928977, -17.982312), (32.941224, -17.996265), (32.940294, -18.004843), (32.934919, -18.024583), (32.93709, -18.047114), (32.972282, -18.150261), (32.975537, -18.183333), (32.974865, -18.190775), (32.965925, -18.212169), (32.958174, -18.225398), (32.952283, -18.233046), (32.950525999999996, -18.241314), (32.95497, -18.256301), (32.970163, -18.277488), (33.016878, -18.313661), (33.034965, -18.332885), (33.042768, -18.352005), (33.038066, -18.363064), (33.00923, -18.383941), (32.988198, -18.41319), (32.985356, -18.412467), (32.986803, -18.422285), (32.999515, -18.436651), (33.003029, -18.446883), (32.996414, -18.46714), (32.978586, -18.48006), (32.956624, -18.489878), (32.937142, -18.50104), (32.919313, -18.510032), (32.900296, -18.515303), (32.88314, -18.522124), (32.870737, -18.535767), (32.868257, -18.552613), (32.871668, -18.57318), (32.884483, -18.609044), (32.914559, -18.665888), (32.92231, -18.693173), (32.920243, -18.726246), (32.913267, -18.753014), (32.902518, -18.774512), (32.885207, -18.787844), (32.858852, -18.790015), (32.817924, -18.787018), (32.787642, -18.791255), (32.69142, -18.83425), (32.68987, 
-18.843241), (32.696794, -18.897192), (32.703202, -18.911868), (32.71576, -18.919826), (32.705063, -18.927474), (32.692247, -18.934295), (32.682532, -18.942667), (32.681085, -18.954966), (32.68863, -18.97729), (32.690283, -18.988246), (32.68863, -19.000958), (32.691058, -19.01429), (32.698965, -19.022249), (32.710282, -19.025969), (32.723873, -19.026589), (32.785988, -19.017701), (32.803351, -19.019561), (32.814203, -19.023799), (32.819991, -19.028346), (32.822988, -19.035168), (32.825262, -19.046847), (32.830223, -19.059146), (32.83813, -19.066897), (32.847483, -19.073925), (32.855906, -19.083744), (32.862262, -19.118057), (32.83322, -19.241977), (32.832187, -19.266678), (32.828673, -19.284558), (32.820715, -19.301301), (32.806142, -19.323419), (32.768831, -19.363623), (32.766454, -19.373442), (32.768521, -19.402794), (32.762217, -19.443412), (32.763354, -19.463979), (32.773947, -19.475864), (32.793119, -19.476691), (32.811309, -19.474521), (32.825365, -19.479172), (32.832187, -19.500876), (32.832497000000004, -19.519273), (32.825365, -19.59162), (32.825675, -19.600818), (32.828156, -19.610636), (32.829603, -19.623659), (32.825365, -19.633271), (32.819474, -19.641952), (32.81627, -19.652081), (32.819629, -19.674302), (32.83105, -19.685154), (32.849137, -19.689081), (32.872184, -19.690218), (32.894715, -19.684327), (32.924584, -19.655285), (32.943188, -19.64929), (32.960964, -19.658799), (32.962411, -19.679056), (32.954143, -19.717813), (32.962411, -19.735383), (32.979051, -19.751403), (33.0006, -19.764322), (33.022769, -19.773107), (33.032795, -19.784166), (33.029642, -19.80339), (33.022873, -19.826851), (33.021322, -19.868088), (33.001995, -19.927), (32.998378, -20.000897), (33.004373, -20.024255), (33.007266, -20.032006), (32.95373, -20.030249), (32.940087, -20.041515), (32.934299, -20.072107), (32.926548, -20.086473), (32.910683, -20.091124), (32.894405, -20.094018), (32.88531, -20.10301), (32.877869, -20.151689), (32.872908, -20.167192), (32.859265, -20.190859), (32.857095, -20.200575), (32.858335, -20.207499), (32.865053, -20.220935), (32.86557, -20.228893), (32.858438, -20.259486), (32.852961, -20.273852), (32.845209, -20.286668), (32.800767, -20.338551), (32.735862, -20.414205), (32.704443, -20.471773), (32.671783, -20.531821), (32.646462, -20.557969), (32.603674, -20.56479), (32.556545, -20.559312), (32.513136, -20.564583), (32.481614, -20.603031), (32.471072, -20.645509), (32.469108, -20.68685), (32.483474, -20.794233), (32.49722, -20.898103), (32.491019, -20.936344), (32.467661, -20.980165), (32.417122, -21.040937), (32.339814, -21.134058), (32.345343, -21.142843), (32.359864, -21.151421), (32.368856, -21.162997), (32.373352, -21.163617), (32.377744, -21.16341), (32.380638, -21.165477), (32.380535, -21.172195), (32.376866, -21.178499), (32.37299, -21.183977), (32.37175, -21.187905), (32.444613, -21.304693), (32.445849, -21.308994), (32.447197, -21.313685), (32.408543, -21.290327), (32.37299, -21.327948), (32.324517, -21.378177), (32.272221, -21.432541), (32.219718, -21.486904), (32.167318, -21.541268), (32.114814, -21.595632), (32.062415, -21.649995), (32.010015, -21.704462), (31.957615, -21.758826), (31.905215, -21.813189), (31.852712, -21.867553), (31.800312, -21.92202), (31.747808, -21.976384), (31.695512, -22.030747), (31.643112, -22.085214), (31.590712, -22.139578), (31.538209, -22.193941), (31.485809, -22.248305), (31.433822, -22.302048), (31.36871, -22.345043), (31.288922, -22.39734), (31.265616, -22.365507), (31.255642, -22.357962), (31.24572, -22.357549), (31.229597, 
-22.363957), (31.221536, -22.364887), (31.213474, -22.36189), (31.197868, -22.352588), (31.190685, -22.350624), (31.183657, -22.34556), (31.163348, -22.322616), (31.152599, -22.316414), (31.137717, -22.318482), (31.10454, -22.333364), (31.097048, -22.334922), (31.087642, -22.336878), (31.07033, -22.333674), (31.036121, -22.319618), (30.927187, -22.295744), (30.867087, -22.289646), (30.83789, -22.282308), (30.805282, -22.294504), (30.693919, -22.302772), (30.674282, -22.30856), (30.647410999999998, -22.32644), (30.632424, -22.330677), (30.625551, -22.32861), (30.610307, -22.318688), (30.601108, -22.316414), (30.57217, -22.316621), (30.507367, -22.309593), (30.488454, -22.310213), (30.46923, -22.315071), (30.431713, -22.331194), (30.412696, -22.336878), (30.372078, -22.343493), (30.334975, -22.344733), (30.300765, -22.336982), (30.269346, -22.316414), (30.25529, -22.304736), (30.240407, -22.296157), (30.2217, -22.290886), (30.196999, -22.289129), (30.15266, -22.294814), (30.13509, -22.293574), (30.111113, -22.282308), (30.082587, -22.262878), (30.067911, -22.25709), (30.038145, -22.253783), (30.035872, -22.250579), (30.034528, -22.246135), (30.015511, -22.227014), (30.005279, -22.22226), (29.983782, -22.217713), (29.973963, -22.213992), (29.946678, -22.198282), (29.932105, -22.194355), (29.896035, -22.191358), (29.871489, -22.179265), (29.837331, -22.172444), (29.779246, -22.136374), (29.758886, -22.130896), (29.691448, -22.1341), (29.679614, -22.138338), (29.661424, -22.126452), (29.641064, -22.129242), (29.60396, -22.145055), (29.570164, -22.141955), (29.551043, -22.145986), (29.542517, -22.162522), (29.53182, -22.172444), (29.506912, -22.170067), (29.456889, -22.158801), (29.436115, -22.163142), (29.399528, -22.182159), (29.378031, -22.192908), (29.363250999999998, -22.192288), (29.356947, -22.190944000000002), (29.350074, -22.186707), (29.273644, -22.125108), (29.26734, -22.115807), (29.259588, -22.096066), (29.254111, -22.087074), (29.244395, -22.075706), (29.239331, -22.072605), (29.144867, -22.075292), (29.10797, -22.069194), (29.070763, -22.051004), (29.040532, -22.020929), (29.021567, -21.982791), (29.013815, -21.940417), (29.017949, -21.898145), (29.028905, -21.876648), (29.045441, -21.852567), (29.057637, -21.829209), (29.05526, -21.809985), (29.038723, -21.797893), (28.998726, -21.786008), (28.980846, -21.774845), (28.951907, -21.768334), (28.891032, -21.764924), (28.860853, -21.757379), (28.714195, -21.693507), (28.66841, -21.679968), (28.629704, -21.651339), (28.6157, -21.647101), (28.585934, -21.644414), (28.553998, -21.636559), (28.542939, -21.638316), (28.532501, -21.643071), (28.497309, -21.651546), (28.481393, -21.657437), (28.464598, -21.660331), (28.443101, -21.655783), (28.361762, -21.616302), (28.321919, -21.603486), (28.284867, -21.596872), (28.165702, -21.595218), (28.090771, -21.581266), (28.032893, -21.577855), (28.016563, -21.572894), (28.002559, -21.564212), (27.990415, -21.551913), (27.984731, -21.542922), (27.975739, -21.522561), (27.970571, -21.514396), (27.963698, -21.510469), (27.958066, -21.511502), (27.953208, -21.510469), (27.949281, -21.500754), (27.954448, -21.487835), (27.950418, -21.482047), (27.943338, -21.479876), (27.939876, -21.478016), (27.941943, -21.468508), (27.949642, -21.456519), (27.953001, -21.448664), (27.950211, -21.438329), (27.920549, -21.381174), (27.904219, -21.364741), (27.897811, -21.35544), (27.896157, -21.347895), (27.896674, -21.332392), (27.8944, -21.32433), (27.884995, -21.310171), (27.849132, -21.269657), (27.823604, 
-21.231726), (27.793838, -21.197413), (27.724385, -21.149664), (27.709192, -21.134471), (27.674775, -21.090133), (27.666611, -21.071219), (27.666817, -21.053753), (27.678961, -21.000733), (27.680356, -20.979649), (27.672657, -20.923528), (27.672605, -20.913709), (27.675085, -20.891282), (27.674775, -20.879913), (27.676016, -20.866684), (27.681803, -20.857589), (27.689038, -20.849011), (27.694412, -20.837744999999998), (27.709605, -20.756716), (27.707332, -20.716719), (27.682475, -20.637344), (27.690382, -20.60148), (27.702629, -20.566134), (27.705575, -20.526653), (27.698133, -20.509083), (27.683767, -20.49606), (27.66599, -20.489136), (27.625786, -20.488619), (27.590853, -20.473323), (27.534112, -20.483038), (27.45391, -20.473323), (27.340739, -20.473013), (27.306012, -20.477354), (27.268392, -20.49575), (27.283998, -20.35147), (27.266015, -20.234164), (27.214907, -20.110451), (27.201781, -20.092984), (27.183746, -20.082339), (27.16292, -20.076551), (27.141888, -20.073347), (27.129692, -20.072934), (27.119771, -20.073864), (27.109642, -20.073244), (27.097343, -20.068903), (27.086491, -20.060532), (27.069231, -20.03738), (27.060136, -20.027562), (27.02665, -20.010095), (26.9943, -20.006788), (26.961072, -20.007201), (26.925054, -20.000897), (26.811882, -19.94643), (26.774469, -19.939815), (26.750801, -19.939609), (26.730957, -19.935888), (26.713904, -19.927413), (26.698608, -19.91253), (26.684758, -19.894547), (26.67717, -19.886815), (26.673803, -19.883385), (26.659437, -19.875737), (26.614065, -19.863438), (26.595565, -19.855583), (26.581922, -19.842147), (26.574791, -19.819513), (26.566316, -19.800806), (26.549263, -19.784063), (26.508852, -19.759258), (26.489731, -19.75192), (26.450251, -19.743342), (26.431854, -19.73652), (26.412837, -19.71957), (26.385242, -19.679056), (26.362711, -19.667584), (26.332325, -19.662416), (26.324367, -19.659109), (26.312171, -19.651358), (26.312481, -19.649601), (26.319096, -19.646293), (26.326331, -19.633891), (26.333462, -19.613014), (26.330981, -19.604952), (26.32106, -19.592033), (26.313205, -19.584178), (26.30349, -19.577254), (26.292638, -19.572499), (26.239101, -19.571466), (26.194452, -19.560200000000002), (26.155488, -19.537153), (26.13027, -19.501082), (26.034359, -19.243734), (26.011414, -19.199809), (25.981132, -19.161775), (25.956534, -19.122088), (25.948576, -19.103277), (25.944855, -19.079196), (25.948059, -19.058732), (25.964389, -19.021629), (25.9678, -19.000958), (25.967449, -18.999925), (25.940721, -18.921273), (25.815251, -18.813993), (25.779491, -18.738752), (25.773393, -18.665578), (25.761921, -18.630335), (25.736909, -18.608734), (25.698255, -18.590234), (25.669523, -18.566049), (25.622084, -18.501143), (25.608442, -18.487708), (25.574439, -18.465693), (25.508499, -18.399134), (25.49558, -18.378877), (25.490516, -18.365545), (25.481163, -18.323377), (25.473204, -18.303429), (25.440855, -18.2532), (25.408816, -18.175995), (25.387525, -18.138995), (25.357449, -18.115844), (25.323446, -18.09662), (25.296368, -18.068612), (25.255026, -18.001122), (25.226088, -17.931876), (25.21937, -17.908001), (25.21937, -17.879786), (25.259781, -17.794107), (25.266705, -17.800928), (25.285412, -17.809299), (25.315901, -17.83214), (25.335538, -17.841235), (25.345254, -17.842579), (25.376466, -17.841235), (25.409539, -17.853018), (25.420288, -17.854878), (25.49558, -17.854878), (25.500748, -17.856015), (25.510153, -17.861183), (25.516458, -17.862319), (25.522142, -17.860149), (25.530927, -17.850951), (25.536818, -17.848677), (25.603997, -17.836171), 
(25.657017, -17.81395), (25.681409, -17.81147), (25.694224, -17.819428), (25.70642, -17.829867), (25.743834, -17.839375), (25.765951, -17.849814), (25.786002, -17.862216), (25.794683, -17.872655), (25.804399, -17.888158), (25.849667, -17.906658), (25.86362, -17.923814), (25.847497, -17.929395), (25.846153, -17.943658), (25.853490999999998, -17.959988), (25.86362, -17.971563), (25.924495, -17.998952), (25.966973, -18.000502), (25.978548, -17.998952), (26.033739, -17.971563), (26.04056, -17.978488), (26.046554, -17.966292), (26.062471, -17.962882), (26.081178, -17.962365), (26.095234, -17.958541), (26.096164, -17.954614), (26.0942, -17.941901), (26.095234, -17.938077), (26.101228, -17.935803), (26.118591, -17.931566), (26.135438, -17.922574), (26.158589, -17.918337), (26.167477, -17.913582), (26.203031, -17.887227), (26.211919, -17.882783), (26.221117, -17.886297), (26.228249, -17.894669), (26.233933, -17.903971), (26.239204, -17.910172), (26.248299, -17.913376), (26.294291, -17.918543), (26.3038, -17.922781), (26.311965, -17.928362), (26.318269, -17.934356), (26.325504, -17.93601), (26.362711, -17.930636), (26.408599, -17.939007), (26.485494, -17.979315), (26.527145, -17.992027), (26.553604, -17.996471), (26.570243, -18.002879), (26.583369, -18.013215), (26.598872, -18.029958), (26.612721, -18.041223), (26.628844, -18.049181), (26.685689, -18.066751), (26.700003, -18.069232), (26.71194, -18.065821), (26.740569, -18.0405), (26.753591, -18.032955), (26.769714, -18.029028), (26.794002, -18.026237), (26.88826, -17.984586), (26.912031, -17.992027), (26.94867, -17.968876), (26.95916, -17.964742), (27.006289, -17.962675), (27.021275, -17.958541), (27.048457, -17.944278), (27.078171, -17.916993), (27.11543, -17.882163), (27.149019, -17.842476), (27.146539, -17.818911), (27.145299, -17.794107), (27.146952, -17.783875), (27.157081, -17.769302), (27.422078, -17.504822), (27.524294, -17.415112), (27.577314, -17.363125), (27.604495, -17.312792), (27.624856, -17.233314), (27.641186, -17.198484), (27.777301, -17.001183), (27.816886, -16.959636), (27.868562, -16.929663), (28.022993, -16.865393), (28.113922, -16.827551), (28.21252, -16.748589), (28.280113, -16.706524), (28.643295, -16.568755), (28.690734, -16.56028), (28.718794, -16.56028), (28.73285, -16.55811), (28.741377, -16.550668), (28.761117, -16.532271), (28.769282, -16.515218), (28.808866, -16.486279), (28.822509, -16.470776), (28.829124, -16.434603), (28.833051, -16.426438), (28.857236, -16.388198), (28.857029, -16.36546), (28.840492, -16.323602), (28.836772, -16.306342), (28.840286, -16.284741), (28.86416, -16.231205), (28.847107, -16.202679), (28.852481, -16.162785), (28.8654, -16.121237), (28.870981, -16.087234), (28.868501, -16.08217), (28.86385, -16.076589), (28.859303, -16.069561), (28.857236, -16.060466), (28.860336, -16.049407), (28.874082, -16.028943), (28.877183, -16.022018), (28.898887, -15.995457), (28.932373, -15.963727), (28.946862, -15.957235), (28.951287, -15.955252), (28.972784, -15.951428), (29.018053, -15.950602), (29.042341, -15.946261), (29.055053, -15.934375), (29.076344, -15.895411), (29.086162, -15.884559), (29.102182, -15.870916), (29.121716, -15.859341), (29.141869, -15.854483), (29.150964, -15.848799), (29.186311, -15.812832), (29.406969, -15.714233), (29.422059, -15.711030000000001), (29.508462, -15.703588), (29.526239, -15.692839), (29.563446, -15.662144), (29.587217, -15.655736), (29.608559, -15.658422999999999), (29.62799, -15.663591), (29.648505, -15.666588), (29.672793, -15.663281), (29.73005, -15.644677), 
(29.773252, -15.638062), (29.814283, -15.619666), (29.837331, -15.614808), (29.881773, -15.618839), (29.967504, -15.641473), (30.010654, -15.646227)]), 6) +``` +```response +0.45539 +``` + +### Input parameters + +### Returned value + +## polygonsIntersectionCartesian + +Calculates the intersection of polygons. + +### Example + +``` sql +SELECT wkt(polygonsIntersectionCartesian([[[(0., 0.), (0., 3.), (1., 2.9), (2., 2.6), (2.6, 2.), (2.9, 1.), (3., 0.), (0., 0.)]]], [[[(1., 1.), (1., 4.), (4., 4.), (4., 1.), (1., 1.)]]])) +``` +```response +MULTIPOLYGON(((1 2.9,2 2.6,2.6 2,2.9 1,1 1,1 2.9))) +``` + +### Input parameters + +Polygons + +### Returned value + +MultiPolygon + +## polygonAreaCartesian + +Calculates the area of a polygon. + +### Example + +``` sql +SELECT polygonAreaCartesian([[[(0., 0.), (0., 5.), (5., 5.), (5., 0.)]]]) +``` +```response +25 +``` + +### Input parameters + +Polygon + +### Returned value + +Float64 + +## polygonPerimeterCartesian + +Calculates the perimeter of a polygon. + +### Example + +``` sql +SELECT polygonPerimeterCartesian([[[(0., 0.), (0., 5.), (5., 5.), (5., 0.)]]]) +``` +```response +15 +``` + +### Input parameters + +Polygon + +### Returned value + +Float64 + +## polygonsUnionCartesian + +Calculates the union of polygons. + +### Example + +``` sql +SELECT wkt(polygonsUnionCartesian([[[(0., 0.), (0., 3.), (1., 2.9), (2., 2.6), (2.6, 2.), (2.9, 1), (3., 0.), (0., 0.)]]], [[[(1., 1.), (1., 4.), (4., 4.), (4., 1.), (1., 1.)]]])) +``` +```response +MULTIPOLYGON(((1 2.9,1 4,4 4,4 1,2.9 1,3 0,0 0,0 3,1 2.9))) +``` + +### Input parameters + +Polygons + +### Returned value + +MultiPolygon + +For more information on geometry systems, see this [presentation](https://archive.fosdem.org/2020/schedule/event/working_with_spatial_trajectories_in_boost_geometry/attachments/slides/3988/export/events/attachments/working_with_spatial_trajectories_in_boost_geometry/slides/3988/FOSDEM20_vissarion.pdf) about the Boost library, which is what ClickHouse uses. + diff --git a/docs/en/sql-reference/statements/create/function.md b/docs/en/sql-reference/statements/create/function.md index 15c2356445b..db65cb4448c 100644 --- a/docs/en/sql-reference/statements/create/function.md +++ b/docs/en/sql-reference/statements/create/function.md @@ -2,11 +2,10 @@ slug: /en/sql-reference/statements/create/function sidebar_position: 38 sidebar_label: FUNCTION +title: "CREATE FUNCTION - user defined function (UDF)" --- -# CREATE FUNCTION - user defined function (UDF) - -Creates a user defined function from a lambda expression. The expression must consist of function parameters, constants, operators, or other function calls. +Creates a user defined function (UDF) from a lambda expression. The expression must consist of function parameters, constants, operators, or other function calls. **Syntax** diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index 62feca9ecf6..f1efd6c4718 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -544,6 +544,54 @@ Result: └─────┴──────────┴───────┘ ``` +## Filling grouped by sorting prefix + +It can be useful to fill rows that have the same values in particular columns independently; a good example is filling missing values in time series.
+Assume there is the following time series table: +``` sql +CREATE TABLE timeseries +( + `sensor_id` UInt64, + `timestamp` DateTime64(3, 'UTC'), + `value` Float64 +) +ENGINE = Memory; + +SELECT * FROM timeseries; + +┌─sensor_id─┬───────────────timestamp─┬─value─┐ +│ 234 │ 2021-12-01 00:00:03.000 │ 3 │ +│ 432 │ 2021-12-01 00:00:01.000 │ 1 │ +│ 234 │ 2021-12-01 00:00:07.000 │ 7 │ +│ 432 │ 2021-12-01 00:00:05.000 │ 5 │ +└───────────┴─────────────────────────┴───────┘ +``` +We'd like to fill the missing values for each sensor independently, at a 1-second interval. +This can be achieved by using the `sensor_id` column as a sorting prefix for the filled column `timestamp`: +``` sql +SELECT * +FROM timeseries +ORDER BY + sensor_id, + timestamp WITH FILL +INTERPOLATE ( value AS 9999 ) + +┌─sensor_id─┬───────────────timestamp─┬─value─┐ +│ 234 │ 2021-12-01 00:00:03.000 │ 3 │ +│ 234 │ 2021-12-01 00:00:04.000 │ 9999 │ +│ 234 │ 2021-12-01 00:00:05.000 │ 9999 │ +│ 234 │ 2021-12-01 00:00:06.000 │ 9999 │ +│ 234 │ 2021-12-01 00:00:07.000 │ 7 │ +│ 432 │ 2021-12-01 00:00:01.000 │ 1 │ +│ 432 │ 2021-12-01 00:00:02.000 │ 9999 │ +│ 432 │ 2021-12-01 00:00:03.000 │ 9999 │ +│ 432 │ 2021-12-01 00:00:04.000 │ 9999 │ +│ 432 │ 2021-12-01 00:00:05.000 │ 5 │ +└───────────┴─────────────────────────┴───────┘ +``` +Here, the `value` column was interpolated with `9999` just to make the filled rows more noticeable. +This behavior is controlled by the setting `use_with_fill_by_sorting_prefix` (enabled by default). + ## Related content - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 9b4a02e2393..f157a850a12 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -13,7 +13,7 @@ sidebar_label: url **Syntax** ``` sql -url(URL [,format] [,structure]) +url(URL [,format] [,structure] [,headers]) ``` **Parameters** @@ -21,6 +21,7 @@ url(URL [,format] [,structure]) - `URL` — HTTP or HTTPS server address, which can accept `GET` or `POST` requests (for `SELECT` or `INSERT` queries correspondingly). Type: [String](../../sql-reference/data-types/string.md). - `format` — [Format](../../interfaces/formats.md#formats) of the data. Type: [String](../../sql-reference/data-types/string.md). - `structure` — Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md). +- `headers` - Headers in `'headers('key1'='value1', 'key2'='value2')'` format. Allows you to set headers for the HTTP call. **Returned value** @@ -31,7 +32,7 @@ A table with the specified format and structure and with data from the defined ` Getting the first 3 lines of a table that contains columns of `String` and [UInt32](../../sql-reference/data-types/int-uint.md) type from HTTP-server which answers in [CSV](../../interfaces/formats.md#csv) format.
``` sql -SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3; +SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32', headers('Accept'='text/csv; charset=utf-8')) LIMIT 3; ``` Inserting data from a `URL` into a table: diff --git a/docs/en/sql-reference/table-functions/urlCluster.md b/docs/en/sql-reference/table-functions/urlCluster.md new file mode 100644 index 00000000000..8f19632c433 --- /dev/null +++ b/docs/en/sql-reference/table-functions/urlCluster.md @@ -0,0 +1,62 @@ +--- +slug: /en/sql-reference/table-functions/urlCluster +sidebar_position: 55 +sidebar_label: urlCluster +--- + +# urlCluster Table Function + +Allows processing files from a URL in parallel from many nodes in a specified cluster. On the initiator it creates a connection to all nodes in the cluster, expands asterisks in the URL file path, and dispatches each file dynamically. On the worker node it asks the initiator about the next task to process and processes it. This is repeated until all tasks are finished. + +**Syntax** + +``` sql +urlCluster(cluster_name, URL, format, structure) +``` + +**Arguments** + +- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. +- `URL` — HTTP or HTTPS server address, which can accept `GET` requests. Type: [String](../../sql-reference/data-types/string.md). +- `format` — [Format](../../interfaces/formats.md#formats) of the data. Type: [String](../../sql-reference/data-types/string.md). +- `structure` — Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md). + +**Returned value** + +A table with the specified format and structure and with data from the defined `URL`. + +**Examples** + +Getting the first 3 lines of a table that contains columns of `String` and [UInt32](../../sql-reference/data-types/int-uint.md) type from an HTTP server that responds in [CSV](../../interfaces/formats.md#csv) format. + +1. Create a basic HTTP server using the standard Python 3 tools and start it: + +```python +from http.server import BaseHTTPRequestHandler, HTTPServer + +class CSVHTTPServer(BaseHTTPRequestHandler): + def do_GET(self): + self.send_response(200) + self.send_header('Content-type', 'text/csv') + self.end_headers() + + self.wfile.write(bytes('Hello,1\nWorld,2\n', "utf-8")) + +if __name__ == "__main__": + server_address = ('127.0.0.1', 12345) + HTTPServer(server_address, CSVHTTPServer).serve_forever() +``` + +``` sql +SELECT * FROM urlCluster('cluster_simple','http://127.0.0.1:12345', CSV, 'column1 String, column2 UInt32') +``` + +## Globs in URL + +Patterns in curly brackets `{ }` are used to generate a set of shards or to specify failover addresses. For supported pattern types and examples, see the description of the [remote](remote.md#globs-in-addresses) function. +Character `|` inside patterns is used to specify failover addresses. They are iterated in the same order as listed in the pattern. The number of generated addresses is limited by the [glob_expansion_max_elements](../../operations/settings/settings.md#glob_expansion_max_elements) setting.
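To make the glob syntax described above concrete, here is a minimal, hypothetical `urlCluster` query; the cluster name `cluster_simple`, the host names, and the file names are illustrative assumptions, not part of this patch. The range pattern `{1..3}` expands into three URLs, and `{a|b}` gives each of them a failover pair.

``` sql
-- Hypothetical sketch: {1..3} expands to data_1.csv, data_2.csv and data_3.csv;
-- {a|b} means host-a is tried first and host-b is used only as a failover.
SELECT count()
FROM urlCluster(
    'cluster_simple',
    'http://host-{a|b}.example.com:12345/data_{1..3}.csv',
    CSV,
    'column1 String, column2 UInt32');
```

Each expanded URL becomes a separate task that the initiator dispatches to a worker node, so the three files are read in parallel rather than sequentially.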
+ +**See Also** + +- [HDFS engine](../../engines/table-engines/special/url.md) +- [URL table function](../../sql-reference/table-functions/url.md) \ No newline at end of file diff --git a/docs/ru/sql-reference/table-functions/url.md b/docs/ru/sql-reference/table-functions/url.md index ec9548229c8..c90968d92af 100644 --- a/docs/ru/sql-reference/table-functions/url.md +++ b/docs/ru/sql-reference/table-functions/url.md @@ -21,6 +21,7 @@ url(URL [,format] [,structure]) - `URL` — HTTP или HTTPS-адрес сервера, который может принимать запросы `GET` или `POST` (для запросов `SELECT` или `INSERT` соответственно). Тип: [String](../../sql-reference/data-types/string.md). - `format` — [формат](../../interfaces/formats.md#formats) данных. Тип: [String](../../sql-reference/data-types/string.md). - `structure` — структура таблицы в формате `'UserID UInt64, Name String'`. Определяет имена и типы столбцов. Тип: [String](../../sql-reference/data-types/string.md). +- `headers` - HTTP-заголовки в формате `'headers('key1'='value1', 'key2'='value2')'`. Определяет заголовки для HTTP вызова. **Возвращаемое значение** @@ -31,7 +32,7 @@ url(URL [,format] [,structure]) Получение с HTTP-сервера первых 3 строк таблицы с данными в формате [CSV](../../interfaces/formats.md#csv), содержащей столбцы типа [String](../../sql-reference/data-types/string.md) и [UInt32](../../sql-reference/data-types/int-uint.md). ``` sql -SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3; +SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32', headers('Accept'='text/csv; charset=utf-8')) LIMIT 3; ``` Вставка данных в таблицу: diff --git a/src/Backups/BackupEntryFromAppendOnlyFile.cpp b/src/Backups/BackupEntryFromAppendOnlyFile.cpp index 1d73ab52820..5303d9abffd 100644 --- a/src/Backups/BackupEntryFromAppendOnlyFile.cpp +++ b/src/Backups/BackupEntryFromAppendOnlyFile.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB diff --git a/src/Backups/BackupEntryFromImmutableFile.cpp b/src/Backups/BackupEntryFromImmutableFile.cpp index cc635dd8541..d066db0c6ae 100644 --- a/src/Backups/BackupEntryFromImmutableFile.cpp +++ b/src/Backups/BackupEntryFromImmutableFile.cpp @@ -1,5 +1,7 @@ #include +#include #include +#include namespace DB diff --git a/src/Backups/BackupIO_Default.cpp b/src/Backups/BackupIO_Default.cpp index f7ba061cf3a..b36cb22498d 100644 --- a/src/Backups/BackupIO_Default.cpp +++ b/src/Backups/BackupIO_Default.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp index 7fcb42ec378..306236534b6 100644 --- a/src/Backups/BackupImpl.cpp +++ b/src/Backups/BackupImpl.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index fe3acd7cc7b..f8966847e5a 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -174,7 +174,7 @@ void HedgedConnections::sendQuery( modified_settings.group_by_two_level_threshold_bytes = 0; } - const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && !settings.allow_experimental_parallel_reading_from_replicas; + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0; if (offset_states.size() > 1 && enable_sample_offset_parallel_processing) { diff --git 
a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 7df767a4bcf..71f536b9687 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -142,7 +142,7 @@ void MultiplexedConnections::sendQuery( } } - const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && !settings.allow_experimental_parallel_reading_from_replicas; + const bool enable_sample_offset_parallel_processing = settings.max_parallel_replicas > 1 && settings.allow_experimental_parallel_reading_from_replicas == 0; size_t num_replicas = replica_states.size(); if (num_replicas > 1) diff --git a/src/Common/AsyncTaskExecutor.h b/src/Common/AsyncTaskExecutor.h index 55dc2913c13..10a9556a88b 100644 --- a/src/Common/AsyncTaskExecutor.h +++ b/src/Common/AsyncTaskExecutor.h @@ -113,8 +113,8 @@ private: void createFiber(); void destroyFiber(); - Fiber fiber; FiberStack fiber_stack; + Fiber fiber; std::mutex fiber_lock; std::exception_ptr exception; diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 86f9a388644..3eb5819df90 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -466,7 +467,7 @@ public: /// Useful to check owner of ephemeral node. virtual int64_t getSessionID() const = 0; - virtual String getConnectedAddress() const = 0; + virtual Poco::Net::SocketAddress getConnectedAddress() const = 0; /// If the method will throw an exception, callbacks won't be called. /// diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h index 11e56daf6b4..4bffa4e1d4f 100644 --- a/src/Common/ZooKeeper/TestKeeper.h +++ b/src/Common/ZooKeeper/TestKeeper.h @@ -39,7 +39,7 @@ public: bool isExpired() const override { return expired; } int64_t getSessionID() const override { return 0; } - String getConnectedAddress() const override { return connected_zk_address; } + Poco::Net::SocketAddress getConnectedAddress() const override { return connected_zk_address; } void create( @@ -127,7 +127,7 @@ private: zkutil::ZooKeeperArgs args; - String connected_zk_address; + Poco::Net::SocketAddress connected_zk_address; std::mutex push_request_mutex; std::atomic expired{false}; diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index c423e4fd498..a587ad6caf4 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -112,11 +112,10 @@ void ZooKeeper::init(ZooKeeperArgs args_) else LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(args.hosts, ","), args.chroot); - String address = impl->getConnectedAddress(); + Poco::Net::SocketAddress address = impl->getConnectedAddress(); - size_t colon_pos = address.find(':'); - connected_zk_host = address.substr(0, colon_pos); - connected_zk_port = address.substr(colon_pos + 1); + connected_zk_host = address.host().toString(); + connected_zk_port = address.port(); connected_zk_index = 0; @@ -124,7 +123,7 @@ void ZooKeeper::init(ZooKeeperArgs args_) { for (size_t i = 0; i < args.hosts.size(); i++) { - if (args.hosts[i] == address) + if (args.hosts[i] == address.toString()) { connected_zk_index = i; break; diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 9b85938c726..96f9914b597 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -524,7 +524,7 @@ public: void setServerCompletelyStarted(); String getConnectedZooKeeperHost() 
const { return connected_zk_host; } - String getConnectedZooKeeperPort() const { return connected_zk_port; } + UInt16 getConnectedZooKeeperPort() const { return connected_zk_port; } size_t getConnectedZooKeeperIndex() const { return connected_zk_index; } private: @@ -591,7 +591,7 @@ private: ZooKeeperArgs args; String connected_zk_host; - String connected_zk_port; + UInt16 connected_zk_port; size_t connected_zk_index; std::mutex mutex; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 34be8aa1332..7f23ac00efe 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -433,7 +433,7 @@ void ZooKeeper::connect( } connected = true; - connected_zk_address = node.address.toString(); + connected_zk_address = node.address; break; } @@ -450,7 +450,7 @@ void ZooKeeper::connect( if (!connected) { WriteBufferFromOwnString message; - connected_zk_address = ""; + connected_zk_address = Poco::Net::SocketAddress(); message << "All connection tries failed while connecting to ZooKeeper. nodes: "; bool first = true; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 6715607ca88..944c5032fac 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -125,7 +125,7 @@ public: /// Useful to check owner of ephemeral node. int64_t getSessionID() const override { return session_id; } - String getConnectedAddress() const override { return connected_zk_address; } + Poco::Net::SocketAddress getConnectedAddress() const override { return connected_zk_address; } void executeGenericRequest( const ZooKeeperRequestPtr & request, @@ -203,7 +203,7 @@ public: private: ACLs default_acls; - String connected_zk_address; + Poco::Net::SocketAddress connected_zk_address; zkutil::ZooKeeperArgs args; diff --git a/src/Coordination/KeeperStateManager.cpp b/src/Coordination/KeeperStateManager.cpp index 70687ba471c..8736fb7d4e3 100644 --- a/src/Coordination/KeeperStateManager.cpp +++ b/src/Coordination/KeeperStateManager.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 2a73930836a..1441c22507d 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -63,7 +63,7 @@ namespace DB \ M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \ M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \ - M(UInt32, dns_max_consecutive_failures, 1024, "Max server connections.", 0) \ + M(UInt32, dns_max_consecutive_failures, 1024, "Max connection failures before dropping host from ClickHouse DNS cache.", 0) \ \ M(UInt64, max_table_size_to_drop, 50000000000lu, "If size of a table is greater than this value (in bytes) than table could not be dropped with any DROP query.", 0) \ M(UInt64, max_partition_size_to_drop, 50000000000lu, "Same as max_table_size_to_drop, but for the partitions.", 0) \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 1df0a8af24f..47d4ba452f7 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -154,7 +154,7 @@ class IColumn; M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. 
default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \ \ M(String, cluster_for_parallel_replicas, "default", "Cluster for a shard in which current server is located", 0) \ - M(Bool, allow_experimental_parallel_reading_from_replicas, false, "If true, ClickHouse will send a SELECT query to all replicas of a table. It will work for any kind on MergeTree table.", 0) \ + M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \ M(Float, parallel_replicas_single_task_marks_count_multiplier, 2, "A multiplier which will be added during calculation for minimal number of marks to retrieve from coordinator. This will be applied only for remote replicas.", 0) \ M(Bool, parallel_replicas_for_non_replicated_merge_tree, false, "If true, ClickHouse will use parallel replicas algorithm also for non-replicated MergeTree tables", 0) \ \ @@ -729,6 +729,7 @@ class IColumn; M(UInt64, http_max_request_param_data_size, 10_MiB, "Limit on size of request data used as a query parameter in predefined HTTP requests.", 0) \ M(Bool, function_json_value_return_type_allow_nullable, false, "Allow function JSON_VALUE to return nullable type.", 0) \ M(Bool, function_json_value_return_type_allow_complex, false, "Allow function JSON_VALUE to return complex type, such as: struct, array, map.", 0) \ + M(Bool, use_with_fill_by_sorting_prefix, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently", 0) \ \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 388cad54791..c0f10b13282 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -82,6 +82,7 @@ static std::map sett { {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."}, {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."}, + {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. 
Rows with different values in sorting prefix are filled independently"}, {"output_format_parquet_compliant_nested_types", false, true, "Change an internal field name in output Parquet file schema."}}}, {"23.4", {{"allow_suspicious_indices", true, false, "If true, index can defined with identical expressions"}, {"connect_timeout_with_failover_ms", 50, 1000, "Increase default connect timeout because of async connect"}, diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 2827ec0ce77..9bbf5b9565d 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1455,7 +1455,16 @@ bool DatabaseReplicated::shouldReplicateQuery(const ContextPtr & query_context, } if (query_ptr->as() != nullptr) - return !is_keeper_map_table(query_ptr); + { + if (is_keeper_map_table(query_ptr)) + return false; + + /// If there is only 1 shard then there is no need to replicate DELETE query. + auto current_cluster = tryGetCluster(); + return + !current_cluster || /// Couldn't get the cluster, so we don't know how many shards there are. + current_cluster->getShardsInfo().size() > 1; + } return true; } diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 6bbd7c26bec..5d75f3b70e5 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -20,6 +21,7 @@ #include #include #include +#include namespace fs = std::filesystem; diff --git a/src/Disks/IDiskTransaction.h b/src/Disks/IDiskTransaction.h index f0c32e04f48..935cd6b2c65 100644 --- a/src/Disks/IDiskTransaction.h +++ b/src/Disks/IDiskTransaction.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp similarity index 72% rename from src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp rename to src/Disks/IO/AsynchronousBoundedReadBuffer.cpp index 24b7042e459..611fd00a023 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.cpp @@ -1,4 +1,4 @@ -#include "AsynchronousReadIndirectBufferFromRemoteFS.h" +#include "AsynchronousBoundedReadBuffer.h" #include #include @@ -43,105 +43,77 @@ namespace ErrorCodes } -AsynchronousReadIndirectBufferFromRemoteFS::AsynchronousReadIndirectBufferFromRemoteFS( +AsynchronousBoundedReadBuffer::AsynchronousBoundedReadBuffer( + ImplPtr impl_, IAsynchronousReader & reader_, const ReadSettings & settings_, - std::shared_ptr impl_, - std::shared_ptr async_read_counters_, - std::shared_ptr prefetches_log_) + AsyncReadCountersPtr async_read_counters_, + FilesystemReadPrefetchesLogPtr prefetches_log_) : ReadBufferFromFileBase(settings_.remote_fs_buffer_size, nullptr, 0) + , impl(std::move(impl_)) , read_settings(settings_) , reader(reader_) - , base_priority(settings_.priority) - , impl(impl_) , prefetch_buffer(settings_.prefetch_buffer_size) - , query_id(CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr - ? CurrentThread::getQueryId() : "") + , query_id(CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr ? 
CurrentThread::getQueryId() : "") , current_reader_id(getRandomASCIIString(8)) -#ifndef NDEBUG - , log(&Poco::Logger::get("AsynchronousBufferFromRemoteFS")) -#else - , log(&Poco::Logger::get("AsyncBuffer(" + impl->getFileName() + ")")) -#endif + , log(&Poco::Logger::get("AsynchronousBoundedReadBuffer")) , async_read_counters(async_read_counters_) , prefetches_log(prefetches_log_) { ProfileEvents::increment(ProfileEvents::RemoteFSBuffers); } -String AsynchronousReadIndirectBufferFromRemoteFS::getFileName() const +bool AsynchronousBoundedReadBuffer::hasPendingDataToRead() { - return impl->getFileName(); -} - - -String AsynchronousReadIndirectBufferFromRemoteFS::getInfoForLog() -{ - return impl->getInfoForLog(); -} - -size_t AsynchronousReadIndirectBufferFromRemoteFS::getFileSize() -{ - return impl->getFileSize(); -} - -bool AsynchronousReadIndirectBufferFromRemoteFS::hasPendingDataToRead() -{ - /** - * Note: read_until_position here can be std::nullopt only for non-MergeTree tables. - * For mergeTree tables it must be guaranteed that setReadUntilPosition() or - * setReadUntilEnd() is called before any read or prefetch. - * setReadUntilEnd() always sets read_until_position to file size. - * setReadUntilPosition(pos) always has pos > 0, because if - * right_offset_in_compressed_file is 0, then setReadUntilEnd() is used. - */ if (read_until_position) { - /// Everything is already read. - if (file_offset_of_buffer_end == *read_until_position) + if (file_offset_of_buffer_end == *read_until_position) /// Everything is already read. return false; if (file_offset_of_buffer_end > *read_until_position) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Read beyond last offset ({} > {}, info: {})", - file_offset_of_buffer_end, *read_until_position, impl->getInfoForLog()); + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Read beyond last offset ({} > {}, info: {})", + file_offset_of_buffer_end, *read_until_position, impl->getInfoForLog()); + } } return true; } - -std::future AsynchronousReadIndirectBufferFromRemoteFS::asyncReadInto(char * data, size_t size, int64_t priority) +std::future +AsynchronousBoundedReadBuffer::asyncReadInto(char * data, size_t size, int64_t priority) { IAsynchronousReader::Request request; request.descriptor = std::make_shared(*impl, async_read_counters); request.buf = data; request.size = size; request.offset = file_offset_of_buffer_end; - request.priority = base_priority + priority; + request.priority = read_settings.priority + priority; request.ignore = bytes_to_ignore; return reader.submit(request); } - -void AsynchronousReadIndirectBufferFromRemoteFS::prefetch(int64_t priority) +void AsynchronousBoundedReadBuffer::prefetch(int64_t priority) { if (prefetch_future.valid()) return; - /// Check boundary, which was set in readUntilPosition(). if (!hasPendingDataToRead()) return; - last_prefetch_info.submit_time = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + last_prefetch_info.submit_time = std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()).count(); last_prefetch_info.priority = priority; - /// Prefetch even in case hasPendingData() == true. 
- chassert(prefetch_buffer.size() == read_settings.prefetch_buffer_size || prefetch_buffer.size() == read_settings.remote_fs_buffer_size); + chassert(prefetch_buffer.size() == read_settings.prefetch_buffer_size + || prefetch_buffer.size() == read_settings.remote_fs_buffer_size); prefetch_future = asyncReadInto(prefetch_buffer.data(), prefetch_buffer.size(), priority); ProfileEvents::increment(ProfileEvents::RemoteFSPrefetches); } -void AsynchronousReadIndirectBufferFromRemoteFS::setReadUntilPosition(size_t position) +void AsynchronousBoundedReadBuffer::setReadUntilPosition(size_t position) { if (!read_until_position || position != *read_until_position) { @@ -157,21 +129,16 @@ void AsynchronousReadIndirectBufferFromRemoteFS::setReadUntilPosition(size_t pos } } - -void AsynchronousReadIndirectBufferFromRemoteFS::setReadUntilEnd() +void AsynchronousBoundedReadBuffer::appendToPrefetchLog( + FilesystemPrefetchState state, + int64_t size, + const std::unique_ptr & execution_watch) { - setReadUntilPosition(impl->getFileSize()); -} - - -void AsynchronousReadIndirectBufferFromRemoteFS::appendToPrefetchLog(FilesystemPrefetchState state, int64_t size, const std::unique_ptr & execution_watch) -{ - const auto & object = impl->getCurrentObject(); FilesystemReadPrefetchesLogElement elem { .event_time = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()), .query_id = query_id, - .path = object.local_path, + .path = impl->getFileName(), .offset = file_offset_of_buffer_end, .size = size, .prefetch_submit_time = last_prefetch_info.submit_time, @@ -187,7 +154,7 @@ void AsynchronousReadIndirectBufferFromRemoteFS::appendToPrefetchLog(FilesystemP } -bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() +bool AsynchronousBoundedReadBuffer::nextImpl() { if (!hasPendingDataToRead()) return false; @@ -245,14 +212,14 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() /// In case of multiple files for the same file in clickhouse (i.e. log family) /// file_offset_of_buffer_end will not match getImplementationBufferOffset() /// so we use [impl->getImplementationBufferOffset(), impl->getFileSize()] - chassert(file_offset_of_buffer_end >= impl->getImplementationBufferOffset()); + chassert(file_offset_of_buffer_end >= impl->getFileOffsetOfBufferEnd()); chassert(file_offset_of_buffer_end <= impl->getFileSize()); return bytes_read; } -off_t AsynchronousReadIndirectBufferFromRemoteFS::seek(off_t offset, int whence) +off_t AsynchronousBoundedReadBuffer::seek(off_t offset, int whence) { ProfileEvents::increment(ProfileEvents::RemoteFSSeeks); @@ -268,7 +235,7 @@ off_t AsynchronousReadIndirectBufferFromRemoteFS::seek(off_t offset, int whence) } else { - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "ReadBufferFromFileDescriptor::seek expects SEEK_SET or SEEK_CUR as whence"); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Expected SEEK_SET or SEEK_CUR as whence"); } /// Position is unchanged. @@ -322,9 +289,8 @@ off_t AsynchronousReadIndirectBufferFromRemoteFS::seek(off_t offset, int whence) if (read_until_position && new_pos > *read_until_position) { ProfileEvents::increment(ProfileEvents::RemoteFSSeeksWithReset); - impl->reset(); - file_offset_of_buffer_end = new_pos = *read_until_position; /// read_until_position is a non-included boundary. + impl->seek(file_offset_of_buffer_end, SEEK_SET); return new_pos; } @@ -332,8 +298,7 @@ off_t AsynchronousReadIndirectBufferFromRemoteFS::seek(off_t offset, int whence) * Lazy ignore. 
Save number of bytes to ignore and ignore it either for prefetch buffer or current buffer. * Note: we read in range [file_offset_of_buffer_end, read_until_position). */ - if (impl->initialized() - && read_until_position && new_pos < *read_until_position + if (read_until_position && new_pos < *read_until_position && new_pos > file_offset_of_buffer_end && new_pos < file_offset_of_buffer_end + read_settings.remote_read_min_bytes_for_seek) { @@ -342,31 +307,21 @@ off_t AsynchronousReadIndirectBufferFromRemoteFS::seek(off_t offset, int whence) } else { - if (impl->initialized()) - { - ProfileEvents::increment(ProfileEvents::RemoteFSSeeksWithReset); - impl->reset(); - } + ProfileEvents::increment(ProfileEvents::RemoteFSSeeksWithReset); file_offset_of_buffer_end = new_pos; + impl->seek(file_offset_of_buffer_end, SEEK_SET); } return new_pos; } -off_t AsynchronousReadIndirectBufferFromRemoteFS::getPosition() -{ - return file_offset_of_buffer_end - available() + bytes_to_ignore; -} - - -void AsynchronousReadIndirectBufferFromRemoteFS::finalize() +void AsynchronousBoundedReadBuffer::finalize() { resetPrefetch(FilesystemPrefetchState::UNNEEDED); } - -AsynchronousReadIndirectBufferFromRemoteFS::~AsynchronousReadIndirectBufferFromRemoteFS() +AsynchronousBoundedReadBuffer::~AsynchronousBoundedReadBuffer() { try { @@ -378,7 +333,7 @@ AsynchronousReadIndirectBufferFromRemoteFS::~AsynchronousReadIndirectBufferFromR } } -void AsynchronousReadIndirectBufferFromRemoteFS::resetPrefetch(FilesystemPrefetchState state) +void AsynchronousBoundedReadBuffer::resetPrefetch(FilesystemPrefetchState state) { if (!prefetch_future.valid()) return; diff --git a/src/Disks/IO/AsynchronousBoundedReadBuffer.h b/src/Disks/IO/AsynchronousBoundedReadBuffer.h new file mode 100644 index 00000000000..45256cdfac2 --- /dev/null +++ b/src/Disks/IO/AsynchronousBoundedReadBuffer.h @@ -0,0 +1,96 @@ +#pragma once + +#include "config.h" +#include +#include +#include +#include +#include + +namespace Poco { class Logger; } + +namespace DB +{ + +struct AsyncReadCounters; +using AsyncReadCountersPtr = std::shared_ptr; +class ReadBufferFromRemoteFSGather; + +class AsynchronousBoundedReadBuffer : public ReadBufferFromFileBase +{ +public: + using Impl = ReadBufferFromFileBase; + using ImplPtr = std::unique_ptr; + + explicit AsynchronousBoundedReadBuffer( + ImplPtr impl_, + IAsynchronousReader & reader_, + const ReadSettings & settings_, + AsyncReadCountersPtr async_read_counters_ = nullptr, + FilesystemReadPrefetchesLogPtr prefetches_log_ = nullptr); + + ~AsynchronousBoundedReadBuffer() override; + + String getFileName() const override { return impl->getFileName(); } + + size_t getFileSize() override { return impl->getFileSize(); } + + String getInfoForLog() override { return impl->getInfoForLog(); } + + off_t seek(off_t offset_, int whence) override; + + void prefetch(int64_t priority) override; + + void setReadUntilPosition(size_t position) override; /// [..., position). + + void setReadUntilEnd() override { return setReadUntilPosition(getFileSize()); } + + off_t getPosition() override { return file_offset_of_buffer_end - available() + bytes_to_ignore; } + +private: + const ImplPtr impl; + const ReadSettings read_settings; + IAsynchronousReader & reader; + + size_t file_offset_of_buffer_end = 0; + std::optional read_until_position; + /// If nonzero then working_buffer is empty. + /// If a prefetch is in flight, the prefetch task has been instructed to ignore this many bytes. 
+ size_t bytes_to_ignore = 0; + + Memory<> prefetch_buffer; + std::future prefetch_future; + + const std::string query_id; + const std::string current_reader_id; + + Poco::Logger * log; + + AsyncReadCountersPtr async_read_counters; + FilesystemReadPrefetchesLogPtr prefetches_log; + + struct LastPrefetchInfo + { + UInt64 submit_time = 0; + size_t priority = 0; + }; + LastPrefetchInfo last_prefetch_info; + + bool nextImpl() override; + + void finalize(); + + bool hasPendingDataToRead(); + + void appendToPrefetchLog( + FilesystemPrefetchState state, + int64_t size, + const std::unique_ptr & execution_watch); + + std::future asyncReadInto(char * data, size_t size, int64_t priority); + + void resetPrefetch(FilesystemPrefetchState state); + +}; + +} diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h deleted file mode 100644 index e8fb3fe248b..00000000000 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h +++ /dev/null @@ -1,111 +0,0 @@ -#pragma once - -#include "config.h" -#include -#include -#include -#include -#include - -namespace Poco { class Logger; } - -namespace DB -{ - -struct AsyncReadCounters; -class ReadBufferFromRemoteFSGather; - -/** - * Reads data from S3/HDFS/Web using stored paths in metadata. -* This class is an asynchronous version of ReadIndirectBufferFromRemoteFS. -* -* Buffers chain for diskS3: -* AsynchronousIndirectReadBufferFromRemoteFS -> ReadBufferFromRemoteFS -> -* -> ReadBufferFromS3 -> ReadBufferFromIStream. -* -* Buffers chain for diskWeb: -* AsynchronousIndirectReadBufferFromRemoteFS -> ReadBufferFromRemoteFS -> -* -> ReadIndirectBufferFromWebServer -> ReadBufferFromHTTP -> ReadBufferFromIStream. -* -* We pass either `memory` or `prefetch_buffer` through all this chain and return it back. -*/ -class AsynchronousReadIndirectBufferFromRemoteFS : public ReadBufferFromFileBase -{ -public: - explicit AsynchronousReadIndirectBufferFromRemoteFS( - IAsynchronousReader & reader_, const ReadSettings & settings_, - std::shared_ptr impl_, - std::shared_ptr async_read_counters_, - std::shared_ptr prefetches_log_); - - ~AsynchronousReadIndirectBufferFromRemoteFS() override; - - off_t seek(off_t offset_, int whence) override; - - off_t getPosition() override; - - String getFileName() const override; - - void prefetch(int64_t priority) override; - - void setReadUntilPosition(size_t position) override; /// [..., position). - - void setReadUntilEnd() override; - - String getInfoForLog() override; - - size_t getFileSize() override; - - bool isIntegratedWithFilesystemCache() const override { return true; } - -private: - bool nextImpl() override; - - void finalize(); - - bool hasPendingDataToRead(); - - void appendToPrefetchLog(FilesystemPrefetchState state, int64_t size, const std::unique_ptr & execution_watch); - - std::future asyncReadInto(char * data, size_t size, int64_t priority); - - void resetPrefetch(FilesystemPrefetchState state); - - ReadSettings read_settings; - - IAsynchronousReader & reader; - - int64_t base_priority; - - std::shared_ptr impl; - - std::future prefetch_future; - - size_t file_offset_of_buffer_end = 0; - - Memory<> prefetch_buffer; - - std::string query_id; - - std::string current_reader_id; - - /// If nonzero then working_buffer is empty. - /// If a prefetch is in flight, the prefetch task has been instructed to ignore this many bytes. 
- size_t bytes_to_ignore = 0; - - std::optional read_until_position; - - Poco::Logger * log; - - std::shared_ptr async_read_counters; - std::shared_ptr prefetches_log; - - struct LastPrefetchInfo - { - UInt64 submit_time = 0; - size_t priority = 0; - }; - LastPrefetchInfo last_prefetch_info; -}; - -} diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 68b5a9c9d96..12fbbbcf747 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -12,22 +13,24 @@ namespace DB { +namespace ErrorCodes +{ + extern const int CANNOT_SEEK_THROUGH_FILE; +} ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather( ReadBufferCreator && read_buffer_creator_, const StoredObjects & blobs_to_read_, const ReadSettings & settings_, std::shared_ptr cache_log_) - : ReadBuffer(nullptr, 0) - , read_buffer_creator(std::move(read_buffer_creator_)) - , blobs_to_read(blobs_to_read_) + : ReadBufferFromFileBase(0, nullptr, 0) , settings(settings_) + , blobs_to_read(blobs_to_read_) + , read_buffer_creator(std::move(read_buffer_creator_)) + , cache_log(settings.enable_filesystem_cache_log ? cache_log_ : nullptr) , query_id(CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() != nullptr ? CurrentThread::getQueryId() : "") , log(&Poco::Logger::get("ReadBufferFromRemoteFSGather")) { - if (cache_log_ && settings.enable_filesystem_cache_log) - cache_log = cache_log_; - if (!blobs_to_read.empty()) current_object = blobs_to_read.front(); @@ -38,13 +41,12 @@ ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather( SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(const StoredObject & object) { - if (current_buf != nullptr && !with_cache) + if (current_buf && !with_cache) { - appendFilesystemCacheLog(); + appendUncachedReadInfo(); } current_object = object; - total_bytes_read_from_current_file = 0; const auto & object_path = object.remote_path; size_t current_read_until_position = read_until_position ? 
read_until_position : object.bytes_size; @@ -70,7 +72,7 @@ SeekableReadBufferPtr ReadBufferFromRemoteFSGather::createImplementationBuffer(c return current_read_buffer_creator(); } -void ReadBufferFromRemoteFSGather::appendFilesystemCacheLog() +void ReadBufferFromRemoteFSGather::appendUncachedReadInfo() { if (!cache_log || current_object.remote_path.empty()) return; @@ -82,7 +84,7 @@ void ReadBufferFromRemoteFSGather::appendFilesystemCacheLog() .source_file_path = current_object.remote_path, .file_segment_range = { 0, current_object.bytes_size }, .cache_type = FilesystemCacheLogElement::CacheType::READ_FROM_FS_BYPASSING_CACHE, - .file_segment_size = total_bytes_read_from_current_file, + .file_segment_size = current_object.bytes_size, .read_from_cache_attempted = false, }; cache_log->add(elem); @@ -174,7 +176,7 @@ bool ReadBufferFromRemoteFSGather::moveToNextBuffer() bool ReadBufferFromRemoteFSGather::readImpl() { - swap(*current_buf); + SwapHelper swap(*this, *current_buf); bool result = false; @@ -185,7 +187,6 @@ bool ReadBufferFromRemoteFSGather::readImpl() */ if (bytes_to_ignore) { - total_bytes_read_from_current_file += bytes_to_ignore; current_buf->ignore(bytes_to_ignore); result = current_buf->hasPendingData(); file_offset_of_buffer_end += bytes_to_ignore; @@ -205,57 +206,41 @@ bool ReadBufferFromRemoteFSGather::readImpl() file_offset_of_buffer_end += current_buf->available(); } - swap(*current_buf); - /// Required for non-async reads. if (result) { - assert(available()); - nextimpl_working_buffer_offset = offset(); - total_bytes_read_from_current_file += available(); + assert(current_buf->available()); + nextimpl_working_buffer_offset = current_buf->offset(); } return result; } -size_t ReadBufferFromRemoteFSGather::getFileOffsetOfBufferEnd() const -{ - return file_offset_of_buffer_end; -} - void ReadBufferFromRemoteFSGather::setReadUntilPosition(size_t position) { - if (position != read_until_position) - { - read_until_position = position; - reset(); - } + if (position == read_until_position) + return; + + reset(); + read_until_position = position; } void ReadBufferFromRemoteFSGather::reset() { + current_object = {}; + current_buf_idx = {}; current_buf.reset(); + bytes_to_ignore = 0; } -String ReadBufferFromRemoteFSGather::getFileName() const +off_t ReadBufferFromRemoteFSGather::seek(off_t offset, int whence) { - return current_object.remote_path; -} + if (whence != SEEK_SET) + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only seeking with SEEK_SET is allowed"); -size_t ReadBufferFromRemoteFSGather::getFileSize() const -{ - size_t size = 0; - for (const auto & object : blobs_to_read) - size += object.bytes_size; - return size; -} - -String ReadBufferFromRemoteFSGather::getInfoForLog() -{ - if (!current_buf) - return ""; - - return current_buf->getInfoForLog(); + reset(); + file_offset_of_buffer_end = offset; + return file_offset_of_buffer_end; } size_t ReadBufferFromRemoteFSGather::getImplementationBufferOffset() const @@ -269,7 +254,7 @@ size_t ReadBufferFromRemoteFSGather::getImplementationBufferOffset() const ReadBufferFromRemoteFSGather::~ReadBufferFromRemoteFSGather() { if (!with_cache) - appendFilesystemCacheLog(); + appendUncachedReadInfo(); } } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index 8c55f747e5b..39b81d6f9ac 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -10,12 +10,13 @@ namespace Poco { class Logger; } namespace DB { +class 
FilesystemCacheLog; /** * Remote disk might need to split one clickhouse file into multiple files in remote fs. * This class works like a proxy to allow transition from one file into multiple. */ -class ReadBufferFromRemoteFSGather final : public ReadBuffer +class ReadBufferFromRemoteFSGather final : public ReadBufferFromFileBase { friend class ReadIndirectBufferFromRemoteFS; @@ -30,25 +31,25 @@ public: ~ReadBufferFromRemoteFSGather() override; - String getFileName() const; + String getFileName() const override { return current_object.remote_path; } - void reset(); + String getInfoForLog() override { return current_buf ? current_buf->getInfoForLog() : ""; } void setReadUntilPosition(size_t position) override; IAsynchronousReader::Result readInto(char * data, size_t size, size_t offset, size_t ignore) override; - size_t getFileSize() const; + size_t getFileSize() override { return getTotalSize(blobs_to_read); } - size_t getFileOffsetOfBufferEnd() const; + size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; } bool initialized() const { return current_buf != nullptr; } - String getInfoForLog(); - size_t getImplementationBufferOffset() const; - const StoredObject & getCurrentObject() const { return current_object; } + off_t seek(off_t offset, int whence) override; + + off_t getPosition() override { return file_offset_of_buffer_end - available() + bytes_to_ignore; } private: SeekableReadBufferPtr createImplementationBuffer(const StoredObject & object); @@ -61,40 +62,26 @@ private: bool moveToNextBuffer(); - void appendFilesystemCacheLog(); + void appendUncachedReadInfo(); - ReadBufferCreator read_buffer_creator; - - StoredObjects blobs_to_read; - - ReadSettings settings; - - size_t read_until_position = 0; - - StoredObject current_object; + void reset(); + const ReadSettings settings; + const StoredObjects blobs_to_read; + const ReadBufferCreator read_buffer_creator; + const std::shared_ptr cache_log; + const String query_id; bool with_cache; - String query_id; - - Poco::Logger * log; - - SeekableReadBufferPtr current_buf; - - size_t current_buf_idx = 0; - + size_t read_until_position = 0; size_t file_offset_of_buffer_end = 0; - - /** - * File: |___________________| - * Buffer: |~~~~~~~| - * file_offset_of_buffer_end: ^ - */ size_t bytes_to_ignore = 0; - size_t total_bytes_read_from_current_file = 0; + StoredObject current_object; + size_t current_buf_idx = 0; + SeekableReadBufferPtr current_buf; - std::shared_ptr cache_log; + Poco::Logger * log; }; } diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp index 8a33a6ce9a1..a559b47f2cc 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp @@ -82,7 +82,7 @@ off_t ReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence) else throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET or SEEK_CUR modes are allowed."); - impl->reset(); + impl->seek(impl->file_offset_of_buffer_end, SEEK_SET); resetWorkingBuffer(); file_offset_of_buffer_end = impl->file_offset_of_buffer_end; diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h index 0424c2e56d3..19647b1fa39 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h @@ -31,8 +31,6 @@ public: void setReadUntilEnd() override; - bool isIntegratedWithFilesystemCache() const override { return true; } - size_t getFileSize() override; 
private: diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp index 1d0eca0ddd4..c4dd0161c70 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -112,8 +113,8 @@ std::unique_ptr AzureObjectStorage::readObjects( /// NOL if (disk_read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { auto & reader = global_context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); - return std::make_unique( - reader, disk_read_settings, std::move(reader_impl), + return std::make_unique( + std::move(reader_impl), reader, disk_read_settings, global_context->getAsyncReadCounters(), global_context->getFilesystemReadPrefetchesLog()); } diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h index 276c992e4d7..0c2aecd5c62 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h +++ b/src/Disks/ObjectStorages/AzureBlobStorage/AzureObjectStorage.h @@ -5,7 +5,6 @@ #include #include -#include #include #include #include diff --git a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h index 19011a04722..cb8d9b8a5af 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp index c0e4b5ec3d9..a3092bc6f12 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.cpp @@ -7,7 +7,6 @@ #include #include -#include #include #include #include diff --git a/src/Disks/ObjectStorages/IObjectStorage.cpp b/src/Disks/ObjectStorages/IObjectStorage.cpp index 1ee55a7b342..a810db0cdf8 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.cpp +++ b/src/Disks/ObjectStorages/IObjectStorage.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index e875cb7260e..8babb2fbf1a 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -12,12 +12,14 @@ #include #include #include +#include -#include #include #include #include #include +#include +#include namespace DB diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index e1e90df591b..05c0c8f3961 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -63,12 +64,12 @@ std::unique_ptr LocalObjectStorage::readObjects( /// NOL global_context->getFilesystemCacheLog()); /// We use `remove_fs_method` (not `local_fs_method`) because we are about to use - /// AsynchronousReadIndirectBufferFromRemoteFS which works by the remote_fs_* settings. + /// AsynchronousBoundedReadBuffer which works by the remote_fs_* settings. 
if (modified_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { auto & reader = global_context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); - return std::make_unique( - reader, modified_settings, std::move(impl), + return std::make_unique( + std::move(impl), reader, modified_settings, global_context->getAsyncReadCounters(), global_context->getFilesystemReadPrefetchesLog()); } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index f7eb8d00ff7..79e9e1141bb 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include @@ -127,8 +127,8 @@ std::unique_ptr S3ObjectStorage::readObjects( /// NOLINT if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { auto & reader = global_context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); - return std::make_unique( - reader, disk_read_settings, std::move(s3_impl), + return std::make_unique( + std::move(s3_impl), reader, disk_read_settings, global_context->getAsyncReadCounters(), global_context->getFilesystemReadPrefetchesLog()); } diff --git a/src/Disks/ObjectStorages/StoredObject.cpp b/src/Disks/ObjectStorages/StoredObject.cpp new file mode 100644 index 00000000000..6a363c64107 --- /dev/null +++ b/src/Disks/ObjectStorages/StoredObject.cpp @@ -0,0 +1,14 @@ +#include + +namespace DB +{ + +size_t getTotalSize(const StoredObjects & objects) +{ + size_t size = 0; + for (const auto & object : objects) + size += object.bytes_size; + return size; +} + +} diff --git a/src/Disks/ObjectStorages/StoredObject.h b/src/Disks/ObjectStorages/StoredObject.h index 94c9fd0946d..8afbb116a83 100644 --- a/src/Disks/ObjectStorages/StoredObject.h +++ b/src/Disks/ObjectStorages/StoredObject.h @@ -29,4 +29,6 @@ struct StoredObject using StoredObjects = std::vector; +size_t getTotalSize(const StoredObjects & objects); + } diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index 4ff18383713..4f34f3eed9c 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -189,8 +190,8 @@ std::unique_ptr WebObjectStorage::readObject( /// NOLINT if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { auto & reader = global_context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); - return std::make_unique( - reader, read_settings, std::move(web_impl), + return std::make_unique( + std::move(web_impl), reader, read_settings, global_context->getAsyncReadCounters(), global_context->getFilesystemReadPrefetchesLog()); } diff --git a/src/IO/AsyncReadCounters.h b/src/IO/AsyncReadCounters.h index 1d941b4fc47..1f84b2a214b 100644 --- a/src/IO/AsyncReadCounters.h +++ b/src/IO/AsyncReadCounters.h @@ -27,5 +27,6 @@ struct AsyncReadCounters void dumpToMapColumn(IColumn * column) const; }; +using AsyncReadCountersPtr = std::shared_ptr; } diff --git a/src/IO/ReadBufferFromFileDecorator.h b/src/IO/ReadBufferFromFileDecorator.h index 1d035e8d74b..6e62c7f741b 100644 --- a/src/IO/ReadBufferFromFileDecorator.h +++ b/src/IO/ReadBufferFromFileDecorator.h @@ -27,8 +27,6 @@ public: ReadBuffer & getWrappedReadBuffer() { return *impl; } - bool isIntegratedWithFilesystemCache() const override { return 
impl->isIntegratedWithFilesystemCache(); } - size_t getFileSize() override; protected: diff --git a/src/IO/SeekableReadBuffer.h b/src/IO/SeekableReadBuffer.h index b055aa57975..736ab5bbc71 100644 --- a/src/IO/SeekableReadBuffer.h +++ b/src/IO/SeekableReadBuffer.h @@ -49,8 +49,6 @@ public: /// If true, setReadUntilPosition() guarantees that eof will be reported at the given position. virtual bool supportsRightBoundedReads() const { return false; } - virtual bool isIntegratedWithFilesystemCache() const { return false; } - /// Returns true if seek() actually works, false if seek() will always throw (or make subsequent /// nextImpl() calls throw). /// diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 5ec12d7e0ea..c594feb73c2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1386,6 +1386,20 @@ void Context::addQueryAccessInfo( query_access_info.views.emplace(view_name); } +void Context::addQueryAccessInfo(const Names & partition_names) +{ + if (isGlobalContext()) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have query access info"); + } + + std::lock_guard lock(query_access_info.mutex); + for (const auto & partition_name : partition_names) + { + query_access_info.partitions.emplace(partition_name); + } +} + void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const { if (isGlobalContext()) @@ -2796,11 +2810,7 @@ zkutil::ZooKeeperPtr Context::getAuxiliaryZooKeeper(const String & name) const std::map Context::getAuxiliaryZooKeepers() const { std::lock_guard lock(shared->auxiliary_zookeepers_mutex); - - if (!shared->auxiliary_zookeepers.empty()) - return shared->auxiliary_zookeepers; - else - return std::map(); + return shared->auxiliary_zookeepers; } #if USE_ROCKSDB @@ -4314,7 +4324,7 @@ Context::ParallelReplicasMode Context::getParallelReplicasMode() const if (!settings_.parallel_replicas_custom_key.value.empty()) return CUSTOM_KEY; - if (settings_.allow_experimental_parallel_reading_from_replicas + if (settings_.allow_experimental_parallel_reading_from_replicas > 0 && !settings_.use_hedged_requests) return READ_TASKS; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 3862984bb6f..fcf035cefca 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -295,6 +295,7 @@ private: databases = rhs.databases; tables = rhs.tables; columns = rhs.columns; + partitions = rhs.partitions; projections = rhs.projections; views = rhs.views; } @@ -312,6 +313,7 @@ private: std::swap(databases, rhs.databases); std::swap(tables, rhs.tables); std::swap(columns, rhs.columns); + std::swap(partitions, rhs.partitions); std::swap(projections, rhs.projections); std::swap(views, rhs.views); } @@ -321,6 +323,7 @@ private: std::set databases{}; std::set tables{}; std::set columns{}; + std::set partitions{}; std::set projections{}; std::set views{}; }; @@ -629,6 +632,7 @@ public: const Names & column_names, const String & projection_name = {}, const String & view_name = {}); + void addQueryAccessInfo(const Names & partition_names); /// Supported factories for records in query_log diff --git a/src/Interpreters/FilesystemReadPrefetchesLog.h b/src/Interpreters/FilesystemReadPrefetchesLog.h index a7672c49d91..7052cf2769d 100644 --- a/src/Interpreters/FilesystemReadPrefetchesLog.h +++ b/src/Interpreters/FilesystemReadPrefetchesLog.h @@ -45,4 +45,6 @@ public: using SystemLog::SystemLog; }; +using FilesystemReadPrefetchesLogPtr = std::shared_ptr; + } diff --git 
a/src/Interpreters/FillingRow.cpp b/src/Interpreters/FillingRow.cpp index 05795842902..7efbb62b0d2 100644 --- a/src/Interpreters/FillingRow.cpp +++ b/src/Interpreters/FillingRow.cpp @@ -50,7 +50,16 @@ bool FillingRow::operator>=(const FillingRow & other) const return !(*this < other); } -bool FillingRow::next(const FillingRow & to_row) +bool FillingRow::isNull() const +{ + for (const auto & field : row) + if (!field.isNull()) + return false; + + return true; +} + +std::pair FillingRow::next(const FillingRow & to_row) { const size_t row_size = size(); size_t pos = 0; @@ -61,22 +70,24 @@ bool FillingRow::next(const FillingRow & to_row) break; if (pos == row_size || less(to_row.row[pos], row[pos], getDirection(pos))) - return false; + return {false, false}; /// If we have any 'fill_to' value at position greater than 'pos', /// we need to generate rows up to 'fill_to' value. for (size_t i = row_size - 1; i > pos; --i) { - if (getFillDescription(i).fill_to.isNull() || row[i].isNull()) + auto & fill_column_desc = getFillDescription(i); + + if (fill_column_desc.fill_to.isNull() || row[i].isNull()) continue; - auto next_value = row[i]; - getFillDescription(i).step_func(next_value); - if (less(next_value, getFillDescription(i).fill_to, getDirection(i))) + Field next_value = row[i]; + fill_column_desc.step_func(next_value); + if (less(next_value, fill_column_desc.fill_to, getDirection(i))) { row[i] = next_value; initFromDefaults(i + 1); - return true; + return {true, true}; } } @@ -84,14 +95,13 @@ bool FillingRow::next(const FillingRow & to_row) getFillDescription(pos).step_func(next_value); if (less(to_row.row[pos], next_value, getDirection(pos)) || equals(next_value, getFillDescription(pos).fill_to)) - return false; + return {false, false}; row[pos] = next_value; if (equals(row[pos], to_row.row[pos])) { bool is_less = false; - size_t i = pos + 1; - for (; i < row_size; ++i) + for (size_t i = pos + 1; i < row_size; ++i) { const auto & fill_from = getFillDescription(i).fill_from; if (!fill_from.isNull()) @@ -101,11 +111,11 @@ bool FillingRow::next(const FillingRow & to_row) is_less |= less(row[i], to_row.row[i], getDirection(i)); } - return is_less; + return {is_less, true}; } initFromDefaults(pos + 1); - return true; + return {true, true}; } void FillingRow::initFromDefaults(size_t from_pos) diff --git a/src/Interpreters/FillingRow.h b/src/Interpreters/FillingRow.h index c56bd875151..004b417542c 100644 --- a/src/Interpreters/FillingRow.h +++ b/src/Interpreters/FillingRow.h @@ -19,7 +19,10 @@ public: explicit FillingRow(const SortDescription & sort_description); /// Generates next row according to fill 'from', 'to' and 'step' values. 
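
The `FillingRow::next()` rewrite above replaces the single boolean result with a pair of flags: the first says whether the generated filling row should be inserted into the result, the second whether the internal row value actually changed (used later to decide if a pending row still has to be emitted during suffix generation). The sketch below shows how such a two-flag contract is consumed; the generator is a deliberately simplified toy, not the real `FillingRow` logic.

```cpp
#include <iostream>
#include <utility>
#include <vector>

// Toy stand-in for FillingRow::next(): advances `current` by `step` towards `to`,
// returning {apply, value_changed} like the new interface in this diff.
//   apply         -> the generated value should be inserted into the result
//   value_changed -> the internal value advanced (even if it must not be inserted)
struct ToyFillingRow
{
    long current;
    long step;

    std::pair<bool, bool> next(long to)
    {
        long candidate = current + step;
        if (candidate > to)
            return {false, false}; // overshoot: nothing generated, value untouched
        current = candidate;
        if (candidate == to)
            return {false, true};  // value advanced, but the real input row will supply it
        return {true, true};       // a genuinely missing value: insert it
    }
};

int main()
{
    ToyFillingRow row{/*current*/ 0, /*step*/ 2};
    std::vector<long> generated;
    bool pending_changed = false; // analogue of filling_row_changed in the transform

    while (true)
    {
        const auto [apply, changed] = row.next(6);
        pending_changed = changed;
        if (!apply)
            break;
        generated.push_back(row.current);
    }

    for (long v : generated)
        std::cout << v << ' ';   // prints: 2 4
    std::cout << "\npending but not inserted: " << std::boolalpha << pending_changed << '\n'; // true
}
```
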
- bool next(const FillingRow & to_row); + /// Return pair of boolean + /// apply - true if filling values should be inserted into result set + /// value_changed - true if filling row value was changed + std::pair next(const FillingRow & to_row); void initFromDefaults(size_t from_pos = 0); @@ -29,9 +32,11 @@ public: bool operator<(const FillingRow & other) const; bool operator==(const FillingRow & other) const; bool operator>=(const FillingRow & other) const; + bool isNull() const; int getDirection(size_t index) const { return sort_description[index].direction; } FillColumnDescription & getFillDescription(size_t index) { return sort_description[index].fill_description; } + const FillColumnDescription & getFillDescription(size_t index) const { return sort_description[index].fill_description; } String dump() const; diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 4a4c69ff473..f54ee9d85c7 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -571,7 +571,13 @@ IBlocksStreamPtr GraceHashJoin::getDelayedBlocks() size_t bucket_idx = current_bucket->idx; - hash_join = makeInMemoryJoin(); + size_t prev_keys_num = 0; + // If there is only one bucket, don't take this check. + if (hash_join && buckets.size() > 1) + { + // Use previous hash_join's keys number to estimate next hash_join's size is reasonable. + prev_keys_num = hash_join->getTotalRowCount(); + } for (bucket_idx = bucket_idx + 1; bucket_idx < buckets.size(); ++bucket_idx) { @@ -585,6 +591,7 @@ IBlocksStreamPtr GraceHashJoin::getDelayedBlocks() continue; } + hash_join = makeInMemoryJoin(prev_keys_num); auto right_reader = current_bucket->startJoining(); size_t num_rows = 0; /// count rows that were written and rehashed while (Block block = right_reader.read()) @@ -604,9 +611,9 @@ IBlocksStreamPtr GraceHashJoin::getDelayedBlocks() return nullptr; } -GraceHashJoin::InMemoryJoinPtr GraceHashJoin::makeInMemoryJoin() +GraceHashJoin::InMemoryJoinPtr GraceHashJoin::makeInMemoryJoin(size_t reserve_num) { - return std::make_unique(table_join, right_sample_block, any_take_last_row); + return std::make_unique(table_join, right_sample_block, any_take_last_row, reserve_num); } Block GraceHashJoin::prepareRightBlock(const Block & block) @@ -646,6 +653,7 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) if (!current_block.rows()) return; } + auto prev_keys_num = hash_join->getTotalRowCount(); hash_join->addJoinedBlock(current_block, /* check_limits = */ false); if (!hasMemoryOverflow(hash_join)) @@ -654,7 +662,6 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) current_block = {}; auto right_blocks = hash_join->releaseJoinedBlocks(/* restructure */ false); - hash_join = nullptr; buckets_snapshot = rehashBuckets(buckets_snapshot.size() * 2); @@ -674,7 +681,7 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) current_block = concatenateBlocks(current_blocks); } - hash_join = makeInMemoryJoin(); + hash_join = makeInMemoryJoin(prev_keys_num); if (current_block.rows() > 0) hash_join->addJoinedBlock(current_block, /* check_limits = */ false); diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index eb39ee09208..ec611f373ed 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -90,7 +90,8 @@ public: private: void initBuckets(); /// Create empty join for in-memory processing. - InMemoryJoinPtr makeInMemoryJoin(); + /// reserve_num for reserving space in hash table. 
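
The `GraceHashJoin` change above reuses the previous in-memory join's row count (`prev_keys_num`) as a reservation hint for the next bucket, so the hash table is sized once up front instead of growing through repeated rehashes. A small sketch of the idea follows, with `std::unordered_map` standing in for the join's hash table; the real code reserves space on the per-variant maps inside `HashJoin`, not on `unordered_map`.

```cpp
#include <cstddef>
#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

// Toy "in-memory join": just a hash table from key to payload.
struct ToyInMemoryJoin
{
    std::unordered_map<long, std::string> map;

    explicit ToyInMemoryJoin(size_t reserve_num = 0)
    {
        if (reserve_num)
            map.reserve(reserve_num); // pre-size buckets, mirroring the new reserve_num parameter
    }

    void add(long key, std::string value) { map.emplace(key, std::move(value)); }
    size_t getTotalRowCount() const { return map.size(); }
};

std::unique_ptr<ToyInMemoryJoin> makeInMemoryJoin(size_t reserve_num = 0)
{
    return std::make_unique<ToyInMemoryJoin>(reserve_num);
}

int main()
{
    // First bucket: no prior knowledge, start with the default capacity.
    auto join = makeInMemoryJoin();
    for (long key = 0; key < 100000; ++key)
        join->add(key, "row");

    // Next bucket: assume it is roughly the same size as the previous one and
    // reserve up front -- the prev_keys_num heuristic from the diff.
    const size_t prev_keys_num = join->getTotalRowCount();
    join = makeInMemoryJoin(prev_keys_num);
    std::cout << "reserved for ~" << prev_keys_num << " keys before filling the next bucket\n";
}
```
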
+ InMemoryJoinPtr makeInMemoryJoin(size_t reserve_num = 0); /// Add right table block to the @join. Calls @rehash on overflow. void addJoinedBlockImpl(Block block); diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 0af33a8bd20..146b57049a6 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -217,7 +217,7 @@ static void correctNullabilityInplace(ColumnWithTypeAndName & column, bool nulla JoinCommon::removeColumnNullability(column); } -HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_sample_block_, bool any_take_last_row_) +HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_sample_block_, bool any_take_last_row_, size_t reserve_num) : table_join(table_join_) , kind(table_join->kind()) , strictness(table_join->strictness()) @@ -302,7 +302,7 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s } for (auto & maps : data->maps) - dataMapInit(maps); + dataMapInit(maps, reserve_num); } HashJoin::Type HashJoin::chooseMethod(JoinKind kind, const ColumnRawPtrs & key_columns, Sizes & key_sizes) @@ -454,13 +454,15 @@ struct KeyGetterForType using Type = typename KeyGetterForTypeImpl::Type; }; -void HashJoin::dataMapInit(MapsVariant & map) +void HashJoin::dataMapInit(MapsVariant & map, size_t reserve_num) { if (kind == JoinKind::Cross) return; joinDispatchInit(kind, strictness, map); joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.create(data->type); }); + if (reserve_num) + joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { map_.reserve(data->type, reserve_num); }); } bool HashJoin::empty() const diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 50eda4482bd..58e47432d41 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -146,7 +146,7 @@ public: class HashJoin : public IJoin { public: - HashJoin(std::shared_ptr table_join_, const Block & right_sample_block, bool any_take_last_row_ = false); + HashJoin(std::shared_ptr table_join_, const Block & right_sample_block, bool any_take_last_row_ = false, size_t reserve_num = 0); ~HashJoin() override; @@ -217,6 +217,16 @@ public: M(keys256) \ M(hashed) + /// Only for maps using hash table. 
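
`HashJoin::reserve()` above dispatches over the map variants with an X-macro, calling `reserve(num)` only on the variants that are actually backed by a hash table. The following self-contained sketch shows the same X-macro pattern over a tagged set of maps; the variant names mirror the diff, but the container types here are plain standard hash maps chosen for illustration.

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>

// X-macro listing only the variants that are hash tables, in the spirit of
// APPLY_FOR_HASH_JOIN_VARIANTS in the diff (names illustrative).
#define APPLY_FOR_HASH_VARIANTS(M) \
    M(key64)                       \
    M(key_string)

enum class Type { EMPTY, key64, key_string };

struct MapsVariant
{
    Type type = Type::EMPTY;
    std::unordered_map<uint64_t, size_t> key64;
    std::unordered_map<std::string, size_t> key_string;

    // One switch generated from the macro: each case expands to NAME.reserve(num).
    void reserve(size_t num)
    {
        switch (type)
        {
            case Type::EMPTY:
                break;
        #define M(NAME) \
            case Type::NAME: NAME.reserve(num); break;
            APPLY_FOR_HASH_VARIANTS(M)
        #undef M
        }
    }
};

int main()
{
    MapsVariant maps;
    maps.type = Type::key64;
    maps.reserve(1 << 20); // pre-size only the selected variant
    std::cout << "bucket_count after reserve: " << maps.key64.bucket_count() << "\n";
}
```
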
+ #define APPLY_FOR_HASH_JOIN_VARIANTS(M) \ + M(key32) \ + M(key64) \ + M(key_string) \ + M(key_fixed_string) \ + M(keys128) \ + M(keys256) \ + M(hashed) + /// Used for reading from StorageJoin and applying joinGet function #define APPLY_FOR_JOIN_VARIANTS_LIMITED(M) \ @@ -266,6 +276,22 @@ public: } } + void reserve(Type which, size_t num) + { + switch (which) + { + case Type::EMPTY: break; + case Type::CROSS: break; + case Type::key8: break; + case Type::key16: break; + + #define M(NAME) \ + case Type::NAME: NAME->reserve(num); break; + APPLY_FOR_HASH_JOIN_VARIANTS(M) + #undef M + } + } + size_t getTotalRowCount(Type which) const { switch (which) @@ -409,7 +435,7 @@ private: /// If set HashJoin instance is not available for modification (addJoinedBlock) TableLockHolder storage_join_lock = nullptr; - void dataMapInit(MapsVariant &); + void dataMapInit(MapsVariant &, size_t); void initRightBlockStructure(Block & saved_block_sample); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d68f9c8e6a6..ebaf88ea5d5 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -116,6 +116,7 @@ namespace ErrorCodes extern const int ACCESS_DENIED; extern const int UNKNOWN_IDENTIFIER; extern const int BAD_ARGUMENTS; + extern const int SUPPORT_IS_DISABLED; } /// Assumes `storage` is set and the table filter (row-level security) is not empty. @@ -385,6 +386,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.ignore_projections = options.ignore_projections; query_info.is_projection_query = options.is_projection_query; + query_info.is_internal = options.is_internal; initSettings(); const Settings & settings = context->getSettingsRef(); @@ -408,6 +410,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( ApplyWithSubqueryVisitor().visit(query_ptr); } + query_info.query = query_ptr->clone(); query_info.original_query = query_ptr->clone(); if (settings.count_distinct_optimization) @@ -455,25 +458,35 @@ InterpreterSelectQuery::InterpreterSelectQuery( } } - if (joined_tables.tablesCount() > 1 && (!settings.parallel_replicas_custom_key.value.empty() || settings.allow_experimental_parallel_reading_from_replicas)) + /// Check support for JOINs for parallel replicas + if (joined_tables.tablesCount() > 1 && (!settings.parallel_replicas_custom_key.value.empty() || settings.allow_experimental_parallel_reading_from_replicas > 0)) { - LOG_WARNING(log, "Joins are not supported with parallel replicas. Query will be executed without using them."); - context->setSetting("allow_experimental_parallel_reading_from_replicas", false); - context->setSetting("parallel_replicas_custom_key", String{""}); + if (settings.allow_experimental_parallel_reading_from_replicas == 1) + { + LOG_WARNING(log, "JOINs are not supported with parallel replicas. Query will be executed without using them."); + context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + context->setSetting("parallel_replicas_custom_key", String{""}); + } + else if (settings.allow_experimental_parallel_reading_from_replicas == 2) + { + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JOINs are not supported with parallel replicas"); + } } - /// Try to execute query without parallel replicas if we find that there is a FINAL modifier there. 
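
In the interpreter change above, `allow_experimental_parallel_reading_from_replicas` stops acting as a plain boolean: a value of 1 means "warn and silently fall back to a normal query", while 2 means "refuse and throw". A reduced sketch of that dispatch is shown below; the `Settings` struct and the plain exception are hypothetical stand-ins for the ClickHouse context and the SUPPORT_IS_DISABLED error code.

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for the relevant settings; in the diff the value is an
// integer setting, which is why the checks compare against 1 and 2.
struct Settings
{
    uint64_t allow_experimental_parallel_reading_from_replicas = 0;
    std::string parallel_replicas_custom_key;
};

// Mirrors the new JOIN/FINAL handling: 0 = feature off, 1 = warn and disable,
// 2 = hard error when the query shape is unsupported.
void checkUnsupportedFeature(Settings & settings, const std::string & feature)
{
    const bool parallel_replicas_requested
        = !settings.parallel_replicas_custom_key.empty()
        || settings.allow_experimental_parallel_reading_from_replicas > 0;

    if (!parallel_replicas_requested)
        return;

    if (settings.allow_experimental_parallel_reading_from_replicas == 1)
    {
        std::cerr << feature << " is not supported with parallel replicas. "
                     "Query will be executed without using them.\n";
        settings.allow_experimental_parallel_reading_from_replicas = 0;
        settings.parallel_replicas_custom_key.clear();
    }
    else if (settings.allow_experimental_parallel_reading_from_replicas == 2)
    {
        throw std::runtime_error(feature + " is not supported with parallel replicas");
    }
}

int main()
{
    Settings soft;
    soft.allow_experimental_parallel_reading_from_replicas = 1;
    checkUnsupportedFeature(soft, "FINAL modifier"); // warns and disables

    Settings strict;
    strict.allow_experimental_parallel_reading_from_replicas = 2;
    try
    {
        checkUnsupportedFeature(strict, "JOIN");
    }
    catch (const std::exception & e)
    {
        std::cout << "rejected: " << e.what() << '\n';
    }
}
```
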
- bool is_query_with_final = false; - if (query_info.table_expression_modifiers) - is_query_with_final = query_info.table_expression_modifiers->hasFinal(); - else if (query_info.query) - is_query_with_final = query_info.query->as().final(); - - if (is_query_with_final && (!settings.parallel_replicas_custom_key.value.empty() || settings.allow_experimental_parallel_reading_from_replicas)) + /// Check support for FINAL for parallel replicas + bool is_query_with_final = isQueryWithFinal(query_info); + if (is_query_with_final && (!settings.parallel_replicas_custom_key.value.empty() || settings.allow_experimental_parallel_reading_from_replicas > 0)) { - LOG_WARNING(log, "FINAL modifier is supported with parallel replicas. Will try to execute the query without using them."); - context->setSetting("allow_experimental_parallel_reading_from_replicas", false); - context->setSetting("parallel_replicas_custom_key", String{""}); + if (settings.allow_experimental_parallel_reading_from_replicas == 1) + { + LOG_WARNING(log, "FINAL modifier is not supported with parallel replicas. Query will be executed without using them."); + context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + context->setSetting("parallel_replicas_custom_key", String{""}); + } + else if (settings.allow_experimental_parallel_reading_from_replicas == 2) + { + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "FINAL modifier is not supported with parallel replicas"); + } } /// Rewrite JOINs @@ -2994,20 +3007,27 @@ void InterpreterSelectQuery::executeWithFill(QueryPlan & query_plan) auto & query = getSelectQuery(); if (query.orderBy()) { - SortDescription order_descr = getSortDescription(query, context); - SortDescription fill_descr; - for (auto & desc : order_descr) + SortDescription sort_description = getSortDescription(query, context); + SortDescription fill_description; + for (auto & desc : sort_description) { if (desc.with_fill) - fill_descr.push_back(desc); + fill_description.push_back(desc); } - if (fill_descr.empty()) + if (fill_description.empty()) return; InterpolateDescriptionPtr interpolate_descr = getInterpolateDescription(query, source_header, result_header, syntax_analyzer_result->aliases, context); - auto filling_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(fill_descr), interpolate_descr); + + const Settings & settings = context->getSettingsRef(); + auto filling_step = std::make_unique( + query_plan.getCurrentDataStream(), + std::move(sort_description), + std::move(fill_description), + interpolate_descr, + settings.use_with_fill_by_sorting_prefix); query_plan.addStep(std::move(filling_step)); } } @@ -3126,4 +3146,14 @@ void InterpreterSelectQuery::initSettings() } } +bool InterpreterSelectQuery::isQueryWithFinal(const SelectQueryInfo & info) +{ + bool result = info.query->as().final(); + if (info.table_expression_modifiers) + result |= info.table_expression_modifiers->hasFinal(); + + return result; +} + + } diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index 58fddb8ffe9..e39dd675136 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -131,6 +131,8 @@ public: static SortDescription getSortDescription(const ASTSelectQuery & query, const ContextPtr & context); static UInt64 getLimitForSorting(const ASTSelectQuery & query, const ContextPtr & context); + static bool isQueryWithFinal(const SelectQueryInfo & info); + private: InterpreterSelectQuery( const ASTPtr & 
query_ptr_, diff --git a/src/Interpreters/OptimizeDateFilterVisitor.cpp b/src/Interpreters/OptimizeDateFilterVisitor.cpp new file mode 100644 index 00000000000..58e1b3335f9 --- /dev/null +++ b/src/Interpreters/OptimizeDateFilterVisitor.cpp @@ -0,0 +1,121 @@ +#include + +#include +#include +#include +#include +#include + + +namespace DB +{ + +ASTPtr generateOptimizedDateFilterAST(const String & comparator, const String & converter, const String & column, UInt64 year) +{ + const DateLUTImpl & date_lut = DateLUT::instance(); + + if (converter != "toYear") return {}; + + String start_date = date_lut.dateToString(date_lut.makeDayNum(year, 1, 1)); + String end_date = date_lut.dateToString(date_lut.makeDayNum(year, 12, 31)); + + if (comparator == "equals") + { + return makeASTFunction("and", + makeASTFunction("greaterOrEquals", + std::make_shared(column), + std::make_shared(start_date) + ), + makeASTFunction("lessOrEquals", + std::make_shared(column), + std::make_shared(end_date) + ) + ); + } + else if (comparator == "notEquals") + { + return makeASTFunction("or", + makeASTFunction("less", + std::make_shared(column), + std::make_shared(start_date) + ), + makeASTFunction("greater", + std::make_shared(column), + std::make_shared(end_date) + ) + ); + } + else if (comparator == "less" || comparator == "greaterOrEquals") + { + return makeASTFunction(comparator, + std::make_shared(column), + std::make_shared(start_date) + ); + } + else + { + return makeASTFunction(comparator, + std::make_shared(column), + std::make_shared(end_date) + ); + } +} + +bool rewritePredicateInPlace(ASTFunction & function, ASTPtr & ast) +{ + const static std::unordered_map swap_relations = { + {"equals", "equals"}, + {"notEquals", "notEquals"}, + {"less", "greater"}, + {"greater", "less"}, + {"lessOrEquals", "greaterOrEquals"}, + {"greaterOrEquals", "lessOrEquals"}, + }; + + if (!swap_relations.contains(function.name)) return false; + + if (!function.arguments || function.arguments->children.size() != 2) return false; + + size_t func_id = function.arguments->children.size(); + + for (size_t i = 0; i < function.arguments->children.size(); i++) + { + if (const auto * func = function.arguments->children[i]->as(); func) + { + if (func->name == "toYear") + { + func_id = i; + } + } + } + + if (func_id == function.arguments->children.size()) return false; + + size_t literal_id = 1 - func_id; + const auto * literal = function.arguments->children[literal_id]->as(); + + if (!literal || literal->value.getType() != Field::Types::UInt64) return false; + + UInt64 compare_to = literal->value.get(); + String comparator = literal_id > func_id ? 
function.name : swap_relations.at(function.name); + + const auto * func = function.arguments->children[func_id]->as(); + const auto * column_id = func->arguments->children.at(0)->as(); + + if (!column_id) return false; + + String column = column_id->name(); + + const auto new_ast = generateOptimizedDateFilterAST(comparator, func->name, column, compare_to); + + if (!new_ast) return false; + + ast = new_ast; + return true; +} + +void OptimizeDateFilterInPlaceData::visit(ASTFunction & function, ASTPtr & ast) const +{ + rewritePredicateInPlace(function, ast); +} +} diff --git a/src/Interpreters/OptimizeDateFilterVisitor.h b/src/Interpreters/OptimizeDateFilterVisitor.h new file mode 100644 index 00000000000..84394372901 --- /dev/null +++ b/src/Interpreters/OptimizeDateFilterVisitor.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace DB +{ + +class ASTFunction; + +/// Rewrite the predicates in place +class OptimizeDateFilterInPlaceData +{ +public: + using TypeToVisit = ASTFunction; + void visit(ASTFunction & function, ASTPtr & ast) const; +}; + +using OptimizeDateFilterInPlaceMatcher = OneTypeMatcher; +using OptimizeDateFilterInPlaceVisitor = InDepthNodeVisitor; +} diff --git a/src/Interpreters/ProcessorsProfileLog.cpp b/src/Interpreters/ProcessorsProfileLog.cpp index f6ce801605a..e78a07bb752 100644 --- a/src/Interpreters/ProcessorsProfileLog.cpp +++ b/src/Interpreters/ProcessorsProfileLog.cpp @@ -29,6 +29,7 @@ NamesAndTypesList ProcessorProfileLogElement::getNamesAndTypes() {"plan_step", std::make_shared()}, {"plan_group", std::make_shared()}, + {"initial_query_id", std::make_shared()}, {"query_id", std::make_shared()}, {"name", std::make_shared(std::make_shared())}, {"elapsed_us", std::make_shared()}, @@ -60,6 +61,7 @@ void ProcessorProfileLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(plan_step); columns[i++]->insert(plan_group); + columns[i++]->insertData(initial_query_id.data(), initial_query_id.size()); columns[i++]->insertData(query_id.data(), query_id.size()); columns[i++]->insertData(processor_name.data(), processor_name.size()); columns[i++]->insert(elapsed_us); diff --git a/src/Interpreters/ProcessorsProfileLog.h b/src/Interpreters/ProcessorsProfileLog.h index 07837bdd10e..81d58edd913 100644 --- a/src/Interpreters/ProcessorsProfileLog.h +++ b/src/Interpreters/ProcessorsProfileLog.h @@ -19,6 +19,7 @@ struct ProcessorProfileLogElement UInt64 plan_step{}; UInt64 plan_group{}; + String initial_query_id; String query_id; String processor_name; diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index 4746954edf2..ec0315c2f95 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -70,6 +70,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"databases", array_low_cardinality_string}, {"tables", array_low_cardinality_string}, {"columns", array_low_cardinality_string}, + {"partitions", array_low_cardinality_string}, {"projections", array_low_cardinality_string}, {"views", array_low_cardinality_string}, {"exception_code", std::make_shared()}, @@ -176,6 +177,7 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const auto & column_databases = typeid_cast(*columns[i++]); auto & column_tables = typeid_cast(*columns[i++]); auto & column_columns = typeid_cast(*columns[i++]); + auto & column_partitions = typeid_cast(*columns[i++]); auto & column_projections = typeid_cast(*columns[i++]); auto & column_views = typeid_cast(*columns[i++]); @@ -194,6 +196,7 @@ void 
QueryLogElement::appendToBlock(MutableColumns & columns) const fill_column(query_databases, column_databases); fill_column(query_tables, column_tables); fill_column(query_columns, column_columns); + fill_column(query_partitions, column_partitions); fill_column(query_projections, column_projections); fill_column(query_views, column_views); } diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h index 44780f530e0..570d1297239 100644 --- a/src/Interpreters/QueryLog.h +++ b/src/Interpreters/QueryLog.h @@ -65,6 +65,7 @@ struct QueryLogElement std::set query_databases; std::set query_tables; std::set query_columns; + std::set query_partitions; std::set query_projections; std::set query_views; diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index c38b3c79026..825114b20b7 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -677,6 +678,21 @@ void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, ContextPtr context) RemoveInjectiveFunctionsVisitor(data).visit(query); } +void optimizeDateFilters(ASTSelectQuery * select_query) +{ + /// Predicates in HAVING clause has been moved to WHERE clause. + if (select_query->where()) + { + OptimizeDateFilterInPlaceVisitor::Data data; + OptimizeDateFilterInPlaceVisitor(data).visit(select_query->refWhere()); + } + if (select_query->prewhere()) + { + OptimizeDateFilterInPlaceVisitor::Data data; + OptimizeDateFilterInPlaceVisitor(data).visit(select_query->refPrewhere()); + } +} + void transformIfStringsIntoEnum(ASTPtr & query) { std::unordered_set function_names = {"if", "transform"}; @@ -780,6 +796,9 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, tables_with_columns, result.storage_snapshot->metadata, result.storage); } + /// Rewrite date filters to avoid the calls of converters such as toYear, toYYYYMM, toISOWeek, etc. + optimizeDateFilters(select_query); + /// GROUP BY injective function elimination. 
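
The new `OptimizeDateFilterVisitor` wired in above turns predicates such as `toYear(date_column) = 2023` into raw range comparisons on the column itself, so index and partition pruning can be applied without evaluating the converter per row. Below is a standalone sketch of the year-to-range mapping on plain ISO date strings; the real pass builds AST function nodes and also handles the case where the literal and the `toYear()` call are swapped.

```cpp
#include <iostream>
#include <optional>
#include <string>

// For a predicate of the form toYear(col) <cmp> <year>, compute the equivalent
// range predicate on col directly. Dates are plain strings here; the real
// rewrite emits AST functions (and / or / less / greaterOrEquals / ...).
std::optional<std::string> rewriteToYearPredicate(const std::string & comparator,
                                                  const std::string & column,
                                                  unsigned year)
{
    const std::string start_date = std::to_string(year) + "-01-01";
    const std::string end_date = std::to_string(year) + "-12-31";

    if (comparator == "equals")
        return column + " >= '" + start_date + "' AND " + column + " <= '" + end_date + "'";
    if (comparator == "notEquals")
        return column + " < '" + start_date + "' OR " + column + " > '" + end_date + "'";
    if (comparator == "less" || comparator == "greaterOrEquals")
        return column + (comparator == "less" ? " < '" : " >= '") + start_date + "'";
    if (comparator == "greater" || comparator == "lessOrEquals")
        return column + (comparator == "greater" ? " > '" : " <= '") + end_date + "'";
    return std::nullopt; // comparator this sketch does not know how to rewrite
}

int main()
{
    // toYear(event_date) = 2023  ->  event_date >= '2023-01-01' AND event_date <= '2023-12-31'
    std::cout << *rewriteToYearPredicate("equals", "event_date", 2023) << '\n';
    // toYear(event_date) < 2023  ->  event_date < '2023-01-01'
    std::cout << *rewriteToYearPredicate("less", "event_date", 2023) << '\n';
}
```
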
optimizeGroupBy(select_query, context); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index 9a2750f399c..08753b9dfb7 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -837,6 +837,7 @@ static std::tuple executeQueryImpl( elem.query_databases = info.databases; elem.query_tables = info.tables; elem.query_columns = info.columns; + elem.query_partitions = info.partitions; elem.query_projections = info.projections; elem.query_views = info.views; } @@ -901,6 +902,7 @@ static std::tuple executeQueryImpl( element.query_databases.insert(access_info.databases.begin(), access_info.databases.end()); element.query_tables.insert(access_info.tables.begin(), access_info.tables.end()); element.query_columns.insert(access_info.columns.begin(), access_info.columns.end()); + element.query_partitions.insert(access_info.partitions.begin(), access_info.partitions.end()); element.query_projections.insert(access_info.projections.begin(), access_info.projections.end()); element.query_views.insert(access_info.views.begin(), access_info.views.end()); @@ -1003,6 +1005,7 @@ static std::tuple executeQueryImpl( ProcessorProfileLogElement processor_elem; processor_elem.event_time = elem.event_time; processor_elem.event_time_microseconds = elem.event_time_microseconds; + processor_elem.initial_query_id = elem.client_info.initial_query_id; processor_elem.query_id = elem.client_info.current_query_id; auto get_proc_id = [](const IProcessor & proc) -> UInt64 diff --git a/src/Interpreters/interpretSubquery.cpp b/src/Interpreters/interpretSubquery.cpp index 2358b0ab42a..550fa2912ba 100644 --- a/src/Interpreters/interpretSubquery.cpp +++ b/src/Interpreters/interpretSubquery.cpp @@ -113,7 +113,7 @@ std::shared_ptr interpretSubquery( } /// We don't want to execute reading for subqueries in parallel - subquery_context->setSetting("allow_experimental_parallel_reading_from_replicas", false); + subquery_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); return std::make_shared(query, subquery_context, subquery_options, required_source_columns); } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index d036c895fbb..3c9df29534d 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -83,6 +83,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int TOO_DEEP_SUBQUERIES; extern const int NOT_IMPLEMENTED; + extern const int SUPPORT_IS_DISABLED; } /** ClickHouse query planner. 
@@ -622,7 +623,14 @@ void addWithFillStepIfNeeded(QueryPlan & query_plan, interpolate_description = std::make_shared(std::move(interpolate_actions_dag), empty_aliases); } - auto filling_step = std::make_unique(query_plan.getCurrentDataStream(), std::move(fill_description), interpolate_description); + const auto & query_context = planner_context->getQueryContext(); + const Settings & settings = query_context->getSettingsRef(); + auto filling_step = std::make_unique( + query_plan.getCurrentDataStream(), + sort_description, + std::move(fill_description), + interpolate_description, + settings.use_with_fill_by_sorting_prefix); query_plan.addStep(std::move(filling_step)); } @@ -1185,16 +1193,25 @@ void Planner::buildPlanForQueryNode() const auto & settings = query_context->getSettingsRef(); if (planner_context->getTableExpressionNodeToData().size() > 1 - && (!settings.parallel_replicas_custom_key.value.empty() || settings.allow_experimental_parallel_reading_from_replicas)) + && (!settings.parallel_replicas_custom_key.value.empty() || settings.allow_experimental_parallel_reading_from_replicas > 0)) { - LOG_WARNING( - &Poco::Logger::get("Planner"), "Joins are not supported with parallel replicas. Query will be executed without using them."); + if (settings.allow_experimental_parallel_reading_from_replicas == 1) + { + LOG_WARNING( + &Poco::Logger::get("Planner"), "JOINs are not supported with parallel replicas. Query will be executed without using them."); - auto & mutable_context = planner_context->getMutableQueryContext(); - mutable_context->setSetting("allow_experimental_parallel_reading_from_replicas", false); - mutable_context->setSetting("parallel_replicas_custom_key", String{""}); + auto & mutable_context = planner_context->getMutableQueryContext(); + mutable_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); + mutable_context->setSetting("parallel_replicas_custom_key", String{""}); + } + else if (settings.allow_experimental_parallel_reading_from_replicas == 2) + { + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JOINs are not supported with parallel replicas"); + } } + /// TODO: Also disable parallel replicas in case of FINAL + auto top_level_identifiers = collectTopLevelColumnIdentifiers(query_tree, planner_context); auto join_tree_query_plan = buildJoinTreeQueryPlan(query_tree, select_query_info, @@ -1432,7 +1449,8 @@ void Planner::buildPlanForQueryNode() addLimitByStep(query_plan, limit_by_analysis_result, query_node); } - addWithFillStepIfNeeded(query_plan, query_analysis_result, planner_context, query_node); + if (query_node.hasOrderBy()) + addWithFillStepIfNeeded(query_plan, query_analysis_result, planner_context, query_node); bool apply_offset = query_processing_info.getToStage() != QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit; diff --git a/src/Processors/QueryPlan/FillingStep.cpp b/src/Processors/QueryPlan/FillingStep.cpp index 20d7d6d0f8f..65c9cf11661 100644 --- a/src/Processors/QueryPlan/FillingStep.cpp +++ b/src/Processors/QueryPlan/FillingStep.cpp @@ -27,9 +27,17 @@ static ITransformingStep::Traits getTraits() }; } -FillingStep::FillingStep(const DataStream & input_stream_, SortDescription sort_description_, InterpolateDescriptionPtr interpolate_description_) +FillingStep::FillingStep( + const DataStream & input_stream_, + SortDescription sort_description_, + SortDescription fill_description_, + InterpolateDescriptionPtr interpolate_description_, + bool use_with_fill_by_sorting_prefix_) : ITransformingStep(input_stream_, 
FillingTransform::transformHeader(input_stream_.header, sort_description_), getTraits()) - , sort_description(std::move(sort_description_)), interpolate_description(interpolate_description_) + , sort_description(std::move(sort_description_)) + , fill_description(std::move(fill_description_)) + , interpolate_description(interpolate_description_) + , use_with_fill_by_sorting_prefix(use_with_fill_by_sorting_prefix_) { if (!input_stream_.has_single_port) throw Exception(ErrorCodes::LOGICAL_ERROR, "FillingStep expects single input"); @@ -40,9 +48,10 @@ void FillingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) -> ProcessorPtr { if (stream_type == QueryPipelineBuilder::StreamType::Totals) - return std::make_shared(header, sort_description); + return std::make_shared(header, fill_description); - return std::make_shared(header, sort_description, std::move(interpolate_description)); + return std::make_shared( + header, sort_description, fill_description, std::move(interpolate_description), use_with_fill_by_sorting_prefix); }); } diff --git a/src/Processors/QueryPlan/FillingStep.h b/src/Processors/QueryPlan/FillingStep.h index 4e1b5b3654d..3e0e1e79060 100644 --- a/src/Processors/QueryPlan/FillingStep.h +++ b/src/Processors/QueryPlan/FillingStep.h @@ -10,7 +10,12 @@ namespace DB class FillingStep : public ITransformingStep { public: - FillingStep(const DataStream & input_stream_, SortDescription sort_description_, InterpolateDescriptionPtr interpolate_description_); + FillingStep( + const DataStream & input_stream_, + SortDescription sort_description_, + SortDescription fill_description_, + InterpolateDescriptionPtr interpolate_description_, + bool use_with_fill_by_sorting_prefix); String getName() const override { return "Filling"; } @@ -25,7 +30,9 @@ private: void updateOutputStream() override; SortDescription sort_description; + SortDescription fill_description; InterpolateDescriptionPtr interpolate_description; + const bool use_with_fill_by_sorting_prefix; }; } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 1c5b625656c..b38c3422be1 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -99,7 +100,6 @@ namespace ErrorCodes extern const int INDEX_NOT_USED; extern const int LOGICAL_ERROR; extern const int TOO_MANY_ROWS; - extern const int SUPPORT_IS_DISABLED; } static MergeTreeReaderSettings getMergeTreeReaderSettings( @@ -1314,7 +1314,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( auto reader_settings = getMergeTreeReaderSettings(context, query_info); bool use_skip_indexes = settings.use_skip_indexes; - bool final = isFinal(query_info); + bool final = InterpreterSelectQuery::isQueryWithFinal(query_info); if (final && !settings.use_skip_indexes_if_final) use_skip_indexes = false; @@ -1377,7 +1377,7 @@ bool ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, /// Disable read-in-order optimization for reverse order with final. /// Otherwise, it can lead to incorrect final behavior because the implementation may rely on the reading in direct order). 
- if (direction != 1 && isFinal(query_info)) + if (direction != 1 && isQueryWithFinal()) return false; auto order_info = std::make_shared(SortDescription{}, prefix_size, direction, limit); @@ -1500,11 +1500,7 @@ ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const bool ReadFromMergeTree::isQueryWithFinal() const { - const auto & select = query_info.query->as(); - if (query_info.table_expression_modifiers) - return query_info.table_expression_modifiers->hasFinal(); - else - return select.final(); + return InterpreterSelectQuery::isQueryWithFinal(query_info); } bool ReadFromMergeTree::isQueryWithSampling() const @@ -1522,7 +1518,7 @@ bool ReadFromMergeTree::isQueryWithSampling() const Pipe ReadFromMergeTree::spreadMarkRanges( RangesInDataParts && parts_with_ranges, size_t num_streams, AnalysisResult & result, ActionsDAGPtr & result_projection) { - bool final = isQueryWithFinal(); + const bool final = isQueryWithFinal(); const auto & input_order_info = query_info.getInputOrderInfo(); Names column_names_to_read = result.column_names_to_read; @@ -1539,8 +1535,7 @@ Pipe ReadFromMergeTree::spreadMarkRanges( if (final) { - if (is_parallel_reading_from_replicas) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "FINAL modifier is not supported with parallel replicas"); + chassert(!is_parallel_reading_from_replicas); if (output_each_partition_through_separate_port) throw Exception(ErrorCodes::LOGICAL_ERROR, "Optimisation isn't supposed to be used for queries with final"); @@ -1618,6 +1613,18 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons result.selected_marks, result.selected_ranges); + // Adding partition info to QueryAccessInfo. + if (context->hasQueryContext() && !query_info.is_internal) + { + Names partition_names; + for (const auto & part : result.parts_with_ranges) + { + partition_names.emplace_back( + fmt::format("{}.{}", data.getStorageID().getFullNameNotQuoted(), part.data_part->info.partition_id)); + } + context->getQueryContext()->addQueryAccessInfo(partition_names); + } + ProfileEvents::increment(ProfileEvents::SelectedParts, result.selected_parts); ProfileEvents::increment(ProfileEvents::SelectedRanges, result.selected_ranges); ProfileEvents::increment(ProfileEvents::SelectedMarks, result.selected_marks); @@ -1948,15 +1955,6 @@ void ReadFromMergeTree::describeIndexes(JSONBuilder::JSONMap & map) const } } -bool ReadFromMergeTree::isFinal(const SelectQueryInfo & query_info) -{ - if (query_info.table_expression_modifiers) - return query_info.table_expression_modifiers->hasFinal(); - - const auto & select = query_info.query->as(); - return select.final(); -} - bool MergeTreeDataSelectAnalysisResult::error() const { return std::holds_alternative(result); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h index 5e4ba117967..545ffe84e0c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.h +++ b/src/Processors/QueryPlan/ReadFromMergeTree.h @@ -159,7 +159,6 @@ public: void updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info_value); - static bool isFinal(const SelectQueryInfo & query_info); bool isQueryWithFinal() const; bool isQueryWithSampling() const; diff --git a/src/Processors/Sources/PostgreSQLSource.cpp b/src/Processors/Sources/PostgreSQLSource.cpp index 77c2fc41aa1..115e24d5740 100644 --- a/src/Processors/Sources/PostgreSQLSource.cpp +++ b/src/Processors/Sources/PostgreSQLSource.cpp @@ -176,12 +176,10 @@ template void PostgreSQLSource::onFinish() { if (stream) - 
{ - stream->complete(); + stream->close(); - if (auto_commit) - tx->commit(); - } + if (tx && auto_commit) + tx->commit(); } template diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 4c5b74aad7c..e75f83b8c80 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -187,25 +187,31 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & } FillingTransform::FillingTransform( - const Block & header_, const SortDescription & sort_description_, InterpolateDescriptionPtr interpolate_description_) - : ISimpleTransform(header_, transformHeader(header_, sort_description_), true) - , sort_description(sort_description_) - , interpolate_description(interpolate_description_) - , filling_row(sort_description_) - , next_row(sort_description_) + const Block & header_, + const SortDescription & sort_description_, + const SortDescription & fill_description_, + InterpolateDescriptionPtr interpolate_description_, + const bool use_with_fill_by_sorting_prefix_) + : ISimpleTransform(header_, transformHeader(header_, fill_description_), true) + , sort_description(sort_description_) + , fill_description(fill_description_) + , interpolate_description(interpolate_description_) + , filling_row(fill_description_) + , next_row(fill_description_) + , use_with_fill_by_sorting_prefix(use_with_fill_by_sorting_prefix_) { if (interpolate_description) interpolate_actions = std::make_shared(interpolate_description->actions); std::vector is_fill_column(header_.columns()); - for (size_t i = 0, size = sort_description.size(); i < size; ++i) + for (size_t i = 0, size = fill_description.size(); i < size; ++i) { - if (interpolate_description && interpolate_description->result_columns_set.contains(sort_description[i].column_name)) + if (interpolate_description && interpolate_description->result_columns_set.contains(fill_description[i].column_name)) throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "Column '{}' is participating in ORDER BY ... 
WITH FILL expression and can't be INTERPOLATE output", - sort_description[i].column_name); + fill_description[i].column_name); - size_t block_position = header_.getPositionByName(sort_description[i].column_name); + size_t block_position = header_.getPositionByName(fill_description[i].column_name); is_fill_column[block_position] = true; fill_column_positions.push_back(block_position); @@ -226,21 +232,40 @@ FillingTransform::FillingTransform( "WITH FILL bound values cannot be negative for unsigned type {}", type->getName()); } } + logDebug("fill description", dumpSortDescription(fill_description)); std::set unique_positions; for (auto pos : fill_column_positions) if (!unique_positions.insert(pos).second) throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "Multiple WITH FILL for identical expressions is not supported in ORDER BY"); + if (use_with_fill_by_sorting_prefix) + { + /// build sorting prefix for first fill column + for (const auto & desc : sort_description) + { + if (desc.column_name == fill_description[0].column_name) + break; + + size_t pos = header_.getPositionByName(desc.column_name); + sort_prefix_positions.push_back(pos); + + sort_prefix.push_back(desc); + } + logDebug("sort prefix", dumpSortDescription(sort_prefix)); + last_range_sort_prefix.reserve(sort_prefix.size()); + } + size_t idx = 0; for (const ColumnWithTypeAndName & column : header_.getColumnsWithTypeAndName()) { if (interpolate_description) if (const auto & p = interpolate_description->required_columns_map.find(column.name); p != interpolate_description->required_columns_map.end()) - input_positions.emplace_back(idx, p->second); + input_positions.emplace_back(idx, p->second); - if (!is_fill_column[idx] && !(interpolate_description && interpolate_description->result_columns_set.contains(column.name))) + if (!is_fill_column[idx] && !(interpolate_description && interpolate_description->result_columns_set.contains(column.name)) + && sort_prefix_positions.end() == std::find(sort_prefix_positions.begin(), sort_prefix_positions.end(), idx)) other_column_positions.push_back(idx); ++idx; @@ -249,6 +274,20 @@ FillingTransform::FillingTransform( if (interpolate_description) for (const auto & name : interpolate_description->result_columns_order) interpolate_column_positions.push_back(header_.getPositionByName(name)); + + /// check conflict in positions between interpolate and sorting prefix columns + if (!sort_prefix_positions.empty() && !interpolate_column_positions.empty()) + { + std::unordered_set interpolate_positions(interpolate_column_positions.begin(), interpolate_column_positions.end()); + for (auto sort_prefix_pos : sort_prefix_positions) + { + if (interpolate_positions.contains(sort_prefix_pos)) + throw Exception( + ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "The same column in ORDER BY before WITH FILL (sorting prefix) and INTERPOLATE is not allowed. 
Column: {}", + (header_.begin() + sort_prefix_pos)->name); + } + } } /// prepare() is overrididen to call transform() after all chunks are processed @@ -313,9 +352,14 @@ void FillingTransform::interpolate(const MutableColumns & result_columns, Block using MutableColumnRawPtrs = std::vector; -static void insertFromFillingRow(const MutableColumnRawPtrs & filling_columns, const MutableColumnRawPtrs & interpolate_columns, const MutableColumnRawPtrs & other_columns, - const FillingRow & filling_row, const Block & interpolate_block) +void FillingTransform::insertFromFillingRow( + const MutableColumnRawPtrs & filling_columns, + const MutableColumnRawPtrs & interpolate_columns, + const MutableColumnRawPtrs & other_columns, + const Block & interpolate_block) { + logDebug("insertFromFillingRow", filling_row); + for (size_t i = 0, size = filling_columns.size(); i < size; ++i) { if (filling_row[i].isNull()) @@ -338,10 +382,14 @@ static void insertFromFillingRow(const MutableColumnRawPtrs & filling_columns, c for (auto * other_column : other_columns) other_column->insertDefault(); + + filling_row_inserted = true; } static void copyRowFromColumns(const MutableColumnRawPtrs & dest, const Columns & source, size_t row_num) { + chassert(dest.size() == source.size()); + for (size_t i = 0, size = source.size(); i < size; ++i) dest[i]->insertFrom(*source[i], row_num); } @@ -353,7 +401,7 @@ static void initColumnsByPositions( MutableColumnRawPtrs & output_columns_by_position, const std::vector & positions) { - for (size_t pos : positions) + for (const size_t pos : positions) { input_columns_by_positions.push_back(input_columns[pos]); output_columns_by_position.push_back(output_columns[pos].get()); @@ -364,10 +412,12 @@ void FillingTransform::initColumns( const Columns & input_columns, Columns & input_fill_columns, Columns & input_interpolate_columns, + Columns & input_sort_prefix_columns, Columns & input_other_columns, MutableColumns & output_columns, MutableColumnRawPtrs & output_fill_columns, MutableColumnRawPtrs & output_interpolate_columns, + MutableColumnRawPtrs & output_sort_prefix_columns, MutableColumnRawPtrs & output_other_columns) { Columns non_const_columns; @@ -382,65 +432,236 @@ void FillingTransform::initColumns( initColumnsByPositions(non_const_columns, input_fill_columns, output_columns, output_fill_columns, fill_column_positions); initColumnsByPositions( non_const_columns, input_interpolate_columns, output_columns, output_interpolate_columns, interpolate_column_positions); + initColumnsByPositions(non_const_columns, input_sort_prefix_columns, output_columns, output_sort_prefix_columns, sort_prefix_positions); initColumnsByPositions(non_const_columns, input_other_columns, output_columns, output_other_columns, other_column_positions); } bool FillingTransform::generateSuffixIfNeeded(const Columns & input_columns, MutableColumns & result_columns) { - logDebug("generateSuffixIfNeeded() filling_row", filling_row); - logDebug("generateSuffixIfNeeded() next_row", next_row); - logDebug("generateSuffixIfNeeded() first", first); - - /// Determines should we insert filling row before start generating next rows. 
- bool should_insert_first = next_row < filling_row || first; - - for (size_t i = 0, size = filling_row.size(); i < size; ++i) - next_row[i] = filling_row.getFillDescription(i).fill_to; - - logDebug("generateSuffixIfNeeded() next_row updated", next_row); - - if (!first && filling_row >= next_row) - { - logDebug("generateSuffixIfNeeded()", "no need to generate suffix"); - return false; - } - Columns input_fill_columns; Columns input_interpolate_columns; + Columns input_sort_prefix_columns; Columns input_other_columns; MutableColumnRawPtrs res_fill_columns; MutableColumnRawPtrs res_interpolate_columns; + MutableColumnRawPtrs res_sort_prefix_columns; MutableColumnRawPtrs res_other_columns; initColumns( input_columns, input_fill_columns, input_interpolate_columns, + input_sort_prefix_columns, input_other_columns, result_columns, res_fill_columns, res_interpolate_columns, + res_sort_prefix_columns, res_other_columns); - if (first) - filling_row.initFromDefaults(); + return generateSuffixIfNeeded(result_columns, res_fill_columns, res_interpolate_columns, res_sort_prefix_columns, res_other_columns); +} + +bool FillingTransform::generateSuffixIfNeeded( + const MutableColumns & result_columns, + MutableColumnRawPtrs res_fill_columns, + MutableColumnRawPtrs res_interpolate_columns, + MutableColumnRawPtrs res_sort_prefix_columns, + MutableColumnRawPtrs res_other_columns) +{ + logDebug("generateSuffixIfNeeded() filling_row", filling_row); + logDebug("generateSuffixIfNeeded() next_row", next_row); + + /// Determines if we should insert filling row before start generating next rows + bool should_insert_first = (next_row < filling_row && !filling_row_inserted) || next_row.isNull(); + logDebug("should_insert_first", should_insert_first); + + for (size_t i = 0, size = filling_row.size(); i < size; ++i) + next_row[i] = filling_row.getFillDescription(i).fill_to; + + logDebug("generateSuffixIfNeeded() next_row updated", next_row); + + if (filling_row >= next_row) + { + logDebug("generateSuffixIfNeeded()", "no need to generate suffix"); + return false; + } Block interpolate_block; if (should_insert_first && filling_row < next_row) { interpolate(result_columns, interpolate_block); - insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, filling_row, interpolate_block); + insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, interpolate_block); + /// fulfill sort prefix columns with last row values or defaults + if (!last_range_sort_prefix.empty()) + copyRowFromColumns(res_sort_prefix_columns, last_range_sort_prefix, 0); + else + for (auto * sort_prefix_column : res_sort_prefix_columns) + sort_prefix_column->insertDefault(); } - while (filling_row.next(next_row)) + bool filling_row_changed = false; + while (true) { + const auto [apply, changed] = filling_row.next(next_row); + filling_row_changed = changed; + if (!apply) + break; + interpolate(result_columns, interpolate_block); - insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, filling_row, interpolate_block); + insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, interpolate_block); + /// fulfill sort prefix columns with last row values or defaults + if (!last_range_sort_prefix.empty()) + copyRowFromColumns(res_sort_prefix_columns, last_range_sort_prefix, 0); + else + for (auto * sort_prefix_column : res_sort_prefix_columns) + sort_prefix_column->insertDefault(); } + /// new valid filling row was generated but not inserted + if 
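
`transformRange()` above operates on runs of rows that share the same sorting prefix, and `getRangeEnd()` finds where a run ends with a short linear probe (cheap for small groups) followed by a binary search (cheap for huge ones). The sketch below reproduces that search over a plain vector, where the predicate says whether two positions belong to the same group; it is an illustration under those assumptions, not the production code.

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

// Find the end of the run starting at `begin` for which pred(begin, pos) holds,
// assuming the predicate is true for a contiguous prefix of [begin, end).
// Mirrors getRangeEnd() from the diff: probe linearly first, then binary search.
template <typename Predicate>
size_t getRangeEnd(size_t begin, size_t end, Predicate pred)
{
    assert(begin < end);

    const size_t linear_probe_threshold = 16;
    const size_t linear_probe_end = std::min(begin + linear_probe_threshold, end);

    for (size_t pos = begin; pos < linear_probe_end; ++pos)
        if (!pred(begin, pos))
            return pos;

    size_t low = linear_probe_end;
    size_t high = end - 1;
    while (low <= high)
    {
        const size_t mid = low + (high - low) / 2;
        if (pred(begin, mid))
            low = mid + 1;
        else
        {
            high = mid - 1;
            end = mid;
        }
    }
    return end;
}

int main()
{
    // Rows sorted by a "prefix" column; fill ranges are the equal-prefix runs.
    std::vector<int> prefix = {1, 1, 1, 2, 2, 3, 3, 3, 3};

    size_t pos = 0;
    while (pos < prefix.size())
    {
        const size_t range_end = getRangeEnd(pos, prefix.size(),
            [&](size_t lhs, size_t rhs) { return prefix[lhs] == prefix[rhs]; });
        std::cout << "range [" << pos << ", " << range_end << ") with prefix " << prefix[pos] << '\n';
        pos = range_end;
    }
}
```
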
(filling_row_changed) + filling_row_inserted = false; return true; } +template +size_t getRangeEnd(size_t begin, size_t end, Predicate pred) +{ + chassert(begin < end); + + const size_t linear_probe_threadhold = 16; + size_t linear_probe_end = begin + linear_probe_threadhold; + if (linear_probe_end > end) + linear_probe_end = end; + + for (size_t pos = begin; pos < linear_probe_end; ++pos) + { + if (!pred(begin, pos)) + return pos; + } + + size_t low = linear_probe_end; + size_t high = end - 1; + while (low <= high) + { + size_t mid = low + (high - low) / 2; + if (pred(begin, mid)) + low = mid + 1; + else + { + high = mid - 1; + end = mid; + } + } + return end; +} + +void FillingTransform::transformRange( + const Columns & input_fill_columns, + const Columns & input_interpolate_columns, + const Columns & input_sort_prefix_columns, + const Columns & input_other_columns, + const MutableColumns & result_columns, + const MutableColumnRawPtrs & res_fill_columns, + const MutableColumnRawPtrs & res_interpolate_columns, + const MutableColumnRawPtrs & res_sort_prefix_columns, + const MutableColumnRawPtrs & res_other_columns, + std::pair range, + const bool new_sorting_prefix) +{ + const size_t range_begin = range.first; + const size_t range_end = range.second; + + Block interpolate_block; + if (new_sorting_prefix) + { + logDebug("--- new range ---", range_end); + for (size_t i = 0, size = filling_row.size(); i < size; ++i) + { + const auto current_value = (*input_fill_columns[i])[range_begin]; + const auto & fill_from = filling_row.getFillDescription(i).fill_from; + + if (!fill_from.isNull() && !equals(current_value, fill_from)) + { + filling_row.initFromDefaults(i); + filling_row_inserted = false; + if (less(fill_from, current_value, filling_row.getDirection(i))) + { + interpolate(result_columns, interpolate_block); + insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, interpolate_block); + copyRowFromColumns(res_sort_prefix_columns, input_sort_prefix_columns, range_begin); + } + break; + } + filling_row[i] = current_value; + } + } + + for (size_t row_ind = range_begin; row_ind < range_end; ++row_ind) + { + logDebug("row", row_ind); + logDebug("filling_row", filling_row); + logDebug("next_row", next_row); + + bool should_insert_first = next_row < filling_row; + logDebug("should_insert_first", should_insert_first); + + for (size_t i = 0, size = filling_row.size(); i < size; ++i) + { + const auto current_value = (*input_fill_columns[i])[row_ind]; + const auto & fill_to = filling_row.getFillDescription(i).fill_to; + + if (fill_to.isNull() || less(current_value, fill_to, filling_row.getDirection(i))) + next_row[i] = current_value; + else + next_row[i] = fill_to; + } + logDebug("next_row updated", next_row); + + /// The condition is true when filling row is initialized by value(s) in FILL FROM, + /// and there are row(s) in current range with value(s) < then in the filling row. + /// It can happen only once for a range. 
+ if (should_insert_first && filling_row < next_row) + { + interpolate(result_columns, interpolate_block); + insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, interpolate_block); + copyRowFromColumns(res_sort_prefix_columns, input_sort_prefix_columns, row_ind); + } + + bool filling_row_changed = false; + while (true) + { + const auto [apply, changed] = filling_row.next(next_row); + filling_row_changed = changed; + if (!apply) + break; + + interpolate(result_columns, interpolate_block); + insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, interpolate_block); + copyRowFromColumns(res_sort_prefix_columns, input_sort_prefix_columns, row_ind); + } + /// new valid filling row was generated but not inserted, will use it during suffix generation + if (filling_row_changed) + filling_row_inserted = false; + + logDebug("filling_row after", filling_row); + + copyRowFromColumns(res_fill_columns, input_fill_columns, row_ind); + copyRowFromColumns(res_interpolate_columns, input_interpolate_columns, row_ind); + copyRowFromColumns(res_sort_prefix_columns, input_sort_prefix_columns, row_ind); + copyRowFromColumns(res_other_columns, input_other_columns, row_ind); + } + + /// save sort prefix of last row in the range, it's used to generate suffix + last_range_sort_prefix.clear(); + for (const auto & sort_prefix_column : input_sort_prefix_columns) + { + auto column = sort_prefix_column->cloneEmpty(); + column->insertFrom(*sort_prefix_column, range_end - 1); + last_range_sort_prefix.push_back(std::move(column)); + } +} + void FillingTransform::transform(Chunk & chunk) { logDebug("new chunk rows", chunk.getNumRows()); @@ -453,9 +674,11 @@ void FillingTransform::transform(Chunk & chunk) Columns input_fill_columns; Columns input_interpolate_columns; + Columns input_sort_prefix_columns; Columns input_other_columns; MutableColumnRawPtrs res_fill_columns; MutableColumnRawPtrs res_interpolate_columns; + MutableColumnRawPtrs res_sort_prefix_columns; MutableColumnRawPtrs res_other_columns; MutableColumns result_columns; @@ -468,6 +691,14 @@ void FillingTransform::transform(Chunk & chunk) /// if all chunks are processed, then we may need to generate suffix for the following cases: /// (1) when all data are processed and WITH FILL .. TO is provided /// (2) for empty result set when WITH FILL FROM .. 
TO is provided (see PR #30888) + + /// if no data was processed, then need to initialize filling_row + if (last_row.empty()) + { + filling_row.initFromDefaults(); + filling_row_inserted = false; + } + if (generateSuffixIfNeeded(input.getHeader().getColumns(), result_columns)) { size_t num_output_rows = result_columns[0]->size(); @@ -485,72 +716,95 @@ void FillingTransform::transform(Chunk & chunk) input_columns, input_fill_columns, input_interpolate_columns, + input_sort_prefix_columns, input_other_columns, result_columns, res_fill_columns, res_interpolate_columns, + res_sort_prefix_columns, res_other_columns); - if (first) + if (sort_prefix.empty() || !use_with_fill_by_sorting_prefix) { - for (size_t i = 0, size = filling_row.size(); i < size; ++i) - { - auto current_value = (*input_fill_columns[i])[0]; - const auto & fill_from = filling_row.getFillDescription(i).fill_from; + transformRange( + input_fill_columns, + input_interpolate_columns, + input_sort_prefix_columns, + input_other_columns, + result_columns, + res_fill_columns, + res_interpolate_columns, + res_sort_prefix_columns, + res_other_columns, + {0, num_rows}, + last_row.empty()); - if (!fill_from.isNull() && !equals(current_value, fill_from)) - { - filling_row.initFromDefaults(i); - if (less(fill_from, current_value, filling_row.getDirection(i))) - { - interpolate(result_columns, interpolate_block); - insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, filling_row, interpolate_block); - } - break; - } - filling_row[i] = current_value; - } - first = false; + saveLastRow(result_columns); + size_t num_output_rows = result_columns[0]->size(); + chunk.setColumns(std::move(result_columns), num_output_rows); + return; } - for (size_t row_ind = 0; row_ind < num_rows; ++row_ind) + /// check if last row in prev chunk had the same sorting prefix as the first in new one + /// if not, we need to reinitialize filling row + bool new_sort_prefix = last_row.empty(); + if (!last_row.empty()) { - logDebug("row", row_ind); - logDebug("filling_row", filling_row); - logDebug("next_row", next_row); + ColumnRawPtrs last_sort_prefix_columns; + last_sort_prefix_columns.reserve(sort_prefix.size()); + for (size_t pos : sort_prefix_positions) + last_sort_prefix_columns.push_back(last_row[pos].get()); - bool should_insert_first = next_row < filling_row; - logDebug("should_insert_first", should_insert_first); - - for (size_t i = 0, size = filling_row.size(); i < size; ++i) + new_sort_prefix = false; + for (size_t i = 0; i < input_sort_prefix_columns.size(); ++i) { - auto current_value = (*input_fill_columns[i])[row_ind]; - const auto & fill_to = filling_row.getFillDescription(i).fill_to; - - if (fill_to.isNull() || less(current_value, fill_to, filling_row.getDirection(i))) - next_row[i] = current_value; - else - next_row[i] = fill_to; + const int res = input_sort_prefix_columns[i]->compareAt(0, 0, *last_sort_prefix_columns[i], sort_prefix[i].nulls_direction); + if (res != 0) + { + new_sort_prefix = true; + break; + } } - logDebug("next_row updated", next_row); + } - /// A case, when at previous step row was initialized from defaults 'fill_from' values - /// and probably we need to insert it to block. 
- if (should_insert_first && filling_row < next_row) - { - interpolate(result_columns, interpolate_block); - insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, filling_row, interpolate_block); - } + for (size_t row_ind = 0; row_ind < num_rows;) + { + /// find next range + auto current_sort_prefix_end_pos = getRangeEnd( + row_ind, + num_rows, + [&](size_t pos_with_current_sort_prefix, size_t row_pos) + { + for (size_t i = 0; i < input_sort_prefix_columns.size(); ++i) + { + const int res = input_sort_prefix_columns[i]->compareAt( + pos_with_current_sort_prefix, row_pos, *input_sort_prefix_columns[i], sort_prefix[i].nulls_direction); + if (res != 0) + return false; + } + return true; + }); - while (filling_row.next(next_row)) - { - interpolate(result_columns, interpolate_block); - insertFromFillingRow(res_fill_columns, res_interpolate_columns, res_other_columns, filling_row, interpolate_block); - } + /// generate suffix for the previous range + if (!last_range_sort_prefix.empty() && new_sort_prefix) + generateSuffixIfNeeded(result_columns, res_fill_columns, res_interpolate_columns, res_sort_prefix_columns, res_other_columns); - copyRowFromColumns(res_fill_columns, input_fill_columns, row_ind); - copyRowFromColumns(res_interpolate_columns, input_interpolate_columns, row_ind); - copyRowFromColumns(res_other_columns, input_other_columns, row_ind); + transformRange( + input_fill_columns, + input_interpolate_columns, + input_sort_prefix_columns, + input_other_columns, + result_columns, + res_fill_columns, + res_interpolate_columns, + res_sort_prefix_columns, + res_other_columns, + {row_ind, current_sort_prefix_end_pos}, + new_sort_prefix); + + logDebug("range end", current_sort_prefix_end_pos); + row_ind = current_sort_prefix_end_pos; + new_sort_prefix = true; } saveLastRow(result_columns); diff --git a/src/Processors/Transforms/FillingTransform.h b/src/Processors/Transforms/FillingTransform.h index 7aa5e4c1e8a..85da544ef2d 100644 --- a/src/Processors/Transforms/FillingTransform.h +++ b/src/Processors/Transforms/FillingTransform.h @@ -16,7 +16,12 @@ namespace DB class FillingTransform : public ISimpleTransform { public: - FillingTransform(const Block & header_, const SortDescription & sort_description_, InterpolateDescriptionPtr interpolate_description_); + FillingTransform( + const Block & header_, + const SortDescription & sort_description_, + const SortDescription & fill_description_, + InterpolateDescriptionPtr interpolate_description_, + bool use_with_fill_by_sorting_prefix_); String getName() const override { return "FillingTransform"; } @@ -25,42 +30,72 @@ public: static Block transformHeader(Block header, const SortDescription & sort_description); protected: - void transform(Chunk & Chunk) override; + void transform(Chunk & chunk) override; private: + using MutableColumnRawPtrs = std::vector; + void transformRange( + const Columns & input_fill_columns, + const Columns & input_interpolate_columns, + const Columns & input_sort_prefix_columns, + const Columns & input_other_columns, + const MutableColumns & result_columns, + const MutableColumnRawPtrs & res_fill_columns, + const MutableColumnRawPtrs & res_interpolate_columns, + const MutableColumnRawPtrs & res_sort_prefix_columns, + const MutableColumnRawPtrs & res_other_columns, + std::pair range, + bool new_sorting_prefix); + void saveLastRow(const MutableColumns & result_columns); void interpolate(const MutableColumns & result_columns, Block & interpolate_block); - using MutableColumnRawPtrs = std::vector; 
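/// Illustrative note, not part of the patch: a minimal standalone sketch of the run
/// detection that FillingTransform::transform() above relies on - rows are processed in
/// runs sharing the same sorting prefix, and the end of each run is located by a short
/// linear probe followed by a binary search. This is a simplified half-open variant of
/// getRangeEnd(); the toy key vector and all names below are assumptions for demonstration.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

template <typename Predicate>
size_t rangeEndSketch(size_t begin, size_t end, Predicate pred)
{
    assert(begin < end);

    /// Cheap linear probe first: most runs are short.
    const size_t linear_probe_end = std::min(begin + 16, end);
    for (size_t pos = begin; pos < linear_probe_end; ++pos)
        if (!pred(begin, pos))
            return pos;

    /// Binary search over the tail: within a sorted block, pred(begin, x) is monotone.
    size_t low = linear_probe_end;
    size_t high = end;
    while (low < high)
    {
        const size_t mid = low + (high - low) / 2;
        if (pred(begin, mid))
            low = mid + 1;
        else
            high = mid;
    }
    return low;
}

int main()
{
    const std::vector<int> sort_prefix{1, 1, 1, 2, 2, 3};
    const auto same_prefix = [&](size_t lhs, size_t rhs) { return sort_prefix[lhs] == sort_prefix[rhs]; };

    std::vector<std::pair<size_t, size_t>> ranges;
    for (size_t row = 0; row < sort_prefix.size();)
    {
        const size_t range_end = rangeEndSketch(row, sort_prefix.size(), same_prefix);
        ranges.emplace_back(row, range_end); /// each run would be handed to transformRange()
        row = range_end;
    }
    assert((ranges == std::vector<std::pair<size_t, size_t>>{{0, 3}, {3, 5}, {5, 6}}));
}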
void initColumns( const Columns & input_columns, Columns & input_fill_columns, Columns & input_interpolate_columns, + Columns & input_sort_prefix_columns, Columns & input_other_columns, MutableColumns & output_columns, MutableColumnRawPtrs & output_fill_columns, MutableColumnRawPtrs & output_interpolate_columns, + MutableColumnRawPtrs & output_sort_prefix_columns, MutableColumnRawPtrs & output_other_columns); bool generateSuffixIfNeeded( - const Columns & input_columns, - MutableColumns & result_columns); + const MutableColumns & result_columns, + MutableColumnRawPtrs res_fill_columns, + MutableColumnRawPtrs res_interpolate_columns, + MutableColumnRawPtrs res_sort_prefix_columns, + MutableColumnRawPtrs res_other_columns); + bool generateSuffixIfNeeded(const Columns & input_columns, MutableColumns & result_columns); - const SortDescription sort_description; /// Contains only columns with WITH FILL. + void insertFromFillingRow( + const MutableColumnRawPtrs & filling_columns, + const MutableColumnRawPtrs & interpolate_columns, + const MutableColumnRawPtrs & other_columns, + const Block & interpolate_block); + + const SortDescription sort_description; + const SortDescription fill_description; /// Contains only columns with WITH FILL. + SortDescription sort_prefix; const InterpolateDescriptionPtr interpolate_description; /// Contains INTERPOLATE columns FillingRow filling_row; /// Current row, which is used to fill gaps. FillingRow next_row; /// Row to which we need to generate filling rows. + bool filling_row_inserted = false; using Positions = std::vector; Positions fill_column_positions; Positions interpolate_column_positions; Positions other_column_positions; + Positions sort_prefix_positions; std::vector> input_positions; /// positions in result columns required for actions ExpressionActionsPtr interpolate_actions; Columns last_row; - bool first = true; /// flag to determine if transform is/will be called for the first time + Columns last_range_sort_prefix; bool all_chunks_processed = false; /// flag to determine if we have already processed all chunks + const bool use_with_fill_by_sorting_prefix; }; class FillingNoopTransform : public ISimpleTransform diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index 856d3eb2b27..46e67b623e2 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -5,40 +5,38 @@ #include -#include #include #include -#include -#include #include -#include #include -#include -#include -#include #include #include #include -#include #include +#include +#include #include #include -#include -#include -#include +#include +#include #include namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + StorageHDFSCluster::StorageHDFSCluster( ContextPtr context_, - String cluster_name_, + const String & cluster_name_, const String & uri_, const StorageID & table_id_, const String & format_name_, @@ -46,12 +44,10 @@ StorageHDFSCluster::StorageHDFSCluster( const ConstraintsDescription & constraints_, const String & compression_method_, bool structure_argument_was_provided_) - : IStorageCluster(table_id_) - , cluster_name(cluster_name_) + : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageHDFSCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) , uri(uri_) , format_name(format_name_) , compression_method(compression_method_) - , structure_argument_was_provided(structure_argument_was_provided_) { checkHDFSURL(uri_); 
context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); @@ -70,84 +66,17 @@ StorageHDFSCluster::StorageHDFSCluster( setInMemoryMetadata(storage_metadata); } -/// The code executes on initiator -Pipe StorageHDFSCluster::read( - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context, - QueryProcessingStage::Enum processed_stage, - size_t /*max_block_size*/, - size_t /*num_streams*/) +void StorageHDFSCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) { - auto cluster = getCluster(context); - auto extension = getTaskIteratorExtension(query_info.query, context); + ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); + if (!expression_list) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function hdfsCluster, got '{}'", queryToString(query)); - /// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*) - Block header; - - if (context->getSettingsRef().allow_experimental_analyzer) - header = InterpreterSelectQueryAnalyzer::getSampleBlock(query_info.query, context, SelectQueryOptions(processed_stage).analyze()); - else - header = InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); - - const Scalars & scalars = context->hasQueryContext() ? context->getQueryContext()->getScalars() : Scalars{}; - - Pipes pipes; - - const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; - - auto query_to_send = query_info.original_query->clone(); - if (!structure_argument_was_provided) - addColumnsStructureToQueryWithClusterEngine( - query_to_send, StorageDictionary::generateNamesAndTypesDescription(storage_snapshot->metadata->getColumns().getAll()), 3, getName()); - - auto new_context = IStorageCluster::updateSettingsForTableFunctionCluster(context, context->getSettingsRef()); - const auto & current_settings = new_context->getSettingsRef(); - auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); - for (const auto & shard_info : cluster->getShardsInfo()) - { - auto try_results = shard_info.pool->getMany(timeouts, ¤t_settings, PoolMode::GET_MANY); - for (auto & try_result : try_results) - { - auto remote_query_executor = std::make_shared( - std::vector{try_result}, - queryToString(query_to_send), - header, - new_context, - /*throttler=*/nullptr, - scalars, - Tables(), - processed_stage, - extension); - - pipes.emplace_back(std::make_shared(remote_query_executor, add_agg_info, false, false)); - } - } - - storage_snapshot->check(column_names); - return Pipe::unitePipes(std::move(pipes)); -} - -QueryProcessingStage::Enum StorageHDFSCluster::getQueryProcessingStage( - ContextPtr context, QueryProcessingStage::Enum to_stage, const StorageSnapshotPtr &, SelectQueryInfo &) const -{ - /// Initiator executes query on remote node. - if (context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) - if (to_stage >= QueryProcessingStage::Enum::WithMergeableState) - return QueryProcessingStage::Enum::WithMergeableState; - - /// Follower just reads the data. 
- return QueryProcessingStage::Enum::FetchColumns; + TableFunctionHDFSCluster::addColumnsStructureToArguments(expression_list->children, structure, context); } -ClusterPtr StorageHDFSCluster::getCluster(ContextPtr context) const -{ - return context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); -} - -RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(ASTPtr, ContextPtr context) const +RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(ASTPtr, const ContextPtr & context) const { auto iterator = std::make_shared(context, uri); auto callback = std::make_shared([iter = std::move(iterator)]() mutable -> String { return iter->next(); }); @@ -161,7 +90,6 @@ NamesAndTypesList StorageHDFSCluster::getVirtuals() const {"_file", std::make_shared(std::make_shared())}}; } - } #endif diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 4d6548a6b78..350051ab089 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -22,7 +22,7 @@ class StorageHDFSCluster : public IStorageCluster public: StorageHDFSCluster( ContextPtr context_, - String cluster_name_, + const String & cluster_name_, const String & uri_, const StorageID & table_id_, const String & format_name_, @@ -33,23 +33,16 @@ public: std::string getName() const override { return "HDFSCluster"; } - Pipe read(const Names &, const StorageSnapshotPtr &, SelectQueryInfo &, - ContextPtr, QueryProcessingStage::Enum, size_t /*max_block_size*/, size_t /*num_streams*/) override; - - QueryProcessingStage::Enum - getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; - NamesAndTypesList getVirtuals() const override; - ClusterPtr getCluster(ContextPtr context) const override; - RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, ContextPtr context) const override; + RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; private: - String cluster_name; + void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + String uri; String format_name; String compression_method; - bool structure_argument_was_provided; }; diff --git a/src/Storages/IStorageCluster.cpp b/src/Storages/IStorageCluster.cpp new file mode 100644 index 00000000000..9eeed18e788 --- /dev/null +++ b/src/Storages/IStorageCluster.cpp @@ -0,0 +1,149 @@ +#include "Storages/IStorageCluster.h" + +#include "Common/Exception.h" +#include "Core/QueryProcessingStage.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +IStorageCluster::IStorageCluster( + const String & cluster_name_, + const StorageID & table_id_, + Poco::Logger * log_, + bool structure_argument_was_provided_) + : IStorage(table_id_) + , log(log_) + , cluster_name(cluster_name_) + , structure_argument_was_provided(structure_argument_was_provided_) +{ +} + + +/// The code executes on initiator +Pipe IStorageCluster::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t /*max_block_size*/, + size_t /*num_streams*/) +{ + updateBeforeRead(context); + + auto cluster = 
getCluster(context); + auto extension = getTaskIteratorExtension(query_info.query, context); + + /// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*) + + Block sample_block; + ASTPtr query_to_send = query_info.query; + + if (context->getSettingsRef().allow_experimental_analyzer) + { + sample_block = InterpreterSelectQueryAnalyzer::getSampleBlock(query_info.query, context, SelectQueryOptions(processed_stage)); + } + else + { + auto interpreter = InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze()); + sample_block = interpreter.getSampleBlock(); + query_to_send = interpreter.getQueryInfo().query->clone(); + } + + const Scalars & scalars = context->hasQueryContext() ? context->getQueryContext()->getScalars() : Scalars{}; + + Pipes pipes; + + const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; + + if (!structure_argument_was_provided) + addColumnsStructureToQuery(query_to_send, StorageDictionary::generateNamesAndTypesDescription(storage_snapshot->metadata->getColumns().getAll()), context); + + RestoreQualifiedNamesVisitor::Data data; + data.distributed_table = DatabaseAndTableWithAlias(*getTableExpression(query_info.query->as(), 0)); + data.remote_table.database = context->getCurrentDatabase(); + data.remote_table.table = getName(); + RestoreQualifiedNamesVisitor(data).visit(query_to_send); + AddDefaultDatabaseVisitor visitor(context, context->getCurrentDatabase(), + /* only_replace_current_database_function_= */false, + /* only_replace_in_join_= */true); + visitor.visit(query_to_send); + + auto new_context = updateSettings(context, context->getSettingsRef()); + const auto & current_settings = new_context->getSettingsRef(); + auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); + for (const auto & shard_info : cluster->getShardsInfo()) + { + auto try_results = shard_info.pool->getMany(timeouts, ¤t_settings, PoolMode::GET_MANY); + for (auto & try_result : try_results) + { + auto remote_query_executor = std::make_shared( + std::vector{try_result}, + queryToString(query_to_send), + sample_block, + new_context, + /*throttler=*/nullptr, + scalars, + Tables(), + processed_stage, + extension); + + remote_query_executor->setLogger(log); + pipes.emplace_back(std::make_shared(remote_query_executor, add_agg_info, false, false)); + } + } + + storage_snapshot->check(column_names); + return Pipe::unitePipes(std::move(pipes)); +} + +QueryProcessingStage::Enum IStorageCluster::getQueryProcessingStage( + ContextPtr context, QueryProcessingStage::Enum to_stage, const StorageSnapshotPtr &, SelectQueryInfo &) const +{ + /// Initiator executes query on remote node. + if (context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) + if (to_stage >= QueryProcessingStage::Enum::WithMergeableState) + return QueryProcessingStage::Enum::WithMergeableState; + + /// Follower just reads the data. + return QueryProcessingStage::Enum::FetchColumns; +} + +ContextPtr IStorageCluster::updateSettings(ContextPtr context, const Settings & settings) +{ + Settings new_settings = settings; + + /// Cluster table functions should always skip unavailable shards. 
+ new_settings.skip_unavailable_shards = true; + + auto new_context = Context::createCopy(context); + new_context->setSettings(new_settings); + return new_context; +} + +ClusterPtr IStorageCluster::getCluster(ContextPtr context) const +{ + return context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); +} + +} diff --git a/src/Storages/IStorageCluster.h b/src/Storages/IStorageCluster.h index 03185e7aee6..b15ed37202a 100644 --- a/src/Storages/IStorageCluster.h +++ b/src/Storages/IStorageCluster.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -15,26 +16,40 @@ namespace DB class IStorageCluster : public IStorage { public: + IStorageCluster( + const String & cluster_name_, + const StorageID & table_id_, + Poco::Logger * log_, + bool structure_argument_was_provided_); - explicit IStorageCluster(const StorageID & table_id_) : IStorage(table_id_) {} + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t /*max_block_size*/, + size_t /*num_streams*/) override; - virtual ClusterPtr getCluster(ContextPtr context) const = 0; + ClusterPtr getCluster(ContextPtr context) const; /// Query is needed for pruning by virtual columns (_file, _path) - virtual RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, ContextPtr context) const = 0; + virtual RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const = 0; + + QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; bool isRemote() const override { return true; } - static ContextPtr updateSettingsForTableFunctionCluster(ContextPtr context, const Settings & settings) - { - Settings new_settings = settings; +protected: + virtual void updateBeforeRead(const ContextPtr &) {} - /// Cluster table functions should always skip unavailable shards. - new_settings.skip_unavailable_shards = true; + virtual void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) = 0; - auto new_context = Context::createCopy(context); - new_context->setSettings(new_settings); - return new_context; - } +private: + ContextPtr updateSettings(ContextPtr context, const Settings & settings); + + Poco::Logger * log; + String cluster_name; + bool structure_argument_was_provided; }; diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index f2e35e2dcd2..21fdda28767 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -369,6 +369,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( const StorageMetadataPtr & metadata_snapshot, ContextPtr context, const String & part_name, + const String & zookeeper_name, const String & replica_path, const String & host, int port, @@ -401,13 +402,18 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( /// Validation of the input that may come from malicious replica. auto part_info = MergeTreePartInfo::fromPartName(part_name, data.format_version); + String endpoint_id = getEndpointId( + data_settings->enable_the_endpoint_id_with_zookeeper_name_prefix ? 
+ zookeeper_name + ":" + replica_path : + replica_path); + Poco::URI uri; uri.setScheme(interserver_scheme); uri.setHost(host); uri.setPort(port); uri.setQueryParameters( { - {"endpoint", getEndpointId(replica_path)}, + {"endpoint", endpoint_id}, {"part", part_name}, {"client_protocol_version", toString(REPLICATION_PROTOCOL_VERSION_WITH_METADATA_VERSION)}, {"compress", "false"} @@ -630,7 +636,15 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( temporary_directory_lock = {}; /// Try again but without zero-copy - return fetchSelectedPart(metadata_snapshot, context, part_name, replica_path, host, port, timeouts, + return fetchSelectedPart( + metadata_snapshot, + context, + part_name, + zookeeper_name, + replica_path, + host, + port, + timeouts, user, password, interserver_scheme, throttler, to_detached, tmp_prefix, nullptr, false, disk); } } diff --git a/src/Storages/MergeTree/DataPartsExchange.h b/src/Storages/MergeTree/DataPartsExchange.h index 57fd0d5cff0..704c1b0a226 100644 --- a/src/Storages/MergeTree/DataPartsExchange.h +++ b/src/Storages/MergeTree/DataPartsExchange.h @@ -70,6 +70,7 @@ public: const StorageMetadataPtr & metadata_snapshot, ContextPtr context, const String & part_name, + const String & zookeeper_name, const String & replica_path, const String & host, int port, diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 440c91e3082..1d5ac21f803 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -506,52 +506,48 @@ void MergeTreeData::checkProperties( auto all_columns = new_metadata.columns.getAllPhysical(); - /// Order by check AST - if (old_metadata.hasSortingKey()) + /// This is ALTER, not CREATE/ATTACH TABLE. Let us check that all new columns used in the sorting key + /// expression have just been added (so that the sorting order is guaranteed to be valid with the new key). + + Names new_primary_key_columns = new_primary_key.column_names; + Names new_sorting_key_columns = new_sorting_key.column_names; + + ASTPtr added_key_column_expr_list = std::make_shared(); + const auto & old_sorting_key_columns = old_metadata.getSortingKeyColumns(); + for (size_t new_i = 0, old_i = 0; new_i < sorting_key_size; ++new_i) { - /// This is ALTER, not CREATE/ATTACH TABLE. Let us check that all new columns used in the sorting key - /// expression have just been added (so that the sorting order is guaranteed to be valid with the new key). 
- - Names new_primary_key_columns = new_primary_key.column_names; - Names new_sorting_key_columns = new_sorting_key.column_names; - - ASTPtr added_key_column_expr_list = std::make_shared(); - const auto & old_sorting_key_columns = old_metadata.getSortingKeyColumns(); - for (size_t new_i = 0, old_i = 0; new_i < sorting_key_size; ++new_i) + if (old_i < old_sorting_key_columns.size()) { - if (old_i < old_sorting_key_columns.size()) - { - if (new_sorting_key_columns[new_i] != old_sorting_key_columns[old_i]) - added_key_column_expr_list->children.push_back(new_sorting_key.expression_list_ast->children[new_i]); - else - ++old_i; - } - else + if (new_sorting_key_columns[new_i] != old_sorting_key_columns[old_i]) added_key_column_expr_list->children.push_back(new_sorting_key.expression_list_ast->children[new_i]); + else + ++old_i; } + else + added_key_column_expr_list->children.push_back(new_sorting_key.expression_list_ast->children[new_i]); + } - if (!added_key_column_expr_list->children.empty()) + if (!added_key_column_expr_list->children.empty()) + { + auto syntax = TreeRewriter(getContext()).analyze(added_key_column_expr_list, all_columns); + Names used_columns = syntax->requiredSourceColumns(); + + NamesAndTypesList deleted_columns; + NamesAndTypesList added_columns; + old_metadata.getColumns().getAllPhysical().getDifference(all_columns, deleted_columns, added_columns); + + for (const String & col : used_columns) { - auto syntax = TreeRewriter(getContext()).analyze(added_key_column_expr_list, all_columns); - Names used_columns = syntax->requiredSourceColumns(); + if (!added_columns.contains(col) || deleted_columns.contains(col)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Existing column {} is used in the expression that was added to the sorting key. " + "You can add expressions that use only the newly added columns", + backQuoteIfNeed(col)); - NamesAndTypesList deleted_columns; - NamesAndTypesList added_columns; - old_metadata.getColumns().getAllPhysical().getDifference(all_columns, deleted_columns, added_columns); - - for (const String & col : used_columns) - { - if (!added_columns.contains(col) || deleted_columns.contains(col)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Existing column {} is used in the expression that was added to the sorting key. " - "You can add expressions that use only the newly added columns", - backQuoteIfNeed(col)); - - if (new_metadata.columns.getDefaults().contains(col)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Newly added column {} has a default expression, so adding expressions that use " - "it to the sorting key is forbidden", backQuoteIfNeed(col)); - } + if (new_metadata.columns.getDefaults().contains(col)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Newly added column {} has a default expression, so adding expressions that use " + "it to the sorting key is forbidden", backQuoteIfNeed(col)); } } @@ -1081,7 +1077,7 @@ void MergeTreeData::PartLoadingTree::add(const MergeTreePartInfo & info, const S else if (!prev_info.isDisjoint(info)) { throw Exception(ErrorCodes::LOGICAL_ERROR, - "Part {} intersects previous part {}. It is a bug!", + "Part {} intersects previous part {}. It is a bug or a result of manual intervention in the server or ZooKeeper data", name, prev->second->name); } } @@ -1098,7 +1094,7 @@ void MergeTreeData::PartLoadingTree::add(const MergeTreePartInfo & info, const S else if (!next_info.isDisjoint(info)) { throw Exception(ErrorCodes::LOGICAL_ERROR, - "Part {} intersects next part {}. 
It is a bug!", + "Part {} intersects next part {}. It is a bug or a result of manual intervention in the server or ZooKeeper data", name, it->second->name); } } @@ -7202,9 +7198,17 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage( if (query_context->getClientInfo().collaborate_with_initiator) return QueryProcessingStage::Enum::FetchColumns; - if (query_context->canUseParallelReplicasOnInitiator() - && to_stage >= QueryProcessingStage::WithMergeableState) - return QueryProcessingStage::Enum::WithMergeableState; + /// Parallel replicas + if (query_context->canUseParallelReplicasOnInitiator() && to_stage >= QueryProcessingStage::WithMergeableState) + { + /// ReplicatedMergeTree + if (supportsReplication()) + return QueryProcessingStage::Enum::WithMergeableState; + + /// For non-replicated MergeTree we allow them only if parallel_replicas_for_non_replicated_merge_tree is enabled + if (query_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree) + return QueryProcessingStage::Enum::WithMergeableState; + } if (to_stage >= QueryProcessingStage::Enum::WithMergeableState) { diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index 503b4aac51d..09a04f13fc7 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index c9e81ce9103..5ea99009756 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -159,6 +159,7 @@ struct Settings; M(UInt64, min_bytes_to_rebalance_partition_over_jbod, 0, "Minimal amount of bytes to enable part rebalance over JBOD array (0 - disabled).", 0) \ M(Bool, check_sample_column_is_correct, true, "Check columns or columns by hash for sampling are unsigned integer.", 0) \ M(Bool, allow_vertical_merges_from_compact_to_wide_parts, false, "Allows vertical merges from compact to wide parts. This settings must have the same value on all replicas", 0) \ + M(Bool, enable_the_endpoint_id_with_zookeeper_name_prefix, false, "Enable the endpoint id with zookeeper name prefix for the replicated merge tree table", 0) \ \ /** Experimental/work in progress feature. Unsafe for production. */ \ M(UInt64, part_moves_between_shards_enable, 0, "Experimental/Incomplete feature to move parts between shards. Does not take into account sharding expressions.", 0) \ diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 78ea3e5b246..af643050504 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -152,7 +152,8 @@ static void splitAndModifyMutationCommands( /// But we don't know for sure what happened. 
auto part_metadata_version = part->getMetadataVersion(); auto table_metadata_version = table_metadata_snapshot->getMetadataVersion(); - if (table_metadata_version <= part_metadata_version) + /// StorageMergeTree does not have metadata version + if (table_metadata_version <= part_metadata_version && part->storage.supportsReplication()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} with metadata version {} contains column {} that is absent " "in table {} with metadata version {}", part->name, part_metadata_version, column.name, diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index fb895d04b8f..b3dfd44b2ad 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -251,6 +251,7 @@ struct SelectQueryInfo bool is_projection_query = false; bool merge_tree_empty_result = false; bool settings_limit_offset_done = false; + bool is_internal = false; Block minmax_count_projection_block; MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 86a2599b49c..dd2d3ebfaf0 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -145,7 +145,6 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int INCORRECT_NUMBER_OF_COLUMNS; extern const int INFINITE_LOOP; - extern const int ILLEGAL_FINAL; extern const int TYPE_MISMATCH; extern const int TOO_MANY_ROWS; extern const int UNABLE_TO_SKIP_UNUSED_SHARDS; @@ -1045,10 +1044,6 @@ void StorageDistributed::read( const size_t /*max_block_size*/, const size_t /*num_streams*/) { - const auto * select_query = query_info.query->as(); - if (select_query->final() && local_context->getSettingsRef().allow_experimental_parallel_reading_from_replicas) - throw Exception(ErrorCodes::ILLEGAL_FINAL, "Final modifier is not allowed together with parallel reading from replicas feature"); - Block header; ASTPtr query_ast; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 7ecb424673d..b0ed242d14d 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -633,10 +633,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( auto & modified_select = modified_query_info.query->as(); QueryPipelineBuilderPtr builder; - - bool final = isFinal(modified_query_info); - - if (!final && storage->needRewriteQueryWithFinal(real_column_names)) + if (!InterpreterSelectQuery::isQueryWithFinal(modified_query_info) && storage->needRewriteQueryWithFinal(real_column_names)) { /// NOTE: It may not work correctly in some cases, because query was analyzed without final. /// However, it's needed for MaterializedMySQL and it's unlikely that someone will use it with Merge tables. @@ -1010,21 +1007,13 @@ bool ReadFromMerge::requestReadingInOrder(InputOrderInfoPtr order_info_) { /// Disable read-in-order optimization for reverse order with final. /// Otherwise, it can lead to incorrect final behavior because the implementation may rely on the reading in direct order). 
- if (order_info_->direction != 1 && isFinal(query_info)) + if (order_info_->direction != 1 && InterpreterSelectQuery::isQueryWithFinal(query_info)) return false; order_info = order_info_; return true; } -bool ReadFromMerge::isFinal(const SelectQueryInfo & query_info) -{ - if (query_info.table_expression_modifiers) - return query_info.table_expression_modifiers->hasFinal(); - const auto & select_query = query_info.query->as(); - return select_query.final(); -} - IStorage::ColumnSizeByName StorageMerge::getColumnSizes() const { ColumnSizeByName column_sizes; diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index c4b6d815935..babf0dd92e8 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -145,7 +145,6 @@ public: /// Returns `false` if requested reading cannot be performed. bool requestReadingInOrder(InputOrderInfoPtr order_info_); - static bool isFinal(const SelectQueryInfo & query_info); private: const size_t required_max_block_size; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ea9ffee4939..7b9a3093e40 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -240,6 +240,15 @@ zkutil::ZooKeeperPtr StorageReplicatedMergeTree::getZooKeeperAndAssertNotReadonl return res; } +String StorageReplicatedMergeTree::getEndpointName() const +{ + const MergeTreeSettings & settings = getContext()->getReplicatedMergeTreeSettings(); + if (settings.enable_the_endpoint_id_with_zookeeper_name_prefix) + return zookeeper_name + ":" + replica_path; + + return replica_path; +} + static ConnectionTimeouts getHTTPTimeouts(ContextPtr context) { return ConnectionTimeouts::getHTTPTimeouts(context->getSettingsRef(), {context->getConfigRef().getUInt("keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT), 0}); @@ -1841,6 +1850,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che String source_replica_path = fs::path(zookeeper_path) / "replicas" / replica; if (!fetchPart(part_name, metadata_snapshot, + zookeeper_name, source_replica_path, /* to_detached= */ false, entry.quorum, @@ -2341,7 +2351,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) interserver_scheme, address.scheme, address.host); part_desc->res_part = fetcher.fetchSelectedPart( - metadata_snapshot, getContext(), part_desc->found_new_part_name, source_replica_path, + metadata_snapshot, getContext(), part_desc->found_new_part_name, zookeeper_name, source_replica_path, address.host, address.replication_port, timeouts, credentials->getUser(), credentials->getPassword(), interserver_scheme, replicated_fetches_throttler, false, TMP_PREFIX + "fetch_"); @@ -2458,7 +2468,7 @@ void StorageReplicatedMergeTree::executeClonePartFromShard(const LogEntry & entr interserver_scheme, address.scheme, address.host); return fetcher.fetchSelectedPart( - metadata_snapshot, getContext(), entry.new_part_name, source_replica_path, + metadata_snapshot, getContext(), entry.new_part_name, zookeeper_name, source_replica_path, address.host, address.replication_port, timeouts, credentials->getUser(), credentials->getPassword(), interserver_scheme, replicated_fetches_throttler, true); @@ -4042,6 +4052,7 @@ bool StorageReplicatedMergeTree::partIsLastQuorumPart(const MergeTreePartInfo & bool StorageReplicatedMergeTree::fetchPart( const String & part_name, const StorageMetadataPtr & metadata_snapshot, + const String & source_zookeeper_name, const String & 
source_replica_path, bool to_detached, size_t quorum, @@ -4077,7 +4088,7 @@ bool StorageReplicatedMergeTree::fetchPart( currently_fetching_parts.erase(part_name); }); - LOG_DEBUG(log, "Fetching part {} from {}", part_name, source_replica_path); + LOG_DEBUG(log, "Fetching part {} from {}:{}", part_name, source_zookeeper_name, source_replica_path); auto settings_ptr = getSettings(); TableLockHolder table_lock_holder; @@ -4134,7 +4145,8 @@ bool StorageReplicatedMergeTree::fetchPart( } else { - LOG_INFO(log, "Not checking checksums of part {} with replica {} because part was removed from ZooKeeper", part_name, source_replica_path); + LOG_INFO(log, "Not checking checksums of part {} with replica {}:{} because part was removed from ZooKeeper", + part_name, source_zookeeper_name, source_replica_path); } } @@ -4187,6 +4199,7 @@ bool StorageReplicatedMergeTree::fetchPart( metadata_snapshot, getContext(), part_name, + source_zookeeper_name, source_replica_path, address.host, address.replication_port, @@ -4279,7 +4292,7 @@ bool StorageReplicatedMergeTree::fetchPart( if (part_to_clone) LOG_DEBUG(log, "Cloned part {} from {}{}", part_name, part_to_clone->name, to_detached ? " (to 'detached' directory)" : ""); else - LOG_DEBUG(log, "Fetched part {} from {}{}", part_name, source_replica_path, to_detached ? " (to 'detached' directory)" : ""); + LOG_DEBUG(log, "Fetched part {} from {}:{}{}", part_name, source_zookeeper_name, source_replica_path, to_detached ? " (to 'detached' directory)" : ""); return true; } @@ -4318,7 +4331,7 @@ MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( currently_fetching_parts.erase(part_name); }); - LOG_DEBUG(log, "Fetching already known part {} from {}", part_name, source_replica_path); + LOG_DEBUG(log, "Fetching already known part {} from {}:{}", part_name, zookeeper_name, source_replica_path); TableLockHolder table_lock_holder = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); @@ -4350,7 +4363,7 @@ MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( "'{}' != '{}', can't fetch part from {}", interserver_scheme, address.scheme, address.host); return fetcher.fetchSelectedPart( - metadata_snapshot, getContext(), part_name, source_replica_path, + metadata_snapshot, getContext(), part_name, zookeeper_name, source_replica_path, address.host, address.replication_port, timeouts, credentials->getUser(), credentials->getPassword(), interserver_scheme, replicated_fetches_throttler, false, "", nullptr, true, @@ -4387,7 +4400,7 @@ MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( ProfileEvents::increment(ProfileEvents::ReplicatedPartFetches); - LOG_DEBUG(log, "Fetched part {} from {}", part_name, source_replica_path); + LOG_DEBUG(log, "Fetched part {} from {}:{}", part_name, zookeeper_name, source_replica_path); return part->getDataPartStoragePtr(); } @@ -4430,7 +4443,16 @@ void StorageReplicatedMergeTree::startupImpl(bool from_attach_thread) InterserverIOEndpointPtr data_parts_exchange_ptr = std::make_shared(*this); [[maybe_unused]] auto prev_ptr = std::atomic_exchange(&data_parts_exchange_endpoint, data_parts_exchange_ptr); assert(prev_ptr == nullptr); - getContext()->getInterserverIOHandler().addEndpoint(data_parts_exchange_ptr->getId(replica_path), data_parts_exchange_ptr); + + /// The endpoint id: + /// old format: DataPartsExchange:/clickhouse/tables/default/t1/{shard}/{replica} + /// new format: 
DataPartsExchange:{zookeeper_name}:/clickhouse/tables/default/t1/{shard}/{replica} + /// Notice: + /// They are incompatible and the default is the old format. + /// If you want to use the new format, please ensure that 'enable_the_endpoint_id_with_zookeeper_name_prefix' of all nodes is true . + /// + getContext()->getInterserverIOHandler().addEndpoint( + data_parts_exchange_ptr->getId(getEndpointName()), data_parts_exchange_ptr); startBeingLeader(); @@ -4555,7 +4577,7 @@ void StorageReplicatedMergeTree::shutdown() auto data_parts_exchange_ptr = std::atomic_exchange(&data_parts_exchange_endpoint, InterserverIOEndpointPtr{}); if (data_parts_exchange_ptr) { - getContext()->getInterserverIOHandler().removeEndpointIfExists(data_parts_exchange_ptr->getId(replica_path)); + getContext()->getInterserverIOHandler().removeEndpointIfExists(data_parts_exchange_ptr->getId(getEndpointName())); /// Ask all parts exchange handlers to finish asap. New ones will fail to start data_parts_exchange_ptr->blocker.cancelForever(); /// Wait for all of them @@ -6237,14 +6259,14 @@ void StorageReplicatedMergeTree::fetchPartition( info.table_id = getStorageID(); info.table_id.uuid = UUIDHelpers::Nil; auto expand_from = query_context->getMacros()->expand(from_, info); - String auxiliary_zookeeper_name = zkutil::extractZooKeeperName(expand_from); + String from_zookeeper_name = zkutil::extractZooKeeperName(expand_from); String from = zkutil::extractZooKeeperPath(expand_from, /* check_starts_with_slash */ true); if (from.empty()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "ZooKeeper path should not be empty"); zkutil::ZooKeeperPtr zookeeper; - if (auxiliary_zookeeper_name != default_zookeeper_name) - zookeeper = getContext()->getAuxiliaryZooKeeper(auxiliary_zookeeper_name); + if (from_zookeeper_name != default_zookeeper_name) + zookeeper = getContext()->getAuxiliaryZooKeeper(from_zookeeper_name); else zookeeper = getZooKeeper(); @@ -6263,12 +6285,12 @@ void StorageReplicatedMergeTree::fetchPartition( */ if (checkIfDetachedPartExists(part_name)) throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Detached part {} already exists.", part_name); - LOG_INFO(log, "Will fetch part {} from shard {} (zookeeper '{}')", part_name, from_, auxiliary_zookeeper_name); + LOG_INFO(log, "Will fetch part {} from shard {}", part_name, from_); try { /// part name , metadata, part_path , true, 0, zookeeper - if (!fetchPart(part_name, metadata_snapshot, part_path, true, 0, zookeeper, /* try_fetch_shared = */ false)) + if (!fetchPart(part_name, metadata_snapshot, from_zookeeper_name, part_path, true, 0, zookeeper, /* try_fetch_shared = */ false)) throw Exception(ErrorCodes::UNFINISHED, "Failed to fetch part {} from {}", part_name, from_); } catch (const DB::Exception & e) @@ -6283,7 +6305,7 @@ void StorageReplicatedMergeTree::fetchPartition( } String partition_id = getPartitionIDFromQuery(partition, query_context); - LOG_INFO(log, "Will fetch partition {} from shard {} (zookeeper '{}')", partition_id, from_, auxiliary_zookeeper_name); + LOG_INFO(log, "Will fetch partition {} from shard {}", partition_id, from_); /** Let's check that there is no such partition in the `detached` directory (where we will write the downloaded parts). * Unreliable (there is a race condition) - such a partition may appear a little later. 
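/// Illustrative note, not part of the patch: what the two interserver endpoint id formats
/// described in the comment above look like for a hypothetical replica. The
/// "DataPartsExchange:" prefix and the sample path follow that comment; the helper below is
/// a simplified stand-in for getEndpointId()/getEndpointName(), not the real implementation.
#include <cassert>
#include <string>

std::string endpointIdSketch(bool enable_zookeeper_name_prefix, const std::string & zookeeper_name, const std::string & replica_path)
{
    const std::string endpoint_name = enable_zookeeper_name_prefix ? zookeeper_name + ":" + replica_path : replica_path;
    return "DataPartsExchange:" + endpoint_name;
}

int main()
{
    const std::string replica_path = "/clickhouse/tables/default/t1/0/replica1";
    /// Old (default) format, compatible with older replicas.
    assert(endpointIdSketch(false, "default", replica_path) == "DataPartsExchange:" + replica_path);
    /// New format, used only when enable_the_endpoint_id_with_zookeeper_name_prefix is true on every node.
    assert(endpointIdSketch(true, "default", replica_path) == "DataPartsExchange:default:" + replica_path);
}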
@@ -6307,7 +6329,7 @@ void StorageReplicatedMergeTree::fetchPartition( active_replicas.push_back(replica); if (active_replicas.empty()) - throw Exception(ErrorCodes::NO_ACTIVE_REPLICAS, "No active replicas for shard {}", from); + throw Exception(ErrorCodes::NO_ACTIVE_REPLICAS, "No active replicas for shard {}", from_); /** You must select the best (most relevant) replica. * This is a replica with the maximum `log_pointer`, then with the minimum `queue` size. @@ -6361,7 +6383,8 @@ void StorageReplicatedMergeTree::fetchPartition( LOG_INFO(log, "Some of parts ({}) are missing. Will try to fetch covering parts.", missing_parts.size()); if (try_no >= query_context->getSettings().max_fetch_partition_retries_count) - throw Exception(ErrorCodes::TOO_MANY_RETRIES_TO_FETCH_PARTS, "Too many retries to fetch parts from {}", best_replica_path); + throw Exception(ErrorCodes::TOO_MANY_RETRIES_TO_FETCH_PARTS, + "Too many retries to fetch parts from {}:{}", from_zookeeper_name, best_replica_path); Strings parts = zookeeper->getChildren(fs::path(best_replica_path) / "parts"); ActiveDataPartSet active_parts_set(format_version, parts); @@ -6382,7 +6405,8 @@ void StorageReplicatedMergeTree::fetchPartition( parts_to_fetch = std::move(parts_to_fetch_partition); if (parts_to_fetch.empty()) - throw Exception(ErrorCodes::PARTITION_DOESNT_EXIST, "Partition {} on {} doesn't exist", partition_id, best_replica_path); + throw Exception(ErrorCodes::PARTITION_DOESNT_EXIST, + "Partition {} on {}:{} doesn't exist", partition_id, from_zookeeper_name, best_replica_path); } else { @@ -6392,7 +6416,7 @@ void StorageReplicatedMergeTree::fetchPartition( if (!containing_part.empty()) parts_to_fetch.push_back(containing_part); else - LOG_WARNING(log, "Part {} on replica {} has been vanished.", missing_part, best_replica_path); + LOG_WARNING(log, "Part {} on replica {}:{} has been vanished.", missing_part, from_zookeeper_name, best_replica_path); } } @@ -6405,7 +6429,7 @@ void StorageReplicatedMergeTree::fetchPartition( try { - fetched = fetchPart(part, metadata_snapshot, best_replica_path, true, 0, zookeeper, /* try_fetch_shared = */ false); + fetched = fetchPart(part, metadata_snapshot, from_zookeeper_name, best_replica_path, true, 0, zookeeper, /* try_fetch_shared = */ false); } catch (const DB::Exception & e) { diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 29b6a4d6817..cb93dd0b5e3 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -382,6 +382,7 @@ private: zkutil::ZooKeeperPtr getZooKeeperIfTableShutDown() const; zkutil::ZooKeeperPtr getZooKeeperAndAssertNotReadonly() const; void setZooKeeper(); + String getEndpointName() const; /// If true, the table is offline and can not be written to it. /// This flag is managed by RestartingThread. 
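/// Illustrative note, not part of the patch: how a FETCH PARTITION ... FROM argument is split
/// into a ZooKeeper name and a replica path in the fetchPartition() hunks above. This is a
/// simplified sketch of what zkutil::extractZooKeeperName()/extractZooKeeperPath() are used
/// for here; the real helpers perform additional validation, and "default" is an assumed name.
#include <cassert>
#include <string>
#include <utility>

std::pair<std::string, std::string> splitZooKeeperAddressSketch(const std::string & from)
{
    /// A plain path refers to the default ZooKeeper.
    if (!from.empty() && from.front() == '/')
        return {"default", from};

    /// "aux_zk:/path" selects an auxiliary ZooKeeper by name.
    const auto colon = from.find(':');
    if (colon == std::string::npos)
        return {"default", from};
    return {from.substr(0, colon), from.substr(colon + 1)};
}

int main()
{
    assert(splitZooKeeperAddressSketch("/clickhouse/tables/t1/0")
           == std::make_pair(std::string("default"), std::string("/clickhouse/tables/t1/0")));
    assert(splitZooKeeperAddressSketch("aux_zk:/clickhouse/tables/t1/0")
           == std::make_pair(std::string("aux_zk"), std::string("/clickhouse/tables/t1/0")));
}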
@@ -699,6 +700,7 @@ private: bool fetchPart( const String & part_name, const StorageMetadataPtr & metadata_snapshot, + const String & source_zookeeper_name, const String & source_replica_path, bool to_detached, size_t quorum, diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 4723693ac3a..51acb6a3a7d 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -31,7 +31,7 @@ #include #include -#include +#include #include #include @@ -676,8 +676,8 @@ std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( modified_settings.remote_read_min_bytes_for_seek = modified_settings.remote_fs_buffer_size; auto & pool_reader = context->getThreadPoolReader(FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); - auto async_reader = std::make_unique( - pool_reader, modified_settings, std::move(s3_impl), + auto async_reader = std::make_unique( + std::move(s3_impl), pool_reader, modified_settings, context->getAsyncReadCounters(), context->getFilesystemReadPrefetchesLog()); async_reader->setReadUntilEnd(); @@ -1297,7 +1297,7 @@ void StorageS3::processNamedCollectionResult(StorageS3::Configuration & configur configuration.auth_settings.no_sign_request = collection.getOrDefault("no_sign_request", false); configuration.auth_settings.expiration_window_seconds = collection.getOrDefault("expiration_window_seconds", S3::DEFAULT_EXPIRATION_WINDOW_SECONDS); - configuration.format = collection.getOrDefault("format", "auto"); + configuration.format = collection.getOrDefault("format", configuration.format); configuration.compression_method = collection.getOrDefault("compression_method", collection.getOrDefault("compression", "auto")); configuration.structure = collection.getOrDefault("structure", "auto"); diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index bccf2a59b35..153a3b7f11b 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -4,36 +4,22 @@ #if USE_AWS_S3 -#include "Common/Exception.h" -#include "Client/Connection.h" -#include "Core/QueryProcessingStage.h" #include #include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include -#include +#include #include +#include #include -#include -#include -#include #include +#include #include -#include #include -#include -#include - -#include +#include +#include +#include +#include +#include #include #include @@ -47,21 +33,15 @@ namespace ErrorCodes } StorageS3Cluster::StorageS3Cluster( - const Configuration & configuration_, + const String & cluster_name_, + const StorageS3::Configuration & configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, ContextPtr context_, - bool structure_argument_was_provided_, - bool format_argument_was_provided_) - : IStorageCluster(table_id_) - , log(&Poco::Logger::get("StorageS3Cluster (" + table_id_.table_name + ")")) + bool structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageS3Cluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) , s3_configuration{configuration_} - , cluster_name(configuration_.cluster_name) - , format_name(configuration_.format) - , compression_method(configuration_.compression_method) - , structure_argument_was_provided(structure_argument_was_provided_) - , format_argument_was_provided(format_argument_was_provided_) { context_->getGlobalContext()->getRemoteHostFilter().checkURL(configuration_.url.uri); 
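/// Illustrative note, not part of the patch: the intent behind the change above in
/// StorageS3::processNamedCollectionResult(), where getOrDefault("format", "auto") became
/// getOrDefault("format", configuration.format). The toy std::map-based collection and all
/// names below are assumptions for demonstration only.
#include <cassert>
#include <map>
#include <string>

std::string getOrDefaultSketch(const std::map<std::string, std::string> & collection, const std::string & key, const std::string & default_value)
{
    const auto it = collection.find(key);
    return it == collection.end() ? default_value : it->second;
}

int main()
{
    std::string format = "Parquet"; /// e.g. a format that was already known before the named collection is applied
    const std::map<std::string, std::string> collection; /// named collection that does not set "format"

    /// Old behaviour: an absent key silently reset the format to "auto".
    assert(getOrDefaultSketch(collection, "format", "auto") == "auto");

    /// New behaviour: the previously known format is kept when the collection does not override it.
    format = getOrDefaultSketch(collection, "format", format);
    assert(format == "Parquet");
}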
StorageInMemoryMetadata storage_metadata; @@ -69,8 +49,6 @@ StorageS3Cluster::StorageS3Cluster( if (columns_.empty()) { - /// `distributed_processing` is set to false, because this code is executed on the initiator, so there is no callback set - /// for asking for the next tasks. /// `format_settings` is set to std::nullopt, because StorageS3Cluster is used only as table function auto columns = StorageS3::getTableStructureFromDataImpl(s3_configuration, /*format_settings=*/std::nullopt, context_); storage_metadata.setColumns(columns); @@ -91,131 +69,21 @@ StorageS3Cluster::StorageS3Cluster( virtual_block.insert({column.type->createColumn(), column.type, column.name}); } +void StorageS3Cluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) +{ + ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); + if (!expression_list) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function s3Cluster, got '{}'", queryToString(query)); + + TableFunctionS3Cluster::addColumnsStructureToArguments(expression_list->children, structure, context); +} + void StorageS3Cluster::updateConfigurationIfChanged(ContextPtr local_context) { s3_configuration.update(local_context); } -namespace -{ - -void addColumnsStructureToQueryWithS3ClusterEngine(ASTPtr & query, const String & structure, bool format_argument_was_provided, const String & function_name) -{ - ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); - if (!expression_list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function {}, got '{}'", function_name, queryToString(query)); - - auto structure_literal = std::make_shared(structure); - - if (!format_argument_was_provided) - { - auto format_literal = std::make_shared("auto"); - expression_list->children.push_back(format_literal); - } - - expression_list->children.push_back(structure_literal); -} - -} - -/// The code executes on initiator -Pipe StorageS3Cluster::read( - const Names & column_names, - const StorageSnapshotPtr & storage_snapshot, - SelectQueryInfo & query_info, - ContextPtr context, - QueryProcessingStage::Enum processed_stage, - size_t /*max_block_size*/, - size_t /*num_streams*/) -{ - updateConfigurationIfChanged(context); - - auto cluster = getCluster(context); - auto extension = getTaskIteratorExtension(query_info.query, context); - - /// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*) - - Block sample_block; - ASTPtr query_to_send = query_info.query; - - if (context->getSettingsRef().allow_experimental_analyzer) - { - sample_block = InterpreterSelectQueryAnalyzer::getSampleBlock(query_info.query, context, SelectQueryOptions(processed_stage)); - } - else - { - auto interpreter = InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze()); - sample_block = interpreter.getSampleBlock(); - query_to_send = interpreter.getQueryInfo().query->clone(); - } - - const Scalars & scalars = context->hasQueryContext() ? 
context->getQueryContext()->getScalars() : Scalars{}; - - Pipes pipes; - - const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; - - if (!structure_argument_was_provided) - addColumnsStructureToQueryWithS3ClusterEngine( - query_to_send, StorageDictionary::generateNamesAndTypesDescription(storage_snapshot->metadata->getColumns().getAll()), format_argument_was_provided, getName()); - - RestoreQualifiedNamesVisitor::Data data; - data.distributed_table = DatabaseAndTableWithAlias(*getTableExpression(query_info.query->as(), 0)); - data.remote_table.database = context->getCurrentDatabase(); - data.remote_table.table = getName(); - RestoreQualifiedNamesVisitor(data).visit(query_to_send); - AddDefaultDatabaseVisitor visitor(context, context->getCurrentDatabase(), - /* only_replace_current_database_function_= */false, - /* only_replace_in_join_= */true); - visitor.visit(query_to_send); - - auto new_context = IStorageCluster::updateSettingsForTableFunctionCluster(context, context->getSettingsRef()); - const auto & current_settings = new_context->getSettingsRef(); - auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(current_settings); - for (const auto & shard_info : cluster->getShardsInfo()) - { - auto try_results = shard_info.pool->getMany(timeouts, ¤t_settings, PoolMode::GET_MANY); - for (auto & try_result : try_results) - { - auto remote_query_executor = std::make_shared( - std::vector{try_result}, - queryToString(query_to_send), - sample_block, - new_context, - /*throttler=*/nullptr, - scalars, - Tables(), - processed_stage, - extension); - - remote_query_executor->setLogger(log); - pipes.emplace_back(std::make_shared(remote_query_executor, add_agg_info, false, false)); - } - } - - storage_snapshot->check(column_names); - return Pipe::unitePipes(std::move(pipes)); -} - -QueryProcessingStage::Enum StorageS3Cluster::getQueryProcessingStage( - ContextPtr context, QueryProcessingStage::Enum to_stage, const StorageSnapshotPtr &, SelectQueryInfo &) const -{ - /// Initiator executes query on remote node. - if (context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) - if (to_stage >= QueryProcessingStage::Enum::WithMergeableState) - return QueryProcessingStage::Enum::WithMergeableState; - - /// Follower just reads the data. 
- return QueryProcessingStage::Enum::FetchColumns; -} - - -ClusterPtr StorageS3Cluster::getCluster(ContextPtr context) const -{ - return context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef()); -} - -RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr query, ContextPtr context) const +RemoteQueryExecutor::Extension StorageS3Cluster::getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const { auto iterator = std::make_shared( *s3_configuration.client, s3_configuration.url, query, virtual_block, context); diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h index 84661d23640..5c2229875e5 100644 --- a/src/Storages/StorageS3Cluster.h +++ b/src/Storages/StorageS3Cluster.h @@ -21,46 +21,32 @@ class Context; class StorageS3Cluster : public IStorageCluster { public: - struct Configuration : public StorageS3::Configuration - { - std::string cluster_name; - }; - StorageS3Cluster( - const Configuration & configuration_, + const String & cluster_name_, + const StorageS3::Configuration & configuration_, const StorageID & table_id_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, ContextPtr context_, - bool structure_argument_was_provided_, - bool format_argument_was_provided_); + bool structure_argument_was_provided_); std::string getName() const override { return "S3Cluster"; } - Pipe read(const Names &, const StorageSnapshotPtr &, SelectQueryInfo &, - ContextPtr, QueryProcessingStage::Enum, size_t /*max_block_size*/, size_t /*num_streams*/) override; - - QueryProcessingStage::Enum - getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, const StorageSnapshotPtr &, SelectQueryInfo &) const override; - NamesAndTypesList getVirtuals() const override; - RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, ContextPtr context) const override; - ClusterPtr getCluster(ContextPtr context) const override; + RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; protected: void updateConfigurationIfChanged(ContextPtr local_context); private: - Poco::Logger * log; + void updateBeforeRead(const ContextPtr & context) override { updateConfigurationIfChanged(context); } + + void addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + StorageS3::Configuration s3_configuration; - String cluster_name; - String format_name; - String compression_method; NamesAndTypesList virtual_columns; Block virtual_block; - bool structure_argument_was_provided; - bool format_argument_was_provided; }; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 32721a0020b..00b5dbfc5e3 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index ec5e90da53c..da8f6a151b2 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -52,8 +52,8 @@ namespace ErrorCodes } static constexpr auto bad_arguments_error_message = "Storage URL requires 1-4 arguments: " - "url, name of used format (taken from file extension by default), " - "optional compression method, optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; + "url, name of used format (taken from file extension by default), " + "optional compression method, optional headers (specified 
as `headers('name'='value', 'name2'='value2')`)"; static const std::unordered_set required_configuration_keys = { "url", @@ -101,7 +101,8 @@ IStorageURLBase::IStorageURLBase( const String & compression_method_, const HTTPHeaderEntries & headers_, const String & http_method_, - ASTPtr partition_by_) + ASTPtr partition_by_, + bool distributed_processing_) : IStorage(table_id_) , uri(uri_) , compression_method(chooseCompressionMethod(Poco::URI(uri_).getPath(), compression_method_)) @@ -110,6 +111,7 @@ IStorageURLBase::IStorageURLBase( , headers(headers_) , http_method(http_method_) , partition_by(partition_by_) + , distributed_processing(distributed_processing_) { FormatFactory::instance().checkFormatName(format_name); StorageInMemoryMetadata storage_metadata; @@ -135,7 +137,7 @@ namespace HTTPHeaderEntries headers(headers_.begin(), headers_.end()); // Propagate OpenTelemetry trace context, if any, downstream. - const auto ¤t_trace_context = OpenTelemetry::CurrentContext(); + const auto & current_trace_context = OpenTelemetry::CurrentContext(); if (current_trace_context.isTraceEnabled()) { headers.emplace_back("traceparent", current_trace_context.composeTraceparentHeader()); @@ -149,277 +151,268 @@ namespace return headers; } - - class StorageURLSource : public ISource + StorageURLSource::FailoverOptions getFailoverOptions(const String & uri, size_t max_addresses) { - using URIParams = std::vector>; + return parseRemoteDescription(uri, 0, uri.size(), '|', max_addresses); + } +} - public: - struct URIInfo - { - using FailoverOptions = std::vector; - std::vector uri_list_to_read; - std::atomic next_uri_to_read = 0; +class StorageURLSource::DisclosedGlobIterator::Impl +{ +public: + Impl(const String & uri, size_t max_addresses) + { + uris = parseRemoteDescription(uri, 0, uri.size(), ',', max_addresses); + } - bool need_path_column = false; - bool need_file_column = false; - }; - using URIInfoPtr = std::shared_ptr; - - static void setCredentials(Poco::Net::HTTPBasicCredentials & credentials, const Poco::URI & request_uri) - { - const auto & user_info = request_uri.getUserInfo(); - if (!user_info.empty()) - { - std::size_t n = user_info.find(':'); - if (n != std::string::npos) - { - credentials.setUsername(user_info.substr(0, n)); - credentials.setPassword(user_info.substr(n + 1)); - } - } - } - - static Block getBlockForSource(const Block & block_for_format, const URIInfoPtr & uri_info) - { - auto res = block_for_format; - if (uri_info->need_path_column) - { - res.insert( - {DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_path"}); - } - - if (uri_info->need_file_column) - { - res.insert( - {DataTypeLowCardinality{std::make_shared()}.createColumn(), - std::make_shared(std::make_shared()), - "_file"}); - } - return res; - } - - StorageURLSource( - URIInfoPtr uri_info_, - const std::string & http_method, - std::function callback, - const String & format, - const std::optional & format_settings, - String name_, - const Block & sample_block, - ContextPtr context, - const ColumnsDescription & columns, - UInt64 max_block_size, - const ConnectionTimeouts & timeouts, - CompressionMethod compression_method, - size_t download_threads, - const HTTPHeaderEntries & headers_ = {}, - const URIParams & params = {}, - bool glob_url = false) - : ISource(getBlockForSource(sample_block, uri_info_)), name(std::move(name_)), uri_info(uri_info_) - { - auto headers = getHeaders(headers_); - - /// Lazy initialization. 
We should not perform requests in constructor, because we need to do it in query pipeline. - initialize = [=, this](const URIInfo::FailoverOptions & uri_options) - { - if (uri_options.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty url list"); - - auto first_option = uri_options.begin(); - auto [actual_uri, buf_factory] = getFirstAvailableURIAndReadBuffer( - first_option, - uri_options.end(), - context, - params, - http_method, - callback, - timeouts, - credentials, - headers, - glob_url, - uri_options.size() == 1); - - curr_uri = actual_uri; - - try - { - total_size += buf_factory->getFileSize(); - } - catch (...) - { - // we simply continue without total_size - } - - // TODO: Pass max_parsing_threads and max_download_threads adjusted for num_streams. - auto input_format = FormatFactory::instance().getInputRandomAccess( - format, - std::move(buf_factory), - sample_block, - context, - max_block_size, - /* is_remote_fs */ true, - compression_method, - format_settings, - download_threads); - - QueryPipelineBuilder builder; - builder.init(Pipe(input_format)); - - builder.addSimpleTransform( - [&](const Block & cur_header) - { return std::make_shared(cur_header, columns, *input_format, context); }); - - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - reader = std::make_unique(*pipeline); - }; - } - - String getName() const override { return name; } - - Chunk generate() override - { - while (true) - { - if (isCancelled()) - { - if (reader) - reader->cancel(); - break; - } - - if (!reader) - { - auto current_uri_pos = uri_info->next_uri_to_read.fetch_add(1); - if (current_uri_pos >= uri_info->uri_list_to_read.size()) - return {}; - - auto current_uri_options = uri_info->uri_list_to_read[current_uri_pos]; - - initialize(current_uri_options); - } - - Chunk chunk; - if (reader->pull(chunk)) - { - UInt64 num_rows = chunk.getNumRows(); - - const String & path{curr_uri.getPath()}; - if (uri_info->need_path_column) - { - auto column = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, path); - chunk.addColumn(column->convertToFullColumnIfConst()); - } - - if (uri_info->need_file_column) - { - const size_t last_slash_pos = path.find_last_of('/'); - auto file_name = path.substr(last_slash_pos + 1); - auto column - = DataTypeLowCardinality{std::make_shared()}.createColumnConst(num_rows, std::move(file_name)); - chunk.addColumn(column->convertToFullColumnIfConst()); - } - - if (num_rows && total_size) - updateRowsProgressApprox( - *this, chunk, total_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); - - return chunk; - } - - pipeline->reset(); - reader.reset(); - } + String next() + { + size_t current_index = index.fetch_add(1, std::memory_order_relaxed); + if (current_index >= uris.size()) return {}; + + return uris[current_index]; + } + + size_t size() + { + return uris.size(); + } + +private: + Strings uris; + std::atomic_size_t index = 0; +}; + +StorageURLSource::DisclosedGlobIterator::DisclosedGlobIterator(const String & uri, size_t max_addresses) + : pimpl(std::make_shared(uri, max_addresses)) {} + +String StorageURLSource::DisclosedGlobIterator::next() +{ + return pimpl->next(); +} + +size_t StorageURLSource::DisclosedGlobIterator::size() +{ + return pimpl->size(); +} + +void StorageURLSource::setCredentials(Poco::Net::HTTPBasicCredentials & credentials, const Poco::URI & request_uri) +{ + const auto & user_info = request_uri.getUserInfo(); + if (!user_info.empty()) + { + std::size_t n = 
user_info.find(':'); + if (n != std::string::npos) + { + credentials.setUsername(user_info.substr(0, n)); + credentials.setPassword(user_info.substr(n + 1)); + } + } +} + +Block StorageURLSource::getHeader(Block sample_block, const std::vector & requested_virtual_columns) +{ + for (const auto & virtual_column : requested_virtual_columns) + sample_block.insert({virtual_column.type->createColumn(), virtual_column.type, virtual_column.name}); + + return sample_block; +} + +StorageURLSource::StorageURLSource( + const std::vector & requested_virtual_columns_, + std::shared_ptr uri_iterator_, + const std::string & http_method, + std::function callback, + const String & format, + const std::optional & format_settings, + String name_, + const Block & sample_block, + ContextPtr context, + const ColumnsDescription & columns, + UInt64 max_block_size, + const ConnectionTimeouts & timeouts, + CompressionMethod compression_method, + size_t download_threads, + const HTTPHeaderEntries & headers_, + const URIParams & params, + bool glob_url) + : ISource(getHeader(sample_block, requested_virtual_columns_)), name(std::move(name_)), requested_virtual_columns(requested_virtual_columns_), uri_iterator(uri_iterator_) +{ + auto headers = getHeaders(headers_); + + /// Lazy initialization. We should not perform requests in constructor, because we need to do it in query pipeline. + initialize = [=, this](const FailoverOptions & uri_options) + { + if (uri_options.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty url list"); + + auto first_option = uri_options.begin(); + auto [actual_uri, buf_factory] = getFirstAvailableURIAndReadBuffer( + first_option, + uri_options.end(), + context, + params, + http_method, + callback, + timeouts, + credentials, + headers, + glob_url, + uri_options.size() == 1); + + curr_uri = actual_uri; + + try + { + total_size += buf_factory->getFileSize(); + } + catch (...) + { + // we simply continue without total_size } - static std::tuple getFirstAvailableURIAndReadBuffer( - std::vector::const_iterator & option, - const std::vector::const_iterator & end, - ContextPtr context, - const URIParams & params, - const String & http_method, - std::function callback, - const ConnectionTimeouts & timeouts, - Poco::Net::HTTPBasicCredentials & credentials, - const HTTPHeaderEntries & headers, - bool glob_url, - bool delay_initialization) + // TODO: Pass max_parsing_threads and max_download_threads adjusted for num_streams. 
+ auto input_format = FormatFactory::instance().getInputRandomAccess( + format, + std::move(buf_factory), + sample_block, + context, + max_block_size, + /* is_remote_fs */ true, + compression_method, + format_settings, + download_threads); + + QueryPipelineBuilder builder; + builder.init(Pipe(input_format)); + + builder.addSimpleTransform([&](const Block & cur_header) + { return std::make_shared(cur_header, columns, *input_format, context); }); + + pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + reader = std::make_unique(*pipeline); + }; +} + +Chunk StorageURLSource::generate() +{ + while (true) + { + if (isCancelled()) { - String first_exception_message; - ReadSettings read_settings = context->getReadSettings(); + if (reader) + reader->cancel(); + break; + } - size_t options = std::distance(option, end); - for (; option != end; ++option) + if (!reader) + { + auto current_uri = (*uri_iterator)(); + if (current_uri.empty()) + return {}; + + initialize(current_uri); + } + + Chunk chunk; + if (reader->pull(chunk)) + { + UInt64 num_rows = chunk.getNumRows(); + if (num_rows && total_size) + updateRowsProgressApprox( + *this, chunk, total_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); + + const String & path{curr_uri.getPath()}; + + for (const auto & virtual_column : requested_virtual_columns) { - bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs && option == std::prev(end); - auto request_uri = Poco::URI(*option); - - for (const auto & [param, value] : params) - request_uri.addQueryParameter(param, value); - - setCredentials(credentials, request_uri); - - const auto settings = context->getSettings(); - auto res = std::make_unique( - request_uri, - http_method, - callback, - timeouts, - credentials, - settings.max_http_get_redirects, - settings.max_read_buffer_size, - read_settings, - headers, - &context->getRemoteHostFilter(), - delay_initialization, - /* use_external_buffer */ false, - /* skip_url_not_found_error */ skip_url_not_found_error); - - if (options > 1) + if (virtual_column.name == "_path") { - // Send a HEAD request to check availability. - try - { - res->getFileInfo(); - } - catch (...) 
- { - if (first_exception_message.empty()) - first_exception_message = getCurrentExceptionMessage(false); - - tryLogCurrentException(__PRETTY_FUNCTION__); - - continue; - } + chunk.addColumn(virtual_column.type->createColumnConst(num_rows, path)->convertToFullColumnIfConst()); + } + else if (virtual_column.name == "_file") + { + size_t last_slash_pos = path.find_last_of('/'); + auto column = virtual_column.type->createColumnConst(num_rows, path.substr(last_slash_pos + 1)); + chunk.addColumn(column->convertToFullColumnIfConst()); } - - return std::make_tuple(request_uri, std::move(res)); } - throw Exception(ErrorCodes::NETWORK_ERROR, "All uri ({}) options are unreachable: {}", options, first_exception_message); + return chunk; } - private: - using InitializeFunc = std::function; - InitializeFunc initialize; + pipeline->reset(); + reader.reset(); + } + return {}; +} - String name; - URIInfoPtr uri_info; - Poco::URI curr_uri; +std::tuple StorageURLSource::getFirstAvailableURIAndReadBuffer( + std::vector::const_iterator & option, + const std::vector::const_iterator & end, + ContextPtr context, + const URIParams & params, + const String & http_method, + std::function callback, + const ConnectionTimeouts & timeouts, + Poco::Net::HTTPBasicCredentials & credentials, + const HTTPHeaderEntries & headers, + bool glob_url, + bool delay_initialization) +{ + String first_exception_message; + ReadSettings read_settings = context->getReadSettings(); - std::unique_ptr pipeline; - std::unique_ptr reader; + size_t options = std::distance(option, end); + for (; option != end; ++option) + { + bool skip_url_not_found_error = glob_url && read_settings.http_skip_not_found_url_for_globs && option == std::prev(end); + auto request_uri = Poco::URI(*option); - Poco::Net::HTTPBasicCredentials credentials; + for (const auto & [param, value] : params) + request_uri.addQueryParameter(param, value); - size_t total_size = 0; - UInt64 total_rows_approx_max = 0; - size_t total_rows_count_times = 0; - UInt64 total_rows_approx_accumulated = 0; - }; + setCredentials(credentials, request_uri); + + const auto settings = context->getSettings(); + auto res = std::make_unique( + request_uri, + http_method, + callback, + timeouts, + credentials, + settings.max_http_get_redirects, + settings.max_read_buffer_size, + read_settings, + headers, + &context->getRemoteHostFilter(), + delay_initialization, + /* use_external_buffer */ false, + /* skip_url_not_found_error */ skip_url_not_found_error); + + if (options > 1) + { + // Send a HEAD request to check availability. + try + { + res->getFileInfo(); + } + catch (...) 
+ { + if (first_exception_message.empty()) + first_exception_message = getCurrentExceptionMessage(false); + + tryLogCurrentException(__PRETTY_FUNCTION__); + + continue; + } + } + + return std::make_tuple(request_uri, std::move(res)); + } + + throw Exception(ErrorCodes::NETWORK_ERROR, "All uri ({}) options are unreachable: {}", options, first_exception_message); } StorageURLSink::StorageURLSink( @@ -674,61 +667,66 @@ Pipe IStorageURLBase::read( block_for_format = storage_snapshot->metadata->getSampleBlock(); } - size_t max_download_threads = local_context->getSettingsRef().max_download_threads; - - auto uri_info = std::make_shared(); - for (const auto & column : column_names) + std::unordered_set column_names_set(column_names.begin(), column_names.end()); + std::vector requested_virtual_columns; + for (const auto & virtual_column : getVirtuals()) { - if (column == "_path") - uri_info->need_path_column = true; - if (column == "_file") - uri_info->need_file_column = true; + if (column_names_set.contains(virtual_column.name)) + requested_virtual_columns.push_back(virtual_column); } - if (urlWithGlobs(uri)) + size_t max_download_threads = local_context->getSettingsRef().max_download_threads; + + std::shared_ptr iterator_wrapper{nullptr}; + bool is_url_with_globs = urlWithGlobs(uri); + size_t max_addresses = local_context->getSettingsRef().glob_expansion_max_elements; + if (distributed_processing) { - size_t max_addresses = local_context->getSettingsRef().glob_expansion_max_elements; - auto uri_descriptions = parseRemoteDescription(uri, 0, uri.size(), ',', max_addresses, "url"); - - if (num_streams > uri_descriptions.size()) - num_streams = uri_descriptions.size(); - - /// For each uri (which acts like shard) check if it has failover options - uri_info->uri_list_to_read.reserve(uri_descriptions.size()); - for (const auto & description : uri_descriptions) - uri_info->uri_list_to_read.emplace_back(parseRemoteDescription(description, 0, description.size(), '|', max_addresses, "url")); - - Pipes pipes; - pipes.reserve(num_streams); - - size_t download_threads = num_streams >= max_download_threads ? 
1 : (max_download_threads / num_streams); - for (size_t i = 0; i < num_streams; ++i) + iterator_wrapper = std::make_shared( + [callback = local_context->getReadTaskCallback(), max_addresses]() + { + String next_uri = callback(); + if (next_uri.empty()) + return StorageURLSource::FailoverOptions{}; + return getFailoverOptions(next_uri, max_addresses); + }); + } + else if (is_url_with_globs) + { + /// Iterate through disclosed globs and make a source for each file + auto glob_iterator = std::make_shared(uri, max_addresses); + iterator_wrapper = std::make_shared([glob_iterator, max_addresses]() { - pipes.emplace_back(std::make_shared( - uri_info, - getReadMethod(), - getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size), - format_name, - format_settings, - getName(), - block_for_format, - local_context, - columns_description, - max_block_size, - getHTTPTimeouts(local_context), - compression_method, - download_threads, - headers, - params, - /* glob_url */ true)); - } - return Pipe::unitePipes(std::move(pipes)); + String next_uri = glob_iterator->next(); + if (next_uri.empty()) + return StorageURLSource::FailoverOptions{}; + return getFailoverOptions(next_uri, max_addresses); + }); + + if (num_streams > glob_iterator->size()) + num_streams = glob_iterator->size(); } else { - uri_info->uri_list_to_read.emplace_back(std::vector{uri}); - return Pipe(std::make_shared( - uri_info, + iterator_wrapper = std::make_shared([&, max_addresses, done = false]() mutable + { + if (done) + return StorageURLSource::FailoverOptions{}; + done = true; + return getFailoverOptions(uri, max_addresses); + }); + num_streams = 1; + } + + Pipes pipes; + pipes.reserve(num_streams); + + size_t download_threads = num_streams >= max_download_threads ? 
1 : (max_download_threads / num_streams); + for (size_t i = 0; i < num_streams; ++i) + { + pipes.emplace_back(std::make_shared( + requested_virtual_columns, + iterator_wrapper, getReadMethod(), getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size), format_name, @@ -740,10 +738,13 @@ Pipe IStorageURLBase::read( max_block_size, getHTTPTimeouts(local_context), compression_method, - max_download_threads, + download_threads, headers, - params)); + params, + is_url_with_globs)); } + + return Pipe::unitePipes(std::move(pipes)); } @@ -771,11 +772,17 @@ Pipe StorageURLWithFailover::read( auto params = getReadURIParams(column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size); - auto uri_info = std::make_shared(); - uri_info->uri_list_to_read.emplace_back(uri_options); + auto iterator_wrapper = std::make_shared([&, done = false]() mutable + { + if (done) + return StorageURLSource::FailoverOptions{}; + done = true; + return uri_options; + }); auto pipe = Pipe(std::make_shared( - uri_info, + std::vector{}, + iterator_wrapper, getReadMethod(), getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size), format_name, @@ -935,7 +942,8 @@ StorageURL::StorageURL( const String & compression_method_, const HTTPHeaderEntries & headers_, const String & http_method_, - ASTPtr partition_by_) + ASTPtr partition_by_, + bool distributed_processing_) : IStorageURLBase( uri_, context_, @@ -948,7 +956,8 @@ StorageURL::StorageURL( compression_method_, headers_, http_method_, - partition_by_) + partition_by_, + distributed_processing_) { context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); } diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 69890f7cdd1..d53b72105e4 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -20,6 +21,7 @@ using OutputFormatPtr = std::shared_ptr; struct ConnectionTimeouts; class NamedCollection; +class PullingPipelineExecutor; /** * This class represents table engine for external urls. @@ -68,7 +70,8 @@ protected: const String & compression_method_, const HTTPHeaderEntries & headers_ = {}, const String & method_ = "", - ASTPtr partition_by = nullptr); + ASTPtr partition_by = nullptr, + bool distributed_processing_ = false); String uri; CompressionMethod compression_method; @@ -81,6 +84,7 @@ protected: HTTPHeaderEntries headers; String http_method; /// For insert can choose Put instead of default Post. 
ASTPtr partition_by; + bool distributed_processing; virtual std::string getReadMethod() const; @@ -131,6 +135,87 @@ private: const ContextPtr & context); }; + +class StorageURLSource : public ISource +{ + using URIParams = std::vector>; + +public: + class DisclosedGlobIterator + { + public: + DisclosedGlobIterator(const String & uri_, size_t max_addresses); + String next(); + size_t size(); + private: + class Impl; + /// shared_ptr to have copy constructor + std::shared_ptr pimpl; + }; + + using FailoverOptions = std::vector; + using IteratorWrapper = std::function; + + StorageURLSource( + const std::vector & requested_virtual_columns_, + std::shared_ptr uri_iterator_, + const std::string & http_method, + std::function callback, + const String & format, + const std::optional & format_settings, + String name_, + const Block & sample_block, + ContextPtr context, + const ColumnsDescription & columns, + UInt64 max_block_size, + const ConnectionTimeouts & timeouts, + CompressionMethod compression_method, + size_t download_threads, + const HTTPHeaderEntries & headers_ = {}, + const URIParams & params = {}, + bool glob_url = false); + + String getName() const override { return name; } + + Chunk generate() override; + + static void setCredentials(Poco::Net::HTTPBasicCredentials & credentials, const Poco::URI & request_uri); + + static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); + + static std::tuple getFirstAvailableURIAndReadBuffer( + std::vector::const_iterator & option, + const std::vector::const_iterator & end, + ContextPtr context, + const URIParams & params, + const String & http_method, + std::function callback, + const ConnectionTimeouts & timeouts, + Poco::Net::HTTPBasicCredentials & credentials, + const HTTPHeaderEntries & headers, + bool glob_url, + bool delay_initialization); + +private: + using InitializeFunc = std::function; + InitializeFunc initialize; + + String name; + std::vector requested_virtual_columns; + std::shared_ptr uri_iterator; + Poco::URI curr_uri; + + std::unique_ptr pipeline; + std::unique_ptr reader; + + Poco::Net::HTTPBasicCredentials credentials; + + size_t total_size = 0; + UInt64 total_rows_approx_max = 0; + size_t total_rows_count_times = 0; + UInt64 total_rows_approx_accumulated = 0; +}; + class StorageURLSink : public SinkToStorage { public: @@ -174,7 +259,8 @@ public: const String & compression_method_, const HTTPHeaderEntries & headers_ = {}, const String & method_ = "", - ASTPtr partition_by_ = nullptr); + ASTPtr partition_by_ = nullptr, + bool distributed_processing_ = false); String getName() const override { @@ -209,14 +295,14 @@ class StorageURLWithFailover final : public StorageURL { public: StorageURLWithFailover( - const std::vector & uri_options_, - const StorageID & table_id_, - const String & format_name_, - const std::optional & format_settings_, - const ColumnsDescription & columns_, - const ConstraintsDescription & constraints_, - ContextPtr context_, - const String & compression_method_); + const std::vector & uri_options_, + const StorageID & table_id_, + const String & format_name_, + const std::optional & format_settings_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_, + const String & compression_method_); Pipe read( const Names & column_names, diff --git a/src/Storages/StorageURLCluster.cpp b/src/Storages/StorageURLCluster.cpp new file mode 100644 index 00000000000..f652a40a561 --- /dev/null +++ b/src/Storages/StorageURLCluster.cpp @@ 
-0,0 +1,94 @@ +#include "Interpreters/Context_fwd.h" + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +StorageURLCluster::StorageURLCluster( + ContextPtr context_, + const String & cluster_name_, + const String & uri_, + const String & format_, + const String & compression_method_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const StorageURL::Configuration & configuration_, + bool structure_argument_was_provided_) + : IStorageCluster(cluster_name_, table_id_, &Poco::Logger::get("StorageURLCluster (" + table_id_.table_name + ")"), structure_argument_was_provided_) + , uri(uri_) +{ + context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); + + StorageInMemoryMetadata storage_metadata; + + if (columns_.empty()) + { + auto columns = StorageURL::getTableStructureFromData(format_, + uri, + chooseCompressionMethod(Poco::URI(uri).getPath(), compression_method_), + configuration_.headers, + std::nullopt, + context_); + storage_metadata.setColumns(columns); + } + else + storage_metadata.setColumns(columns_); + + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); +} + +void StorageURLCluster::addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) +{ + ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); + if (!expression_list) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function urlCluster, got '{}'", queryToString(query)); + + TableFunctionURLCluster::addColumnsStructureToArguments(expression_list->children, structure, context); +} + +RemoteQueryExecutor::Extension StorageURLCluster::getTaskIteratorExtension(ASTPtr, const ContextPtr & context) const +{ + auto iterator = std::make_shared(uri, context->getSettingsRef().glob_expansion_max_elements); + auto callback = std::make_shared([iter = std::move(iterator)]() mutable -> String { return iter->next(); }); + return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)}; +} + +NamesAndTypesList StorageURLCluster::getVirtuals() const +{ + return NamesAndTypesList{ + {"_path", std::make_shared(std::make_shared())}, + {"_file", std::make_shared(std::make_shared())}}; +} + +} diff --git a/src/Storages/StorageURLCluster.h b/src/Storages/StorageURLCluster.h new file mode 100644 index 00000000000..67771416771 --- /dev/null +++ b/src/Storages/StorageURLCluster.h @@ -0,0 +1,49 @@ +#pragma once + +#include "config.h" + +#include +#include + +#include +#include +#include +#include + +namespace DB +{ + +class Context; + +class StorageURLCluster : public IStorageCluster +{ +public: + StorageURLCluster( + ContextPtr context_, + const String & cluster_name_, + const String & uri_, + const String & format_, + const String & compression_method_, + const StorageID & table_id_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + const StorageURL::Configuration & configuration_, + bool structure_argument_was_provided_); + + std::string getName() const override { return "URLCluster"; } + + NamesAndTypesList getVirtuals() const override; + + RemoteQueryExecutor::Extension getTaskIteratorExtension(ASTPtr query, const ContextPtr & context) const override; + +private: + void 
addColumnsStructureToQuery(ASTPtr & query, const String & structure, const ContextPtr & context) override; + + String uri; + String format_name; + String compression_method; +}; + + +} + diff --git a/src/Storages/System/StorageSystemZooKeeperConnection.cpp b/src/Storages/System/StorageSystemZooKeeperConnection.cpp index f249097654e..559e12ad5ee 100644 --- a/src/Storages/System/StorageSystemZooKeeperConnection.cpp +++ b/src/Storages/System/StorageSystemZooKeeperConnection.cpp @@ -13,7 +13,7 @@ NamesAndTypesList StorageSystemZooKeeperConnection::getNamesAndTypes() return { {"name", std::make_shared()}, {"host", std::make_shared()}, - {"port", std::make_shared()}, + {"port", std::make_shared()}, {"index", std::make_shared()}, {"connected_time", std::make_shared()}, {"is_expired", std::make_shared()}, @@ -25,7 +25,7 @@ NamesAndTypesList StorageSystemZooKeeperConnection::getNamesAndTypes() void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { - res_columns[0]->insert("default_zookeeper"); + res_columns[0]->insert("default"); res_columns[1]->insert(context->getZooKeeper()->getConnectedZooKeeperHost()); res_columns[2]->insert(context->getZooKeeper()->getConnectedZooKeeperPort()); res_columns[3]->insert(context->getZooKeeper()->getConnectedZooKeeperIndex()); @@ -38,7 +38,6 @@ void StorageSystemZooKeeperConnection::fillData(MutableColumns & res_columns, Co { res_columns[0]->insert(elem.first); res_columns[1]->insert(elem.second->getConnectedZooKeeperHost()); - res_columns[1]->insert(elem.second->getConnectedZooKeeperHost()); res_columns[2]->insert(elem.second->getConnectedZooKeeperPort()); res_columns[3]->insert(elem.second->getConnectedZooKeeperIndex()); res_columns[4]->insert(elem.second->getSessionUptime()); diff --git a/src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp b/src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp deleted file mode 100644 index 106161ae620..00000000000 --- a/src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - -ASTExpressionList * extractTableFunctionArgumentsFromSelectQuery(ASTPtr & query) -{ - auto * select_query = query->as(); - if (!select_query || !select_query->tables()) - return nullptr; - - auto * tables = select_query->tables()->as(); - auto * table_expression = tables->children[0]->as()->table_expression->as(); - if (!table_expression->table_function) - return nullptr; - - auto * table_function = table_expression->table_function->as(); - return table_function->arguments->as(); -} - -void addColumnsStructureToQueryWithClusterEngine(ASTPtr & query, const String & structure, size_t max_arguments, const String & function_name) -{ - ASTExpressionList * expression_list = extractTableFunctionArgumentsFromSelectQuery(query); - if (!expression_list) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected SELECT query from table function {}, got '{}'", function_name, queryToString(query)); - auto structure_literal = std::make_shared(structure); - - if (expression_list->children.size() < 2 || expression_list->children.size() > max_arguments) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 2 to {} arguments in {} table functions, got {}", - function_name, max_arguments, expression_list->children.size()); - - if (expression_list->children.size() == 2 || 
expression_list->children.size() == max_arguments - 1) - { - auto format_literal = std::make_shared("auto"); - expression_list->children.push_back(format_literal); - } - - expression_list->children.push_back(structure_literal); -} - -} diff --git a/src/Storages/addColumnsStructureToQueryWithClusterEngine.h b/src/Storages/addColumnsStructureToQueryWithClusterEngine.h deleted file mode 100644 index 5939f3f43aa..00000000000 --- a/src/Storages/addColumnsStructureToQueryWithClusterEngine.h +++ /dev/null @@ -1,14 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -ASTExpressionList * extractTableFunctionArgumentsFromSelectQuery(ASTPtr & query); - -/// Add structure argument for queries with s3Cluster/hdfsCluster table function. -void addColumnsStructureToQueryWithClusterEngine(ASTPtr & query, const String & structure, size_t max_arguments, const String & function_name); - -} diff --git a/src/Storages/extractTableFunctionArgumentsFromSelectQuery.cpp b/src/Storages/extractTableFunctionArgumentsFromSelectQuery.cpp new file mode 100644 index 00000000000..382964d9fe1 --- /dev/null +++ b/src/Storages/extractTableFunctionArgumentsFromSelectQuery.cpp @@ -0,0 +1,29 @@ +#include + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +ASTExpressionList * extractTableFunctionArgumentsFromSelectQuery(ASTPtr & query) +{ + auto * select_query = query->as(); + if (!select_query || !select_query->tables()) + return nullptr; + + auto * tables = select_query->tables()->as(); + auto * table_expression = tables->children[0]->as()->table_expression->as(); + if (!table_expression->table_function) + return nullptr; + + auto * table_function = table_expression->table_function->as(); + return table_function->arguments->as(); +} + +} diff --git a/src/Storages/extractTableFunctionArgumentsFromSelectQuery.h b/src/Storages/extractTableFunctionArgumentsFromSelectQuery.h new file mode 100644 index 00000000000..8bf5d95a42c --- /dev/null +++ b/src/Storages/extractTableFunctionArgumentsFromSelectQuery.h @@ -0,0 +1,11 @@ +#pragma once + +#include +#include + +namespace DB +{ + +ASTExpressionList * extractTableFunctionArgumentsFromSelectQuery(ASTPtr & query); + +} diff --git a/src/TableFunctions/ITableFunctionCluster.h b/src/TableFunctions/ITableFunctionCluster.h new file mode 100644 index 00000000000..ad88d7b54f0 --- /dev/null +++ b/src/TableFunctions/ITableFunctionCluster.h @@ -0,0 +1,72 @@ +#pragma once + +#include "config.h" + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_GET; + extern const int LOGICAL_ERROR; +} + +/// Base class for *Cluster table functions that require cluster_name for the first argument. 
+template +class ITableFunctionCluster : public Base +{ +public: + String getName() const override = 0; + String getSignature() const override = 0; + + static void addColumnsStructureToArguments(ASTs & args, const String & desired_structure, const ContextPtr & context) + { + if (args.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected empty list of arguments for {}Cluster table function", Base::name); + + ASTPtr cluster_name_arg = args.front(); + args.erase(args.begin()); + Base::addColumnsStructureToArguments(args, desired_structure, context); + args.insert(args.begin(), cluster_name_arg); + } + +protected: + void parseArguments(const ASTPtr & ast, ContextPtr context) override + { + /// Clone ast function, because we can modify its arguments like removing cluster_name + Base::parseArguments(ast->clone(), context); + } + + void parseArgumentsImpl(ASTs & args, const ContextPtr & context) override + { + if (args.empty()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); + + /// Evaluate only first argument, everything else will be done Base class + args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context); + + /// Cluster name is always the first + cluster_name = checkAndGetLiteralArgument(args[0], "cluster_name"); + + if (!context->tryGetCluster(cluster_name)) + throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name); + + /// Just cut the first arg (cluster_name) and try to parse other table function arguments as is + args.erase(args.begin()); + + Base::parseArgumentsImpl(args, context); + } + + String cluster_name; +}; + +} diff --git a/src/TableFunctions/ITableFunctionDataLake.h b/src/TableFunctions/ITableFunctionDataLake.h index 1a20632430b..f87838cfb56 100644 --- a/src/TableFunctions/ITableFunctionDataLake.h +++ b/src/TableFunctions/ITableFunctionDataLake.h @@ -13,13 +13,9 @@ namespace DB { -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -} template -class ITableFunctionDataLake : public ITableFunction +class ITableFunctionDataLake : public TableFunction { public: static constexpr auto name = Name::name; @@ -33,11 +29,11 @@ protected: ColumnsDescription /*cached_columns*/) const override { ColumnsDescription columns; - if (configuration.structure != "auto") - columns = parseColumnsListFromString(configuration.structure, context); + if (TableFunction::configuration.structure != "auto") + columns = parseColumnsListFromString(TableFunction::configuration.structure, context); StoragePtr storage = std::make_shared( - configuration, context, StorageID(getDatabaseName(), table_name), + TableFunction::configuration, context, StorageID(TableFunction::getDatabaseName(), table_name), columns, ConstraintsDescription{}, String{}, std::nullopt); storage->startup(); @@ -48,34 +44,21 @@ protected: ColumnsDescription getActualTableStructure(ContextPtr context) const override { - if (configuration.structure == "auto") + if (TableFunction::configuration.structure == "auto") { - context->checkAccess(getSourceAccessType()); - return Storage::getTableStructureFromData(configuration, std::nullopt, context); + context->checkAccess(TableFunction::getSourceAccessType()); + return Storage::getTableStructureFromData(TableFunction::configuration, std::nullopt, context); } - return parseColumnsListFromString(configuration.structure, context); + return parseColumnsListFromString(TableFunction::configuration.structure, 
context); } void parseArguments(const ASTPtr & ast_function, ContextPtr context) override { - ASTs & args_func = ast_function->children; - - const auto message = fmt::format( - "The signature of table function '{}' could be the following:\n{}", getName(), TableFunction::signature); - - if (args_func.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments", getName()); - - auto & args = args_func.at(0)->children; - - TableFunction::parseArgumentsImpl(message, args, context, configuration, false); - - if (configuration.format == "auto") - configuration.format = "Parquet"; + /// Set default format to Parquet if it's not specified in arguments. + TableFunction::configuration.format = "Parquet"; + TableFunction::parseArguments(ast_function, context); } - - mutable typename Storage::Configuration configuration; }; } diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index bbaf2b68418..a60ab70d570 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -2,7 +2,6 @@ #include #include -#include #include @@ -19,8 +18,8 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int BAD_ARGUMENTS; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } void ITableFunctionFileLike::parseFirstArguments(const ASTPtr & arg, const ContextPtr &) @@ -47,9 +46,13 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' must have arguments.", getName()); ASTs & args = args_func.at(0)->children; + parseArgumentsImpl(args, context); +} - if (args.empty()) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires at least 1 argument", getName()); +void ITableFunctionFileLike::parseArgumentsImpl(ASTs & args, const ContextPtr & context) +{ + if (args.empty() || args.size() > 4) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); @@ -62,26 +65,51 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context if (format == "auto") format = getFormatFromFirstArgument(); - if (args.size() <= 2) - return; + if (args.size() > 2) + { + structure = checkAndGetLiteralArgument(args[2], "structure"); + if (structure.empty()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Table structure is empty for table function '{}'. If you want to use automatic schema inference, use 'auto'", + getName()); + } - if (args.size() != 3 && args.size() != 4) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Table function '{}' requires 1, 2, 3 or 4 arguments: " - "filename, format (default auto), structure (default auto) and compression method (default auto)", - getName()); - - structure = checkAndGetLiteralArgument(args[2], "structure"); - - if (structure.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Table structure is empty for table function '{}'. 
If you want to use automatic schema inference, use 'auto'", - ast_function->formatForErrorMessage()); - - if (args.size() == 4) + if (args.size() > 3) compression_method = checkAndGetLiteralArgument(args[3], "compression_method"); } +void ITableFunctionFileLike::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr &) +{ + if (args.empty() || args.size() > getMaxNumberOfArguments()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), args.size()); + + auto structure_literal = std::make_shared(structure); + + /// f(filename) + if (args.size() == 1) + { + /// Add format=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// f(filename, format) + else if (args.size() == 2) + { + args.push_back(structure_literal); + } + /// f(filename, format, 'auto') + else if (args.size() == 3) + { + args.back() = structure_literal; + } + /// f(filename, format, 'auto', compression) + else if (args.size() == 4) + { + args[args.size() - 2] = structure_literal; + } +} + StoragePtr ITableFunctionFileLike::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const { ColumnsDescription columns; diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index 589fce67638..8300cc27591 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -9,19 +9,35 @@ class ColumnsDescription; class Context; /* - * function(source, format, structure[, compression_method]) - creates a temporary storage from formatted source + * function(source, [format, structure, compression_method]) - creates a temporary storage from formatted source */ class ITableFunctionFileLike : public ITableFunction { public: + static constexpr auto signature = " - filename\n" + " - filename, format\n" + " - filename, format, structure\n" + " - filename, format, structure, compression_method\n"; + virtual String getSignature() const + { + return signature; + } + bool needStructureHint() const override { return structure == "auto"; } void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } bool supportsReadingSubsetOfColumns() override; + static size_t getMaxNumberOfArguments() { return 4; } + + static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr &); + protected: + void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; + virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); + virtual void parseFirstArguments(const ASTPtr & arg, const ContextPtr & context); virtual String getFormatFromFirstArgument(); diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h index c4c111de6e5..eec0a05fe8d 100644 --- a/src/TableFunctions/TableFunctionHDFS.h +++ b/src/TableFunctions/TableFunctionHDFS.h @@ -12,18 +12,28 @@ namespace DB class Context; -/* hdfs(URI, format[, structure, compression]) - creates a temporary storage from hdfs files +/* hdfs(URI, [format, structure, compression]) - creates a temporary storage from hdfs files * */ class TableFunctionHDFS : public ITableFunctionFileLike { public: static constexpr auto name = "hdfs"; - std::string getName() const override + static constexpr auto signature = " - uri\n" + " - uri, format\n" + 
" - uri, format, structure\n" + " - uri, format, structure, compression_method\n"; + + String getName() const override { return name; } + String getSignature() const override + { + return signature; + } + ColumnsDescription getActualTableStructure(ContextPtr context) const override; std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override diff --git a/src/TableFunctions/TableFunctionHDFSCluster.cpp b/src/TableFunctions/TableFunctionHDFSCluster.cpp index 7c84a281673..6fb7ed0fce5 100644 --- a/src/TableFunctions/TableFunctionHDFSCluster.cpp +++ b/src/TableFunctions/TableFunctionHDFSCluster.cpp @@ -2,86 +2,19 @@ #if USE_HDFS -#include - -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include -#include +#include +#include +#include #include "registerTableFunctions.h" #include -#include namespace DB { -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_GET; -} - - -void TableFunctionHDFSCluster::parseArguments(const ASTPtr & ast_function, ContextPtr context) -{ - auto ast_copy = ast_function->clone(); - /// Parse args - ASTs & args_func = ast_copy->children; - - if (args_func.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); - - ASTs & args = args_func.at(0)->children; - - if (args.size() < 2 || args.size() > 5) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "The signature of table function {} shall be the following:\n" - " - cluster, uri\n" - " - cluster, uri, format\n" - " - cluster, uri, format, structure\n" - " - cluster, uri, format, structure, compression_method", - getName()); - - for (auto & arg : args) - arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); - - /// This argument is always the first - cluster_name = checkAndGetLiteralArgument(args[0], "cluster_name"); - - if (!context->tryGetCluster(cluster_name)) - throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name); - - /// Just cut the first arg (cluster_name) and try to parse other table function arguments as is - args.erase(args.begin()); - - ITableFunctionFileLike::parseArguments(ast_copy, context); -} - - -ColumnsDescription TableFunctionHDFSCluster::getActualTableStructure(ContextPtr context) const -{ - if (structure == "auto") - { - context->checkAccess(getSourceAccessType()); - return StorageHDFS::getTableStructureFromData(format, filename, compression_method, context); - } - - return parseColumnsListFromString(structure, context); -} - - StoragePtr TableFunctionHDFSCluster::getStorage( const String & /*source*/, const String & /*format_*/, const ColumnsDescription & columns, ContextPtr context, const std::string & table_name, const String & /*compression_method_*/) const @@ -106,9 +39,14 @@ StoragePtr TableFunctionHDFSCluster::getStorage( { storage = std::make_shared( context, - cluster_name, filename, StorageID(getDatabaseName(), table_name), - format, columns, ConstraintsDescription{}, - compression_method, structure != "auto"); + cluster_name, + filename, + StorageID(getDatabaseName(), table_name), + format, + columns, + ConstraintsDescription{}, + compression_method, + structure != "auto"); } return storage; } @@ -118,7 +56,6 @@ void registerTableFunctionHDFSCluster(TableFunctionFactory & factory) factory.registerFunction(); } - } #endif diff --git a/src/TableFunctions/TableFunctionHDFSCluster.h b/src/TableFunctions/TableFunctionHDFSCluster.h 
index fff2c8ad116..0253217feb7 100644 --- a/src/TableFunctions/TableFunctionHDFSCluster.h +++ b/src/TableFunctions/TableFunctionHDFSCluster.h @@ -5,6 +5,8 @@ #if USE_HDFS #include +#include +#include namespace DB @@ -20,28 +22,31 @@ class Context; * On worker node it asks initiator about next task to process, processes it. * This is repeated until the tasks are finished. */ -class TableFunctionHDFSCluster : public ITableFunctionFileLike +class TableFunctionHDFSCluster : public ITableFunctionCluster { public: static constexpr auto name = "hdfsCluster"; - std::string getName() const override + static constexpr auto signature = " - cluster_name, uri\n" + " - cluster_name, uri, format\n" + " - cluster_name, uri, format, structure\n" + " - cluster_name, uri, format, structure, compression_method\n"; + + String getName() const override { return name; } + String getSignature() const override + { + return signature; + } + protected: StoragePtr getStorage( const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, const std::string & table_name, const String & compression_method_) const override; const char * getStorageTypeName() const override { return "HDFSCluster"; } - - AccessType getSourceAccessType() const override { return AccessType::HDFS; } - - ColumnsDescription getActualTableStructure(ContextPtr) const override; - void parseArguments(const ASTPtr &, ContextPtr) override; - - String cluster_name; }; } diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 01b44bc8380..c8cc0cddd30 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -10,13 +10,14 @@ #include #include #include +#include +#include #include #include #include #include #include #include "registerTableFunctions.h" -#include #include @@ -27,29 +28,23 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int LOGICAL_ERROR; } /// This is needed to avoid copy-paste.
The s3Cluster arguments differ only in the additional first argument - the cluster name -TableFunctionS3::ArgumentParseResult TableFunctionS3::parseArgumentsImpl( - const String & error_message, - ASTs & args, - ContextPtr context, - StorageS3::Configuration & s3_configuration, - bool get_format_from_file) +void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context) { - ArgumentParseResult result; - if (auto named_collection = tryGetNamedCollectionWithOverrides(args, context)) { - StorageS3::processNamedCollectionResult(s3_configuration, *named_collection); + StorageS3::processNamedCollectionResult(configuration, *named_collection); } else { if (args.empty() || args.size() > 6) - throw Exception::createDeprecated(error_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "The signature of table function {} shall be the following:\n{}", getName(), getSignature()); - auto * header_it = StorageURL::collectHeaders(args, s3_configuration.headers_from_ast, context); + auto * header_it = StorageURL::collectHeaders(args, configuration.headers_from_ast, context); if (header_it != args.end()) args.erase(header_it); @@ -136,54 +131,165 @@ TableFunctionS3::ArgumentParseResult TableFunctionS3::parseArgumentsImpl( } /// This argument is always the first - s3_configuration.url = S3::URI(checkAndGetLiteralArgument(args[0], "url")); + configuration.url = S3::URI(checkAndGetLiteralArgument(args[0], "url")); if (args_to_idx.contains("format")) { - s3_configuration.format = checkAndGetLiteralArgument(args[args_to_idx["format"]], "format"); - result.has_format_argument = true; + auto format = checkAndGetLiteralArgument(args[args_to_idx["format"]], "format"); + /// Set the format in the configuration only if it's not 'auto', + /// because a default format may already be set in the configuration. + if (format != "auto") + configuration.format = format; } if (args_to_idx.contains("structure")) - { - s3_configuration.structure = checkAndGetLiteralArgument(args[args_to_idx["structure"]], "structure"); - result.has_structure_argument = true; - } + configuration.structure = checkAndGetLiteralArgument(args[args_to_idx["structure"]], "structure"); if (args_to_idx.contains("compression_method")) - s3_configuration.compression_method = checkAndGetLiteralArgument(args[args_to_idx["compression_method"]], "compression_method"); + configuration.compression_method = checkAndGetLiteralArgument(args[args_to_idx["compression_method"]], "compression_method"); if (args_to_idx.contains("access_key_id")) - s3_configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(args[args_to_idx["access_key_id"]], "access_key_id"); + configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(args[args_to_idx["access_key_id"]], "access_key_id"); if (args_to_idx.contains("secret_access_key")) - s3_configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(args[args_to_idx["secret_access_key"]], "secret_access_key"); + configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(args[args_to_idx["secret_access_key"]], "secret_access_key"); - s3_configuration.auth_settings.no_sign_request = no_sign_request; + configuration.auth_settings.no_sign_request = no_sign_request; } - s3_configuration.keys = {s3_configuration.url.key}; + configuration.keys = {configuration.url.key}; - /// For DataLake table functions, we should specify default format.
- if (s3_configuration.format == "auto" && get_format_from_file) - s3_configuration.format = FormatFactory::instance().getFormatFromFileName(s3_configuration.url.uri.getPath(), true); - - return result; + if (configuration.format == "auto") + configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url.uri.getPath(), true); } void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr context) { + /// Clone ast function, because we can modify its arguments like removing headers. + auto ast_copy = ast_function->clone(); + /// Parse args ASTs & args_func = ast_function->children; - const auto message = fmt::format("The signature of table function '{}' could be the following:\n{}", getName(), signature); - if (args_func.size() != 1) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); auto & args = args_func.at(0)->children; - parseArgumentsImpl(message, args, context, configuration); + parseArgumentsImpl(args, context); +} + +void TableFunctionS3::addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context) +{ + if (tryGetNamedCollectionWithOverrides(args, context)) + { + /// In case of named collection, just add key-value pair "structure='...'" + /// at the end of arguments to override the existing structure. + ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(structure)}; + auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); + args.push_back(equal_func); + } + else + { + /// If arguments contain headers, just remove them and add them back at the end of the arguments later + /// (header argument can be at any position). + HTTPHeaderEntries tmp_headers; + auto * headers_it = StorageURL::collectHeaders(args, tmp_headers, context); + ASTPtr headers_ast; + if (headers_it != args.end()) + { + headers_ast = *headers_it; + args.erase(headers_it); + } + + if (args.empty() || args.size() > getMaxNumberOfArguments()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), args.size()); + + auto structure_literal = std::make_shared(structure); + + /// s3(s3_url) + if (args.size() == 1) + { + /// Add format=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// s3(s3_url, format) or s3(s3_url, NOSIGN) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN or not. + else if (args.size() == 2) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + /// If there is NOSIGN, add format=auto before structure. + if (boost::iequals(second_arg, "NOSIGN")) + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + /// s3(source, format, structure) or + /// s3(source, access_key_id, secret_access_key) or + /// s3(source, NOSIGN, format) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. + else if (args.size() == 3) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + { + args.push_back(structure_literal); + } + else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + { + args.back() = structure_literal; + } + else + { + /// Add format=auto before structure argument.
+ args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } + } + /// s3(source, format, structure, compression_method) or + /// s3(source, access_key_id, secret_access_key, format) or + /// s3(source, NOSIGN, format, structure) + /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. + else if (args.size() == 4) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + { + args.back() = structure_literal; + } + else if (second_arg == "auto" || FormatFactory::instance().getAllFormats().contains(second_arg)) + { + args[args.size() - 2] = structure_literal; + } + else + { + args.push_back(structure_literal); + } + } + /// s3(source, access_key_id, secret_access_key, format, structure) or + /// s3(source, NOSIGN, format, structure, compression_method) + /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or not. + else if (args.size() == 5) + { + auto second_arg = checkAndGetLiteralArgument(args[1], "format/NOSIGN"); + if (boost::iequals(second_arg, "NOSIGN")) + { + args[args.size() - 2] = structure_literal; + } + else + { + args.back() = structure_literal; + } + } + /// s3(source, access_key_id, secret_access_key, format, structure, compression) + else if (args.size() == 6) + { + args[args.size() - 2] = structure_literal; + } + + if (headers_ast) + args.push_back(headers_ast); + } } ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context) const @@ -286,6 +392,7 @@ void registerTableFunctionS3(TableFunctionFactory & factory) .allow_readonly = false}); } + void registerTableFunctionCOS(TableFunctionFactory & factory) { factory.registerFunction(); diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index 4d2cbc49d76..c983bec9bf4 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -13,7 +13,7 @@ namespace DB class Context; -/* s3(source, [access_key_id, secret_access_key,] format, structure[, compression]) - creates a temporary storage for a file in S3. +/* s3(source, [access_key_id, secret_access_key,] [format, structure, compression]) - creates a temporary storage for a file in S3.
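 *
 * A sketch of the positional forms that the structure-injection code above has to tell apart;
 * the endpoint and credentials are the fixtures used by the s3Cluster stateless tests later in
 * this patch and are shown for illustration only:
 *     SELECT * FROM s3('http://localhost:11111/test/{a,b}.tsv');                                    -- url
 *     SELECT * FROM s3('http://localhost:11111/test/{a,b}.tsv', 'TSV');                             -- url, format
 *     SELECT * FROM s3('http://localhost:11111/test/{a,b}.tsv', NOSIGN, 'TSV');                     -- NOSIGN instead of credentials
 *     SELECT * FROM s3('http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'TSV', 'auto'); -- access_key_id, secret_access_key, format, structure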
*/ class TableFunctionS3 : public ITableFunction { @@ -26,11 +26,21 @@ public: " - url, format, structure, compression_method\n" " - url, access_key_id, secret_access_key, format\n" " - url, access_key_id, secret_access_key, format, structure\n" - " - url, access_key_id, secret_access_key, format, structure, compression_method"; - std::string getName() const override + " - url, access_key_id, secret_access_key, format, structure, compression_method\n" + "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; + + static size_t getMaxNumberOfArguments() { return 6; } + + String getName() const override { return name; } + + virtual String getSignature() const + { + return signature; + } + bool hasStaticStructure() const override { return configuration.structure != "auto"; } bool needStructureHint() const override { return configuration.structure == "auto"; } @@ -44,18 +54,9 @@ public: return {"_path", "_file"}; } - struct ArgumentParseResult - { - bool has_format_argument = false; - bool has_structure_argument = false; - }; + virtual void parseArgumentsImpl(ASTs & args, const ContextPtr & context); - static ArgumentParseResult parseArgumentsImpl( - const String & error_message, - ASTs & args, - ContextPtr context, - StorageS3::Configuration & configuration, - bool get_format_from_file = true); + static void addColumnsStructureToArguments(ASTs & args, const String & structure, const ContextPtr & context); protected: diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index a456994054f..1d93132c411 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -2,100 +2,28 @@ #if USE_AWS_S3 -#include -#include - -#include -#include -#include -#include -#include -#include -#include #include +#include #include -#include -#include -#include -#include +#include #include "registerTableFunctions.h" #include -#include namespace DB { -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_GET; -} - - -void TableFunctionS3Cluster::parseArguments(const ASTPtr & ast_function, ContextPtr context) -{ - /// Parse args - ASTs & args_func = ast_function->children; - - if (args_func.size() != 1) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); - - ASTs & args = args_func.at(0)->children; - - constexpr auto fmt_string = "The signature of table function {} could be the following:\n" - " - cluster, url\n" - " - cluster, url, format\n" - " - cluster, url, format, structure\n" - " - cluster, url, access_key_id, secret_access_key\n" - " - cluster, url, format, structure, compression_method\n" - " - cluster, url, access_key_id, secret_access_key, format\n" - " - cluster, url, access_key_id, secret_access_key, format, structure\n" - " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method"; - auto message = PreformattedMessage{fmt::format(fmt_string, getName()), fmt_string}; - if (args.size() < 2 || args.size() > 7) - throw Exception::createDeprecated(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - /// evaluate only first argument, everything else will be done TableFunctionS3 - args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context); - - /// Cluster name is always the first - configuration.cluster_name = checkAndGetLiteralArgument(args[0], "cluster_name"); - - if 
(!context->tryGetCluster(configuration.cluster_name)) - throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", configuration.cluster_name); - - /// Just cut the first arg (cluster_name) and try to parse s3 table function arguments as is - ASTs clipped_args; - clipped_args.reserve(args.size() - 1); - std::copy(args.begin() + 1, args.end(), std::back_inserter(clipped_args)); - - /// StorageS3ClusterConfiguration inherints from StorageS3::Configuration, so it is safe to upcast it. - argument_parse_result = TableFunctionS3::parseArgumentsImpl(message.text, clipped_args, context, static_cast(configuration)); -} - - -ColumnsDescription TableFunctionS3Cluster::getActualTableStructure(ContextPtr context) const -{ - context->checkAccess(getSourceAccessType()); - - configuration.update(context); - if (configuration.structure == "auto") - return StorageS3::getTableStructureFromData(configuration, std::nullopt, context); - - return parseColumnsListFromString(configuration.structure, context); -} - StoragePtr TableFunctionS3Cluster::executeImpl( const ASTPtr & /*function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const { StoragePtr storage; ColumnsDescription columns; + bool structure_argument_was_provided = configuration.structure != "auto"; - if (argument_parse_result.has_structure_argument) + if (structure_argument_was_provided) { columns = parseColumnsListFromString(configuration.structure, context); } @@ -120,13 +48,13 @@ StoragePtr TableFunctionS3Cluster::executeImpl( else { storage = std::make_shared( + cluster_name, configuration, StorageID(getDatabaseName(), table_name), columns, ConstraintsDescription{}, context, - argument_parse_result.has_structure_argument, - argument_parse_result.has_format_argument); + structure_argument_was_provided); } storage->startup(); diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h index 7ca85bb4ea2..459ff144f02 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.h +++ b/src/TableFunctions/TableFunctionS3Cluster.h @@ -6,6 +6,7 @@ #include #include +#include #include @@ -15,27 +16,36 @@ namespace DB class Context; /** - * s3cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure) + * s3cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure, compression_method) * A table function, which allows to process many files from S3 on a specific cluster * On initiator it creates a connection to _all_ nodes in cluster, discloses asterisks * in S3 file path and dispatch each file dynamically. * On worker node it asks initiator about next task to process, processes it. * This is repeated until the tasks are finished. 
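 *
 * A sketch of typical invocations; the cluster, endpoint, credentials and the test_s3 named
 * collection are fixtures from the stateless and integration tests further down in this patch,
 * shown for illustration only:
 *     SELECT * FROM s3Cluster('test_cluster_one_shard_three_replicas_localhost',
 *                             'http://localhost:11111/test/{a,b}.tsv', NOSIGN, 'TSV', 'auto', 'auto') ORDER BY c1, c2, c3;
 *     SELECT * FROM s3Cluster(cluster_simple, test_s3, structure='auto') ORDER BY (c1, c2, c3);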
*/ -class TableFunctionS3Cluster : public ITableFunction +class TableFunctionS3Cluster : public ITableFunctionCluster { public: static constexpr auto name = "s3Cluster"; - std::string getName() const override + static constexpr auto signature = " - cluster, url\n" + " - cluster, url, format\n" + " - cluster, url, format, structure\n" + " - cluster, url, access_key_id, secret_access_key\n" + " - cluster, url, format, structure, compression_method\n" + " - cluster, url, access_key_id, secret_access_key, format\n" + " - cluster, url, access_key_id, secret_access_key, format, structure\n" + " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method\n" + "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; + + String getName() const override { return name; } - bool hasStaticStructure() const override { return configuration.structure != "auto"; } - - bool needStructureHint() const override { return configuration.structure == "auto"; } - - void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } + String getSignature() const override + { + return signature; + } protected: StoragePtr executeImpl( @@ -45,15 +55,6 @@ protected: ColumnsDescription cached_columns) const override; const char * getStorageTypeName() const override { return "S3Cluster"; } - - AccessType getSourceAccessType() const override { return AccessType::S3; } - - ColumnsDescription getActualTableStructure(ContextPtr) const override; - void parseArguments(const ASTPtr &, ContextPtr) override; - - mutable StorageS3Cluster::Configuration configuration; - ColumnsDescription structure_hint; - TableFunctionS3::ArgumentParseResult argument_parse_result; }; } diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index d8c3e72efe1..4ed204a2af3 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -12,21 +12,12 @@ #include #include #include -#include #include - +#include +#include namespace DB { -static const String bad_arguments_error_message = "Table function URL can have the following arguments: " - "url, name of used format (taken from file extension by default), " - "optional table structure, optional compression method, " - "optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} std::vector TableFunctionURL::skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr) const { @@ -48,15 +39,13 @@ std::vector TableFunctionURL::skipAnalysisForArguments(const QueryTreeNo void TableFunctionURL::parseArguments(const ASTPtr & ast, ContextPtr context) { - const auto & ast_function = assert_cast(ast.get()); + /// Clone ast function, because we can modify it's arguments like removing headers. 
+ ITableFunctionFileLike::parseArguments(ast->clone(), context); +} - const auto & args = ast_function->children; - if (args.empty()) - throw Exception::createDeprecated(bad_arguments_error_message, ErrorCodes::BAD_ARGUMENTS); - - auto & url_function_args = assert_cast(args[0].get())->children; - - if (auto named_collection = tryGetNamedCollectionWithOverrides(url_function_args, context)) +void TableFunctionURL::parseArgumentsImpl(ASTs & args, const ContextPtr & context) +{ + if (auto named_collection = tryGetNamedCollectionWithOverrides(args, context)) { StorageURL::processNamedCollectionResult(configuration, *named_collection); @@ -68,16 +57,46 @@ void TableFunctionURL::parseArguments(const ASTPtr & ast, ContextPtr context) if (format == "auto") format = FormatFactory::instance().getFormatFromFileName(Poco::URI(filename).getPath(), true); - StorageURL::collectHeaders(url_function_args, configuration.headers, context); + StorageURL::collectHeaders(args, configuration.headers, context); } else { - auto * headers_it = StorageURL::collectHeaders(url_function_args, configuration.headers, context); + auto * headers_it = StorageURL::collectHeaders(args, configuration.headers, context); /// ITableFunctionFileLike cannot parse headers argument, so remove it. - if (headers_it != url_function_args.end()) - url_function_args.erase(headers_it); + if (headers_it != args.end()) + args.erase(headers_it); - ITableFunctionFileLike::parseArguments(ast, context); + ITableFunctionFileLike::parseArgumentsImpl(args, context); + } +} + +void TableFunctionURL::addColumnsStructureToArguments(ASTs & args, const String & desired_structure, const ContextPtr & context) +{ + if (tryGetNamedCollectionWithOverrides(args, context)) + { + /// In case of named collection, just add key-value pair "structure='...'" + /// at the end of arguments to override the existing structure. + ASTs equal_func_args = {std::make_shared("structure"), std::make_shared(desired_structure)}; + auto equal_func = makeASTFunction("equals", std::move(equal_func_args)); + args.push_back(equal_func); + } + else + { + /// If arguments contain headers, just remove them and add them back at the end of the arguments later + /// (header argument can be at any position). + HTTPHeaderEntries tmp_headers; + auto * headers_it = StorageURL::collectHeaders(args, tmp_headers, context); + ASTPtr headers_ast; + if (headers_it != args.end()) + { + headers_ast = *headers_it; + args.erase(headers_it); + } + + ITableFunctionFileLike::addColumnsStructureToArguments(args, desired_structure, context); + + if (headers_ast) + args.push_back(headers_ast); } } diff --git a/src/TableFunctions/TableFunctionURL.h b/src/TableFunctions/TableFunctionURL.h index dca5123fb69..021eb71df53 100644 --- a/src/TableFunctions/TableFunctionURL.h +++ b/src/TableFunctions/TableFunctionURL.h @@ -10,24 +10,41 @@ namespace DB class Context; -/* url(source, format[, structure, compression]) - creates a temporary storage from url. +/* url(source, [format, structure, compression]) - creates a temporary storage from url.
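 *
 * A sketch of typical invocations; the nginx URL is the fixture from the test_storage_url
 * integration test below and is shown for illustration only:
 *     SELECT * FROM url('http://nginx:80/test_1', 'TSV');
 *     SELECT * FROM url('http://nginx:80/test_1', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32');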
*/ -class TableFunctionURL final: public ITableFunctionFileLike +class TableFunctionURL : public ITableFunctionFileLike { public: static constexpr auto name = "url"; - std::string getName() const override + static constexpr auto signature = " - uri\n" + " - uri, format\n" + " - uri, format, structure\n" + " - uri, format, structure, compression_method\n" + "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; + + String getName() const override { return name; } + String getSignature() const override + { + return signature; + } + ColumnsDescription getActualTableStructure(ContextPtr context) const override; + static void addColumnsStructureToArguments(ASTs & args, const String & desired_structure, const ContextPtr & context); + +protected: + void parseArguments(const ASTPtr & ast, ContextPtr context) override; + void parseArgumentsImpl(ASTs & args, const ContextPtr & context) override; + + StorageURL::Configuration configuration; + private: std::vector skipAnalysisForArguments(const QueryTreeNodePtr & query_node_table_function, ContextPtr context) const override; - void parseArguments(const ASTPtr & ast, ContextPtr context) override; - StoragePtr getStorage( const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, const std::string & table_name, const String & compression_method_) const override; @@ -36,7 +53,6 @@ private: String getFormatFromFirstArgument() override; - StorageURL::Configuration configuration; }; } diff --git a/src/TableFunctions/TableFunctionURLCluster.cpp b/src/TableFunctions/TableFunctionURLCluster.cpp new file mode 100644 index 00000000000..c94943db758 --- /dev/null +++ b/src/TableFunctions/TableFunctionURLCluster.cpp @@ -0,0 +1,54 @@ +#include +#include + +#include "registerTableFunctions.h" + +namespace DB +{ + +StoragePtr TableFunctionURLCluster::getStorage( + const String & /*source*/, const String & /*format_*/, const ColumnsDescription & columns, ContextPtr context, + const std::string & table_name, const String & /*compression_method_*/) const +{ + StoragePtr storage; + if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) + { + //On worker node this uri won't contain globs + storage = std::make_shared( + filename, + StorageID(getDatabaseName(), table_name), + format, + std::nullopt /*format settings*/, + columns, + ConstraintsDescription{}, + String{}, + context, + compression_method, + configuration.headers, + configuration.http_method, + nullptr, + /*distributed_processing=*/ true); + } + else + { + storage = std::make_shared( + context, + cluster_name, + filename, + format, + compression_method, + StorageID(getDatabaseName(), table_name), + getActualTableStructure(context), + ConstraintsDescription{}, + configuration, + structure != "auto"); + } + return storage; +} + +void registerTableFunctionURLCluster(TableFunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/TableFunctions/TableFunctionURLCluster.h b/src/TableFunctions/TableFunctionURLCluster.h new file mode 100644 index 00000000000..be6992fcaaf --- /dev/null +++ b/src/TableFunctions/TableFunctionURLCluster.h @@ -0,0 +1,52 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +class Context; + +/** + * urlCluster(cluster, URI, format, structure, compression_method) + * A table function, which allows to process many files from url on a specific cluster + * On initiator it creates a connection to 
_all_ nodes in cluster, discloses asterisks + * in url file path and dispatch each file dynamically. + * On worker node it asks initiator about next task to process, processes it. + * This is repeated until the tasks are finished. + */ +class TableFunctionURLCluster : public ITableFunctionCluster +{ +public: + static constexpr auto name = "urlCluster"; + static constexpr auto signature = " - cluster, uri\n" + " - cluster, uri, format\n" + " - cluster, uri, format, structure\n" + " - cluster, uri, format, structure, compression_method\n" + "All signatures supports optional headers (specified as `headers('name'='value', 'name2'='value2')`)"; + + String getName() const override + { + return name; + } + + String getSignature() const override + { + return signature; + } + +protected: + StoragePtr getStorage( + const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, + const std::string & table_name, const String & compression_method_) const override; + + const char * getStorageTypeName() const override { return "URLCluster"; } +}; + +} diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index d118723110f..4f3411df4c5 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -16,6 +16,7 @@ void registerTableFunctions() registerTableFunctionExecutable(factory); registerTableFunctionFile(factory); registerTableFunctionURL(factory); + registerTableFunctionURLCluster(factory); registerTableFunctionValues(factory); registerTableFunctionInput(factory); registerTableFunctionGenerate(factory); diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 5d2c0bd0763..c51522a5e99 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -13,6 +13,7 @@ void registerTableFunctionZeros(TableFunctionFactory & factory); void registerTableFunctionExecutable(TableFunctionFactory & factory); void registerTableFunctionFile(TableFunctionFactory & factory); void registerTableFunctionURL(TableFunctionFactory & factory); +void registerTableFunctionURLCluster(TableFunctionFactory & factory); void registerTableFunctionValues(TableFunctionFactory & factory); void registerTableFunctionInput(TableFunctionFactory & factory); void registerTableFunctionGenerate(TableFunctionFactory & factory); diff --git a/tests/broken_tests.txt b/tests/broken_tests.txt index e61c1316e17..faee1c5b295 100644 --- a/tests/broken_tests.txt +++ b/tests/broken_tests.txt @@ -137,3 +137,8 @@ 01600_parts_types_metrics_long 01287_max_execution_speed 02703_row_policy_for_database +02721_url_cluster +02534_s3_cluster_insert_select_schema_inference +02764_parallel_replicas_plain_merge_tree +02765_parallel_replicas_final_modifier + diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 13257eabb71..82c04ce82c5 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -73,6 +73,7 @@ def get_failed_report( sanitizer="unknown", status=message, elapsed_seconds=0, + comment="", ) return [build_result], [[""]], [GITHUB_RUN_URL] @@ -87,6 +88,7 @@ def process_report( sanitizer=build_config["sanitizer"], status="success" if build_report["status"] else "failure", elapsed_seconds=build_report["elapsed_seconds"], + comment=build_config["comment"], ) build_results = [] build_urls = [] diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index
fd783192ef1..2fa562a1386 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -45,8 +45,10 @@ from ssh import SSHKey class Labels: MUST_BACKPORT = "pr-must-backport" + MUST_BACKPORT_CLOUD = "pr-must-backport-cloud" BACKPORT = "pr-backport" BACKPORTS_CREATED = "pr-backports-created" + BACKPORTS_CREATED_CLOUD = "pr-backports-created-cloud" CHERRYPICK = "pr-cherrypick" DO_NOT_TEST = "do not test" @@ -68,9 +70,9 @@ This pull-request will be merged automatically as it reaches the mergeable state ### If the PR was closed and then reopened -If it stuck, check #{pr_number} for `{label_backports_created}` and delete it if \ +If it stuck, check {pr_url} for `{label_backports_created}` and delete it if \ necessary. Manually merging will do nothing, since `{label_backports_created}` \ -prevents the original PR #{pr_number} from being processed. +prevents the original PR {pr_url} from being processed. """ BACKPORT_DESCRIPTION = """This pull-request is a last step of an automated \ backporting. @@ -80,14 +82,17 @@ close it. """ REMOTE = "" - def __init__(self, name: str, pr: PullRequest): + def __init__(self, name: str, pr: PullRequest, repo: Repository): self.name = name self.pr = pr + self.repo = repo + self.cherrypick_branch = f"cherrypick/{name}/{pr.merge_commit_sha}" self.backport_branch = f"backport/{name}/{pr.number}" self.cherrypick_pr = None # type: Optional[PullRequest] self.backport_pr = None # type: Optional[PullRequest] - self._backported = None # type: Optional[bool] + self._backported = False + self.git_prefix = ( # All commits to cherrypick are done as robot-clickhouse "git -c user.email=robot-clickhouse@users.noreply.github.com " "-c user.name=robot-clickhouse -c commit.gpgsign=false" @@ -188,7 +193,7 @@ close it. f"{self.cherrypick_branch} {self.pr.merge_commit_sha}" ) - # Check if there actually any changes between branches. If no, then no + # Check if there are actually any changes between branches. If no, then no # other actions are required. It's possible when changes are backported # manually to the release branch already try: @@ -216,10 +221,11 @@ close it. for branch in [self.cherrypick_branch, self.backport_branch]: git_runner(f"{self.git_prefix} push -f {self.REMOTE} {branch}:{branch}") - self.cherrypick_pr = self.pr.base.repo.create_pull( + self.cherrypick_pr = self.repo.create_pull( title=f"Cherry pick #{self.pr.number} to {self.name}: {self.pr.title}", body=self.CHERRYPICK_DESCRIPTION.format( pr_number=self.pr.number, + pr_url=self.pr.html_url, label_backports_created=Labels.BACKPORTS_CREATED, ), base=self.backport_branch, @@ -253,9 +259,9 @@ close it. f"{self.git_prefix} push -f {self.REMOTE} " f"{self.backport_branch}:{self.backport_branch}" ) - self.backport_pr = self.pr.base.repo.create_pull( + self.backport_pr = self.repo.create_pull( title=title, - body=f"Original pull-request #{self.pr.number}\n" + body=f"Original pull-request {self.pr.html_url}\n" f"Cherry-pick pull-request #{self.cherrypick_pr.number}\n\n" f"{self.BACKPORT_DESCRIPTION}", base=self.name, @@ -314,22 +320,33 @@ close it. 
@property def backported(self) -> bool: - if self._backported is not None: - return self._backported - return self.backport_pr is not None + return self._backported or self.backport_pr is not None def __repr__(self): return self.name class Backport: - def __init__(self, gh: GitHub, repo: str, dry_run: bool): + def __init__( + self, + gh: GitHub, + repo: str, + fetch_from: Optional[str], + dry_run: bool, + must_create_backport_label: str, + backport_created_label: str, + ): self.gh = gh self._repo_name = repo + self._fetch_from = fetch_from self.dry_run = dry_run - self._query = f"type:pr repo:{repo}" + self.must_create_backport_label = must_create_backport_label + self.backport_created_label = backport_created_label + self._remote = "" + self._remote_line = "" + self._repo = None # type: Optional[Repository] self.release_prs = [] # type: PullRequests self.release_branches = [] # type: List[str] @@ -338,25 +355,38 @@ class Backport: self.error = None # type: Optional[Exception] @property - def remote(self) -> str: - if not self._remote: + def remote_line(self) -> str: + if not self._remote_line: # lines of "origin git@github.com:ClickHouse/ClickHouse.git (fetch)" remotes = git_runner("git remote -v").split("\n") # We need the first word from the first matching result - self._remote = tuple( - remote.split(maxsplit=1)[0] - for remote in remotes - if f"github.com/{self._repo_name}" in remote # https - or f"github.com:{self._repo_name}" in remote # ssh - )[0] + self._remote_line = next( + iter( + remote + for remote in remotes + if f"github.com/{self._repo_name}" in remote # https + or f"github.com:{self._repo_name}" in remote # ssh + ) + ) + + return self._remote_line + + @property + def remote(self) -> str: + if not self._remote: + self._remote = self.remote_line.split(maxsplit=1)[0] git_runner(f"git fetch {self._remote}") ReleaseBranch.REMOTE = self._remote return self._remote + @property + def is_remote_ssh(self) -> bool: + return "github.com:" in self.remote_line + def receive_release_prs(self): logging.info("Getting release PRs") self.release_prs = self.gh.get_pulls_from_search( - query=f"{self._query} is:open", + query=f"type:pr repo:{self._repo_name} is:open", sort="created", order="asc", label="release", @@ -365,6 +395,14 @@ class Backport: self.labels_to_backport = [ f"v{branch}-must-backport" for branch in self.release_branches ] + + if self._fetch_from: + logging.info("Fetching from %s", self._fetch_from) + fetch_from_repo = self.gh.get_repo(self._fetch_from) + git_runner( + f"git fetch {fetch_from_repo.ssh_url if self.is_remote_ssh else fetch_from_repo.clone_url} {fetch_from_repo.default_branch} --no-tags" + ) + logging.info("Active releases: %s", ", ".join(self.release_branches)) def update_local_release_branches(self): @@ -396,9 +434,10 @@ class Backport: # To not have a possible TZ issues tomorrow = date.today() + timedelta(days=1) logging.info("Receive PRs suppose to be backported") + self.prs_for_backport = self.gh.get_pulls_from_search( - query=f"{self._query} -label:{Labels.BACKPORTS_CREATED}", - label=",".join(self.labels_to_backport + [Labels.MUST_BACKPORT]), + query=f"type:pr repo:{self._fetch_from} -label:{self.backport_created_label}", + label=",".join(self.labels_to_backport + [self.must_create_backport_label]), merged=[since_date, tomorrow], ) logging.info( @@ -418,13 +457,13 @@ class Backport: def process_pr(self, pr: PullRequest) -> None: pr_labels = [label.name for label in pr.labels] - if Labels.MUST_BACKPORT in pr_labels: + if self.must_create_backport_label in 
pr_labels: branches = [ - ReleaseBranch(br, pr) for br in self.release_branches + ReleaseBranch(br, pr, self.repo) for br in self.release_branches ] # type: List[ReleaseBranch] else: branches = [ - ReleaseBranch(br, pr) + ReleaseBranch(br, pr, self.repo) for br in [ label.split("-", 1)[0][1:] # v21.8-must-backport for label in pr_labels @@ -452,14 +491,14 @@ class Backport: ] ) bp_cp_prs = self.gh.get_pulls_from_search( - query=f"{self._query} {query_suffix}", + query=f"type:pr repo:{self._repo_name} {query_suffix}", ) for br in branches: br.pop_prs(bp_cp_prs) if bp_cp_prs: # This is definitely some error. All prs must be consumed by - # branches with ReleaseBranch.pop_prs. It also make the whole + # branches with ReleaseBranch.pop_prs. It also makes the whole # program exit code non-zero self.error = Exception( "The following PRs are not filtered by release branches:\n" @@ -483,22 +522,17 @@ class Backport: if self.dry_run: logging.info("DRY RUN: would mark PR #%s as done", pr.number) return - pr.add_to_labels(Labels.BACKPORTS_CREATED) + pr.add_to_labels(self.backport_created_label) logging.info( "PR #%s is successfully labeled with `%s`", pr.number, - Labels.BACKPORTS_CREATED, + self.backport_created_label, ) @property def repo(self) -> Repository: if self._repo is None: - try: - self._repo = self.release_prs[0].base.repo - except IndexError as exc: - raise Exception( - "`repo` is available only after the `receive_release_prs`" - ) from exc + self._repo = self.gh.get_repo(self._repo_name) return self._repo @property @@ -512,7 +546,27 @@ def parse_args(): parser.add_argument( "--repo", default="ClickHouse/ClickHouse", help="repo owner/name" ) + parser.add_argument( + "--from-repo", + default="ClickHouse/ClickHouse", + help="if set, the commits will be taken from this repo, but PRs will be created in the main repo", + ) parser.add_argument("--dry-run", action="store_true", help="do not create anything") + + parser.add_argument( + "--must-create-backport-label", + default=Labels.MUST_BACKPORT, + choices=(Labels.MUST_BACKPORT, Labels.MUST_BACKPORT_CLOUD), + help="label to filter PRs to backport", + ) + + parser.add_argument( + "--backport-created-label", + default=Labels.BACKPORTS_CREATED, + choices=(Labels.BACKPORTS_CREATED, Labels.BACKPORTS_CREATED_CLOUD), + help="label to mark PRs as backported", + ) + parser.add_argument( "--debug-helpers", action="store_true", @@ -564,7 +618,14 @@ def main(): token = args.token or get_best_robot_token() gh = GitHub(token, create_cache_dir=False) - bp = Backport(gh, args.repo, args.dry_run) + bp = Backport( + gh, + args.repo, + args.from_repo, + args.dry_run, + args.must_create_backport_label, + args.backport_created_label, + ) # https://github.com/python/mypy/issues/3004 bp.gh.cache_path = f"{TEMP_PATH}/gh_cache" # type: ignore bp.receive_release_prs() @@ -577,7 +638,7 @@ def main(): if __name__ == "__main__": - logging.basicConfig(level=logging.INFO) + logging.getLogger().setLevel(level=logging.INFO) assert not is_shallow() with stash(): diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 5bba85569b4..d829115cfe1 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -17,6 +17,7 @@ CI_CONFIG = { "additional_pkgs": True, "tidy": "disable", "with_coverage": False, + "comment": "", }, "coverity": { "compiler": "clang-16", @@ -26,6 +27,7 @@ CI_CONFIG = { "tidy": "disable", "with_coverage": False, "official": False, + "comment": "A special build for coverity", }, "package_aarch64": { "compiler": "clang-16-aarch64", @@ -36,6 +38,7 @@ 
CI_CONFIG = { "additional_pkgs": True, "tidy": "disable", "with_coverage": False, + "comment": "", }, "package_asan": { "compiler": "clang-16", @@ -44,6 +47,7 @@ CI_CONFIG = { "package_type": "deb", "tidy": "disable", "with_coverage": False, + "comment": "", }, "package_ubsan": { "compiler": "clang-16", @@ -52,6 +56,7 @@ CI_CONFIG = { "package_type": "deb", "tidy": "disable", "with_coverage": False, + "comment": "", }, "package_tsan": { "compiler": "clang-16", @@ -60,6 +65,7 @@ CI_CONFIG = { "package_type": "deb", "tidy": "disable", "with_coverage": False, + "comment": "", }, "package_msan": { "compiler": "clang-16", @@ -68,6 +74,7 @@ CI_CONFIG = { "package_type": "deb", "tidy": "disable", "with_coverage": False, + "comment": "", }, "package_debug": { "compiler": "clang-16", @@ -76,6 +83,7 @@ CI_CONFIG = { "package_type": "deb", "tidy": "disable", "with_coverage": False, + "comment": "Note: sparse checkout was used", }, "binary_release": { "compiler": "clang-16", @@ -84,6 +92,7 @@ CI_CONFIG = { "package_type": "binary", "tidy": "disable", "with_coverage": False, + "comment": "", }, "binary_tidy": { "compiler": "clang-16", @@ -93,6 +102,7 @@ CI_CONFIG = { "static_binary_name": "debug-amd64", "tidy": "enable", "with_coverage": False, + "comment": "clang-tidy is used for static analysis", }, "binary_darwin": { "compiler": "clang-16-darwin", @@ -102,6 +112,7 @@ CI_CONFIG = { "static_binary_name": "macos", "tidy": "disable", "with_coverage": False, + "comment": "", }, "binary_aarch64": { "compiler": "clang-16-aarch64", @@ -110,6 +121,7 @@ CI_CONFIG = { "package_type": "binary", "tidy": "disable", "with_coverage": False, + "comment": "", }, "binary_aarch64_v80compat": { "compiler": "clang-16-aarch64-v80compat", @@ -119,6 +131,7 @@ CI_CONFIG = { "static_binary_name": "aarch64v80compat", "tidy": "disable", "with_coverage": False, + "comment": "For ARMv8.1 and older", }, "binary_freebsd": { "compiler": "clang-16-freebsd", @@ -128,6 +141,7 @@ CI_CONFIG = { "static_binary_name": "freebsd", "tidy": "disable", "with_coverage": False, + "comment": "", }, "binary_darwin_aarch64": { "compiler": "clang-16-darwin-aarch64", @@ -137,6 +151,7 @@ CI_CONFIG = { "static_binary_name": "macos-aarch64", "tidy": "disable", "with_coverage": False, + "comment": "", }, "binary_ppc64le": { "compiler": "clang-16-ppc64le", @@ -146,6 +161,7 @@ CI_CONFIG = { "static_binary_name": "powerpc64le", "tidy": "disable", "with_coverage": False, + "comment": "", }, "binary_amd64_compat": { "compiler": "clang-16-amd64-compat", @@ -155,6 +171,7 @@ CI_CONFIG = { "static_binary_name": "amd64compat", "tidy": "disable", "with_coverage": False, + "comment": "SSE2-only build", }, }, "builds_report_config": { diff --git a/tests/ci/codebrowser_check.py b/tests/ci/codebrowser_check.py index f9883f0f975..2dba5176c8b 100644 --- a/tests/ci/codebrowser_check.py +++ b/tests/ci/codebrowser_check.py @@ -59,6 +59,9 @@ def main(): os.makedirs(temp_path) docker_image = get_image_with_version(IMAGES_PATH, "clickhouse/codebrowser") + # FIXME: the codebrowser is broken with clang-16, workaround with clang-15 + # See https://github.com/ClickHouse/ClickHouse/issues/50077 + docker_image.version = "49701-4dcdcf4c11b5604f1c5d3121c9c6fea3e957b605" s3_helper = S3Helper() result_path = temp_path / "result_path" diff --git a/tests/ci/report.py b/tests/ci/report.py index 89422298083..a9014acec12 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -243,6 +243,7 @@ class BuildResult: sanitizer: str status: str elapsed_seconds: int + comment: str BuildResults = 
List[BuildResult] @@ -452,6 +453,7 @@ tr:hover td {{filter: brightness(95%);}} Build log Build time Artifacts +Comment {rows} @@ -519,6 +521,8 @@ def create_build_html_report( links = links[: -len(link_separator)] row += f"{links}" + row += f"{build_result.comment}" + row += "" rows += row return HTML_BASE_BUILD_TEMPLATE.format( diff --git a/tests/config/config.d/database_replicated.xml b/tests/config/config.d/database_replicated.xml index 3fea87c4fd1..9a405f85908 100644 --- a/tests/config/config.d/database_replicated.xml +++ b/tests/config/config.d/database_replicated.xml @@ -14,6 +14,24 @@ + + + + localhost + 9181 + + + localhost + 19181 + + + localhost + 29181 + + /test/chroot/auxiliary_zookeeper2 + + + 9181 1 diff --git a/tests/config/config.d/merge_tree.xml b/tests/config/config.d/merge_tree.xml index 43bdb6aa07b..bf2da9b09a2 100644 --- a/tests/config/config.d/merge_tree.xml +++ b/tests/config/config.d/merge_tree.xml @@ -1,5 +1,6 @@ + 1 8 diff --git a/tests/config/config.d/zookeeper.xml b/tests/config/config.d/zookeeper.xml index 63057224ef9..75b4a00fe67 100644 --- a/tests/config/config.d/zookeeper.xml +++ b/tests/config/config.d/zookeeper.xml @@ -7,4 +7,13 @@ 9181 + + + + localhost + 9181 + + /test/chroot/auxiliary_zookeeper2 + + diff --git a/tests/integration/test_s3_cluster/configs/named_collections.xml b/tests/integration/test_s3_cluster/configs/named_collections.xml new file mode 100644 index 00000000000..64d1bd98df2 --- /dev/null +++ b/tests/integration/test_s3_cluster/configs/named_collections.xml @@ -0,0 +1,10 @@ + + + + http://minio1:9001/root/data/{clickhouse,database}/* + minio + minio123 + CSV> + + + diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py index 69e36dbf9b4..41f19cdd12d 100644 --- a/tests/integration/test_s3_cluster/test.py +++ b/tests/integration/test_s3_cluster/test.py @@ -67,20 +67,20 @@ def started_cluster(): cluster = ClickHouseCluster(__file__) cluster.add_instance( "s0_0_0", - main_configs=["configs/cluster.xml"], + main_configs=["configs/cluster.xml", "configs/named_collections.xml"], macros={"replica": "node1", "shard": "shard1"}, with_minio=True, with_zookeeper=True, ) cluster.add_instance( "s0_0_1", - main_configs=["configs/cluster.xml"], + main_configs=["configs/cluster.xml", "configs/named_collections.xml"], macros={"replica": "replica2", "shard": "shard1"}, with_zookeeper=True, ) cluster.add_instance( "s0_1_0", - main_configs=["configs/cluster.xml"], + main_configs=["configs/cluster.xml", "configs/named_collections.xml"], macros={"replica": "replica1", "shard": "shard2"}, with_zookeeper=True, ) @@ -406,3 +406,21 @@ def test_cluster_with_header(started_cluster): ) == "SomeValue\n" ) + + +def test_cluster_with_named_collection(started_cluster): + node = started_cluster.instances["s0_0_0"] + + pure_s3 = node.query("""SELECT * from s3(test_s3) ORDER BY (c1, c2, c3)""") + + s3_cluster = node.query( + """SELECT * from s3Cluster(cluster_simple, test_s3) ORDER BY (c1, c2, c3)""" + ) + + assert TSV(pure_s3) == TSV(s3_cluster) + + s3_cluster = node.query( + """SELECT * from s3Cluster(cluster_simple, test_s3, structure='auto') ORDER BY (c1, c2, c3)""" + ) + + assert TSV(pure_s3) == TSV(s3_cluster) diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 696a962eefe..0a3a01101d5 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -62,6 +62,16 @@ def 
test_postgres_select_insert(started_cluster): # for i in range(1, 1000): # assert (node1.query(check1)).rstrip() == '10000', f"Failed on {i}" + result = node1.query( + f""" + INSERT INTO TABLE FUNCTION {table} + SELECT number, concat('name_', toString(number)), 3 from numbers(1000000)""" + ) + check1 = f"SELECT count() FROM {table}" + check2 = f"SELECT count() FROM (SELECT * FROM {table} LIMIT 10)" + assert (node1.query(check1)).rstrip() == "1010000" + assert (node1.query(check2)).rstrip() == "10" + cursor.execute(f"DROP TABLE {table_name} ") diff --git a/tests/integration/test_storage_url/configs/named_collections.xml b/tests/integration/test_storage_url/configs/named_collections.xml new file mode 100644 index 00000000000..e8b0aa91f53 --- /dev/null +++ b/tests/integration/test_storage_url/configs/named_collections.xml @@ -0,0 +1,8 @@ + + + + http://nginx:80/test_1 + TSV> + + + diff --git a/tests/integration/test_storage_url/test.py b/tests/integration/test_storage_url/test.py index f3ed087d3d0..f360ec105ec 100644 --- a/tests/integration/test_storage_url/test.py +++ b/tests/integration/test_storage_url/test.py @@ -4,7 +4,9 @@ from helpers.test_tools import TSV cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance( - "node1", main_configs=["configs/conf.xml"], with_nginx=True + "node1", + main_configs=["configs/conf.xml", "configs/named_collections.xml"], + with_nginx=True, ) @@ -35,6 +37,33 @@ def test_partition_by(): assert result.strip() == "1\t2\t3" +def test_url_cluster(): + result = node1.query( + f"select * from urlCluster('test_cluster_two_shards', 'http://nginx:80/test_1', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')" + ) + assert result.strip() == "3\t2\t1" + result = node1.query( + f"select * from urlCluster('test_cluster_two_shards', 'http://nginx:80/test_2', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')" + ) + assert result.strip() == "1\t3\t2" + result = node1.query( + f"select * from urlCluster('test_cluster_two_shards', 'http://nginx:80/test_3', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32')" + ) + assert result.strip() == "1\t2\t3" + + +def test_url_cluster_with_named_collection(): + result = node1.query( + f"select * from urlCluster(test_cluster_one_shard_three_replicas_localhost, test_url)" + ) + assert result.strip() == "3\t2\t1" + + result = node1.query( + f"select * from urlCluster(test_cluster_one_shard_three_replicas_localhost, test_url, structure='auto')" + ) + assert result.strip() == "3\t2\t1" + + def test_table_function_url_access_rights(): node1.query("CREATE USER OR REPLACE u1") diff --git a/tests/queries/0_stateless/00754_alter_modify_order_by.sql b/tests/queries/0_stateless/00754_alter_modify_order_by.sql index 234bd61902b..9c7eee74c8c 100644 --- a/tests/queries/0_stateless/00754_alter_modify_order_by.sql +++ b/tests/queries/0_stateless/00754_alter_modify_order_by.sql @@ -1,6 +1,12 @@ SET send_logs_level = 'fatal'; SET optimize_on_insert = 0; +DROP TABLE IF EXISTS no_order; +CREATE TABLE no_order(a UInt32, b UInt32) ENGINE = MergeTree ORDER BY tuple(); +ALTER TABLE no_order MODIFY ORDER BY (a); -- { serverError 36} + +DROP TABLE no_order; + DROP TABLE IF EXISTS old_style; set allow_deprecated_syntax_for_merge_tree=1; CREATE TABLE old_style(d Date, x UInt32) ENGINE MergeTree(d, x, 8192); diff --git a/tests/queries/0_stateless/02210_processors_profile_log_2.reference b/tests/queries/0_stateless/02210_processors_profile_log_2.reference index 5467c7ef2ba..b9a848131fd 100644 --- 
a/tests/queries/0_stateless/02210_processors_profile_log_2.reference +++ b/tests/queries/0_stateless/02210_processors_profile_log_2.reference @@ -9,3 +9,4 @@ NullSource 0 0 0 0 NumbersMt 0 0 1000000 8000000 Resize 1 8 1 8 Resize 1 8 1 8 +1 diff --git a/tests/queries/0_stateless/02210_processors_profile_log_2.sh b/tests/queries/0_stateless/02210_processors_profile_log_2.sh index 93eabc2f0fe..044954a4e96 100755 --- a/tests/queries/0_stateless/02210_processors_profile_log_2.sh +++ b/tests/queries/0_stateless/02210_processors_profile_log_2.sh @@ -17,3 +17,5 @@ EOF ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" ${CLICKHOUSE_CLIENT} -q "select name, sum(input_rows), sum(input_bytes), sum(output_rows), sum(output_bytes) from system.processors_profile_log where query_id = '${QUERY_ID}' group by name, plan_step, plan_group order by name, sum(input_rows), sum(input_bytes), sum(output_rows), sum(output_bytes)" + +${CLICKHOUSE_CLIENT} -q "select countDistinct(initial_query_id) from system.processors_profile_log where query_id = '${QUERY_ID}'" \ No newline at end of file diff --git a/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.sql b/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.sql index a73993f6a5a..ef339b760aa 100644 --- a/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.sql +++ b/tests/queries/0_stateless/02414_all_new_table_functions_must_be_documented.sql @@ -2,5 +2,5 @@ -- Please help shorten this list down to zero elements. SELECT name FROM system.table_functions WHERE length(description) < 10 AND name NOT IN ( - 'cosn', 'oss', 'hdfs', 'hdfsCluster', 'hive', 'mysql', 'postgresql', 's3', 's3Cluster', 'sqlite' -- these functions are not enabled in fast test + 'cosn', 'oss', 'hdfs', 'hdfsCluster', 'hive', 'mysql', 'postgresql', 's3', 's3Cluster', 'sqlite', 'urlCluster' -- these functions are not enabled in fast test ) ORDER BY name; diff --git a/tests/queries/0_stateless/02442_auxiliary_zookeeper_endpoint_id.reference b/tests/queries/0_stateless/02442_auxiliary_zookeeper_endpoint_id.reference new file mode 100644 index 00000000000..4a9341ba3f6 --- /dev/null +++ b/tests/queries/0_stateless/02442_auxiliary_zookeeper_endpoint_id.reference @@ -0,0 +1 @@ +10013 diff --git a/tests/queries/0_stateless/02442_auxiliary_zookeeper_endpoint_id.sql b/tests/queries/0_stateless/02442_auxiliary_zookeeper_endpoint_id.sql new file mode 100644 index 00000000000..ff3552b2a42 --- /dev/null +++ b/tests/queries/0_stateless/02442_auxiliary_zookeeper_endpoint_id.sql @@ -0,0 +1,21 @@ +-- Tags: no-fasttest + +drop table if exists t1_r1 sync; +drop table if exists t1_r2 sync; +drop table if exists t2 sync; + +create table t1_r1 (x Int32) engine=ReplicatedMergeTree('/test/02442/{database}/t', 'r1') order by x; + +create table t1_r2 (x Int32) engine=ReplicatedMergeTree('/test/02442/{database}/t', 'r2') order by x; + +-- create table with same replica_path as t1_r1 +create table t2 (x Int32) engine=ReplicatedMergeTree('zookeeper2:/test/02442/{database}/t', 'r1') order by x; +drop table t2 sync; + +-- insert data into one replica +insert into t1_r1 select * from generateRandom('x Int32') LIMIT 10013; +system sync replica t1_r2; +select count() from t1_r2; + +drop table t1_r1 sync; +drop table t1_r2 sync; diff --git a/tests/queries/0_stateless/02457_s3_cluster_schema_inference.reference b/tests/queries/0_stateless/02457_s3_cluster_schema_inference.reference index b918bf2b155..8d744ba7b46 100644 --- 
a/tests/queries/0_stateless/02457_s3_cluster_schema_inference.reference +++ b/tests/queries/0_stateless/02457_s3_cluster_schema_inference.reference @@ -10,6 +10,138 @@ c3 Nullable(Int64) c1 Nullable(Int64) c2 Nullable(Int64) c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 0 0 0 0 0 0 1 2 3 diff --git a/tests/queries/0_stateless/02457_s3_cluster_schema_inference.sql b/tests/queries/0_stateless/02457_s3_cluster_schema_inference.sql index 03e8785b24b..6182a1a222e 100644 --- a/tests/queries/0_stateless/02457_s3_cluster_schema_inference.sql +++ b/tests/queries/0_stateless/02457_s3_cluster_schema_inference.sql @@ -5,9 +5,34 @@ desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localh desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV'); desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest'); desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'TSV'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'TSV', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'TSV', 'auto', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', NOSIGN); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 
'http://localhost:11111/test/{a,b}.tsv', NOSIGN, 'TSV'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', NOSIGN, 'TSV', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', NOSIGN, 'TSV', 'auto', 'auto'); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', headers(MyCustomHeader = 'SomeValue')); +desc s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', headers(MyCustomHeader = 'SomeValue'), 'auto'); + select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv') order by c1, c2, c3; select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV') order by c1, c2, c3; select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest') order by c1, c2, c3; select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'TSV') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'auto') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', 'auto') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'auto') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'TSV', 'auto') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'test', 'testtest', 'TSV', 'auto', 'auto') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', NOSIGN) order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', NOSIGN, 'TSV') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', NOSIGN, 'TSV', 'auto') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', NOSIGN, 'TSV', 'auto', 'auto') order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', headers(MyCustomHeader = 'SomeValue')) order by c1, c2, c3; +select * from s3Cluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', headers(MyCustomHeader = 'SomeValue'), 'auto') order by c1, c2, c3; diff --git a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh index 3035a191c8f..50e89cca4c9 100755 --- a/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh +++ 
b/tests/queries/0_stateless/02535_max_parallel_replicas_custom_key.sh @@ -41,6 +41,6 @@ run_count_with_custom_key "y" run_count_with_custom_key "cityHash64(y)" run_count_with_custom_key "cityHash64(y) + 1" -$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) as t1 JOIN 02535_custom_key USING y" --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "Joins are not supported with parallel replicas" +$CLICKHOUSE_CLIENT --query="SELECT count() FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), 02535_custom_key) as t1 JOIN 02535_custom_key USING y" --parallel_replicas_custom_key="y" --send_logs_level="trace" 2>&1 | grep -Fac "JOINs are not supported with parallel replicas" $CLICKHOUSE_CLIENT --query="DROP TABLE 02535_custom_key" diff --git a/tests/queries/0_stateless/02709_parallel_replicas_with_final_modifier.reference b/tests/queries/0_stateless/02709_parallel_replicas_with_final_modifier.reference deleted file mode 100644 index f347e8b5857..00000000000 --- a/tests/queries/0_stateless/02709_parallel_replicas_with_final_modifier.reference +++ /dev/null @@ -1 +0,0 @@ -1 1 2020-01-01 00:00:00 diff --git a/tests/queries/0_stateless/02709_parallel_replicas_with_final_modifier.sql b/tests/queries/0_stateless/02709_parallel_replicas_with_final_modifier.sql deleted file mode 100644 index 1f5daaae189..00000000000 --- a/tests/queries/0_stateless/02709_parallel_replicas_with_final_modifier.sql +++ /dev/null @@ -1,5 +0,0 @@ -DROP TABLE IF EXISTS t_02709; -CREATE TABLE t_02709 (key UInt32, sign Int8, date Datetime) ENGINE=CollapsingMergeTree(sign) PARTITION BY date ORDER BY key; -INSERT INTO t_02709 VALUES (1, 1, '2020-01-01'), (2, 1, '2020-01-02'), (1, -1, '2020-01-01'), (2, -1, '2020-01-02'), (1, 1, '2020-01-01'); -SELECT * FROM t_02709 FINAL ORDER BY key SETTINGS max_parallel_replicas=3, allow_experimental_parallel_reading_from_replicas=1, use_hedged_requests=0, cluster_for_parallel_replicas='parallel_replicas'; -DROP TABLE t_02709; diff --git a/tests/queries/0_stateless/02721_url_cluster.reference b/tests/queries/0_stateless/02721_url_cluster.reference new file mode 100644 index 00000000000..36ef7609920 --- /dev/null +++ b/tests/queries/0_stateless/02721_url_cluster.reference @@ -0,0 +1,136 @@ +0 0 0 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +20 21 22 +23 24 25 +26 27 28 +0 0 0 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +20 21 22 +23 24 25 +26 27 28 +0 0 0 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +20 21 22 +23 24 25 +26 27 28 +0 0 0 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +20 21 22 +23 24 25 +26 27 28 +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 UInt64 +c2 UInt64 +c3 UInt64 +c1 UInt64 +c2 UInt64 +c3 UInt64 +12 +12 +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 Nullable(Int64) +c1 Nullable(Int64) +c2 Nullable(Int64) +c3 
Nullable(Int64) +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +0 0 0 +0 0 0 +1 2 3 +4 5 6 +7 8 9 +10 11 12 +13 14 15 +16 17 18 +1 2 3 +4 5 6 +7 8 9 +0 0 0 diff --git a/tests/queries/0_stateless/02721_url_cluster.sql b/tests/queries/0_stateless/02721_url_cluster.sql new file mode 100644 index 00000000000..c30b03495cd --- /dev/null +++ b/tests/queries/0_stateless/02721_url_cluster.sql @@ -0,0 +1,40 @@ +-- Tags: no-fasttest +-- Tag no-fasttest: Depends on AWS + +select * from urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv') ORDER BY c1, c2, c3; +select * from urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV') ORDER BY c1, c2, c3; +select * from urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64') ORDER BY c1, c2, c3; +select * from urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64', 'auto') ORDER BY c1, c2, c3; + +desc urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv'); +desc urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV'); +desc urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64'); +desc urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64', 'auto'); + +select COUNT() from urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv'); +select COUNT(*) from urlCluster('test_cluster_two_shards_localhost', 'http://localhost:11111/test/{a,b,c}.tsv'); + +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv'); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV'); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'auto'); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto'); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', 'auto'); + +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', headers('X-ClickHouse-Database'='default'), 'http://localhost:11111/test/{a,b}.tsv'); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', headers('X-ClickHouse-Database'='default'), 'TSV'); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'auto', headers('X-ClickHouse-Database'='default')); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', headers('X-ClickHouse-Database'='default')); +desc urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', headers('X-ClickHouse-Database'='default'), 'auto', 'auto'); + +select * from urlCluster('test_cluster_one_shard_three_replicas_localhost', 
'http://localhost:11111/test/{a,b}.tsv') order by c1, c2, c3; +select * from urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV') order by c1, c2, c3; +select * from urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'auto') order by c1, c2, c3; +select * from urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto') order by c1, c2, c3; +select * from urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/{a,b}.tsv', 'TSV', 'auto', 'auto') order by c1, c2, c3; + +drop table if exists test; +create table test (x UInt32, y UInt32, z UInt32) engine=Memory(); +insert into test select * from urlCluster('test_cluster_one_shard_three_replicas_localhost', 'http://localhost:11111/test/a.tsv', 'TSV'); +select * from test; +drop table test; + diff --git a/tests/queries/0_stateless/02730_with_fill_by_sorting_prefix.reference b/tests/queries/0_stateless/02730_with_fill_by_sorting_prefix.reference new file mode 100644 index 00000000000..260f08027f1 --- /dev/null +++ b/tests/queries/0_stateless/02730_with_fill_by_sorting_prefix.reference @@ -0,0 +1,227 @@ +-- { echoOn } +set use_with_fill_by_sorting_prefix=1; +-- corner case with constant sort prefix +SELECT number +FROM numbers(1) +ORDER BY 10 ASC, number DESC WITH FILL FROM 1 +SETTINGS enable_positional_arguments=0; +1 +0 +-- sensor table +drop table if exists ts; +create table ts (sensor_id UInt64, timestamp UInt64, value Float64) ENGINE=MergeTree() ORDER BY (sensor_id, timestamp); +insert into ts VALUES (1, 10, 1), (1, 12, 2), (3, 5, 1), (3, 7, 3), (5, 1, 1), (5, 3, 1); +-- FillingTransform: 6 rows will be processed in 1 chunks +select * from ts order by sensor_id, timestamp with fill step 1; +1 10 1 +1 11 0 +1 12 2 +3 5 1 +3 6 0 +3 7 3 +5 1 1 +5 2 0 +5 3 1 +drop table if exists ts; +create table ts (sensor_id UInt64, timestamp UInt64, value Float64) ENGINE=MergeTree() ORDER BY (sensor_id, timestamp); +system stop merges ts; +-- FillingTransform: 6 rows will be processed in 3 chunks with 2 rows each +insert into ts VALUES (1, 10, 1), (1, 12, 1); +insert into ts VALUES (3, 5, 1), (3, 7, 1); +insert into ts VALUES (5, 1, 1), (5, 3, 1); +select * from ts order by sensor_id, timestamp with fill step 1 settings max_block_size=2; +1 10 1 +1 11 0 +1 12 1 +3 5 1 +3 6 0 +3 7 1 +5 1 1 +5 2 0 +5 3 1 +drop table if exists ts; +create table ts (sensor_id UInt64, timestamp UInt64, value Float64) ENGINE=MergeTree() ORDER BY (sensor_id, timestamp); +system stop merges ts; +-- FillingTransform: 6 rows will be processed in 2 chunks with 3 rows each +insert into ts VALUES (1, 10, 1), (1, 12, 1), (3, 5, 1); +insert into ts VALUES (3, 7, 1), (5, 1, 1), (5, 3, 1); +select * from ts order by sensor_id, timestamp with fill step 1 settings max_block_size=3; +1 10 1 +1 11 0 +1 12 1 +3 5 1 +3 6 0 +3 7 1 +5 1 1 +5 2 0 +5 3 1 +-- FROM and TO +-- ASC order in sorting prefix +select * from ts order by sensor_id, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999); +1 6 9999 +1 7 9999 +1 8 9999 +1 9 9999 +1 10 1 +1 12 1 +3 5 1 +3 6 9999 +3 7 1 +3 8 9999 +3 9 9999 +5 1 1 +5 3 1 +5 6 9999 +5 7 9999 +5 8 9999 +5 9 9999 +select * from ts order by sensor_id, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +0 6 9999 +0 7 9999 +0 8 9999 +0 9 9999 +1 10 1 +1 12 1 +3 5 1 +3 7 1 +5 1 1 +5 3 1 +-- DESC order in 
sorting prefix +select * from ts order by sensor_id DESC, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999); +5 1 1 +5 3 1 +5 6 9999 +5 7 9999 +5 8 9999 +5 9 9999 +3 5 1 +3 6 9999 +3 7 1 +3 8 9999 +3 9 9999 +1 6 9999 +1 7 9999 +1 8 9999 +1 9 9999 +1 10 1 +1 12 1 +select * from ts order by sensor_id DESC, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +5 1 1 +5 3 1 +3 5 1 +0 6 9999 +3 7 1 +0 8 9999 +0 9 9999 +1 10 1 +1 12 1 +-- without TO +-- ASC order in sorting prefix +select * from ts order by sensor_id, timestamp with fill from 6 step 1 interpolate (value as 9999); +1 6 9999 +1 7 9999 +1 8 9999 +1 9 9999 +1 10 1 +1 11 9999 +1 12 1 +3 5 1 +3 6 9999 +3 7 1 +5 1 1 +5 3 1 +select * from ts order by sensor_id, timestamp with fill from 6 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +0 6 9999 +0 7 9999 +0 8 9999 +0 9 9999 +1 10 1 +0 11 9999 +1 12 1 +3 5 1 +3 7 1 +5 1 1 +5 3 1 +-- DESC order in sorting prefix +select * from ts order by sensor_id DESC, timestamp with fill from 6 step 1 interpolate (value as 9999); +5 1 1 +5 3 1 +3 5 1 +3 6 9999 +3 7 1 +1 6 9999 +1 7 9999 +1 8 9999 +1 9 9999 +1 10 1 +1 11 9999 +1 12 1 +select * from ts order by sensor_id DESC, timestamp with fill from 6 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +5 1 1 +5 3 1 +3 5 1 +0 6 9999 +3 7 1 +0 8 9999 +0 9 9999 +1 10 1 +0 11 9999 +1 12 1 +-- without FROM +-- ASC order in sorting prefix +select * from ts order by sensor_id, timestamp with fill to 10 step 1 interpolate (value as 9999); +1 10 1 +1 12 1 +3 5 1 +3 6 9999 +3 7 1 +3 8 9999 +3 9 9999 +5 1 1 +5 2 9999 +5 3 1 +5 4 9999 +5 5 9999 +5 6 9999 +5 7 9999 +5 8 9999 +5 9 9999 +select * from ts order by sensor_id, timestamp with fill to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +1 10 1 +1 12 1 +3 5 1 +3 7 1 +5 1 1 +5 3 1 +-- DESC order in sorting prefix +select * from ts order by sensor_id DESC, timestamp with fill to 10 step 1 interpolate (value as 9999); +5 1 1 +5 2 9999 +5 3 1 +5 4 9999 +5 5 9999 +5 6 9999 +5 7 9999 +5 8 9999 +5 9 9999 +3 5 1 +3 6 9999 +3 7 1 +3 8 9999 +3 9 9999 +1 10 1 +1 12 1 +select * from ts order by sensor_id DESC, timestamp with fill to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +5 1 1 +0 2 9999 +5 3 1 +0 4 9999 +3 5 1 +0 6 9999 +3 7 1 +0 8 9999 +0 9 9999 +1 10 1 +1 12 1 +-- checking that sorting prefix columns can't be used in INTERPOLATE +SELECT * FROM ts ORDER BY sensor_id, value, timestamp WITH FILL FROM 6 TO 10 INTERPOLATE ( value AS 1 ); -- { serverError INVALID_WITH_FILL_EXPRESSION } diff --git a/tests/queries/0_stateless/02730_with_fill_by_sorting_prefix.sql b/tests/queries/0_stateless/02730_with_fill_by_sorting_prefix.sql new file mode 100644 index 00000000000..e2f1ce29d5c --- /dev/null +++ b/tests/queries/0_stateless/02730_with_fill_by_sorting_prefix.sql @@ -0,0 +1,60 @@ +-- { echoOn } +set use_with_fill_by_sorting_prefix=1; + +-- corner case with constant sort prefix +SELECT number +FROM numbers(1) +ORDER BY 10 ASC, number DESC WITH FILL FROM 1 +SETTINGS enable_positional_arguments=0; + +-- sensor table +drop table if exists ts; +create table ts (sensor_id UInt64, timestamp UInt64, value Float64) ENGINE=MergeTree() ORDER BY (sensor_id, timestamp); +insert into ts VALUES (1, 10, 1), (1, 12, 2), (3, 5, 1), (3, 7, 3), (5, 1, 1), (5, 3, 1); +-- FillingTransform: 6 rows will be processed in 1 chunks +select * from 
ts order by sensor_id, timestamp with fill step 1; + +drop table if exists ts; +create table ts (sensor_id UInt64, timestamp UInt64, value Float64) ENGINE=MergeTree() ORDER BY (sensor_id, timestamp); +system stop merges ts; +-- FillingTransform: 6 rows will be processed in 3 chunks with 2 rows each +insert into ts VALUES (1, 10, 1), (1, 12, 1); +insert into ts VALUES (3, 5, 1), (3, 7, 1); +insert into ts VALUES (5, 1, 1), (5, 3, 1); +select * from ts order by sensor_id, timestamp with fill step 1 settings max_block_size=2; + +drop table if exists ts; +create table ts (sensor_id UInt64, timestamp UInt64, value Float64) ENGINE=MergeTree() ORDER BY (sensor_id, timestamp); +system stop merges ts; +-- FillingTransform: 6 rows will be processed in 2 chunks with 3 rows each +insert into ts VALUES (1, 10, 1), (1, 12, 1), (3, 5, 1); +insert into ts VALUES (3, 7, 1), (5, 1, 1), (5, 3, 1); +select * from ts order by sensor_id, timestamp with fill step 1 settings max_block_size=3; + +-- FROM and TO +-- ASC order in sorting prefix +select * from ts order by sensor_id, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999); +select * from ts order by sensor_id, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; + +-- DESC order in sorting prefix +select * from ts order by sensor_id DESC, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999); +select * from ts order by sensor_id DESC, timestamp with fill from 6 to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; + +-- without TO +-- ASC order in sorting prefix +select * from ts order by sensor_id, timestamp with fill from 6 step 1 interpolate (value as 9999); +select * from ts order by sensor_id, timestamp with fill from 6 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +-- DESC order in sorting prefix +select * from ts order by sensor_id DESC, timestamp with fill from 6 step 1 interpolate (value as 9999); +select * from ts order by sensor_id DESC, timestamp with fill from 6 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; + +-- without FROM +-- ASC order in sorting prefix +select * from ts order by sensor_id, timestamp with fill to 10 step 1 interpolate (value as 9999); +select * from ts order by sensor_id, timestamp with fill to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; +-- DESC order in sorting prefix +select * from ts order by sensor_id DESC, timestamp with fill to 10 step 1 interpolate (value as 9999); +select * from ts order by sensor_id DESC, timestamp with fill to 10 step 1 interpolate (value as 9999) settings use_with_fill_by_sorting_prefix=0; + +-- checking that sorting prefix columns can't be used in INTERPOLATE +SELECT * FROM ts ORDER BY sensor_id, value, timestamp WITH FILL FROM 6 TO 10 INTERPOLATE ( value AS 1 ); -- { serverError INVALID_WITH_FILL_EXPRESSION } diff --git a/tests/queries/0_stateless/02735_system_zookeeper_connection.reference b/tests/queries/0_stateless/02735_system_zookeeper_connection.reference index c9cc8adede8..1deabd88b88 100644 --- a/tests/queries/0_stateless/02735_system_zookeeper_connection.reference +++ b/tests/queries/0_stateless/02735_system_zookeeper_connection.reference @@ -1 +1,2 @@ -[ :1]:9181 0 +default ::1 9181 0 0 3 +zookeeper2 ::1 9181 0 0 0 diff --git a/tests/queries/0_stateless/02735_system_zookeeper_connection.sql b/tests/queries/0_stateless/02735_system_zookeeper_connection.sql index 
b409913d183..8b37c428413 100644 --- a/tests/queries/0_stateless/02735_system_zookeeper_connection.sql +++ b/tests/queries/0_stateless/02735_system_zookeeper_connection.sql @@ -5,9 +5,11 @@ DROP TABLE IF EXISTS test_zk_connection_table; CREATE TABLE test_zk_connection_table ( key UInt64 ) -ENGINE ReplicatedMergeTree('/clickhouse/{database}/02731_zk_connection/{shard}', '{replica}') +ENGINE ReplicatedMergeTree('zookeeper2:/clickhouse/{database}/02731_zk_connection/{shard}', '{replica}') ORDER BY tuple(); -select host, port, is_expired from system.zookeeper_connection where name='default_zookeeper'; +-- keeper_api_version will be 0 for auxiliary_zookeeper2, because we fail to get /api_version due to chroot +-- I'm not sure if it's a bug or a useful trick to fall back to the basic API +select name, host, port, index, is_expired, keeper_api_version from system.zookeeper_connection order by name; DROP TABLE IF EXISTS test_zk_connection_table; diff --git a/tests/queries/0_stateless/02751_query_log_test_partitions.reference b/tests/queries/0_stateless/02751_query_log_test_partitions.reference new file mode 100644 index 00000000000..5a9f2163c0e --- /dev/null +++ b/tests/queries/0_stateless/02751_query_log_test_partitions.reference @@ -0,0 +1,2 @@ +3 3 +02751_query_log_test_partitions.3 diff --git a/tests/queries/0_stateless/02751_query_log_test_partitions.sql b/tests/queries/0_stateless/02751_query_log_test_partitions.sql new file mode 100644 index 00000000000..be047d1a46e --- /dev/null +++ b/tests/queries/0_stateless/02751_query_log_test_partitions.sql @@ -0,0 +1,20 @@ +set log_queries=1; +set log_queries_min_type='QUERY_FINISH'; + +DROP TABLE IF EXISTS 02751_query_log_test_partitions; +CREATE TABLE 02751_query_log_test_partitions (a Int64, b Int64) ENGINE = MergeTree PARTITION BY a ORDER BY b; + +INSERT INTO 02751_query_log_test_partitions SELECT number, number FROM numbers(10); + +SELECT * FROM 02751_query_log_test_partitions WHERE a = 3; + +SYSTEM FLUSH LOGS; + +SELECT + -- Remove the prefix string, which is the mutable database name.
+ arrayStringConcat(arrayPopFront(splitByString('.', partitions[1])), '.') +FROM + system.query_log +WHERE + current_database=currentDatabase() and + query = 'SELECT * FROM 02751_query_log_test_partitions WHERE a = 3;' diff --git a/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.reference b/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.reference new file mode 100644 index 00000000000..e5c608ddc1a --- /dev/null +++ b/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.reference @@ -0,0 +1,39 @@ +SELECT value1 +FROM t +WHERE ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE ((date1 < \'1993-01-01\') OR (date1 > \'1993-12-31\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (date1 > \'1993-12-31\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (date1 <= \'1993-12-31\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (date1 >= \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE ((date1 >= \'1993-01-01\') AND (date1 <= \'1997-12-31\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) OR ((date1 >= \'1994-01-01\') AND (date1 <= \'1994-12-31\'))) AND ((id >= 1) AND (id <= 3)) +SELECT + value1, + toYear(date1) AS year1 +FROM t +WHERE ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE (date1 < \'1993-01-01\') AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +PREWHERE (date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\') +WHERE ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) AND ((id >= 1) AND (id <= 3)) +SELECT value1 +FROM t +WHERE ((id >= 1) AND (id <= 3)) AND ((date1 >= \'1993-01-01\') AND (date1 <= \'1993-12-31\')) diff --git a/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.sql b/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.sql new file mode 100644 index 00000000000..563468d4f82 --- /dev/null +++ b/tests/queries/0_stateless/02764_date_filter_predicate_optimizer.sql @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS t; +CREATE TABLE t (id UInt32, value1 String, date1 Date) ENGINE ReplacingMergeTree() ORDER BY id; + +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) <> 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) < 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) > 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) <= 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) >= 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE toYear(date1) BETWEEN 1993 AND 1997 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE (toYear(date1) = 1993 OR toYear(date1) = 1994) AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1, toYear(date1) as year1 FROM t WHERE year1 = 1993 AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE 1993 > toYear(date1) AND id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t PREWHERE toYear(date1) = 1993 WHERE id BETWEEN 1 AND 3; +EXPLAIN SYNTAX SELECT value1 FROM t WHERE id BETWEEN 1 AND 3 HAVING toYear(date1) = 1993; + +DROP TABLE t; diff --git 
a/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.reference b/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.sql b/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.sql new file mode 100644 index 00000000000..ea8eb04bd07 --- /dev/null +++ b/tests/queries/0_stateless/02764_parallel_replicas_plain_merge_tree.sql @@ -0,0 +1,16 @@ +CREATE TABLE IF NOT EXISTS parallel_replicas_plain (x String) ENGINE=MergeTree() ORDER BY x; +INSERT INTO parallel_replicas_plain SELECT toString(number) FROM numbers(10); + +SET max_parallel_replicas=3, allow_experimental_parallel_reading_from_replicas=1, use_hedged_requests=0, cluster_for_parallel_replicas='parallel_replicas'; + +SET parallel_replicas_for_non_replicated_merge_tree = 0; + +SELECT x FROM parallel_replicas_plain LIMIT 1 FORMAT Null; +SELECT max(length(x)) FROM parallel_replicas_plain FORMAT Null; + +SET parallel_replicas_for_non_replicated_merge_tree = 1; + +SELECT x FROM parallel_replicas_plain LIMIT 1 FORMAT Null; +SELECT max(length(x)) FROM parallel_replicas_plain FORMAT Null; + +DROP TABLE IF EXISTS parallel_replicas_plain; diff --git a/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.reference b/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.sql b/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.sql new file mode 100644 index 00000000000..f447051e1e5 --- /dev/null +++ b/tests/queries/0_stateless/02765_parallel_replicas_final_modifier.sql @@ -0,0 +1,14 @@ +CREATE TABLE IF NOT EXISTS parallel_replicas_final (x String) ENGINE=ReplacingMergeTree() ORDER BY x; + +INSERT INTO parallel_replicas_final SELECT toString(number) FROM numbers(10); + +SET max_parallel_replicas=3, allow_experimental_parallel_reading_from_replicas=1, use_hedged_requests=0, cluster_for_parallel_replicas='parallel_replicas'; +SET parallel_replicas_for_non_replicated_merge_tree = 1; + +SELECT * FROM parallel_replicas_final FINAL FORMAT Null; + +SET allow_experimental_parallel_reading_from_replicas=2; + +SELECT * FROM parallel_replicas_final FINAL FORMAT Null; -- { serverError SUPPORT_IS_DISABLED } + +DROP TABLE IF EXISTS parallel_replicas_final; diff --git a/utils/check-style/shellcheck-run.sh b/utils/check-style/shellcheck-run.sh index c0063d4b191..bdb0f681c31 100755 --- a/utils/check-style/shellcheck-run.sh +++ b/utils/check-style/shellcheck-run.sh @@ -1,9 +1,14 @@ #!/usr/bin/env bash ROOT_PATH=$(git rev-parse --show-toplevel) -EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|memcpy/|consistent-hashing/|Parsers/New' +NPROC=$(($(nproc) + 3)) # Check sh tests with Shellcheck -(cd $ROOT_PATH/tests/queries/0_stateless/ && shellcheck --check-sourced --external-sources --severity info --exclude SC1071,SC2086,SC2016 *.sh ../1_stateful/*.sh) +( cd "$ROOT_PATH/tests/queries/0_stateless/" && \ + find "$ROOT_PATH/tests/queries/"{0_stateless,1_stateful} -name '*.sh' -print0 | \ + xargs -0 -P "$NPROC" -n 20 shellcheck --check-sourced --external-sources --severity info --exclude SC1071,SC2086,SC2016 +) # Check docker scripts with shellcheck -find "$ROOT_PATH/docker" -executable -type f -exec file -F' ' --mime-type {} \; | awk -F' ' '$2==" text/x-shellscript" {print $1}' | grep -v 
"entrypoint.alpine.sh" | grep -v "compare.sh"| xargs shellcheck - +find "$ROOT_PATH/docker" -executable -type f -exec file -F' ' --mime-type {} \; | \ + awk -F' ' '$2==" text/x-shellscript" {print $1}' | \ + grep -v "compare.sh" | \ + xargs -P "$NPROC" -n 20 shellcheck