diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e4785892c3..e22377e2332 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,7 +29,7 @@ * Add two new functions which allow for user-defined keys/seeds with SipHash{64,128}. [#45513](https://github.com/ClickHouse/ClickHouse/pull/45513) ([Salvatore Mesoraca](https://github.com/aiven-sal)). * Allow a three-argument version for table function `format`. close [#45808](https://github.com/ClickHouse/ClickHouse/issues/45808). [#45873](https://github.com/ClickHouse/ClickHouse/pull/45873) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). * Add `JodaTime` format support for 'x','w','S'. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. [#46073](https://github.com/ClickHouse/ClickHouse/pull/46073) ([zk_kiger](https://github.com/zk-kiger)). -* Support window function `ntile`. +* Support window function `ntile`. ([lgbo](https://github.com/lgbo-ustc)). * Add setting `final` to implicitly apply the `FINAL` modifier to every table. [#40945](https://github.com/ClickHouse/ClickHouse/pull/40945) ([Arthur Passos](https://github.com/arthurpassos)). * Added `arrayPartialSort` and `arrayPartialReverseSort` functions. [#46296](https://github.com/ClickHouse/ClickHouse/pull/46296) ([Joanna Hulboj](https://github.com/jh0x)). * The new http parameter `client_protocol_version` allows setting a client protocol version for HTTP responses using the Native format. [#40397](https://github.com/ClickHouse/ClickHouse/issues/40397). [#46360](https://github.com/ClickHouse/ClickHouse/pull/46360) ([Geoff Genz](https://github.com/genzgd)). diff --git a/README.md b/README.md index 1974de3ce94..fcbe65e8223 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ curl https://clickhouse.com/ | sh * [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster. * [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information. * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. -* [Slack](https://clickhousedb.slack.com/) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. +* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. * [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events. * [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation. * [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev. 
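As a quick, illustrative query for the `ntile` changelog entry above (this example is not taken from the referenced PR):

```sql
SELECT number, ntile(3) OVER (ORDER BY number) AS bucket
FROM numbers(6);

┌─number─┬─bucket─┐
│      0 │      1 │
│      1 │      1 │
│      2 │      2 │
│      3 │      2 │
│      4 │      3 │
│      5 │      3 │
└────────┴────────┘
```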
diff --git a/base/poco/Foundation/src/NumberParser.cpp b/base/poco/Foundation/src/NumberParser.cpp
index 4081f3b2663..8d32e1a722c 100644
--- a/base/poco/Foundation/src/NumberParser.cpp
+++ b/base/poco/Foundation/src/NumberParser.cpp
@@ -44,7 +44,7 @@ int NumberParser::parse(const std::string& s, char thSep)
     if (tryParse(s, result, thSep))
         return result;
     else
-        throw SyntaxException("Not a valid integer", s);
+        throw SyntaxException("Not a valid integer", "'" + s + "'");
 }
 
 
@@ -60,7 +60,7 @@ unsigned NumberParser::parseUnsigned(const std::string& s, char thSep)
     if (tryParseUnsigned(s, result, thSep))
         return result;
     else
-        throw SyntaxException("Not a valid unsigned integer", s);
+        throw SyntaxException("Not a valid unsigned integer", "'" + s + "'");
 }
 
 
@@ -76,7 +76,7 @@ unsigned NumberParser::parseHex(const std::string& s)
     if (tryParseHex(s, result))
         return result;
     else
-        throw SyntaxException("Not a valid hexadecimal integer", s);
+        throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
 }
 
 
@@ -94,7 +94,7 @@ unsigned NumberParser::parseOct(const std::string& s)
     if (tryParseOct(s, result))
         return result;
     else
-        throw SyntaxException("Not a valid hexadecimal integer", s);
+        throw SyntaxException("Not a valid octal integer", "'" + s + "'");
 }
 
 
@@ -112,7 +112,7 @@ Int64 NumberParser::parse64(const std::string& s, char thSep)
     if (tryParse64(s, result, thSep))
         return result;
     else
-        throw SyntaxException("Not a valid integer", s);
+        throw SyntaxException("Not a valid integer", "'" + s + "'");
 }
 
 
@@ -128,7 +128,7 @@ UInt64 NumberParser::parseUnsigned64(const std::string& s, char thSep)
     if (tryParseUnsigned64(s, result, thSep))
         return result;
    else
-        throw SyntaxException("Not a valid unsigned integer", s);
+        throw SyntaxException("Not a valid unsigned integer", "'" + s + "'");
 }
 
 
@@ -144,7 +144,7 @@ UInt64 NumberParser::parseHex64(const std::string& s)
     if (tryParseHex64(s, result))
         return result;
     else
-        throw SyntaxException("Not a valid hexadecimal integer", s);
+        throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
 }
 
 
@@ -162,7 +162,7 @@ UInt64 NumberParser::parseOct64(const std::string& s)
     if (tryParseOct64(s, result))
         return result;
     else
-        throw SyntaxException("Not a valid hexadecimal integer", s);
+        throw SyntaxException("Not a valid octal integer", "'" + s + "'");
 }
 
 
@@ -180,7 +180,7 @@ double NumberParser::parseFloat(const std::string& s, char decSep, char thSep)
     if (tryParseFloat(s, result, decSep, thSep))
         return result;
     else
-        throw SyntaxException("Not a valid floating-point number", s);
+        throw SyntaxException("Not a valid floating-point number", "'" + s + "'");
 }
 
 
@@ -196,7 +196,7 @@ bool NumberParser::parseBool(const std::string& s)
     if (tryParseBool(s, result))
         return result;
     else
-        throw SyntaxException("Not a valid bool number", s);
+        throw SyntaxException("Not a valid bool number", "'" + s + "'");
 }
diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake
index b0ff2349957..ca68b2ee49a 100644
--- a/cmake/cpu_features.cmake
+++ b/cmake/cpu_features.cmake
@@ -60,6 +60,23 @@ elseif (ARCH_AARCH64)
         set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs -Xclang=-target-feature -Xclang=+ldapr -Wno-unused-command-line-argument")
     endif ()
 
+    # Best-effort check: The build generates and executes intermediate binaries, e.g. protoc and llvm-tablegen. If we build on ARM for ARM
+    # and the build machine is too old, i.e. doesn't satisfy the above modern profile, then these intermediate binaries will not run (dump
+    # SIGILL).
+    # Even if they could run, the build machine wouldn't be able to run the ClickHouse binary. In that case, we suggest running the
+    # build with the compat profile.
+    if (OS_LINUX AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)" AND NOT NO_ARMV81_OR_HIGHER)
+        # CPU features in /proc/cpuinfo and compiler flags don't align :( ... pick some obvious flags contained in the modern but not in the
+        # legacy profile (full Graviton 3 /proc/cpuinfo is "fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm
+        # jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs paca pacg dcpodp svei8mm svebf16 i8mm
+        # bf16 dgh rng")
+        execute_process(
+            COMMAND grep -P "^(?=.*atomic)(?=.*ssbs)" /proc/cpuinfo
+            OUTPUT_VARIABLE FLAGS)
+        if (NOT FLAGS)
+            MESSAGE(FATAL_ERROR "The build machine does not satisfy the minimum CPU requirements, try to run cmake with -DNO_ARMV81_OR_HIGHER=1")
+        endif()
+    endif()
+
 elseif (ARCH_PPC64LE)
     # By Default, build for power8 and up, allow building for power9 and up
     # Note that gcc and clang have support for x86 SSE2 intrinsics when building for PowerPC
@@ -102,6 +119,22 @@ elseif (ARCH_AMD64)
         SET(ENABLE_AVX512_FOR_SPEC_OP 0)
     endif()
 
+    # Same best-effort check for x86 as above for ARM.
+    if (OS_LINUX AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64" AND NOT NO_SSE3_OR_HIGHER)
+        # Test for flags in standard profile but not in NO_SSE3_OR_HIGHER profile.
+        # /proc/cpuinfo for Intel Xeon 8124: "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse
+        # sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl xtopology nonstop_tsc cpuid aperfmperf
+        # tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c
+        # rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx
+        # avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke"
+        execute_process(
+            COMMAND grep -P "^(?=.*ssse3)(?=.*sse4_1)(?=.*sse4_2)" /proc/cpuinfo
+            OUTPUT_VARIABLE FLAGS)
+        if (NOT FLAGS)
+            MESSAGE(FATAL_ERROR "The build machine does not satisfy the minimum CPU requirements, try to run cmake with -DNO_SSE3_OR_HIGHER=1")
+        endif()
+    endif()
+
     # ClickHouse can be cross-compiled (e.g. on an ARM host for x86) but it is also possible to build ClickHouse on x86 w/o AVX for x86 w/
     # AVX. We only check that the compiler can emit certain SIMD instructions, we don't care if the host system is able to run the binary.
     # Therefore, use check_cxx_source_compiles (= does the code compile+link?)
    # instead of check_cxx_source_runs (= does the code compile+link+run?).
diff --git a/contrib/libunwind b/contrib/libunwind
index 5022f30f3e0..e48aa13f67d 160000
--- a/contrib/libunwind
+++ b/contrib/libunwind
@@ -1 +1 @@
-Subproject commit 5022f30f3e092a54a7c101c335ce5e08769db366
+Subproject commit e48aa13f67dc722511b5af33a32ba9b7748176b5
diff --git a/contrib/xz-cmake/CMakeLists.txt b/contrib/xz-cmake/CMakeLists.txt
index 9d08adc9c7a..c3a8203c83e 100644
--- a/contrib/xz-cmake/CMakeLists.txt
+++ b/contrib/xz-cmake/CMakeLists.txt
@@ -94,6 +94,10 @@ if (ARCH_AMD64 OR ARCH_AARCH64)
     add_compile_definitions(TUKLIB_FAST_UNALIGNED_ACCESS=1)
 endif ()
 
+if (ARCH_S390X)
+    add_compile_definitions(WORDS_BIGENDIAN)
+endif ()
+
 find_package(Threads REQUIRED)
diff --git a/contrib/zstd-cmake/CMakeLists.txt b/contrib/zstd-cmake/CMakeLists.txt
index f44d5db12c4..a6262178dc7 100644
--- a/contrib/zstd-cmake/CMakeLists.txt
+++ b/contrib/zstd-cmake/CMakeLists.txt
@@ -30,25 +30,10 @@
 # - zstd homepage : http://www.zstd.net/
 #
 ################################################################
-# Get library version based on information from input content (use regular exp)
-function(GetLibraryVersion _content _outputVar1 _outputVar2 _outputVar3)
-    string(REGEX MATCHALL ".*define ZSTD_VERSION_MAJOR+.* ([0-9]+).*define ZSTD_VERSION_MINOR+.* ([0-9]+).*define ZSTD_VERSION_RELEASE+.* ([0-9]+)" VERSION_REGEX "${_content}")
-    SET(${_outputVar1} ${CMAKE_MATCH_1} PARENT_SCOPE)
-    SET(${_outputVar2} ${CMAKE_MATCH_2} PARENT_SCOPE)
-    SET(${_outputVar3} ${CMAKE_MATCH_3} PARENT_SCOPE)
-endfunction()
-
 # Define library directory, where sources and header files are located
 SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib")
 INCLUDE_DIRECTORIES(BEFORE ${LIBRARY_DIR} "${LIBRARY_DIR}/common")
 
-# Read file content
-FILE(READ "${LIBRARY_DIR}/zstd.h" HEADER_CONTENT)
-
-# Parse version
-GetLibraryVersion("${HEADER_CONTENT}" LIBVER_MAJOR LIBVER_MINOR LIBVER_RELEASE)
-MESSAGE(STATUS "ZSTD VERSION ${LIBVER_MAJOR}.${LIBVER_MINOR}.${LIBVER_RELEASE}")
-
 # cd contrib/zstd/lib
 # find . -name '*.c' -or -name '*.S' | grep -vP 'deprecated|legacy' | sort | sed 's/^\./    "${LIBRARY_DIR}/"'
 SET(Sources
diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md
index eddae7b34e7..ef422632d3e 100644
--- a/docs/en/engines/table-engines/integrations/kafka.md
+++ b/docs/en/engines/table-engines/integrations/kafka.md
@@ -162,22 +162,59 @@ If you want to change the target table by using `ALTER`, we recommend disabling
 
 ## Configuration {#configuration}
 
-Similar to GraphiteMergeTree, the Kafka engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (`kafka`) and topic-level (`kafka_*`). The global configuration is applied first, and then the topic-level configuration is applied (if it exists).
+Similar to GraphiteMergeTree, the Kafka engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (below `<kafka>`) and topic-level (below `<kafka><kafka_topic>`). The global configuration is applied first, and then the topic-level configuration is applied (if it exists).
 
 ``` xml
-<kafka>
-  <debug>cgrp</debug>
-  <auto_offset_reset>smallest</auto_offset_reset>
-</kafka>
+<kafka>
+    <!-- Global configuration options for all tables of Kafka engine type -->
+    <debug>cgrp</debug>
+    <auto_offset_reset>smallest</auto_offset_reset>
+
+    <!-- Configuration specific to topics "logs" and "stats" -->
+    <kafka_topic>
+        <name>logs</name>
+        <retry_backoff_ms>250</retry_backoff_ms>
+        <fetch_min_bytes>100000</fetch_min_bytes>
+    </kafka_topic>
+    <kafka_topic>
+        <name>stats</name>
+        <retry_backoff_ms>400</retry_backoff_ms>
+        <fetch_min_bytes>50000</fetch_min_bytes>
+    </kafka_topic>
+</kafka>
+```
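+
+For example, a table consuming the `logs` topic configured above could be declared as follows (the broker address, consumer group and format below are illustrative placeholders, not part of the configuration above):
+
+``` sql
+CREATE TABLE kafka_logs
+(
+    ts DateTime,
+    message String
+)
+ENGINE = Kafka
+SETTINGS kafka_broker_list = 'localhost:9092',
+         kafka_topic_list = 'logs',
+         kafka_group_name = 'logs_consumer_group',
+         kafka_format = 'JSONEachRow';
+```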
+
+<details markdown="1">
+
+<summary>Example in deprecated syntax</summary>
+
+``` xml
+<kafka>
+    <!-- Global configuration options for all tables of Kafka engine type -->
+    <debug>cgrp</debug>
+    <auto_offset_reset>smallest</auto_offset_reset>
+</kafka>
+
+<!-- Configuration specific to topics "logs" and "stats" -->
+<kafka_logs>
+    <retry_backoff_ms>250</retry_backoff_ms>
+    <fetch_min_bytes>100000</fetch_min_bytes>
+</kafka_logs>
+<kafka_stats>
+    <retry_backoff_ms>400</retry_backoff_ms>
+    <fetch_min_bytes>50000</fetch_min_bytes>
+</kafka_stats>
+```
+
+</details>
 
 For a list of possible configuration options, see the [librdkafka configuration reference](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Use the underscore (`_`) instead of a dot in the ClickHouse configuration. For example, `check.crcs=true` will be `<check_crcs>true</check_crcs>`.
 
 ### Kerberos support {#kafka-kerberos-support}
diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md
index 404c6c6f227..0867f3a0795 100644
--- a/docs/en/getting-started/install.md
+++ b/docs/en/getting-started/install.md
@@ -5,6 +5,10 @@ description: Install ClickHouse
 slug: /en/install
 ---
 
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+import CodeBlock from '@theme/CodeBlock';
+
 # Install ClickHouse
 
 You have three options for getting up and running with ClickHouse:
@@ -19,17 +23,27 @@ The quickest and easiest way to get up and running with ClickHouse is to create
 
 ## Self-Managed Install
 
+:::tip
+For production installs of a specific release version see the [installation options](#available-installation-options) down below.
+:::
+
+<Tabs>
+<TabItem value="linux" label="Linux" default>
+
 1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable:
+
    ```bash
    curl https://clickhouse.com/ | sh
    ```
 
 1. Run the `install` command, which defines a collection of useful symlinks along with the files and folders used by ClickHouse - all of which you can see in the output of the install script:
+
    ```bash
    sudo ./clickhouse install
    ```
 
 1. At the end of the install script, you are prompted for a password for the `default` user. Feel free to enter a password, or you can optionally leave it blank:
+
    ```response
    Creating log directory /var/log/clickhouse-server.
    Creating data directory /var/lib/clickhouse.
@@ -40,6 +54,7 @@ The quickest and easiest way to get up and running with ClickHouse is to create
    Enter password for default user:
    ```
    You should see the following output:
+
    ```response
    ClickHouse has been successfully installed.
@@ -51,10 +66,45 @@ The quickest and easiest way to get up and running with ClickHouse is to create
    ```
 
 1. Run the following command to start the ClickHouse server:
+
+    ```bash
+    sudo clickhouse start
+    ```
+
+</TabItem>
+<TabItem value="macos" label="macOS">
+
+1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable:
     ```bash
-    sudo clickhouse start
+    curl https://clickhouse.com/ | sh
     ```
 
+1. Run the ClickHouse server:
+
+    ```bash
+    ./clickhouse server
+    ```
+
+1. Open a new terminal and use the **clickhouse-client** to connect to your service:
+
+    ```bash
+    ./clickhouse client
+    ```
+
+    ```response
+    ./clickhouse client
+    ClickHouse client version 23.2.1.1501 (official build).
+    Connecting to localhost:9000 as user default.
+    Connected to ClickHouse server version 23.2.1 revision 54461.
+
+    local-host :)
+    ```
+
+    You are ready to start sending DDL and SQL commands to ClickHouse!
+
+</TabItem>
+</Tabs>
+
 :::tip
 The [Quick Start](/docs/en/quick-start.mdx/#step-1-get-clickhouse) walks through the steps to download and run ClickHouse, connect to it, and insert data.
 :::
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 71c712f73a4..f960d2df98e 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -3948,3 +3948,71 @@ Default value: `0`.
 :::note
 Use this setting only for backward compatibility if your use cases depend on old syntax.
 :::
+
+## final {#final}
+
+Automatically applies the [FINAL](../../sql-reference/statements/select/from/#final-modifier) modifier to all tables in a query for which [FINAL](../../sql-reference/statements/select/from/#final-modifier) is applicable, including joined tables, tables in subqueries, and distributed tables.
+
+Possible values:
+
+- 0 - disabled
+- 1 - enabled
+
+Default value: `0`.
+
+Example:
+
+```sql
+CREATE TABLE test
+(
+    key Int64,
+    some String
+)
+ENGINE = ReplacingMergeTree
+ORDER BY key;
+
+INSERT INTO test FORMAT Values (1, 'first');
+INSERT INTO test FORMAT Values (1, 'second');
+
+SELECT * FROM test;
+┌─key─┬─some───┐
+│   1 │ second │
+└─────┴────────┘
+┌─key─┬─some──┐
+│   1 │ first │
+└─────┴───────┘
+
+SELECT * FROM test SETTINGS final = 1;
+┌─key─┬─some───┐
+│   1 │ second │
+└─────┴────────┘
+
+SET final = 1;
+SELECT * FROM test;
+┌─key─┬─some───┐
+│   1 │ second │
+└─────┴────────┘
+```
+
+## asterisk_include_materialized_columns {#asterisk_include_materialized_columns}
+
+Include [MATERIALIZED](../../sql-reference/statements/create/table/#materialized) columns for wildcard query (`SELECT *`).
+
+Possible values:
+
+- 0 - disabled
+- 1 - enabled
+
+Default value: `0`.
+
+## asterisk_include_alias_columns {#asterisk_include_alias_columns}
+
+Include [ALIAS](../../sql-reference/statements/create/table/#alias) columns for wildcard query (`SELECT *`).
+
+Possible values:
+
+- 0 - disabled
+- 1 - enabled
+
+Default value: `0`.
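+
+Example (mirroring the `ALIAS` example from the [CREATE TABLE](../../sql-reference/statements/create/table.md#alias) docs):
+
+```sql
+CREATE TABLE test
+(
+    id UInt64,
+    size_bytes Int64,
+    size String ALIAS formatReadableSize(size_bytes)
+)
+ENGINE = MergeTree
+ORDER BY id;
+
+INSERT INTO test VALUES (1, 4678899);
+
+SELECT * FROM test SETTINGS asterisk_include_alias_columns = 1;
+┌─id─┬─size_bytes─┬─size─────┐
+│  1 │    4678899 │ 4.46 MiB │
+└────┴────────────┴──────────┘
+```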
diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md
index d1f0e44f6b4..50e15f70f5d 100644
--- a/docs/en/sql-reference/functions/string-replace-functions.md
+++ b/docs/en/sql-reference/functions/string-replace-functions.md
@@ -1,7 +1,7 @@
 ---
 slug: /en/sql-reference/functions/string-replace-functions
 sidebar_position: 42
-sidebar_label: For Replacing in Strings
+sidebar_label: Replacing in Strings
 ---
 
 # Functions for Searching and Replacing in Strings
diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md
index e52eb00ab44..2f660d820d1 100644
--- a/docs/en/sql-reference/functions/string-search-functions.md
+++ b/docs/en/sql-reference/functions/string-search-functions.md
@@ -1,7 +1,7 @@
 ---
 slug: /en/sql-reference/functions/string-search-functions
 sidebar_position: 41
-sidebar_label: For Searching in Strings
+sidebar_label: Searching in Strings
 ---
 
 # Functions for Searching in Strings
@@ -382,12 +382,12 @@ Checks whether string `haystack` matches the regular expression `pattern`. The p
 Returns 1 in case of a match, and 0 otherwise.
 
 Matching is based on UTF-8, e.g. `.` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes. The regular expression must not contain null bytes.
-If the haystack or pattern contain a sequence of bytes that are not valid UTF-8, the behavior is undefined.
-No automatic Unicode normalization is performed, if you need it you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
+If the haystack or the pattern are not valid UTF-8, then the behavior is undefined.
+No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
 
 Unlike re2's default behavior, `.` matches line breaks. To disable this, prepend the pattern with `(?-s)`.
 
-For patterns to search for substrings in a string, it is better to use LIKE or ‘position’, since they work much faster.
+For patterns to search for substrings in a string, it is better to use functions [like](#like) or [position](#position) since they work much faster.
 
 ## multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\])
 
@@ -529,21 +529,25 @@ Result:
 
 ## like(haystack, pattern), haystack LIKE pattern operator
 
-Checks whether a string matches a simple regular expression.
-The regular expression can contain the metasymbols `%` and `_`.
+Checks whether a string matches a LIKE expression.
+A LIKE expression contains a mix of normal characters and the following metasymbols:
 
-`%` indicates any quantity of any bytes (including zero characters).
+- `%` indicates an arbitrary number of arbitrary characters (including zero characters).
 
-`_` indicates any one byte.
+- `_` indicates a single arbitrary character.
 
-Use the backslash (`\`) for escaping metasymbols. See the note on escaping in the description of the ‘match’ function.
+- `\` is for escaping literals `%`, `_` and `\`.
 
 Matching is based on UTF-8, e.g. `_` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes.
 
-If the haystack or pattern contain a sequence of bytes that are not valid UTF-8, then the behavior is undefined.
-No automatic Unicode normalization is performed, if you need it you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
+If the haystack or the pattern are not valid UTF-8, then the behavior is undefined.
+No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
 
-For regular expressions like `%needle%`, the code is more optimal and works as fast as the `position` function.
-For other regular expressions, the code is the same as for the ‘match’ function.
+To match against literals `%`, `_` and `\` (which are LIKE metacharacters), prepend them with a backslash, i.e. `\%`, `\_` and `\\`.
+The backslash loses its special meaning, i.e. is interpreted literally, if it precedes a character other than `%`, `_` or `\`.
+Note that ClickHouse requires backslashes in strings [to be quoted as well](../syntax.md#string), so you would actually need to write `\\%`, `\\_` and `\\\\`.
+
+For patterns of the form `%needle%`, the function is as fast as the `position` function.
+Other LIKE expressions are internally converted to a regular expression and executed with a performance similar to function `match`.
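+
+A short illustration of these rules (results shown as TSV; note the doubled backslashes required by string-literal escaping):
+
+```sql
+SELECT 'abc' LIKE 'a%', 'a_c' LIKE 'a\\_c', 'abc' LIKE 'a\\_c' FORMAT TSV;
+```
+
+```response
+1	1	0
+```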
 
 ## notLike(haystack, pattern), haystack NOT LIKE pattern operator
diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md
index 1905e53af3e..4a6780df292 100644
--- a/docs/en/sql-reference/functions/tuple-map-functions.md
+++ b/docs/en/sql-reference/functions/tuple-map-functions.md
@@ -1,8 +1,8 @@
 ---
 slug: /en/sql-reference/functions/tuple-map-functions
 sidebar_position: 46
-sidebar_label: Working with maps
-title: "Functions for maps"
+sidebar_label: Maps
+title: "Functions for Maps"
 ---
 
 ## map
@@ -440,7 +440,7 @@ mapApply(func, map)
 
 **Parameters**
 
-- `func` - [Lamda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
+- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
 - `map` — [Map](../../sql-reference/data-types/map.md).
 
 **Returned value**
@@ -480,7 +480,7 @@ mapFilter(func, map)
 
 **Parameters**
 
-- `func` - [Lamda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
+- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
 - `map` — [Map](../../sql-reference/data-types/map.md).
 
 **Returned value**
diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md
index 119f25d6d00..9e66afba613 100644
--- a/docs/en/sql-reference/statements/create/table.md
+++ b/docs/en/sql-reference/statements/create/table.md
@@ -127,6 +127,26 @@ Default expressions may be defined as an arbitrary expression from table constan
 
 Normal default value. If the INSERT query does not specify the corresponding column, it will be filled in by computing the corresponding expression.
 
+Example:
+
+```sql
+CREATE OR REPLACE TABLE test
+(
+    id UInt64,
+    updated_at DateTime DEFAULT now(),
+    updated_at_date Date DEFAULT toDate(updated_at)
+)
+ENGINE = MergeTree
+ORDER BY id;
+
+INSERT INTO test (id) Values (1);
+
+SELECT * FROM test;
+┌─id─┬──────────updated_at─┬─updated_at_date─┐
+│  1 │ 2023-02-24 17:06:46 │      2023-02-24 │
+└────┴─────────────────────┴─────────────────┘
+```
+
 ### MATERIALIZED
 
 `MATERIALIZED expr`
@@ -135,6 +155,36 @@ Materialized expression. Such a column can’t be specified for INSERT, because
 For an INSERT without a list of columns, these columns are not considered.
 In addition, this column is not substituted when using an asterisk in a SELECT query. This is to preserve the invariant that the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
 
+Example:
+
+```sql
+CREATE OR REPLACE TABLE test
+(
+    id UInt64,
+    updated_at DateTime MATERIALIZED now(),
+    updated_at_date Date MATERIALIZED toDate(updated_at)
+)
+ENGINE = MergeTree
+ORDER BY id;
+
+INSERT INTO test Values (1);
+
+SELECT * FROM test;
+┌─id─┐
+│  1 │
+└────┘
+
+SELECT id, updated_at, updated_at_date FROM test;
+┌─id─┬──────────updated_at─┬─updated_at_date─┐
+│  1 │ 2023-02-24 17:08:08 │      2023-02-24 │
+└────┴─────────────────────┴─────────────────┘
+
+SELECT * FROM test SETTINGS asterisk_include_materialized_columns=1;
+┌─id─┬──────────updated_at─┬─updated_at_date─┐
+│  1 │ 2023-02-24 17:08:08 │      2023-02-24 │
+└────┴─────────────────────┴─────────────────┘
+```
+
 ### EPHEMERAL
 
 `EPHEMERAL [expr]`
@@ -142,6 +192,34 @@ In addition, this column is not substituted when using an asterisk in a SELECT q
 Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. If `expr` is omitted, the type for the column is required.
 INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
+Example:
+
+```sql
+CREATE OR REPLACE TABLE test
+(
+    id UInt64,
+    unhexed String EPHEMERAL,
+    hexed FixedString(4) DEFAULT unhex(unhexed)
+)
+ENGINE = MergeTree
+ORDER BY id;
+
+INSERT INTO test (id, unhexed) Values (1, '5a90b714');
+
+SELECT
+    id,
+    hexed,
+    hex(hexed)
+FROM test
+FORMAT Vertical;
+
+Row 1:
+──────
+id:         1
+hexed:      Z��
+hex(hexed): 5A90B714
+```
+
 ### ALIAS
 
 `ALIAS expr`
@@ -156,6 +234,29 @@ If you add a new column to a table but later change its default expression, the
 
 It is not possible to set default values for elements in nested data structures.
 
+```sql
+CREATE OR REPLACE TABLE test
+(
+    id UInt64,
+    size_bytes Int64,
+    size String Alias formatReadableSize(size_bytes)
+)
+ENGINE = MergeTree
+ORDER BY id;
+
+INSERT INTO test Values (1, 4678899);
+
+SELECT id, size_bytes, size FROM test;
+┌─id─┬─size_bytes─┬─size─────┐
+│  1 │    4678899 │ 4.46 MiB │
+└────┴────────────┴──────────┘
+
+SELECT * FROM test SETTINGS asterisk_include_alias_columns=1;
+┌─id─┬─size_bytes─┬─size─────┐
+│  1 │    4678899 │ 4.46 MiB │
+└────┴────────────┴──────────┘
+```
+
 ## Primary Key
 
 You can define a [primary key](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries) when creating a table. Primary key can be specified in two ways:
diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md
index b751384cb72..fb6c1f94902 100644
--- a/docs/en/sql-reference/statements/select/from.md
+++ b/docs/en/sql-reference/statements/select/from.md
@@ -36,6 +36,8 @@ Queries that use `FINAL` are executed slightly slower than similar queries that
 
 **In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven't happened yet and deal with it by applying aggregation (for example, to discard duplicates).
 
+`FINAL` can be applied automatically to all tables in a query using the [final](../../../operations/settings/settings.md#final) setting, e.g. via a session or a user profile.
+
 ## Implementation Details
 
 If the `FROM` clause is omitted, data will be read from the `system.one` table.
diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md
index bde37241ed0..63c5042f9e8 100644
--- a/docs/en/sql-reference/syntax.md
+++ b/docs/en/sql-reference/syntax.md
@@ -86,10 +86,10 @@ Examples: `1`, `10_000_000`, `0xffff_ffff`, `18446744073709551615`, `0xDEADBEEF`
 String literals must be enclosed in single quotes, double quotes are not supported.
 Escaping works either
 
-- in SQL-style based on a preceding single quote where the single-quote character `'` (and only this character) can be escaped as `''`, or
-- in C-style based on a preceding backslash with the following supported escape sequences: `\\`, `\'`, `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. The backslash loses its special meaning and will be interpreted literally if it precedes characters different than the listed ones.
+- using a preceding single quote where the single-quote character `'` (and only this character) can be escaped as `''`, or
+- using a preceding backslash with the following supported escape sequences: `\\`, `\'`, `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. The backslash loses its special meaning, i.e. will be interpreted literally, if it precedes characters other than the listed ones.
 
-In string literals, you need to escape at least `'` and `\` using escape codes `''` (or: `\'`) and `\\`.
+In string literals, you need to escape at least `'` and `\` using escape codes `\'` (or: `''`) and `\\`.
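+
+For example, the following two literals denote the same string (`It's`), using SQL-style and C-style escaping respectively:
+
+```sql
+SELECT 'It''s', 'It\'s';
+```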
 
 ### Compound
diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
index a8186c20026..59d49830852 100644
--- a/docs/en/sql-reference/window-functions/index.md
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -567,7 +567,7 @@ SELECT
     ts,
     value,
     round(avg(value) OVER (PARTITION BY metric ORDER BY toDate(ts)
-        Range BETWEEN 10 PRECEDING AND CURRENT ROW),2) moving_avg_10_days_temp
+        Range BETWEEN 10 PRECEDING AND CURRENT ROW),2) AS moving_avg_10_days_temp
 FROM sensors
 ORDER BY
     metric ASC,
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index f62520d3007..8e092bdf8e4 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -1,5 +1,8 @@
 #include "LocalServer.h"
 
+#include
+#include
+#include
 #include
 #include
 #include
@@ -179,9 +182,9 @@ void LocalServer::tryInitPath()
 
             parent_folder = std::filesystem::temp_directory_path();
 
         }
-        catch (const fs::filesystem_error& e)
+        catch (const fs::filesystem_error & e)
         {
-            // tmp folder don't exists? misconfiguration? chroot?
+            // The tmp folder doesn't exist? Is it a misconfiguration? Or chroot?
             LOG_DEBUG(log, "Can not get temporary folder: {}", e.what());
             parent_folder = std::filesystem::current_path();
@@ -390,6 +393,21 @@ try
     std::cout << std::fixed << std::setprecision(3);
     std::cerr << std::fixed << std::setprecision(3);
 
+    /// Try to increase limit on number of open files.
+    {
+        rlimit rlim;
+        if (getrlimit(RLIMIT_NOFILE, &rlim))
+            throw Poco::Exception("Cannot getrlimit");
+
+        if (rlim.rlim_cur < rlim.rlim_max)
+        {
+            rlim.rlim_cur = config().getUInt("max_open_files", static_cast<unsigned>(rlim.rlim_max));
+            int rc = setrlimit(RLIMIT_NOFILE, &rlim);
+            if (rc != 0)
+                std::cerr << fmt::format("Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, errnoToString()) << '\n';
+        }
+    }
+
 #if defined(FUZZING_MODE)
     static bool first_time = true;
     if (first_time)
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 3964aa5e565..8c60f840b89 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -6,7 +6,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h
index a2b5ed9c493..eaffb04e2a9 100644
--- a/src/AggregateFunctions/AggregateFunctionGroupArray.h
+++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h
@@ -63,6 +63,9 @@ static constexpr const char * getNameByTrait()
 template <typename T>
 struct GroupArraySamplerData
 {
+    /// For easy serialization.
+    static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
+
     // Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
     using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
     using Array = PODArray<T, 32, Allocator>;
@@ -97,6 +100,9 @@ struct GroupArrayNumericData;
 template <typename T>
 struct GroupArrayNumericData
 {
+    /// For easy serialization.
+ static_assert(std::has_unique_object_representations_v || std::is_floating_point_v); + // Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena using Allocator = MixedAlignedArenaAllocator; using Array = PODArray; diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h index 5a2da8aa459..8d7010c10db 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h @@ -32,6 +32,9 @@ namespace ErrorCodes template struct MovingData { + /// For easy serialization. + static_assert(std::has_unique_object_representations_v || std::is_floating_point_v); + using Accumulator = T; /// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena diff --git a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h index 9737e2e8257..563d56c6f40 100644 --- a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h +++ b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h @@ -117,7 +117,21 @@ public: const auto & value = this->data(place).value; size_t size = value.size(); writeVarUInt(size, buf); - buf.write(reinterpret_cast(value.data()), size * sizeof(value[0])); + + /// In this version, pairs were serialized with padding. + /// We must ensure that padding bytes are zero-filled. + + static_assert(offsetof(typename MaxIntersectionsData::Value, first) == 0); + static_assert(offsetof(typename MaxIntersectionsData::Value, second) > 0); + + char zero_padding[offsetof(typename MaxIntersectionsData::Value, second) - sizeof(value[0].first)]{}; + + for (size_t i = 0; i < size; ++i) + { + writePODBinary(value[i].first, buf); + writePODBinary(zero_padding, buf); + writePODBinary(value[i].second, buf); + } } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override diff --git a/src/Client/ReplxxLineReader.cpp b/src/Client/ReplxxLineReader.cpp index 405cb7af03b..fa5ba8b8ba9 100644 --- a/src/Client/ReplxxLineReader.cpp +++ b/src/Client/ReplxxLineReader.cpp @@ -432,6 +432,7 @@ ReplxxLineReader::ReplxxLineReader( }; rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); +#endif /// Rebind regular incremental search to C-T. 
 ///
@@ -443,7 +444,6 @@ ReplxxLineReader::ReplxxLineReader(
         uint32_t reverse_search = Replxx::KEY::control('R');
         return rx.invoke(Replxx::ACTION::HISTORY_INCREMENTAL_SEARCH, reverse_search);
     });
-#endif
 }
 
 ReplxxLineReader::~ReplxxLineReader()
diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index 96f76b70f31..3ed3ed73328 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -747,7 +747,7 @@ namespace
      */
    template <typename IntType>
    requires (std::is_same_v<IntType, Int32> || std::is_same_v<IntType, UInt32>)
-    void replicateSSE42Int32(const IntType * __restrict data, IntType * __restrict result_data, const IColumn::Offsets & offsets)
+    void replicateSSE2Int32(const IntType * __restrict data, IntType * __restrict result_data, const IColumn::Offsets & offsets)
     {
         const IntType * data_copy_begin_ptr = nullptr;
         size_t offsets_size = offsets.size();
@@ -842,7 +842,7 @@ ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
 #ifdef __SSE2__
     if constexpr (std::is_same_v)
     {
-        replicateSSE42Int32(getData().data(), res->getData().data(), offsets);
+        replicateSSE2Int32(getData().data(), res->getData().data(), offsets);
         return res;
     }
 #endif
diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp
index 1278f9459b5..99073d79bcd 100644
--- a/src/Common/AsynchronousMetrics.cpp
+++ b/src/Common/AsynchronousMetrics.cpp
@@ -533,9 +533,16 @@ void AsynchronousMetrics::update(TimePoint update_time)
     AsynchronousMetricValues new_values;
 
     auto current_time = std::chrono::system_clock::now();
-    auto time_after_previous_update [[maybe_unused]] = current_time - previous_update_time;
+    auto time_after_previous_update = current_time - previous_update_time;
     previous_update_time = update_time;
 
+    double update_interval = 0.;
+    if (first_run)
+        update_interval = update_period.count();
+    else
+        update_interval = std::chrono::duration_cast<std::chrono::microseconds>(time_after_previous_update).count() / 1e6;
+    new_values["AsynchronousMetricsUpdateInterval"] = { update_interval, "Metrics update interval" };
+
     /// This is also a good indicator of system responsiveness.
     new_values["Jitter"] = { std::chrono::duration_cast<std::chrono::nanoseconds>(current_time - update_time).count() / 1e9,
         "The difference in time the thread for calculation of the asynchronous metrics was scheduled to wake up and the time it was, in fact, woken up."
diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp
index 9f9a78c4036..46c171b5cb6 100644
--- a/src/Common/ThreadStatus.cpp
+++ b/src/Common/ThreadStatus.cpp
@@ -237,6 +237,7 @@ void ThreadStatus::setFatalErrorCallback(std::function<void()> callback)
 
 void ThreadStatus::onFatalError()
 {
+    std::lock_guard lock(thread_group->mutex);
     if (fatal_error_callback)
         fatal_error_callback();
 }
diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp
index 4056e27ad52..60179fd5317 100644
--- a/src/Daemon/BaseDaemon.cpp
+++ b/src/Daemon/BaseDaemon.cpp
@@ -134,6 +134,8 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
 }
 
 
+static std::atomic<bool> fatal_error_printed{false};
+
 /** Handler for "fault" or diagnostic signals. Send data about fault to separate thread to write into log.
   */
 static void signalHandler(int sig, siginfo_t * info, void * context)
@@ -159,7 +161,16 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
     if (sig != SIGTSTP) /// This signal is used for debugging.
     {
         /// The time that is usually enough for separate thread to print info into log.
- sleepForSeconds(20); /// FIXME: use some feedback from threads that process stacktrace + /// Under MSan full stack unwinding with DWARF info about inline functions takes 101 seconds in one case. + for (size_t i = 0; i < 300; ++i) + { + /// We will synchronize with the thread printing the messages with an atomic variable to finish earlier. + if (fatal_error_printed) + break; + + /// This coarse method of synchronization is perfectly ok for fatal signals. + sleepForSeconds(1); + } call_default_signal_handler(sig); } @@ -309,7 +320,9 @@ private: } if (auto logs_queue = thread_ptr->getInternalTextLogsQueue()) + { DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace); + } } std::string signal_description = "Unknown signal"; @@ -407,6 +420,8 @@ private: /// When everything is done, we will try to send these error messages to client. if (thread_ptr) thread_ptr->onFatalError(); + + fatal_error_printed = true; } }; diff --git a/src/DataTypes/registerDataTypeDateTime.cpp b/src/DataTypes/registerDataTypeDateTime.cpp index 5a3e10656b4..8080179ad47 100644 --- a/src/DataTypes/registerDataTypeDateTime.cpp +++ b/src/DataTypes/registerDataTypeDateTime.cpp @@ -64,7 +64,7 @@ static DataTypePtr create(const ASTPtr & arguments) return std::make_shared(); const auto scale = getArgument(arguments, 0, "scale", "DateTime"); - const auto timezone = getArgument(arguments, !!scale, "timezone", "DateTime"); + const auto timezone = getArgument(arguments, scale ? 1 : 0, "timezone", "DateTime"); if (!scale && !timezone) throw Exception::createDeprecated(getExceptionMessage(" has wrong type: ", 0, "scale", "DateTime", Field::Types::Which::UInt64), diff --git a/src/Functions/FunctionsHashing.h b/src/Functions/FunctionsHashing.h index 75505a173cd..e824546f220 100644 --- a/src/Functions/FunctionsHashing.h +++ b/src/Functions/FunctionsHashing.h @@ -1112,7 +1112,9 @@ private: { ToType h; if constexpr (std::endian::native == std::endian::little) + { h = apply(key, reinterpret_cast(&vec_from[i]), sizeof(vec_from[i])); + } else { char tmp_buffer[sizeof(vec_from[i])]; @@ -1131,7 +1133,9 @@ private: ToType h; if constexpr (std::endian::native == std::endian::little) + { h = apply(key, reinterpret_cast(&value), sizeof(value)); + } else { char tmp_buffer[sizeof(value)]; @@ -1271,7 +1275,7 @@ private: { /// NOTE: here, of course, you can do without the materialization of the column. 
ColumnPtr full_column = col_from_const->convertToFullColumn(); - executeArray(key, type, &*full_column, vec_to); + executeArray(key, type, full_column.get(), vec_to); } else throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", @@ -1283,6 +1287,10 @@ private: { WhichDataType which(from_type); + if (icolumn->size() != vec_to.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}", + icolumn->getName(), icolumn->size(), vec_to.size(), getName()); + if (which.isUInt8()) executeIntType(key, icolumn, vec_to); else if (which.isUInt16()) executeIntType(key, icolumn, vec_to); else if (which.isUInt32()) executeIntType(key, icolumn, vec_to); @@ -1343,10 +1351,9 @@ private: const auto & type_map = assert_cast(*type); executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first); } - else if (const auto * const_map = checkAndGetColumnConstData(column)) + else if (const auto * const_map = checkAndGetColumnConst(column)) { - const auto & type_map = assert_cast(*type); - executeForArgument(key, type_map.getNestedType().get(), const_map->getNestedColumnPtr().get(), vec_to, is_first); + executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first); } else { @@ -1382,8 +1389,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - size_t rows = input_rows_count; - auto col_to = ColumnVector::create(rows); + auto col_to = ColumnVector::create(input_rows_count); typename ColumnVector::Container & vec_to = col_to->getData(); @@ -1395,7 +1401,7 @@ public: if (arguments.size() <= first_data_argument) { /// Return a fixed random-looking magic number when input is empty - vec_to.assign(rows, static_cast(0xe28dbde7fe22e41c)); + vec_to.assign(input_rows_count, static_cast(0xe28dbde7fe22e41c)); } KeyType key{}; diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index 1c30dee0482..1f25794536b 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -26,6 +26,7 @@ # pragma GCC diagnostic pop #endif + namespace DB { @@ -66,12 +67,12 @@ ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes( if (!low_cardinality_type) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Incompatible type for low cardinality column: {}", + "Incompatible type for LowCardinality column: {}", column.type->getName()); if (can_be_executed_on_default_arguments) { - /// Normal case, when function can be executed on values's default. + /// Normal case, when function can be executed on values' default. column.column = low_cardinality_column->getDictionary().getNestedColumn(); indexes = low_cardinality_column->getIndexesPtr(); } @@ -280,6 +281,7 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType auto res = executeWithoutLowCardinalityColumns(columns_without_low_cardinality, dictionary_type, new_input_rows_count, dry_run); bool res_is_constant = isColumnConst(*res); + auto keys = res_is_constant ? res->cloneResized(1)->convertToFullColumnIfConst() : res; diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index db8dd55474e..fe38ac49d62 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -23,9 +23,10 @@ namespace ErrorCodes namespace impl { -/// Is the [I]LIKE expression reduced to finding a substring in a string? 
+/// Is the [I]LIKE expression equivalent to a substring search?
 inline bool likePatternIsSubstring(std::string_view pattern, String & res)
 {
+    /// TODO: ignore multiple leading or trailing %
     if (pattern.size() < 2 || !pattern.starts_with('%') || !pattern.ends_with('%'))
         return false;
 
@@ -45,9 +46,25 @@ inline bool likePatternIsSubstring(std::string_view pattern, String & res)
             case '\\':
                 ++pos;
                 if (pos == end)
+                    /// pattern ends with \% --> trailing % is to be taken literally and pattern doesn't qualify for substring search
                     return false;
                 else
-                    res += *pos;
+                {
+                    switch (*pos)
+                    {
+                        /// Known LIKE escape sequences:
+                        case '%':
+                        case '_':
+                        case '\\':
+                            res += *pos;
+                            break;
+                        /// For all other escape sequences, the backslash loses its special meaning
+                        default:
+                            res += '\\';
+                            res += *pos;
+                            break;
+                    }
+                }
                 break;
             default:
                 res += *pos;
diff --git a/src/Functions/URL/domain.h b/src/Functions/URL/domain.h
index 64362edf2c3..91f83a457be 100644
--- a/src/Functions/URL/domain.h
+++ b/src/Functions/URL/domain.h
@@ -75,6 +75,7 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
 
     Pos dot_pos = nullptr;
     Pos colon_pos = nullptr;
+    bool has_sub_delims = false;
     bool has_at_symbol = false;
     bool has_terminator_after_colon = false;
     const auto * start_of_host = pos;
@@ -97,25 +98,35 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
         case '@': /// myemail@gmail.com
             if (has_terminator_after_colon) return std::string_view{};
             if (has_at_symbol) goto done;
+            has_sub_delims = false;
             has_at_symbol = true;
             start_of_host = pos + 1;
             break;
+        case ';':
+        case '=':
+        case '&':
+        case '~':
+        case '%':
+            /// Symbols above are sub-delims in RFC3986 and should be
+            /// allowed for userinfo (named identification here).
+            ///
+            /// NOTE: Those symbols are allowed for reg-name (host) too,
+            /// but right now host parsing looks more like RFC1034 (in
+            /// other words, domains that are allowed to be registered).
+            has_sub_delims = true;
+            continue;
         case ' ': /// restricted symbols in whole URL
         case '\t':
         case '<':
         case '>':
-        case '%':
         case '{':
         case '}':
         case '|':
         case '\\':
         case '^':
-        case '~':
         case '[':
         case ']':
-        case ';':
-        case '=':
-        case '&':
             if (colon_pos == nullptr)
                 return std::string_view{};
             else
@@ -124,6 +135,8 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
     }
 
 done:
+    if (has_sub_delims)
+        return std::string_view{};
+
     if (!has_at_symbol)
         pos = colon_pos ? colon_pos : pos;
 
     return checkAndReturnHost(pos, dot_pos, start_of_host);
diff --git a/src/Functions/URL/netloc.cpp b/src/Functions/URL/netloc.cpp
index 5c23f7cc06d..bc34e34a40d 100644
--- a/src/Functions/URL/netloc.cpp
+++ b/src/Functions/URL/netloc.cpp
@@ -7,6 +7,7 @@
 namespace DB
 {
 
+/// NOTE: Implementation is not RFC3986 compatible
 struct ExtractNetloc
 {
     /// We use the same as domain function
@@ -67,6 +68,7 @@ struct ExtractNetloc
 
         /// Now pos points to the first byte after scheme (if there is).
         bool has_identification = false;
+        Pos hostname_end = end;
         Pos question_mark_pos = end;
         Pos slash_pos = end;
         Pos start_of_host = pos;
@@ -90,25 +92,39 @@ struct ExtractNetloc
                     return std::string_view(start_of_host, pos - start_of_host);
                 case '@': /// foo:bar@example.ru
                     has_identification = true;
+                    hostname_end = end;
                     break;
+                case ';':
+                case '=':
+                case '&':
+                case '~':
+                case '%':
+                    /// Symbols above are sub-delims in RFC3986 and should be
+                    /// allowed for userinfo (named identification here).
+                    ///
+                    /// NOTE: Those symbols are allowed for reg-name (host) too,
+                    /// but right now host parsing looks more like RFC1034 (in
+                    /// other words, domains that are allowed to be registered).
+                    if (!has_identification)
+                    {
+                        hostname_end = pos;
+                        break;
+                    }
+                    [[fallthrough]];
                 case ' ': /// restricted symbols in whole URL
                 case '\t':
                 case '<':
                 case '>':
-                case '%':
                 case '{':
                 case '}':
                 case '|':
                 case '\\':
                 case '^':
-                case '~':
                 case '[':
                 case ']':
-                case ';':
-                case '=':
-                case '&':
                     return pos > start_of_host
-                        ? std::string_view(start_of_host, std::min(std::min(pos - 1, question_mark_pos), slash_pos) - start_of_host)
+                        ? std::string_view(start_of_host, std::min(std::min(pos, question_mark_pos), slash_pos) - start_of_host)
                         : std::string_view();
             }
         }
 
@@ -116,7 +132,7 @@ struct ExtractNetloc
         if (has_identification)
             return std::string_view(start_of_host, pos - start_of_host);
         else
-            return std::string_view(start_of_host, std::min(std::min(pos, question_mark_pos), slash_pos) - start_of_host);
+            return std::string_view(start_of_host, std::min(std::min(std::min(pos, question_mark_pos), slash_pos), hostname_end) - start_of_host);
     }
 
     static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
diff --git a/src/Functions/checkHyperscanRegexp.cpp b/src/Functions/checkHyperscanRegexp.cpp
index e6fbc3baa1a..441e35cc5db 100644
--- a/src/Functions/checkHyperscanRegexp.cpp
+++ b/src/Functions/checkHyperscanRegexp.cpp
@@ -48,12 +48,12 @@ bool SlowWithHyperscanChecker::isSlowOneRepeat(std::string_view regexp)
     re2_st::StringPiece haystack(regexp.data(), regexp.size());
     re2_st::StringPiece matches[2];
     size_t start_pos = 0;
-    while (start_pos < regexp.size())
+    while (start_pos < haystack.size())
     {
-        if (searcher_one_repeat.Match(haystack, start_pos, regexp.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 2))
+        if (searcher_one_repeat.Match(haystack, start_pos, haystack.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 2))
         {
             const auto & match = matches[0];
-            start_pos += (matches[0].data() - haystack.data()) + match.length(); // fwd by prefix + match length
+            start_pos = (matches[0].data() - haystack.data()) + match.size(); // new start pos = prefix before match + match length
             const auto & submatch = matches[1];
             if (isLargerThanFifty({submatch.data(), submatch.size()}))
                 return true;
@@ -70,12 +70,12 @@ bool SlowWithHyperscanChecker::isSlowTwoRepeats(std::string_view regexp)
     re2_st::StringPiece haystack(regexp.data(), regexp.size());
     re2_st::StringPiece matches[3];
     size_t start_pos = 0;
-    while (start_pos < regexp.size())
+    while (start_pos < haystack.size())
     {
-        if (searcher_two_repeats.Match(haystack, start_pos, regexp.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 3))
+        if (searcher_two_repeats.Match(haystack, start_pos, haystack.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 3))
         {
             const auto & match = matches[0];
-            start_pos += (matches[0].data() - haystack.data()) + match.length(); // fwd by prefix + match length
+            start_pos = (matches[0].data() - haystack.data()) + match.size(); // new start pos = prefix before match + match length
             const auto & submatch1 = matches[1];
             const auto & submatch2 = matches[2];
             if (isLargerThanFifty({submatch1.data(), submatch1.size()})
diff --git a/src/Functions/likePatternToRegexp.h b/src/Functions/likePatternToRegexp.h
index 298137d7bc1..6c0b6241ae2 100644
--- a/src/Functions/likePatternToRegexp.h
+++ b/src/Functions/likePatternToRegexp.h
@@ -21,7 +21,12 @@ inline String likePatternToRegexp(std::string_view pattern)
     const char * const end =
pattern.begin() + pattern.size(); if (pos < end && *pos == '%') - ++pos; + /// Eat leading % + while (++pos < end) + { + if (*pos != '%') + break; + } else res = "^"; @@ -29,7 +34,18 @@ inline String likePatternToRegexp(std::string_view pattern) { switch (*pos) { - case '^': case '$': case '.': case '[': case '|': case '(': case ')': case '?': case '*': case '+': case '{': + /// Quote characters which have a special meaning in re2 + case '^': + case '$': + case '.': + case '[': + case '|': + case '(': + case ')': + case '?': + case '*': + case '+': + case '{': res += '\\'; res += *pos; break; @@ -44,22 +60,23 @@ inline String likePatternToRegexp(std::string_view pattern) break; case '\\': if (pos + 1 == end) - throw Exception(ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE, "Invalid escape sequence at the end of LIKE pattern"); - /// Known escape sequences. - if (pos[1] == '%' || pos[1] == '_') + throw Exception(ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE, "Invalid escape sequence at the end of LIKE pattern '{}'", pattern); + switch (pos[1]) { - res += pos[1]; - ++pos; - } - else if (pos[1] == '\\') - { - res += "\\\\"; - ++pos; - } - else - { - /// Unknown escape sequence treated literally: as backslash and the following character. - res += "\\\\"; + /// Interpret quoted LIKE metacharacters %, _ and \ as literals: + case '%': + case '_': + res += pos[1]; + ++pos; + break; + case '\\': + res += "\\\\"; /// backslash has a special meaning in re2 --> quote it + ++pos; + break; + /// Unknown escape sequence treated literally: as backslash (which must be quoted in re2) + the following character + default: + res += "\\\\"; + break; } break; default: diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 3160c5ddb43..7fd4d3aa6d5 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -332,7 +332,7 @@ public: } size_t col_key_size = sub_map_column->size(); - auto column = is_const? ColumnConst::create(std::move(sub_map_column), std::move(col_key_size)) : std::move(sub_map_column); + auto column = is_const ? ColumnConst::create(std::move(sub_map_column), std::move(col_key_size)) : std::move(sub_map_column); ColumnsWithTypeAndName new_arguments = { diff --git a/src/Functions/trap.cpp b/src/Functions/trap.cpp index 44deb901b0d..6260056ef31 100644 --- a/src/Functions/trap.cpp +++ b/src/Functions/trap.cpp @@ -27,6 +27,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int CANNOT_ALLOCATE_MEMORY; extern const int CANNOT_DLOPEN; + extern const int LOGICAL_ERROR; } @@ -174,6 +175,10 @@ public: if (!handle) throw Exception(ErrorCodes::CANNOT_DLOPEN, "Cannot dlopen: ({})", dlerror()); // NOLINT(concurrency-mt-unsafe) // MT-Safe on Linux, see man dlerror } + else if (mode == "logical error") + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: trap"); + } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown trap mode"); } diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 5bc72c5ff62..a3a3c318f5a 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -224,9 +224,9 @@ namespace detail enum class InitializeError { - RETRIABLE_ERROR, + RETRYABLE_ERROR, /// If error is not retriable, `exception` variable must be set. 
-        NON_RETRIABLE_ERROR,
+        NON_RETRYABLE_ERROR,
         /// Allows to skip not found urls for globs
         SKIP_NOT_FOUND_URL,
         NONE,
     };
@@ -398,7 +398,7 @@ namespace detail
             }
             else if (!isRetriableError(http_status))
             {
-                initialization_error = InitializeError::NON_RETRIABLE_ERROR;
+                initialization_error = InitializeError::NON_RETRYABLE_ERROR;
                 exception = std::current_exception();
             }
             else
@@ -409,7 +409,7 @@ namespace detail
         }
 
         /**
-         * Throws if error is retriable, otherwise sets initialization_error = NON_RETRIABLE_ERROR and
+         * Throws if error is retryable, otherwise sets initialization_error = NON_RETRYABLE_ERROR and
          * saves exception into `exception` variable. In case url is not found and skip_not_found_url == true,
          * sets initialization_error = SKIP_NOT_FOUND_URL, otherwise throws.
          */
@@ -453,9 +453,9 @@ namespace detail
 
                 /// Retry 200OK
                 if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_OK)
-                    initialization_error = InitializeError::RETRIABLE_ERROR;
+                    initialization_error = InitializeError::RETRYABLE_ERROR;
                 else
-                    initialization_error = InitializeError::NON_RETRIABLE_ERROR;
+                    initialization_error = InitializeError::NON_RETRYABLE_ERROR;
 
                 return;
             }
@@ -544,7 +544,7 @@ namespace detail
             {
                 initialize();
 
-                if (initialization_error == InitializeError::NON_RETRIABLE_ERROR)
+                if (initialization_error == InitializeError::NON_RETRYABLE_ERROR)
                 {
                     assert(exception);
                     break;
@@ -553,7 +553,7 @@ namespace detail
                 {
                     return false;
                 }
-                else if (initialization_error == InitializeError::RETRIABLE_ERROR)
+                else if (initialization_error == InitializeError::RETRYABLE_ERROR)
                 {
                     LOG_ERROR(
                         log,
@@ -582,10 +582,13 @@ namespace detail
             }
             catch (const Poco::Exception & e)
             {
-                /**
-                 * Retry request unconditionally if nothing has been read yet.
-                 * Otherwise if it is GET method retry with range header.
-                 */
+                /// Too many open files - non-retryable.
+                if (e.code() == POCO_EMFILE)
+                    throw;
+
+                /** Retry request unconditionally if nothing has been read yet.
+                  * Otherwise if it is GET method retry with range header.
+                  */
                 bool can_retry_request = !offset_from_begin_pos || method == Poco::Net::HTTPRequest::HTTP_GET;
                 if (!can_retry_request)
                     throw;
diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp
index c10c0d10bed..bfb010b6105 100644
--- a/src/Interpreters/ActionsDAG.cpp
+++ b/src/Interpreters/ActionsDAG.cpp
@@ -1946,6 +1946,9 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
     }
 
     auto conjunction = getConjunctionNodes(predicate, allowed_nodes);
+
+    if (conjunction.rejected.size() == 1 && WhichDataType{conjunction.rejected.front()->result_type}.isFloat())
+        return nullptr;
+
     auto actions = cloneActionsForConjunction(conjunction.allowed, all_inputs);
     if (!actions)
         return nullptr;
@@ -2011,10 +2014,12 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
             node.children.swap(new_children);
             *predicate = std::move(node);
         }
-        else
+        else if (!WhichDataType{new_children.front()->result_type}.isFloat())
         {
             /// If type is different, cast column.
             /// This case is possible, cause AND can use any numeric type as argument.
+            /// But casting floats to UInt8 or Bool produces different results,
+            /// so we can't apply this optimization to them.
             Node node;
             node.type = ActionType::COLUMN;
             node.result_name = predicate->result_type->getName();
@@ -2036,8 +2041,20 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
     else
     {
         /// Predicate is function AND, which still has more than one argument.
+        /// Or there is only one argument that is a float and we can't just
/// Just update the children and rebuild it. predicate->children.swap(new_children); + if (WhichDataType{predicate->children.front()->result_type}.isFloat()) + { + Node node; + node.type = ActionType::COLUMN; + node.result_name = "1"; + node.column = DataTypeUInt8().createColumnConst(0, 1u); + node.result_type = std::make_shared<DataTypeUInt8>(); + const auto * const_col = &nodes.emplace_back(std::move(node)); + predicate->children.emplace_back(const_col); + } auto arguments = prepareFunctionArguments(predicate->children); FunctionOverloadResolverPtr func_builder_and = std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionAnd>()); diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 2132c6d39f1..975e0da66ce 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -147,6 +147,7 @@ void DatabaseCatalog::initializeAndLoadTemporaryDatabase() unused_dir_hide_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_hide_timeout_sec", unused_dir_hide_timeout_sec); unused_dir_rm_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_rm_timeout_sec", unused_dir_rm_timeout_sec); unused_dir_cleanup_period_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_cleanup_period_sec", unused_dir_cleanup_period_sec); + drop_error_cooldown_sec = getContext()->getConfigRef().getInt64("database_catalog_drop_error_cooldown_sec", drop_error_cooldown_sec); auto db_for_temporary_and_external_tables = std::make_shared<DatabaseMemory>(TEMPORARY_DATABASE, getContext()); attachDatabase(TEMPORARY_DATABASE, db_for_temporary_and_external_tables); diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 11a855ebd79..4200373018d 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -279,7 +279,6 @@ private: bool maybeRemoveDirectory(const String & disk_name, const DiskPtr & disk, const String & unused_dir); static constexpr size_t reschedule_time_ms = 100; - static constexpr time_t drop_error_cooldown_sec = 5; mutable std::mutex databases_mutex; @@ -326,6 +325,9 @@ private: time_t unused_dir_rm_timeout_sec = default_unused_dir_rm_timeout_sec; static constexpr time_t default_unused_dir_cleanup_period_sec = 24 * 60 * 60; /// 1 day time_t unused_dir_cleanup_period_sec = default_unused_dir_cleanup_period_sec; + + static constexpr time_t default_drop_error_cooldown_sec = 5; + time_t drop_error_cooldown_sec = default_drop_error_cooldown_sec; }; /// This class is useful when creating a table or database.
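Why the float special case in the ActionsDAG hunks above matters: CAST to UInt8 truncates, while AND only tests for non-zero, so replacing a float conjunct with a cast column would change which rows pass the filter. A minimal sketch of the mismatch (plain Python for illustration; not code from this patch):

    f = 0.1
    assert int(f) == 0       # CAST(0.1 AS UInt8) -> 0: a cast column would drop the row
    assert bool(f) is True   # AND(0.1, ...) treats 0.1 as true: the row must be kept
    # Hence the patch keeps the AND and, when only one float argument remains,
    # pads it with a constant 1 instead of collapsing it into a cast.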
diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 4db61501d3d..56b81b3d224 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -118,6 +118,15 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) alias_node->checkSize(data.settings.max_expanded_ast_elements); ast = alias_node->clone(); ast->setAlias(node_alias); + + /// If the cloned AST was finished, this one should also be considered finished + if (data.finished_asts.contains(alias_node)) + data.finished_asts[ast] = ast; + + /// If we had an alias for node_alias, point it instead to the new node so we don't have to revisit it + /// on subsequent calls + if (auto existing_alias = data.aliases.find(node_alias); existing_alias != data.aliases.end()) + existing_alias->second = ast; } } else @@ -127,6 +136,15 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) auto alias_name = ast->getAliasOrColumnName(); ast = alias_node->clone(); ast->setAlias(alias_name); + + /// If the cloned AST was finished, this one should also be considered finished + if (data.finished_asts.contains(alias_node)) + data.finished_asts[ast] = ast; + + /// If we had an alias for node_alias, point it instead to the new node so we don't have to revisit it + /// on subsequent calls + if (auto existing_alias = data.aliases.find(node_alias); existing_alias != data.aliases.end()) + existing_alias->second = ast; } } } diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index 0d2032bed78..e6e1a03f11c 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -362,11 +362,14 @@ void ServerAsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_tim const auto time_after_previous_update = current_time - heavy_metric_previous_update_time; const bool update_heavy_metric = time_after_previous_update >= heavy_metric_update_period || first_run; + Stopwatch watch; if (update_heavy_metric) { heavy_metric_previous_update_time = update_time; - - Stopwatch watch; + if (first_run) + heavy_update_interval = heavy_metric_update_period.count(); + else + heavy_update_interval = std::chrono::duration_cast<std::chrono::microseconds>(time_after_previous_update).count() / 1e6; /// Tests show that listing 100000 entries consumes around 0.15 sec. updateDetachedPartsStats(); @@ -390,7 +393,9 @@ void ServerAsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_tim watch.elapsedSeconds()); } + new_values["AsynchronousHeavyMetricsCalculationTimeSpent"] = { watch.elapsedSeconds(), "Time in seconds spent for calculation of asynchronous heavy (tables related) metrics (this is the overhead of asynchronous metrics)." }; + new_values["AsynchronousHeavyMetricsUpdateInterval"] = { heavy_update_interval, "Heavy (tables related) metrics update interval" }; new_values["NumberOfDetachedParts"] = { detached_parts_stats.count, "The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself if the part is broken, unexpected or unneeded. The server does not care about detached parts and they can be removed." }; new_values["NumberOfDetachedByUserParts"] = { detached_parts_stats.detached_by_user, "The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts).
The server does not care about detached parts and they can be removed." }; diff --git a/src/Interpreters/ServerAsynchronousMetrics.h b/src/Interpreters/ServerAsynchronousMetrics.h index 81047e2fdf9..8243699a111 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.h +++ b/src/Interpreters/ServerAsynchronousMetrics.h @@ -21,6 +21,7 @@ private: const Duration heavy_metric_update_period; TimePoint heavy_metric_previous_update_time; + double heavy_update_interval = 0.; struct DetachedPartsStats { diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index c233060e646..435401796a0 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -1276,6 +1276,12 @@ void executeQuery( std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete, &istr); auto & pipeline = streams.pipeline; + QueryResultDetails result_details + { + .query_id = context->getClientInfo().current_query_id, + .timezone = DateLUT::instance().getTimeZone(), + }; + std::unique_ptr<WriteBuffer> compressed_buffer; try { @@ -1334,9 +1340,8 @@ void executeQuery( out->onProgress(progress); }); - if (set_result_details) - set_result_details( - context->getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::instance().getTimeZone()); + result_details.content_type = out->getContentType(); + result_details.format = format_name; pipeline.complete(std::move(out)); } @@ -1345,6 +1350,9 @@ void executeQuery( pipeline.setProgressCallback(context->getProgressCallback()); } + if (set_result_details) + set_result_details(result_details); + if (pipeline.initialized()) { CompletedPipelineExecutor executor(pipeline); diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h index 9c561d8b88c..93152cc1de6 100644 --- a/src/Interpreters/executeQuery.h +++ b/src/Interpreters/executeQuery.h @@ -11,7 +11,15 @@ namespace DB class ReadBuffer; class WriteBuffer; -using SetResultDetailsFunc = std::function<void(const String &, const String &, const String &, const String &)>; +struct QueryResultDetails +{ + String query_id; + std::optional<String> content_type; + std::optional<String> format; + std::optional<String> timezone; +}; + +using SetResultDetailsFunc = std::function<void(const QueryResultDetails &)>; /// Parse and execute a query. void executeQuery( diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index fe8ebacec15..2936c2e9ef1 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -113,10 +113,8 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F if (group_by_with_cube) s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH CUBE" << (s.hilite ?
hilite_none : ""); - if (group_by_with_grouping_sets) + if (group_by_with_grouping_sets && groupBy()) { - if (!groupBy()) /// sanity check, issue 43049 - throw Exception(ErrorCodes::LOGICAL_ERROR, "Corrupt AST"); auto nested_frame = frame; nested_frame.surround_each_list_element_with_parens = true; nested_frame.expression_list_prepend_whitespace = false; diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index bfee5ed8bf9..e0c79d50141 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -269,6 +269,9 @@ void FillingTransform::transform(Chunk & chunk) if (on_totals) return; + if (!chunk.hasRows() && !generate_suffix) + return; + Columns old_fill_columns; Columns old_interpolate_columns; Columns old_other_columns; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index bea2fe87e6d..d898049209e 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -831,12 +831,20 @@ void HTTPHandler::processQuery( customizeContext(request, context); executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context, - [&response, this] (const String & current_query_id, const String & content_type, const String & format, const String & timezone) + [&response, this] (const QueryResultDetails & details) { - response.setContentType(content_type_override.value_or(content_type)); - response.add("X-ClickHouse-Query-Id", current_query_id); - response.add("X-ClickHouse-Format", format); - response.add("X-ClickHouse-Timezone", timezone); + response.add("X-ClickHouse-Query-Id", details.query_id); + + if (content_type_override) + response.setContentType(*content_type_override); + else if (details.content_type) + response.setContentType(*details.content_type); + + if (details.format) + response.add("X-ClickHouse-Format", *details.format); + + if (details.timezone) + response.add("X-ClickHouse-Timezone", *details.timezone); } ); diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 3715d658730..d8ea359ce5f 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -352,11 +352,15 @@ void MySQLHandler::comQuery(ReadBuffer & payload) format_settings.mysql_wire.max_packet_size = max_packet_size; format_settings.mysql_wire.sequence_id = &sequence_id; - auto set_result_details = [&with_output](const String &, const String &, const String &format, const String &) + auto set_result_details = [&with_output](const QueryResultDetails & details) { - if (format != "MySQLWire") - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "MySQL protocol does not support custom output formats"); - with_output = true; + if (details.format) + { + if (*details.format != "MySQLWire") + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "MySQL protocol does not support custom output formats"); + + with_output = true; + } }; executeQuery(should_replace ? replacement : payload, *out, false, query_context, set_result_details, format_settings); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 870a34fa665..a307b472a64 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -611,6 +611,8 @@ void TCPHandler::runImpl() /// It is important to destroy query context here. We do not want it to live arbitrarily longer than the query. 
query_context.reset(); + CurrentThread::setFatalErrorCallback({}); + if (is_interserver_mode) { /// We don't really have a session in interserver mode; a new one is created for each query. It's better to reset it now. diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 50fb7dffa34..7b97273d8af 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -175,22 +175,69 @@ namespace const auto CLEANUP_TIMEOUT_MS = 3000; const auto MAX_THREAD_WORK_DURATION_MS = 60000; // once per minute, leave the loop to reschedule (we can't lock threads in the pool forever) - /// Configuration prefix - const String CONFIG_PREFIX = "kafka"; + const String CONFIG_KAFKA_TAG = "kafka"; + const String CONFIG_KAFKA_TOPIC_TAG = "kafka_topic"; + const String CONFIG_NAME_TAG = "name"; - void loadFromConfig(cppkafka::Configuration & conf, const Poco::Util::AbstractConfiguration & config, const std::string & path) + /// Read server configuration into cppkafka configuration, used by global configuration and by legacy per-topic configuration + void loadFromConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) { - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(path, keys); + /// Read all tags one level below + Poco::Util::AbstractConfiguration::Keys tags; + config.keys(config_prefix, tags); - for (const auto & key : keys) + for (const auto & tag : tags) { - const String key_path = path + "." + key; - // log_level has valid underscore, rest librdkafka setting use dot.separated.format - // which is not acceptable for XML. - // See also https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md - const String key_name = (key == "log_level") ? key : boost::replace_all_copy(key, "_", "."); - conf.set(key_name, config.getString(key_path)); + if (tag.starts_with(CONFIG_KAFKA_TOPIC_TAG)) /// multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc. + continue; /// used by new per-topic configuration, ignore + + const String setting_path = config_prefix + "." + tag; + const String setting_value = config.getString(setting_path); + + /// "log_level" has a valid underscore; the remaining librdkafka settings use dot.separated.format, which isn't acceptable for XML. + /// See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md + const String setting_name_in_kafka_config = (tag == "log_level") ? tag : boost::replace_all_copy(tag, "_", "."); + kafka_config.set(setting_name_in_kafka_config, setting_value); + } + } + + /// Read server configuration into cppkafka configuration, used by new per-topic configuration + void loadTopicConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const String & topic) + { + /// Read all tags one level below + Poco::Util::AbstractConfiguration::Keys tags; + config.keys(config_prefix, tags); + + for (const auto & tag : tags) + { + /// Only consider tag <kafka_topic>. Multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc. + if (!tag.starts_with(CONFIG_KAFKA_TOPIC_TAG)) + continue; + + /// Read topic name between <name> ... </name> + const String kafka_topic_path = config_prefix + "." + tag; + const String kafka_topic_name_path = kafka_topic_path + "." + CONFIG_NAME_TAG; + + const String topic_name = config.getString(kafka_topic_name_path); + if (topic_name == topic) + { + /// Found it! Now read the per-topic configuration into cppkafka.
+ Poco::Util::AbstractConfiguration::Keys inner_tags; + config.keys(kafka_topic_path, inner_tags); + for (const auto & inner_tag : inner_tags) + { + if (inner_tag == CONFIG_NAME_TAG) + continue; // ignore + + /// "log_level" has a valid underscore; the remaining librdkafka settings use dot.separated.format, which isn't acceptable for XML. + /// See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md + const String setting_path = kafka_topic_path + "." + inner_tag; + const String setting_value = config.getString(setting_path); + + const String setting_name_in_kafka_config = (inner_tag == "log_level") ? inner_tag : boost::replace_all_copy(inner_tag, "_", "."); + kafka_config.set(setting_name_in_kafka_config, setting_value); + } + } } } } @@ -509,29 +556,33 @@ size_t StorageKafka::getPollTimeoutMillisecond() const String StorageKafka::getConfigPrefix() const { if (!collection_name.empty()) - return "named_collections." + collection_name + "." + CONFIG_PREFIX; /// Add one more level to separate librdkafka configuration. - return CONFIG_PREFIX; + return "named_collections." + collection_name + "." + CONFIG_KAFKA_TAG; /// Add one more level to separate librdkafka configuration. + return CONFIG_KAFKA_TAG; } -void StorageKafka::updateConfiguration(cppkafka::Configuration & conf) +void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config) { - // Update consumer configuration from the configuration + // Update consumer configuration from the configuration. Example: + // <kafka> + //     <retry_backoff_ms>250</retry_backoff_ms> + //     <fetch_min_bytes>100000</fetch_min_bytes> + // </kafka> const auto & config = getContext()->getConfigRef(); auto config_prefix = getConfigPrefix(); if (config.has(config_prefix)) - loadFromConfig(conf, config, config_prefix); + loadFromConfig(kafka_config, config, config_prefix); - #if USE_KRB5 - if (conf.has_property("sasl.kerberos.kinit.cmd")) +#if USE_KRB5 + if (kafka_config.has_property("sasl.kerberos.kinit.cmd")) LOG_WARNING(log, "sasl.kerberos.kinit.cmd configuration parameter is ignored."); - conf.set("sasl.kerberos.kinit.cmd",""); - conf.set("sasl.kerberos.min.time.before.relogin","0"); + kafka_config.set("sasl.kerberos.kinit.cmd",""); + kafka_config.set("sasl.kerberos.min.time.before.relogin","0"); - if (conf.has_property("sasl.kerberos.keytab") && conf.has_property("sasl.kerberos.principal")) + if (kafka_config.has_property("sasl.kerberos.keytab") && kafka_config.has_property("sasl.kerberos.principal")) { - String keytab = conf.get("sasl.kerberos.keytab"); - String principal = conf.get("sasl.kerberos.principal"); + String keytab = kafka_config.get("sasl.kerberos.keytab"); + String principal = kafka_config.get("sasl.kerberos.principal"); LOG_DEBUG(log, "Running KerberosInit"); try { @@ -543,21 +594,47 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & conf) } LOG_DEBUG(log, "Finished KerberosInit"); } - #else // USE_KRB5 - if (conf.has_property("sasl.kerberos.keytab") || conf.has_property("sasl.kerberos.principal")) - LOG_WARNING(log, "Kerberos-related parameters are ignored because ClickHouse was built without support of krb5 library."); - #endif // USE_KRB5 +#else // USE_KRB5 + if (kafka_config.has_property("sasl.kerberos.keytab") || kafka_config.has_property("sasl.kerberos.principal")) + LOG_WARNING(log, "Ignoring Kerberos-related parameters because ClickHouse was built without krb5 library support."); +#endif // USE_KRB5 - // Update consumer topic-specific configuration + // Update consumer topic-specific configuration (legacy syntax, retained for compatibility).
Example with topic "football": + // <kafka_football> + //     <retry_backoff_ms>250</retry_backoff_ms> + //     <fetch_min_bytes>100000</fetch_min_bytes> + // </kafka_football> + // The legacy syntax has the problem that periods in topic names (e.g. "sports.football") are not supported because the Poco + // configuration framework hierarchy is based on periods as level separators. Besides that, per-topic tags at the same level + // as <kafka> are ugly. for (const auto & topic : topics) { const auto topic_config_key = config_prefix + "_" + topic; if (config.has(topic_config_key)) - loadFromConfig(conf, config, topic_config_key); + loadFromConfig(kafka_config, config, topic_config_key); } + // Update consumer topic-specific configuration (new syntax). Example with topics "football" and "baseball": + // <kafka> + //     <kafka_topic> + //         <name>football</name> + //         <retry_backoff_ms>250</retry_backoff_ms> + //         <fetch_min_bytes>5000</fetch_min_bytes> + //     </kafka_topic> + //     <kafka_topic> + //         <name>baseball</name> + //         <retry_backoff_ms>300</retry_backoff_ms> + //         <fetch_min_bytes>2000</fetch_min_bytes> + //     </kafka_topic> + // </kafka> + // Advantages: The period restriction no longer applies (e.g. sports.football will work), everything + // Kafka-related is below <kafka>. + for (const auto & topic : topics) + if (config.has(config_prefix)) + loadTopicConfig(kafka_config, config, config_prefix, topic); + // No need to add any prefix, messages can be distinguished - conf.set_log_callback([this](cppkafka::KafkaHandleBase &, int level, const std::string & facility, const std::string & message) + kafka_config.set_log_callback([this](cppkafka::KafkaHandleBase &, int level, const std::string & facility, const std::string & message) { auto [poco_level, client_logs_level] = parseSyslogLevel(level); LOG_IMPL(log, client_logs_level, poco_level, "[rdk:{}] {}", facility, message); @@ -573,13 +650,13 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & conf) int status; - status = rd_kafka_conf_interceptor_add_on_new(conf.get_handle(), + status = rd_kafka_conf_interceptor_add_on_new(kafka_config.get_handle(), "init", StorageKafkaInterceptors::rdKafkaOnNew, self); if (status != RD_KAFKA_RESP_ERR_NO_ERROR) LOG_ERROR(log, "Cannot set new interceptor due to {} error", status); // cppkafka always copies the configuration - status = rd_kafka_conf_interceptor_add_on_conf_dup(conf.get_handle(), + status = rd_kafka_conf_interceptor_add_on_conf_dup(kafka_config.get_handle(), "init", StorageKafkaInterceptors::rdKafkaOnConfDup, self); if (status != RD_KAFKA_RESP_ERR_NO_ERROR) LOG_ERROR(log, "Cannot set dup conf interceptor due to {} error", status); diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 890eb5a82e6..3559129cf74 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -126,7 +126,7 @@ private: std::atomic<bool> shutdown_called = false; // Update Kafka configuration with values from CH user configuration. - void updateConfiguration(cppkafka::Configuration & conf); + void updateConfiguration(cppkafka::Configuration & kafka_config); String getConfigPrefix() const; void threadFunc(size_t idx); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 06b4cb07f1f..85420cabb8d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -880,7 +880,7 @@ void IMergeTreeDataPart::writeMetadata(const String & filename, const WriteSetti } catch (...)
{ - tryLogCurrentException("DataPartStorageOnDiskFull"); + tryLogCurrentException("IMergeTreeDataPart"); } throw; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0d4e54453d7..e36ab125f97 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1893,11 +1893,11 @@ void MergeTreeData::stopOutdatedDataPartsLoadingTask() /// (Only files on the first level of nesting are considered). static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_path, time_t threshold) { - if (!disk->isDirectory(directory_path) || disk->getLastModified(directory_path).epochTime() >= threshold) + if (!disk->isDirectory(directory_path) || disk->getLastModified(directory_path).epochTime() > threshold) return false; for (auto it = disk->iterateDirectory(directory_path); it->isValid(); it->next()) - if (disk->getLastModified(it->path()).epochTime() >= threshold) + if (disk->getLastModified(it->path()).epochTime() > threshold) return false; return true; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index b1ac2e2ba0f..0ca29e2826a 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -44,6 +44,27 @@ #include +namespace +{ + +using namespace DB; +bool columnIsPhysical(ColumnDefaultKind kind) +{ + return kind == ColumnDefaultKind::Default || kind == ColumnDefaultKind::Materialized; +} +bool columnDefaultKindHasSameType(ColumnDefaultKind lhs, ColumnDefaultKind rhs) +{ + if (lhs == rhs) + return true; + + if (columnIsPhysical(lhs) == columnIsPhysical(rhs)) + return true; + + return false; +} + +} + namespace DB { @@ -172,11 +193,13 @@ std::optional<NameSet> StorageMerge::supportedPrewhereColumns() const NameSet supported_columns; - std::unordered_map<std::string, std::pair<const IDataType *, std::optional<ColumnDefault>>> column_type_default; + std::unordered_map<std::string, std::pair<const IDataType *, ColumnDefaultKind>> column_info; for (const auto & name_type : columns.getAll()) { - column_type_default.emplace(name_type.name, std::make_pair( - name_type.type.get(), columns.getDefault(name_type.name))); + const auto & column_default = columns.getDefault(name_type.name).value_or(ColumnDefault{}); + column_info.emplace(name_type.name, std::make_pair( + name_type.type.get(), + column_default.kind)); supported_columns.emplace(name_type.name); } @@ -191,11 +214,10 @@ std::optional<NameSet> StorageMerge::supportedPrewhereColumns() const const auto & table_columns = table_metadata_ptr->getColumns(); for (const auto & column : table_columns.getAll()) { - const auto & root_type_default = column_type_default[column.name]; - const IDataType * root_type = root_type_default.first; - const std::optional<ColumnDefault> & src_default = root_type_default.second; + const auto & column_default = table_columns.getDefault(column.name).value_or(ColumnDefault{}); + const auto & [root_type, src_default_kind] = column_info[column.name]; if ((root_type && !root_type->equals(*column.type)) || - src_default != table_columns.getDefault(column.name)) + !columnDefaultKindHasSameType(src_default_kind, column_default.kind)) { supported_columns.erase(column.name); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 0de44e62899..429339386b4 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -905,17 +905,16 @@ void StorageReplicatedMergeTree::drop() /// in this case, has_metadata_in_zookeeper = false, and we also permit to drop the table.
bool maybe_has_metadata_in_zookeeper = !has_metadata_in_zookeeper.has_value() || *has_metadata_in_zookeeper; + zkutil::ZooKeeperPtr zookeeper; if (maybe_has_metadata_in_zookeeper) { /// Table can be shut down, restarting thread is not active /// and calling StorageReplicatedMergeTree::getZooKeeper()/getAuxiliaryZooKeeper() won't suffice. - zkutil::ZooKeeperPtr zookeeper = getZooKeeperIfTableShutDown(); + zookeeper = getZooKeeperIfTableShutDown(); /// If probably there is metadata in ZooKeeper, we don't allow to drop the table. if (!zookeeper) throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Can't drop readonly replicated table (need to drop data in ZooKeeper as well)"); - - dropReplica(zookeeper, zookeeper_path, replica_name, log, getSettings()); } /// Wait for loading of all outdated parts because @@ -929,10 +928,17 @@ void StorageReplicatedMergeTree::drop() } dropAllData(); + + if (maybe_has_metadata_in_zookeeper) + { + /// Session could expire, get it again + zookeeper = getZooKeeperIfTableShutDown(); + dropReplica(zookeeper, zookeeper_path, replica_name, log, getSettings(), &has_metadata_in_zookeeper); + } } void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, - Poco::Logger * logger, MergeTreeSettingsPtr table_settings) + Poco::Logger * logger, MergeTreeSettingsPtr table_settings, std::optional<bool> * has_metadata_out) { if (zookeeper->expired()) throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Table was not dropped because ZooKeeper session has expired."); @@ -990,12 +996,16 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con Coordination::errorMessage(code), remote_replica_path); /// And finally remove everything else recursively - zookeeper->tryRemoveRecursive(remote_replica_path); - } + /// It may leave some garbage if the replica_path subtree is concurrently modified + zookeeper->tryRemoveChildrenRecursive(remote_replica_path); - /// It may left some garbage if replica_path subtree are concurrently modified - if (zookeeper->exists(remote_replica_path)) - LOG_ERROR(logger, "Replica was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", remote_replica_path); + /// Update has_metadata_in_zookeeper to avoid retries. Otherwise we can accidentally remove metadata of a new table on retries + if (has_metadata_out) + *has_metadata_out = false; + + if (zookeeper->tryRemove(remote_replica_path) != Coordination::Error::ZOK) + LOG_ERROR(logger, "Replica was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", remote_replica_path); + } /// Check that `zookeeper_path` exists: it could have been deleted by another replica after execution of previous line. Strings replicas; @@ -8183,6 +8193,12 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co auto shared_id = getTableSharedID(); if (shared_id == toString(UUIDHelpers::Nil)) { + if (zookeeper->exists(zookeeper_path)) + { + LOG_WARNING(log, "Not removing shared data for part {} because replica does not have metadata in ZooKeeper, " + "but table path exists and other replicas may exist.
It may leave some garbage on S3", part.name); + return std::make_pair(false, NameSet{}); + } LOG_TRACE(log, "Part {} blobs can be removed, because table {} completely dropped", part.name, getStorageID().getNameForLogs()); return std::make_pair(true, NameSet{}); } @@ -8208,9 +8224,18 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co return std::make_pair(true, NameSet{}); } - /// If table was completely dropped (no meta in zookeeper) we can safely remove parts if (has_metadata_in_zookeeper.has_value() && !has_metadata_in_zookeeper) + { + if (zookeeper->exists(zookeeper_path)) + { + LOG_WARNING(log, "Not removing shared data for part {} because replica does not have metadata in ZooKeeper, " + "but table path exists and other replicas may exist. It may leave some garbage on S3", part.name); + return std::make_pair(false, NameSet{}); + } + + /// If table was completely dropped (no meta in zookeeper) we can safely remove parts + return std::make_pair(true, NameSet{}); + } /// We remove parts during table shutdown. If an exception happens, the restarting thread will already be turned /// off and nobody will reconnect our zookeeper connection. In this case we use zookeeper connection from diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 5a8f06d0a63..46c78e9064a 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -229,7 +229,7 @@ public: /** Remove a specific replica from zookeeper. */ static void dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica, - Poco::Logger * logger, MergeTreeSettingsPtr table_settings = nullptr); + Poco::Logger * logger, MergeTreeSettingsPtr table_settings = nullptr, std::optional<bool> * has_metadata_out = nullptr); /// Removes table from ZooKeeper after the last replica was dropped static bool removeTableNodesFromZooKeeper(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 932964ddcd6..3c2fa05016f 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -9,7 +9,7 @@ from github import Github from build_download_helper import get_build_name_for_check, read_build_urls from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import post_commit_status +from commit_status_helper import format_description, post_commit_status from docker_pull_helper import get_image_with_version from env_helper import ( GITHUB_REPOSITORY, @@ -145,11 +145,13 @@ if __name__ == "__main__": with open( os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8" ) as desc_f: - description = desc_f.readline().rstrip("\n")[:140] + description = desc_f.readline().rstrip("\n") except: status = "failure" description = "Task failed: $?=" + str(retcode) + description = format_description(description) + test_result = TestResult(description, "OK") if "fail" in status: test_result.status = "FAIL" diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index f37b2656be3..ecc36b1c4e3 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -9,6 +9,10 @@ import time from shutil import rmtree from typing import List, Tuple +from ccache_utils import get_ccache_if_not_exists, upload_ccache +from ci_config import CI_CONFIG, BuildConfig +from commit_status_helper import get_commit_filtered_statuses, get_commit +from docker_pull_helper import
get_image_with_version from env_helper import ( CACHES_PATH, GITHUB_JOB, @@ -18,18 +22,17 @@ from env_helper import ( S3_DOWNLOAD, TEMP_PATH, ) -from s3_helper import S3Helper +from get_robot_token import get_best_robot_token +from github_helper import GitHub from pr_info import PRInfo +from s3_helper import S3Helper +from tee_popen import TeePopen from version_helper import ( ClickHouseVersion, Git, get_version_from_repo, update_version_local, ) -from ccache_utils import get_ccache_if_not_exists, upload_ccache -from ci_config import CI_CONFIG, BuildConfig -from docker_pull_helper import get_image_with_version -from tee_popen import TeePopen IMAGE_NAME = "clickhouse/binary-builder" BUILD_LOG_NAME = "build_log.log" @@ -122,8 +125,7 @@ def check_for_success_run( logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix, "") logging.info("Checking for artifacts in %s", logged_prefix) try: - # TODO: theoretically, it would miss performance artifact for pr==0, - # but luckily we rerun only really failed tasks now, so we're safe + # Performance artifacts are now part of the regular build, so we're safe build_results = s3_helper.list_prefix(s3_prefix) except Exception as ex: logging.info("Got exception while listing %s: %s\nRerun", logged_prefix, ex) @@ -231,6 +233,29 @@ def upload_master_static_binaries( print(f"::notice ::Binary static URL: {url}") +def mark_failed_reports_pending(build_name: str, sha: str) -> None: + try: + gh = GitHub(get_best_robot_token()) + commit = get_commit(gh, sha) + statuses = get_commit_filtered_statuses(commit) + report_status = [ + name + for name, builds in CI_CONFIG["builds_report_config"].items() + if build_name in builds + ][0] + for status in statuses: + if status.context == report_status and status.state in ["failure", "error"]: + logging.info( + "Commit already has a failed status for '%s', setting it to 'pending'", + report_status, + ) + commit.create_status( + "pending", status.url, "Set to pending on rerun", report_status + ) + except: # we do not care about any exception here + logging.info("Failed to get or mark the reports status as pending, continue") + + def main(): logging.basicConfig(level=logging.INFO) @@ -260,6 +285,9 @@ def main(): # put them as github actions artifact (result) check_for_success_run(s3_helper, s3_path_prefix, build_name, build_config) + # If it's a later rerun, we need to mark possibly failed report statuses as pending + mark_failed_reports_pending(build_name, pr_info.sha) + docker_image = get_image_with_version(IMAGES_PATH, IMAGE_NAME) image_version = docker_image.version diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index 147b49d910e..7987ea2643f 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -508,7 +508,7 @@ def main(): logging.getLogger("git_helper").setLevel(logging.DEBUG) token = args.token or get_best_robot_token() - gh = GitHub(token, create_cache_dir=False, per_page=100) + gh = GitHub(token, create_cache_dir=False) bp = Backport(gh, args.repo, args.dry_run) # https://github.com/python/mypy/issues/3004 bp.gh.cache_path = f"{TEMP_PATH}/gh_cache" # type: ignore diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index 785250c3904..9b2b9e4bcd9 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -3,18 +3,20 @@ import csv import os import time -from typing import List +from typing import List, Literal import logging -from ci_config import CI_CONFIG, REQUIRED_CHECKS -from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL from github import
Github from github.Commit import Commit from github.CommitStatus import CommitStatus + +from ci_config import CI_CONFIG, REQUIRED_CHECKS +from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL from pr_info import PRInfo, SKIP_MERGEABLE_CHECK_LABEL RETRY = 5 CommitStatuses = List[CommitStatus] +MERGEABLE_NAME = "Mergeable Check" def override_status(status: str, check_name: str, invert: bool = False) -> str: @@ -102,59 +104,69 @@ def post_labels(gh: Github, pr_info: PRInfo, labels_names: List[str]) -> None: pull_request.add_to_labels(label) -def fail_mergeable_check(commit: Commit, description: str) -> None: - commit.create_status( - context="Mergeable Check", - description=description, - state="failure", - target_url=GITHUB_RUN_URL, - ) +def format_description(description: str) -> str: + if len(description) > 140: + description = description[:137] + "..." + return description -def reset_mergeable_check(commit: Commit, description: str = "") -> None: +def set_mergeable_check( + commit: Commit, + description: str = "", + state: Literal["success", "failure"] = "success", +) -> None: commit.create_status( - context="Mergeable Check", + context=MERGEABLE_NAME, description=description, - state="success", + state=state, target_url=GITHUB_RUN_URL, ) def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None: - if SKIP_MERGEABLE_CHECK_LABEL in pr_info.labels: + not_run = ( + pr_info.labels.intersection({SKIP_MERGEABLE_CHECK_LABEL, "release"}) + or check_name not in REQUIRED_CHECKS + or pr_info.release_pr + or pr_info.number == 0 + ) + if not_run: + # Let's avoid unnecessary work return logging.info("Update Mergeable Check by %s", check_name) commit = get_commit(gh, pr_info.sha) - checks = { - check.context: check.state - for check in filter( - lambda check: (check.context in REQUIRED_CHECKS), - # get_statuses() returns generator, which cannot be reversed - we need comprehension - # pylint: disable=unnecessary-comprehension - reversed([status for status in commit.get_statuses()]), - ) - } + statuses = get_commit_filtered_statuses(commit) + + required_checks = [ + status for status in statuses if status.context in REQUIRED_CHECKS + ] + + mergeable_status = None + for status in statuses: + if status.context == MERGEABLE_NAME: + mergeable_status = status + break success = [] fail = [] - for name, state in checks.items(): - if state == "success": - success.append(name) + for status in required_checks: + if status.state == "success": + success.append(status.context) else: - fail.append(name) + fail.append(status.context) if fail: description = "failed: " + ", ".join(fail) if success: description += "; succeeded: " + ", ".join(success) - if len(description) > 140: - description = description[:137] + "..." - fail_mergeable_check(commit, description) + description = format_description(description) + if mergeable_status is None or mergeable_status.description != description: + set_mergeable_check(commit, description, "failure") return description = ", ".join(success) - if len(description) > 140: - description = description[:137] + "..." 
- reset_mergeable_check(commit, description) + description = format_description(description) + if mergeable_status is None or mergeable_status.description != description: + set_mergeable_check(commit, description) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index d5d1b1a1085..192d216614e 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -14,7 +14,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union from github import Github from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import post_commit_status +from commit_status_helper import format_description, post_commit_status from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo @@ -456,8 +456,7 @@ def main(): else: description = "Nothing to update" - if len(description) >= 140: - description = description[:136] + "..." + description = format_description(description) with open(changed_json, "w", encoding="utf-8") as images_file: json.dump(result_images, images_file) diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index 9a77a91647e..0484ea8f641 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -10,7 +10,7 @@ from typing import List, Dict, Tuple from github import Github from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import post_commit_status +from commit_status_helper import format_description, post_commit_status from env_helper import RUNNER_TEMP from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo @@ -218,8 +218,7 @@ def main(): else: description = "Nothing to update" - if len(description) >= 140: - description = description[:136] + "..." + description = format_description(description) gh = Github(get_best_robot_token(), per_page=100) post_commit_status(gh, pr_info.sha, NAME, description, status, url) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index fbe934367b4..c6854c5aa78 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -15,7 +15,7 @@ from github import Github from build_check import get_release_or_pr from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import post_commit_status +from commit_status_helper import format_description, post_commit_status from docker_images_check import DockerImage from env_helper import CI, GITHUB_RUN_URL, RUNNER_TEMP, S3_BUILDS_BUCKET, S3_DOWNLOAD from get_robot_token import get_best_robot_token, get_parameter_from_ssm @@ -369,8 +369,7 @@ def main(): description = f"Processed tags: {', '.join(tags)}" - if len(description) >= 140: - description = description[:136] + "..." 
+ description = format_description(description) gh = Github(get_best_robot_token(), per_page=100) post_commit_status(gh, pr_info.sha, NAME, description, status, url) diff --git a/tests/ci/github_helper.py b/tests/ci/github_helper.py index 1b8861b92a6..eae0bc1c33b 100644 --- a/tests/ci/github_helper.py +++ b/tests/ci/github_helper.py @@ -35,6 +35,8 @@ class GitHub(github.Github): self._cache_path = Path(CACHE_PATH) if create_cache_dir: self.cache_path = self.cache_path + if not kwargs.get("per_page"): + kwargs["per_page"] = 100 # And set Path super().__init__(*args, **kwargs) self._retries = 0 diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py index 48d0da195ce..54245670b26 100644 --- a/tests/ci/install_check.py +++ b/tests/ci/install_check.py @@ -18,7 +18,11 @@ from clickhouse_helper import ( mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from commit_status_helper import post_commit_status, update_mergeable_check +from commit_status_helper import ( + format_description, + post_commit_status, + update_mergeable_check, +) from compress_files import compress_fast from docker_pull_helper import get_image_with_version, DockerImage from env_helper import CI, TEMP_PATH as TEMP, REPORTS_PATH @@ -341,8 +345,7 @@ def main(): ch_helper = ClickHouseHelper() mark_flaky_tests(ch_helper, args.check_name, test_results) - if len(description) >= 140: - description = description[:136] + "..." + description = format_description(description) post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url) diff --git a/tests/ci/mark_release_ready.py b/tests/ci/mark_release_ready.py index 0ef134b9280..b103dd053bb 100755 --- a/tests/ci/mark_release_ready.py +++ b/tests/ci/mark_release_ready.py @@ -43,7 +43,7 @@ def main(): description = "the release can be created from the commit" args.token = args.token or get_best_robot_token() - gh = GitHub(args.token, create_cache_dir=False, per_page=100) + gh = GitHub(args.token, create_cache_dir=False) # Get the rate limits for a quick fail gh.get_rate_limit() commit = get_commit(gh, args.commit) diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 4a21bfcdd70..fedac48f24d 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -217,7 +217,7 @@ def main(): args = parse_args() logging.info("Going to process PR #%s in repo %s", args.pr, args.repo) token = args.token or get_best_robot_token() - gh = GitHub(token, per_page=100) + gh = GitHub(token) repo = gh.get_repo(args.repo) # An ugly and not nice fix to patch the wrong organization URL, # see https://github.com/PyGithub/PyGithub/issues/2395#issuecomment-1378629710 diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 7119f443719..c6810173f7a 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -7,10 +7,11 @@ from typing import Tuple from github import Github from commit_status_helper import ( + format_description, get_commit, post_labels, remove_labels, - reset_mergeable_check, + set_mergeable_check, ) from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL from get_robot_token import get_best_robot_token @@ -157,7 +158,7 @@ def check_pr_description(pr_info: PRInfo) -> Tuple[str, str]: + second_category + "'" ) - return result_status[:140], category + return result_status, category elif re.match( r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i] @@ -199,6 +200,7 @@ if __name__ == "__main__": pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True) can_run, description, 
labels_state = should_run_checks_for_pr(pr_info) + description = format_description(description) gh = Github(get_best_robot_token(), per_page=100) commit = get_commit(gh, pr_info.sha) @@ -231,7 +233,7 @@ if __name__ == "__main__": if pr_labels_to_remove: remove_labels(gh, pr_info, pr_labels_to_remove) - reset_mergeable_check(commit, "skipped") + set_mergeable_check(commit, "skipped") if description_error: print( @@ -249,7 +251,7 @@ if __name__ == "__main__": ) commit.create_status( context=NAME, - description=description_error[:139], + description=format_description(description_error), state="failure", target_url=url, ) diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index 73802740975..1a6c4d14616 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -10,7 +10,7 @@ from github import Github from build_download_helper import get_build_name_for_check, read_build_urls from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from commit_status_helper import post_commit_status +from commit_status_helper import format_description, post_commit_status from docker_pull_helper import get_image_with_version from env_helper import ( GITHUB_REPOSITORY, @@ -171,11 +171,13 @@ def main(): with open( os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8" ) as desc_f: - description = desc_f.readline().rstrip("\n")[:140] + description = desc_f.readline().rstrip("\n") except: # status = "failure" description = "Task failed: $?=" + str(retcode) + description = format_description(description) + report_url = upload_results( s3_helper, pr_info.number, diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 5a46c4cb2bd..12c40ea1f66 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -30,13 +30,15 @@ def get_options(i, upgrade_check): if i % 2 == 1: join_alg_num = i // 2 - if join_alg_num % 4 == 0: + if join_alg_num % 5 == 0: client_options.append("join_algorithm='parallel_hash'") - if join_alg_num % 4 == 1: + if join_alg_num % 5 == 1: client_options.append("join_algorithm='partial_merge'") - if join_alg_num % 4 == 2: + if join_alg_num % 5 == 2: client_options.append("join_algorithm='full_sorting_merge'") - if join_alg_num % 4 == 3: + if join_alg_num % 5 == 3: + client_options.append("join_algorithm='grace_hash'") + if join_alg_num % 5 == 4: client_options.append("join_algorithm='auto'") client_options.append("max_rows_in_join=1000") @@ -209,7 +211,7 @@ def prepare_for_hung_check(drop_databases): # Even if all clickhouse-test processes are finished, there are probably some sh scripts, # which still run some new queries. Let's ignore them. 
try: - query = """clickhouse client -q "SELECT count() FROM system.processes where where elapsed > 300" """ + query = """clickhouse client -q "SELECT count() FROM system.processes where elapsed > 300" """ output = ( check_output(query, shell=True, stderr=STDOUT, timeout=30) .decode("utf-8") diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 9350785b33b..89878990c2c 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -148,7 +148,7 @@ def main(): if args.push: checkout_head(pr_info) - gh = GitHub(get_best_robot_token(), per_page=100, create_cache_dir=False) + gh = GitHub(get_best_robot_token(), create_cache_dir=False) atexit.register(update_mergeable_check, gh, pr_info, NAME) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 8f2c2811260..51ec789c5f7 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -3264,7 +3264,7 @@ class ClickHouseInstance: sleep_time=0.5, check_callback=lambda x: True, ): - logging.debug(f"Executing query {sql} on {self.name}") + # logging.debug(f"Executing query {sql} on {self.name}") result = None for i in range(retry_count): try: @@ -3283,7 +3283,7 @@ class ClickHouseInstance: return result time.sleep(sleep_time) except Exception as ex: - logging.debug("Retry {} got exception {}".format(i + 1, ex)) + # logging.debug("Retry {} got exception {}".format(i + 1, ex)) time.sleep(sleep_time) if result is not None: diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index a903821071b..3f67fe8e5f7 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -648,24 +648,15 @@ def test_async_backups_to_same_destination(interface): "", ) - ids_succeeded = ( - instance.query( - f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_CREATED'" - ) - .rstrip("\n") - .split("\n") - ) + ids_succeeded = instance.query( + f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_CREATED'" + ).splitlines() - ids_failed = ( - instance.query( - f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_FAILED'" - ) - .rstrip("\n") - .split("\n") - ) + ids_failed = instance.query( + f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_FAILED'" + ).splitlines() assert len(ids_succeeded) == 1 - assert len(ids_failed) <= 1 assert set(ids_succeeded + ids_failed) == set(ids) # Check that the first backup is all right. 
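The switch to str.splitlines() in the backup test above is not purely cosmetic: splitting an empty query result on "\n" yields a phantom empty element, which the old rstrip/split combination silently counted. A quick standalone check (ordinary Python, independent of the test harness):

    assert "".rstrip("\n").split("\n") == [""]          # phantom empty entry
    assert "".splitlines() == []                        # nothing, as intended
    assert "id1\nid2\n".splitlines() == ["id1", "id2"]  # trailing newline handled too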
diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index 43e7682ec1d..43c8adda65a 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -82,8 +82,12 @@ def drop_after_test(): try: yield finally: - node0.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY") - node0.query("DROP DATABASE IF EXISTS mydb ON CLUSTER 'cluster' NO DELAY") + node0.query( + "DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY", + settings={ + "distributed_ddl_task_timeout": 360, + }, + ) backup_id_counter = 0 @@ -138,7 +142,12 @@ def test_concurrent_backups_on_same_node(): # This restore part is added to confirm creating an internal backup & restore work # even when a concurrent backup is stopped - nodes[0].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + nodes[0].query( + f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY", + settings={ + "distributed_ddl_task_timeout": 360, + }, + ) nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") nodes[0].query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl") @@ -158,9 +167,14 @@ def test_concurrent_backups_on_different_nodes(): f"SELECT status FROM system.backups WHERE status == 'CREATING_BACKUP' AND id = '{id}'", "CREATING_BACKUP", ) - assert "Concurrent backups not supported" in nodes[2].query_and_get_error( + assert "Concurrent backups not supported" in nodes[0].query_and_get_error( f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}" ) + assert_eq_with_retry( + nodes[1], + f"SELECT status FROM system.backups WHERE status == 'BACKUP_CREATED' AND id = '{id}'", + "BACKUP_CREATED", + ) def test_concurrent_restores_on_same_node(): @@ -185,11 +199,20 @@ def test_concurrent_restores_on_same_node(): "BACKUP_CREATED", ) - nodes[0].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") - nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC") + nodes[0].query( + f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY", + settings={ + "distributed_ddl_task_timeout": 360, + }, + ) + restore_id = ( + nodes[0] + .query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC") + .split("\t")[0] + ) assert_eq_with_retry( nodes[0], - f"SELECT status FROM system.backups WHERE status == 'RESTORING'", + f"SELECT status FROM system.backups WHERE status == 'RESTORING' AND id == '{restore_id}'", "RESTORING", ) assert "Concurrent restores not supported" in nodes[0].query_and_get_error( @@ -219,8 +242,17 @@ def test_concurrent_restores_on_different_node(): "BACKUP_CREATED", ) - nodes[0].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") - nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC") + nodes[0].query( + f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY", + settings={ + "distributed_ddl_task_timeout": 360, + }, + ) + restore_id = ( + nodes[0] + .query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC") + .split("\t")[0] + ) assert_eq_with_retry( nodes[0], f"SELECT status FROM system.backups WHERE status == 'RESTORING'", @@ -229,3 +261,9 @@ def test_concurrent_restores_on_different_node(): assert "Concurrent restores not supported" in nodes[1].query_and_get_error( f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}" ) + + assert_eq_with_retry( + nodes[0], + f"SELECT status FROM system.backups WHERE status == 'RESTORED' AND id 
== '{restore_id}'", + "RESTORED", + ) diff --git a/tests/integration/test_concurrent_queries_restriction_by_query_kind/test.py b/tests/integration/test_concurrent_queries_restriction_by_query_kind/test.py index 777c35f8b50..830090a1c0d 100644 --- a/tests/integration/test_concurrent_queries_restriction_by_query_kind/test.py +++ b/tests/integration/test_concurrent_queries_restriction_by_query_kind/test.py @@ -9,8 +9,16 @@ cluster = ClickHouseCluster(__file__) node_insert = cluster.add_instance( "node_insert", main_configs=["configs/concurrent_insert_restriction.xml"] ) -node_select = cluster.add_instance( - "node_select", main_configs=["configs/concurrent_select_restriction.xml"] +node_select1 = cluster.add_instance( + "node_select1", main_configs=["configs/concurrent_select_restriction.xml"] +) + +node_select2 = cluster.add_instance( + "node_select2", main_configs=["configs/concurrent_select_restriction.xml"] +) + +node_select3 = cluster.add_instance( + "node_select3", main_configs=["configs/concurrent_select_restriction.xml"] ) @@ -18,7 +26,13 @@ node_select = cluster.add_instance( def started_cluster(): try: cluster.start() - node_select.query( + node_select1.query( + "create table test_concurrent_insert (x UInt64) ENGINE = MergeTree() order by tuple()" + ) + node_select2.query( + "create table test_concurrent_insert (x UInt64) ENGINE = MergeTree() order by tuple()" + ) + node_select3.query( "create table test_concurrent_insert (x UInt64) ENGINE = MergeTree() order by tuple()" ) node_insert.query( @@ -79,7 +93,7 @@ def common_pattern(node, query_kind, restricted_sql, normal_sql, limit, wait_tim def test_select(started_cluster): common_pattern( - node_select, + node_select1, "select", "select sleep(3)", "insert into test_concurrent_insert values (0)", @@ -89,7 +103,7 @@ def test_select(started_cluster): # subquery is not counted execute_with_background( - node_select, + node_select2, "select sleep(3)", "insert into test_concurrent_insert select sleep(3)", 2, @@ -98,7 +112,7 @@ def test_select(started_cluster): # intersect and except are counted common_pattern( - node_select, + node_select3, "select", "select sleep(1) INTERSECT select sleep(1) EXCEPT select sleep(1)", "insert into test_concurrent_insert values (0)", diff --git a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml index bd59694f65a..15239041478 100644 --- a/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml +++ b/tests/integration/test_replicated_merge_tree_s3_zero_copy/configs/config.d/storage_conf.xml @@ -53,4 +53,6 @@ 0 + 3 + 0 diff --git a/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py b/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py index 60a1b9b9746..f0bc12e3125 100644 --- a/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py +++ b/tests/integration/test_replicated_merge_tree_s3_zero_copy/test.py @@ -1,9 +1,11 @@ import logging import random import string +import time import pytest from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager logging.getLogger().setLevel(logging.INFO) logging.getLogger().addHandler(logging.StreamHandler()) @@ -127,3 +129,75 @@ def test_insert_select_replicated(cluster, min_rows_for_wide_part, files_per_par assert len( list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) ) == (3 * FILES_OVERHEAD) + (files_per_part * 
3) + + +def test_drop_table(cluster): + node = list(cluster.instances.values())[0] + node2 = list(cluster.instances.values())[1] + node.query( + "create table test_drop_table (n int) engine=ReplicatedMergeTree('/test/drop_table', '1') order by n partition by n % 99 settings storage_policy='s3'" + ) + node2.query( + "create table test_drop_table (n int) engine=ReplicatedMergeTree('/test/drop_table', '2') order by n partition by n % 99 settings storage_policy='s3'" + ) + node.query("insert into test_drop_table select * from numbers(1000)") + node2.query("system sync replica test_drop_table") + + with PartitionManager() as pm: + pm._add_rule( + { + "probability": 0.01, + "destination": node.ip_address, + "source_port": 2181, + "action": "REJECT --reject-with tcp-reset", + } + ) + pm._add_rule( + { + "probability": 0.01, + "source": node.ip_address, + "destination_port": 2181, + "action": "REJECT --reject-with tcp-reset", + } + ) + + # Will drop in background with retries + node.query("drop table test_drop_table") + + # It should not be possible to create a replica with the same path until the previous one is completely dropped + for i in range(0, 100): + node.query_and_get_answer_with_error( + "create table if not exists test_drop_table (n int) " + "engine=ReplicatedMergeTree('/test/drop_table', '1') " + "order by n partition by n % 99 settings storage_policy='s3'" + ) + time.sleep(0.2) + + # Wait for drop to actually finish + node.wait_for_log_line( + "Removing metadata /var/lib/clickhouse/metadata_dropped/default.test_drop_table", + timeout=60, + look_behind_lines=1000000, + ) + + # It could leave some leftovers, remove them + replicas = node.query_with_retry( + "select name from system.zookeeper where path='/test/drop_table/replicas'" + ) + if "1" in replicas and "test_drop_table" not in node.query("show tables"): + node2.query("system drop replica '1' from table test_drop_table") + + # Just in case table was not created due to connection errors + node.query( + "create table if not exists test_drop_table (n int) engine=ReplicatedMergeTree('/test/drop_table', '1') " + "order by n partition by n % 99 settings storage_policy='s3'" + ) + node.query_with_retry( + "system sync replica test_drop_table", + settings={"receive_timeout": 10}, + retry_count=5, + ) + node2.query("drop table test_drop_table") + assert "1000\t499500\n" == node.query( + "select count(n), sum(n) from test_drop_table" + ) diff --git a/tests/integration/test_storage_kafka/configs/kafka.xml index f05f81f59b7..062c98a2ac7 100644 --- a/tests/integration/test_storage_kafka/configs/kafka.xml +++ b/tests/integration/test_storage_kafka/configs/kafka.xml @@ -9,12 +9,14 @@ XXX: for now these messages will appear in stderr.
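(An added aside, hedged: cgrp, consumer, topic and protocol are standard librdkafka debug contexts; enabling them makes the library trace consumer-group rebalances, topic metadata and protocol round-trips, which is why these extra messages end up on stderr.)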
--> cgrp,consumer,topic,protocol + + + consumer_hang + + 300 + + 6000 + - - - 300 - - 6000 - diff --git a/tests/integration/test_storage_kerberized_kafka/configs/kafka.xml b/tests/integration/test_storage_kerberized_kafka/configs/kafka.xml index 68eea654f1c..61558181fe6 100644 --- a/tests/integration/test_storage_kerberized_kafka/configs/kafka.xml +++ b/tests/integration/test_storage_kerberized_kafka/configs/kafka.xml @@ -15,12 +15,13 @@ kafkauser/instance@TEST.CLICKHOUSE.TECH security false + + consumer_hang + + 300 + + 6000 + - - - 300 - - 6000 - diff --git a/tests/queries/0_stateless/00187_like_regexp_prefix.reference b/tests/queries/0_stateless/00187_like_regexp_prefix.reference index d00491fd7e5..6ed281c757a 100644 --- a/tests/queries/0_stateless/00187_like_regexp_prefix.reference +++ b/tests/queries/0_stateless/00187_like_regexp_prefix.reference @@ -1 +1,2 @@ 1 +1 diff --git a/tests/queries/0_stateless/00187_like_regexp_prefix.sql b/tests/queries/0_stateless/00187_like_regexp_prefix.sql index 7fec21b68ad..430b86e8b39 100644 --- a/tests/queries/0_stateless/00187_like_regexp_prefix.sql +++ b/tests/queries/0_stateless/00187_like_regexp_prefix.sql @@ -1 +1,3 @@ SELECT materialize('prepre_f') LIKE '%pre_f%'; + +SELECT materialize('prepre_f') LIKE '%%%pre_f%'; diff --git a/tests/queries/0_stateless/00398_url_functions.reference b/tests/queries/0_stateless/00398_url_functions.reference index 39d740e55cd..6913b3f4fb7 100644 --- a/tests/queries/0_stateless/00398_url_functions.reference +++ b/tests/queries/0_stateless/00398_url_functions.reference @@ -20,7 +20,10 @@ www.example.com 127.0.0.1 www.example.com www.example.com -www.example.com + + + +www.example4.com example.com example.com example.com @@ -38,7 +41,10 @@ www.example.com 127.0.0.1 www.example.com www.example.com -www.example.com + +example2.com +example3.com +www.example4.com example.com example.com example.com @@ -53,6 +59,10 @@ paul:zozo@example.ru www.example.com www.example.com example.com +foo:foo%@foo.com +foo:foo%41bar@foo.com +foo:foo%41%42bar@foo.com +foo:foo%41bar@foo ====DOMAIN==== com diff --git a/tests/queries/0_stateless/00398_url_functions.sql.j2 b/tests/queries/0_stateless/00398_url_functions.sql.j2 index dd7da2ce6ad..95ac536f18b 100644 --- a/tests/queries/0_stateless/00398_url_functions.sql.j2 +++ b/tests/queries/0_stateless/00398_url_functions.sql.j2 @@ -23,7 +23,10 @@ SELECT domain{{ suffix }}('http://www.example.com?q=4') AS Host; SELECT domain{{ suffix }}('http://127.0.0.1:443/') AS Host; SELECT domain{{ suffix }}('//www.example.com') AS Host; SELECT domain{{ suffix }}('//paul@www.example.com') AS Host; -SELECT domain{{ suffix }}('www.example.com') as Host; +SELECT domain{{ suffix }}('//foo:bar%41%40@e-%41-example1.com') AS Host; +SELECT domain{{ suffix }}('//foo:bar%41%40@example2.com') AS Host; +SELECT domain{{ suffix }}('//foo%41%40:bar@example3.com') AS Host; +SELECT domain{{ suffix }}('www.example4.com') as Host; SELECT domain{{ suffix }}('example.com') as Host; SELECT domainWithoutWWW{{ suffix }}('//paul@www.example.com') AS Host; SELECT domainWithoutWWW{{ suffix }}('http://paul@www.example.com:80/') AS Host; @@ -41,6 +44,10 @@ SELECT netloc('svn+ssh://paul:zozo@example.ru/?q=hello%20world') AS Netloc; SELECT netloc('//www.example.com') AS Netloc; SELECT netloc('www.example.com') as Netloc; SELECT netloc('example.com') as Netloc; +SELECT netloc('http://foo:foo%@foo.com') as Netloc; +SELECT netloc('http://foo:foo%41bar@foo.com') as Netloc; +SELECT netloc('http://foo:foo%41%42bar@foo.com') as Netloc; 
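+-- An added gloss, assuming the documented semantics of netloc (it extracts the user:password@host:port part of a URL): percent-escapes such as %41 ('A') are expected to survive verbatim rather than being decoded, e.g. +-- SELECT netloc('https://u:p%40ss@host.example:8080/x'); -- 'u:p%40ss@host.example:8080'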
+SELECT netloc('http://foo:foo%41bar@foo%41.com') as Netloc; SELECT '====DOMAIN===='; SELECT topLevelDomain('http://paul@www.example.com:80/') AS Domain; diff --git a/tests/queries/0_stateless/01176_mysql_client_interactive.expect b/tests/queries/0_stateless/01176_mysql_client_interactive.expect index 8d23b3bef60..2bb6ba8726b 100755 --- a/tests/queries/0_stateless/01176_mysql_client_interactive.expect +++ b/tests/queries/0_stateless/01176_mysql_client_interactive.expect @@ -12,9 +12,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$MYSQL_CLIENT_BINARY \$MYSQL_CLIENT_OPT" diff --git a/tests/queries/0_stateless/01179_insert_values_semicolon.expect b/tests/queries/0_stateless/01179_insert_values_semicolon.expect index a5f127ec19e..cb22fb8265c 100755 --- a/tests/queries/0_stateless/01179_insert_values_semicolon.expect +++ b/tests/queries/0_stateless/01179_insert_values_semicolon.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: long set basedir [file dirname $argv0] set basename [file tail $argv0] @@ -40,5 +39,5 @@ expect "2020-01-01 00:00:00.000\t2" send -- "DROP TABLE test_01179\r" expect "Ok." -send -- "\4" +send -- "exit\r" expect eof diff --git a/tests/queries/0_stateless/01180_client_syntax_errors.expect b/tests/queries/0_stateless/01180_client_syntax_errors.expect index c1fd0f93510..508ebc3433b 100755 --- a/tests/queries/0_stateless/01180_client_syntax_errors.expect +++ b/tests/queries/0_stateless/01180_client_syntax_errors.expect @@ -10,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" @@ -31,6 +31,7 @@ expect "Syntax error: failed at position 93 ('UInt64'):*" send -- "select (1, 2\r" expect "Syntax error: failed at position 8 ('('):" expect "Unmatched parentheses: (" +expect ":) " send -- "\4" expect eof diff --git a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect index 629698b4565..3bfd454bb1f 100755 --- a/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect +++ b/tests/queries/0_stateless/01293_client_interactive_vertical_multiline.expect @@ -11,9 +11,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } # useful debugging configuration diff --git a/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect b/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect index 6b11b1eee15..1ded43d3fed 100755 --- a/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect +++ b/tests/queries/0_stateless/01293_client_interactive_vertical_singleline.expect @@ -10,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } 
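+# An added gloss (standard Tcl/Expect semantics): a bare eof/timeout pattern is bound to the spawn_id that was current when expect_after ran, so it can silently miss other processes; -i $any_spawn_id attaches the handler to every spawned process, e.g. minimally: +# expect_after { -i $any_spawn_id eof { exp_continue } -i $any_spawn_id timeout { exit 1 } }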
+ -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" diff --git a/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect b/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect index de485383024..a593075bb9a 100755 --- a/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect +++ b/tests/queries/0_stateless/01300_client_save_history_when_terminated_long.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: long set basedir [file dirname $argv0] set basename [file tail $argv0] @@ -11,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" diff --git a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect index 8547be839d4..90e19e077ec 100755 --- a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect +++ b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect @@ -10,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file" diff --git a/tests/queries/0_stateless/01520_client_print_query_id.expect b/tests/queries/0_stateless/01520_client_print_query_id.expect index cbeacc6a4ec..2034483a73d 100755 --- a/tests/queries/0_stateless/01520_client_print_query_id.expect +++ b/tests/queries/0_stateless/01520_client_print_query_id.expect @@ -10,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" @@ -21,10 +21,12 @@ expect ":) " # Make a query send -- "SELECT 'print query id'\r" expect { - "Query id: *" { } + # (?n) - Do not match new lines [1] + # [1]: https://www.tcl.tk/man/tcl8.6/TclCmd/re_syntax.html + -re "(?n)Query id: .*" { } timeout { exit 1 } } -expect "print query id" +expect "'print query id" expect ":) " send -- "\4" diff --git a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect index bf701225605..0faf8f0192b 100755 --- a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect +++ b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: long # This is a 
separate test, because we want to test the interactive mode. # https://github.com/ClickHouse/ClickHouse/issues/19353 diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh index 056613c11b5..617148de5a3 100755 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh @@ -34,9 +34,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i \$any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i \$any_spawn_id timeout { exit 1 } } spawn bash -c "$*" diff --git a/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect b/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect index 223690f1f8b..a7e4b45eb44 100755 --- a/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect +++ b/tests/queries/0_stateless/01755_client_highlight_multi_line_comment_regression.expect @@ -10,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" diff --git a/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect b/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect index 0e06c2f99df..9105cf30f79 100755 --- a/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect +++ b/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: long set basedir [file dirname $argv0] set basename [file tail $argv0] @@ -10,9 +9,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } # history file is not required, in-memory history is enough diff --git a/tests/queries/0_stateless/01933_client_replxx_convert_history.expect b/tests/queries/0_stateless/01933_client_replxx_convert_history.expect index 0c95b630742..69c5ff0118e 100755 --- a/tests/queries/0_stateless/01933_client_replxx_convert_history.expect +++ b/tests/queries/0_stateless/01933_client_replxx_convert_history.expect @@ -10,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } exec bash -c "echo select 1 > $history_file.txt" diff --git a/tests/queries/0_stateless/01945_show_debug_warning.expect b/tests/queries/0_stateless/01945_show_debug_warning.expect index c93635b3b27..4e6dd3e1b0f 100755 --- a/tests/queries/0_stateless/01945_show_debug_warning.expect +++ b/tests/queries/0_stateless/01945_show_debug_warning.expect @@ -14,9 +14,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A 
default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } set Debug_type 0 diff --git a/tests/queries/0_stateless/02003_memory_limit_in_client.expect b/tests/queries/0_stateless/02003_memory_limit_in_client.expect index 4f28fafc1e6..377656fa641 100755 --- a/tests/queries/0_stateless/02003_memory_limit_in_client.expect +++ b/tests/queries/0_stateless/02003_memory_limit_in_client.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: no-fasttest # This is a test for system.warnings. Testing in interactive mode is necessary, # as we want to see certain warnings from client @@ -15,9 +14,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } # diff --git a/tests/queries/0_stateless/02047_client_exception.expect b/tests/queries/0_stateless/02047_client_exception.expect index 69f468907a3..4dfdf211ba2 100755 --- a/tests/queries/0_stateless/02047_client_exception.expect +++ b/tests/queries/0_stateless/02047_client_exception.expect @@ -6,14 +6,14 @@ exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 set history_file $env(CLICKHOUSE_TMP)/$basename.history log_user 0 -set timeout 20 +set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" @@ -31,5 +31,5 @@ expect "Received exception from server" send -- "DROP TABLE test_02047\r" expect "Ok." -send -- "\4" +send -- "exit\r" expect eof diff --git a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect index a9905128ad5..a1454696253 100755 --- a/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect +++ b/tests/queries/0_stateless/02049_clickhouse_local_merge_tree.expect @@ -5,14 +5,14 @@ set basename [file tail $argv0] exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 log_user 0 -set timeout 20 +set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion" @@ -50,5 +50,5 @@ expect "Ok." send -- "drop table t\r" expect "Ok." 
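+# An added gloss: "\4" is the EOT character (Ctrl-D); sending the literal command "exit" presumably ends the session through the client's own command handling, which should be less fragile under expect than a raw end-of-file.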
-send -- "\4" +send -- "exit\r" expect eof diff --git a/tests/queries/0_stateless/02105_backslash_letter_commands.expect b/tests/queries/0_stateless/02105_backslash_letter_commands.expect index 8f8ec1f5abd..984e6f6d2eb 100755 --- a/tests/queries/0_stateless/02105_backslash_letter_commands.expect +++ b/tests/queries/0_stateless/02105_backslash_letter_commands.expect @@ -10,9 +10,9 @@ set timeout 60 match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" @@ -20,15 +20,15 @@ expect ":) " # Send a command send -- "\\ld;\r" -expect "Syntax error: *" +expect "Syntax error: " expect ":) " send -- "\\c;\r" -expect "Syntax error: *" +expect "Syntax error: " expect ":) " send -- " \\l ; \\d; \r" -expect "Syntax error (Multi-statements are not allowed): *" +expect "Syntax error (Multi-statements are not allowed): " expect ":) " send -- " \\l ;\r" diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect b/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect index 4fd430a4a69..5f882ae9824 100755 --- a/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect +++ b/tests/queries/0_stateless/02112_delayed_clickhouse_client_with_queries_file.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: no-parallel set basedir [file dirname $argv0] set basename [file tail $argv0] @@ -11,20 +10,18 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } -system "$basedir/helpers/02112_prepare.sh" -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT --disable_suggestion --interactive --queries-file $basedir/file_02112" +system "echo \"drop table if exists t; create table t(i String) engine=Memory; insert into t select 'test string'\" > $env(CLICKHOUSE_TMP)/file_02112" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT --disable_suggestion --interactive --queries-file $env(CLICKHOUSE_TMP)/file_02112" expect ":) " -send -- "select * from t format TSV\r" -expect "1" +send -- "select i from t format TSV\r" +expect "test string" expect ":) " send "" expect eof - -system "$basedir/helpers/02112_clean.sh" diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect b/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect index a90e85d1069..3413651fe68 100755 --- a/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect +++ b/tests/queries/0_stateless/02112_delayed_clickhouse_local.expect @@ -10,9 +10,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion --interactive --query 'create table t(i Int32) engine=Memory; insert into t select 1'" diff --git a/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect 
b/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect index 34eac360132..0a136ff2e74 100755 --- a/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect +++ b/tests/queries/0_stateless/02112_delayed_clickhouse_local_with_queries_file.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: no-parallel set basedir [file dirname $argv0] set basename [file tail $argv0] @@ -11,20 +10,18 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } -system "$basedir/helpers/02112_prepare.sh" -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion --interactive --queries-file $basedir/file_02112" +system "echo \"drop table if exists t; create table t(i String) engine=Memory; insert into t select 'test string'\" > $env(CLICKHOUSE_TMP)/file_02112" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion --interactive --queries-file $env(CLICKHOUSE_TMP)/file_02112" expect ":) " -send -- "select * from t format TSV\r" -expect "1" +send -- "select \* from t format TSV\r" +expect "test string" expect ":) " send "" expect eof - -system "$basedir/helpers/02112_clean.sh" diff --git a/tests/queries/0_stateless/02116_interactive_hello.expect b/tests/queries/0_stateless/02116_interactive_hello.expect index 7e895196304..3f2140861ea 100755 --- a/tests/queries/0_stateless/02116_interactive_hello.expect +++ b/tests/queries/0_stateless/02116_interactive_hello.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: long set basedir [file dirname $argv0] set basename [file tail $argv0] @@ -12,16 +11,17 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" -expect -re "ClickHouse client version \[\\d\]{2}.\[\\d\]{1,2}.\[\\d\]{1,2}.\[\\d\]{1,2}.\r" -expect -re "Connecting to database .* at localhost:9000 as user default.\r" -expect -re "Connected to ClickHouse server version \[\\d\]{2}.\[\\d\]{1,2}.\[\\d\]{1,2} revision .*\r" +# (?n) - Do not match new lines +expect -re "(?n)ClickHouse client version \[\\d\]{2}\.\[\\d\]{1,2}\.\[\\d\]{1,2}\.\[\\d\]{1,}.*\r" +expect -re "(?n)Connecting to database .* at localhost:9000 as user default\.\r" +expect -re "(?n)Connected to ClickHouse server version \[\\d\]{2}\.\[\\d\]{1,2}\.\[\\d\]{1,2} revision \[\\d\]{1,}\.\r" expect ":) " send -- "" diff --git a/tests/queries/0_stateless/02132_client_history_navigation.expect b/tests/queries/0_stateless/02132_client_history_navigation.expect index 3316f26d552..dc7e44b41ee 100755 --- a/tests/queries/0_stateless/02132_client_history_navigation.expect +++ b/tests/queries/0_stateless/02132_client_history_navigation.expect @@ -11,9 +11,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } # useful debugging configuration @@ -23,16 +23,20 @@ spawn bash -c "source 
$basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \ expect ":) " # Make a query -send -- "SELECT 1\r" +send -- "SELECT 'unique1'\r" expect "1" expect ":) " -send -- "SELECT 2" +send -- "SELECT 'unique2'" # NOTE: it does not work for alacritty with TERM=xterm send -- "\033\[A" -expect "SELECT 1" +expect "SELECT 'unique1'" send -- "\033\[B" -expect "SELECT 2" +expect "SELECT 'unique2'" send -- "\r" -expect "2" +# First is the echoed command +expect "'unique2'" +# Second is the response from the server +expect "'unique2'" + send -- "exit\r" expect eof diff --git a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect index 41d32891e98..01d61a6ad0f 100755 --- a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect +++ b/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect @@ -11,9 +11,9 @@ set uuid "" match_max 100000 expect_after { # Do not ignore eof from read. - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file" @@ -44,9 +44,8 @@ expect ":) " # Generate UUID to avoid matching old database/tables/columns from previous test runs. send -- "select 'begin-' || replace(toString(generateUUIDv4()), '-', '') || '-end' format TSV\r" -expect -re TSV.*TSV.*begin-(.*)-end.* +expect -re "TSV.*TSV.*begin-(.*)-end.*:\\) " set uuid $expect_out(1,string) -expect ":) " # CREATE DATABASE send -- "create database new_${uuid}_database\r" diff --git a/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect index f70b699c71f..0c31efc02e9 100755 --- a/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect +++ b/tests/queries/0_stateless/02164_clickhouse_local_interactive_exception.expect @@ -9,15 +9,15 @@ set timeout 20 match_max 100000 expect_after { - eof { exp_continue } - timeout { exit 1 } + -i $any_spawn_id eof { exp_continue } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion" expect ":) " send -- "insert into table function null() format TSV some trash here 123 \n 456\r" -expect -re ".*DB::Exception: Table function 'null' requires 'structure'.*\r" +expect "EMPTY_LIST_OF_COLUMNS_PASSED" expect ":) " send -- "" diff --git a/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.expect index 07815e57610..9f6937cf80d 100755 --- a/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.expect +++ b/tests/queries/0_stateless/02310_clickhouse_client_INSERT_progress_profile_events.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: long # This is a regression test for the concurrent access in ProgressIndication, # so it is important to read enough rows here (10e6). 
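# (An added gloss: with only a few rows the INSERT would presumably finish before the progress indication is redrawn even once, so the concurrent-access race could never be exercised; 10e6 rows keep progress updates flowing long enough to reproduce it.)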
@@ -17,8 +16,8 @@ match_max 100000 set stty_init "rows 25 cols 120" expect_after { - eof { exp_continue } - timeout { exit 1 } + -i $any_spawn_id eof { exp_continue } + -i $any_spawn_id timeout { exit 1 } } spawn bash diff --git a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect index 3333ee93468..5e514cd7c12 100755 --- a/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect +++ b/tests/queries/0_stateless/02310_clickhouse_local_INSERT_progress_profile_events.expect @@ -1,5 +1,4 @@ #!/usr/bin/expect -f -# Tags: long # This is a regression test for the concurrent access in ProgressIndication, # so it is important to read enough rows here (10e6). @@ -17,8 +16,8 @@ match_max 100000 set stty_init "rows 25 cols 120" expect_after { - eof { exp_continue } - timeout { exit 1 } + -i $any_spawn_id eof { exp_continue } + -i $any_spawn_id timeout { exit 1 } } spawn bash diff --git a/tests/queries/0_stateless/02352_interactive_queries_from_file.expect index a34fc9909f8..e28fb38862c 100755 --- a/tests/queries/0_stateless/02352_interactive_queries_from_file.expect +++ b/tests/queries/0_stateless/02352_interactive_queries_from_file.expect @@ -11,9 +11,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "echo 'select 1;\nselect 2;\nselect 3' > queries_02352" diff --git a/tests/queries/0_stateless/02417_repeat_input_commands.expect index 3658d5d8494..8070200c55c 100755 --- a/tests/queries/0_stateless/02417_repeat_input_commands.expect +++ b/tests/queries/0_stateless/02417_repeat_input_commands.expect @@ -11,9 +11,9 @@ match_max 100000 expect_after { # Do not ignore eof from expect - eof { exp_continue } + -i $any_spawn_id eof { exp_continue } # A default timeout action is to do nothing, change it to fail - timeout { exit 1 } + -i $any_spawn_id timeout { exit 1 } } spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file" diff --git a/tests/queries/0_stateless/02456_progress_tty.expect index ba6cc0537eb..3d1d92e5400 100755 --- a/tests/queries/0_stateless/02456_progress_tty.expect +++ b/tests/queries/0_stateless/02456_progress_tty.expect @@ -10,8 +10,8 @@ match_max 100000 set stty_init "rows 25 cols 120" expect_after { - eof { exp_continue } - timeout { exit 1 } + -i $any_spawn_id eof { exp_continue } + -i $any_spawn_id timeout { exit 1 } } spawn bash diff --git a/tests/queries/0_stateless/02480_client_option_print_num_processed_rows.expect index 77e219e804e..dd3c9309b2d 100755 --- a/tests/queries/0_stateless/02480_client_option_print_num_processed_rows.expect +++ b/tests/queries/0_stateless/02480_client_option_print_num_processed_rows.expect @@ -10,8 +10,8 @@ match_max 100000 set stty_init "rows 25 cols 120" expect_after { - eof { exp_continue } - timeout { exit 1 } + -i $any_spawn_id eof { exp_continue } + -i $any_spawn_id timeout { exit 1 } } spawn bash diff --git 
a/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect b/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect index a391756ba22..2d595b0f492 100755 --- a/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect +++ b/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect @@ -10,8 +10,8 @@ match_max 100000 set stty_init "rows 25 cols 120" expect_after { - eof { exp_continue } - timeout { exit 1 } + -i $any_spawn_id eof { exp_continue } + -i $any_spawn_id timeout { exit 1 } } spawn bash diff --git a/tests/queries/0_stateless/02564_query_id_header.reference b/tests/queries/0_stateless/02564_query_id_header.reference new file mode 100644 index 00000000000..413e8929f36 --- /dev/null +++ b/tests/queries/0_stateless/02564_query_id_header.reference @@ -0,0 +1,22 @@ +CREATE TABLE t_query_id_header (a UInt64) ENGINE = Memory +< Content-Type: text/plain; charset=UTF-8 +< X-ClickHouse-Query-Id: query_id +< X-ClickHouse-Timezone: timezone +INSERT INTO t_query_id_header VALUES (1) +< Content-Type: text/plain; charset=UTF-8 +< X-ClickHouse-Query-Id: query_id +< X-ClickHouse-Timezone: timezone +EXISTS TABLE t_query_id_header +< Content-Type: text/tab-separated-values; charset=UTF-8 +< X-ClickHouse-Format: TabSeparated +< X-ClickHouse-Query-Id: query_id +< X-ClickHouse-Timezone: timezone +SELECT * FROM t_query_id_header +< Content-Type: text/tab-separated-values; charset=UTF-8 +< X-ClickHouse-Format: TabSeparated +< X-ClickHouse-Query-Id: query_id +< X-ClickHouse-Timezone: timezone +DROP TABLE t_query_id_header +< Content-Type: text/plain; charset=UTF-8 +< X-ClickHouse-Query-Id: query_id +< X-ClickHouse-Timezone: timezone diff --git a/tests/queries/0_stateless/02564_query_id_header.sh b/tests/queries/0_stateless/02564_query_id_header.sh new file mode 100755 index 00000000000..67ddbcfcc46 --- /dev/null +++ b/tests/queries/0_stateless/02564_query_id_header.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +CLICKHOUSE_TIMEZONE_ESCAPED=$($CLICKHOUSE_CLIENT --query="SELECT timezone()" | sed 's/[]\/$*.^+:()[]/\\&/g') + +function run_and_check_headers() +{ + query=$1 + query_id="${CLICKHOUSE_DATABASE}_${RANDOM}" + + echo "$query" + + ${CLICKHOUSE_CURL} -sS -v "${CLICKHOUSE_URL}&query_id=$query_id" -d "$1" 2>&1 \ + | grep -e "< X-ClickHouse-Query-Id" -e "< X-ClickHouse-Timezone" -e "< X-ClickHouse-Format" -e "< Content-Type" \ + | sed "s/$CLICKHOUSE_TIMEZONE_ESCAPED/timezone/" \ + | sed "s/$query_id/query_id/" \ + | sed "s/\r$//" \ + | sort +} + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_query_id_header" + +run_and_check_headers "CREATE TABLE t_query_id_header (a UInt64) ENGINE = Memory" +run_and_check_headers "INSERT INTO t_query_id_header VALUES (1)" +run_and_check_headers "EXISTS TABLE t_query_id_header" +run_and_check_headers "SELECT * FROM t_query_id_header" +run_and_check_headers "DROP TABLE t_query_id_header" diff --git a/tests/queries/0_stateless/02575_merge_prewhere_default_expression.reference b/tests/queries/0_stateless/02575_merge_prewhere_default_expression.reference new file mode 100644 index 00000000000..434384b3d77 --- /dev/null +++ b/tests/queries/0_stateless/02575_merge_prewhere_default_expression.reference @@ -0,0 +1,8 @@ +-- { echoOn } +SELECT * FROM m PREWHERE a = 'OK' ORDER BY a, f; +OK 1 +OK 2 +SELECT * FROM m PREWHERE f = 1 ORDER BY a, f; +OK 1 +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=0; +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; diff --git a/tests/queries/0_stateless/02575_merge_prewhere_default_expression.sql b/tests/queries/0_stateless/02575_merge_prewhere_default_expression.sql new file mode 100644 index 00000000000..83c1d51269d --- /dev/null +++ b/tests/queries/0_stateless/02575_merge_prewhere_default_expression.sql @@ -0,0 +1,38 @@ +-- Allow PREWHERE when Merge() and MergeTree has different DEFAULT expression + +DROP TABLE IF EXISTS m; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE m +( + `a` String, + `f` UInt8 DEFAULT 0 +) +ENGINE = Merge(currentDatabase(), '^(t1|t2)$'); + +CREATE TABLE t1 +( + a String, + f UInt8 DEFAULT 1 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192; +INSERT INTO t1 (a) VALUES ('OK'); + +CREATE TABLE t2 +( + a String, + f UInt8 DEFAULT 2 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192; +INSERT INTO t2 (a) VALUES ('OK'); + +-- { echoOn } +SELECT * FROM m PREWHERE a = 'OK' ORDER BY a, f; +SELECT * FROM m PREWHERE f = 1 ORDER BY a, f; +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=0; +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; diff --git a/tests/queries/0_stateless/02570_merge_alias_prewhere.reference b/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.reference similarity index 100% rename from tests/queries/0_stateless/02570_merge_alias_prewhere.reference rename to tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.reference diff --git a/tests/queries/0_stateless/02570_merge_alias_prewhere.sql b/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.sql similarity index 90% rename from tests/queries/0_stateless/02570_merge_alias_prewhere.sql rename to tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.sql index 59ca717a418..0f1d582a26e 100644 --- a/tests/queries/0_stateless/02570_merge_alias_prewhere.sql +++ 
b/tests/queries/0_stateless/02575_merge_prewhere_different_default_kind.sql @@ -1,3 +1,5 @@ +-- Prohibit PREWHERE when Merge and MergeTree have different default types for the column + DROP TABLE IF EXISTS m; DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02575_merge_prewhere_ephemeral.reference b/tests/queries/0_stateless/02575_merge_prewhere_ephemeral.reference new file mode 100644 index 00000000000..9f214b8c536 --- /dev/null +++ b/tests/queries/0_stateless/02575_merge_prewhere_ephemeral.reference @@ -0,0 +1,11 @@ +-- { echoOn } +SELECT * FROM m PREWHERE a = 'OK' ORDER BY a; +OK +OK +SELECT * FROM m PREWHERE f = 1 ORDER BY a; -- { serverError ILLEGAL_PREWHERE } +SELECT * FROM m WHERE a = 'OK' SETTINGS optimize_move_to_prewhere=0; +OK +OK +SELECT * FROM m WHERE a = 'OK' SETTINGS optimize_move_to_prewhere=1; +OK +OK diff --git a/tests/queries/0_stateless/02575_merge_prewhere_ephemeral.sql b/tests/queries/0_stateless/02575_merge_prewhere_ephemeral.sql new file mode 100644 index 00000000000..85e03647d62 --- /dev/null +++ b/tests/queries/0_stateless/02575_merge_prewhere_ephemeral.sql @@ -0,0 +1,38 @@ +-- You cannot query EPHEMERAL columns + +DROP TABLE IF EXISTS m; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE m +( + `a` String, + `f` UInt8 EPHEMERAL 0 +) +ENGINE = Merge(currentDatabase(), '^(t1|t2)$'); + +CREATE TABLE t1 +( + a String, + f UInt8 DEFAULT 1 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192; +INSERT INTO t1 (a) VALUES ('OK'); + +CREATE TABLE t2 +( + a String, + f UInt8 DEFAULT 2 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192; +INSERT INTO t2 (a) VALUES ('OK'); + +-- { echoOn } +SELECT * FROM m PREWHERE a = 'OK' ORDER BY a; +SELECT * FROM m PREWHERE f = 1 ORDER BY a; -- { serverError ILLEGAL_PREWHERE } +SELECT * FROM m WHERE a = 'OK' SETTINGS optimize_move_to_prewhere=0; +SELECT * FROM m WHERE a = 'OK' SETTINGS optimize_move_to_prewhere=1; diff --git a/tests/queries/0_stateless/02575_merge_prewhere_materialized.reference b/tests/queries/0_stateless/02575_merge_prewhere_materialized.reference new file mode 100644 index 00000000000..434384b3d77 --- /dev/null +++ b/tests/queries/0_stateless/02575_merge_prewhere_materialized.reference @@ -0,0 +1,8 @@ +-- { echoOn } +SELECT * FROM m PREWHERE a = 'OK' ORDER BY a, f; +OK 1 +OK 2 +SELECT * FROM m PREWHERE f = 1 ORDER BY a, f; +OK 1 +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=0; +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; diff --git a/tests/queries/0_stateless/02575_merge_prewhere_materialized.sql b/tests/queries/0_stateless/02575_merge_prewhere_materialized.sql new file mode 100644 index 00000000000..eae72274c31 --- /dev/null +++ b/tests/queries/0_stateless/02575_merge_prewhere_materialized.sql @@ -0,0 +1,43 @@ +-- Allow PREWHERE when Merge has DEFAULT and MergeTree has MATERIALIZED + +DROP TABLE IF EXISTS m; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +CREATE TABLE m +( + `a` String, + `f` UInt8 DEFAULT 0 +) +ENGINE = Merge(currentDatabase(), '^(t1|t2)$'); + +CREATE TABLE t1 +( + a String, + f UInt8 MATERIALIZED 1 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192; +INSERT INTO t1 (a) VALUES ('OK'); + +CREATE TABLE t2 +( + a String, + f UInt8 DEFAULT 2 +) +ENGINE = MergeTree +ORDER BY tuple() +SETTINGS index_granularity = 8192; +INSERT INTO t2 (a) VALUES ('OK'); + +-- { echoOn } +SELECT * FROM m PREWHERE a = 'OK' ORDER BY a, f; +SELECT * FROM m PREWHERE 
f = 1 ORDER BY a, f; +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=0; +SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1; +-- { echoOff } + +DROP TABLE m; +DROP TABLE t1; +DROP TABLE t2; diff --git a/tests/queries/0_stateless/02667_and_consistency.reference b/tests/queries/0_stateless/02667_and_consistency.reference new file mode 100644 index 00000000000..bcb2b5aecfb --- /dev/null +++ b/tests/queries/0_stateless/02667_and_consistency.reference @@ -0,0 +1,23 @@ +true +===== +true +===== +true +===== +true +===== +===== +1 +===== +===== +allow_experimental_analyzer +true +#45440 +2086579505 0 1 0 0 +-542998757 -542998757 1 0 0 += +2086579505 0 1 0 0 +-542998757 -542998757 1 0 0 += +2086579505 0 1 0 0 +-542998757 -542998757 1 0 0 diff --git a/tests/queries/0_stateless/02667_and_consistency.sql b/tests/queries/0_stateless/02667_and_consistency.sql new file mode 100644 index 00000000000..f02185a1a52 --- /dev/null +++ b/tests/queries/0_stateless/02667_and_consistency.sql @@ -0,0 +1,106 @@ +SELECT toBool(sin(SUM(number))) AS x +FROM +( + SELECT 1 AS number +) +GROUP BY number +HAVING 1 AND sin(sum(number)) +SETTINGS enable_optimize_predicate_expression = 0; + +SELECT '====='; + +SELECT toBool(sin(SUM(number))) AS x +FROM +( + SELECT 1 AS number +) +GROUP BY number +HAVING 1 AND sin(1) +SETTINGS enable_optimize_predicate_expression = 0; + +SELECT '====='; + +SELECT toBool(sin(SUM(number))) AS x +FROM +( + SELECT 1 AS number +) +GROUP BY number +HAVING x AND sin(sum(number)) +SETTINGS enable_optimize_predicate_expression = 1; + +SELECT '====='; + +SELECT toBool(sin(SUM(number))) AS x +FROM +( + SELECT 1 AS number +) +GROUP BY number +HAVING 1 AND sin(sum(number)) +SETTINGS enable_optimize_predicate_expression = 0; + +SELECT '====='; + +SELECT toBool(sin(SUM(number))) AS x +FROM +( + SELECT 1 AS number +) +GROUP BY number +HAVING 1 AND sin(sum(number)) +SETTINGS enable_optimize_predicate_expression = 1; -- { serverError 59 } + +SELECT '====='; + +SELECT 1 and sin(1); + +SELECT '====='; + +SELECT toBool(sin(SUM(number))) AS x +FROM +( + SELECT 1 AS number +) +GROUP BY number +HAVING x AND sin(1) +SETTINGS enable_optimize_predicate_expression = 0; -- { serverError 59 } + +SELECT '====='; +SELECT 'allow_experimental_analyzer'; + +SET allow_experimental_analyzer = 1; + +SELECT toBool(sin(SUM(number))) AS x +FROM +( + SELECT 1 AS number +) +GROUP BY number +HAVING 1 AND sin(sum(number)) +SETTINGS enable_optimize_predicate_expression = 1; + +select '#45440'; + +DROP TABLE IF EXISTS t2; +CREATE TABLE t2(c0 Int32) ENGINE = MergeTree ORDER BY c0; +INSERT INTO t2 VALUES (928386547), (1541944097), (2086579505), (1990427322), (-542998757), (390253678), (554855248), (203290629), (1504693323); + +SELECT + MAX(left.c0), + min2(left.c0, -(-left.c0) * (radians(left.c0) - radians(left.c0))) AS g, + (((-1925024212 IS NOT NULL) IS NOT NULL) != radians(tan(1216286224))) AND cos(lcm(MAX(left.c0), -1966575216) OR (MAX(left.c0) * 1180517420)) AS h, + NOT h, + h IS NULL +FROM t2 AS left +GROUP BY g; +select '='; +SELECT MAX(left.c0), min2(left.c0, -(-left.c0) * (radians(left.c0) - radians(left.c0))) as g, (((-1925024212 IS NOT NULL) IS NOT NULL) != radians(tan(1216286224))) AND cos(lcm(MAX(left.c0), -1966575216) OR (MAX(left.c0) * 1180517420)) as h, not h, h is null + FROM t2 AS left + GROUP BY g HAVING h SETTINGS enable_optimize_predicate_expression = 0; +select '='; +SELECT MAX(left.c0), min2(left.c0, -(-left.c0) * (radians(left.c0) - radians(left.c0))) as g, (((-1925024212 IS NOT NULL) IS 
NOT NULL) != radians(tan(1216286224))) AND cos(lcm(MAX(left.c0), -1966575216) OR (MAX(left.c0) * 1180517420)) as h, not h, h is null + FROM t2 AS left + GROUP BY g HAVING h SETTINGS enable_optimize_predicate_expression = 1; + +DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/02667_order_by_aggregation_result.reference b/tests/queries/0_stateless/02667_order_by_aggregation_result.reference new file mode 100644 index 00000000000..a89e39d87b3 --- /dev/null +++ b/tests/queries/0_stateless/02667_order_by_aggregation_result.reference @@ -0,0 +1,7 @@ +0 0 +0 1 █████████████████████████████████████████████████▉ +1 2 0.5 +1 0.1 1.1 +00000000-0000-0000-0000-000000000000 b 1 +417ddc5d-e556-4d27-95dd-a34d84e46a50 c 1 +notEmpty a 1 diff --git a/tests/queries/0_stateless/02667_order_by_aggregation_result.sql b/tests/queries/0_stateless/02667_order_by_aggregation_result.sql new file mode 100644 index 00000000000..3fef0374d83 --- /dev/null +++ b/tests/queries/0_stateless/02667_order_by_aggregation_result.sql @@ -0,0 +1,60 @@ +-- Github issues: +-- - https://github.com/ClickHouse/ClickHouse/issues/46268 +-- - https://github.com/ClickHouse/ClickHouse/issues/46273 + +-- Queries that the original PR (https://github.com/ClickHouse/ClickHouse/pull/42827) tried to fix +SELECT (number = 1) AND (number = 2) AS value, sum(value) OVER () FROM numbers(1) WHERE 1; +SELECT time, round(exp_smooth, 10), bar(exp_smooth, -9223372036854775807, 1048575, 50) AS bar FROM (SELECT 2 OR (number = 0) OR (number >= 1) AS value, number AS time, exponentialTimeDecayedSum(2147483646)(value, time) OVER (RANGE BETWEEN CURRENT ROW AND CURRENT ROW) AS exp_smooth FROM numbers(1) WHERE 10) WHERE 25; + +CREATE TABLE ttttttt +( + `timestamp` DateTime, + `col1` Float64, + `col2` Float64, + `col3` Float64 +) +ENGINE = MergeTree() +ORDER BY tuple(); + +INSERT INTO ttttttt VALUES ('2023-02-20 00:00:00', 1, 2, 3); + +-- Query that https://github.com/ClickHouse/ClickHouse/pull/42827 broke +SELECT + argMax(col1, timestamp) AS col1, + argMax(col2, timestamp) AS col2, + col1 / col2 AS final_col +FROM ttttttt +GROUP BY + col3 +ORDER BY final_col DESC; + +SELECT + argMax(col1, timestamp) AS col1, + col1 / 10 AS final_col, + final_col + 1 AS final_col2 +FROM ttttttt +GROUP BY col3; + +-- https://github.com/ClickHouse/ClickHouse/issues/46724 + +CREATE TABLE table1 +( + id String, + device UUID +) +ENGINE = MergeTree() ORDER BY tuple(); + +INSERT INTO table1 VALUES ('notEmpty', '417ddc5d-e556-4d27-95dd-a34d84e46a50'); +INSERT INTO table1 VALUES ('', '417ddc5d-e556-4d27-95dd-a34d84e46a50'); +INSERT INTO table1 VALUES ('', '00000000-0000-0000-0000-000000000000'); + +SELECT + if(empty(id), toString(device), id) AS device, + multiIf( + notEmpty(id),'a', + device == '00000000-0000-0000-0000-000000000000', 'b', + 'c' ) AS device_id_type, + count() +FROM table1 +GROUP BY device, device_id_type +ORDER BY device; diff --git a/tests/queries/0_stateless/02670_max_intersections.reference b/tests/queries/0_stateless/02670_max_intersections.reference new file mode 100644 index 00000000000..30e9e0b1342 --- /dev/null +++ b/tests/queries/0_stateless/02670_max_intersections.reference @@ -0,0 +1 @@ +04010000000000000001000000000000000300000000000000FFFFFFFFFFFFFFFF030000000000000001000000000000000500000000000000FFFFFFFFFFFFFFFF diff --git a/tests/queries/0_stateless/02670_max_intersections.sql b/tests/queries/0_stateless/02670_max_intersections.sql new file mode 100644 index 00000000000..5ac865222fe --- /dev/null +++ 
b/tests/queries/0_stateless/02670_max_intersections.sql @@ -0,0 +1 @@ +SELECT hex(maxIntersectionsState(*)) FROM VALUES((1, 3), (3, 5)); diff --git a/tests/queries/0_stateless/02671_quantile_fuse_msan.reference b/tests/queries/0_stateless/02671_quantile_fuse_msan.reference new file mode 100644 index 00000000000..f6eca3b9698 --- /dev/null +++ b/tests/queries/0_stateless/02671_quantile_fuse_msan.reference @@ -0,0 +1,2 @@ +1970-01-01 00:00:00 1970-01-01 00:00:00 +1 diff --git a/tests/queries/0_stateless/02671_quantile_fuse_msan.sql b/tests/queries/0_stateless/02671_quantile_fuse_msan.sql new file mode 100644 index 00000000000..efeef0b0ebf --- /dev/null +++ b/tests/queries/0_stateless/02671_quantile_fuse_msan.sql @@ -0,0 +1,5 @@ +SET optimize_syntax_fuse_functions=1; +CREATE TEMPORARY TABLE datetime (`d` DateTime('UTC')); +SELECT quantile(0.1)(d), quantile(0.5)(d) FROM datetime; +INSERT INTO datetime SELECT * FROM generateRandom() LIMIT 10; +SELECT max(cityHash64(*)) > 0 FROM (SELECT quantile(0.1)(d), quantile(0.5)(d) FROM datetime); diff --git a/tests/queries/0_stateless/02672_suspicious_low_cardinality_msan.reference b/tests/queries/0_stateless/02672_suspicious_low_cardinality_msan.reference new file mode 100644 index 00000000000..7dd7c8eece6 --- /dev/null +++ b/tests/queries/0_stateless/02672_suspicious_low_cardinality_msan.reference @@ -0,0 +1,20 @@ +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 +1023 \N \N 0 2147483646 diff --git a/tests/queries/0_stateless/02672_suspicious_low_cardinality_msan.sql b/tests/queries/0_stateless/02672_suspicious_low_cardinality_msan.sql new file mode 100644 index 00000000000..f2841ac8897 --- /dev/null +++ b/tests/queries/0_stateless/02672_suspicious_low_cardinality_msan.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS table1__fuzz_19; + +SET allow_suspicious_low_cardinality_types = 1; +CREATE TABLE table1__fuzz_19 (`id` LowCardinality(UInt16), `v` DateTime64(3, 'UTC')) ENGINE = ReplacingMergeTree(v) PARTITION BY id % 200 ORDER BY id; +INSERT INTO table1__fuzz_19 SELECT number - 205, number FROM numbers(10); +INSERT INTO table1__fuzz_19 SELECT number - 205, number FROM numbers(400, 10); + +SELECT 1023, (((id % -9223372036854775807) = NULL) OR ((id % NULL) = 100) OR ((id % NULL) = 65537)) = ((id % inf) = 9223372036854775806), (id % NULL) = NULL, (id % 3.4028234663852886e38) = 1023, 2147483646 FROM table1__fuzz_19 ORDER BY (((id % 1048577) = 1024) % id) = 1023 DESC NULLS FIRST, id % 2147483646 ASC NULLS FIRST, ((id % 1) = 9223372036854775807) OR ((id % NULL) = 257) DESC NULLS FIRST; + +DROP TABLE table1__fuzz_19; diff --git a/tests/queries/0_stateless/02673_map_hashing_msan.reference b/tests/queries/0_stateless/02673_map_hashing_msan.reference new file mode 100644 index 00000000000..d3fbc1377d4 --- /dev/null +++ b/tests/queries/0_stateless/02673_map_hashing_msan.reference @@ -0,0 +1,6 @@ +4786021384179797717 +5368498105280294197 +42 15687122600100720591 +42 15687122600100720591 +42 15687122600100720591 +\N diff --git a/tests/queries/0_stateless/02673_map_hashing_msan.sql b/tests/queries/0_stateless/02673_map_hashing_msan.sql new file mode 100644 
index 00000000000..2fe3620e68c --- /dev/null +++ b/tests/queries/0_stateless/02673_map_hashing_msan.sql @@ -0,0 +1,7 @@ +SELECT cityHash64(map(1, 'Hello'), CAST(materialize('World') AS LowCardinality(String))); +SELECT cityHash64(map(), CAST(materialize('') AS LowCardinality(Nullable(String)))); +SELECT materialize(42) as last_element, cityHash64(map(), CAST(materialize('') AS LowCardinality(Nullable(String))), last_element) from numbers(3); + +SET allow_suspicious_low_cardinality_types = 1; +CREATE TEMPORARY TABLE datetime__fuzz_14 (`d` LowCardinality(Nullable(UInt128))); +SELECT max(mapPopulateSeries(mapPopulateSeries(map(toInt64(1048), toInt64(9223), 3, -2147))), toInt64(1048), map('11', 257, '', NULL), cityHash64(*)) > NULL FROM (SELECT max(cityHash64(mapPopulateSeries(mapPopulateSeries(map(toInt64(1048), toInt64(2147), 655, -2147))), *)) > NULL, map(toInt64(-2147), toInt64(100.0001), -2147, NULL), mapPopulateSeries(map(toInt64(1024), toInt64(1048), 1048, -1)), map(toInt64(256), toInt64(NULL), -1, NULL), quantile(0.0001)(d) FROM datetime__fuzz_14 WITH TOTALS); diff --git a/tests/queries/0_stateless/25339_fill_empty_chunk.reference b/tests/queries/0_stateless/25339_fill_empty_chunk.reference new file mode 100644 index 00000000000..4dd51b52f2f --- /dev/null +++ b/tests/queries/0_stateless/25339_fill_empty_chunk.reference @@ -0,0 +1,11 @@ +1 \N +2 \N +2 \N +2 \N +3 \N +4 \N +5 \N +6 \N +7 \N +8 \N +9 \N diff --git a/tests/queries/0_stateless/25339_fill_empty_chunk.sql b/tests/queries/0_stateless/25339_fill_empty_chunk.sql new file mode 100644 index 00000000000..14ae322d8c9 --- /dev/null +++ b/tests/queries/0_stateless/25339_fill_empty_chunk.sql @@ -0,0 +1,10 @@ +-- this SELECT produces empty chunk in FillingTransform + +SELECT + 2 AS x, + arrayJoin([NULL, NULL, NULL]) +GROUP BY + GROUPING SETS ( + (0), + ([NULL, NULL, NULL])) +ORDER BY x ASC WITH FILL FROM 1 TO 10; diff --git a/tests/queries/0_stateless/25339_like_substring_search_bug.reference b/tests/queries/0_stateless/25339_like_substring_search_bug.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/25339_like_substring_search_bug.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/25339_like_substring_search_bug.sql b/tests/queries/0_stateless/25339_like_substring_search_bug.sql new file mode 100644 index 00000000000..10ce3cdde36 --- /dev/null +++ b/tests/queries/0_stateless/25339_like_substring_search_bug.sql @@ -0,0 +1 @@ +SELECT 'Win\Sys' LIKE '%Win\Sys%'; diff --git a/tests/queries/0_stateless/helpers/02112_clean.sh b/tests/queries/0_stateless/helpers/02112_clean.sh deleted file mode 100755 index 95af0cede9c..00000000000 --- a/tests/queries/0_stateless/helpers/02112_clean.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -FILE=${CURDIR}/../file_02112 -rm "$FILE" diff --git a/tests/queries/0_stateless/helpers/02112_prepare.sh b/tests/queries/0_stateless/helpers/02112_prepare.sh deleted file mode 100755 index c2791b01140..00000000000 --- a/tests/queries/0_stateless/helpers/02112_prepare.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -FILE=${CURDIR}/../file_02112 -echo "drop table if exists t;create table t(i Int32) engine=Memory; insert into t select 1" > "$FILE" diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 00477f0fb8f..ab03da8cc80 100644 --- 
a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -24,6 +24,7 @@ CapnProto CentOS ClickHouse ClickHouse's +CodeBlock Config ConnectionDetails Contrib @@ -151,6 +152,7 @@ Submodules Subqueries TSVRaw TSan +TabItem TabSeparated TabSeparatedRaw TabSeparatedRawWithNames diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 8436d3378d9..53165d14f96 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -338,6 +338,13 @@ for test_case in "${expect_tests[@]}"; do pattern="^spawn.*CLICKHOUSE_CLIENT_BINARY.*--history_file$" grep -q "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'" fi + + # Otherwise expect_after/expect_before will not bail when stdin is not attached + # (and actually this is a hack anyway; the correct way is to use $any_spawn_id) + pattern="-i \$any_spawn_id timeout" + grep -q -- "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'" + pattern="-i \$any_spawn_id eof" + grep -q -- "$pattern" "$test_case" || echo "Missing '$pattern' in '$test_case'" done # Conflict markers