Merge branch 'master' of github.com:ClickHouse/ClickHouse into split-bc-check

This commit is contained in:
avogar 2023-02-27 13:12:35 +00:00
commit 3411c33fdf
147 changed files with 1707 additions and 427 deletions

View File

@ -29,7 +29,7 @@
* Add two new functions which allow for user-defined keys/seeds with SipHash{64,128}. [#45513](https://github.com/ClickHouse/ClickHouse/pull/45513) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Allow a three-argument version for table function `format`. Closes [#45808](https://github.com/ClickHouse/ClickHouse/issues/45808). [#45873](https://github.com/ClickHouse/ClickHouse/pull/45873) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
* Add `JodaTime` format support for 'x','w','S'. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. [#46073](https://github.com/ClickHouse/ClickHouse/pull/46073) ([zk_kiger](https://github.com/zk-kiger)).
* Support window function `ntile`.
* Support window function `ntile`. ([lgbo](https://github.com/lgbo-ustc)).
* Add setting `final` to implicitly apply the `FINAL` modifier to every table. [#40945](https://github.com/ClickHouse/ClickHouse/pull/40945) ([Arthur Passos](https://github.com/arthurpassos)).
* Added `arrayPartialSort` and `arrayPartialReverseSort` functions. [#46296](https://github.com/ClickHouse/ClickHouse/pull/46296) ([Joanna Hulboj](https://github.com/jh0x)).
* The new http parameter `client_protocol_version` allows setting a client protocol version for HTTP responses using the Native format. [#40397](https://github.com/ClickHouse/ClickHouse/issues/40397). [#46360](https://github.com/ClickHouse/ClickHouse/pull/46360) ([Geoff Genz](https://github.com/genzgd)).

View File

@ -14,7 +14,7 @@ curl https://clickhouse.com/ | sh
* [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
* [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://clickhousedb.slack.com/) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.

View File

@ -44,7 +44,7 @@ int NumberParser::parse(const std::string& s, char thSep)
if (tryParse(s, result, thSep))
return result;
else
throw SyntaxException("Not a valid integer", s);
throw SyntaxException("Not a valid integer", "'" + s + "'");
}
@ -60,7 +60,7 @@ unsigned NumberParser::parseUnsigned(const std::string& s, char thSep)
if (tryParseUnsigned(s, result, thSep))
return result;
else
throw SyntaxException("Not a valid unsigned integer", s);
throw SyntaxException("Not a valid unsigned integer", "'" + s + "'");
}
@ -76,7 +76,7 @@ unsigned NumberParser::parseHex(const std::string& s)
if (tryParseHex(s, result))
return result;
else
throw SyntaxException("Not a valid hexadecimal integer", s);
throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}
@ -94,7 +94,7 @@ unsigned NumberParser::parseOct(const std::string& s)
if (tryParseOct(s, result))
return result;
else
throw SyntaxException("Not a valid hexadecimal integer", s);
throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}
@ -112,7 +112,7 @@ Int64 NumberParser::parse64(const std::string& s, char thSep)
if (tryParse64(s, result, thSep))
return result;
else
throw SyntaxException("Not a valid integer", s);
throw SyntaxException("Not a valid integer", "'" + s + "'");
}
@ -128,7 +128,7 @@ UInt64 NumberParser::parseUnsigned64(const std::string& s, char thSep)
if (tryParseUnsigned64(s, result, thSep))
return result;
else
throw SyntaxException("Not a valid unsigned integer", s);
throw SyntaxException("Not a valid unsigned integer", "'" + s + "'");
}
@ -144,7 +144,7 @@ UInt64 NumberParser::parseHex64(const std::string& s)
if (tryParseHex64(s, result))
return result;
else
throw SyntaxException("Not a valid hexadecimal integer", s);
throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}
@ -162,7 +162,7 @@ UInt64 NumberParser::parseOct64(const std::string& s)
if (tryParseOct64(s, result))
return result;
else
throw SyntaxException("Not a valid hexadecimal integer", s);
throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}
@ -180,7 +180,7 @@ double NumberParser::parseFloat(const std::string& s, char decSep, char thSep)
if (tryParseFloat(s, result, decSep, thSep))
return result;
else
throw SyntaxException("Not a valid floating-point number", s);
throw SyntaxException("Not a valid floating-point number", "'" + s + "'");
}
@ -196,7 +196,7 @@ bool NumberParser::parseBool(const std::string& s)
if (tryParseBool(s, result))
return result;
else
throw SyntaxException("Not a valid bool number", s);
throw SyntaxException("Not a valid bool number", "'" + s + "'");
}

View File

@ -60,6 +60,23 @@ elseif (ARCH_AARCH64)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs -Xclang=-target-feature -Xclang=+ldapr -Wno-unused-command-line-argument")
endif ()
# Best-effort check: The build generates and executes intermediate binaries, e.g. protoc and llvm-tablegen. If we build on ARM for ARM
# and the build machine is too old, i.e. doesn't satisfy the above modern profile, then these intermediate binaries will not run (they
# dump SIGILL). Even if they could run, the build machine wouldn't be able to run the ClickHouse binary. In that case, suggest running
# the build with the compat profile.
if (OS_LINUX AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)" AND NOT NO_ARMV81_OR_HIGHER)
# CPU features in /proc/cpuinfo and compiler flags don't align :( ... pick some obvious flags contained in the modern but not in the
# legacy profile (full Graviton 3 /proc/cpuinfo is "fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm
# jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs paca pacg dcpodp svei8mm svebf16 i8mm
# bf16 dgh rng")
execute_process(
COMMAND grep -P "^(?=.*atomic)(?=.*ssbs)" /proc/cpuinfo
OUTPUT_VARIABLE FLAGS)
if (NOT FLAGS)
MESSAGE(FATAL_ERROR "The build machine does not satisfy the minimum CPU requirements, try to run cmake with -DNO_ARMV81_OR_HIGHER=1")
endif()
endif()
elseif (ARCH_PPC64LE)
# By Default, build for power8 and up, allow building for power9 and up
# Note that gcc and clang have support for x86 SSE2 intrinsics when building for PowerPC
@ -102,6 +119,22 @@ elseif (ARCH_AMD64)
SET(ENABLE_AVX512_FOR_SPEC_OP 0)
endif()
# Same best-effort check for x86 as above for ARM.
if (OS_LINUX AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64" AND NOT NO_SSE3_OR_HIGHER)
# Test for flags in standard profile but not in NO_SSE3_OR_HIGHER profile.
# /proc/cpuinfo for Intel Xeon 8124: "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse
# sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl xtopology nonstop_tsc cpuid aperfmperf
# tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c
# rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx
# avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke"
execute_process(
COMMAND grep -P "^(?=.*ssse3)(?=.*sse4_1)(?=.*sse4_2)" /proc/cpuinfo
OUTPUT_VARIABLE FLAGS)
if (NOT FLAGS)
MESSAGE(FATAL_ERROR "The build machine does not satisfy the minimum CPU requirements, try to run cmake with -DNO_SSE3_OR_HIGHER=1")
endif()
endif()
# ClickHouse can be cross-compiled (e.g. on an ARM host for x86) but it is also possible to build ClickHouse on x86 w/o AVX for x86 w/
# AVX. We only check that the compiler can emit certain SIMD instructions, we don't care if the host system is able to run the binary.
# Therefore, use check_cxx_source_compiles (= does the code compile+link?) instead of check_cxx_source_runs (= does the code

contrib/libunwind vendored

@ -1 +1 @@
Subproject commit 5022f30f3e092a54a7c101c335ce5e08769db366
Subproject commit e48aa13f67dc722511b5af33a32ba9b7748176b5

View File

@ -94,6 +94,10 @@ if (ARCH_AMD64 OR ARCH_AARCH64)
add_compile_definitions(TUKLIB_FAST_UNALIGNED_ACCESS=1)
endif ()
if (ARCH_S390X)
add_compile_definitions(WORDS_BIGENDIAN)
endif ()
find_package(Threads REQUIRED)

View File

@ -30,25 +30,10 @@
# - zstd homepage : http://www.zstd.net/
# ################################################################
# Get library version based on information from input content (use regular exp)
function(GetLibraryVersion _content _outputVar1 _outputVar2 _outputVar3)
string(REGEX MATCHALL ".*define ZSTD_VERSION_MAJOR+.* ([0-9]+).*define ZSTD_VERSION_MINOR+.* ([0-9]+).*define ZSTD_VERSION_RELEASE+.* ([0-9]+)" VERSION_REGEX "${_content}")
SET(${_outputVar1} ${CMAKE_MATCH_1} PARENT_SCOPE)
SET(${_outputVar2} ${CMAKE_MATCH_2} PARENT_SCOPE)
SET(${_outputVar3} ${CMAKE_MATCH_3} PARENT_SCOPE)
endfunction()
# Define library directory, where sources and header files are located
SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib")
INCLUDE_DIRECTORIES(BEFORE ${LIBRARY_DIR} "${LIBRARY_DIR}/common")
# Read file content
FILE(READ "${LIBRARY_DIR}/zstd.h" HEADER_CONTENT)
# Parse version
GetLibraryVersion("${HEADER_CONTENT}" LIBVER_MAJOR LIBVER_MINOR LIBVER_RELEASE)
MESSAGE(STATUS "ZSTD VERSION ${LIBVER_MAJOR}.${LIBVER_MINOR}.${LIBVER_RELEASE}")
# cd contrib/zstd/lib
# find . -name '*.c' -or -name '*.S' | grep -vP 'deprecated|legacy' | sort | sed 's/^\./ "${LIBRARY_DIR}/"'
SET(Sources

View File

@ -162,22 +162,59 @@ If you want to change the target table by using `ALTER`, we recommend disabling
## Configuration {#configuration}
Similar to GraphiteMergeTree, the Kafka engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (`kafka`) and topic-level (`kafka_*`). The global configuration is applied first, and then the topic-level configuration is applied (if it exists).
Similar to GraphiteMergeTree, the Kafka engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (below `<kafka>`) and topic-level (below `<kafka><kafka_topic>`). The global configuration is applied first, and then the topic-level configuration is applied (if it exists).
``` xml
<!-- Global configuration options for all tables of Kafka engine type -->
<kafka>
<!-- Global configuration options for all tables of Kafka engine type -->
<debug>cgrp</debug>
<auto_offset_reset>smallest</auto_offset_reset>
<!-- Configuration specific to topics "logs" and "stats" -->
<kafka_topic>
<name>logs</name>
<retry_backoff_ms>250</retry_backoff_ms>
<fetch_min_bytes>100000</fetch_min_bytes>
</kafka_topic>
<kafka_topic>
<name>stats</name>
<retry_backoff_ms>400</retry_backoff_ms>
<fetch_min_bytes>50000</fetch_min_bytes>
</kafka_topic>
</kafka>
```
<details markdown="1">
<summary>Example in deprecated syntax</summary>
``` xml
<kafka>
<!-- Global configuration options for all tables of Kafka engine type -->
<debug>cgrp</debug>
<auto_offset_reset>smallest</auto_offset_reset>
</kafka>
<!-- Configuration specific for topic "logs" -->
<!-- Configuration specific to topics "logs" and "stats" -->
<!-- Does NOT support periods in topic names, e.g. "logs.security" -->
<kafka_logs>
<retry_backoff_ms>250</retry_backoff_ms>
<fetch_min_bytes>100000</fetch_min_bytes>
</kafka_logs>
<kafka_stats>
<retry_backoff_ms>400</retry_backoff_ms>
<fetch_min_bytes>50000</fetch_min_bytes>
</kafka_stats>
```
</details>
For a list of possible configuration options, see the [librdkafka configuration reference](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Use the underscore (`_`) instead of a dot in the ClickHouse configuration. For example, `check.crcs=true` will be `<check_crcs>true</check_crcs>`.
### Kerberos support {#kafka-kerberos-support}

View File

@ -5,6 +5,10 @@ description: Install ClickHouse
slug: /en/install
---
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import CodeBlock from '@theme/CodeBlock';
# Install ClickHouse
You have three options for getting up and running with ClickHouse:
@ -19,17 +23,27 @@ The quickest and easiest way to get up and running with ClickHouse is to create
## Self-Managed Install
:::tip
For production installs of a specific release version see the [installation options](#available-installation-options) down below.
:::
<Tabs>
<TabItem value="linux" label="Linux" default>
1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable:
```bash
curl https://clickhouse.com/ | sh
```
1. Run the `install` command, which defines a collection of useful symlinks along with the files and folders used by ClickHouse - all of which you can see in the output of the install script:
```bash
sudo ./clickhouse install
```
1. At the end of the install script, you are prompted for a password for the `default` user. Feel free to enter a password, or you can optionally leave it blank:
```response
Creating log directory /var/log/clickhouse-server.
Creating data directory /var/lib/clickhouse.
@ -40,6 +54,7 @@ The quickest and easiest way to get up and running with ClickHouse is to create
Enter password for default user:
```
You should see the following output:
```response
ClickHouse has been successfully installed.
@ -55,6 +70,41 @@ The quickest and easiest way to get up and running with ClickHouse is to create
sudo clickhouse start
```
</TabItem>
<TabItem value="macos" label="macOS">
1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable:
```bash
curl https://clickhouse.com/ | sh
```
1. Run the ClickHouse server:
```bash
./clickhouse server
```
1. Open a new terminal and use the **clickhouse-client** to connect to your service:
```bash
./clickhouse client
```
```response
./clickhouse client
ClickHouse client version 23.2.1.1501 (official build).
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 23.2.1 revision 54461.
local-host :)
```
You are ready to start sending DDL and SQL commands to ClickHouse!
</TabItem>
</Tabs>
:::tip
The [Quick Start](/docs/en/quick-start.mdx/#step-1-get-clickhouse) walks through the steps to download and run ClickHouse, connect to it, and insert data.
:::

View File

@ -3948,3 +3948,71 @@ Default value: `0`.
:::note
Use this setting only for backward compatibility if your use cases depend on old syntax.
:::
## final {#final}
Automatically applies the [FINAL](../../sql-reference/statements/select/from/#final-modifier) modifier to all tables in a query for which [FINAL](../../sql-reference/statements/select/from/#final-modifier) is applicable, including joined tables, tables in sub-queries, and distributed tables.
Possible values:
- 0 - disabled
- 1 - enabled
Default value: `0`.
Example:
```sql
CREATE TABLE test
(
key Int64,
some String
)
ENGINE = ReplacingMergeTree
ORDER BY key;
INSERT INTO test FORMAT Values (1, 'first');
INSERT INTO test FORMAT Values (1, 'second');
SELECT * FROM test;
┌─key─┬─some───┐
│ 1 │ second │
└─────┴────────┘
┌─key─┬─some──┐
│ 1 │ first │
└─────┴───────┘
SELECT * FROM test SETTINGS final = 1;
┌─key─┬─some───┐
│ 1 │ second │
└─────┴────────┘
SET final = 1;
SELECT * FROM test;
┌─key─┬─some───┐
│ 1 │ second │
└─────┴────────┘
```
## asterisk_include_materialized_columns {#asterisk_include_materialized_columns}
Include [MATERIALIZED](../../sql-reference/statements/create/table/#materialized) columns in wildcard queries (`SELECT *`).
Possible values:
- 0 - disabled
- 1 - enabled
Default value: `0`.
## asterisk_include_alias_columns {#asterisk_include_alias_columns}
Include [ALIAS](../../sql-reference/statements/create/table/#alias) columns in wildcard queries (`SELECT *`).
Possible values:
- 0 - disabled
- 1 - enabled
Default value: `0`.
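A quick sketch of how these two settings interact with `SELECT *` (the table and column names below are arbitrary, chosen only for illustration):
```sql
CREATE TABLE t
(
    id UInt64,
    doubled UInt64 MATERIALIZED id * 2,
    label String ALIAS concat('id_', toString(id))
)
ENGINE = MergeTree
ORDER BY id;

INSERT INTO t (id) VALUES (1);

SELECT * FROM t;                                                     -- returns only `id`
SELECT * FROM t SETTINGS asterisk_include_materialized_columns = 1;  -- also returns `doubled`
SELECT * FROM t SETTINGS asterisk_include_alias_columns = 1;         -- also returns `label`
```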

View File

@ -1,7 +1,7 @@
---
slug: /en/sql-reference/functions/string-replace-functions
sidebar_position: 42
sidebar_label: For Replacing in Strings
sidebar_label: Replacing in Strings
---
# Functions for Searching and Replacing in Strings

View File

@ -1,7 +1,7 @@
---
slug: /en/sql-reference/functions/string-search-functions
sidebar_position: 41
sidebar_label: For Searching in Strings
sidebar_label: Searching in Strings
---
# Functions for Searching in Strings
@ -382,12 +382,12 @@ Checks whether string `haystack` matches the regular expression `pattern`. The p
Returns 1 in case of a match, and 0 otherwise.
Matching is based on UTF-8, e.g. `.` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes. The regular expression must not contain null bytes.
If the haystack or pattern contain a sequence of bytes that are not valid UTF-8, the behavior is undefined.
No automatic Unicode normalization is performed, if you need it you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
If the haystack or the pattern are not valid UTF-8, then the behavior is undefined.
No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
Unlike re2's default behavior, `.` matches line breaks. To disable this, prepend the pattern with `(?-s)`.
For patterns to search for substrings in a string, it is better to use LIKE or position, since they work much faster.
For patterns to search for substrings in a string, it is better to use functions [like](#like) or [position](#position) since they work much faster.
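For illustration, a minimal sketch of the difference (the input string is arbitrary). All three queries return 1, but the substring-based variants avoid the regular expression engine and are typically much faster:
```sql
SELECT match('Hello, ClickHouse!', 'Click');         -- regular expression match
SELECT position('Hello, ClickHouse!', 'Click') > 0;  -- plain substring search
SELECT 'Hello, ClickHouse!' LIKE '%Click%';          -- internally reduced to a substring search
```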
## multiMatchAny(haystack, \[pattern<sub>1</sub>, pattern<sub>2</sub>, …, pattern<sub>n</sub>\])
@ -529,21 +529,25 @@ Result:
## like(haystack, pattern), haystack LIKE pattern operator
Checks whether a string matches a simple regular expression.
The regular expression can contain the metasymbols `%` and `_`.
Checks whether a string matches a LIKE expression.
A LIKE expression contains a mix of normal characters and the following metasymbols:
`%` indicates any quantity of any bytes (including zero characters).
- `%` indicates an arbitrary number of arbitrary characters (including zero characters).
`_` indicates any one byte.
- `_` indicates a single arbitrary character.
Use the backslash (`\`) for escaping metasymbols. See the note on escaping in the description of the match function.
- `\` is for escaping literals `%`, `_` and `\`.
Matching is based on UTF-8, e.g. `_` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes.
If the haystack or pattern contain a sequence of bytes that are not valid UTF-8, then the behavior is undefined.
No automatic Unicode normalization is performed, if you need it you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
If the haystack or the pattern are not valid UTF-8, then the behavior is undefined.
No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
For regular expressions like `%needle%`, the code is more optimal and works as fast as the `position` function.
For other regular expressions, the code is the same as for the match function.
To match against literals `%`, `_` and `/` (which are LIKE metacharacters), prepend them with a backslash, i.e. `\%`, `\_` and `\\`.
The backslash loses its special meaning, i.e. is interpreted literally, if it prepends a character different than `%`, `_` or `\`.
Note that ClickHouse requires backslashes in strings [to be quoted as well](../syntax.md#string), so you would actually need to write `\\%`, `\\_` and `\\\\`.
For patterns of the form `%needle%`, the function is as fast as the `position` function.
Other LIKE expressions are internally converted to a regular expression and executed with a performance similar to function `match`.
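A short sketch of the metasymbols and escaping described above (the values are arbitrary; note that the backslashes are doubled because they must also be escaped inside the string literal):
```sql
SELECT 'abc'  LIKE 'a%';      -- 1: % matches any number of characters (including none)
SELECT 'abc'  LIKE 'a_c';     -- 1: _ matches exactly one character
SELECT '100%' LIKE '100\\%';  -- 1: \% matches a literal percent sign
SELECT 'a\\b' LIKE 'a\\\\b';  -- 1: \\ matches a literal backslash
```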
## notLike(haystack, pattern), haystack NOT LIKE pattern operator

View File

@ -1,8 +1,8 @@
---
slug: /en/sql-reference/functions/tuple-map-functions
sidebar_position: 46
sidebar_label: Working with maps
title: "Functions for maps"
sidebar_label: Maps
title: "Functions for Maps"
---
## map
@ -440,7 +440,7 @@ mapApply(func, map)
**Parameters**
- `func` - [Lamda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).
**Returned value**
@ -480,7 +480,7 @@ mapFilter(func, map)
**Parameters**
- `func` - [Lamda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).
**Returned value**

View File

@ -127,6 +127,26 @@ Default expressions may be defined as an arbitrary expression from table constan
Normal default value. If the INSERT query does not specify the corresponding column, it will be filled in by computing the corresponding expression.
Example:
```sql
CREATE OR REPLACE TABLE test
(
id UInt64,
updated_at DateTime DEFAULT now(),
updated_at_date Date DEFAULT toDate(updated_at)
)
ENGINE = MergeTree
ORDER BY id;
INSERT INTO test (id) Values (1);
SELECT * FROM test;
┌─id─┬──────────updated_at─┬─updated_at_date─┐
│ 1 │ 2023-02-24 17:06:46 │ 2023-02-24 │
└────┴─────────────────────┴─────────────────┘
```
### MATERIALIZED
`MATERIALIZED expr`
@ -135,6 +155,36 @@ Materialized expression. Such a column can't be specified for INSERT, because
For an INSERT without a list of columns, these columns are not considered.
In addition, this column is not substituted when using an asterisk in a SELECT query. This is to preserve the invariant that the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
Example:
```sql
CREATE OR REPLACE TABLE test
(
id UInt64,
updated_at DateTime MATERIALIZED now(),
updated_at_date Date MATERIALIZED toDate(updated_at)
)
ENGINE = MergeTree
ORDER BY id;
INSERT INTO test Values (1);
SELECT * FROM test;
┌─id─┐
│ 1 │
└────┘
SELECT id, updated_at, updated_at_date FROM test;
┌─id─┬──────────updated_at─┬─updated_at_date─┐
│ 1 │ 2023-02-24 17:08:08 │ 2023-02-24 │
└────┴─────────────────────┴─────────────────┘
SELECT * FROM test SETTINGS asterisk_include_materialized_columns=1;
┌─id─┬──────────updated_at─┬─updated_at_date─┐
│ 1 │ 2023-02-24 17:08:08 │ 2023-02-24 │
└────┴─────────────────────┴─────────────────┘
```
### EPHEMERAL
`EPHEMERAL [expr]`
@ -142,6 +192,34 @@ In addition, this column is not substituted when using an asterisk in a SELECT q
Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the default expressions of the CREATE statement. If `expr` is omitted, the column type must be specified explicitly.
An INSERT without a list of columns will skip such a column, so the SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
Example:
```sql
CREATE OR REPLACE TABLE test
(
id UInt64,
unhexed String EPHEMERAL,
hexed FixedString(4) DEFAULT unhex(unhexed)
)
ENGINE = MergeTree
ORDER BY id
INSERT INTO test (id, unhexed) Values (1, '5a90b714');
SELECT
id,
hexed,
hex(hexed)
FROM test
FORMAT Vertical;
Row 1:
──────
id: 1
hexed: Z<><5A>
hex(hexed): 5A90B714
```
### ALIAS
`ALIAS expr`
@ -156,6 +234,29 @@ If you add a new column to a table but later change its default expression, the
It is not possible to set default values for elements in nested data structures.
```sql
CREATE OR REPLACE TABLE test
(
id UInt64,
size_bytes Int64,
size String Alias formatReadableSize(size_bytes)
)
ENGINE = MergeTree
ORDER BY id;
INSERT INTO test Values (1, 4678899);
SELECT id, size_bytes, size FROM test;
┌─id─┬─size_bytes─┬─size─────┐
│ 1 │ 4678899 │ 4.46 MiB │
└────┴────────────┴──────────┘
SELECT * FROM test SETTINGS asterisk_include_alias_columns=1;
┌─id─┬─size_bytes─┬─size─────┐
│ 1 │ 4678899 │ 4.46 MiB │
└────┴────────────┴──────────┘
```
## Primary Key
You can define a [primary key](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries) when creating a table. Primary key can be specified in two ways:

View File

@ -36,6 +36,8 @@ Queries that use `FINAL` are executed slightly slower than similar queries that
**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven't happened yet and deal with it by applying aggregation (for example, to discard duplicates).
`FINAL` can be applied automatically using [FINAL](../../../operations/settings/settings.md#final) setting to all tables in a query using a session or a user profile.
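For example (a minimal sketch, assuming a `ReplacingMergeTree` table named `test` as in the setting's documentation), the following two queries produce the same result:
```sql
-- FINAL written explicitly for the table:
SELECT * FROM test FINAL;

-- FINAL applied automatically to every applicable table in the query:
SELECT * FROM test SETTINGS final = 1;
```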
## Implementation Details
If the `FROM` clause is omitted, data will be read from the `system.one` table.

View File

@ -86,10 +86,10 @@ Examples: `1`, `10_000_000`, `0xffff_ffff`, `18446744073709551615`, `0xDEADBEEF`
String literals must be enclosed in single quotes, double quotes are not supported.
Escaping works either
- in SQL-style based on a preceding single quote where the single-quote character `'` (and only this character) can be escaped as `''`, or
- in C-style based on a preceding backslash with the following supported escape sequences: `\\`, `\'`, `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. The backslash loses its special meaning and will be interpreted literally if it precedes characters different than the listed ones.
- using a preceding single quote where the single-quote character `'` (and only this character) can be escaped as `''`, or
- using a preceding backslash with the following supported escape sequences: `\\`, `\'`, `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. The backslash loses its special meaning, i.e. will be interpreted literally, if it precedes characters different than the listed ones.
In string literals, you need to escape at least `'` and `\` using escape codes `''` (or: `\'`) and `\\`.
In string literals, you need to escape at least `'` and `\` using escape codes `\'` (or: `''`) and `\\`.
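For example (a minimal sketch with arbitrary values), both escaping styles produce the same string:
```sql
SELECT 'It''s a string';   -- SQL-style: '' stands for a single quote
SELECT 'It\'s a string';   -- C-style: \' stands for a single quote
SELECT 'a\tb\\c';          -- C-style escape sequences: a tab and a literal backslash
```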
### Compound

View File

@ -567,7 +567,7 @@ SELECT
ts,
value,
round(avg(value) OVER (PARTITION BY metric ORDER BY toDate(ts)
Range BETWEEN 10 PRECEDING AND CURRENT ROW),2) moving_avg_10_days_temp
Range BETWEEN 10 PRECEDING AND CURRENT ROW),2) AS moving_avg_10_days_temp
FROM sensors
ORDER BY
metric ASC,

View File

@ -1,5 +1,8 @@
#include "LocalServer.h"
#include <sys/resource.h>
#include <Common/logger_useful.h>
#include <base/errnoToString.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/String.h>
#include <Poco/Logger.h>
@ -179,9 +182,9 @@ void LocalServer::tryInitPath()
parent_folder = std::filesystem::temp_directory_path();
}
catch (const fs::filesystem_error& e)
catch (const fs::filesystem_error & e)
{
// tmp folder don't exists? misconfiguration? chroot?
// The tmp folder doesn't exist? Is it a misconfiguration? Or chroot?
LOG_DEBUG(log, "Can not get temporary folder: {}", e.what());
parent_folder = std::filesystem::current_path();
@ -390,6 +393,21 @@ try
std::cout << std::fixed << std::setprecision(3);
std::cerr << std::fixed << std::setprecision(3);
/// Try to increase limit on number of open files.
{
rlimit rlim;
if (getrlimit(RLIMIT_NOFILE, &rlim))
throw Poco::Exception("Cannot getrlimit");
if (rlim.rlim_cur < rlim.rlim_max)
{
rlim.rlim_cur = config().getUInt("max_open_files", static_cast<unsigned>(rlim.rlim_max));
int rc = setrlimit(RLIMIT_NOFILE, &rlim);
if (rc != 0)
std::cerr << fmt::format("Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, errnoToString()) << '\n';
}
}
#if defined(FUZZING_MODE)
static bool first_time = true;
if (first_time)

View File

@ -6,7 +6,6 @@
#include <sys/types.h>
#include <pwd.h>
#include <unistd.h>
#include <Poco/Version.h>
#include <Poco/Net/HTTPServer.h>
#include <Poco/Net/NetException.h>
#include <Poco/Util/HelpFormatter.h>

View File

@ -63,6 +63,9 @@ static constexpr const char * getNameByTrait()
template <typename T>
struct GroupArraySamplerData
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
@ -97,6 +100,9 @@ struct GroupArrayNumericData;
template <typename T>
struct GroupArrayNumericData<T, false>
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;

View File

@ -32,6 +32,9 @@ namespace ErrorCodes
template <typename T>
struct MovingData
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
using Accumulator = T;
/// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena

View File

@ -117,7 +117,21 @@ public:
const auto & value = this->data(place).value;
size_t size = value.size();
writeVarUInt(size, buf);
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
/// In this version, pairs were serialized with padding.
/// We must ensure that padding bytes are zero-filled.
static_assert(offsetof(typename MaxIntersectionsData<PointType>::Value, first) == 0);
static_assert(offsetof(typename MaxIntersectionsData<PointType>::Value, second) > 0);
char zero_padding[offsetof(typename MaxIntersectionsData<PointType>::Value, second) - sizeof(value[0].first)]{};
for (size_t i = 0; i < size; ++i)
{
writePODBinary(value[i].first, buf);
writePODBinary(zero_padding, buf);
writePODBinary(value[i].second, buf);
}
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override

View File

@ -432,6 +432,7 @@ ReplxxLineReader::ReplxxLineReader(
};
rx.bind_key(Replxx::KEY::control('R'), interactive_history_search);
#endif
/// Rebind regular incremental search to C-T.
///
@ -443,7 +444,6 @@ ReplxxLineReader::ReplxxLineReader(
uint32_t reverse_search = Replxx::KEY::control('R');
return rx.invoke(Replxx::ACTION::HISTORY_INCREMENTAL_SEARCH, reverse_search);
});
#endif
}
ReplxxLineReader::~ReplxxLineReader()

View File

@ -747,7 +747,7 @@ namespace
*/
template<typename IntType>
requires (std::is_same_v<IntType, Int32> || std::is_same_v<IntType, UInt32>)
void replicateSSE42Int32(const IntType * __restrict data, IntType * __restrict result_data, const IColumn::Offsets & offsets)
void replicateSSE2Int32(const IntType * __restrict data, IntType * __restrict result_data, const IColumn::Offsets & offsets)
{
const IntType * data_copy_begin_ptr = nullptr;
size_t offsets_size = offsets.size();
@ -842,7 +842,7 @@ ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
#ifdef __SSE2__
if constexpr (std::is_same_v<T, UInt32>)
{
replicateSSE42Int32(getData().data(), res->getData().data(), offsets);
replicateSSE2Int32(getData().data(), res->getData().data(), offsets);
return res;
}
#endif

View File

@ -533,9 +533,16 @@ void AsynchronousMetrics::update(TimePoint update_time)
AsynchronousMetricValues new_values;
auto current_time = std::chrono::system_clock::now();
auto time_after_previous_update [[maybe_unused]] = current_time - previous_update_time;
auto time_after_previous_update = current_time - previous_update_time;
previous_update_time = update_time;
double update_interval = 0.;
if (first_run)
update_interval = update_period.count();
else
update_interval = std::chrono::duration_cast<std::chrono::microseconds>(time_after_previous_update).count() / 1e6;
new_values["AsynchronousMetricsUpdateInterval"] = { update_interval, "Metrics update interval" };
/// This is also a good indicator of system responsiveness.
new_values["Jitter"] = { std::chrono::duration_cast<std::chrono::nanoseconds>(current_time - update_time).count() / 1e9,
"The difference in time the thread for calculation of the asynchronous metrics was scheduled to wake up and the time it was in fact, woken up."

View File

@ -237,6 +237,7 @@ void ThreadStatus::setFatalErrorCallback(std::function<void()> callback)
void ThreadStatus::onFatalError()
{
std::lock_guard lock(thread_group->mutex);
if (fatal_error_callback)
fatal_error_callback();
}

View File

@ -134,6 +134,8 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
}
static std::atomic<bool> fatal_error_printed{false};
/** Handler for "fault" or diagnostic signals. Send data about fault to separate thread to write into log.
*/
static void signalHandler(int sig, siginfo_t * info, void * context)
@ -159,7 +161,16 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
if (sig != SIGTSTP) /// This signal is used for debugging.
{
/// The time that is usually enough for separate thread to print info into log.
sleepForSeconds(20); /// FIXME: use some feedback from threads that process stacktrace
/// Under MSan full stack unwinding with DWARF info about inline functions takes 101 seconds in one case.
for (size_t i = 0; i < 300; ++i)
{
/// We will synchronize with the thread printing the messages with an atomic variable to finish earlier.
if (fatal_error_printed)
break;
/// This coarse method of synchronization is perfectly ok for fatal signals.
sleepForSeconds(1);
}
call_default_signal_handler(sig);
}
@ -309,8 +320,10 @@ private:
}
if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())
{
DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace);
}
}
std::string signal_description = "Unknown signal";
@ -407,6 +420,8 @@ private:
/// When everything is done, we will try to send these error messages to client.
if (thread_ptr)
thread_ptr->onFatalError();
fatal_error_printed = true;
}
};

View File

@ -64,7 +64,7 @@ static DataTypePtr create(const ASTPtr & arguments)
return std::make_shared<DataTypeDateTime>();
const auto scale = getArgument<UInt64, ArgumentKind::Optional>(arguments, 0, "scale", "DateTime");
const auto timezone = getArgument<String, ArgumentKind::Optional>(arguments, !!scale, "timezone", "DateTime");
const auto timezone = getArgument<String, ArgumentKind::Optional>(arguments, scale ? 1 : 0, "timezone", "DateTime");
if (!scale && !timezone)
throw Exception::createDeprecated(getExceptionMessage(" has wrong type: ", 0, "scale", "DateTime", Field::Types::Which::UInt64),

View File

@ -1112,7 +1112,9 @@ private:
{
ToType h;
if constexpr (std::endian::native == std::endian::little)
{
h = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
}
else
{
char tmp_buffer[sizeof(vec_from[i])];
@ -1131,7 +1133,9 @@ private:
ToType h;
if constexpr (std::endian::native == std::endian::little)
{
h = apply(key, reinterpret_cast<const char *>(&value), sizeof(value));
}
else
{
char tmp_buffer[sizeof(value)];
@ -1271,7 +1275,7 @@ private:
{
/// NOTE: here, of course, you can do without the materialization of the column.
ColumnPtr full_column = col_from_const->convertToFullColumn();
executeArray<first>(key, type, &*full_column, vec_to);
executeArray<first>(key, type, full_column.get(), vec_to);
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
@ -1283,6 +1287,10 @@ private:
{
WhichDataType which(from_type);
if (icolumn->size() != vec_to.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}",
icolumn->getName(), icolumn->size(), vec_to.size(), getName());
if (which.isUInt8()) executeIntType<UInt8, first>(key, icolumn, vec_to);
else if (which.isUInt16()) executeIntType<UInt16, first>(key, icolumn, vec_to);
else if (which.isUInt32()) executeIntType<UInt32, first>(key, icolumn, vec_to);
@ -1343,10 +1351,9 @@ private:
const auto & type_map = assert_cast<const DataTypeMap &>(*type);
executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
}
else if (const auto * const_map = checkAndGetColumnConstData<ColumnMap>(column))
else if (const auto * const_map = checkAndGetColumnConst<ColumnMap>(column))
{
const auto & type_map = assert_cast<const DataTypeMap &>(*type);
executeForArgument(key, type_map.getNestedType().get(), const_map->getNestedColumnPtr().get(), vec_to, is_first);
executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
}
else
{
@ -1382,8 +1389,7 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
size_t rows = input_rows_count;
auto col_to = ColumnVector<ToType>::create(rows);
auto col_to = ColumnVector<ToType>::create(input_rows_count);
typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
@ -1395,7 +1401,7 @@ public:
if (arguments.size() <= first_data_argument)
{
/// Return a fixed random-looking magic number when input is empty
vec_to.assign(rows, static_cast<ToType>(0xe28dbde7fe22e41c));
vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
}
KeyType key{};

View File

@ -26,6 +26,7 @@
# pragma GCC diagnostic pop
#endif
namespace DB
{
@ -66,12 +67,12 @@ ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
if (!low_cardinality_type)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Incompatible type for low cardinality column: {}",
"Incompatible type for LowCardinality column: {}",
column.type->getName());
if (can_be_executed_on_default_arguments)
{
/// Normal case, when function can be executed on values's default.
/// Normal case, when function can be executed on values' default.
column.column = low_cardinality_column->getDictionary().getNestedColumn();
indexes = low_cardinality_column->getIndexesPtr();
}
@ -280,6 +281,7 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
auto res = executeWithoutLowCardinalityColumns(columns_without_low_cardinality, dictionary_type, new_input_rows_count, dry_run);
bool res_is_constant = isColumnConst(*res);
auto keys = res_is_constant
? res->cloneResized(1)->convertToFullColumnIfConst()
: res;

View File

@ -23,9 +23,10 @@ namespace ErrorCodes
namespace impl
{
/// Is the [I]LIKE expression reduced to finding a substring in a string?
/// Is the [I]LIKE expression equivalent to a substring search?
inline bool likePatternIsSubstring(std::string_view pattern, String & res)
{
/// TODO: ignore multiple leading or trailing %
if (pattern.size() < 2 || !pattern.starts_with('%') || !pattern.ends_with('%'))
return false;
@ -45,10 +46,26 @@ inline bool likePatternIsSubstring(std::string_view pattern, String & res)
case '\\':
++pos;
if (pos == end)
/// pattern ends with \% --> trailing % is to be taken literally and pattern doesn't qualify for substring search
return false;
else
{
switch (*pos)
{
/// Known LIKE escape sequences:
case '%':
case '_':
case '\\':
res += *pos;
break;
/// For all other escape sequences, the backslash loses its special meaning
default:
res += '\\';
res += *pos;
break;
}
}
break;
default:
res += *pos;
break;

View File

@ -75,6 +75,7 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
Pos dot_pos = nullptr;
Pos colon_pos = nullptr;
bool has_sub_delims = false;
bool has_at_symbol = false;
bool has_terminator_after_colon = false;
const auto * start_of_host = pos;
@ -97,25 +98,35 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
case '@': /// myemail@gmail.com
if (has_terminator_after_colon) return std::string_view{};
if (has_at_symbol) goto done;
has_sub_delims = false;
has_at_symbol = true;
start_of_host = pos + 1;
break;
case ';':
case '=':
case '&':
case '~':
case '%':
/// Symbols above are sub-delims in RFC3986 and should be
/// allowed for userinfo (named identification here).
///
/// NOTE: those symbols are also allowed for reg-name (host),
/// but right now host parsing looks more like RFC1034
/// (in other words, domains that are allowed to be registered).
has_sub_delims = true;
continue;
case ' ': /// restricted symbols in whole URL
case '\t':
case '<':
case '>':
case '%':
case '{':
case '}':
case '|':
case '\\':
case '^':
case '~':
case '[':
case ']':
case ';':
case '=':
case '&':
if (colon_pos == nullptr)
return std::string_view{};
else
@ -124,6 +135,8 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
}
done:
if (has_sub_delims)
return std::string_view{};
if (!has_at_symbol)
pos = colon_pos ? colon_pos : pos;
return checkAndReturnHost(pos, dot_pos, start_of_host);

View File

@ -7,6 +7,7 @@
namespace DB
{
/// NOTE: Implementation is not RFC3986 compatible
struct ExtractNetloc
{
/// We use the same as domain function
@ -67,6 +68,7 @@ struct ExtractNetloc
/// Now pos points to the first byte after scheme (if there is).
bool has_identification = false;
Pos hostname_end = end;
Pos question_mark_pos = end;
Pos slash_pos = end;
Pos start_of_host = pos;
@ -90,25 +92,39 @@ struct ExtractNetloc
return std::string_view(start_of_host, pos - start_of_host);
case '@': /// foo:bar@example.ru
has_identification = true;
hostname_end = end;
break;
case ';':
case '=':
case '&':
case '~':
case '%':
/// Symbols above are sub-delims in RFC3986 and should be
/// allowed for userinfo (named identification here).
///
/// NOTE: those symbols are also allowed for reg-name (host),
/// but right now host parsing looks more like RFC1034
/// (in other words, domains that are allowed to be registered).
if (!has_identification)
{
hostname_end = pos;
break;
}
[[fallthrough]];
case ' ': /// restricted symbols in whole URL
case '\t':
case '<':
case '>':
case '%':
case '{':
case '}':
case '|':
case '\\':
case '^':
case '~':
case '[':
case ']':
case ';':
case '=':
case '&':
return pos > start_of_host
? std::string_view(start_of_host, std::min(std::min(pos - 1, question_mark_pos), slash_pos) - start_of_host)
? std::string_view(start_of_host, std::min(std::min(pos, question_mark_pos), slash_pos) - start_of_host)
: std::string_view();
}
}
@ -116,7 +132,7 @@ struct ExtractNetloc
if (has_identification)
return std::string_view(start_of_host, pos - start_of_host);
else
return std::string_view(start_of_host, std::min(std::min(pos, question_mark_pos), slash_pos) - start_of_host);
return std::string_view(start_of_host, std::min(std::min(std::min(pos, question_mark_pos), slash_pos), hostname_end) - start_of_host);
}
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)

View File

@ -48,12 +48,12 @@ bool SlowWithHyperscanChecker::isSlowOneRepeat(std::string_view regexp)
re2_st::StringPiece haystack(regexp.data(), regexp.size());
re2_st::StringPiece matches[2];
size_t start_pos = 0;
while (start_pos < regexp.size())
while (start_pos < haystack.size())
{
if (searcher_one_repeat.Match(haystack, start_pos, regexp.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 2))
if (searcher_one_repeat.Match(haystack, start_pos, haystack.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 2))
{
const auto & match = matches[0];
start_pos += (matches[0].data() - haystack.data()) + match.length(); // fwd by prefix + match length
start_pos = (matches[0].data() - haystack.data()) + match.size(); // new start pos = prefix before match + match length
const auto & submatch = matches[1];
if (isLargerThanFifty({submatch.data(), submatch.size()}))
return true;
@ -70,12 +70,12 @@ bool SlowWithHyperscanChecker::isSlowTwoRepeats(std::string_view regexp)
re2_st::StringPiece haystack(regexp.data(), regexp.size());
re2_st::StringPiece matches[3];
size_t start_pos = 0;
while (start_pos < regexp.size())
while (start_pos < haystack.size())
{
if (searcher_two_repeats.Match(haystack, start_pos, regexp.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 3))
if (searcher_two_repeats.Match(haystack, start_pos, haystack.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 3))
{
const auto & match = matches[0];
start_pos += (matches[0].data() - haystack.data()) + match.length(); // fwd by prefix + match length
start_pos = (matches[0].data() - haystack.data()) + match.size(); // new start pos = prefix before match + match length
const auto & submatch1 = matches[1];
const auto & submatch2 = matches[2];
if (isLargerThanFifty({submatch1.data(), submatch1.size()})

View File

@ -21,7 +21,12 @@ inline String likePatternToRegexp(std::string_view pattern)
const char * const end = pattern.begin() + pattern.size();
if (pos < end && *pos == '%')
++pos;
/// Eat leading %
while (++pos < end)
{
if (*pos != '%')
break;
}
else
res = "^";
@ -29,7 +34,18 @@ inline String likePatternToRegexp(std::string_view pattern)
{
switch (*pos)
{
case '^': case '$': case '.': case '[': case '|': case '(': case ')': case '?': case '*': case '+': case '{':
/// Quote characters which have a special meaning in re2
case '^':
case '$':
case '.':
case '[':
case '|':
case '(':
case ')':
case '?':
case '*':
case '+':
case '{':
res += '\\';
res += *pos;
break;
@ -44,22 +60,23 @@ inline String likePatternToRegexp(std::string_view pattern)
break;
case '\\':
if (pos + 1 == end)
throw Exception(ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE, "Invalid escape sequence at the end of LIKE pattern");
/// Known escape sequences.
if (pos[1] == '%' || pos[1] == '_')
throw Exception(ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE, "Invalid escape sequence at the end of LIKE pattern '{}'", pattern);
switch (pos[1])
{
/// Interpret quoted LIKE metacharacters %, _ and \ as literals:
case '%':
case '_':
res += pos[1];
++pos;
}
else if (pos[1] == '\\')
{
res += "\\\\";
break;
case '\\':
res += "\\\\"; /// backslash has a special meaning in re2 --> quote it
++pos;
}
else
{
/// Unknown escape sequence treated literally: as backslash and the following character.
break;
/// Unknown escape sequence treated literally: as backslash (which must be quoted in re2) + the following character
default:
res += "\\\\";
break;
}
break;
default:

View File

@ -332,7 +332,7 @@ public:
}
size_t col_key_size = sub_map_column->size();
auto column = is_const? ColumnConst::create(std::move(sub_map_column), std::move(col_key_size)) : std::move(sub_map_column);
auto column = is_const ? ColumnConst::create(std::move(sub_map_column), std::move(col_key_size)) : std::move(sub_map_column);
ColumnsWithTypeAndName new_arguments =
{

View File

@ -27,6 +27,7 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int CANNOT_DLOPEN;
extern const int LOGICAL_ERROR;
}
@ -174,6 +175,10 @@ public:
if (!handle)
throw Exception(ErrorCodes::CANNOT_DLOPEN, "Cannot dlopen: ({})", dlerror()); // NOLINT(concurrency-mt-unsafe) // MT-Safe on Linux, see man dlerror
}
else if (mode == "logical error")
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: trap");
}
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown trap mode");
}

View File

@ -224,9 +224,9 @@ namespace detail
enum class InitializeError
{
RETRIABLE_ERROR,
RETRYABLE_ERROR,
/// If error is not retriable, `exception` variable must be set.
NON_RETRIABLE_ERROR,
NON_RETRYABLE_ERROR,
/// Allows to skip not found urls for globs
SKIP_NOT_FOUND_URL,
NONE,
@ -398,7 +398,7 @@ namespace detail
}
else if (!isRetriableError(http_status))
{
initialization_error = InitializeError::NON_RETRIABLE_ERROR;
initialization_error = InitializeError::NON_RETRYABLE_ERROR;
exception = std::current_exception();
}
else
@ -409,7 +409,7 @@ namespace detail
}
/**
* Throws if error is retriable, otherwise sets initialization_error = NON_RETRIABLE_ERROR and
* Throws if error is retryable, otherwise sets initialization_error = NON_RETRYABLE_ERROR and
* saves exception into `exception` variable. In case url is not found and skip_not_found_url == true,
* sets initialization_error = SKIP_NOT_FOUND_URL, otherwise throws.
*/
@ -453,9 +453,9 @@ namespace detail
/// Retry 200OK
if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_OK)
initialization_error = InitializeError::RETRIABLE_ERROR;
initialization_error = InitializeError::RETRYABLE_ERROR;
else
initialization_error = InitializeError::NON_RETRIABLE_ERROR;
initialization_error = InitializeError::NON_RETRYABLE_ERROR;
return;
}
@ -544,7 +544,7 @@ namespace detail
{
initialize();
if (initialization_error == InitializeError::NON_RETRIABLE_ERROR)
if (initialization_error == InitializeError::NON_RETRYABLE_ERROR)
{
assert(exception);
break;
@ -553,7 +553,7 @@ namespace detail
{
return false;
}
else if (initialization_error == InitializeError::RETRIABLE_ERROR)
else if (initialization_error == InitializeError::RETRYABLE_ERROR)
{
LOG_ERROR(
log,
@ -582,8 +582,11 @@ namespace detail
}
catch (const Poco::Exception & e)
{
/**
* Retry request unconditionally if nothing has been read yet.
/// Too many open files - non-retryable.
if (e.code() == POCO_EMFILE)
throw;
/** Retry request unconditionally if nothing has been read yet.
* Otherwise if it is GET method retry with range header.
*/
bool can_retry_request = !offset_from_begin_pos || method == Poco::Net::HTTPRequest::HTTP_GET;

View File

@ -1946,6 +1946,9 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
}
auto conjunction = getConjunctionNodes(predicate, allowed_nodes);
if (conjunction.rejected.size() == 1 && WhichDataType{conjunction.rejected.front()->result_type}.isFloat())
return nullptr;
auto actions = cloneActionsForConjunction(conjunction.allowed, all_inputs);
if (!actions)
return nullptr;
@ -2011,10 +2014,12 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
node.children.swap(new_children);
*predicate = std::move(node);
}
else
else if (!WhichDataType{new_children.front()->result_type}.isFloat())
{
/// If type is different, cast column.
/// This case is possible because AND can use any numeric type as an argument,
/// but casting floats to UInt8 or Bool produces different results,
/// so we can't apply this optimization to them.
Node node;
node.type = ActionType::COLUMN;
node.result_name = predicate->result_type->getName();
@ -2036,8 +2041,20 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
else
{
/// Predicate is function AND, which still has more than one argument.
/// Or there is only one argument that is a float and we can't just
/// remove the AND.
/// Just update children and rebuild it.
predicate->children.swap(new_children);
if (WhichDataType{predicate->children.front()->result_type}.isFloat())
{
Node node;
node.type = ActionType::COLUMN;
node.result_name = "1";
node.column = DataTypeUInt8().createColumnConst(0, 1u);
node.result_type = std::make_shared<DataTypeUInt8>();
const auto * const_col = &nodes.emplace_back(std::move(node));
predicate->children.emplace_back(const_col);
}
auto arguments = prepareFunctionArguments(predicate->children);
FunctionOverloadResolverPtr func_builder_and = std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionAnd>());

View File

@ -147,6 +147,7 @@ void DatabaseCatalog::initializeAndLoadTemporaryDatabase()
unused_dir_hide_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_hide_timeout_sec", unused_dir_hide_timeout_sec);
unused_dir_rm_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_rm_timeout_sec", unused_dir_rm_timeout_sec);
unused_dir_cleanup_period_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_cleanup_period_sec", unused_dir_cleanup_period_sec);
drop_error_cooldown_sec = getContext()->getConfigRef().getInt64("database_catalog_drop_error_cooldown_sec", drop_error_cooldown_sec);
auto db_for_temporary_and_external_tables = std::make_shared<DatabaseMemory>(TEMPORARY_DATABASE, getContext());
attachDatabase(TEMPORARY_DATABASE, db_for_temporary_and_external_tables);

View File

@ -279,7 +279,6 @@ private:
bool maybeRemoveDirectory(const String & disk_name, const DiskPtr & disk, const String & unused_dir);
static constexpr size_t reschedule_time_ms = 100;
static constexpr time_t drop_error_cooldown_sec = 5;
mutable std::mutex databases_mutex;
@ -326,6 +325,9 @@ private:
time_t unused_dir_rm_timeout_sec = default_unused_dir_rm_timeout_sec;
static constexpr time_t default_unused_dir_cleanup_period_sec = 24 * 60 * 60; /// 1 day
time_t unused_dir_cleanup_period_sec = default_unused_dir_cleanup_period_sec;
static constexpr time_t default_drop_error_cooldown_sec = 5;
time_t drop_error_cooldown_sec = default_drop_error_cooldown_sec;
};
/// This class is useful when creating a table or database.

View File

@ -118,6 +118,15 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
alias_node->checkSize(data.settings.max_expanded_ast_elements);
ast = alias_node->clone();
ast->setAlias(node_alias);
/// If the cloned AST was finished, this one should also be considered finished
if (data.finished_asts.contains(alias_node))
data.finished_asts[ast] = ast;
/// If we had an alias for node_alias, point it instead to the new node so we don't have to revisit it
/// on subsequent calls
if (auto existing_alias = data.aliases.find(node_alias); existing_alias != data.aliases.end())
existing_alias->second = ast;
}
}
else
@ -127,6 +136,15 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
auto alias_name = ast->getAliasOrColumnName();
ast = alias_node->clone();
ast->setAlias(alias_name);
/// If the cloned AST was finished, this one should also be considered finished
if (data.finished_asts.contains(alias_node))
data.finished_asts[ast] = ast;
/// If we had an alias for node_alias, point it instead to the new node so we don't have to revisit it
/// on subsequent calls
if (auto existing_alias = data.aliases.find(node_alias); existing_alias != data.aliases.end())
existing_alias->second = ast;
}
}
}

View File

@ -362,11 +362,14 @@ void ServerAsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_tim
const auto time_after_previous_update = current_time - heavy_metric_previous_update_time;
const bool update_heavy_metric = time_after_previous_update >= heavy_metric_update_period || first_run;
Stopwatch watch;
if (update_heavy_metric)
{
heavy_metric_previous_update_time = update_time;
Stopwatch watch;
if (first_run)
heavy_update_interval = heavy_metric_update_period.count();
else
heavy_update_interval = std::chrono::duration_cast<std::chrono::microseconds>(time_after_previous_update).count() / 1e6;
/// Tests show that listing 100000 entries consumes around 0.15 sec.
updateDetachedPartsStats();
@ -390,7 +393,9 @@ void ServerAsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_tim
watch.elapsedSeconds());
}
new_values["AsynchronousHeavyMetricsCalculationTimeSpent"] = { watch.elapsedSeconds(), "Time in seconds spent for calculation of asynchronous heavy (tables related) metrics (this is the overhead of asynchronous metrics)." };
new_values["AsynchronousHeavyMetricsUpdateInterval"] = { heavy_update_interval, "Heavy (tables related) metrics update interval" };
new_values["NumberOfDetachedParts"] = { detached_parts_stats.count, "The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself it the part is broken, unexpected or unneeded. The server does not care about detached parts and they can be removed." };
new_values["NumberOfDetachedByUserParts"] = { detached_parts_stats.detached_by_user, "The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts). The server does not care about detached parts and they can be removed." };

View File

@ -21,6 +21,7 @@ private:
const Duration heavy_metric_update_period;
TimePoint heavy_metric_previous_update_time;
double heavy_update_interval = 0.;
struct DetachedPartsStats
{

View File

@ -1276,6 +1276,12 @@ void executeQuery(
std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete, &istr);
auto & pipeline = streams.pipeline;
QueryResultDetails result_details
{
.query_id = context->getClientInfo().current_query_id,
.timezone = DateLUT::instance().getTimeZone(),
};
std::unique_ptr<WriteBuffer> compressed_buffer;
try
{
@ -1334,9 +1340,8 @@ void executeQuery(
out->onProgress(progress);
});
if (set_result_details)
set_result_details(
context->getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::instance().getTimeZone());
result_details.content_type = out->getContentType();
result_details.format = format_name;
pipeline.complete(std::move(out));
}
@ -1345,6 +1350,9 @@ void executeQuery(
pipeline.setProgressCallback(context->getProgressCallback());
}
if (set_result_details)
set_result_details(result_details);
if (pipeline.initialized())
{
CompletedPipelineExecutor executor(pipeline);

View File

@ -11,7 +11,15 @@ namespace DB
class ReadBuffer;
class WriteBuffer;
using SetResultDetailsFunc = std::function<void(const String &, const String &, const String &, const String &)>;
struct QueryResultDetails
{
String query_id;
std::optional<String> content_type;
std::optional<String> format;
std::optional<String> timezone;
};
using SetResultDetailsFunc = std::function<void(const QueryResultDetails &)>;
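Illustration only, not part of the diff: a minimal sketch of a callback consuming the new QueryResultDetails through SetResultDetailsFunc. It assumes the declarations above plus <iostream>; the logging target is a placeholder.
// Hypothetical usage sketch of the new callback signature.
SetResultDetailsFunc set_result_details = [](const QueryResultDetails & details)
{
    // query_id is always set; the remaining fields are optional and may be absent.
    std::cerr << "query_id=" << details.query_id;
    if (details.content_type)
        std::cerr << " content_type=" << *details.content_type;
    if (details.format)
        std::cerr << " format=" << *details.format;
    if (details.timezone)
        std::cerr << " timezone=" << *details.timezone;
    std::cerr << '\n';
};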
/// Parse and execute a query.
void executeQuery(

View File

@ -113,10 +113,8 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
if (group_by_with_cube)
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH CUBE" << (s.hilite ? hilite_none : "");
if (group_by_with_grouping_sets)
if (group_by_with_grouping_sets && groupBy())
{
if (!groupBy()) /// sanity check, issue 43049
throw Exception(ErrorCodes::LOGICAL_ERROR, "Corrupt AST");
auto nested_frame = frame;
nested_frame.surround_each_list_element_with_parens = true;
nested_frame.expression_list_prepend_whitespace = false;

View File

@ -269,6 +269,9 @@ void FillingTransform::transform(Chunk & chunk)
if (on_totals)
return;
if (!chunk.hasRows() && !generate_suffix)
return;
Columns old_fill_columns;
Columns old_interpolate_columns;
Columns old_other_columns;

View File

@ -831,12 +831,20 @@ void HTTPHandler::processQuery(
customizeContext(request, context);
executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context,
[&response, this] (const String & current_query_id, const String & content_type, const String & format, const String & timezone)
[&response, this] (const QueryResultDetails & details)
{
response.setContentType(content_type_override.value_or(content_type));
response.add("X-ClickHouse-Query-Id", current_query_id);
response.add("X-ClickHouse-Format", format);
response.add("X-ClickHouse-Timezone", timezone);
response.add("X-ClickHouse-Query-Id", details.query_id);
if (content_type_override)
response.setContentType(*content_type_override);
else if (details.content_type)
response.setContentType(*details.content_type);
if (details.format)
response.add("X-ClickHouse-Format", *details.format);
if (details.timezone)
response.add("X-ClickHouse-Timezone", *details.timezone);
}
);

View File

@ -352,11 +352,15 @@ void MySQLHandler::comQuery(ReadBuffer & payload)
format_settings.mysql_wire.max_packet_size = max_packet_size;
format_settings.mysql_wire.sequence_id = &sequence_id;
auto set_result_details = [&with_output](const String &, const String &, const String &format, const String &)
auto set_result_details = [&with_output](const QueryResultDetails & details)
{
if (format != "MySQLWire")
if (details.format)
{
if (*details.format != "MySQLWire")
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "MySQL protocol does not support custom output formats");
with_output = true;
}
};
executeQuery(should_replace ? replacement : payload, *out, false, query_context, set_result_details, format_settings);

View File

@ -611,6 +611,8 @@ void TCPHandler::runImpl()
/// It is important to destroy query context here. We do not want it to live arbitrarily longer than the query.
query_context.reset();
CurrentThread::setFatalErrorCallback({});
if (is_interserver_mode)
{
/// We don't really have a session in interserver mode; a new one is created for each query. It's better to reset it now.

View File

@ -175,22 +175,69 @@ namespace
const auto CLEANUP_TIMEOUT_MS = 3000;
const auto MAX_THREAD_WORK_DURATION_MS = 60000; // once per minute, leave the loop to reschedule (we can't lock threads in the pool forever)
/// Configuration prefix
const String CONFIG_PREFIX = "kafka";
const String CONFIG_KAFKA_TAG = "kafka";
const String CONFIG_KAFKA_TOPIC_TAG = "kafka_topic";
const String CONFIG_NAME_TAG = "name";
void loadFromConfig(cppkafka::Configuration & conf, const Poco::Util::AbstractConfiguration & config, const std::string & path)
/// Read server configuration into cppkafka configuration, used by global configuration and by legacy per-topic configuration
void loadFromConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
{
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(path, keys);
/// Read all tags one level below <kafka>
Poco::Util::AbstractConfiguration::Keys tags;
config.keys(config_prefix, tags);
for (const auto & key : keys)
for (const auto & tag : tags)
{
const String key_path = path + "." + key;
// log_level has valid underscore, rest librdkafka setting use dot.separated.format
// which is not acceptable for XML.
// See also https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
const String key_name = (key == "log_level") ? key : boost::replace_all_copy(key, "_", ".");
conf.set(key_name, config.getString(key_path));
if (tag.starts_with(CONFIG_KAFKA_TOPIC_TAG)) /// multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc.
continue; /// used by new per-topic configuration, ignore
const String setting_path = config_prefix + "." + tag;
const String setting_value = config.getString(setting_path);
/// "log_level" has valid underscore, the remaining librdkafka setting use dot.separated.format which isn't acceptable for XML.
/// See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
const String setting_name_in_kafka_config = (tag == "log_level") ? tag : boost::replace_all_copy(tag, "_", ".");
kafka_config.set(setting_name_in_kafka_config, setting_value);
}
}
/// Read server configuration into cppkafka configuration, used by the new per-topic configuration
void loadTopicConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const String & topic)
{
/// Read all tags one level below <kafka>
Poco::Util::AbstractConfiguration::Keys tags;
config.keys(config_prefix, tags);
for (const auto & tag : tags)
{
/// Only consider tag <kafka_topic>. Multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc.
if (!tag.starts_with(CONFIG_KAFKA_TOPIC_TAG))
continue;
/// Read topic name between <name>...</name>
const String kafka_topic_path = config_prefix + "." + tag;
const String kafka_topic_name_path = kafka_topic_path + "." + CONFIG_NAME_TAG;
const String topic_name = config.getString(kafka_topic_name_path);
if (topic_name == topic)
{
/// Found it! Now read the per-topic configuration into cppkafka.
Poco::Util::AbstractConfiguration::Keys inner_tags;
config.keys(kafka_topic_path, inner_tags);
for (const auto & inner_tag : inner_tags)
{
if (inner_tag == CONFIG_NAME_TAG)
continue; // ignore <name>
/// "log_level" has valid underscore, the remaining librdkafka setting use dot.separated.format which isn't acceptable for XML.
/// See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
const String setting_path = kafka_topic_path + "." + inner_tag;
const String setting_value = config.getString(setting_path);
const String setting_name_in_kafka_config = (inner_tag == "log_level") ? inner_tag : boost::replace_all_copy(inner_tag, "_", ".");
kafka_config.set(setting_name_in_kafka_config, setting_value);
}
}
}
}
}
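A standalone sketch, not part of the patch, of the tag-name translation both loaders above apply; it assumes only Boost string algorithms, which the surrounding code already uses.
#include <boost/algorithm/string/replace.hpp>
#include <string>

// XML tags such as <retry_backoff_ms> become librdkafka's "retry.backoff.ms";
// "log_level" is the one setting that keeps its underscore.
std::string toLibrdkafkaSettingName(const std::string & tag)
{
    return (tag == "log_level") ? tag : boost::replace_all_copy(tag, "_", ".");
}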
@ -509,29 +556,33 @@ size_t StorageKafka::getPollTimeoutMillisecond() const
String StorageKafka::getConfigPrefix() const
{
if (!collection_name.empty())
return "named_collections." + collection_name + "." + CONFIG_PREFIX; /// Add one more level to separate librdkafka configuration.
return CONFIG_PREFIX;
return "named_collections." + collection_name + "." + CONFIG_KAFKA_TAG; /// Add one more level to separate librdkafka configuration.
return CONFIG_KAFKA_TAG;
}
void StorageKafka::updateConfiguration(cppkafka::Configuration & conf)
void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config)
{
// Update consumer configuration from the configuration
// Update consumer configuration from the configuration. Example:
// <kafka>
// <retry_backoff_ms>250</retry_backoff_ms>
// <fetch_min_bytes>100000</fetch_min_bytes>
// </kafka>
const auto & config = getContext()->getConfigRef();
auto config_prefix = getConfigPrefix();
if (config.has(config_prefix))
loadFromConfig(conf, config, config_prefix);
loadFromConfig(kafka_config, config, config_prefix);
#if USE_KRB5
if (conf.has_property("sasl.kerberos.kinit.cmd"))
#if USE_KRB5
if (kafka_config.has_property("sasl.kerberos.kinit.cmd"))
LOG_WARNING(log, "sasl.kerberos.kinit.cmd configuration parameter is ignored.");
conf.set("sasl.kerberos.kinit.cmd","");
conf.set("sasl.kerberos.min.time.before.relogin","0");
kafka_config.set("sasl.kerberos.kinit.cmd","");
kafka_config.set("sasl.kerberos.min.time.before.relogin","0");
if (conf.has_property("sasl.kerberos.keytab") && conf.has_property("sasl.kerberos.principal"))
if (kafka_config.has_property("sasl.kerberos.keytab") && kafka_config.has_property("sasl.kerberos.principal"))
{
String keytab = conf.get("sasl.kerberos.keytab");
String principal = conf.get("sasl.kerberos.principal");
String keytab = kafka_config.get("sasl.kerberos.keytab");
String principal = kafka_config.get("sasl.kerberos.principal");
LOG_DEBUG(log, "Running KerberosInit");
try
{
@ -543,21 +594,47 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & conf)
}
LOG_DEBUG(log, "Finished KerberosInit");
}
#else // USE_KRB5
if (conf.has_property("sasl.kerberos.keytab") || conf.has_property("sasl.kerberos.principal"))
LOG_WARNING(log, "Kerberos-related parameters are ignored because ClickHouse was built without support of krb5 library.");
#endif // USE_KRB5
#else // USE_KRB5
if (kafka_config.has_property("sasl.kerberos.keytab") || kafka_config.has_property("sasl.kerberos.principal"))
LOG_WARNING(log, "Ignoring Kerberos-related parameters because ClickHouse was built without krb5 library support.");
#endif // USE_KRB5
// Update consumer topic-specific configuration
// Update consumer topic-specific configuration (legacy syntax, retained for compatibility). Example with topic "football":
// <kafka_football>
// <retry_backoff_ms>250</retry_backoff_ms>
// <fetch_min_bytes>100000</fetch_min_bytes>
// </kafka_football>
// The legacy syntax has the problem that periods in topic names (e.g. "sports.football") are not supported because the Poco
// configuration framework hierarchy is based on periods as level separators. Besides that, per-topic tags at the same level
// as <kafka> are ugly.
for (const auto & topic : topics)
{
const auto topic_config_key = config_prefix + "_" + topic;
if (config.has(topic_config_key))
loadFromConfig(conf, config, topic_config_key);
loadFromConfig(kafka_config, config, topic_config_key);
}
// Update consumer topic-specific configuration (new syntax). Example with topics "football" and "baseball":
// <kafka>
// <kafka_topic>
// <name>football</name>
// <retry_backoff_ms>250</retry_backoff_ms>
// <fetch_min_bytes>5000</fetch_min_bytes>
// </kafka_topic>
// <kafka_topic>
// <name>baseball</name>
// <retry_backoff_ms>300</retry_backoff_ms>
// <fetch_min_bytes>2000</fetch_min_bytes>
// </kafka_topic>
// </kafka>
// Advantages: The period restriction no longer applies (e.g. <name>sports.football</name> works), and everything
// Kafka-related is below <kafka>.
for (const auto & topic : topics)
if (config.has(config_prefix))
loadTopicConfig(kafka_config, config, config_prefix, topic);
// No need to add any prefix, messages can be distinguished
conf.set_log_callback([this](cppkafka::KafkaHandleBase &, int level, const std::string & facility, const std::string & message)
kafka_config.set_log_callback([this](cppkafka::KafkaHandleBase &, int level, const std::string & facility, const std::string & message)
{
auto [poco_level, client_logs_level] = parseSyslogLevel(level);
LOG_IMPL(log, client_logs_level, poco_level, "[rdk:{}] {}", facility, message);
@ -573,13 +650,13 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & conf)
int status;
status = rd_kafka_conf_interceptor_add_on_new(conf.get_handle(),
status = rd_kafka_conf_interceptor_add_on_new(kafka_config.get_handle(),
"init", StorageKafkaInterceptors::rdKafkaOnNew, self);
if (status != RD_KAFKA_RESP_ERR_NO_ERROR)
LOG_ERROR(log, "Cannot set new interceptor due to {} error", status);
// cppkafka always copies the configuration
status = rd_kafka_conf_interceptor_add_on_conf_dup(conf.get_handle(),
status = rd_kafka_conf_interceptor_add_on_conf_dup(kafka_config.get_handle(),
"init", StorageKafkaInterceptors::rdKafkaOnConfDup, self);
if (status != RD_KAFKA_RESP_ERR_NO_ERROR)
LOG_ERROR(log, "Cannot set dup conf interceptor due to {} error", status);

View File

@ -126,7 +126,7 @@ private:
std::atomic<bool> shutdown_called = false;
// Update Kafka configuration with values from CH user configuration.
void updateConfiguration(cppkafka::Configuration & conf);
void updateConfiguration(cppkafka::Configuration & kafka_config);
String getConfigPrefix() const;
void threadFunc(size_t idx);

View File

@ -880,7 +880,7 @@ void IMergeTreeDataPart::writeMetadata(const String & filename, const WriteSetti
}
catch (...)
{
tryLogCurrentException("DataPartStorageOnDiskFull");
tryLogCurrentException("IMergeTreeDataPart");
}
throw;

View File

@ -1893,11 +1893,11 @@ void MergeTreeData::stopOutdatedDataPartsLoadingTask()
/// (Only files on the first level of nesting are considered).
static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_path, time_t threshold)
{
if (!disk->isDirectory(directory_path) || disk->getLastModified(directory_path).epochTime() >= threshold)
if (!disk->isDirectory(directory_path) || disk->getLastModified(directory_path).epochTime() > threshold)
return false;
for (auto it = disk->iterateDirectory(directory_path); it->isValid(); it->next())
if (disk->getLastModified(it->path()).epochTime() >= threshold)
if (disk->getLastModified(it->path()).epochTime() > threshold)
return false;
return true;

View File

@ -44,6 +44,27 @@
#include <algorithm>
namespace
{
using namespace DB;
bool columnIsPhysical(ColumnDefaultKind kind)
{
return kind == ColumnDefaultKind::Default || kind == ColumnDefaultKind::Materialized;
}
bool columnDefaultKindHasSameType(ColumnDefaultKind lhs, ColumnDefaultKind rhs)
{
if (lhs == rhs)
return true;
if (columnIsPhysical(lhs) == columnIsPhysical(rhs))
return true;
return false;
}
}
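A hypothetical illustration, not from the patch, of how the helpers above classify default kinds, assuming the usual ColumnDefaultKind enumerators (Default, Materialized, Alias): the two physical kinds are interchangeable, while mixing a physical kind with an alias is not.
#include <cassert>

// Assumed enumerators; only Default and Materialized count as physical.
static void exampleDefaultKindClassification()
{
    assert(columnDefaultKindHasSameType(ColumnDefaultKind::Default, ColumnDefaultKind::Materialized));
    assert(columnDefaultKindHasSameType(ColumnDefaultKind::Alias, ColumnDefaultKind::Alias));
    assert(!columnDefaultKindHasSameType(ColumnDefaultKind::Default, ColumnDefaultKind::Alias));
}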
namespace DB
{
@ -172,11 +193,13 @@ std::optional<NameSet> StorageMerge::supportedPrewhereColumns() const
NameSet supported_columns;
std::unordered_map<std::string, std::pair<const IDataType *, std::optional<ColumnDefault>>> column_type_default;
std::unordered_map<std::string, std::pair<const IDataType *, ColumnDefaultKind>> column_info;
for (const auto & name_type : columns.getAll())
{
column_type_default.emplace(name_type.name, std::make_pair(
name_type.type.get(), columns.getDefault(name_type.name)));
const auto & column_default = columns.getDefault(name_type.name).value_or(ColumnDefault{});
column_info.emplace(name_type.name, std::make_pair(
name_type.type.get(),
column_default.kind));
supported_columns.emplace(name_type.name);
}
@ -191,11 +214,10 @@ std::optional<NameSet> StorageMerge::supportedPrewhereColumns() const
const auto & table_columns = table_metadata_ptr->getColumns();
for (const auto & column : table_columns.getAll())
{
const auto & root_type_default = column_type_default[column.name];
const IDataType * root_type = root_type_default.first;
const std::optional<ColumnDefault> & src_default = root_type_default.second;
const auto & column_default = table_columns.getDefault(column.name).value_or(ColumnDefault{});
const auto & [root_type, src_default_kind] = column_info[column.name];
if ((root_type && !root_type->equals(*column.type)) ||
src_default != table_columns.getDefault(column.name))
!columnDefaultKindHasSameType(src_default_kind, column_default.kind))
{
supported_columns.erase(column.name);
}

View File

@ -905,17 +905,16 @@ void StorageReplicatedMergeTree::drop()
/// in this case, has_metadata_in_zookeeper = false, and we also permit dropping the table.
bool maybe_has_metadata_in_zookeeper = !has_metadata_in_zookeeper.has_value() || *has_metadata_in_zookeeper;
zkutil::ZooKeeperPtr zookeeper;
if (maybe_has_metadata_in_zookeeper)
{
/// Table can be shut down, restarting thread is not active
/// and calling StorageReplicatedMergeTree::getZooKeeper()/getAuxiliaryZooKeeper() won't suffice.
zkutil::ZooKeeperPtr zookeeper = getZooKeeperIfTableShutDown();
zookeeper = getZooKeeperIfTableShutDown();
/// If there might be metadata in ZooKeeper, we don't allow dropping the table.
if (!zookeeper)
throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Can't drop readonly replicated table (need to drop data in ZooKeeper as well)");
dropReplica(zookeeper, zookeeper_path, replica_name, log, getSettings());
}
/// Wait for loading of all outdated parts because
@ -929,10 +928,17 @@ void StorageReplicatedMergeTree::drop()
}
dropAllData();
if (maybe_has_metadata_in_zookeeper)
{
/// Session could expire, get it again
zookeeper = getZooKeeperIfTableShutDown();
dropReplica(zookeeper, zookeeper_path, replica_name, log, getSettings(), &has_metadata_in_zookeeper);
}
}
void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica,
Poco::Logger * logger, MergeTreeSettingsPtr table_settings)
Poco::Logger * logger, MergeTreeSettingsPtr table_settings, std::optional<bool> * has_metadata_out)
{
if (zookeeper->expired())
throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Table was not dropped because ZooKeeper session has expired.");
@ -990,12 +996,16 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con
Coordination::errorMessage(code), remote_replica_path);
/// And finally remove everything else recursively
zookeeper->tryRemoveRecursive(remote_replica_path);
}
/// It may leave some garbage if the replica_path subtree is concurrently modified
zookeeper->tryRemoveChildrenRecursive(remote_replica_path);
/// It may leave some garbage if the replica_path subtree is concurrently modified
if (zookeeper->exists(remote_replica_path))
/// Update has_metadata_in_zookeeper to avoid retries. Otherwise we can accidentally remove metadata of a new table on retries
if (has_metadata_out)
*has_metadata_out = false;
if (zookeeper->tryRemove(remote_replica_path) != Coordination::Error::ZOK)
LOG_ERROR(logger, "Replica was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", remote_replica_path);
}
/// Check that `zookeeper_path` exists: it could have been deleted by another replica after execution of previous line.
Strings replicas;
@ -8183,6 +8193,12 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co
auto shared_id = getTableSharedID();
if (shared_id == toString(UUIDHelpers::Nil))
{
if (zookeeper->exists(zookeeper_path))
{
LOG_WARNING(log, "Not removing shared data for part {} because replica does not have metadata in ZooKeeper, "
"but table path exist and other replicas may exist. It may leave some garbage on S3", part.name);
return std::make_pair(false, NameSet{});
}
LOG_TRACE(log, "Part {} blobs can be removed, because table {} completely dropped", part.name, getStorageID().getNameForLogs());
return std::make_pair(true, NameSet{});
}
@ -8208,9 +8224,18 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co
return std::make_pair(true, NameSet{});
}
/// If table was completely dropped (no meta in zookeeper) we can safely remove parts
if (has_metadata_in_zookeeper.has_value() && !has_metadata_in_zookeeper)
{
if (zookeeper->exists(zookeeper_path))
{
LOG_WARNING(log, "Not removing shared data for part {} because replica does not have metadata in ZooKeeper, "
"but table path exist and other replicas may exist. It may leave some garbage on S3", part.name);
return std::make_pair(false, NameSet{});
}
/// If table was completely dropped (no meta in zookeeper) we can safely remove parts
return std::make_pair(true, NameSet{});
}
/// We remove parts during table shutdown. If an exception happens, the restarting thread will already be turned
/// off and nobody will reconnect our zookeeper connection. In this case we use zookeeper connection from

View File

@ -229,7 +229,7 @@ public:
/** Remove a specific replica from zookeeper.
*/
static void dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica,
Poco::Logger * logger, MergeTreeSettingsPtr table_settings = nullptr);
Poco::Logger * logger, MergeTreeSettingsPtr table_settings = nullptr, std::optional<bool> * has_metadata_out = nullptr);
/// Removes table from ZooKeeper after the last replica was dropped
static bool removeTableNodesFromZooKeeper(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path,

View File

@ -9,7 +9,7 @@ from github import Github
from build_download_helper import get_build_name_for_check, read_build_urls
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import format_description, post_commit_status
from docker_pull_helper import get_image_with_version
from env_helper import (
GITHUB_REPOSITORY,
@ -145,11 +145,13 @@ if __name__ == "__main__":
with open(
os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8"
) as desc_f:
description = desc_f.readline().rstrip("\n")[:140]
description = desc_f.readline().rstrip("\n")
except:
status = "failure"
description = "Task failed: $?=" + str(retcode)
description = format_description(description)
test_result = TestResult(description, "OK")
if "fail" in status:
test_result.status = "FAIL"

View File

@ -9,6 +9,10 @@ import time
from shutil import rmtree
from typing import List, Tuple
from ccache_utils import get_ccache_if_not_exists, upload_ccache
from ci_config import CI_CONFIG, BuildConfig
from commit_status_helper import get_commit_filtered_statuses, get_commit
from docker_pull_helper import get_image_with_version
from env_helper import (
CACHES_PATH,
GITHUB_JOB,
@ -18,18 +22,17 @@ from env_helper import (
S3_DOWNLOAD,
TEMP_PATH,
)
from s3_helper import S3Helper
from get_robot_token import get_best_robot_token
from github_helper import GitHub
from pr_info import PRInfo
from s3_helper import S3Helper
from tee_popen import TeePopen
from version_helper import (
ClickHouseVersion,
Git,
get_version_from_repo,
update_version_local,
)
from ccache_utils import get_ccache_if_not_exists, upload_ccache
from ci_config import CI_CONFIG, BuildConfig
from docker_pull_helper import get_image_with_version
from tee_popen import TeePopen
IMAGE_NAME = "clickhouse/binary-builder"
BUILD_LOG_NAME = "build_log.log"
@ -122,8 +125,7 @@ def check_for_success_run(
logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix, "")
logging.info("Checking for artifacts in %s", logged_prefix)
try:
# TODO: theoretically, it would miss performance artifact for pr==0,
# but luckily we rerun only really failed tasks now, so we're safe
# Performance artifacts are now part of regular build, so we're safe
build_results = s3_helper.list_prefix(s3_prefix)
except Exception as ex:
logging.info("Got exception while listing %s: %s\nRerun", logged_prefix, ex)
@ -231,6 +233,29 @@ def upload_master_static_binaries(
print(f"::notice ::Binary static URL: {url}")
def mark_failed_reports_pending(build_name: str, sha: str) -> None:
try:
gh = GitHub(get_best_robot_token())
commit = get_commit(gh, sha)
statuses = get_commit_filtered_statuses(commit)
report_status = [
name
for name, builds in CI_CONFIG["builds_report_config"].items()
if build_name in builds
][0]
for status in statuses:
if status.context == report_status and status.state in ["failure", "error"]:
logging.info(
"Commit already have failed status for '%s', setting it to 'pending'",
report_status,
)
commit.create_status(
"pending", status.url, "Set to pending on rerun", report_status
)
except: # we do not care about any exception here
logging.info("Failed to get or mark the reports status as pending, continue")
def main():
logging.basicConfig(level=logging.INFO)
@ -260,6 +285,9 @@ def main():
# put them as github actions artifact (result)
check_for_success_run(s3_helper, s3_path_prefix, build_name, build_config)
# If it's a rerun, we need to mark the possibly failed report status as pending
mark_failed_reports_pending(build_name, pr_info.sha)
docker_image = get_image_with_version(IMAGES_PATH, IMAGE_NAME)
image_version = docker_image.version

View File

@ -508,7 +508,7 @@ def main():
logging.getLogger("git_helper").setLevel(logging.DEBUG)
token = args.token or get_best_robot_token()
gh = GitHub(token, create_cache_dir=False, per_page=100)
gh = GitHub(token, create_cache_dir=False)
bp = Backport(gh, args.repo, args.dry_run)
# https://github.com/python/mypy/issues/3004
bp.gh.cache_path = f"{TEMP_PATH}/gh_cache" # type: ignore

View File

@ -3,18 +3,20 @@
import csv
import os
import time
from typing import List
from typing import List, Literal
import logging
from ci_config import CI_CONFIG, REQUIRED_CHECKS
from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL
from github import Github
from github.Commit import Commit
from github.CommitStatus import CommitStatus
from ci_config import CI_CONFIG, REQUIRED_CHECKS
from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL
from pr_info import PRInfo, SKIP_MERGEABLE_CHECK_LABEL
RETRY = 5
CommitStatuses = List[CommitStatus]
MERGEABLE_NAME = "Mergeable Check"
def override_status(status: str, check_name: str, invert: bool = False) -> str:
@ -102,59 +104,69 @@ def post_labels(gh: Github, pr_info: PRInfo, labels_names: List[str]) -> None:
pull_request.add_to_labels(label)
def fail_mergeable_check(commit: Commit, description: str) -> None:
commit.create_status(
context="Mergeable Check",
description=description,
state="failure",
target_url=GITHUB_RUN_URL,
)
def format_description(description: str) -> str:
if len(description) > 140:
description = description[:137] + "..."
return description
def reset_mergeable_check(commit: Commit, description: str = "") -> None:
def set_mergeable_check(
commit: Commit,
description: str = "",
state: Literal["success", "failure"] = "success",
) -> None:
commit.create_status(
context="Mergeable Check",
context=MERGEABLE_NAME,
description=description,
state="success",
state=state,
target_url=GITHUB_RUN_URL,
)
def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None:
if SKIP_MERGEABLE_CHECK_LABEL in pr_info.labels:
not_run = (
pr_info.labels.intersection({SKIP_MERGEABLE_CHECK_LABEL, "release"})
or check_name not in REQUIRED_CHECKS
or pr_info.release_pr
or pr_info.number == 0
)
if not_run:
# Let's avoid unnecessary work
return
logging.info("Update Mergeable Check by %s", check_name)
commit = get_commit(gh, pr_info.sha)
checks = {
check.context: check.state
for check in filter(
lambda check: (check.context in REQUIRED_CHECKS),
# get_statuses() returns generator, which cannot be reversed - we need comprehension
# pylint: disable=unnecessary-comprehension
reversed([status for status in commit.get_statuses()]),
)
}
statuses = get_commit_filtered_statuses(commit)
required_checks = [
status for status in statuses if status.context in REQUIRED_CHECKS
]
mergeable_status = None
for status in statuses:
if status.context == MERGEABLE_NAME:
mergeable_status = status
break
success = []
fail = []
for name, state in checks.items():
if state == "success":
success.append(name)
for status in required_checks:
if status.state == "success":
success.append(status.context)
else:
fail.append(name)
fail.append(status.context)
if fail:
description = "failed: " + ", ".join(fail)
if success:
description += "; succeeded: " + ", ".join(success)
if len(description) > 140:
description = description[:137] + "..."
fail_mergeable_check(commit, description)
description = format_description(description)
if mergeable_status is None or mergeable_status.description != description:
set_mergeable_check(commit, description, "failure")
return
description = ", ".join(success)
if len(description) > 140:
description = description[:137] + "..."
reset_mergeable_check(commit, description)
description = format_description(description)
if mergeable_status is None or mergeable_status.description != description:
set_mergeable_check(commit, description)

View File

@ -14,7 +14,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union
from github import Github
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import format_description, post_commit_status
from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from pr_info import PRInfo
@ -456,8 +456,7 @@ def main():
else:
description = "Nothing to update"
if len(description) >= 140:
description = description[:136] + "..."
description = format_description(description)
with open(changed_json, "w", encoding="utf-8") as images_file:
json.dump(result_images, images_file)

View File

@ -10,7 +10,7 @@ from typing import List, Dict, Tuple
from github import Github
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import format_description, post_commit_status
from env_helper import RUNNER_TEMP
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from pr_info import PRInfo
@ -218,8 +218,7 @@ def main():
else:
description = "Nothing to update"
if len(description) >= 140:
description = description[:136] + "..."
description = format_description(description)
gh = Github(get_best_robot_token(), per_page=100)
post_commit_status(gh, pr_info.sha, NAME, description, status, url)

View File

@ -15,7 +15,7 @@ from github import Github
from build_check import get_release_or_pr
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import format_description, post_commit_status
from docker_images_check import DockerImage
from env_helper import CI, GITHUB_RUN_URL, RUNNER_TEMP, S3_BUILDS_BUCKET, S3_DOWNLOAD
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
@ -369,8 +369,7 @@ def main():
description = f"Processed tags: {', '.join(tags)}"
if len(description) >= 140:
description = description[:136] + "..."
description = format_description(description)
gh = Github(get_best_robot_token(), per_page=100)
post_commit_status(gh, pr_info.sha, NAME, description, status, url)

View File

@ -35,6 +35,8 @@ class GitHub(github.Github):
self._cache_path = Path(CACHE_PATH)
if create_cache_dir:
self.cache_path = self.cache_path
if not kwargs.get("per_page"):
kwargs["per_page"] = 100
# And set Path
super().__init__(*args, **kwargs)
self._retries = 0

View File

@ -18,7 +18,11 @@ from clickhouse_helper import (
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from commit_status_helper import post_commit_status, update_mergeable_check
from commit_status_helper import (
format_description,
post_commit_status,
update_mergeable_check,
)
from compress_files import compress_fast
from docker_pull_helper import get_image_with_version, DockerImage
from env_helper import CI, TEMP_PATH as TEMP, REPORTS_PATH
@ -341,8 +345,7 @@ def main():
ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, args.check_name, test_results)
if len(description) >= 140:
description = description[:136] + "..."
description = format_description(description)
post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url)

View File

@ -43,7 +43,7 @@ def main():
description = "the release can be created from the commit"
args.token = args.token or get_best_robot_token()
gh = GitHub(args.token, create_cache_dir=False, per_page=100)
gh = GitHub(args.token, create_cache_dir=False)
# Get the rate limits for a quick fail
gh.get_rate_limit()
commit = get_commit(gh, args.commit)

View File

@ -217,7 +217,7 @@ def main():
args = parse_args()
logging.info("Going to process PR #%s in repo %s", args.pr, args.repo)
token = args.token or get_best_robot_token()
gh = GitHub(token, per_page=100)
gh = GitHub(token)
repo = gh.get_repo(args.repo)
# An ugly and not nice fix to patch the wrong organization URL,
# see https://github.com/PyGithub/PyGithub/issues/2395#issuecomment-1378629710

View File

@ -7,10 +7,11 @@ from typing import Tuple
from github import Github
from commit_status_helper import (
format_description,
get_commit,
post_labels,
remove_labels,
reset_mergeable_check,
set_mergeable_check,
)
from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL
from get_robot_token import get_best_robot_token
@ -157,7 +158,7 @@ def check_pr_description(pr_info: PRInfo) -> Tuple[str, str]:
+ second_category
+ "'"
)
return result_status[:140], category
return result_status, category
elif re.match(
r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
@ -199,6 +200,7 @@ if __name__ == "__main__":
pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True)
can_run, description, labels_state = should_run_checks_for_pr(pr_info)
description = format_description(description)
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
@ -231,7 +233,7 @@ if __name__ == "__main__":
if pr_labels_to_remove:
remove_labels(gh, pr_info, pr_labels_to_remove)
reset_mergeable_check(commit, "skipped")
set_mergeable_check(commit, "skipped")
if description_error:
print(
@ -249,7 +251,7 @@ if __name__ == "__main__":
)
commit.create_status(
context=NAME,
description=description_error[:139],
description=format_description(description_error),
state="failure",
target_url=url,
)

View File

@ -10,7 +10,7 @@ from github import Github
from build_download_helper import get_build_name_for_check, read_build_urls
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import format_description, post_commit_status
from docker_pull_helper import get_image_with_version
from env_helper import (
GITHUB_REPOSITORY,
@ -171,11 +171,13 @@ def main():
with open(
os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8"
) as desc_f:
description = desc_f.readline().rstrip("\n")[:140]
description = desc_f.readline().rstrip("\n")
except:
# status = "failure"
description = "Task failed: $?=" + str(retcode)
description = format_description(description)
report_url = upload_results(
s3_helper,
pr_info.number,

View File

@ -30,13 +30,15 @@ def get_options(i, upgrade_check):
if i % 2 == 1:
join_alg_num = i // 2
if join_alg_num % 4 == 0:
if join_alg_num % 5 == 0:
client_options.append("join_algorithm='parallel_hash'")
if join_alg_num % 4 == 1:
if join_alg_num % 5 == 1:
client_options.append("join_algorithm='partial_merge'")
if join_alg_num % 4 == 2:
if join_alg_num % 5 == 2:
client_options.append("join_algorithm='full_sorting_merge'")
if join_alg_num % 4 == 3:
if join_alg_num % 5 == 3:
client_options.append("join_algorithm='grace_hash'")
if join_alg_num % 5 == 4:
client_options.append("join_algorithm='auto'")
client_options.append("max_rows_in_join=1000")
@ -209,7 +211,7 @@ def prepare_for_hung_check(drop_databases):
# Even if all clickhouse-test processes are finished, there are probably some sh scripts,
# which still run some new queries. Let's ignore them.
try:
query = """clickhouse client -q "SELECT count() FROM system.processes where where elapsed > 300" """
query = """clickhouse client -q "SELECT count() FROM system.processes where elapsed > 300" """
output = (
check_output(query, shell=True, stderr=STDOUT, timeout=30)
.decode("utf-8")

View File

@ -148,7 +148,7 @@ def main():
if args.push:
checkout_head(pr_info)
gh = GitHub(get_best_robot_token(), per_page=100, create_cache_dir=False)
gh = GitHub(get_best_robot_token(), create_cache_dir=False)
atexit.register(update_mergeable_check, gh, pr_info, NAME)

View File

@ -3264,7 +3264,7 @@ class ClickHouseInstance:
sleep_time=0.5,
check_callback=lambda x: True,
):
logging.debug(f"Executing query {sql} on {self.name}")
# logging.debug(f"Executing query {sql} on {self.name}")
result = None
for i in range(retry_count):
try:
@ -3283,7 +3283,7 @@ class ClickHouseInstance:
return result
time.sleep(sleep_time)
except Exception as ex:
logging.debug("Retry {} got exception {}".format(i + 1, ex))
# logging.debug("Retry {} got exception {}".format(i + 1, ex))
time.sleep(sleep_time)
if result is not None:

View File

@ -648,24 +648,15 @@ def test_async_backups_to_same_destination(interface):
"",
)
ids_succeeded = (
instance.query(
ids_succeeded = instance.query(
f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_CREATED'"
)
.rstrip("\n")
.split("\n")
)
).splitlines()
ids_failed = (
instance.query(
ids_failed = instance.query(
f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_FAILED'"
)
.rstrip("\n")
.split("\n")
)
).splitlines()
assert len(ids_succeeded) == 1
assert len(ids_failed) <= 1
assert set(ids_succeeded + ids_failed) == set(ids)
# Check that the first backup is all right.

View File

@ -82,8 +82,12 @@ def drop_after_test():
try:
yield
finally:
node0.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY")
node0.query("DROP DATABASE IF EXISTS mydb ON CLUSTER 'cluster' NO DELAY")
node0.query(
"DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY",
settings={
"distributed_ddl_task_timeout": 360,
},
)
backup_id_counter = 0
@ -138,7 +142,12 @@ def test_concurrent_backups_on_same_node():
# This restore part is added to confirm creating an internal backup & restore work
# even when a concurrent backup is stopped
nodes[0].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY")
nodes[0].query(
f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY",
settings={
"distributed_ddl_task_timeout": 360,
},
)
nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}")
nodes[0].query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl")
@ -158,9 +167,14 @@ def test_concurrent_backups_on_different_nodes():
f"SELECT status FROM system.backups WHERE status == 'CREATING_BACKUP' AND id = '{id}'",
"CREATING_BACKUP",
)
assert "Concurrent backups not supported" in nodes[2].query_and_get_error(
assert "Concurrent backups not supported" in nodes[0].query_and_get_error(
f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}"
)
assert_eq_with_retry(
nodes[1],
f"SELECT status FROM system.backups WHERE status == 'BACKUP_CREATED' AND id = '{id}'",
"BACKUP_CREATED",
)
def test_concurrent_restores_on_same_node():
@ -185,11 +199,20 @@ def test_concurrent_restores_on_same_node():
"BACKUP_CREATED",
)
nodes[0].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY")
nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC")
nodes[0].query(
f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY",
settings={
"distributed_ddl_task_timeout": 360,
},
)
restore_id = (
nodes[0]
.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC")
.split("\t")[0]
)
assert_eq_with_retry(
nodes[0],
f"SELECT status FROM system.backups WHERE status == 'RESTORING'",
f"SELECT status FROM system.backups WHERE status == 'RESTORING' AND id == '{restore_id}'",
"RESTORING",
)
assert "Concurrent restores not supported" in nodes[0].query_and_get_error(
@ -219,8 +242,17 @@ def test_concurrent_restores_on_different_node():
"BACKUP_CREATED",
)
nodes[0].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY")
nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC")
nodes[0].query(
f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY",
settings={
"distributed_ddl_task_timeout": 360,
},
)
restore_id = (
nodes[0]
.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC")
.split("\t")[0]
)
assert_eq_with_retry(
nodes[0],
f"SELECT status FROM system.backups WHERE status == 'RESTORING'",
@ -229,3 +261,9 @@ def test_concurrent_restores_on_different_node():
assert "Concurrent restores not supported" in nodes[1].query_and_get_error(
f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}"
)
assert_eq_with_retry(
nodes[0],
f"SELECT status FROM system.backups WHERE status == 'RESTORED' AND id == '{restore_id}'",
"RESTORED",
)

View File

@ -9,8 +9,16 @@ cluster = ClickHouseCluster(__file__)
node_insert = cluster.add_instance(
"node_insert", main_configs=["configs/concurrent_insert_restriction.xml"]
)
node_select = cluster.add_instance(
"node_select", main_configs=["configs/concurrent_select_restriction.xml"]
node_select1 = cluster.add_instance(
"node_select1", main_configs=["configs/concurrent_select_restriction.xml"]
)
node_select2 = cluster.add_instance(
"node_select2", main_configs=["configs/concurrent_select_restriction.xml"]
)
node_select3 = cluster.add_instance(
"node_select3", main_configs=["configs/concurrent_select_restriction.xml"]
)
@ -18,7 +26,13 @@ node_select = cluster.add_instance(
def started_cluster():
try:
cluster.start()
node_select.query(
node_select1.query(
"create table test_concurrent_insert (x UInt64) ENGINE = MergeTree() order by tuple()"
)
node_select2.query(
"create table test_concurrent_insert (x UInt64) ENGINE = MergeTree() order by tuple()"
)
node_select3.query(
"create table test_concurrent_insert (x UInt64) ENGINE = MergeTree() order by tuple()"
)
node_insert.query(
@ -79,7 +93,7 @@ def common_pattern(node, query_kind, restricted_sql, normal_sql, limit, wait_tim
def test_select(started_cluster):
common_pattern(
node_select,
node_select1,
"select",
"select sleep(3)",
"insert into test_concurrent_insert values (0)",
@ -89,7 +103,7 @@ def test_select(started_cluster):
# subquery is not counted
execute_with_background(
node_select,
node_select2,
"select sleep(3)",
"insert into test_concurrent_insert select sleep(3)",
2,
@ -98,7 +112,7 @@ def test_select(started_cluster):
# intersect and except are counted
common_pattern(
node_select,
node_select3,
"select",
"select sleep(1) INTERSECT select sleep(1) EXCEPT select sleep(1)",
"insert into test_concurrent_insert values (0)",

View File

@ -53,4 +53,6 @@
<shard>0</shard>
</macros>
<database_atomic_delay_before_drop_table_sec>3</database_atomic_delay_before_drop_table_sec>
<database_atomic_drop_error_cooldown_sec>0</database_atomic_drop_error_cooldown_sec>
</clickhouse>

View File

@ -1,9 +1,11 @@
import logging
import random
import string
import time
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.network import PartitionManager
logging.getLogger().setLevel(logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler())
@ -127,3 +129,75 @@ def test_insert_select_replicated(cluster, min_rows_for_wide_part, files_per_par
assert len(
list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))
) == (3 * FILES_OVERHEAD) + (files_per_part * 3)
def test_drop_table(cluster):
node = list(cluster.instances.values())[0]
node2 = list(cluster.instances.values())[1]
node.query(
"create table test_drop_table (n int) engine=ReplicatedMergeTree('/test/drop_table', '1') order by n partition by n % 99 settings storage_policy='s3'"
)
node2.query(
"create table test_drop_table (n int) engine=ReplicatedMergeTree('/test/drop_table', '2') order by n partition by n % 99 settings storage_policy='s3'"
)
node.query("insert into test_drop_table select * from numbers(1000)")
node2.query("system sync replica test_drop_table")
with PartitionManager() as pm:
pm._add_rule(
{
"probability": 0.01,
"destination": node.ip_address,
"source_port": 2181,
"action": "REJECT --reject-with tcp-reset",
}
)
pm._add_rule(
{
"probability": 0.01,
"source": node.ip_address,
"destination_port": 2181,
"action": "REJECT --reject-with tcp-reset",
}
)
# Will drop in background with retries
node.query("drop table test_drop_table")
# It should not be possible to create a replica with the same path until the previous one is completely dropped
for i in range(0, 100):
node.query_and_get_answer_with_error(
"create table if not exists test_drop_table (n int) "
"engine=ReplicatedMergeTree('/test/drop_table', '1') "
"order by n partition by n % 99 settings storage_policy='s3'"
)
time.sleep(0.2)
# Wait for drop to actually finish
node.wait_for_log_line(
"Removing metadata /var/lib/clickhouse/metadata_dropped/default.test_drop_table",
timeout=60,
look_behind_lines=1000000,
)
# It could leave some leftovers, remove them
replicas = node.query_with_retry(
"select name from system.zookeeper where path='/test/drop_table/replicas'"
)
if "1" in replicas and "test_drop_table" not in node.query("show tables"):
node2.query("system drop replica '1' from table test_drop_table")
# Just in case table was not created due to connection errors
node.query(
"create table if not exists test_drop_table (n int) engine=ReplicatedMergeTree('/test/drop_table', '1') "
"order by n partition by n % 99 settings storage_policy='s3'"
)
node.query_with_retry(
"system sync replica test_drop_table",
settings={"receive_timeout": 10},
retry_count=5,
)
node2.query("drop table test_drop_table")
assert "1000\t499500\n" == node.query(
"select count(n), sum(n) from test_drop_table"
)

View File

@ -9,12 +9,14 @@
XXX: for now this messages will appears in stderr.
-->
<debug>cgrp,consumer,topic,protocol</debug>
</kafka>
<kafka_consumer_hang>
<kafka_topic>
<name>consumer_hang</name>
<!-- default: 3000 -->
<heartbeat_interval_ms>300</heartbeat_interval_ms>
<!-- default: 10000 -->
<session_timeout_ms>6000</session_timeout_ms>
</kafka_consumer_hang>
</kafka_topic>
</kafka>
</clickhouse>

View File

@ -15,12 +15,13 @@
<sasl_kerberos_principal>kafkauser/instance@TEST.CLICKHOUSE.TECH</sasl_kerberos_principal>
<debug>security</debug>
<api_version_request>false</api_version_request>
</kafka>
<kafka_consumer_hang>
<kafka_topic>
<name>consumer_hang</name>
<!-- default: 3000 -->
<heartbeat_interval_ms>300</heartbeat_interval_ms>
<!-- default: 10000 -->
<session_timeout_ms>6000</session_timeout_ms>
</kafka_consumer_hang>
</kafka_topic>
</kafka>
</clickhouse>

View File

@ -1 +1,3 @@
SELECT materialize('prepre_f') LIKE '%pre_f%';
SELECT materialize('prepre_f') LIKE '%%%pre_f%';

View File

@ -20,7 +20,10 @@ www.example.com
127.0.0.1
www.example.com
www.example.com
www.example.com
www.example4.com
example.com
example.com
example.com
@ -38,7 +41,10 @@ www.example.com
127.0.0.1
www.example.com
www.example.com
www.example.com
example2.com
example3.com
www.example4.com
example.com
example.com
example.com
@ -53,6 +59,10 @@ paul:zozo@example.ru
www.example.com
www.example.com
example.com
foo:foo%@foo.com
foo:foo%41bar@foo.com
foo:foo%41%42bar@foo.com
foo:foo%41bar@foo
====DOMAIN====
com

View File

@ -23,7 +23,10 @@ SELECT domain{{ suffix }}('http://www.example.com?q=4') AS Host;
SELECT domain{{ suffix }}('http://127.0.0.1:443/') AS Host;
SELECT domain{{ suffix }}('//www.example.com') AS Host;
SELECT domain{{ suffix }}('//paul@www.example.com') AS Host;
SELECT domain{{ suffix }}('www.example.com') as Host;
SELECT domain{{ suffix }}('//foo:bar%41%40@e-%41-example1.com') AS Host;
SELECT domain{{ suffix }}('//foo:bar%41%40@example2.com') AS Host;
SELECT domain{{ suffix }}('//foo%41%40:bar@example3.com') AS Host;
SELECT domain{{ suffix }}('www.example4.com') as Host;
SELECT domain{{ suffix }}('example.com') as Host;
SELECT domainWithoutWWW{{ suffix }}('//paul@www.example.com') AS Host;
SELECT domainWithoutWWW{{ suffix }}('http://paul@www.example.com:80/') AS Host;
@ -41,6 +44,10 @@ SELECT netloc('svn+ssh://paul:zozo@example.ru/?q=hello%20world') AS Netloc;
SELECT netloc('//www.example.com') AS Netloc;
SELECT netloc('www.example.com') as Netloc;
SELECT netloc('example.com') as Netloc;
SELECT netloc('http://foo:foo%@foo.com') as Netloc;
SELECT netloc('http://foo:foo%41bar@foo.com') as Netloc;
SELECT netloc('http://foo:foo%41%42bar@foo.com') as Netloc;
SELECT netloc('http://foo:foo%41bar@foo%41.com') as Netloc;
SELECT '====DOMAIN====';
SELECT topLevelDomain('http://paul@www.example.com:80/') AS Domain;

View File

@ -12,9 +12,9 @@ match_max 100000
expect_after {
# Do not ignore eof from expect
eof { exp_continue }
-i $any_spawn_id eof { exp_continue }
# A default timeout action is to do nothing, change it to fail
timeout { exit 1 }
-i $any_spawn_id timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$MYSQL_CLIENT_BINARY \$MYSQL_CLIENT_OPT"

View File

@ -1,5 +1,4 @@
#!/usr/bin/expect -f
# Tags: long
set basedir [file dirname $argv0]
set basename [file tail $argv0]
@ -40,5 +39,5 @@ expect "2020-01-01 00:00:00.000\t2"
send -- "DROP TABLE test_01179\r"
expect "Ok."
send -- "\4"
send -- "exit\r"
expect eof

View File

@ -10,9 +10,9 @@ set timeout 60
match_max 100000
expect_after {
# Do not ignore eof from expect
eof { exp_continue }
-i $any_spawn_id eof { exp_continue }
# A default timeout action is to do nothing, change it to fail
timeout { exit 1 }
-i $any_spawn_id timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"
@ -31,6 +31,7 @@ expect "Syntax error: failed at position 93 ('UInt64'):*"
send -- "select (1, 2\r"
expect "Syntax error: failed at position 8 ('('):"
expect "Unmatched parentheses: ("
expect ":) "
send -- "\4"
expect eof

View File

@ -11,9 +11,9 @@ match_max 100000
expect_after {
# Do not ignore eof from expect
eof { exp_continue }
-i $any_spawn_id eof { exp_continue }
# A default timeout action is to do nothing, change it to fail
timeout { exit 1 }
-i $any_spawn_id timeout { exit 1 }
}
# useful debugging configuration

View File

@ -10,9 +10,9 @@ set timeout 60
match_max 100000
expect_after {
# Do not ignore eof from expect
eof { exp_continue }
-i $any_spawn_id eof { exp_continue }
# A default timeout action is to do nothing, change it to fail
timeout { exit 1 }
-i $any_spawn_id timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"

View File

@ -1,5 +1,4 @@
#!/usr/bin/expect -f
# Tags: long
set basedir [file dirname $argv0]
set basename [file tail $argv0]
@ -11,9 +10,9 @@ set timeout 60
match_max 100000
expect_after {
# Do not ignore eof from expect
eof { exp_continue }
-i $any_spawn_id eof { exp_continue }
# A default timeout action is to do nothing, change it to fail
timeout { exit 1 }
-i $any_spawn_id timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"

View File

@ -10,9 +10,9 @@ set timeout 60
match_max 100000
expect_after {
# Do not ignore eof from expect
eof { exp_continue }
-i $any_spawn_id eof { exp_continue }
# A default timeout action is to do nothing, change it to fail
timeout { exit 1 }
-i $any_spawn_id timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file"

View File

@ -10,9 +10,9 @@ set timeout 60
match_max 100000
expect_after {
# Do not ignore eof from expect
eof { exp_continue }
-i $any_spawn_id eof { exp_continue }
# A default timeout action is to do nothing, change it to fail
timeout { exit 1 }
-i $any_spawn_id timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"
@ -21,10 +21,12 @@ expect ":) "
# Make a query
send -- "SELECT 'print query id'\r"
expect {
"Query id: *" { }
# (?n) - Do not match new lines [1]
# [1]: https://www.tcl.tk/man/tcl8.6/TclCmd/re_syntax.html
-re "(?n)Query id: .*" { }
timeout { exit 1 }
}
expect "print query id"
expect "'print query id"
expect ":) "
send -- "\4"

View File

@ -1,5 +1,4 @@
#!/usr/bin/expect -f
# Tags: long
# This is a separate test, because we want to test the interactive mode.
# https://github.com/ClickHouse/ClickHouse/issues/19353

View File

@ -34,9 +34,9 @@ set timeout 60
match_max 100000
expect_after {
# Do not ignore eof from expect
eof { exp_continue }
-i \$any_spawn_id eof { exp_continue }
# A default timeout action is to do nothing, change it to fail
timeout { exit 1 }
-i \$any_spawn_id timeout { exit 1 }
}
spawn bash -c "$*"

View File

@ -10,9 +10,9 @@ set timeout 60
match_max 100000
expect_after {
# Do not ignore eof from expect
eof { exp_continue }
-i $any_spawn_id eof { exp_continue }
# A default timeout action is to do nothing, change it to fail
timeout { exit 1 }
-i $any_spawn_id timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion --history_file=$history_file"

View File

@ -1,5 +1,4 @@
#!/usr/bin/expect -f
# Tags: long
set basedir [file dirname $argv0]
set basename [file tail $argv0]
@ -10,9 +9,9 @@ set timeout 60
match_max 100000
expect_after {
# Do not ignore eof from expect
eof { exp_continue }
-i $any_spawn_id eof { exp_continue }
# A default timeout action is to do nothing, change it to fail
timeout { exit 1 }
-i $any_spawn_id timeout { exit 1 }
}
# history file is not required, in-memory history is enough

View File

@ -10,9 +10,9 @@ set timeout 60
match_max 100000
expect_after {
# Do not ignore eof from expect
eof { exp_continue }
-i $any_spawn_id eof { exp_continue }
# A default timeout action is to do nothing, change it to fail
timeout { exit 1 }
-i $any_spawn_id timeout { exit 1 }
}
exec bash -c "echo select 1 > $history_file.txt"

View File

@ -14,9 +14,9 @@ match_max 100000
expect_after {
# Do not ignore eof from expect
eof { exp_continue }
-i $any_spawn_id eof { exp_continue }
# A default timeout action is to do nothing, change it to fail
timeout { exit 1 }
-i $any_spawn_id timeout { exit 1 }
}
set Debug_type 0

View File

@ -1,5 +1,4 @@
#!/usr/bin/expect -f
# Tags: no-fasttest
# This is a test for system.warnings. Testing in interactive mode is necessary,
# as we want to see certain warnings from client
@ -15,9 +14,9 @@ match_max 100000
expect_after {
# Do not ignore eof from expect
eof { exp_continue }
-i $any_spawn_id eof { exp_continue }
# A default timeout action is to do nothing, change it to fail
timeout { exit 1 }
-i $any_spawn_id timeout { exit 1 }
}
#

Some files were not shown because too many files have changed in this diff.