Merge branch 'master' of https://github.com/ClickHouse/ClickHouse into KRB_CVE_Fix

MeenaRenganathan22 2023-02-27 05:52:25 -08:00
commit 6b1bb3f7ad
153 changed files with 1649 additions and 410 deletions

View File

@ -14,7 +14,7 @@ curl https://clickhouse.com/ | sh
* [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
* [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://clickhousedb.slack.com/) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.

View File

@ -44,7 +44,7 @@ int NumberParser::parse(const std::string& s, char thSep)
if (tryParse(s, result, thSep))
return result;
else
throw SyntaxException("Not a valid integer", s);
throw SyntaxException("Not a valid integer", "'" + s + "'");
}
@ -60,7 +60,7 @@ unsigned NumberParser::parseUnsigned(const std::string& s, char thSep)
if (tryParseUnsigned(s, result, thSep))
return result;
else
throw SyntaxException("Not a valid unsigned integer", s);
throw SyntaxException("Not a valid unsigned integer", "'" + s + "'");
}
@ -76,7 +76,7 @@ unsigned NumberParser::parseHex(const std::string& s)
if (tryParseHex(s, result))
return result;
else
throw SyntaxException("Not a valid hexadecimal integer", s);
throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}
@ -94,7 +94,7 @@ unsigned NumberParser::parseOct(const std::string& s)
if (tryParseOct(s, result))
return result;
else
throw SyntaxException("Not a valid hexadecimal integer", s);
throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}
@ -112,7 +112,7 @@ Int64 NumberParser::parse64(const std::string& s, char thSep)
if (tryParse64(s, result, thSep))
return result;
else
throw SyntaxException("Not a valid integer", s);
throw SyntaxException("Not a valid integer", "'" + s + "'");
}
@ -128,7 +128,7 @@ UInt64 NumberParser::parseUnsigned64(const std::string& s, char thSep)
if (tryParseUnsigned64(s, result, thSep))
return result;
else
throw SyntaxException("Not a valid unsigned integer", s);
throw SyntaxException("Not a valid unsigned integer", "'" + s + "'");
}
@ -144,7 +144,7 @@ UInt64 NumberParser::parseHex64(const std::string& s)
if (tryParseHex64(s, result))
return result;
else
throw SyntaxException("Not a valid hexadecimal integer", s);
throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}
@ -162,7 +162,7 @@ UInt64 NumberParser::parseOct64(const std::string& s)
if (tryParseOct64(s, result))
return result;
else
throw SyntaxException("Not a valid hexadecimal integer", s);
throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}
@ -180,7 +180,7 @@ double NumberParser::parseFloat(const std::string& s, char decSep, char thSep)
if (tryParseFloat(s, result, decSep, thSep))
return result;
else
throw SyntaxException("Not a valid floating-point number", s);
throw SyntaxException("Not a valid floating-point number", "'" + s + "'");
}
@ -196,7 +196,7 @@ bool NumberParser::parseBool(const std::string& s)
if (tryParseBool(s, result))
return result;
else
throw SyntaxException("Not a valid bool number", s);
throw SyntaxException("Not a valid bool number", "'" + s + "'");
}

View File

@ -60,6 +60,23 @@ elseif (ARCH_AARCH64)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs -Xclang=-target-feature -Xclang=+ldapr -Wno-unused-command-line-argument")
endif ()
# Best-effort check: The build generates and executes intermediate binaries, e.g. protoc and llvm-tablegen. If we build on ARM for ARM
# and the build machine is too old, i.e. doesn't satisfy above modern profile, then these intermediate binaries will not run (dump
# SIGILL). Even if they could run, the build machine wouldn't be able to run the ClickHouse binary. In that case, suggest to run the
# build with the compat profile.
if (OS_LINUX AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)" AND NOT NO_ARMV81_OR_HIGHER)
# CPU features in /proc/cpuinfo and compiler flags don't align :( ... pick some obvious flags contained in the modern but not in the
# legacy profile (full Graviton 3 /proc/cpuinfo is "fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm
# jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs paca pacg dcpodp svei8mm svebf16 i8mm
# bf16 dgh rng")
execute_process(
COMMAND grep -P "^(?=.*atomic)(?=.*ssbs)" /proc/cpuinfo
OUTPUT_VARIABLE FLAGS)
if (NOT FLAGS)
MESSAGE(FATAL_ERROR "The build machine does not satisfy the minimum CPU requirements, try to run cmake with -DNO_ARMV81_OR_HIGHER=1")
endif()
endif()
elseif (ARCH_PPC64LE)
# By Default, build for power8 and up, allow building for power9 and up
# Note that gcc and clang have support for x86 SSE2 intrinsics when building for PowerPC
@ -102,6 +119,22 @@ elseif (ARCH_AMD64)
SET(ENABLE_AVX512_FOR_SPEC_OP 0)
endif()
# Same best-effort check for x86 as above for ARM.
if (OS_LINUX AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64" AND NOT NO_SSE3_OR_HIGHER)
# Test for flags in standard profile but not in NO_SSE3_OR_HIGHER profile.
# /proc/cpuinfo for Intel Xeon 8124: "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse
# sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl xtopology nonstop_tsc cpuid aperfmperf
# tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c
# rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx
# avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke"
execute_process(
COMMAND grep -P "^(?=.*ssse3)(?=.*sse4_1)(?=.*sse4_2)" /proc/cpuinfo
OUTPUT_VARIABLE FLAGS)
if (NOT FLAGS)
MESSAGE(FATAL_ERROR "The build machine does not satisfy the minimum CPU requirements, try to run cmake with -DNO_SSE3_OR_HIGHER=1")
endif()
endif()
# ClickHouse can be cross-compiled (e.g. on an ARM host for x86) but it is also possible to build ClickHouse on x86 w/o AVX for x86 w/
# AVX. We only check that the compiler can emit certain SIMD instructions, we don't care if the host system is able to run the binary.
# Therefore, use check_cxx_source_compiles (= does the code compile+link?) instead of check_cxx_source_runs (= does the code

contrib/libunwind vendored

@ -1 +1 @@
Subproject commit 5022f30f3e092a54a7c101c335ce5e08769db366
Subproject commit e48aa13f67dc722511b5af33a32ba9b7748176b5

View File

@ -94,6 +94,10 @@ if (ARCH_AMD64 OR ARCH_AARCH64)
add_compile_definitions(TUKLIB_FAST_UNALIGNED_ACCESS=1)
endif ()
if (ARCH_S390X)
add_compile_definitions(WORDS_BIGENDIAN)
endif ()
find_package(Threads REQUIRED)

View File

@ -30,25 +30,10 @@
# - zstd homepage : http://www.zstd.net/
# ################################################################
# Get library version based on information from input content (use regular exp)
function(GetLibraryVersion _content _outputVar1 _outputVar2 _outputVar3)
string(REGEX MATCHALL ".*define ZSTD_VERSION_MAJOR+.* ([0-9]+).*define ZSTD_VERSION_MINOR+.* ([0-9]+).*define ZSTD_VERSION_RELEASE+.* ([0-9]+)" VERSION_REGEX "${_content}")
SET(${_outputVar1} ${CMAKE_MATCH_1} PARENT_SCOPE)
SET(${_outputVar2} ${CMAKE_MATCH_2} PARENT_SCOPE)
SET(${_outputVar3} ${CMAKE_MATCH_3} PARENT_SCOPE)
endfunction()
# Define library directory, where sources and header files are located
SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib")
INCLUDE_DIRECTORIES(BEFORE ${LIBRARY_DIR} "${LIBRARY_DIR}/common")
# Read file content
FILE(READ "${LIBRARY_DIR}/zstd.h" HEADER_CONTENT)
# Parse version
GetLibraryVersion("${HEADER_CONTENT}" LIBVER_MAJOR LIBVER_MINOR LIBVER_RELEASE)
MESSAGE(STATUS "ZSTD VERSION ${LIBVER_MAJOR}.${LIBVER_MINOR}.${LIBVER_RELEASE}")
# cd contrib/zstd/lib
# find . -name '*.c' -or -name '*.S' | grep -vP 'deprecated|legacy' | sort | sed 's/^\./ "${LIBRARY_DIR}/"'
SET(Sources

View File

@ -30,13 +30,15 @@ def get_options(i, backward_compatibility_check):
if i % 2 == 1:
join_alg_num = i // 2
if join_alg_num % 4 == 0:
if join_alg_num % 5 == 0:
client_options.append("join_algorithm='parallel_hash'")
if join_alg_num % 4 == 1:
if join_alg_num % 5 == 1:
client_options.append("join_algorithm='partial_merge'")
if join_alg_num % 4 == 2:
if join_alg_num % 5 == 2:
client_options.append("join_algorithm='full_sorting_merge'")
if join_alg_num % 4 == 3:
if join_alg_num % 5 == 3:
client_options.append("join_algorithm='grace_hash'")
if join_alg_num % 5 == 4:
client_options.append("join_algorithm='auto'")
client_options.append('max_rows_in_join=1000')
@ -211,7 +213,7 @@ def prepare_for_hung_check(drop_databases):
# Even if all clickhouse-test processes are finished, there are probably some sh scripts,
# which still run some new queries. Let's ignore them.
try:
query = """clickhouse client -q "SELECT count() FROM system.processes where where elapsed > 300" """
query = """clickhouse client -q "SELECT count() FROM system.processes where elapsed > 300" """
output = (
check_output(query, shell=True, stderr=STDOUT, timeout=30)
.decode("utf-8")

View File

@ -162,22 +162,59 @@ If you want to change the target table by using `ALTER`, we recommend disabling
## Configuration {#configuration}
Similar to GraphiteMergeTree, the Kafka engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (`kafka`) and topic-level (`kafka_*`). The global configuration is applied first, and then the topic-level configuration is applied (if it exists).
Similar to GraphiteMergeTree, the Kafka engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (below `<kafka>`) and topic-level (below `<kafka><kafka_topic>`). The global configuration is applied first, and then the topic-level configuration is applied (if it exists).
``` xml
<!-- Global configuration options for all tables of Kafka engine type -->
<kafka>
<!-- Global configuration options for all tables of Kafka engine type -->
<debug>cgrp</debug>
<auto_offset_reset>smallest</auto_offset_reset>
<!-- Configuration specific to topics "logs" and "stats" -->
<kafka_topic>
<name>logs</name>
<retry_backoff_ms>250</retry_backoff_ms>
<fetch_min_bytes>100000</fetch_min_bytes>
</kafka_topic>
<kafka_topic>
<name>stats</name>
<retry_backoff_ms>400</retry_backoff_ms>
<fetch_min_bytes>50000</fetch_min_bytes>
</kafka_topic>
</kafka>
```
<details markdown="1">
<summary>Example in deprecated syntax</summary>
``` xml
<kafka>
<!-- Global configuration options for all tables of Kafka engine type -->
<debug>cgrp</debug>
<auto_offset_reset>smallest</auto_offset_reset>
</kafka>
<!-- Configuration specific for topic "logs" -->
<!-- Configuration specific to topics "logs" and "stats" -->
<!-- Does NOT support periods in topic names, e.g. "logs.security" -->
<kafka_logs>
<retry_backoff_ms>250</retry_backoff_ms>
<fetch_min_bytes>100000</fetch_min_bytes>
</kafka_logs>
<kafka_stats>
<retry_backoff_ms>400</retry_backoff_ms>
<fetch_min_bytes>50000</fetch_min_bytes>
</kafka_stats>
```
</details>
For a list of possible configuration options, see the [librdkafka configuration reference](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Use the underscore (`_`) instead of a dot in the ClickHouse configuration. For example, `check.crcs=true` will be `<check_crcs>true</check_crcs>`.
### Kerberos support {#kafka-kerberos-support}

View File

@ -23,6 +23,10 @@ The quickest and easiest way to get up and running with ClickHouse is to create
## Self-Managed Install
:::tip
For production installs of a specific release version see the [installation options](#available-installation-options) down below.
:::
<Tabs>
<TabItem value="linux" label="Linux" default>

View File

@ -1177,8 +1177,7 @@ This setting can be used to specify the types of columns that could not be deter
**Example**
```sql
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}'
SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)'
DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1
```
```response
┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐

View File

@ -507,7 +507,7 @@ Enabled by default.
Ignore unknown keys in json object for named tuples.
Disabled by default.
Enabled by default.
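A rough sketch of the effect (the table name `t` is hypothetical; parsing JSON objects into named tuples assumes `input_format_json_named_tuples_as_objects`, which is enabled by default):
```sql
CREATE TABLE t (data Tuple(a UInt32, b String)) ENGINE = Memory;

-- The unknown key "c" is skipped instead of raising an error when the setting is enabled
INSERT INTO t SETTINGS input_format_json_ignore_unknown_keys_in_named_tuple = 1
FORMAT JSONEachRow {"data" : {"a" : 1, "b" : "str", "c" : 42}}
```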
## input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple}

View File

@ -3948,3 +3948,71 @@ Default value: `0`.
:::note
Use this setting only for backward compatibility if your use cases depend on old syntax.
:::
## final {#final}
Automatically applies the [FINAL](../../sql-reference/statements/select/from/#final-modifier) modifier to all tables in a query where [FINAL](../../sql-reference/statements/select/from/#final-modifier) is applicable, including joined tables, tables in sub-queries, and distributed tables.
Possible values:
- 0 - disabled
- 1 - enabled
Default value: `0`.
Example:
```sql
CREATE TABLE test
(
key Int64,
some String
)
ENGINE = ReplacingMergeTree
ORDER BY key;
INSERT INTO test FORMAT Values (1, 'first');
INSERT INTO test FORMAT Values (1, 'second');
SELECT * FROM test;
┌─key─┬─some───┐
│ 1 │ second │
└─────┴────────┘
┌─key─┬─some──┐
│ 1 │ first │
└─────┴───────┘
SELECT * FROM test SETTINGS final = 1;
┌─key─┬─some───┐
│ 1 │ second │
└─────┴────────┘
SET final = 1;
SELECT * FROM test;
┌─key─┬─some───┐
│ 1 │ second │
└─────┴────────┘
```
## asterisk_include_materialized_columns {#asterisk_include_materialized_columns}
Include [MATERIALIZED](../../sql-reference/statements/create/table/#materialized) columns for wildcard query (`SELECT *`).
Possible values:
- 0 - disabled
- 1 - enabled
Default value: `0`.
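A minimal sketch (the table `t` and its MATERIALIZED column `m` are hypothetical):
```sql
SELECT * FROM t;                                                     -- m is not returned
SELECT * FROM t SETTINGS asterisk_include_materialized_columns = 1;  -- m is returned
```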
## asterisk_include_alias_columns {#asterisk_include_alias_columns}
Include [ALIAS](../../sql-reference/statements/create/table/#alias) columns for wildcard query (`SELECT *`).
Possible values:
- 0 - disabled
- 1 - enabled
Default value: `0`.
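A minimal sketch (the table `t` and its ALIAS column `a` are hypothetical):
```sql
SELECT * FROM t;                                              -- a is not returned
SELECT * FROM t SETTINGS asterisk_include_alias_columns = 1;  -- a is returned
```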

View File

@ -1,7 +1,7 @@
---
slug: /en/sql-reference/functions/string-replace-functions
sidebar_position: 42
sidebar_label: For Replacing in Strings
sidebar_label: Replacing in Strings
---
# Functions for Searching and Replacing in Strings

View File

@ -1,7 +1,7 @@
---
slug: /en/sql-reference/functions/string-search-functions
sidebar_position: 41
sidebar_label: For Searching in Strings
sidebar_label: Searching in Strings
---
# Functions for Searching in Strings
@ -382,12 +382,12 @@ Checks whether string `haystack` matches the regular expression `pattern`. The p
Returns 1 in case of a match, and 0 otherwise.
Matching is based on UTF-8, e.g. `.` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes. The regular expression must not contain null bytes.
If the haystack or pattern contain a sequence of bytes that are not valid UTF-8, the behavior is undefined.
No automatic Unicode normalization is performed, if you need it you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
If the haystack or the pattern are not valid UTF-8, then the behavior is undefined.
No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
Unlike re2's default behavior, `.` matches line breaks. To disable this, prepend the pattern with `(?-s)`.
For patterns to search for substrings in a string, it is better to use LIKE or position, since they work much faster.
For patterns to search for substrings in a string, it is better to use functions [like](#like) or [position](#position) since they work much faster.
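A short sketch of the points above, assuming default settings:
```sql
SELECT match('Hello\nworld', 'Hello.world');      -- 1: unlike re2's default, '.' also matches the line break
SELECT match('Hello\nworld', '(?-s)Hello.world'); -- 0: (?-s) restores re2's default behavior
```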
## multiMatchAny(haystack, \[pattern<sub>1</sub>, pattern<sub>2</sub>, …, pattern<sub>n</sub>\])
@ -529,21 +529,25 @@ Result:
## like(haystack, pattern), haystack LIKE pattern operator
Checks whether a string matches a simple regular expression.
The regular expression can contain the metasymbols `%` and `_`.
Checks whether a string matches a LIKE expression.
A LIKE expression contains a mix of normal characters and the following metasymbols:
`%` indicates any quantity of any bytes (including zero characters).
- `%` indicates an arbitrary number of arbitrary characters (including zero characters).
`_` indicates any one byte.
- `_` indicates a single arbitrary character.
Use the backslash (`\`) for escaping metasymbols. See the note on escaping in the description of the match function.
- `\` is for escaping literals `%`, `_` and `\`.
Matching is based on UTF-8, e.g. `_` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes.
If the haystack or pattern contain a sequence of bytes that are not valid UTF-8, then the behavior is undefined.
No automatic Unicode normalization is performed, if you need it you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
If the haystack or the pattern are not valid UTF-8, then the behavior is undefined.
No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
For regular expressions like `%needle%`, the code is more optimal and works as fast as the `position` function.
For other regular expressions, the code is the same as for the match function.
To match against literals `%`, `_` and `/` (which are LIKE metacharacters), prepend them with a backslash, i.e. `\%`, `\_` and `\\`.
The backslash loses its special meaning, i.e. is interpreted literally, if it prepends a character different than `%`, `_` or `\`.
Note that ClickHouse requires backslashes in strings [to be quoted as well](../syntax.md#string), so you would actually need to write `\\%`, `\\_` and `\\\\`.
For patterns of the form `%needle%`, the function is as fast as the `position` function.
Other LIKE expressions are internally converted to a regular expression and executed with a performance similar to function `match`.
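A short sketch of these rules (note that the backslash must itself be escaped inside the string literal, as described above):
```sql
SELECT 'Hello, world!' LIKE '%world%';  -- 1: runs as a substring search, as fast as position()
SELECT 'a_c' LIKE 'a\\_c';              -- 1: the escaped _ matches only a literal underscore
SELECT 'abc' LIKE 'a\\_c';              -- 0: 'b' is not a literal underscore
SELECT 'abc' LIKE 'a_c';                -- 1: an unescaped _ matches any single character
```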
## notLike(haystack, pattern), haystack NOT LIKE pattern operator

View File

@ -1,8 +1,8 @@
---
slug: /en/sql-reference/functions/tuple-map-functions
sidebar_position: 46
sidebar_label: Working with maps
title: "Functions for maps"
sidebar_label: Maps
title: "Functions for Maps"
---
## map
@ -440,7 +440,7 @@ mapApply(func, map)
**Parameters**
- `func` - [Lamda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).
**Returned value**
@ -480,7 +480,7 @@ mapFilter(func, map)
**Parameters**
- `func` - [Lamda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).
**Returned value**

View File

@ -127,6 +127,26 @@ Default expressions may be defined as an arbitrary expression from table constan
Normal default value. If the INSERT query does not specify the corresponding column, it will be filled in by computing the corresponding expression.
Example:
```sql
CREATE OR REPLACE TABLE test
(
id UInt64,
updated_at DateTime DEFAULT now(),
updated_at_date Date DEFAULT toDate(updated_at)
)
ENGINE = MergeTree
ORDER BY id;
INSERT INTO test (id) Values (1);
SELECT * FROM test;
┌─id─┬──────────updated_at─┬─updated_at_date─┐
│ 1 │ 2023-02-24 17:06:46 │ 2023-02-24 │
└────┴─────────────────────┴─────────────────┘
```
### MATERIALIZED
`MATERIALIZED expr`
@ -135,6 +155,36 @@ Materialized expression. Such a column can't be specified for INSERT, because
For an INSERT without a list of columns, these columns are not considered.
In addition, this column is not substituted when using an asterisk in a SELECT query. This is to preserve the invariant that the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
Example:
```sql
CREATE OR REPLACE TABLE test
(
id UInt64,
updated_at DateTime MATERIALIZED now(),
updated_at_date Date MATERIALIZED toDate(updated_at)
)
ENGINE = MergeTree
ORDER BY id;
INSERT INTO test Values (1);
SELECT * FROM test;
┌─id─┐
│ 1 │
└────┘
SELECT id, updated_at, updated_at_date FROM test;
┌─id─┬──────────updated_at─┬─updated_at_date─┐
│ 1 │ 2023-02-24 17:08:08 │ 2023-02-24 │
└────┴─────────────────────┴─────────────────┘
SELECT * FROM test SETTINGS asterisk_include_materialized_columns=1;
┌─id─┬──────────updated_at─┬─updated_at_date─┐
│ 1 │ 2023-02-24 17:08:08 │ 2023-02-24 │
└────┴─────────────────────┴─────────────────┘
```
### EPHEMERAL
`EPHEMERAL [expr]`
@ -142,6 +192,34 @@ In addition, this column is not substituted when using an asterisk in a SELECT q
Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. If `expr` is omitted type for column is required.
INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
Example:
```sql
CREATE OR REPLACE TABLE test
(
id UInt64,
unhexed String EPHEMERAL,
hexed FixedString(4) DEFAULT unhex(unhexed)
)
ENGINE = MergeTree
ORDER BY id
INSERT INTO test (id, unhexed) Values (1, '5a90b714');
SELECT
id,
hexed,
hex(hexed)
FROM test
FORMAT Vertical;
Row 1:
──────
id: 1
hexed: Z<><5A>
hex(hexed): 5A90B714
```
### ALIAS
`ALIAS expr`
@ -156,6 +234,29 @@ If you add a new column to a table but later change its default expression, the
It is not possible to set default values for elements in nested data structures.
```sql
CREATE OR REPLACE TABLE test
(
id UInt64,
size_bytes Int64,
size String Alias formatReadableSize(size_bytes)
)
ENGINE = MergeTree
ORDER BY id;
INSERT INTO test Values (1, 4678899);
SELECT id, size_bytes, size FROM test;
┌─id─┬─size_bytes─┬─size─────┐
│ 1 │ 4678899 │ 4.46 MiB │
└────┴────────────┴──────────┘
SELECT * FROM test SETTINGS asterisk_include_alias_columns=1;
┌─id─┬─size_bytes─┬─size─────┐
│ 1 │ 4678899 │ 4.46 MiB │
└────┴────────────┴──────────┘
```
## Primary Key
You can define a [primary key](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries) when creating a table. Primary key can be specified in two ways:

View File

@ -36,6 +36,8 @@ Queries that use `FINAL` are executed slightly slower than similar queries that
**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven't happened yet and deal with it by applying aggregation (for example, to discard duplicates).
`FINAL` can be applied automatically to all tables in a query by enabling the [final](../../../operations/settings/settings.md#final) setting for a session or in a user profile.
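For example (a sketch; `replacing_table` is a hypothetical table with a ReplacingMergeTree engine):
```sql
-- Equivalent to writing FROM replacing_table FINAL in every query of the session
SET final = 1;
SELECT * FROM replacing_table;
```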
## Implementation Details
If the `FROM` clause is omitted, data will be read from the `system.one` table.

View File

@ -86,10 +86,10 @@ Examples: `1`, `10_000_000`, `0xffff_ffff`, `18446744073709551615`, `0xDEADBEEF`
String literals must be enclosed in single quotes, double quotes are not supported.
Escaping works either
- in SQL-style based on a preceding single quote where the single-quote character `'` (and only this character) can be escaped as `''`, or
- in C-style based on a preceding backslash with the following supported escape sequences: `\\`, `\'`, `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. The backslash loses its special meaning and will be interpreted literally if it precedes characters different than the listed ones.
- using a preceding single quote where the single-quote character `'` (and only this character) can be escaped as `''`, or
- using a preceding backslash with the following supported escape sequences: `\\`, `\'`, `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. The backslash loses its special meaning, i.e. will be interpreted literally, if it precedes characters different than the listed ones.
In string literals, you need to escape at least `'` and `\` using escape codes `''` (or: `\'`) and `\\`.
In string literals, you need to escape at least `'` and `\` using escape codes `\'` (or: `''`) and `\\`.
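A small sketch of the two escaping styles:
```sql
-- Both literals denote the four-character string It's
SELECT 'It''s', 'It\'s';
-- A literal backslash must itself be escaped
SELECT 'a\\b';   -- returns a\b
```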
### Compound

View File

@ -567,7 +567,7 @@ SELECT
ts,
value,
round(avg(value) OVER (PARTITION BY metric ORDER BY toDate(ts)
Range BETWEEN 10 PRECEDING AND CURRENT ROW),2) moving_avg_10_days_temp
Range BETWEEN 10 PRECEDING AND CURRENT ROW),2) AS moving_avg_10_days_temp
FROM sensors
ORDER BY
metric ASC,

View File

@ -1,5 +1,8 @@
#include "LocalServer.h"
#include <sys/resource.h>
#include <Common/logger_useful.h>
#include <base/errnoToString.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/String.h>
#include <Poco/Logger.h>
@ -179,9 +182,9 @@ void LocalServer::tryInitPath()
parent_folder = std::filesystem::temp_directory_path();
}
catch (const fs::filesystem_error& e)
catch (const fs::filesystem_error & e)
{
// tmp folder don't exists? misconfiguration? chroot?
// The tmp folder doesn't exist? Is it a misconfiguration? Or chroot?
LOG_DEBUG(log, "Can not get temporary folder: {}", e.what());
parent_folder = std::filesystem::current_path();
@ -390,6 +393,21 @@ try
std::cout << std::fixed << std::setprecision(3);
std::cerr << std::fixed << std::setprecision(3);
/// Try to increase limit on number of open files.
{
rlimit rlim;
if (getrlimit(RLIMIT_NOFILE, &rlim))
throw Poco::Exception("Cannot getrlimit");
if (rlim.rlim_cur < rlim.rlim_max)
{
rlim.rlim_cur = config().getUInt("max_open_files", static_cast<unsigned>(rlim.rlim_max));
int rc = setrlimit(RLIMIT_NOFILE, &rlim);
if (rc != 0)
std::cerr << fmt::format("Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, errnoToString()) << '\n';
}
}
#if defined(FUZZING_MODE)
static bool first_time = true;
if (first_time)

View File

@ -6,7 +6,6 @@
#include <sys/types.h>
#include <pwd.h>
#include <unistd.h>
#include <Poco/Version.h>
#include <Poco/Net/HTTPServer.h>
#include <Poco/Net/NetException.h>
#include <Poco/Util/HelpFormatter.h>

View File

@ -13,7 +13,7 @@ struct Settings;
namespace ErrorCodes
{
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -129,7 +129,7 @@ public:
const IColumn::Offsets & ith_offsets = ith_column.getOffsets();
if (ith_offsets[row_num] != end || (row_num != 0 && ith_offsets[row_num - 1] != begin))
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
}
for (size_t i = begin; i < end; ++i)

View File

@ -19,7 +19,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
@ -197,7 +197,7 @@ public:
const IColumn::Offsets & ith_offsets = ith_column.getOffsets();
if (ith_offsets[row_num] != end || (row_num != 0 && ith_offsets[row_num - 1] != begin))
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
}
AggregateFunctionForEachData & state = ensureAggregateData(place, end - begin, *arena);

View File

@ -63,6 +63,9 @@ static constexpr const char * getNameByTrait()
template <typename T>
struct GroupArraySamplerData
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;
@ -97,6 +100,9 @@ struct GroupArrayNumericData;
template <typename T>
struct GroupArrayNumericData<T, false>
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
using Array = PODArray<T, 32, Allocator>;

View File

@ -32,6 +32,9 @@ namespace ErrorCodes
template <typename T>
struct MovingData
{
/// For easy serialization.
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
using Accumulator = T;
/// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena

View File

@ -117,7 +117,21 @@ public:
const auto & value = this->data(place).value;
size_t size = value.size();
writeVarUInt(size, buf);
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
/// In this version, pairs were serialized with padding.
/// We must ensure that padding bytes are zero-filled.
static_assert(offsetof(typename MaxIntersectionsData<PointType>::Value, first) == 0);
static_assert(offsetof(typename MaxIntersectionsData<PointType>::Value, second) > 0);
char zero_padding[offsetof(typename MaxIntersectionsData<PointType>::Value, second) - sizeof(value[0].first)]{};
for (size_t i = 0; i < size; ++i)
{
writePODBinary(value[i].first, buf);
writePODBinary(zero_padding, buf);
writePODBinary(value[i].second, buf);
}
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override

View File

@ -432,6 +432,7 @@ ReplxxLineReader::ReplxxLineReader(
};
rx.bind_key(Replxx::KEY::control('R'), interactive_history_search);
#endif
/// Rebind regular incremental search to C-T.
///
@ -443,7 +444,6 @@ ReplxxLineReader::ReplxxLineReader(
uint32_t reverse_search = Replxx::KEY::control('R');
return rx.invoke(Replxx::ACTION::HISTORY_INCREMENTAL_SEARCH, reverse_search);
});
#endif
}
ReplxxLineReader::~ReplxxLineReader()

View File

@ -747,7 +747,7 @@ namespace
*/
template<typename IntType>
requires (std::is_same_v<IntType, Int32> || std::is_same_v<IntType, UInt32>)
void replicateSSE42Int32(const IntType * __restrict data, IntType * __restrict result_data, const IColumn::Offsets & offsets)
void replicateSSE2Int32(const IntType * __restrict data, IntType * __restrict result_data, const IColumn::Offsets & offsets)
{
const IntType * data_copy_begin_ptr = nullptr;
size_t offsets_size = offsets.size();
@ -842,7 +842,7 @@ ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
#ifdef __SSE2__
if constexpr (std::is_same_v<T, UInt32>)
{
replicateSSE42Int32(getData().data(), res->getData().data(), offsets);
replicateSSE2Int32(getData().data(), res->getData().data(), offsets);
return res;
}
#endif

View File

@ -533,9 +533,16 @@ void AsynchronousMetrics::update(TimePoint update_time)
AsynchronousMetricValues new_values;
auto current_time = std::chrono::system_clock::now();
auto time_after_previous_update [[maybe_unused]] = current_time - previous_update_time;
auto time_after_previous_update = current_time - previous_update_time;
previous_update_time = update_time;
double update_interval = 0.;
if (first_run)
update_interval = update_period.count();
else
update_interval = std::chrono::duration_cast<std::chrono::microseconds>(time_after_previous_update).count() / 1e6;
new_values["AsynchronousMetricsUpdateInterval"] = { update_interval, "Metrics update interval" };
/// This is also a good indicator of system responsiveness.
new_values["Jitter"] = { std::chrono::duration_cast<std::chrono::nanoseconds>(current_time - update_time).count() / 1e9,
"The difference in time the thread for calculation of the asynchronous metrics was scheduled to wake up and the time it was in fact, woken up."

View File

@ -197,7 +197,7 @@
M(187, COLLATION_COMPARISON_FAILED) \
M(188, UNKNOWN_ACTION) \
M(189, TABLE_MUST_NOT_BE_CREATED_MANUALLY) \
M(190, SIZES_OF_ARRAYS_DOESNT_MATCH) \
M(190, SIZES_OF_ARRAYS_DONT_MATCH) \
M(191, SET_SIZE_LIMIT_EXCEEDED) \
M(192, UNKNOWN_USER) \
M(193, WRONG_PASSWORD) \

View File

@ -237,6 +237,7 @@ void ThreadStatus::setFatalErrorCallback(std::function<void()> callback)
void ThreadStatus::onFatalError()
{
std::lock_guard lock(thread_group->mutex);
if (fatal_error_callback)
fatal_error_callback();
}

View File

@ -809,7 +809,7 @@ class IColumn;
M(Bool, input_format_json_read_numbers_as_strings, false, "Allow to parse numbers as strings in JSON input formats", 0) \
M(Bool, input_format_json_read_objects_as_strings, true, "Allow to parse JSON objects as strings in JSON input formats", 0) \
M(Bool, input_format_json_named_tuples_as_objects, true, "Deserialize named tuple columns as JSON objects", 0) \
M(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, false, "Ignore unknown keys in json object for named tuples", 0) \
M(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, true, "Ignore unknown keys in json object for named tuples", 0) \
M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \

View File

@ -80,6 +80,7 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"23.3", {{"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}}},
{"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"},
{"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"},
{"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"},

View File

@ -134,6 +134,8 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
}
static std::atomic<bool> fatal_error_printed{false};
/** Handler for "fault" or diagnostic signals. Send data about fault to separate thread to write into log.
*/
static void signalHandler(int sig, siginfo_t * info, void * context)
@ -159,7 +161,16 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
if (sig != SIGTSTP) /// This signal is used for debugging.
{
/// The time that is usually enough for separate thread to print info into log.
sleepForSeconds(20); /// FIXME: use some feedback from threads that process stacktrace
/// Under MSan full stack unwinding with DWARF info about inline functions takes 101 seconds in one case.
for (size_t i = 0; i < 300; ++i)
{
/// We will synchronize with the thread printing the messages with an atomic variable to finish earlier.
if (fatal_error_printed)
break;
/// This coarse method of synchronization is perfectly ok for fatal signals.
sleepForSeconds(1);
}
call_default_signal_handler(sig);
}
@ -309,7 +320,9 @@ private:
}
if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())
{
DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace);
}
}
std::string signal_description = "Unknown signal";
@ -407,6 +420,8 @@ private:
/// When everything is done, we will try to send these error messages to client.
if (thread_ptr)
thread_ptr->onFatalError();
fatal_error_printed = true;
}
};

View File

@ -25,7 +25,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
namespace Nested
@ -242,7 +242,7 @@ void validateArraySizes(const Block & block)
const ColumnArray & another_array_column = assert_cast<const ColumnArray &>(*elem.column);
if (!first_array_column.hasEqualOffsets(another_array_column))
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"Elements '{}' and '{}' "
"of Nested data structure '{}' (Array columns) have different array sizes.",
block.getByPosition(it->second).name, elem.name, split.first);

View File

@ -64,7 +64,7 @@ static DataTypePtr create(const ASTPtr & arguments)
return std::make_shared<DataTypeDateTime>();
const auto scale = getArgument<UInt64, ArgumentKind::Optional>(arguments, 0, "scale", "DateTime");
const auto timezone = getArgument<String, ArgumentKind::Optional>(arguments, !!scale, "timezone", "DateTime");
const auto timezone = getArgument<String, ArgumentKind::Optional>(arguments, scale ? 1 : 0, "timezone", "DateTime");
if (!scale && !timezone)
throw Exception::createDeprecated(getExceptionMessage(" has wrong type: ", 0, "scale", "DateTime", Field::Types::Which::UInt64),

View File

@ -16,7 +16,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -213,7 +213,7 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments)
if (i == 0)
offsets = offsets_i;
else if (*offsets_i != *offsets)
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to aggregate function must be equal.");
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to aggregate function must be equal.");
}
return {nested_columns, offsets->data()};
}

View File

@ -1112,7 +1112,9 @@ private:
{
ToType h;
if constexpr (std::endian::native == std::endian::little)
{
h = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
}
else
{
char tmp_buffer[sizeof(vec_from[i])];
@ -1131,7 +1133,9 @@ private:
ToType h;
if constexpr (std::endian::native == std::endian::little)
{
h = apply(key, reinterpret_cast<const char *>(&value), sizeof(value));
}
else
{
char tmp_buffer[sizeof(value)];
@ -1271,7 +1275,7 @@ private:
{
/// NOTE: here, of course, you can do without the materialization of the column.
ColumnPtr full_column = col_from_const->convertToFullColumn();
executeArray<first>(key, type, &*full_column, vec_to);
executeArray<first>(key, type, full_column.get(), vec_to);
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
@ -1283,6 +1287,10 @@ private:
{
WhichDataType which(from_type);
if (icolumn->size() != vec_to.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}",
icolumn->getName(), icolumn->size(), vec_to.size(), getName());
if (which.isUInt8()) executeIntType<UInt8, first>(key, icolumn, vec_to);
else if (which.isUInt16()) executeIntType<UInt16, first>(key, icolumn, vec_to);
else if (which.isUInt32()) executeIntType<UInt32, first>(key, icolumn, vec_to);
@ -1343,10 +1351,9 @@ private:
const auto & type_map = assert_cast<const DataTypeMap &>(*type);
executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
}
else if (const auto * const_map = checkAndGetColumnConstData<ColumnMap>(column))
else if (const auto * const_map = checkAndGetColumnConst<ColumnMap>(column))
{
const auto & type_map = assert_cast<const DataTypeMap &>(*type);
executeForArgument(key, type_map.getNestedType().get(), const_map->getNestedColumnPtr().get(), vec_to, is_first);
executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
}
else
{
@ -1382,8 +1389,7 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
size_t rows = input_rows_count;
auto col_to = ColumnVector<ToType>::create(rows);
auto col_to = ColumnVector<ToType>::create(input_rows_count);
typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
@ -1395,7 +1401,7 @@ public:
if (arguments.size() <= first_data_argument)
{
/// Return a fixed random-looking magic number when input is empty
vec_to.assign(rows, static_cast<ToType>(0xe28dbde7fe22e41c));
vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
}
KeyType key{};

View File

@ -26,6 +26,7 @@
# pragma GCC diagnostic pop
#endif
namespace DB
{
@ -66,12 +67,12 @@ ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
if (!low_cardinality_type)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Incompatible type for low cardinality column: {}",
"Incompatible type for LowCardinality column: {}",
column.type->getName());
if (can_be_executed_on_default_arguments)
{
/// Normal case, when function can be executed on values's default.
/// Normal case, when function can be executed on values' default.
column.column = low_cardinality_column->getDictionary().getNestedColumn();
indexes = low_cardinality_column->getIndexesPtr();
}
@ -280,6 +281,7 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
auto res = executeWithoutLowCardinalityColumns(columns_without_low_cardinality, dictionary_type, new_input_rows_count, dry_run);
bool res_is_constant = isColumnConst(*res);
auto keys = res_is_constant
? res->cloneResized(1)->convertToFullColumnIfConst()
: res;

View File

@ -23,9 +23,10 @@ namespace ErrorCodes
namespace impl
{
/// Is the [I]LIKE expression reduced to finding a substring in a string?
/// Is the [I]LIKE expression equivalent to a substring search?
inline bool likePatternIsSubstring(std::string_view pattern, String & res)
{
/// TODO: ignore multiple leading or trailing %
if (pattern.size() < 2 || !pattern.starts_with('%') || !pattern.ends_with('%'))
return false;
@ -45,9 +46,25 @@ inline bool likePatternIsSubstring(std::string_view pattern, String & res)
case '\\':
++pos;
if (pos == end)
/// pattern ends with \% --> trailing % is to be taken literally and pattern doesn't qualify for substring search
return false;
else
res += *pos;
{
switch (*pos)
{
/// Known LIKE escape sequences:
case '%':
case '_':
case '\\':
res += *pos;
break;
/// For all other escape sequences, the backslash loses its special meaning
default:
res += '\\';
res += *pos;
break;
}
}
break;
default:
res += *pos;

View File

@ -75,6 +75,7 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
Pos dot_pos = nullptr;
Pos colon_pos = nullptr;
bool has_sub_delims = false;
bool has_at_symbol = false;
bool has_terminator_after_colon = false;
const auto * start_of_host = pos;
@ -97,25 +98,35 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
case '@': /// myemail@gmail.com
if (has_terminator_after_colon) return std::string_view{};
if (has_at_symbol) goto done;
has_sub_delims = false;
has_at_symbol = true;
start_of_host = pos + 1;
break;
case ';':
case '=':
case '&':
case '~':
case '%':
/// Symbols above are sub-delims in RFC3986 and should be
/// allowed for userinfo (named identification here).
///
/// NOTE: those symbols are allowed for reg-name (host)
/// too, but right now host parsing looks more like in
/// RFC1034 (in other words domains that are allowed to be
/// registered).
has_sub_delims = true;
continue;
case ' ': /// restricted symbols in whole URL
case '\t':
case '<':
case '>':
case '%':
case '{':
case '}':
case '|':
case '\\':
case '^':
case '~':
case '[':
case ']':
case ';':
case '=':
case '&':
if (colon_pos == nullptr)
return std::string_view{};
else
@ -124,6 +135,8 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
}
done:
if (has_sub_delims)
return std::string_view{};
if (!has_at_symbol)
pos = colon_pos ? colon_pos : pos;
return checkAndReturnHost(pos, dot_pos, start_of_host);

View File

@ -7,6 +7,7 @@
namespace DB
{
/// NOTE: Implementation is not RFC3986 compatible
struct ExtractNetloc
{
/// We use the same as domain function
@ -67,6 +68,7 @@ struct ExtractNetloc
/// Now pos points to the first byte after scheme (if there is).
bool has_identification = false;
Pos hostname_end = end;
Pos question_mark_pos = end;
Pos slash_pos = end;
Pos start_of_host = pos;
@ -90,25 +92,39 @@ struct ExtractNetloc
return std::string_view(start_of_host, pos - start_of_host);
case '@': /// foo:bar@example.ru
has_identification = true;
hostname_end = end;
break;
case ';':
case '=':
case '&':
case '~':
case '%':
/// Symbols above are sub-delims in RFC3986 and should be
/// allowed for userinfo (named identification here).
///
/// NOTE: those symbols are allowed for reg-name (host)
/// too, but right now host parsing looks more like in
/// RFC1034 (in other words domains that are allowed to be
/// registered).
if (!has_identification)
{
hostname_end = pos;
break;
}
[[fallthrough]];
case ' ': /// restricted symbols in whole URL
case '\t':
case '<':
case '>':
case '%':
case '{':
case '}':
case '|':
case '\\':
case '^':
case '~':
case '[':
case ']':
case ';':
case '=':
case '&':
return pos > start_of_host
? std::string_view(start_of_host, std::min(std::min(pos - 1, question_mark_pos), slash_pos) - start_of_host)
? std::string_view(start_of_host, std::min(std::min(pos, question_mark_pos), slash_pos) - start_of_host)
: std::string_view();
}
}
@ -116,7 +132,7 @@ struct ExtractNetloc
if (has_identification)
return std::string_view(start_of_host, pos - start_of_host);
else
return std::string_view(start_of_host, std::min(std::min(pos, question_mark_pos), slash_pos) - start_of_host);
return std::string_view(start_of_host, std::min(std::min(std::min(pos, question_mark_pos), slash_pos), hostname_end) - start_of_host);
}
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)

View File

@ -37,7 +37,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int LOGICAL_ERROR;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
@ -361,7 +361,7 @@ public:
if (getOffsetsPtr(*column_array) != offsets_column
&& getOffsets(*column_array) != typeid_cast<const ColumnArray::ColumnOffsets &>(*offsets_column).getData())
throw Exception(
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"{}s passed to {} must have equal size",
argument_type_name,
getName());

View File

@ -16,7 +16,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int LOGICAL_ERROR;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int ARGUMENT_OUT_OF_BOUND;
}
@ -356,7 +356,7 @@ private:
{
ColumnArray::Offset prev_offset = row > 0 ? offsets_x[row] : 0;
throw Exception(
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"Arguments of function {} have different array sizes: {} and {}",
getName(),
offsets_x[row] - prev_offset,
@ -423,7 +423,7 @@ private:
if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset))
{
throw Exception(
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"Arguments of function {} have different array sizes: {} and {}",
getName(),
offsets_x[0],

View File

@ -20,7 +20,7 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
class FunctionArrayEnumerateUniq;
@ -153,7 +153,7 @@ ColumnPtr FunctionArrayEnumerateExtended<Derived>::executeImpl(const ColumnsWith
offsets_column = array->getOffsetsPtr();
}
else if (offsets_i != *offsets)
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
getName());
const auto * array_data = &array->getData();

View File

@ -60,7 +60,7 @@ namespace DB
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
class FunctionArrayEnumerateUniqRanked;
@ -194,7 +194,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
{
if (*offsets_by_depth[0] != array->getOffsets())
{
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"Lengths and effective depths of all arrays passed to {} must be equal.", getName());
}
}
@ -217,7 +217,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
{
if (*offsets_by_depth[col_depth] != array->getOffsets())
{
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"Lengths and effective depths of all arrays passed to {} must be equal.", getName());
}
}
@ -225,7 +225,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
if (col_depth < arrays_depths.depths[array_num])
{
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"{}: Passed array number {} depth ({}) is more than the actual array depth ({}).",
getName(), array_num, std::to_string(arrays_depths.depths[array_num]), col_depth);
}

View File

@ -19,7 +19,7 @@ namespace DB
namespace ErrorCodes
{
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
@ -144,7 +144,7 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume
if (i == 0)
offsets = offsets_i;
else if (*offsets_i != *offsets)
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
getName());
}
const IColumn ** aggregate_arguments = aggregate_arguments_vec.data();

View File

@ -21,7 +21,7 @@ namespace DB
namespace ErrorCodes
{
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
@ -190,7 +190,7 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl(
if (i == 0)
offsets = offsets_i;
else if (*offsets_i != *offsets)
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
getName());
}
const IColumn ** aggregate_arguments = aggregate_arguments_vec.data();

View File

@ -18,7 +18,7 @@ namespace DB
namespace ErrorCodes
{
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
@ -151,7 +151,7 @@ ColumnPtr FunctionArrayUniq::executeImpl(const ColumnsWithTypeAndName & argument
if (i == 0)
offsets = &offsets_i;
else if (offsets_i != *offsets)
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
getName());
const auto * array_data = &array->getData();

View File

@ -13,7 +13,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
}
@ -81,7 +81,7 @@ public:
}
else if (!column_array->hasEqualOffsets(static_cast<const ColumnArray &>(*first_array_column)))
{
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"The argument 1 and argument {} of function {} have different array sizes",
i + 1, getName());
}

View File

@ -48,12 +48,12 @@ bool SlowWithHyperscanChecker::isSlowOneRepeat(std::string_view regexp)
re2_st::StringPiece haystack(regexp.data(), regexp.size());
re2_st::StringPiece matches[2];
size_t start_pos = 0;
while (start_pos < regexp.size())
while (start_pos < haystack.size())
{
if (searcher_one_repeat.Match(haystack, start_pos, regexp.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 2))
if (searcher_one_repeat.Match(haystack, start_pos, haystack.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 2))
{
const auto & match = matches[0];
start_pos += (matches[0].data() - haystack.data()) + match.length(); // fwd by prefix + match length
start_pos = (matches[0].data() - haystack.data()) + match.size(); // new start pos = prefix before match + match length
const auto & submatch = matches[1];
if (isLargerThanFifty({submatch.data(), submatch.size()}))
return true;
@ -70,12 +70,12 @@ bool SlowWithHyperscanChecker::isSlowTwoRepeats(std::string_view regexp)
re2_st::StringPiece haystack(regexp.data(), regexp.size());
re2_st::StringPiece matches[3];
size_t start_pos = 0;
while (start_pos < regexp.size())
while (start_pos < haystack.size())
{
if (searcher_two_repeats.Match(haystack, start_pos, regexp.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 3))
if (searcher_two_repeats.Match(haystack, start_pos, haystack.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 3))
{
const auto & match = matches[0];
start_pos += (matches[0].data() - haystack.data()) + match.length(); // fwd by prefix + match length
start_pos = (matches[0].data() - haystack.data()) + match.size(); // new start pos = prefix before match + match length
const auto & submatch1 = matches[1];
const auto & submatch2 = matches[2];
if (isLargerThanFifty({submatch1.data(), submatch1.size()})

View File

@ -21,7 +21,12 @@ inline String likePatternToRegexp(std::string_view pattern)
const char * const end = pattern.begin() + pattern.size();
if (pos < end && *pos == '%')
++pos;
/// Eat leading %
while (++pos < end)
{
if (*pos != '%')
break;
}
else
res = "^";
@ -29,7 +34,18 @@ inline String likePatternToRegexp(std::string_view pattern)
{
switch (*pos)
{
case '^': case '$': case '.': case '[': case '|': case '(': case ')': case '?': case '*': case '+': case '{':
/// Quote characters which have a special meaning in re2
case '^':
case '$':
case '.':
case '[':
case '|':
case '(':
case ')':
case '?':
case '*':
case '+':
case '{':
res += '\\';
res += *pos;
break;
@ -44,22 +60,23 @@ inline String likePatternToRegexp(std::string_view pattern)
break;
case '\\':
if (pos + 1 == end)
throw Exception(ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE, "Invalid escape sequence at the end of LIKE pattern");
/// Known escape sequences.
if (pos[1] == '%' || pos[1] == '_')
throw Exception(ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE, "Invalid escape sequence at the end of LIKE pattern '{}'", pattern);
switch (pos[1])
{
res += pos[1];
++pos;
}
else if (pos[1] == '\\')
{
res += "\\\\";
++pos;
}
else
{
/// Unknown escape sequence treated literally: as backslash and the following character.
res += "\\\\";
/// Interpret quoted LIKE metacharacters %, _ and \ as literals:
case '%':
case '_':
res += pos[1];
++pos;
break;
case '\\':
res += "\\\\"; /// backslash has a special meaning in re2 --> quote it
++pos;
break;
/// Unknown escape sequence treated literally: as backslash (which must be quoted in re2) + the following character
default:
res += "\\\\";
break;
}
break;
default:

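As a standalone illustration of the quoting rules above, here is a minimal sketch (a hypothetical helper, not the actual likePatternToRegexp; it always anchors with ^ and $ and omits the leading-% shortcut):

#include <iostream>
#include <stdexcept>
#include <string>
#include <string_view>

std::string likeToRe2Sketch(std::string_view pattern)
{
    std::string res = "^";
    for (size_t i = 0; i < pattern.size(); ++i)
    {
        const char c = pattern[i];
        switch (c)
        {
            case '%': res += ".*"; break;   /// LIKE wildcard: any sequence of characters
            case '_': res += '.'; break;    /// LIKE wildcard: any single character
            case '\\':
                if (i + 1 == pattern.size())
                    throw std::runtime_error("Invalid escape sequence at the end of LIKE pattern");
                if (pattern[i + 1] == '%' || pattern[i + 1] == '_')
                    res += pattern[++i];    /// quoted LIKE metacharacter becomes a literal
                else if (pattern[i + 1] == '\\')
                {
                    res += "\\\\";          /// literal backslash, quoted for re2
                    ++i;
                }
                else
                    res += "\\\\";          /// unknown escape: keep the backslash (quoted); the next char is handled normally
                break;
            case '^': case '$': case '.': case '[': case '|': case '(': case ')':
            case '?': case '*': case '+': case '{':
                res += '\\';                /// quote characters with a special meaning in re2
                res += c;
                break;
            default:
                res += c;
        }
    }
    res += '$';
    return res;
}

int main()
{
    std::cout << likeToRe2Sketch("a%b_c\\%d") << '\n'; /// prints ^a.*b.c%d$
}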
View File

@ -332,7 +332,7 @@ public:
}
size_t col_key_size = sub_map_column->size();
auto column = is_const? ColumnConst::create(std::move(sub_map_column), std::move(col_key_size)) : std::move(sub_map_column);
auto column = is_const ? ColumnConst::create(std::move(sub_map_column), std::move(col_key_size)) : std::move(sub_map_column);
ColumnsWithTypeAndName new_arguments =
{

View File

@ -20,7 +20,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
namespace
@ -118,7 +118,7 @@ public:
const auto * rhs_array = assert_cast<const ColumnArray *>(arguments[i].column.get());
if (!lhs_array->hasEqualOffsets(*rhs_array))
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"The argument 1 and argument {} of function {} have different array offsets",
i + 1,
getName());

View File

@ -27,6 +27,7 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int CANNOT_DLOPEN;
extern const int LOGICAL_ERROR;
}
@ -174,6 +175,10 @@ public:
if (!handle)
throw Exception(ErrorCodes::CANNOT_DLOPEN, "Cannot dlopen: ({})", dlerror()); // NOLINT(concurrency-mt-unsafe) // MT-Safe on Linux, see man dlerror
}
else if (mode == "logical error")
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: trap");
}
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown trap mode");
}

View File

@ -21,7 +21,7 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int NOT_FOUND_COLUMN_IN_BLOCK;
extern const int NUMBER_OF_DIMENSIONS_MISMATCHED;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
namespace
@ -200,7 +200,7 @@ private:
const auto & array_y = *assert_cast<const ColumnArray *>(col_y.get());
if (!array_x.hasEqualOffsets(array_y))
{
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"The argument 1 and argument 3 of function {} have different array sizes", getName());
}
}
@ -222,7 +222,7 @@ private:
{
if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset))
{
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"The argument 1 and argument 3 of function {} have different array sizes", getName());
}
prev_offset = offsets_y[row];

View File

@ -12,7 +12,7 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
}
/** Function validateNestedArraySizes is used to check the consistency of Nested DataType subcolumns' offsets when Update
@ -106,7 +106,7 @@ ColumnPtr FunctionValidateNestedArraySizes::executeImpl(
else if (first_length != length)
{
throw Exception(
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
"Elements '{}' and '{}' of Nested data structure (Array columns) "
"have different array sizes ({} and {} respectively) on row {}",
arguments[1].name, arguments[args_idx].name, first_length, length, i);

View File

@ -224,9 +224,9 @@ namespace detail
enum class InitializeError
{
RETRIABLE_ERROR,
RETRYABLE_ERROR,
/// If error is not retriable, `exception` variable must be set.
NON_RETRIABLE_ERROR,
NON_RETRYABLE_ERROR,
/// Allows skipping not-found URLs for globs
SKIP_NOT_FOUND_URL,
NONE,
@ -398,7 +398,7 @@ namespace detail
}
else if (!isRetriableError(http_status))
{
initialization_error = InitializeError::NON_RETRIABLE_ERROR;
initialization_error = InitializeError::NON_RETRYABLE_ERROR;
exception = std::current_exception();
}
else
@ -409,7 +409,7 @@ namespace detail
}
/**
* Throws if error is retriable, otherwise sets initialization_error = NON_RETRIABLE_ERROR and
* Throws if error is retryable, otherwise sets initialization_error = NON_RETRYABLE_ERROR and
* saves exception into `exception` variable. In case url is not found and skip_not_found_url == true,
* sets initialization_error = SKIP_NOT_FOUND_URL, otherwise throws.
*/
@ -453,9 +453,9 @@ namespace detail
/// Retry 200OK
if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_OK)
initialization_error = InitializeError::RETRIABLE_ERROR;
initialization_error = InitializeError::RETRYABLE_ERROR;
else
initialization_error = InitializeError::NON_RETRIABLE_ERROR;
initialization_error = InitializeError::NON_RETRYABLE_ERROR;
return;
}
@ -544,7 +544,7 @@ namespace detail
{
initialize();
if (initialization_error == InitializeError::NON_RETRIABLE_ERROR)
if (initialization_error == InitializeError::NON_RETRYABLE_ERROR)
{
assert(exception);
break;
@ -553,7 +553,7 @@ namespace detail
{
return false;
}
else if (initialization_error == InitializeError::RETRIABLE_ERROR)
else if (initialization_error == InitializeError::RETRYABLE_ERROR)
{
LOG_ERROR(
log,
@ -582,10 +582,13 @@ namespace detail
}
catch (const Poco::Exception & e)
{
/**
* Retry request unconditionally if nothing has been read yet.
* Otherwise if it is GET method retry with range header.
*/
/// Too many open files - non-retryable.
if (e.code() == POCO_EMFILE)
throw;
/** Retry request unconditionally if nothing has been read yet.
* Otherwise if it is GET method retry with range header.
*/
bool can_retry_request = !offset_from_begin_pos || method == Poco::Net::HTTPRequest::HTTP_GET;
if (!can_retry_request)
throw;

View File

@ -1946,6 +1946,9 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
}
auto conjunction = getConjunctionNodes(predicate, allowed_nodes);
if (conjunction.rejected.size() == 1 && WhichDataType{conjunction.rejected.front()->result_type}.isFloat())
return nullptr;
auto actions = cloneActionsForConjunction(conjunction.allowed, all_inputs);
if (!actions)
return nullptr;
@ -2011,10 +2014,12 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
node.children.swap(new_children);
*predicate = std::move(node);
}
else
else if (!WhichDataType{new_children.front()->result_type}.isFloat())
{
/// If type is different, cast column.
/// This case is possible because AND can use any numeric type as an argument.
/// But casting floats to UInt8 or Bool produces different results,
/// so we can't apply this optimization to them.
Node node;
node.type = ActionType::COLUMN;
node.result_name = predicate->result_type->getName();
@ -2036,8 +2041,20 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
else
{
/// Predicate is function AND, which still has more than one argument.
/// Or there is only one argument that is a float and we can't just
/// remove the AND.
/// Just update children and rebuild it.
predicate->children.swap(new_children);
if (WhichDataType{predicate->children.front()->result_type}.isFloat())
{
Node node;
node.type = ActionType::COLUMN;
node.result_name = "1";
node.column = DataTypeUInt8().createColumnConst(0, 1u);
node.result_type = std::make_shared<DataTypeUInt8>();
const auto * const_col = &nodes.emplace_back(std::move(node));
predicate->children.emplace_back(const_col);
}
auto arguments = prepareFunctionArguments(predicate->children);
FunctionOverloadResolverPtr func_builder_and = std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionAnd>());

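A minimal sketch (plain C++, not ClickHouse code) of the effect the comments above describe: truncating a float such as 0.5 to UInt8 yields 0, while as a boolean condition it is true, so splitting a float argument out of AND (or dropping the AND) could flip the result of the filter.

#include <cstdint>
#include <iostream>

int main()
{
    double x = 0.5;
    std::cout << static_cast<unsigned>(static_cast<uint8_t>(x)) << '\n'; /// 0: cast to UInt8 truncates
    std::cout << (x != 0.0) << '\n';                                     /// 1: boolean interpretation is "true"
}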
View File

@ -14,7 +14,7 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int TYPE_MISMATCH;
}
@ -186,7 +186,7 @@ void ArrayJoinAction::execute(Block & block)
const ColumnArray & array = typeid_cast<const ColumnArray &>(*array_ptr);
if (!is_unaligned && !array.hasEqualOffsets(*any_array))
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Sizes of ARRAY-JOIN-ed arrays do not match");
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Sizes of ARRAY-JOIN-ed arrays do not match");
current.column = typeid_cast<const ColumnArray &>(*array_ptr).getDataPtr();
current.type = type->getNestedType();

View File

@ -147,6 +147,7 @@ void DatabaseCatalog::initializeAndLoadTemporaryDatabase()
unused_dir_hide_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_hide_timeout_sec", unused_dir_hide_timeout_sec);
unused_dir_rm_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_rm_timeout_sec", unused_dir_rm_timeout_sec);
unused_dir_cleanup_period_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_cleanup_period_sec", unused_dir_cleanup_period_sec);
drop_error_cooldown_sec = getContext()->getConfigRef().getInt64("database_catalog_drop_error_cooldown_sec", drop_error_cooldown_sec);
auto db_for_temporary_and_external_tables = std::make_shared<DatabaseMemory>(TEMPORARY_DATABASE, getContext());
attachDatabase(TEMPORARY_DATABASE, db_for_temporary_and_external_tables);

View File

@ -279,7 +279,6 @@ private:
bool maybeRemoveDirectory(const String & disk_name, const DiskPtr & disk, const String & unused_dir);
static constexpr size_t reschedule_time_ms = 100;
static constexpr time_t drop_error_cooldown_sec = 5;
mutable std::mutex databases_mutex;
@ -326,6 +325,9 @@ private:
time_t unused_dir_rm_timeout_sec = default_unused_dir_rm_timeout_sec;
static constexpr time_t default_unused_dir_cleanup_period_sec = 24 * 60 * 60; /// 1 day
time_t unused_dir_cleanup_period_sec = default_unused_dir_cleanup_period_sec;
static constexpr time_t default_drop_error_cooldown_sec = 5;
time_t drop_error_cooldown_sec = default_drop_error_cooldown_sec;
};
/// This class is useful when creating a table or database.

View File

@ -118,6 +118,15 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
alias_node->checkSize(data.settings.max_expanded_ast_elements);
ast = alias_node->clone();
ast->setAlias(node_alias);
/// If the cloned AST was finished, this one should also be considered finished
if (data.finished_asts.contains(alias_node))
data.finished_asts[ast] = ast;
/// If we had an alias for node_alias, point it instead to the new node so we don't have to revisit it
/// on subsequent calls
if (auto existing_alias = data.aliases.find(node_alias); existing_alias != data.aliases.end())
existing_alias->second = ast;
}
}
else
@ -127,6 +136,15 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
auto alias_name = ast->getAliasOrColumnName();
ast = alias_node->clone();
ast->setAlias(alias_name);
/// If the cloned AST was finished, this one should also be considered finished
if (data.finished_asts.contains(alias_node))
data.finished_asts[ast] = ast;
/// If we had an alias for node_alias, point it instead to the new node so we don't have to revisit it
/// on subsequent calls
if (auto existing_alias = data.aliases.find(node_alias); existing_alias != data.aliases.end())
existing_alias->second = ast;
}
}
}

View File

@ -362,11 +362,14 @@ void ServerAsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_tim
const auto time_after_previous_update = current_time - heavy_metric_previous_update_time;
const bool update_heavy_metric = time_after_previous_update >= heavy_metric_update_period || first_run;
Stopwatch watch;
if (update_heavy_metric)
{
heavy_metric_previous_update_time = update_time;
Stopwatch watch;
if (first_run)
heavy_update_interval = heavy_metric_update_period.count();
else
heavy_update_interval = std::chrono::duration_cast<std::chrono::microseconds>(time_after_previous_update).count() / 1e6;
/// Tests show that listing 100000 entries takes around 0.15 sec.
updateDetachedPartsStats();
@ -390,7 +393,9 @@ void ServerAsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_tim
watch.elapsedSeconds());
}
new_values["AsynchronousHeavyMetricsCalculationTimeSpent"] = { watch.elapsedSeconds(), "Time in seconds spent for calculation of asynchronous heavy (tables related) metrics (this is the overhead of asynchronous metrics)." };
new_values["AsynchronousHeavyMetricsUpdateInterval"] = { heavy_update_interval, "Heavy (tables related) metrics update interval" };
new_values["NumberOfDetachedParts"] = { detached_parts_stats.count, "The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself it the part is broken, unexpected or unneeded. The server does not care about detached parts and they can be removed." };
new_values["NumberOfDetachedByUserParts"] = { detached_parts_stats.detached_by_user, "The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts). The server does not care about detached parts and they can be removed." };

View File

@ -21,6 +21,7 @@ private:
const Duration heavy_metric_update_period;
TimePoint heavy_metric_previous_update_time;
double heavy_update_interval = 0.;
struct DetachedPartsStats
{

View File

@ -1276,6 +1276,12 @@ void executeQuery(
std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete, &istr);
auto & pipeline = streams.pipeline;
QueryResultDetails result_details
{
.query_id = context->getClientInfo().current_query_id,
.timezone = DateLUT::instance().getTimeZone(),
};
std::unique_ptr<WriteBuffer> compressed_buffer;
try
{
@ -1334,9 +1340,8 @@ void executeQuery(
out->onProgress(progress);
});
if (set_result_details)
set_result_details(
context->getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::instance().getTimeZone());
result_details.content_type = out->getContentType();
result_details.format = format_name;
pipeline.complete(std::move(out));
}
@ -1345,6 +1350,9 @@ void executeQuery(
pipeline.setProgressCallback(context->getProgressCallback());
}
if (set_result_details)
set_result_details(result_details);
if (pipeline.initialized())
{
CompletedPipelineExecutor executor(pipeline);

View File

@ -11,7 +11,15 @@ namespace DB
class ReadBuffer;
class WriteBuffer;
using SetResultDetailsFunc = std::function<void(const String &, const String &, const String &, const String &)>;
struct QueryResultDetails
{
String query_id;
std::optional<String> content_type;
std::optional<String> format;
std::optional<String> timezone;
};
using SetResultDetailsFunc = std::function<void(const QueryResultDetails &)>;
/// Parse and execute a query.
void executeQuery(

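A small, self-contained usage sketch of the new callback shape (the struct below is only a stand-in mirroring the declaration above, assuming String maps to std::string):

#include <functional>
#include <iostream>
#include <optional>
#include <string>

/// Stand-in mirroring QueryResultDetails above; for illustration only.
struct QueryResultDetails
{
    std::string query_id;
    std::optional<std::string> content_type;
    std::optional<std::string> format;
    std::optional<std::string> timezone;
};

using SetResultDetailsFunc = std::function<void(const QueryResultDetails &)>;

int main()
{
    /// A consumer now receives one struct and reads only the fields that were filled in.
    SetResultDetailsFunc set_result_details = [](const QueryResultDetails & details)
    {
        std::cout << "query id: " << details.query_id << '\n';
        if (details.format)
            std::cout << "format: " << *details.format << '\n';
    };

    set_result_details({.query_id = "some-query-id", .format = "JSON"});
}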
View File

@ -113,10 +113,8 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
if (group_by_with_cube)
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH CUBE" << (s.hilite ? hilite_none : "");
if (group_by_with_grouping_sets)
if (group_by_with_grouping_sets && groupBy())
{
if (!groupBy()) /// sanity check, issue 43049
throw Exception(ErrorCodes::LOGICAL_ERROR, "Corrupt AST");
auto nested_frame = frame;
nested_frame.surround_each_list_element_with_parens = true;
nested_frame.expression_list_prepend_whitespace = false;

View File

@ -269,6 +269,9 @@ void FillingTransform::transform(Chunk & chunk)
if (on_totals)
return;
if (!chunk.hasRows() && !generate_suffix)
return;
Columns old_fill_columns;
Columns old_interpolate_columns;
Columns old_other_columns;

View File

@ -831,12 +831,20 @@ void HTTPHandler::processQuery(
customizeContext(request, context);
executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context,
[&response, this] (const String & current_query_id, const String & content_type, const String & format, const String & timezone)
[&response, this] (const QueryResultDetails & details)
{
response.setContentType(content_type_override.value_or(content_type));
response.add("X-ClickHouse-Query-Id", current_query_id);
response.add("X-ClickHouse-Format", format);
response.add("X-ClickHouse-Timezone", timezone);
response.add("X-ClickHouse-Query-Id", details.query_id);
if (content_type_override)
response.setContentType(*content_type_override);
else if (details.content_type)
response.setContentType(*details.content_type);
if (details.format)
response.add("X-ClickHouse-Format", *details.format);
if (details.timezone)
response.add("X-ClickHouse-Timezone", *details.timezone);
}
);

View File

@ -352,11 +352,15 @@ void MySQLHandler::comQuery(ReadBuffer & payload)
format_settings.mysql_wire.max_packet_size = max_packet_size;
format_settings.mysql_wire.sequence_id = &sequence_id;
auto set_result_details = [&with_output](const String &, const String &, const String &format, const String &)
auto set_result_details = [&with_output](const QueryResultDetails & details)
{
if (format != "MySQLWire")
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "MySQL protocol does not support custom output formats");
with_output = true;
if (details.format)
{
if (*details.format != "MySQLWire")
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "MySQL protocol does not support custom output formats");
with_output = true;
}
};
executeQuery(should_replace ? replacement : payload, *out, false, query_context, set_result_details, format_settings);

View File

@ -611,6 +611,8 @@ void TCPHandler::runImpl()
/// It is important to destroy query context here. We do not want it to live arbitrarily longer than the query.
query_context.reset();
CurrentThread::setFatalErrorCallback({});
if (is_interserver_mode)
{
/// We don't really have a session in interserver mode; a new one is created for each query. It's better to reset it now.

View File

@ -175,22 +175,69 @@ namespace
const auto CLEANUP_TIMEOUT_MS = 3000;
const auto MAX_THREAD_WORK_DURATION_MS = 60000; // once per minute leave the loop to reschedule (we can't lock threads in the pool forever)
/// Configuration prefix
const String CONFIG_PREFIX = "kafka";
const String CONFIG_KAFKA_TAG = "kafka";
const String CONFIG_KAFKA_TOPIC_TAG = "kafka_topic";
const String CONFIG_NAME_TAG = "name";
void loadFromConfig(cppkafka::Configuration & conf, const Poco::Util::AbstractConfiguration & config, const std::string & path)
/// Read server configuration into cppkafka configuration, used by global configuration and by legacy per-topic configuration
void loadFromConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
{
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(path, keys);
/// Read all tags one level below <kafka>
Poco::Util::AbstractConfiguration::Keys tags;
config.keys(config_prefix, tags);
for (const auto & key : keys)
for (const auto & tag : tags)
{
const String key_path = path + "." + key;
// log_level has valid underscore, rest librdkafka setting use dot.separated.format
// which is not acceptable for XML.
// See also https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
const String key_name = (key == "log_level") ? key : boost::replace_all_copy(key, "_", ".");
conf.set(key_name, config.getString(key_path));
if (tag.starts_with(CONFIG_KAFKA_TOPIC_TAG)) /// multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc.
continue; /// used by new per-topic configuration, ignore
const String setting_path = config_prefix + "." + tag;
const String setting_value = config.getString(setting_path);
/// "log_level" has valid underscore, the remaining librdkafka setting use dot.separated.format which isn't acceptable for XML.
/// See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
const String setting_name_in_kafka_config = (tag == "log_level") ? tag : boost::replace_all_copy(tag, "_", ".");
kafka_config.set(setting_name_in_kafka_config, setting_value);
}
}
/// Read server configuration into cppkafka configuration, used by new per-topic configuration
void loadTopicConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const String & topic)
{
/// Read all tags one level below <kafka>
Poco::Util::AbstractConfiguration::Keys tags;
config.keys(config_prefix, tags);
for (const auto & tag : tags)
{
/// Only consider tag <kafka_topic>. Multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc.
if (!tag.starts_with(CONFIG_KAFKA_TOPIC_TAG))
continue;
/// Read topic name between <name>...</name>
const String kafka_topic_path = config_prefix + "." + tag;
const String kafka_topic_name_path = kafka_topic_path + "." + CONFIG_NAME_TAG;
const String topic_name = config.getString(kafka_topic_name_path);
if (topic_name == topic)
{
/// Found it! Now read the per-topic configuration into cppkafka.
Poco::Util::AbstractConfiguration::Keys inner_tags;
config.keys(kafka_topic_path, inner_tags);
for (const auto & inner_tag : inner_tags)
{
if (inner_tag == CONFIG_NAME_TAG)
continue; // ignore <name>
/// "log_level" has valid underscore, the remaining librdkafka setting use dot.separated.format which isn't acceptable for XML.
/// See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
const String setting_path = kafka_topic_path + "." + inner_tag;
const String setting_value = config.getString(setting_path);
const String setting_name_in_kafka_config = (inner_tag == "log_level") ? inner_tag : boost::replace_all_copy(inner_tag, "_", ".");
kafka_config.set(setting_name_in_kafka_config, setting_value);
}
}
}
}
}
@ -509,29 +556,33 @@ size_t StorageKafka::getPollTimeoutMillisecond() const
String StorageKafka::getConfigPrefix() const
{
if (!collection_name.empty())
return "named_collections." + collection_name + "." + CONFIG_PREFIX; /// Add one more level to separate librdkafka configuration.
return CONFIG_PREFIX;
return "named_collections." + collection_name + "." + CONFIG_KAFKA_TAG; /// Add one more level to separate librdkafka configuration.
return CONFIG_KAFKA_TAG;
}
void StorageKafka::updateConfiguration(cppkafka::Configuration & conf)
void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config)
{
// Update consumer configuration from the configuration
// Update consumer configuration from the configuration. Example:
// <kafka>
// <retry_backoff_ms>250</retry_backoff_ms>
// <fetch_min_bytes>100000</fetch_min_bytes>
// </kafka>
const auto & config = getContext()->getConfigRef();
auto config_prefix = getConfigPrefix();
if (config.has(config_prefix))
loadFromConfig(conf, config, config_prefix);
loadFromConfig(kafka_config, config, config_prefix);
#if USE_KRB5
if (conf.has_property("sasl.kerberos.kinit.cmd"))
#if USE_KRB5
if (kafka_config.has_property("sasl.kerberos.kinit.cmd"))
LOG_WARNING(log, "sasl.kerberos.kinit.cmd configuration parameter is ignored.");
conf.set("sasl.kerberos.kinit.cmd","");
conf.set("sasl.kerberos.min.time.before.relogin","0");
kafka_config.set("sasl.kerberos.kinit.cmd","");
kafka_config.set("sasl.kerberos.min.time.before.relogin","0");
if (conf.has_property("sasl.kerberos.keytab") && conf.has_property("sasl.kerberos.principal"))
if (kafka_config.has_property("sasl.kerberos.keytab") && kafka_config.has_property("sasl.kerberos.principal"))
{
String keytab = conf.get("sasl.kerberos.keytab");
String principal = conf.get("sasl.kerberos.principal");
String keytab = kafka_config.get("sasl.kerberos.keytab");
String principal = kafka_config.get("sasl.kerberos.principal");
LOG_DEBUG(log, "Running KerberosInit");
try
{
@ -543,21 +594,47 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & conf)
}
LOG_DEBUG(log, "Finished KerberosInit");
}
#else // USE_KRB5
if (conf.has_property("sasl.kerberos.keytab") || conf.has_property("sasl.kerberos.principal"))
LOG_WARNING(log, "Kerberos-related parameters are ignored because ClickHouse was built without support of krb5 library.");
#endif // USE_KRB5
#else // USE_KRB5
if (kafka_config.has_property("sasl.kerberos.keytab") || kafka_config.has_property("sasl.kerberos.principal"))
LOG_WARNING(log, "Ignoring Kerberos-related parameters because ClickHouse was built without krb5 library support.");
#endif // USE_KRB5
// Update consumer topic-specific configuration
// Update consumer topic-specific configuration (legacy syntax, retained for compatibility). Example with topic "football":
// <kafka_football>
// <retry_backoff_ms>250</retry_backoff_ms>
// <fetch_min_bytes>100000</fetch_min_bytes>
// </kafka_football>
// The legacy syntax has the problem that periods in topic names (e.g. "sports.football") are not supported because the Poco
// configuration framework hierarchy is based on periods as level separators. Besides that, per-topic tags at the same level
// as <kafka> are ugly.
for (const auto & topic : topics)
{
const auto topic_config_key = config_prefix + "_" + topic;
if (config.has(topic_config_key))
loadFromConfig(conf, config, topic_config_key);
loadFromConfig(kafka_config, config, topic_config_key);
}
// Update consumer topic-specific configuration (new syntax). Example with topics "football" and "baseball":
// <kafka>
// <kafka_topic>
// <name>football</name>
// <retry_backoff_ms>250</retry_backoff_ms>
// <fetch_min_bytes>5000</fetch_min_bytes>
// </kafka_topic>
// <kafka_topic>
// <name>baseball</name>
// <retry_backoff_ms>300</retry_backoff_ms>
// <fetch_min_bytes>2000</fetch_min_bytes>
// </kafka_topic>
// </kafka>
// Advantages: The period restriction no longer applies (e.g. <name>sports.football</name> will work), and everything
// Kafka-related is below <kafka>.
for (const auto & topic : topics)
if (config.has(config_prefix))
loadTopicConfig(kafka_config, config, config_prefix, topic);
// No need to add any prefix, messages can be distinguished
conf.set_log_callback([this](cppkafka::KafkaHandleBase &, int level, const std::string & facility, const std::string & message)
kafka_config.set_log_callback([this](cppkafka::KafkaHandleBase &, int level, const std::string & facility, const std::string & message)
{
auto [poco_level, client_logs_level] = parseSyslogLevel(level);
LOG_IMPL(log, client_logs_level, poco_level, "[rdk:{}] {}", facility, message);
@ -573,13 +650,13 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & conf)
int status;
status = rd_kafka_conf_interceptor_add_on_new(conf.get_handle(),
status = rd_kafka_conf_interceptor_add_on_new(kafka_config.get_handle(),
"init", StorageKafkaInterceptors::rdKafkaOnNew, self);
if (status != RD_KAFKA_RESP_ERR_NO_ERROR)
LOG_ERROR(log, "Cannot set new interceptor due to {} error", status);
// cppkafka always copies the configuration
status = rd_kafka_conf_interceptor_add_on_conf_dup(conf.get_handle(),
status = rd_kafka_conf_interceptor_add_on_conf_dup(kafka_config.get_handle(),
"init", StorageKafkaInterceptors::rdKafkaOnConfDup, self);
if (status != RD_KAFKA_RESP_ERR_NO_ERROR)
LOG_ERROR(log, "Cannot set dup conf interceptor due to {} error", status);

View File

@ -126,7 +126,7 @@ private:
std::atomic<bool> shutdown_called = false;
// Update Kafka configuration with values from CH user configuration.
void updateConfiguration(cppkafka::Configuration & conf);
void updateConfiguration(cppkafka::Configuration & kafka_config);
String getConfigPrefix() const;
void threadFunc(size_t idx);

View File

@ -684,12 +684,6 @@ void DataPartStorageOnDiskBase::clearDirectory(
request.emplace_back(fs::path(dir) / "delete-on-destroy.txt", true);
request.emplace_back(fs::path(dir) / "txn_version.txt", true);
/// Inverted index
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_dict", true);
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_post", true);
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_seg", true);
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_sid", true);
disk->removeSharedFiles(request, !can_remove_shared_data, names_not_to_remove);
disk->removeDirectory(dir);
}

View File

@ -880,7 +880,7 @@ void IMergeTreeDataPart::writeMetadata(const String & filename, const WriteSetti
}
catch (...)
{
tryLogCurrentException("DataPartStorageOnDiskFull");
tryLogCurrentException("IMergeTreeDataPart");
}
throw;

View File

@ -1893,11 +1893,11 @@ void MergeTreeData::stopOutdatedDataPartsLoadingTask()
/// (Only files on the first level of nesting are considered).
static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_path, time_t threshold)
{
if (!disk->isDirectory(directory_path) || disk->getLastModified(directory_path).epochTime() >= threshold)
if (!disk->isDirectory(directory_path) || disk->getLastModified(directory_path).epochTime() > threshold)
return false;
for (auto it = disk->iterateDirectory(directory_path); it->isValid(); it->next())
if (disk->getLastModified(it->path()).epochTime() >= threshold)
if (disk->getLastModified(it->path()).epochTime() > threshold)
return false;
return true;

View File

@ -75,6 +75,10 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r
{
const String & name = it.first;
/// Exclude files written by inverted index from check. No correct checksums are available for them currently.
if (name.ends_with(".gin_dict") || name.ends_with(".gin_post") || name.ends_with(".gin_seg") || name.ends_with(".gin_sid"))
continue;
auto jt = rhs.files.find(name);
if (jt == rhs.files.end())
throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No file {} in data part", name);

View File

@ -208,26 +208,26 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices()
auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr);
for (const auto & index_helper : skip_indices)
for (const auto & skip_index : skip_indices)
{
String stream_name = index_helper->getFileName();
String stream_name = skip_index->getFileName();
skip_indices_streams.emplace_back(
std::make_unique<MergeTreeDataPartWriterOnDisk::Stream>(
stream_name,
data_part->getDataPartStoragePtr(),
stream_name, index_helper->getSerializedFileExtension(),
stream_name, skip_index->getSerializedFileExtension(),
stream_name, marks_file_extension,
default_codec, settings.max_compress_block_size,
marks_compression_codec, settings.marks_compress_block_size,
settings.query_write_settings));
GinIndexStorePtr store = nullptr;
if (dynamic_cast<const MergeTreeIndexInverted *>(&*index_helper) != nullptr)
if (typeid_cast<const MergeTreeIndexInverted *>(&*skip_index) != nullptr)
{
store = std::make_shared<GinIndexStore>(stream_name, data_part->getDataPartStoragePtr(), data_part->getDataPartStoragePtr(), storage.getSettings()->max_digestion_size_per_segment);
gin_index_stores[stream_name] = store;
}
skip_indices_aggregators.push_back(index_helper->createIndexAggregatorForPart(store));
skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store));
skip_index_accumulated_marks.push_back(0);
}
}
@ -284,7 +284,7 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block
WriteBuffer & marks_out = stream.compress_marks ? stream.marks_compressed_hashing : stream.marks_hashing;
GinIndexStorePtr store;
if (dynamic_cast<const MergeTreeIndexInverted *>(&*index_helper) != nullptr)
if (typeid_cast<const MergeTreeIndexInverted *>(&*index_helper) != nullptr)
{
String stream_name = index_helper->getFileName();
auto it = gin_index_stores.find(stream_name);
@ -388,6 +388,18 @@ void MergeTreeDataPartWriterOnDisk::fillSkipIndicesChecksums(MergeTreeData::Data
auto & stream = *skip_indices_streams[i];
if (!skip_indices_aggregators[i]->empty())
skip_indices_aggregators[i]->getGranuleAndReset()->serializeBinary(stream.compressed_hashing);
/// Register additional files written only by the inverted index. Required because otherwise DROP TABLE complains about unknown
/// files. Note that the checksums provided here are bogus. The problem is that at this point the files were already written and
/// we'd need to re-open + hash the files (fixing this is TODO). For now, CHECK TABLE skips these four files.
if (typeid_cast<const MergeTreeIndexInverted *>(&*skip_indices[i]) != nullptr)
{
String filename_without_extension = skip_indices[i]->getFileName();
checksums.files[filename_without_extension + ".gin_dict"] = MergeTreeDataPartChecksums::Checksum();
checksums.files[filename_without_extension + ".gin_post"] = MergeTreeDataPartChecksums::Checksum();
checksums.files[filename_without_extension + ".gin_seg"] = MergeTreeDataPartChecksums::Checksum();
checksums.files[filename_without_extension + ".gin_sid"] = MergeTreeDataPartChecksums::Checksum();
}
}
for (auto & stream : skip_indices_streams)

View File

@ -157,25 +157,29 @@ IMergeTreeDataPart::Checksums checkDataPart(
}
NameSet projections_on_disk;
const auto & checksum_files_txt = checksums_txt.files;
const auto & checksums_txt_files = checksums_txt.files;
for (auto it = data_part_storage.iterate(); it->isValid(); it->next())
{
auto file_name = it->name();
/// We will check projections later.
if (data_part_storage.isDirectory(file_name) && endsWith(file_name, ".proj"))
if (data_part_storage.isDirectory(file_name) && file_name.ends_with(".proj"))
{
projections_on_disk.insert(file_name);
continue;
}
/// Exclude files written by inverted index from check. No correct checksums are available for them currently.
if (file_name.ends_with(".gin_dict") || file_name.ends_with(".gin_post") || file_name.ends_with(".gin_seg") || file_name.ends_with(".gin_sid"))
continue;
auto checksum_it = checksums_data.files.find(file_name);
/// Skip files that we already calculated. Also skip metadata files that are not checksummed.
if (checksum_it == checksums_data.files.end() && !files_without_checksums.contains(file_name))
{
auto txt_checksum_it = checksum_files_txt.find(file_name);
if (txt_checksum_it == checksum_files_txt.end() || txt_checksum_it->second.uncompressed_size == 0)
auto txt_checksum_it = checksums_txt_files.find(file_name);
if (txt_checksum_it == checksums_txt_files.end() || txt_checksum_it->second.uncompressed_size == 0)
{
/// The file is not compressed.
checksum_file(file_name);

View File

@ -44,6 +44,27 @@
#include <algorithm>
namespace
{
using namespace DB;
bool columnIsPhysical(ColumnDefaultKind kind)
{
return kind == ColumnDefaultKind::Default || kind == ColumnDefaultKind::Materialized;
}
bool columnDefaultKindHasSameType(ColumnDefaultKind lhs, ColumnDefaultKind rhs)
{
if (lhs == rhs)
return true;
if (columnIsPhysical(lhs) == columnIsPhysical(rhs))
return true;
return false;
}
}
namespace DB
{
@ -172,11 +193,13 @@ std::optional<NameSet> StorageMerge::supportedPrewhereColumns() const
NameSet supported_columns;
std::unordered_map<std::string, std::pair<const IDataType *, std::optional<ColumnDefault>>> column_type_default;
std::unordered_map<std::string, std::pair<const IDataType *, ColumnDefaultKind>> column_info;
for (const auto & name_type : columns.getAll())
{
column_type_default.emplace(name_type.name, std::make_pair(
name_type.type.get(), columns.getDefault(name_type.name)));
const auto & column_default = columns.getDefault(name_type.name).value_or(ColumnDefault{});
column_info.emplace(name_type.name, std::make_pair(
name_type.type.get(),
column_default.kind));
supported_columns.emplace(name_type.name);
}
@ -191,11 +214,10 @@ std::optional<NameSet> StorageMerge::supportedPrewhereColumns() const
const auto & table_columns = table_metadata_ptr->getColumns();
for (const auto & column : table_columns.getAll())
{
const auto & root_type_default = column_type_default[column.name];
const IDataType * root_type = root_type_default.first;
const std::optional<ColumnDefault> & src_default = root_type_default.second;
const auto & column_default = table_columns.getDefault(column.name).value_or(ColumnDefault{});
const auto & [root_type, src_default_kind] = column_info[column.name];
if ((root_type && !root_type->equals(*column.type)) ||
src_default != table_columns.getDefault(column.name))
!columnDefaultKindHasSameType(src_default_kind, column_default.kind))
{
supported_columns.erase(column.name);
}

View File

@ -905,17 +905,16 @@ void StorageReplicatedMergeTree::drop()
/// in this case, has_metadata_in_zookeeper = false, and we also permit to drop the table.
bool maybe_has_metadata_in_zookeeper = !has_metadata_in_zookeeper.has_value() || *has_metadata_in_zookeeper;
zkutil::ZooKeeperPtr zookeeper;
if (maybe_has_metadata_in_zookeeper)
{
/// Table can be shut down, restarting thread is not active
/// and calling StorageReplicatedMergeTree::getZooKeeper()/getAuxiliaryZooKeeper() won't suffice.
zkutil::ZooKeeperPtr zookeeper = getZooKeeperIfTableShutDown();
zookeeper = getZooKeeperIfTableShutDown();
/// If there probably is metadata in ZooKeeper, we don't allow dropping the table.
if (!zookeeper)
throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Can't drop readonly replicated table (need to drop data in ZooKeeper as well)");
dropReplica(zookeeper, zookeeper_path, replica_name, log, getSettings());
}
/// Wait for loading of all outdated parts because
@ -929,10 +928,17 @@ void StorageReplicatedMergeTree::drop()
}
dropAllData();
if (maybe_has_metadata_in_zookeeper)
{
/// Session could expire, get it again
zookeeper = getZooKeeperIfTableShutDown();
dropReplica(zookeeper, zookeeper_path, replica_name, log, getSettings(), &has_metadata_in_zookeeper);
}
}
void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica,
Poco::Logger * logger, MergeTreeSettingsPtr table_settings)
Poco::Logger * logger, MergeTreeSettingsPtr table_settings, std::optional<bool> * has_metadata_out)
{
if (zookeeper->expired())
throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Table was not dropped because ZooKeeper session has expired.");
@ -990,12 +996,16 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con
Coordination::errorMessage(code), remote_replica_path);
/// And finally remove everything else recursively
zookeeper->tryRemoveRecursive(remote_replica_path);
}
/// It may leave some garbage if the replica_path subtree is concurrently modified
zookeeper->tryRemoveChildrenRecursive(remote_replica_path);
/// It may leave some garbage if the replica_path subtree is concurrently modified
if (zookeeper->exists(remote_replica_path))
LOG_ERROR(logger, "Replica was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", remote_replica_path);
/// Update has_metadata_in_zookeeper to avoid retries. Otherwise we can accidentally remove metadata of a new table on retries
if (has_metadata_out)
*has_metadata_out = false;
if (zookeeper->tryRemove(remote_replica_path) != Coordination::Error::ZOK)
LOG_ERROR(logger, "Replica was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", remote_replica_path);
}
/// Check that `zookeeper_path` exists: it could have been deleted by another replica after execution of previous line.
Strings replicas;
@ -8183,6 +8193,12 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co
auto shared_id = getTableSharedID();
if (shared_id == toString(UUIDHelpers::Nil))
{
if (zookeeper->exists(zookeeper_path))
{
LOG_WARNING(log, "Not removing shared data for part {} because replica does not have metadata in ZooKeeper, "
"but table path exist and other replicas may exist. It may leave some garbage on S3", part.name);
return std::make_pair(false, NameSet{});
}
LOG_TRACE(log, "Part {} blobs can be removed, because table {} completely dropped", part.name, getStorageID().getNameForLogs());
return std::make_pair(true, NameSet{});
}
@ -8208,9 +8224,18 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co
return std::make_pair(true, NameSet{});
}
/// If table was completely dropped (no meta in zookeeper) we can safely remove parts
if (has_metadata_in_zookeeper.has_value() && !has_metadata_in_zookeeper)
{
if (zookeeper->exists(zookeeper_path))
{
LOG_WARNING(log, "Not removing shared data for part {} because replica does not have metadata in ZooKeeper, "
"but table path exist and other replicas may exist. It may leave some garbage on S3", part.name);
return std::make_pair(false, NameSet{});
}
/// If table was completely dropped (no meta in zookeeper) we can safely remove parts
return std::make_pair(true, NameSet{});
}
/// We remove parts during table shutdown. If an exception happens, the restarting thread will already be turned
/// off and nobody will reconnect our zookeeper connection. In this case we use zookeeper connection from

View File

@ -229,7 +229,7 @@ public:
/** Remove a specific replica from zookeeper.
*/
static void dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica,
Poco::Logger * logger, MergeTreeSettingsPtr table_settings = nullptr);
Poco::Logger * logger, MergeTreeSettingsPtr table_settings = nullptr, std::optional<bool> * has_metadata_out = nullptr);
/// Removes table from ZooKeeper after the last replica was dropped
static bool removeTableNodesFromZooKeeper(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path,

View File

@ -9,7 +9,7 @@ from github import Github
from build_download_helper import get_build_name_for_check, read_build_urls
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import format_description, post_commit_status
from docker_pull_helper import get_image_with_version
from env_helper import (
GITHUB_REPOSITORY,
@ -145,11 +145,13 @@ if __name__ == "__main__":
with open(
os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8"
) as desc_f:
description = desc_f.readline().rstrip("\n")[:140]
description = desc_f.readline().rstrip("\n")
except:
status = "failure"
description = "Task failed: $?=" + str(retcode)
description = format_description(description)
test_result = TestResult(description, "OK")
if "fail" in status:
test_result.status = "FAIL"

View File

@ -9,6 +9,10 @@ import time
from shutil import rmtree
from typing import List, Tuple
from ccache_utils import get_ccache_if_not_exists, upload_ccache
from ci_config import CI_CONFIG, BuildConfig
from commit_status_helper import get_commit_filtered_statuses, get_commit
from docker_pull_helper import get_image_with_version
from env_helper import (
CACHES_PATH,
GITHUB_JOB,
@ -18,18 +22,17 @@ from env_helper import (
S3_DOWNLOAD,
TEMP_PATH,
)
from s3_helper import S3Helper
from get_robot_token import get_best_robot_token
from github_helper import GitHub
from pr_info import PRInfo
from s3_helper import S3Helper
from tee_popen import TeePopen
from version_helper import (
ClickHouseVersion,
Git,
get_version_from_repo,
update_version_local,
)
from ccache_utils import get_ccache_if_not_exists, upload_ccache
from ci_config import CI_CONFIG, BuildConfig
from docker_pull_helper import get_image_with_version
from tee_popen import TeePopen
IMAGE_NAME = "clickhouse/binary-builder"
BUILD_LOG_NAME = "build_log.log"
@ -122,8 +125,7 @@ def check_for_success_run(
logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix, "")
logging.info("Checking for artifacts in %s", logged_prefix)
try:
# TODO: theoretically, it would miss performance artifact for pr==0,
# but luckily we rerun only really failed tasks now, so we're safe
# Performance artifacts are now part of regular build, so we're safe
build_results = s3_helper.list_prefix(s3_prefix)
except Exception as ex:
logging.info("Got exception while listing %s: %s\nRerun", logged_prefix, ex)
@ -231,6 +233,29 @@ def upload_master_static_binaries(
print(f"::notice ::Binary static URL: {url}")
def mark_failed_reports_pending(build_name: str, sha: str) -> None:
try:
gh = GitHub(get_best_robot_token())
commit = get_commit(gh, sha)
statuses = get_commit_filtered_statuses(commit)
report_status = [
name
for name, builds in CI_CONFIG["builds_report_config"].items()
if build_name in builds
][0]
for status in statuses:
if status.context == report_status and status.state in ["failure", "error"]:
logging.info(
"Commit already have failed status for '%s', setting it to 'pending'",
report_status,
)
commit.create_status(
"pending", status.url, "Set to pending on rerun", report_status
)
except: # we do not care about any exception here
logging.info("Failed to get or mark the reports status as pending, continue")
def main():
logging.basicConfig(level=logging.INFO)
@ -260,6 +285,9 @@ def main():
# put them as github actions artifact (result)
check_for_success_run(s3_helper, s3_path_prefix, build_name, build_config)
# If it's a later rerun, we need to mark a possibly failed report status as pending
mark_failed_reports_pending(build_name, pr_info.sha)
docker_image = get_image_with_version(IMAGES_PATH, IMAGE_NAME)
image_version = docker_image.version

View File

@ -508,7 +508,7 @@ def main():
logging.getLogger("git_helper").setLevel(logging.DEBUG)
token = args.token or get_best_robot_token()
gh = GitHub(token, create_cache_dir=False, per_page=100)
gh = GitHub(token, create_cache_dir=False)
bp = Backport(gh, args.repo, args.dry_run)
# https://github.com/python/mypy/issues/3004
bp.gh.cache_path = f"{TEMP_PATH}/gh_cache" # type: ignore

View File

@ -3,18 +3,20 @@
import csv
import os
import time
from typing import List
from typing import List, Literal
import logging
from ci_config import CI_CONFIG, REQUIRED_CHECKS
from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL
from github import Github
from github.Commit import Commit
from github.CommitStatus import CommitStatus
from ci_config import CI_CONFIG, REQUIRED_CHECKS
from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL
from pr_info import PRInfo, SKIP_MERGEABLE_CHECK_LABEL
RETRY = 5
CommitStatuses = List[CommitStatus]
MERGEABLE_NAME = "Mergeable Check"
def override_status(status: str, check_name: str, invert: bool = False) -> str:
@ -102,59 +104,69 @@ def post_labels(gh: Github, pr_info: PRInfo, labels_names: List[str]) -> None:
pull_request.add_to_labels(label)
def fail_mergeable_check(commit: Commit, description: str) -> None:
commit.create_status(
context="Mergeable Check",
description=description,
state="failure",
target_url=GITHUB_RUN_URL,
)
def format_description(description: str) -> str:
if len(description) > 140:
description = description[:137] + "..."
return description
def reset_mergeable_check(commit: Commit, description: str = "") -> None:
def set_mergeable_check(
commit: Commit,
description: str = "",
state: Literal["success", "failure"] = "success",
) -> None:
commit.create_status(
context="Mergeable Check",
context=MERGEABLE_NAME,
description=description,
state="success",
state=state,
target_url=GITHUB_RUN_URL,
)
def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None:
if SKIP_MERGEABLE_CHECK_LABEL in pr_info.labels:
not_run = (
pr_info.labels.intersection({SKIP_MERGEABLE_CHECK_LABEL, "release"})
or check_name not in REQUIRED_CHECKS
or pr_info.release_pr
or pr_info.number == 0
)
if not_run:
# Let's avoid unnecessary work
return
logging.info("Update Mergeable Check by %s", check_name)
commit = get_commit(gh, pr_info.sha)
checks = {
check.context: check.state
for check in filter(
lambda check: (check.context in REQUIRED_CHECKS),
# get_statuses() returns generator, which cannot be reversed - we need comprehension
# pylint: disable=unnecessary-comprehension
reversed([status for status in commit.get_statuses()]),
)
}
statuses = get_commit_filtered_statuses(commit)
required_checks = [
status for status in statuses if status.context in REQUIRED_CHECKS
]
mergeable_status = None
for status in statuses:
if status.context == MERGEABLE_NAME:
mergeable_status = status
break
success = []
fail = []
for name, state in checks.items():
if state == "success":
success.append(name)
for status in required_checks:
if status.state == "success":
success.append(status.context)
else:
fail.append(name)
fail.append(status.context)
if fail:
description = "failed: " + ", ".join(fail)
if success:
description += "; succeeded: " + ", ".join(success)
if len(description) > 140:
description = description[:137] + "..."
fail_mergeable_check(commit, description)
description = format_description(description)
if mergeable_status is None or mergeable_status.description != description:
set_mergeable_check(commit, description, "failure")
return
description = ", ".join(success)
if len(description) > 140:
description = description[:137] + "..."
reset_mergeable_check(commit, description)
description = format_description(description)
if mergeable_status is None or mergeable_status.description != description:
set_mergeable_check(commit, description)

View File

@ -14,7 +14,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union
from github import Github
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import format_description, post_commit_status
from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from pr_info import PRInfo
@ -456,8 +456,7 @@ def main():
else:
description = "Nothing to update"
if len(description) >= 140:
description = description[:136] + "..."
description = format_description(description)
with open(changed_json, "w", encoding="utf-8") as images_file:
json.dump(result_images, images_file)

View File

@ -10,7 +10,7 @@ from typing import List, Dict, Tuple
from github import Github
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import format_description, post_commit_status
from env_helper import RUNNER_TEMP
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from pr_info import PRInfo
@ -218,8 +218,7 @@ def main():
else:
description = "Nothing to update"
if len(description) >= 140:
description = description[:136] + "..."
description = format_description(description)
gh = Github(get_best_robot_token(), per_page=100)
post_commit_status(gh, pr_info.sha, NAME, description, status, url)

View File

@ -15,7 +15,7 @@ from github import Github
from build_check import get_release_or_pr
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import format_description, post_commit_status
from docker_images_check import DockerImage
from env_helper import CI, GITHUB_RUN_URL, RUNNER_TEMP, S3_BUILDS_BUCKET, S3_DOWNLOAD
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
@ -369,8 +369,7 @@ def main():
description = f"Processed tags: {', '.join(tags)}"
if len(description) >= 140:
description = description[:136] + "..."
description = format_description(description)
gh = Github(get_best_robot_token(), per_page=100)
post_commit_status(gh, pr_info.sha, NAME, description, status, url)

View File

@ -35,6 +35,8 @@ class GitHub(github.Github):
self._cache_path = Path(CACHE_PATH)
if create_cache_dir:
self.cache_path = self.cache_path
if not kwargs.get("per_page"):
kwargs["per_page"] = 100
# And set Path
super().__init__(*args, **kwargs)
self._retries = 0

View File

@ -18,7 +18,11 @@ from clickhouse_helper import (
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from commit_status_helper import post_commit_status, update_mergeable_check
from commit_status_helper import (
format_description,
post_commit_status,
update_mergeable_check,
)
from compress_files import compress_fast
from docker_pull_helper import get_image_with_version, DockerImage
from env_helper import CI, TEMP_PATH as TEMP, REPORTS_PATH
@ -341,8 +345,7 @@ def main():
ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, args.check_name, test_results)
if len(description) >= 140:
description = description[:136] + "..."
description = format_description(description)
post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url)

View File

@ -43,7 +43,7 @@ def main():
description = "the release can be created from the commit"
args.token = args.token or get_best_robot_token()
gh = GitHub(args.token, create_cache_dir=False, per_page=100)
gh = GitHub(args.token, create_cache_dir=False)
# Get the rate limits for a quick fail
gh.get_rate_limit()
commit = get_commit(gh, args.commit)

View File

@ -217,7 +217,7 @@ def main():
args = parse_args()
logging.info("Going to process PR #%s in repo %s", args.pr, args.repo)
token = args.token or get_best_robot_token()
gh = GitHub(token, per_page=100)
gh = GitHub(token)
repo = gh.get_repo(args.repo)
# An ugly and not nice fix to patch the wrong organization URL,
# see https://github.com/PyGithub/PyGithub/issues/2395#issuecomment-1378629710

View File

@ -7,10 +7,11 @@ from typing import Tuple
from github import Github
from commit_status_helper import (
format_description,
get_commit,
post_labels,
remove_labels,
reset_mergeable_check,
set_mergeable_check,
)
from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL
from get_robot_token import get_best_robot_token
@ -157,7 +158,7 @@ def check_pr_description(pr_info: PRInfo) -> Tuple[str, str]:
+ second_category
+ "'"
)
return result_status[:140], category
return result_status, category
elif re.match(
r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]
@ -199,6 +200,7 @@ if __name__ == "__main__":
pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True)
can_run, description, labels_state = should_run_checks_for_pr(pr_info)
description = format_description(description)
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
@ -231,7 +233,7 @@ if __name__ == "__main__":
if pr_labels_to_remove:
remove_labels(gh, pr_info, pr_labels_to_remove)
reset_mergeable_check(commit, "skipped")
set_mergeable_check(commit, "skipped")
if description_error:
print(
@ -249,7 +251,7 @@ if __name__ == "__main__":
)
commit.create_status(
context=NAME,
description=description_error[:139],
description=format_description(description_error),
state="failure",
target_url=url,
)

View File

@ -10,7 +10,7 @@ from github import Github
from build_download_helper import get_build_name_for_check, read_build_urls
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import format_description, post_commit_status
from docker_pull_helper import get_image_with_version
from env_helper import (
GITHUB_REPOSITORY,
@ -171,11 +171,13 @@ def main():
with open(
os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8"
) as desc_f:
description = desc_f.readline().rstrip("\n")[:140]
description = desc_f.readline().rstrip("\n")
except:
# status = "failure"
description = "Task failed: $?=" + str(retcode)
description = format_description(description)
report_url = upload_results(
s3_helper,
pr_info.number,

View File

@ -148,7 +148,7 @@ def main():
if args.push:
checkout_head(pr_info)
gh = GitHub(get_best_robot_token(), per_page=100, create_cache_dir=False)
gh = GitHub(get_best_robot_token(), create_cache_dir=False)
atexit.register(update_mergeable_check, gh, pr_info, NAME)

View File

@ -3264,7 +3264,7 @@ class ClickHouseInstance:
sleep_time=0.5,
check_callback=lambda x: True,
):
logging.debug(f"Executing query {sql} on {self.name}")
# logging.debug(f"Executing query {sql} on {self.name}")
result = None
for i in range(retry_count):
try:
@ -3283,7 +3283,7 @@ class ClickHouseInstance:
return result
time.sleep(sleep_time)
except Exception as ex:
logging.debug("Retry {} got exception {}".format(i + 1, ex))
# logging.debug("Retry {} got exception {}".format(i + 1, ex))
time.sleep(sleep_time)
if result is not None:

View File

@ -648,24 +648,15 @@ def test_async_backups_to_same_destination(interface):
"",
)
ids_succeeded = (
instance.query(
f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_CREATED'"
)
.rstrip("\n")
.split("\n")
)
ids_succeeded = instance.query(
f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_CREATED'"
).splitlines()
ids_failed = (
instance.query(
f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_FAILED'"
)
.rstrip("\n")
.split("\n")
)
ids_failed = instance.query(
f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_FAILED'"
).splitlines()
assert len(ids_succeeded) == 1
assert len(ids_failed) <= 1
assert set(ids_succeeded + ids_failed) == set(ids)
# Check that the first backup is all right.

View File

@ -82,8 +82,12 @@ def drop_after_test():
try:
yield
finally:
node0.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY")
node0.query("DROP DATABASE IF EXISTS mydb ON CLUSTER 'cluster' NO DELAY")
node0.query(
"DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY",
settings={
"distributed_ddl_task_timeout": 360,
},
)
backup_id_counter = 0
@ -138,7 +142,12 @@ def test_concurrent_backups_on_same_node():
# This restore part is added to confirm creating an internal backup & restore work
# even when a concurrent backup is stopped
nodes[0].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY")
nodes[0].query(
f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY",
settings={
"distributed_ddl_task_timeout": 360,
},
)
nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}")
nodes[0].query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl")
@ -158,9 +167,14 @@ def test_concurrent_backups_on_different_nodes():
f"SELECT status FROM system.backups WHERE status == 'CREATING_BACKUP' AND id = '{id}'",
"CREATING_BACKUP",
)
assert "Concurrent backups not supported" in nodes[2].query_and_get_error(
assert "Concurrent backups not supported" in nodes[0].query_and_get_error(
f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}"
)
assert_eq_with_retry(
nodes[1],
f"SELECT status FROM system.backups WHERE status == 'BACKUP_CREATED' AND id = '{id}'",
"BACKUP_CREATED",
)
def test_concurrent_restores_on_same_node():
@ -185,11 +199,20 @@ def test_concurrent_restores_on_same_node():
"BACKUP_CREATED",
)
nodes[0].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY")
nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC")
nodes[0].query(
f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY",
settings={
"distributed_ddl_task_timeout": 360,
},
)
restore_id = (
nodes[0]
.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC")
.split("\t")[0]
)
assert_eq_with_retry(
nodes[0],
f"SELECT status FROM system.backups WHERE status == 'RESTORING'",
f"SELECT status FROM system.backups WHERE status == 'RESTORING' AND id == '{restore_id}'",
"RESTORING",
)
assert "Concurrent restores not supported" in nodes[0].query_and_get_error(
@ -219,8 +242,17 @@ def test_concurrent_restores_on_different_node():
"BACKUP_CREATED",
)
nodes[0].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY")
nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC")
nodes[0].query(
f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY",
settings={
"distributed_ddl_task_timeout": 360,
},
)
restore_id = (
nodes[0]
.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC")
.split("\t")[0]
)
assert_eq_with_retry(
nodes[0],
f"SELECT status FROM system.backups WHERE status == 'RESTORING'",
@ -229,3 +261,9 @@ def test_concurrent_restores_on_different_node():
assert "Concurrent restores not supported" in nodes[1].query_and_get_error(
f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}"
)
assert_eq_with_retry(
nodes[0],
f"SELECT status FROM system.backups WHERE status == 'RESTORED' AND id == '{restore_id}'",
"RESTORED",
)

Some files were not shown because too many files have changed in this diff