Merge branch 'master' of https://github.com/ClickHouse/ClickHouse into KRB_CVE_Fix
Commit 6b1bb3f7ad
@@ -14,7 +14,7 @@ curl https://clickhouse.com/ | sh
* [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
* [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
-* [Slack](https://clickhousedb.slack.com/) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
+* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.
@@ -44,7 +44,7 @@ int NumberParser::parse(const std::string& s, char thSep)
    if (tryParse(s, result, thSep))
        return result;
    else
-       throw SyntaxException("Not a valid integer", s);
+       throw SyntaxException("Not a valid integer", "'" + s + "'");
}

@@ -60,7 +60,7 @@ unsigned NumberParser::parseUnsigned(const std::string& s, char thSep)
    if (tryParseUnsigned(s, result, thSep))
        return result;
    else
-       throw SyntaxException("Not a valid unsigned integer", s);
+       throw SyntaxException("Not a valid unsigned integer", "'" + s + "'");
}

@@ -76,7 +76,7 @@ unsigned NumberParser::parseHex(const std::string& s)
    if (tryParseHex(s, result))
        return result;
    else
-       throw SyntaxException("Not a valid hexadecimal integer", s);
+       throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}

@@ -94,7 +94,7 @@ unsigned NumberParser::parseOct(const std::string& s)
    if (tryParseOct(s, result))
        return result;
    else
-       throw SyntaxException("Not a valid hexadecimal integer", s);
+       throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}

@@ -112,7 +112,7 @@ Int64 NumberParser::parse64(const std::string& s, char thSep)
    if (tryParse64(s, result, thSep))
        return result;
    else
-       throw SyntaxException("Not a valid integer", s);
+       throw SyntaxException("Not a valid integer", "'" + s + "'");
}

@@ -128,7 +128,7 @@ UInt64 NumberParser::parseUnsigned64(const std::string& s, char thSep)
    if (tryParseUnsigned64(s, result, thSep))
        return result;
    else
-       throw SyntaxException("Not a valid unsigned integer", s);
+       throw SyntaxException("Not a valid unsigned integer", "'" + s + "'");
}

@@ -144,7 +144,7 @@ UInt64 NumberParser::parseHex64(const std::string& s)
    if (tryParseHex64(s, result))
        return result;
    else
-       throw SyntaxException("Not a valid hexadecimal integer", s);
+       throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}

@@ -162,7 +162,7 @@ UInt64 NumberParser::parseOct64(const std::string& s)
    if (tryParseOct64(s, result))
        return result;
    else
-       throw SyntaxException("Not a valid hexadecimal integer", s);
+       throw SyntaxException("Not a valid hexadecimal integer", "'" + s + "'");
}

@@ -180,7 +180,7 @@ double NumberParser::parseFloat(const std::string& s, char decSep, char thSep)
    if (tryParseFloat(s, result, decSep, thSep))
        return result;
    else
-       throw SyntaxException("Not a valid floating-point number", s);
+       throw SyntaxException("Not a valid floating-point number", "'" + s + "'");
}

@@ -196,7 +196,7 @@ bool NumberParser::parseBool(const std::string& s)
    if (tryParseBool(s, result))
        return result;
    else
-       throw SyntaxException("Not a valid bool number", s);
+       throw SyntaxException("Not a valid bool number", "'" + s + "'");
}
@@ -60,6 +60,23 @@ elseif (ARCH_AARCH64)
    set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs -Xclang=-target-feature -Xclang=+ldapr -Wno-unused-command-line-argument")
endif ()

# Best-effort check: The build generates and executes intermediate binaries, e.g. protoc and llvm-tablegen. If we build on ARM for ARM
# and the build machine is too old, i.e. doesn't satisfy above modern profile, then these intermediate binaries will not run (dump
# SIGILL). Even if they could run, the build machine wouldn't be able to run the ClickHouse binary. In that case, suggest to run the
# build with the compat profile.
if (OS_LINUX AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)" AND NOT NO_ARMV81_OR_HIGHER)
    # CPU features in /proc/cpuinfo and compiler flags don't align :( ... pick some obvious flags contained in the modern but not in the
    # legacy profile (full Graviton 3 /proc/cpuinfo is "fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm
    # jscvt fcma lrcpc dcpop sha3 sm3 sm4 asimddp sha512 sve asimdfhm dit uscat ilrcpc flagm ssbs paca pacg dcpodp svei8mm svebf16 i8mm
    # bf16 dgh rng")
    execute_process(
        COMMAND grep -P "^(?=.*atomic)(?=.*ssbs)" /proc/cpuinfo
        OUTPUT_VARIABLE FLAGS)
    if (NOT FLAGS)
        MESSAGE(FATAL_ERROR "The build machine does not satisfy the minimum CPU requirements, try to run cmake with -DNO_ARMV81_OR_HIGHER=1")
    endif()
endif()

elseif (ARCH_PPC64LE)
    # By Default, build for power8 and up, allow building for power9 and up
    # Note that gcc and clang have support for x86 SSE2 intrinsics when building for PowerPC

@@ -102,6 +119,22 @@ elseif (ARCH_AMD64)
    SET(ENABLE_AVX512_FOR_SPEC_OP 0)
endif()

# Same best-effort check for x86 as above for ARM.
if (OS_LINUX AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64" AND NOT NO_SSE3_OR_HIGHER)
    # Test for flags in standard profile but not in NO_SSE3_OR_HIGHER profile.
    # /proc/cpuid for Intel Xeon 8124: "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse
    # sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl xtopology nonstop_tsc cpuid aperfmperf
    # tsc_known_freq pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c
    # rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single pti fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm mpx
    # avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves ida arat pku ospke""
    execute_process(
        COMMAND grep -P "^(?=.*ssse3)(?=.*sse4_1)(?=.*sse4_2)" /proc/cpuinfo
        OUTPUT_VARIABLE FLAGS)
    if (NOT FLAGS)
        MESSAGE(FATAL_ERROR "The build machine does not satisfy the minimum CPU requirements, try to run cmake with -DNO_SSE3_OR_HIGHER=1")
    endif()
endif()

# ClickHouse can be cross-compiled (e.g. on an ARM host for x86) but it is also possible to build ClickHouse on x86 w/o AVX for x86 w/
# AVX. We only check that the compiler can emit certain SIMD instructions, we don't care if the host system is able to run the binary.
# Therefore, use check_cxx_source_compiles (= does the code compile+link?) instead of check_cxx_source_runs (= does the code
contrib/libunwind (vendored)
@@ -1 +1 @@
-Subproject commit 5022f30f3e092a54a7c101c335ce5e08769db366
+Subproject commit e48aa13f67dc722511b5af33a32ba9b7748176b5
@@ -94,6 +94,10 @@ if (ARCH_AMD64 OR ARCH_AARCH64)
    add_compile_definitions(TUKLIB_FAST_UNALIGNED_ACCESS=1)
endif ()

if (ARCH_S390X)
    add_compile_definitions(WORDS_BIGENDIAN)
endif ()

find_package(Threads REQUIRED)
@@ -30,25 +30,10 @@
# - zstd homepage : http://www.zstd.net/
# ################################################################

# Get library version based on information from input content (use regular exp)
function(GetLibraryVersion _content _outputVar1 _outputVar2 _outputVar3)
    string(REGEX MATCHALL ".*define ZSTD_VERSION_MAJOR+.* ([0-9]+).*define ZSTD_VERSION_MINOR+.* ([0-9]+).*define ZSTD_VERSION_RELEASE+.* ([0-9]+)" VERSION_REGEX "${_content}")
    SET(${_outputVar1} ${CMAKE_MATCH_1} PARENT_SCOPE)
    SET(${_outputVar2} ${CMAKE_MATCH_2} PARENT_SCOPE)
    SET(${_outputVar3} ${CMAKE_MATCH_3} PARENT_SCOPE)
endfunction()

# Define library directory, where sources and header files are located
SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/zstd/lib")
INCLUDE_DIRECTORIES(BEFORE ${LIBRARY_DIR} "${LIBRARY_DIR}/common")

# Read file content
FILE(READ "${LIBRARY_DIR}/zstd.h" HEADER_CONTENT)

# Parse version
GetLibraryVersion("${HEADER_CONTENT}" LIBVER_MAJOR LIBVER_MINOR LIBVER_RELEASE)
MESSAGE(STATUS "ZSTD VERSION ${LIBVER_MAJOR}.${LIBVER_MINOR}.${LIBVER_RELEASE}")

# cd contrib/zstd/lib
# find . -name '*.c' -or -name '*.S' | grep -vP 'deprecated|legacy' | sort | sed 's/^\./ "${LIBRARY_DIR}/"'
SET(Sources
@@ -30,13 +30,15 @@ def get_options(i, backward_compatibility_check):

    if i % 2 == 1:
        join_alg_num = i // 2
-       if join_alg_num % 4 == 0:
+       if join_alg_num % 5 == 0:
            client_options.append("join_algorithm='parallel_hash'")
-       if join_alg_num % 4 == 1:
+       if join_alg_num % 5 == 1:
            client_options.append("join_algorithm='partial_merge'")
-       if join_alg_num % 4 == 2:
+       if join_alg_num % 5 == 2:
            client_options.append("join_algorithm='full_sorting_merge'")
-       if join_alg_num % 4 == 3:
+       if join_alg_num % 5 == 3:
            client_options.append("join_algorithm='grace_hash'")
        if join_alg_num % 5 == 4:
            client_options.append("join_algorithm='auto'")
            client_options.append('max_rows_in_join=1000')

@@ -211,7 +213,7 @@ def prepare_for_hung_check(drop_databases):
    # Even if all clickhouse-test processes are finished, there are probably some sh scripts,
    # which still run some new queries. Let's ignore them.
    try:
-       query = """clickhouse client -q "SELECT count() FROM system.processes where where elapsed > 300" """
+       query = """clickhouse client -q "SELECT count() FROM system.processes where elapsed > 300" """
        output = (
            check_output(query, shell=True, stderr=STDOUT, timeout=30)
            .decode("utf-8")
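For reference, the options this stress script appends map onto ordinary client-side settings. A minimal sketch of what one rotation of the loop effectively sends to the client (setting names taken from the script above; the pairing of `max_rows_in_join` with `'auto'` follows the hunk as reconstructed here):

```sql
SET join_algorithm = 'grace_hash';   -- one of: parallel_hash, partial_merge, full_sorting_merge, grace_hash, auto
SET max_rows_in_join = 1000;         -- appended together with join_algorithm = 'auto' in the script
```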
@@ -162,22 +162,59 @@ If you want to change the target table by using `ALTER`, we recommend disabling

## Configuration {#configuration}

-Similar to GraphiteMergeTree, the Kafka engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (`kafka`) and topic-level (`kafka_*`). The global configuration is applied first, and then the topic-level configuration is applied (if it exists).
+Similar to GraphiteMergeTree, the Kafka engine supports extended configuration using the ClickHouse config file. There are two configuration keys that you can use: global (below `<kafka>`) and topic-level (below `<kafka><kafka_topic>`). The global configuration is applied first, and then the topic-level configuration is applied (if it exists).

``` xml
<!-- Global configuration options for all tables of Kafka engine type -->
<kafka>
    <!-- Global configuration options for all tables of Kafka engine type -->
    <debug>cgrp</debug>
    <auto_offset_reset>smallest</auto_offset_reset>

    <!-- Configuration specific to topics "logs" and "stats" -->

    <kafka_topic>
        <name>logs</name>
        <retry_backoff_ms>250</retry_backoff_ms>
        <fetch_min_bytes>100000</fetch_min_bytes>
    </kafka_topic>

    <kafka_topic>
        <name>stats</name>
        <retry_backoff_ms>400</retry_backoff_ms>
        <fetch_min_bytes>50000</fetch_min_bytes>
    </kafka_topic>
</kafka>
```

<details markdown="1">

<summary>Example in deprecated syntax</summary>

``` xml
<kafka>
    <!-- Global configuration options for all tables of Kafka engine type -->
    <debug>cgrp</debug>
    <auto_offset_reset>smallest</auto_offset_reset>
</kafka>

-<!-- Configuration specific for topic "logs" -->
+<!-- Configuration specific to topics "logs" and "stats" -->
<!-- Does NOT support periods in topic names, e.g. "logs.security"> -->

<kafka_logs>
    <retry_backoff_ms>250</retry_backoff_ms>
    <fetch_min_bytes>100000</fetch_min_bytes>
</kafka_logs>

<kafka_stats>
    <retry_backoff_ms>400</retry_backoff_ms>
    <fetch_min_bytes>50000</fetch_min_bytes>
</kafka_stats>
```

</details>

For a list of possible configuration options, see the [librdkafka configuration reference](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md). Use the underscore (`_`) instead of a dot in the ClickHouse configuration. For example, `check.crcs=true` will be `<check_crcs>true</check_crcs>`.

### Kerberos support {#kafka-kerberos-support}
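For context, a minimal sketch of a table to which the `<kafka>`/`<kafka_topic>` configuration above would apply. The broker address, consumer group, and column set are illustrative assumptions, not part of the original page; the topic name matches the `logs` block above:

```sql
CREATE TABLE queue
(
    timestamp DateTime,
    message String
)
ENGINE = Kafka
SETTINGS kafka_broker_list = 'localhost:9092',   -- assumed broker address
         kafka_topic_list = 'logs',              -- matches <kafka_topic><name>logs</name> above
         kafka_group_name = 'clickhouse_consumer',
         kafka_format = 'JSONEachRow';
```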
@@ -23,6 +23,10 @@ The quickest and easiest way to get up and running with ClickHouse is to create

## Self-Managed Install

:::tip
For production installs of a specific release version see the [installation options](#available-installation-options) down below.
:::

<Tabs>
<TabItem value="linux" label="Linux" default>
@@ -1177,8 +1177,7 @@ This setting can be used to specify the types of columns that could not be deter
**Example**

```sql
-DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}'
-SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)'
+DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}') SETTINGS schema_inference_hints = 'age LowCardinality(UInt8), status Nullable(String)', allow_suspicious_low_cardinality_types=1
```
```response
┌─name────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
@@ -507,7 +507,7 @@ Enabled by default.

Ignore unknown keys in json object for named tuples.

-Disabled by default.
+Enabled by default.

## input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple}
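A small sketch of what this setting controls (the table and input below are illustrative assumptions): with the setting enabled, a JSON key that has no matching tuple element is skipped instead of causing a parse error.

```sql
CREATE TABLE t (x Tuple(a UInt32)) ENGINE = Memory;

SET input_format_json_ignore_unknown_keys_in_named_tuple = 1;

-- The "unknown" key has no matching element in Tuple(a UInt32) and is ignored.
INSERT INTO t FORMAT JSONEachRow {"x" : {"a" : 1, "unknown" : 2}};
```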
@@ -3948,3 +3948,71 @@ Default value: `0`.

:::note
Use this setting only for backward compatibility if your use cases depend on old syntax.
:::

## final {#final}

Automatically applies [FINAL](../../sql-reference/statements/select/from/#final-modifier) modifier to all tables in a query, to tables where [FINAL](../../sql-reference/statements/select/from/#final-modifier) is applicable, including joined tables and tables in sub-queries, and
distributed tables.

Possible values:

- 0 - disabled
- 1 - enabled

Default value: `0`.

Example:

```sql
CREATE TABLE test
(
    key Int64,
    some String
)
ENGINE = ReplacingMergeTree
ORDER BY key;

INSERT INTO test FORMAT Values (1, 'first');
INSERT INTO test FORMAT Values (1, 'second');

SELECT * FROM test;
┌─key─┬─some───┐
│ 1 │ second │
└─────┴────────┘
┌─key─┬─some──┐
│ 1 │ first │
└─────┴───────┘

SELECT * FROM test SETTINGS final = 1;
┌─key─┬─some───┐
│ 1 │ second │
└─────┴────────┘

SET final = 1;
SELECT * FROM test;
┌─key─┬─some───┐
│ 1 │ second │
└─────┴────────┘
```

## asterisk_include_materialized_columns {#asterisk_include_materialized_columns}

Include [MATERIALIZED](../../sql-reference/statements/create/table/#materialized) columns for wildcard query (`SELECT *`).

Possible values:

- 0 - disabled
- 1 - enabled

Default value: `0`.

## asterisk_include_alias_columns {#asterisk_include_alias_columns}

Include [ALIAS](../../sql-reference/statements/create/table/#alias) columns for wildcard query (`SELECT *`).

Possible values:

- 0 - disabled
- 1 - enabled

Default value: `0`.
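Worked examples for both `asterisk_include_*` settings appear later in this commit, in the `MATERIALIZED` and `ALIAS` sections of the CREATE TABLE docs. For quick reference, a minimal sketch with an assumed table:

```sql
CREATE TABLE t (id UInt64, id_doubled UInt64 MATERIALIZED id * 2) ENGINE = Memory;
INSERT INTO t (id) VALUES (1);

SELECT * FROM t;                                                      -- returns only id
SELECT * FROM t SETTINGS asterisk_include_materialized_columns = 1;  -- returns id and id_doubled
```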
@@ -1,7 +1,7 @@
---
slug: /en/sql-reference/functions/string-replace-functions
sidebar_position: 42
-sidebar_label: For Replacing in Strings
+sidebar_label: Replacing in Strings
---

# Functions for Searching and Replacing in Strings
@@ -1,7 +1,7 @@
---
slug: /en/sql-reference/functions/string-search-functions
sidebar_position: 41
-sidebar_label: For Searching in Strings
+sidebar_label: Searching in Strings
---

# Functions for Searching in Strings

@@ -382,12 +382,12 @@ Checks whether string `haystack` matches the regular expression `pattern`. The p
Returns 1 in case of a match, and 0 otherwise.

Matching is based on UTF-8, e.g. `.` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes. The regular expression must not contain null bytes.
-If the haystack or pattern contain a sequence of bytes that are not valid UTF-8, the behavior is undefined.
-No automatic Unicode normalization is performed, if you need it you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
+If the haystack or the pattern are not valid UTF-8, then the behavior is undefined.
+No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.

Unlike re2's default behavior, `.` matches line breaks. To disable this, prepend the pattern with `(?-s)`.

-For patterns to search for substrings in a string, it is better to use LIKE or ‘position’, since they work much faster.
+For patterns to search for substrings in a string, it is better to use functions [like](#like) or [position](#position) since they work much faster.

## multiMatchAny(haystack, \[pattern<sub>1</sub>, pattern<sub>2</sub>, …, pattern<sub>n</sub>\])

@@ -529,21 +529,25 @@ Result:

## like(haystack, pattern), haystack LIKE pattern operator

-Checks whether a string matches a simple regular expression.
-The regular expression can contain the metasymbols `%` and `_`.
+Checks whether a string matches a LIKE expression.
+A LIKE expression contains a mix of normal characters and the following metasymbols:

-`%` indicates any quantity of any bytes (including zero characters).
+- `%` indicates an arbitrary number of arbitrary characters (including zero characters).

-`_` indicates any one byte.
+- `_` indicates a single arbitrary character.

-Use the backslash (`\`) for escaping metasymbols. See the note on escaping in the description of the ‘match’ function.
+- `\` is for escaping literals `%`, `_` and `\`.

Matching is based on UTF-8, e.g. `_` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes.
-If the haystack or pattern contain a sequence of bytes that are not valid UTF-8, then the behavior is undefined.
-No automatic Unicode normalization is performed, if you need it you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
+If the haystack or the pattern are not valid UTF-8, then the behavior is undefined.
+No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.

-For regular expressions like `%needle%`, the code is more optimal and works as fast as the `position` function.
-For other regular expressions, the code is the same as for the ‘match’ function.
+To match against literals `%`, `_` and `/` (which are LIKE metacharacters), prepend them with a backslash, i.e. `\%`, `\_` and `\\`.
+The backslash loses its special meaning, i.e. is interpreted literally, if it prepends a character different than `%`, `_` or `\`.
+Note that ClickHouse requires backslashes in strings [to be quoted as well](../syntax.md#string), so you would actually need to write `\\%`, `\\_` and `\\\\`.

+For patterns of the form `%needle%`, the function is as fast as the `position` function.
+Other LIKE expressions are internally converted to a regular expression and executed with a performance similar to function `match`.

## notLike(haystack, pattern), haystack NOT LIKE pattern operator
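A few self-contained queries illustrating the behaviour described above (a sketch added for this review; expected results shown as comments):

```sql
SELECT match('Hello, world', 'H.*d');   -- 1: regular-expression match
SELECT 'Hello, world' LIKE '%wor%';     -- 1: %needle%-style LIKE, as fast as position()
SELECT like('100%', '100\\%');          -- 1: \% matches a literal percent sign
SELECT 'abc' LIKE 'a_c';                -- 1: _ matches a single character
```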
@@ -1,8 +1,8 @@
---
slug: /en/sql-reference/functions/tuple-map-functions
sidebar_position: 46
-sidebar_label: Working with maps
-title: "Functions for maps"
+sidebar_label: Maps
+title: "Functions for Maps"
---

## map

@@ -440,7 +440,7 @@ mapApply(func, map)

**Parameters**

-- `func` - [Lamda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
+- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).

**Returned value**

@@ -480,7 +480,7 @@ mapFilter(func, map)

**Parameters**

-- `func` - [Lamda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
+- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
- `map` — [Map](../../sql-reference/data-types/map.md).

**Returned value**
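For illustration, minimal calls of the two functions whose parameters are documented above (a sketch; expected output shown as comments):

```sql
SELECT mapApply((k, v) -> (k, v * 2), map('a', 1, 'b', 2));   -- {'a':2,'b':4}
SELECT mapFilter((k, v) -> v > 1, map('a', 1, 'b', 2));       -- {'b':2}
```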
@@ -127,6 +127,26 @@ Default expressions may be defined as an arbitrary expression from table constan

Normal default value. If the INSERT query does not specify the corresponding column, it will be filled in by computing the corresponding expression.

Example:

```sql
CREATE OR REPLACE TABLE test
(
    id UInt64,
    updated_at DateTime DEFAULT now(),
    updated_at_date Date DEFAULT toDate(updated_at)
)
ENGINE = MergeTree
ORDER BY id;

INSERT INTO test (id) Values (1);

SELECT * FROM test;
┌─id─┬──────────updated_at─┬─updated_at_date─┐
│ 1 │ 2023-02-24 17:06:46 │ 2023-02-24 │
└────┴─────────────────────┴─────────────────┘
```

### MATERIALIZED

`MATERIALIZED expr`

@@ -135,6 +155,36 @@ Materialized expression. Such a column can’t be specified for INSERT, because
For an INSERT without a list of columns, these columns are not considered.
In addition, this column is not substituted when using an asterisk in a SELECT query. This is to preserve the invariant that the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.

Example:

```sql
CREATE OR REPLACE TABLE test
(
    id UInt64,
    updated_at DateTime MATERIALIZED now(),
    updated_at_date Date MATERIALIZED toDate(updated_at)
)
ENGINE = MergeTree
ORDER BY id;

INSERT INTO test Values (1);

SELECT * FROM test;
┌─id─┐
│ 1 │
└────┘

SELECT id, updated_at, updated_at_date FROM test;
┌─id─┬──────────updated_at─┬─updated_at_date─┐
│ 1 │ 2023-02-24 17:08:08 │ 2023-02-24 │
└────┴─────────────────────┴─────────────────┘

SELECT * FROM test SETTINGS asterisk_include_materialized_columns=1;
┌─id─┬──────────updated_at─┬─updated_at_date─┐
│ 1 │ 2023-02-24 17:08:08 │ 2023-02-24 │
└────┴─────────────────────┴─────────────────┘
```

### EPHEMERAL

`EPHEMERAL [expr]`

@@ -142,6 +192,34 @@ In addition, this column is not substituted when using an asterisk in a SELECT q
Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. If `expr` is omitted type for column is required.
INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.

Example:

```sql
CREATE OR REPLACE TABLE test
(
    id UInt64,
    unhexed String EPHEMERAL,
    hexed FixedString(4) DEFAULT unhex(unhexed)
)
ENGINE = MergeTree
ORDER BY id

INSERT INTO test (id, unhexed) Values (1, '5a90b714');

SELECT
    id,
    hexed,
    hex(hexed)
FROM test
FORMAT Vertical;

Row 1:
──────
id: 1
hexed: Z<><5A>
hex(hexed): 5A90B714
```

### ALIAS

`ALIAS expr`

@@ -156,6 +234,29 @@ If you add a new column to a table but later change its default expression, the

It is not possible to set default values for elements in nested data structures.

```sql
CREATE OR REPLACE TABLE test
(
    id UInt64,
    size_bytes Int64,
    size String Alias formatReadableSize(size_bytes)
)
ENGINE = MergeTree
ORDER BY id;

INSERT INTO test Values (1, 4678899);

SELECT id, size_bytes, size FROM test;
┌─id─┬─size_bytes─┬─size─────┐
│ 1 │ 4678899 │ 4.46 MiB │
└────┴────────────┴──────────┘

SELECT * FROM test SETTINGS asterisk_include_alias_columns=1;
┌─id─┬─size_bytes─┬─size─────┐
│ 1 │ 4678899 │ 4.46 MiB │
└────┴────────────┴──────────┘
```

## Primary Key

You can define a [primary key](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries) when creating a table. Primary key can be specified in two ways:
@@ -36,6 +36,8 @@ Queries that use `FINAL` are executed slightly slower than similar queries that

**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine have’t happened yet and deal with it by applying aggregation (for example, to discard duplicates).

`FINAL` can be applied automatically using [FINAL](../../../operations/settings/settings.md#final) setting to all tables in a query using a session or a user profile.

## Implementation Details

If the `FROM` clause is omitted, data will be read from the `system.one` table.
@@ -86,10 +86,10 @@ Examples: `1`, `10_000_000`, `0xffff_ffff`, `18446744073709551615`, `0xDEADBEEF`
String literals must be enclosed in single quotes, double quotes are not supported.
Escaping works either

-- in SQL-style based on a preceding single quote where the single-quote character `'` (and only this character) can be escaped as `''`, or
-- in C-style based on a preceding backslash with the following supported escape sequences: `\\`, `\'`, `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. The backslash loses its special meaning and will be interpreted literally if it precedes characters different than the listed ones.
+- using a preceding single quote where the single-quote character `'` (and only this character) can be escaped as `''`, or
+- using a preceding backslash with the following supported escape sequences: `\\`, `\'`, `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. The backslash loses its special meaning, i.e. will be interpreted literally, if it precedes characters different than the listed ones.

-In string literals, you need to escape at least `'` and `\` using escape codes `''` (or: `\'`) and `\\`.
+In string literals, you need to escape at least `'` and `\` using escape codes `\'` (or: `''`) and `\\`.

### Compound
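To make the two escaping styles concrete, a short illustrative query (a sketch; all four literals below are valid):

```sql
SELECT 'It''s', 'It\'s', 'a\\b', 'line1\nline2';
-- 'It''s' and 'It\'s' both produce It's; 'a\\b' contains a single backslash; \n is a newline
```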
@@ -567,7 +567,7 @@ SELECT
    ts,
    value,
    round(avg(value) OVER (PARTITION BY metric ORDER BY toDate(ts)
-        Range BETWEEN 10 PRECEDING AND CURRENT ROW),2) moving_avg_10_days_temp
+        Range BETWEEN 10 PRECEDING AND CURRENT ROW),2) AS moving_avg_10_days_temp
FROM sensors
ORDER BY
    metric ASC,
@ -1,5 +1,8 @@
|
||||
#include "LocalServer.h"
|
||||
|
||||
#include <sys/resource.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <base/errnoToString.h>
|
||||
#include <Poco/Util/XMLConfiguration.h>
|
||||
#include <Poco/String.h>
|
||||
#include <Poco/Logger.h>
|
||||
@ -179,9 +182,9 @@ void LocalServer::tryInitPath()
|
||||
parent_folder = std::filesystem::temp_directory_path();
|
||||
|
||||
}
|
||||
catch (const fs::filesystem_error& e)
|
||||
catch (const fs::filesystem_error & e)
|
||||
{
|
||||
// tmp folder don't exists? misconfiguration? chroot?
|
||||
// The tmp folder doesn't exist? Is it a misconfiguration? Or chroot?
|
||||
LOG_DEBUG(log, "Can not get temporary folder: {}", e.what());
|
||||
parent_folder = std::filesystem::current_path();
|
||||
|
||||
@ -390,6 +393,21 @@ try
|
||||
std::cout << std::fixed << std::setprecision(3);
|
||||
std::cerr << std::fixed << std::setprecision(3);
|
||||
|
||||
/// Try to increase limit on number of open files.
|
||||
{
|
||||
rlimit rlim;
|
||||
if (getrlimit(RLIMIT_NOFILE, &rlim))
|
||||
throw Poco::Exception("Cannot getrlimit");
|
||||
|
||||
if (rlim.rlim_cur < rlim.rlim_max)
|
||||
{
|
||||
rlim.rlim_cur = config().getUInt("max_open_files", static_cast<unsigned>(rlim.rlim_max));
|
||||
int rc = setrlimit(RLIMIT_NOFILE, &rlim);
|
||||
if (rc != 0)
|
||||
std::cerr << fmt::format("Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, errnoToString()) << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(FUZZING_MODE)
|
||||
static bool first_time = true;
|
||||
if (first_time)
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <sys/types.h>
|
||||
#include <pwd.h>
|
||||
#include <unistd.h>
|
||||
#include <Poco/Version.h>
|
||||
#include <Poco/Net/HTTPServer.h>
|
||||
#include <Poco/Net/NetException.h>
|
||||
#include <Poco/Util/HelpFormatter.h>
|
||||
|
@ -13,7 +13,7 @@ struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
@ -129,7 +129,7 @@ public:
|
||||
const IColumn::Offsets & ith_offsets = ith_column.getOffsets();
|
||||
|
||||
if (ith_offsets[row_num] != end || (row_num != 0 && ith_offsets[row_num - 1] != begin))
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
|
||||
}
|
||||
|
||||
for (size_t i = begin; i < end; ++i)
|
||||
|
@ -19,7 +19,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
@ -197,7 +197,7 @@ public:
|
||||
const IColumn::Offsets & ith_offsets = ith_column.getOffsets();
|
||||
|
||||
if (ith_offsets[row_num] != end || (row_num != 0 && ith_offsets[row_num - 1] != begin))
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Arrays passed to {} aggregate function have different sizes", getName());
|
||||
}
|
||||
|
||||
AggregateFunctionForEachData & state = ensureAggregateData(place, end - begin, *arena);
|
||||
|
@ -63,6 +63,9 @@ static constexpr const char * getNameByTrait()
|
||||
template <typename T>
|
||||
struct GroupArraySamplerData
|
||||
{
|
||||
/// For easy serialization.
|
||||
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
|
||||
|
||||
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
|
||||
using Array = PODArray<T, 32, Allocator>;
|
||||
@ -97,6 +100,9 @@ struct GroupArrayNumericData;
|
||||
template <typename T>
|
||||
struct GroupArrayNumericData<T, false>
|
||||
{
|
||||
/// For easy serialization.
|
||||
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
|
||||
|
||||
// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
|
||||
using Array = PODArray<T, 32, Allocator>;
|
||||
|
@ -32,6 +32,9 @@ namespace ErrorCodes
|
||||
template <typename T>
|
||||
struct MovingData
|
||||
{
|
||||
/// For easy serialization.
|
||||
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
|
||||
|
||||
using Accumulator = T;
|
||||
|
||||
/// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
|
||||
|
@ -117,7 +117,21 @@ public:
|
||||
const auto & value = this->data(place).value;
|
||||
size_t size = value.size();
|
||||
writeVarUInt(size, buf);
|
||||
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
|
||||
|
||||
/// In this version, pairs were serialized with padding.
|
||||
/// We must ensure that padding bytes are zero-filled.
|
||||
|
||||
static_assert(offsetof(typename MaxIntersectionsData<PointType>::Value, first) == 0);
|
||||
static_assert(offsetof(typename MaxIntersectionsData<PointType>::Value, second) > 0);
|
||||
|
||||
char zero_padding[offsetof(typename MaxIntersectionsData<PointType>::Value, second) - sizeof(value[0].first)]{};
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
writePODBinary(value[i].first, buf);
|
||||
writePODBinary(zero_padding, buf);
|
||||
writePODBinary(value[i].second, buf);
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
|
@ -432,6 +432,7 @@ ReplxxLineReader::ReplxxLineReader(
|
||||
};
|
||||
|
||||
rx.bind_key(Replxx::KEY::control('R'), interactive_history_search);
|
||||
#endif
|
||||
|
||||
/// Rebind regular incremental search to C-T.
|
||||
///
|
||||
@ -443,7 +444,6 @@ ReplxxLineReader::ReplxxLineReader(
|
||||
uint32_t reverse_search = Replxx::KEY::control('R');
|
||||
return rx.invoke(Replxx::ACTION::HISTORY_INCREMENTAL_SEARCH, reverse_search);
|
||||
});
|
||||
#endif
|
||||
}
|
||||
|
||||
ReplxxLineReader::~ReplxxLineReader()
|
||||
|
@ -747,7 +747,7 @@ namespace
|
||||
*/
|
||||
template<typename IntType>
|
||||
requires (std::is_same_v<IntType, Int32> || std::is_same_v<IntType, UInt32>)
|
||||
void replicateSSE42Int32(const IntType * __restrict data, IntType * __restrict result_data, const IColumn::Offsets & offsets)
|
||||
void replicateSSE2Int32(const IntType * __restrict data, IntType * __restrict result_data, const IColumn::Offsets & offsets)
|
||||
{
|
||||
const IntType * data_copy_begin_ptr = nullptr;
|
||||
size_t offsets_size = offsets.size();
|
||||
@ -842,7 +842,7 @@ ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
|
||||
#ifdef __SSE2__
|
||||
if constexpr (std::is_same_v<T, UInt32>)
|
||||
{
|
||||
replicateSSE42Int32(getData().data(), res->getData().data(), offsets);
|
||||
replicateSSE2Int32(getData().data(), res->getData().data(), offsets);
|
||||
return res;
|
||||
}
|
||||
#endif
|
||||
|
@ -533,9 +533,16 @@ void AsynchronousMetrics::update(TimePoint update_time)
|
||||
AsynchronousMetricValues new_values;
|
||||
|
||||
auto current_time = std::chrono::system_clock::now();
|
||||
auto time_after_previous_update [[maybe_unused]] = current_time - previous_update_time;
|
||||
auto time_after_previous_update = current_time - previous_update_time;
|
||||
previous_update_time = update_time;
|
||||
|
||||
double update_interval = 0.;
|
||||
if (first_run)
|
||||
update_interval = update_period.count();
|
||||
else
|
||||
update_interval = std::chrono::duration_cast<std::chrono::microseconds>(time_after_previous_update).count() / 1e6;
|
||||
new_values["AsynchronousMetricsUpdateInterval"] = { update_interval, "Metrics update interval" };
|
||||
|
||||
/// This is also a good indicator of system responsiveness.
|
||||
new_values["Jitter"] = { std::chrono::duration_cast<std::chrono::nanoseconds>(current_time - update_time).count() / 1e9,
|
||||
"The difference in time the thread for calculation of the asynchronous metrics was scheduled to wake up and the time it was in fact, woken up."
|
||||
|
@ -197,7 +197,7 @@
|
||||
M(187, COLLATION_COMPARISON_FAILED) \
|
||||
M(188, UNKNOWN_ACTION) \
|
||||
M(189, TABLE_MUST_NOT_BE_CREATED_MANUALLY) \
|
||||
M(190, SIZES_OF_ARRAYS_DOESNT_MATCH) \
|
||||
M(190, SIZES_OF_ARRAYS_DONT_MATCH) \
|
||||
M(191, SET_SIZE_LIMIT_EXCEEDED) \
|
||||
M(192, UNKNOWN_USER) \
|
||||
M(193, WRONG_PASSWORD) \
|
||||
|
@ -237,6 +237,7 @@ void ThreadStatus::setFatalErrorCallback(std::function<void()> callback)
|
||||
|
||||
void ThreadStatus::onFatalError()
|
||||
{
|
||||
std::lock_guard lock(thread_group->mutex);
|
||||
if (fatal_error_callback)
|
||||
fatal_error_callback();
|
||||
}
|
||||
|
@ -809,7 +809,7 @@ class IColumn;
|
||||
M(Bool, input_format_json_read_numbers_as_strings, false, "Allow to parse numbers as strings in JSON input formats", 0) \
|
||||
M(Bool, input_format_json_read_objects_as_strings, true, "Allow to parse JSON objects as strings in JSON input formats", 0) \
|
||||
M(Bool, input_format_json_named_tuples_as_objects, true, "Deserialize named tuple columns as JSON objects", 0) \
|
||||
M(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, false, "Ignore unknown keys in json object for named tuples", 0) \
|
||||
M(Bool, input_format_json_ignore_unknown_keys_in_named_tuple, true, "Ignore unknown keys in json object for named tuples", 0) \
|
||||
M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \
|
||||
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
|
||||
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
|
||||
|
@ -80,6 +80,7 @@ namespace SettingsChangesHistory
|
||||
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
|
||||
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
||||
{
|
||||
{"23.3", {{"input_format_json_ignore_unknown_keys_in_named_tuple", false, true, "Improve parsing JSON objects as named tuples"}}},
|
||||
{"23.2", {{"output_format_parquet_fixed_string_as_fixed_byte_array", false, true, "Use Parquet FIXED_LENGTH_BYTE_ARRAY type for FixedString by default"},
|
||||
{"output_format_arrow_fixed_string_as_fixed_byte_array", false, true, "Use Arrow FIXED_SIZE_BINARY type for FixedString by default"},
|
||||
{"query_plan_remove_redundant_distinct", false, true, "Remove redundant Distinct step in query plan"},
|
||||
|
@ -134,6 +134,8 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
|
||||
}
|
||||
|
||||
|
||||
static std::atomic<bool> fatal_error_printed{false};
|
||||
|
||||
/** Handler for "fault" or diagnostic signals. Send data about fault to separate thread to write into log.
|
||||
*/
|
||||
static void signalHandler(int sig, siginfo_t * info, void * context)
|
||||
@ -159,7 +161,16 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
|
||||
if (sig != SIGTSTP) /// This signal is used for debugging.
|
||||
{
|
||||
/// The time that is usually enough for separate thread to print info into log.
|
||||
sleepForSeconds(20); /// FIXME: use some feedback from threads that process stacktrace
|
||||
/// Under MSan full stack unwinding with DWARF info about inline functions takes 101 seconds in one case.
|
||||
for (size_t i = 0; i < 300; ++i)
|
||||
{
|
||||
/// We will synchronize with the thread printing the messages with an atomic variable to finish earlier.
|
||||
if (fatal_error_printed)
|
||||
break;
|
||||
|
||||
/// This coarse method of synchronization is perfectly ok for fatal signals.
|
||||
sleepForSeconds(1);
|
||||
}
|
||||
call_default_signal_handler(sig);
|
||||
}
|
||||
|
||||
@ -309,7 +320,9 @@ private:
|
||||
}
|
||||
|
||||
if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())
|
||||
{
|
||||
DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace);
|
||||
}
|
||||
}
|
||||
|
||||
std::string signal_description = "Unknown signal";
|
||||
@ -407,6 +420,8 @@ private:
|
||||
/// When everything is done, we will try to send these error messages to client.
|
||||
if (thread_ptr)
|
||||
thread_ptr->onFatalError();
|
||||
|
||||
fatal_error_printed = true;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -25,7 +25,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
}
|
||||
|
||||
namespace Nested
|
||||
@ -242,7 +242,7 @@ void validateArraySizes(const Block & block)
|
||||
const ColumnArray & another_array_column = assert_cast<const ColumnArray &>(*elem.column);
|
||||
|
||||
if (!first_array_column.hasEqualOffsets(another_array_column))
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"Elements '{}' and '{}' "
|
||||
"of Nested data structure '{}' (Array columns) have different array sizes.",
|
||||
block.getByPosition(it->second).name, elem.name, split.first);
|
||||
|
@ -64,7 +64,7 @@ static DataTypePtr create(const ASTPtr & arguments)
|
||||
return std::make_shared<DataTypeDateTime>();
|
||||
|
||||
const auto scale = getArgument<UInt64, ArgumentKind::Optional>(arguments, 0, "scale", "DateTime");
|
||||
const auto timezone = getArgument<String, ArgumentKind::Optional>(arguments, !!scale, "timezone", "DateTime");
|
||||
const auto timezone = getArgument<String, ArgumentKind::Optional>(arguments, scale ? 1 : 0, "timezone", "DateTime");
|
||||
|
||||
if (!scale && !timezone)
|
||||
throw Exception::createDeprecated(getExceptionMessage(" has wrong type: ", 0, "scale", "DateTime", Field::Types::Which::UInt64),
|
||||
|
@ -16,7 +16,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
@ -213,7 +213,7 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments)
|
||||
if (i == 0)
|
||||
offsets = offsets_i;
|
||||
else if (*offsets_i != *offsets)
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to aggregate function must be equal.");
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to aggregate function must be equal.");
|
||||
}
|
||||
return {nested_columns, offsets->data()};
|
||||
}
|
||||
|
@ -1112,7 +1112,9 @@ private:
|
||||
{
|
||||
ToType h;
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
{
|
||||
h = apply(key, reinterpret_cast<const char *>(&vec_from[i]), sizeof(vec_from[i]));
|
||||
}
|
||||
else
|
||||
{
|
||||
char tmp_buffer[sizeof(vec_from[i])];
|
||||
@ -1131,7 +1133,9 @@ private:
|
||||
|
||||
ToType h;
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
{
|
||||
h = apply(key, reinterpret_cast<const char *>(&value), sizeof(value));
|
||||
}
|
||||
else
|
||||
{
|
||||
char tmp_buffer[sizeof(value)];
|
||||
@ -1271,7 +1275,7 @@ private:
|
||||
{
|
||||
/// NOTE: here, of course, you can do without the materialization of the column.
|
||||
ColumnPtr full_column = col_from_const->convertToFullColumn();
|
||||
executeArray<first>(key, type, &*full_column, vec_to);
|
||||
executeArray<first>(key, type, full_column.get(), vec_to);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
|
||||
@ -1283,6 +1287,10 @@ private:
|
||||
{
|
||||
WhichDataType which(from_type);
|
||||
|
||||
if (icolumn->size() != vec_to.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Argument column '{}' size {} doesn't match result column size {} of function {}",
|
||||
icolumn->getName(), icolumn->size(), vec_to.size(), getName());
|
||||
|
||||
if (which.isUInt8()) executeIntType<UInt8, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt16()) executeIntType<UInt16, first>(key, icolumn, vec_to);
|
||||
else if (which.isUInt32()) executeIntType<UInt32, first>(key, icolumn, vec_to);
|
||||
@ -1343,10 +1351,9 @@ private:
|
||||
const auto & type_map = assert_cast<const DataTypeMap &>(*type);
|
||||
executeForArgument(key, type_map.getNestedType().get(), map->getNestedColumnPtr().get(), vec_to, is_first);
|
||||
}
|
||||
else if (const auto * const_map = checkAndGetColumnConstData<ColumnMap>(column))
|
||||
else if (const auto * const_map = checkAndGetColumnConst<ColumnMap>(column))
|
||||
{
|
||||
const auto & type_map = assert_cast<const DataTypeMap &>(*type);
|
||||
executeForArgument(key, type_map.getNestedType().get(), const_map->getNestedColumnPtr().get(), vec_to, is_first);
|
||||
executeForArgument(key, type, const_map->convertToFullColumnIfConst().get(), vec_to, is_first);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1382,8 +1389,7 @@ public:
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
size_t rows = input_rows_count;
|
||||
auto col_to = ColumnVector<ToType>::create(rows);
|
||||
auto col_to = ColumnVector<ToType>::create(input_rows_count);
|
||||
|
||||
typename ColumnVector<ToType>::Container & vec_to = col_to->getData();
|
||||
|
||||
@ -1395,7 +1401,7 @@ public:
|
||||
if (arguments.size() <= first_data_argument)
|
||||
{
|
||||
/// Return a fixed random-looking magic number when input is empty
|
||||
vec_to.assign(rows, static_cast<ToType>(0xe28dbde7fe22e41c));
|
||||
vec_to.assign(input_rows_count, static_cast<ToType>(0xe28dbde7fe22e41c));
|
||||
}
|
||||
|
||||
KeyType key{};
|
||||
|
@ -26,6 +26,7 @@
|
||||
# pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -66,12 +67,12 @@ ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
|
||||
|
||||
if (!low_cardinality_type)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Incompatible type for low cardinality column: {}",
|
||||
"Incompatible type for LowCardinality column: {}",
|
||||
column.type->getName());
|
||||
|
||||
if (can_be_executed_on_default_arguments)
|
||||
{
|
||||
/// Normal case, when function can be executed on values's default.
|
||||
/// Normal case, when function can be executed on values' default.
|
||||
column.column = low_cardinality_column->getDictionary().getNestedColumn();
|
||||
indexes = low_cardinality_column->getIndexesPtr();
|
||||
}
|
||||
@ -280,6 +281,7 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
|
||||
|
||||
auto res = executeWithoutLowCardinalityColumns(columns_without_low_cardinality, dictionary_type, new_input_rows_count, dry_run);
|
||||
bool res_is_constant = isColumnConst(*res);
|
||||
|
||||
auto keys = res_is_constant
|
||||
? res->cloneResized(1)->convertToFullColumnIfConst()
|
||||
: res;
|
||||
|
@ -23,9 +23,10 @@ namespace ErrorCodes
|
||||
namespace impl
|
||||
{
|
||||
|
||||
/// Is the [I]LIKE expression reduced to finding a substring in a string?
|
||||
/// Is the [I]LIKE expression equivalent to a substring search?
|
||||
inline bool likePatternIsSubstring(std::string_view pattern, String & res)
|
||||
{
|
||||
/// TODO: ignore multiple leading or trailing %
|
||||
if (pattern.size() < 2 || !pattern.starts_with('%') || !pattern.ends_with('%'))
|
||||
return false;
|
||||
|
||||
@ -45,9 +46,25 @@ inline bool likePatternIsSubstring(std::string_view pattern, String & res)
|
||||
case '\\':
|
||||
++pos;
|
||||
if (pos == end)
|
||||
/// pattern ends with \% --> trailing % is to be taken literally and pattern doesn't qualify for substring search
|
||||
return false;
|
||||
else
|
||||
res += *pos;
|
||||
{
|
||||
switch (*pos)
|
||||
{
|
||||
/// Known LIKE escape sequences:
|
||||
case '%':
|
||||
case '_':
|
||||
case '\\':
|
||||
res += *pos;
|
||||
break;
|
||||
/// For all other escape sequences, the backslash loses its special meaning
|
||||
default:
|
||||
res += '\\';
|
||||
res += *pos;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
res += *pos;
|
||||
|
@ -75,6 +75,7 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
|
||||
|
||||
Pos dot_pos = nullptr;
|
||||
Pos colon_pos = nullptr;
|
||||
bool has_sub_delims = false;
|
||||
bool has_at_symbol = false;
|
||||
bool has_terminator_after_colon = false;
|
||||
const auto * start_of_host = pos;
|
||||
@ -97,25 +98,35 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
|
||||
case '@': /// myemail@gmail.com
|
||||
if (has_terminator_after_colon) return std::string_view{};
|
||||
if (has_at_symbol) goto done;
|
||||
has_sub_delims = false;
|
||||
has_at_symbol = true;
|
||||
start_of_host = pos + 1;
|
||||
break;
|
||||
case ';':
|
||||
case '=':
|
||||
case '&':
|
||||
case '~':
|
||||
case '%':
|
||||
/// Symbols above are sub-delims in RFC3986 and should be
|
||||
/// allowed for userinfo (named identification here).
|
||||
///
|
||||
/// NOTE: that those symbols is allowed for reg-name (host)
|
||||
/// too, but right now host parsing looks more like in
|
||||
/// RFC1034 (in other words domains that are allowed to be
|
||||
/// registered).
|
||||
has_sub_delims = true;
|
||||
continue;
|
||||
case ' ': /// restricted symbols in whole URL
|
||||
case '\t':
|
||||
case '<':
|
||||
case '>':
|
||||
case '%':
|
||||
case '{':
|
||||
case '}':
|
||||
case '|':
|
||||
case '\\':
|
||||
case '^':
|
||||
case '~':
|
||||
case '[':
|
||||
case ']':
|
||||
case ';':
|
||||
case '=':
|
||||
case '&':
|
||||
if (colon_pos == nullptr)
|
||||
return std::string_view{};
|
||||
else
|
||||
@ -124,6 +135,8 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos
|
||||
}
|
||||
|
||||
done:
|
||||
if (has_sub_delims)
|
||||
return std::string_view{};
|
||||
if (!has_at_symbol)
|
||||
pos = colon_pos ? colon_pos : pos;
|
||||
return checkAndReturnHost(pos, dot_pos, start_of_host);
|
||||
|
@ -7,6 +7,7 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// NOTE: Implementation is not RFC3986 compatible
|
||||
struct ExtractNetloc
|
||||
{
|
||||
/// We use the same as domain function
|
||||
@ -67,6 +68,7 @@ struct ExtractNetloc
|
||||
/// Now pos points to the first byte after scheme (if there is).
|
||||
|
||||
bool has_identification = false;
|
||||
Pos hostname_end = end;
|
||||
Pos question_mark_pos = end;
|
||||
Pos slash_pos = end;
|
||||
Pos start_of_host = pos;
|
||||
@ -90,25 +92,39 @@ struct ExtractNetloc
|
||||
return std::string_view(start_of_host, pos - start_of_host);
|
||||
case '@': /// foo:bar@example.ru
|
||||
has_identification = true;
|
||||
hostname_end = end;
|
||||
break;
|
||||
case ';':
|
||||
case '=':
|
||||
case '&':
|
||||
case '~':
|
||||
case '%':
|
||||
/// Symbols above are sub-delims in RFC3986 and should be
|
||||
/// allowed for userinfo (named identification here).
|
||||
///
|
||||
/// NOTE: that those symbols is allowed for reg-name (host)
|
||||
/// too, but right now host parsing looks more like in
|
||||
/// RFC1034 (in other words domains that are allowed to be
|
||||
/// registered).
|
||||
if (!has_identification)
|
||||
{
|
||||
hostname_end = pos;
|
||||
break;
|
||||
}
|
||||
[[fallthrough]];
|
||||
case ' ': /// restricted symbols in whole URL
|
||||
case '\t':
|
||||
case '<':
|
||||
case '>':
|
||||
case '%':
|
||||
case '{':
|
||||
case '}':
|
||||
case '|':
|
||||
case '\\':
|
||||
case '^':
|
||||
case '~':
|
||||
case '[':
|
||||
case ']':
|
||||
case ';':
|
||||
case '=':
|
||||
case '&':
|
||||
return pos > start_of_host
|
||||
? std::string_view(start_of_host, std::min(std::min(pos - 1, question_mark_pos), slash_pos) - start_of_host)
|
||||
? std::string_view(start_of_host, std::min(std::min(pos, question_mark_pos), slash_pos) - start_of_host)
|
||||
: std::string_view();
|
||||
}
|
||||
}
|
||||
@ -116,7 +132,7 @@ struct ExtractNetloc
|
||||
if (has_identification)
|
||||
return std::string_view(start_of_host, pos - start_of_host);
|
||||
else
|
||||
return std::string_view(start_of_host, std::min(std::min(pos, question_mark_pos), slash_pos) - start_of_host);
|
||||
return std::string_view(start_of_host, std::min(std::min(std::min(pos, question_mark_pos), slash_pos), hostname_end) - start_of_host);
|
||||
}
|
||||
|
||||
static void execute(Pos data, size_t size, Pos & res_data, size_t & res_size)
|
||||
|
@ -37,7 +37,7 @@ namespace ErrorCodes
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
@ -361,7 +361,7 @@ public:
|
||||
if (getOffsetsPtr(*column_array) != offsets_column
|
||||
&& getOffsets(*column_array) != typeid_cast<const ColumnArray::ColumnOffsets &>(*offsets_column).getData())
|
||||
throw Exception(
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"{}s passed to {} must have equal size",
|
||||
argument_type_name,
|
||||
getName());
|
||||
|
@ -16,7 +16,7 @@ namespace ErrorCodes
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
}
|
||||
|
||||
@ -356,7 +356,7 @@ private:
|
||||
{
|
||||
ColumnArray::Offset prev_offset = row > 0 ? offsets_x[row] : 0;
|
||||
throw Exception(
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"Arguments of function {} have different array sizes: {} and {}",
|
||||
getName(),
|
||||
offsets_x[row] - prev_offset,
|
||||
@ -423,7 +423,7 @@ private:
|
||||
if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"Arguments of function {} have different array sizes: {} and {}",
|
||||
getName(),
|
||||
offsets_x[0],
|
||||
|
@ -20,7 +20,7 @@ namespace ErrorCodes
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
}
|
||||
|
||||
class FunctionArrayEnumerateUniq;
|
||||
@ -153,7 +153,7 @@ ColumnPtr FunctionArrayEnumerateExtended<Derived>::executeImpl(const ColumnsWith
|
||||
offsets_column = array->getOffsetsPtr();
|
||||
}
|
||||
else if (offsets_i != *offsets)
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
getName());
|
||||
|
||||
const auto * array_data = &array->getData();
|
||||
|
@ -60,7 +60,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
}
|
||||
|
||||
class FunctionArrayEnumerateUniqRanked;
|
||||
@ -194,7 +194,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
|
||||
{
|
||||
if (*offsets_by_depth[0] != array->getOffsets())
|
||||
{
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"Lengths and effective depths of all arrays passed to {} must be equal.", getName());
|
||||
}
|
||||
}
|
||||
@ -217,7 +217,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
|
||||
{
|
||||
if (*offsets_by_depth[col_depth] != array->getOffsets())
|
||||
{
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"Lengths and effective depths of all arrays passed to {} must be equal.", getName());
|
||||
}
|
||||
}
|
||||
@ -225,7 +225,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended<Derived>::executeImpl(
|
||||
|
||||
if (col_depth < arrays_depths.depths[array_num])
|
||||
{
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"{}: Passed array number {} depth ({}) is more than the actual array depth ({}).",
|
||||
getName(), array_num, std::to_string(arrays_depths.depths[array_num]), col_depth);
|
||||
}
|
||||
|
@ -19,7 +19,7 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
@ -144,7 +144,7 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume
|
||||
if (i == 0)
|
||||
offsets = offsets_i;
|
||||
else if (*offsets_i != *offsets)
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
getName());
|
||||
}
|
||||
const IColumn ** aggregate_arguments = aggregate_arguments_vec.data();
|
||||
|
@ -21,7 +21,7 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
@ -190,7 +190,7 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl(
|
||||
if (i == 0)
|
||||
offsets = offsets_i;
|
||||
else if (*offsets_i != *offsets)
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
getName());
|
||||
}
|
||||
const IColumn ** aggregate_arguments = aggregate_arguments_vec.data();
|
||||
|
@ -18,7 +18,7 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
@ -151,7 +151,7 @@ ColumnPtr FunctionArrayUniq::executeImpl(const ColumnsWithTypeAndName & argument
|
||||
if (i == 0)
|
||||
offsets = &offsets_i;
|
||||
else if (offsets_i != *offsets)
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Lengths of all arrays passed to {} must be equal.",
|
||||
getName());
|
||||
|
||||
const auto * array_data = &array->getData();
|
||||
|
@ -13,7 +13,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
@ -81,7 +81,7 @@ public:
|
||||
}
|
||||
else if (!column_array->hasEqualOffsets(static_cast<const ColumnArray &>(*first_array_column)))
|
||||
{
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"The argument 1 and argument {} of function {} have different array sizes",
|
||||
i + 1, getName());
|
||||
}
|
||||
|
@ -48,12 +48,12 @@ bool SlowWithHyperscanChecker::isSlowOneRepeat(std::string_view regexp)
|
||||
re2_st::StringPiece haystack(regexp.data(), regexp.size());
|
||||
re2_st::StringPiece matches[2];
|
||||
size_t start_pos = 0;
|
||||
while (start_pos < regexp.size())
|
||||
while (start_pos < haystack.size())
|
||||
{
|
||||
if (searcher_one_repeat.Match(haystack, start_pos, regexp.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 2))
|
||||
if (searcher_one_repeat.Match(haystack, start_pos, haystack.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 2))
|
||||
{
|
||||
const auto & match = matches[0];
|
||||
start_pos += (matches[0].data() - haystack.data()) + match.length(); // fwd by prefix + match length
|
||||
start_pos = (matches[0].data() - haystack.data()) + match.size(); // new start pos = prefix before match + match length
|
||||
const auto & submatch = matches[1];
|
||||
if (isLargerThanFifty({submatch.data(), submatch.size()}))
|
||||
return true;
|
||||
@ -70,12 +70,12 @@ bool SlowWithHyperscanChecker::isSlowTwoRepeats(std::string_view regexp)
|
||||
re2_st::StringPiece haystack(regexp.data(), regexp.size());
|
||||
re2_st::StringPiece matches[3];
|
||||
size_t start_pos = 0;
|
||||
while (start_pos < regexp.size())
|
||||
while (start_pos < haystack.size())
|
||||
{
|
||||
if (searcher_two_repeats.Match(haystack, start_pos, regexp.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 3))
|
||||
if (searcher_two_repeats.Match(haystack, start_pos, haystack.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 3))
|
||||
{
|
||||
const auto & match = matches[0];
|
||||
start_pos += (matches[0].data() - haystack.data()) + match.length(); // fwd by prefix + match length
|
||||
start_pos = (matches[0].data() - haystack.data()) + match.size(); // new start pos = prefix before match + match length
|
||||
const auto & submatch1 = matches[1];
|
||||
const auto & submatch2 = matches[2];
|
||||
if (isLargerThanFifty({submatch1.data(), submatch1.size()})
|
||||
|
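The rewritten comment ("new start pos = prefix before match + match length") describes the actual bug fix: if I read the re2 semantics correctly, `matches[0].data() - haystack.data()` is already an absolute offset into the haystack, so adding it to the previous `start_pos` (the old `+=`) over-advances on every iteration after the first. A self-contained sketch of the arithmetic using plain `std::string_view::find` instead of re2 (values are arbitrary):

    #include <cassert>
    #include <cstddef>
    #include <string_view>

    int main()
    {
        std::string_view haystack = "..ab....ab..";   // matches at absolute offsets 2 and 8

        size_t start_pos = 0;
        size_t first = haystack.find("ab", start_pos);    // absolute offset, like matches[0].data() - haystack.data()
        assert(first == 2);

        // Both variants agree on the first iteration: 2 + 2 = 4.
        size_t next_assign = first + 2;                   // new code: start_pos = prefix + match length
        size_t next_add    = start_pos + first + 2;       // old code: start_pos += prefix + match length
        assert(next_assign == 4 && next_add == 4);

        start_pos = next_assign;
        size_t second = haystack.find("ab", start_pos);
        assert(second == 8);

        // Second iteration: '=' lands right after the match (10), while '+=' jumps to 4 + 8 + 2 = 14,
        // i.e. past the end of the 12-character haystack, so later matches would be skipped.
        assert(second + 2 == 10);
        assert(start_pos + second + 2 == 14);
    }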
@ -21,7 +21,12 @@ inline String likePatternToRegexp(std::string_view pattern)
|
||||
const char * const end = pattern.begin() + pattern.size();
|
||||
|
||||
if (pos < end && *pos == '%')
|
||||
++pos;
|
||||
/// Eat leading %
|
||||
while (++pos < end)
|
||||
{
|
||||
if (*pos != '%')
|
||||
break;
|
||||
}
|
||||
else
|
||||
res = "^";
|
||||
|
||||
@ -29,7 +34,18 @@ inline String likePatternToRegexp(std::string_view pattern)
|
||||
{
|
||||
switch (*pos)
|
||||
{
|
||||
case '^': case '$': case '.': case '[': case '|': case '(': case ')': case '?': case '*': case '+': case '{':
|
||||
/// Quote characters which have a special meaning in re2
|
||||
case '^':
|
||||
case '$':
|
||||
case '.':
|
||||
case '[':
|
||||
case '|':
|
||||
case '(':
|
||||
case ')':
|
||||
case '?':
|
||||
case '*':
|
||||
case '+':
|
||||
case '{':
|
||||
res += '\\';
|
||||
res += *pos;
|
||||
break;
|
||||
@ -44,22 +60,23 @@ inline String likePatternToRegexp(std::string_view pattern)
|
||||
break;
|
||||
case '\\':
|
||||
if (pos + 1 == end)
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE, "Invalid escape sequence at the end of LIKE pattern");
|
||||
/// Known escape sequences.
|
||||
if (pos[1] == '%' || pos[1] == '_')
|
||||
throw Exception(ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE, "Invalid escape sequence at the end of LIKE pattern '{}'", pattern);
|
||||
switch (pos[1])
|
||||
{
|
||||
res += pos[1];
|
||||
++pos;
|
||||
}
|
||||
else if (pos[1] == '\\')
|
||||
{
|
||||
res += "\\\\";
|
||||
++pos;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Unknown escape sequence treated literally: as backslash and the following character.
|
||||
res += "\\\\";
|
||||
/// Interpret quoted LIKE metacharacters %, _ and \ as literals:
|
||||
case '%':
|
||||
case '_':
|
||||
res += pos[1];
|
||||
++pos;
|
||||
break;
|
||||
case '\\':
|
||||
res += "\\\\"; /// backslash has a special meaning in re2 --> quote it
|
||||
++pos;
|
||||
break;
|
||||
/// Unknown escape sequence treated literally: as backslash (which must be quoted in re2) + the following character
|
||||
default:
|
||||
res += "\\\\";
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
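Taken together, the rules in this function are: a leading run of '%' (now the whole run, not just one) and a trailing '%' leave the respective side unanchored, '%' maps to ".*", '_' to '.', re2 metacharacters are backslash-quoted, and an escaped '%', '_' or '\' stays literal. The sketch below is my own simplified, standalone re-implementation of those rules for illustration (the trailing-'%' handling is assumed, since that branch is not visible in this hunk); it is not the ClickHouse function and may differ in corner cases:

    #include <cassert>
    #include <stdexcept>
    #include <string>
    #include <string_view>

    std::string likeToRegexpSketch(std::string_view pattern)
    {
        std::string res;
        size_t i = 0;

        if (i < pattern.size() && pattern[i] == '%')
            while (i < pattern.size() && pattern[i] == '%')   // eat the whole run of leading '%'
                ++i;
        else
            res += '^';                                       // otherwise anchor at the start

        for (; i < pattern.size(); ++i)
        {
            char c = pattern[i];
            switch (c)
            {
                // Characters with a special meaning in re2 are quoted.
                case '^': case '$': case '.': case '[': case '|': case '(': case ')':
                case '?': case '*': case '+': case '{':
                    res += '\\';
                    res += c;
                    break;
                case '%':
                    if (i + 1 == pattern.size())
                        return res;                           // trailing '%': leave the end unanchored (assumed behaviour)
                    res += ".*";
                    break;
                case '_':
                    res += '.';
                    break;
                case '\\':
                    if (i + 1 == pattern.size())
                        throw std::runtime_error("Invalid escape sequence at the end of LIKE pattern");
                    if (pattern[i + 1] == '%' || pattern[i + 1] == '_')
                    {
                        res += pattern[i + 1];                // escaped LIKE metacharacter becomes a literal
                        ++i;
                    }
                    else if (pattern[i + 1] == '\\')
                    {
                        res += "\\\\";                        // literal backslash must itself be quoted for re2
                        ++i;
                    }
                    else
                    {
                        res += "\\\\";                        // unknown escape: keep the backslash, next char handled normally
                    }
                    break;
                default:
                    res += c;
            }
        }
        res += '$';
        return res;
    }

    int main()
    {
        assert(likeToRegexpSketch("abc") == "^abc$");
        assert(likeToRegexpSketch("%%ab_c%") == "ab.c");
        assert(likeToRegexpSketch("a\\%b") == "^a%b$");
        assert(likeToRegexpSketch("50+%") == "^50\\+");
    }

So, under this sketch, the LIKE pattern '%%ab_c%' becomes the unanchored regexp 'ab.c', while 'abc' becomes the fully anchored '^abc$'.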
@ -332,7 +332,7 @@ public:
|
||||
}
|
||||
|
||||
size_t col_key_size = sub_map_column->size();
|
||||
auto column = is_const? ColumnConst::create(std::move(sub_map_column), std::move(col_key_size)) : std::move(sub_map_column);
|
||||
auto column = is_const ? ColumnConst::create(std::move(sub_map_column), std::move(col_key_size)) : std::move(sub_map_column);
|
||||
|
||||
ColumnsWithTypeAndName new_arguments =
|
||||
{
|
||||
|
@ -20,7 +20,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -118,7 +118,7 @@ public:
|
||||
const auto * rhs_array = assert_cast<const ColumnArray *>(arguments[i].column.get());
|
||||
|
||||
if (!lhs_array->hasEqualOffsets(*rhs_array))
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"The argument 1 and argument {} of function {} have different array offsets",
|
||||
i + 1,
|
||||
getName());
|
||||
|
@ -27,6 +27,7 @@ namespace ErrorCodes
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int CANNOT_ALLOCATE_MEMORY;
|
||||
extern const int CANNOT_DLOPEN;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
@ -174,6 +175,10 @@ public:
|
||||
if (!handle)
|
||||
throw Exception(ErrorCodes::CANNOT_DLOPEN, "Cannot dlopen: ({})", dlerror()); // NOLINT(concurrency-mt-unsafe) // MT-Safe on Linux, see man dlerror
|
||||
}
|
||||
else if (mode == "logical error")
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: trap");
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown trap mode");
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ namespace ErrorCodes
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int NOT_FOUND_COLUMN_IN_BLOCK;
|
||||
extern const int NUMBER_OF_DIMENSIONS_MISMATCHED;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -200,7 +200,7 @@ private:
|
||||
const auto & array_y = *assert_cast<const ColumnArray *>(col_y.get());
|
||||
if (!array_x.hasEqualOffsets(array_y))
|
||||
{
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"The argument 1 and argument 3 of function {} have different array sizes", getName());
|
||||
}
|
||||
}
|
||||
@ -222,7 +222,7 @@ private:
|
||||
{
|
||||
if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset))
|
||||
{
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"The argument 1 and argument 3 of function {} have different array sizes", getName());
|
||||
}
|
||||
prev_offset = offsets_y[row];
|
||||
|
@ -12,7 +12,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
}
|
||||
|
||||
/** Function validateNestedArraySizes is used to check the consistency of Nested DataType subcolumns's offsets when Update
|
||||
@ -106,7 +106,7 @@ ColumnPtr FunctionValidateNestedArraySizes::executeImpl(
|
||||
else if (first_length != length)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH,
|
||||
ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
|
||||
"Elements '{}' and '{}' of Nested data structure (Array columns) "
|
||||
"have different array sizes ({} and {} respectively) on row {}",
|
||||
arguments[1].name, arguments[args_idx].name, first_length, length, i);
|
||||
|
@ -224,9 +224,9 @@ namespace detail
|
||||
|
||||
enum class InitializeError
|
||||
{
|
||||
RETRIABLE_ERROR,
|
||||
RETRYABLE_ERROR,
|
||||
/// If error is not retriable, `exception` variable must be set.
|
||||
NON_RETRIABLE_ERROR,
|
||||
NON_RETRYABLE_ERROR,
|
||||
/// Allows to skip not found urls for globs
|
||||
SKIP_NOT_FOUND_URL,
|
||||
NONE,
|
||||
@ -398,7 +398,7 @@ namespace detail
|
||||
}
|
||||
else if (!isRetriableError(http_status))
|
||||
{
|
||||
initialization_error = InitializeError::NON_RETRIABLE_ERROR;
|
||||
initialization_error = InitializeError::NON_RETRYABLE_ERROR;
|
||||
exception = std::current_exception();
|
||||
}
|
||||
else
|
||||
@ -409,7 +409,7 @@ namespace detail
|
||||
}
|
||||
|
||||
/**
|
||||
* Throws if error is retriable, otherwise sets initialization_error = NON_RETRIABLE_ERROR and
|
||||
* Throws if error is retryable, otherwise sets initialization_error = NON_RETRYABLE_ERROR and
|
||||
* saves exception into `exception` variable. In case url is not found and skip_not_found_url == true,
|
||||
* sets initialization_error = SKIP_NOT_FOUND_URL, otherwise throws.
|
||||
*/
|
||||
@ -453,9 +453,9 @@ namespace detail
|
||||
|
||||
/// Retry 200OK
|
||||
if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_OK)
|
||||
initialization_error = InitializeError::RETRIABLE_ERROR;
|
||||
initialization_error = InitializeError::RETRYABLE_ERROR;
|
||||
else
|
||||
initialization_error = InitializeError::NON_RETRIABLE_ERROR;
|
||||
initialization_error = InitializeError::NON_RETRYABLE_ERROR;
|
||||
|
||||
return;
|
||||
}
|
||||
@ -544,7 +544,7 @@ namespace detail
|
||||
{
|
||||
initialize();
|
||||
|
||||
if (initialization_error == InitializeError::NON_RETRIABLE_ERROR)
|
||||
if (initialization_error == InitializeError::NON_RETRYABLE_ERROR)
|
||||
{
|
||||
assert(exception);
|
||||
break;
|
||||
@ -553,7 +553,7 @@ namespace detail
|
||||
{
|
||||
return false;
|
||||
}
|
||||
else if (initialization_error == InitializeError::RETRIABLE_ERROR)
|
||||
else if (initialization_error == InitializeError::RETRYABLE_ERROR)
|
||||
{
|
||||
LOG_ERROR(
|
||||
log,
|
||||
@ -582,10 +582,13 @@ namespace detail
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
/**
|
||||
* Retry request unconditionally if nothing has been read yet.
|
||||
* Otherwise if it is GET method retry with range header.
|
||||
*/
|
||||
/// Too many open files - non-retryable.
|
||||
if (e.code() == POCO_EMFILE)
|
||||
throw;
|
||||
|
||||
/** Retry request unconditionally if nothing has been read yet.
|
||||
* Otherwise if it is GET method retry with range header.
|
||||
*/
|
||||
bool can_retry_request = !offset_from_begin_pos || method == Poco::Net::HTTPRequest::HTTP_GET;
|
||||
if (!can_retry_request)
|
||||
throw;
|
||||
|
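The retry decision kept at the bottom of this hunk (retry unconditionally if nothing has been read yet, otherwise only for GET, which can resume with a Range header) boils down to a small predicate. A standalone sketch of just that decision (helper name is mine):

    #include <cassert>
    #include <cstddef>
    #include <string>

    // Mirrors: bool can_retry_request = !offset_from_begin_pos || method == HTTP_GET;
    bool canRetryRequest(size_t offset_from_begin_pos, const std::string & method)
    {
        return offset_from_begin_pos == 0 || method == "GET";
    }

    int main()
    {
        assert(canRetryRequest(0, "POST"));      // nothing read yet: safe to retry any method
        assert(canRetryRequest(1024, "GET"));    // partially read GET: retry with "Range: bytes=1024-"
        assert(!canRetryRequest(1024, "POST"));  // partially read non-GET: rethrow
    }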
@ -1946,6 +1946,9 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
|
||||
}
|
||||
|
||||
auto conjunction = getConjunctionNodes(predicate, allowed_nodes);
|
||||
if (conjunction.rejected.size() == 1 && WhichDataType{conjunction.rejected.front()->result_type}.isFloat())
|
||||
return nullptr;
|
||||
|
||||
auto actions = cloneActionsForConjunction(conjunction.allowed, all_inputs);
|
||||
if (!actions)
|
||||
return nullptr;
|
||||
@ -2011,10 +2014,12 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
|
||||
node.children.swap(new_children);
|
||||
*predicate = std::move(node);
|
||||
}
|
||||
else
|
||||
else if (!WhichDataType{new_children.front()->result_type}.isFloat())
|
||||
{
|
||||
/// If type is different, cast column.
|
||||
/// This case is possible, cause AND can use any numeric type as argument.
|
||||
/// But casting floats to UInt8 or Bool produces different results.
|
||||
/// so we can't apply this optimization to them.
|
||||
Node node;
|
||||
node.type = ActionType::COLUMN;
|
||||
node.result_name = predicate->result_type->getName();
|
||||
@ -2036,8 +2041,20 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown(
|
||||
else
|
||||
{
|
||||
/// Predicate is function AND, which still has more than one argument.
|
||||
/// Or there is only one argument that is a float and we can't just
|
||||
/// remove the AND.
|
||||
/// Just update children and rebuild it.
|
||||
predicate->children.swap(new_children);
|
||||
if (WhichDataType{predicate->children.front()->result_type}.isFloat())
|
||||
{
|
||||
Node node;
|
||||
node.type = ActionType::COLUMN;
|
||||
node.result_name = "1";
|
||||
node.column = DataTypeUInt8().createColumnConst(0, 1u);
|
||||
node.result_type = std::make_shared<DataTypeUInt8>();
|
||||
const auto * const_col = &nodes.emplace_back(std::move(node));
|
||||
predicate->children.emplace_back(const_col);
|
||||
}
|
||||
auto arguments = prepareFunctionArguments(predicate->children);
|
||||
|
||||
FunctionOverloadResolverPtr func_builder_and = std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionAnd>());
|
||||
|
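The reason spelled out in the comment ("casting floats to UInt8 or Bool produces different results") is easy to see in isolation: a filter treats any non-zero value as true, while an integral cast truncates. A minimal standalone demonstration with an arbitrary value:

    #include <cassert>
    #include <cstdint>

    int main()
    {
        double filter_value = 0.1;

        // As a filter condition, any non-zero value counts as "keep the row".
        bool as_condition = (filter_value != 0.0);

        // Casting the same value to UInt8 truncates it to 0.
        uint8_t as_uint8 = static_cast<uint8_t>(filter_value);

        assert(as_condition == true);
        assert(as_uint8 == 0);   // a cast-based "optimisation" would drop a row the original predicate keeps
    }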
@ -14,7 +14,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int SIZES_OF_ARRAYS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int TYPE_MISMATCH;
|
||||
}
|
||||
|
||||
@ -186,7 +186,7 @@ void ArrayJoinAction::execute(Block & block)
|
||||
|
||||
const ColumnArray & array = typeid_cast<const ColumnArray &>(*array_ptr);
|
||||
if (!is_unaligned && !array.hasEqualOffsets(*any_array))
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Sizes of ARRAY-JOIN-ed arrays do not match");
|
||||
throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Sizes of ARRAY-JOIN-ed arrays do not match");
|
||||
|
||||
current.column = typeid_cast<const ColumnArray &>(*array_ptr).getDataPtr();
|
||||
current.type = type->getNestedType();
|
||||
|
@ -147,6 +147,7 @@ void DatabaseCatalog::initializeAndLoadTemporaryDatabase()
|
||||
unused_dir_hide_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_hide_timeout_sec", unused_dir_hide_timeout_sec);
|
||||
unused_dir_rm_timeout_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_rm_timeout_sec", unused_dir_rm_timeout_sec);
|
||||
unused_dir_cleanup_period_sec = getContext()->getConfigRef().getInt64("database_catalog_unused_dir_cleanup_period_sec", unused_dir_cleanup_period_sec);
|
||||
drop_error_cooldown_sec = getContext()->getConfigRef().getInt64("database_catalog_drop_error_cooldown_sec", drop_error_cooldown_sec);
|
||||
|
||||
auto db_for_temporary_and_external_tables = std::make_shared<DatabaseMemory>(TEMPORARY_DATABASE, getContext());
|
||||
attachDatabase(TEMPORARY_DATABASE, db_for_temporary_and_external_tables);
|
||||
|
@ -279,7 +279,6 @@ private:
|
||||
bool maybeRemoveDirectory(const String & disk_name, const DiskPtr & disk, const String & unused_dir);
|
||||
|
||||
static constexpr size_t reschedule_time_ms = 100;
|
||||
static constexpr time_t drop_error_cooldown_sec = 5;
|
||||
|
||||
mutable std::mutex databases_mutex;
|
||||
|
||||
@ -326,6 +325,9 @@ private:
|
||||
time_t unused_dir_rm_timeout_sec = default_unused_dir_rm_timeout_sec;
|
||||
static constexpr time_t default_unused_dir_cleanup_period_sec = 24 * 60 * 60; /// 1 day
|
||||
time_t unused_dir_cleanup_period_sec = default_unused_dir_cleanup_period_sec;
|
||||
|
||||
static constexpr time_t default_drop_error_cooldown_sec = 5;
|
||||
time_t drop_error_cooldown_sec = default_drop_error_cooldown_sec;
|
||||
};
|
||||
|
||||
/// This class is useful when creating a table or database.
|
||||
|
@ -118,6 +118,15 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
|
||||
alias_node->checkSize(data.settings.max_expanded_ast_elements);
|
||||
ast = alias_node->clone();
|
||||
ast->setAlias(node_alias);
|
||||
|
||||
/// If the cloned AST was finished, this one should also be considered finished
|
||||
if (data.finished_asts.contains(alias_node))
|
||||
data.finished_asts[ast] = ast;
|
||||
|
||||
/// If we had an alias for node_alias, point it instead to the new node so we don't have to revisit it
|
||||
/// on subsequent calls
|
||||
if (auto existing_alias = data.aliases.find(node_alias); existing_alias != data.aliases.end())
|
||||
existing_alias->second = ast;
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -127,6 +136,15 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data)
|
||||
auto alias_name = ast->getAliasOrColumnName();
|
||||
ast = alias_node->clone();
|
||||
ast->setAlias(alias_name);
|
||||
|
||||
/// If the cloned AST was finished, this one should also be considered finished
|
||||
if (data.finished_asts.contains(alias_node))
|
||||
data.finished_asts[ast] = ast;
|
||||
|
||||
/// If we had an alias for node_alias, point it instead to the new node so we don't have to revisit it
|
||||
/// on subsequent calls
|
||||
if (auto existing_alias = data.aliases.find(node_alias); existing_alias != data.aliases.end())
|
||||
existing_alias->second = ast;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -362,11 +362,14 @@ void ServerAsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_tim
|
||||
const auto time_after_previous_update = current_time - heavy_metric_previous_update_time;
|
||||
const bool update_heavy_metric = time_after_previous_update >= heavy_metric_update_period || first_run;
|
||||
|
||||
Stopwatch watch;
|
||||
if (update_heavy_metric)
|
||||
{
|
||||
heavy_metric_previous_update_time = update_time;
|
||||
|
||||
Stopwatch watch;
|
||||
if (first_run)
|
||||
heavy_update_interval = heavy_metric_update_period.count();
|
||||
else
|
||||
heavy_update_interval = std::chrono::duration_cast<std::chrono::microseconds>(time_after_previous_update).count() / 1e6;
|
||||
|
||||
/// Tests show that listing 100000 entries consumes around 0.15 sec.
|
||||
updateDetachedPartsStats();
|
||||
@ -390,7 +393,9 @@ void ServerAsynchronousMetrics::updateHeavyMetricsIfNeeded(TimePoint current_tim
|
||||
watch.elapsedSeconds());
|
||||
|
||||
}
|
||||
new_values["AsynchronousHeavyMetricsCalculationTimeSpent"] = { watch.elapsedSeconds(), "Time in seconds spent for calculation of asynchronous heavy (tables related) metrics (this is the overhead of asynchronous metrics)." };
|
||||
|
||||
new_values["AsynchronousHeavyMetricsUpdateInterval"] = { heavy_update_interval, "Heavy (tables related) metrics update interval" };
|
||||
|
||||
new_values["NumberOfDetachedParts"] = { detached_parts_stats.count, "The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself it the part is broken, unexpected or unneeded. The server does not care about detached parts and they can be removed." };
|
||||
new_values["NumberOfDetachedByUserParts"] = { detached_parts_stats.detached_by_user, "The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts). The server does not care about detached parts and they can be removed." };
|
||||
|
@ -21,6 +21,7 @@ private:
|
||||
|
||||
const Duration heavy_metric_update_period;
|
||||
TimePoint heavy_metric_previous_update_time;
|
||||
double heavy_update_interval = 0.;
|
||||
|
||||
struct DetachedPartsStats
|
||||
{
|
||||
|
@ -1276,6 +1276,12 @@ void executeQuery(
|
||||
std::tie(ast, streams) = executeQueryImpl(begin, end, context, false, QueryProcessingStage::Complete, &istr);
|
||||
auto & pipeline = streams.pipeline;
|
||||
|
||||
QueryResultDetails result_details
|
||||
{
|
||||
.query_id = context->getClientInfo().current_query_id,
|
||||
.timezone = DateLUT::instance().getTimeZone(),
|
||||
};
|
||||
|
||||
std::unique_ptr<WriteBuffer> compressed_buffer;
|
||||
try
|
||||
{
|
||||
@ -1334,9 +1340,8 @@ void executeQuery(
|
||||
out->onProgress(progress);
|
||||
});
|
||||
|
||||
if (set_result_details)
|
||||
set_result_details(
|
||||
context->getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::instance().getTimeZone());
|
||||
result_details.content_type = out->getContentType();
|
||||
result_details.format = format_name;
|
||||
|
||||
pipeline.complete(std::move(out));
|
||||
}
|
||||
@ -1345,6 +1350,9 @@ void executeQuery(
|
||||
pipeline.setProgressCallback(context->getProgressCallback());
|
||||
}
|
||||
|
||||
if (set_result_details)
|
||||
set_result_details(result_details);
|
||||
|
||||
if (pipeline.initialized())
|
||||
{
|
||||
CompletedPipelineExecutor executor(pipeline);
|
||||
|
@ -11,7 +11,15 @@ namespace DB
|
||||
class ReadBuffer;
|
||||
class WriteBuffer;
|
||||
|
||||
using SetResultDetailsFunc = std::function<void(const String &, const String &, const String &, const String &)>;
|
||||
struct QueryResultDetails
|
||||
{
|
||||
String query_id;
|
||||
std::optional<String> content_type;
|
||||
std::optional<String> format;
|
||||
std::optional<String> timezone;
|
||||
};
|
||||
|
||||
using SetResultDetailsFunc = std::function<void(const QueryResultDetails &)>;
|
||||
|
||||
/// Parse and execute a query.
|
||||
void executeQuery(
|
||||
|
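The old SetResultDetailsFunc took four positional strings; the new one takes the QueryResultDetails struct above, so consumers can react only to the fields that were actually set. A minimal sketch of the new calling convention, using a local stand-in for the struct rather than the real header (all names below are illustrative):

    #include <functional>
    #include <iostream>
    #include <optional>
    #include <string>

    // Local stand-in for DB::QueryResultDetails, just for illustration.
    struct QueryResultDetailsSketch
    {
        std::string query_id;
        std::optional<std::string> content_type;
        std::optional<std::string> format;
        std::optional<std::string> timezone;
    };

    using SetResultDetailsFuncSketch = std::function<void(const QueryResultDetailsSketch &)>;

    int main()
    {
        // A consumer (think: HTTP handler) that only emits headers for fields that are present.
        SetResultDetailsFuncSketch set_result_details = [](const QueryResultDetailsSketch & details)
        {
            std::cout << "X-ClickHouse-Query-Id: " << details.query_id << '\n';
            if (details.format)
                std::cout << "X-ClickHouse-Format: " << *details.format << '\n';
            if (details.timezone)
                std::cout << "X-ClickHouse-Timezone: " << *details.timezone << '\n';
        };

        // The producer fills in whatever it knows; content type and format may stay unset on some code paths.
        QueryResultDetailsSketch details;
        details.query_id = "hypothetical-query-id";
        details.timezone = "UTC";
        set_result_details(details);
    }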
@ -113,10 +113,8 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
|
||||
if (group_by_with_cube)
|
||||
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH CUBE" << (s.hilite ? hilite_none : "");
|
||||
|
||||
if (group_by_with_grouping_sets)
|
||||
if (group_by_with_grouping_sets && groupBy())
|
||||
{
|
||||
if (!groupBy()) /// sanity check, issue 43049
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Corrupt AST");
|
||||
auto nested_frame = frame;
|
||||
nested_frame.surround_each_list_element_with_parens = true;
|
||||
nested_frame.expression_list_prepend_whitespace = false;
|
||||
|
@ -269,6 +269,9 @@ void FillingTransform::transform(Chunk & chunk)
|
||||
if (on_totals)
|
||||
return;
|
||||
|
||||
if (!chunk.hasRows() && !generate_suffix)
|
||||
return;
|
||||
|
||||
Columns old_fill_columns;
|
||||
Columns old_interpolate_columns;
|
||||
Columns old_other_columns;
|
||||
|
@ -831,12 +831,20 @@ void HTTPHandler::processQuery(
|
||||
customizeContext(request, context);
|
||||
|
||||
executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context,
|
||||
[&response, this] (const String & current_query_id, const String & content_type, const String & format, const String & timezone)
|
||||
[&response, this] (const QueryResultDetails & details)
|
||||
{
|
||||
response.setContentType(content_type_override.value_or(content_type));
|
||||
response.add("X-ClickHouse-Query-Id", current_query_id);
|
||||
response.add("X-ClickHouse-Format", format);
|
||||
response.add("X-ClickHouse-Timezone", timezone);
|
||||
response.add("X-ClickHouse-Query-Id", details.query_id);
|
||||
|
||||
if (content_type_override)
|
||||
response.setContentType(*content_type_override);
|
||||
else if (details.content_type)
|
||||
response.setContentType(*details.content_type);
|
||||
|
||||
if (details.format)
|
||||
response.add("X-ClickHouse-Format", *details.format);
|
||||
|
||||
if (details.timezone)
|
||||
response.add("X-ClickHouse-Timezone", *details.timezone);
|
||||
}
|
||||
);
|
||||
|
||||
|
@ -352,11 +352,15 @@ void MySQLHandler::comQuery(ReadBuffer & payload)
|
||||
format_settings.mysql_wire.max_packet_size = max_packet_size;
|
||||
format_settings.mysql_wire.sequence_id = &sequence_id;
|
||||
|
||||
auto set_result_details = [&with_output](const String &, const String &, const String &format, const String &)
|
||||
auto set_result_details = [&with_output](const QueryResultDetails & details)
|
||||
{
|
||||
if (format != "MySQLWire")
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "MySQL protocol does not support custom output formats");
|
||||
with_output = true;
|
||||
if (details.format)
|
||||
{
|
||||
if (*details.format != "MySQLWire")
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "MySQL protocol does not support custom output formats");
|
||||
|
||||
with_output = true;
|
||||
}
|
||||
};
|
||||
|
||||
executeQuery(should_replace ? replacement : payload, *out, false, query_context, set_result_details, format_settings);
|
||||
|
@ -611,6 +611,8 @@ void TCPHandler::runImpl()
|
||||
/// It is important to destroy query context here. We do not want it to live arbitrarily longer than the query.
|
||||
query_context.reset();
|
||||
|
||||
CurrentThread::setFatalErrorCallback({});
|
||||
|
||||
if (is_interserver_mode)
|
||||
{
|
||||
/// We don't really have session in interserver mode, new one is created for each query. It's better to reset it now.
|
||||
|
@ -175,22 +175,69 @@ namespace
|
||||
const auto CLEANUP_TIMEOUT_MS = 3000;
|
||||
const auto MAX_THREAD_WORK_DURATION_MS = 60000; // once per minute leave do reschedule (we can't lock threads in pool forever)
|
||||
|
||||
/// Configuration prefix
|
||||
const String CONFIG_PREFIX = "kafka";
|
||||
const String CONFIG_KAFKA_TAG = "kafka";
|
||||
const String CONFIG_KAFKA_TOPIC_TAG = "kafka_topic";
|
||||
const String CONFIG_NAME_TAG = "name";
|
||||
|
||||
void loadFromConfig(cppkafka::Configuration & conf, const Poco::Util::AbstractConfiguration & config, const std::string & path)
|
||||
/// Read server configuration into cppkafka configuration, used by global configuration and by legacy per-topic configuration
|
||||
void loadFromConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
|
||||
{
|
||||
Poco::Util::AbstractConfiguration::Keys keys;
|
||||
config.keys(path, keys);
|
||||
/// Read all tags one level below <kafka>
|
||||
Poco::Util::AbstractConfiguration::Keys tags;
|
||||
config.keys(config_prefix, tags);
|
||||
|
||||
for (const auto & key : keys)
|
||||
for (const auto & tag : tags)
|
||||
{
|
||||
const String key_path = path + "." + key;
|
||||
// log_level has valid underscore, rest librdkafka setting use dot.separated.format
|
||||
// which is not acceptable for XML.
|
||||
// See also https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
|
||||
const String key_name = (key == "log_level") ? key : boost::replace_all_copy(key, "_", ".");
|
||||
conf.set(key_name, config.getString(key_path));
|
||||
if (tag.starts_with(CONFIG_KAFKA_TOPIC_TAG)) /// multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc.
|
||||
continue; /// used by new per-topic configuration, ignore
|
||||
|
||||
const String setting_path = config_prefix + "." + tag;
|
||||
const String setting_value = config.getString(setting_path);
|
||||
|
||||
/// "log_level" has valid underscore, the remaining librdkafka setting use dot.separated.format which isn't acceptable for XML.
|
||||
/// See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
|
||||
const String setting_name_in_kafka_config = (tag == "log_level") ? tag : boost::replace_all_copy(tag, "_", ".");
|
||||
kafka_config.set(setting_name_in_kafka_config, setting_value);
|
||||
}
|
||||
}
|
||||
|
||||
/// Read server configuration into cppkafka configuration, used by new per-topic configuration
|
||||
void loadTopicConfig(cppkafka::Configuration & kafka_config, const Poco::Util::AbstractConfiguration & config, const String & config_prefix, const String & topic)
|
||||
{
|
||||
/// Read all tags one level below <kafka>
|
||||
Poco::Util::AbstractConfiguration::Keys tags;
|
||||
config.keys(config_prefix, tags);
|
||||
|
||||
for (const auto & tag : tags)
|
||||
{
|
||||
/// Only consider tag <kafka_topic>. Multiple occurrences given as "kafka_topic", "kafka_topic[1]", etc.
|
||||
if (!tag.starts_with(CONFIG_KAFKA_TOPIC_TAG))
|
||||
continue;
|
||||
|
||||
/// Read topic name between <name>...</name>
|
||||
const String kafka_topic_path = config_prefix + "." + tag;
|
||||
const String kafka_topic_name_path = kafka_topic_path + "." + CONFIG_NAME_TAG;
|
||||
|
||||
const String topic_name = config.getString(kafka_topic_name_path);
|
||||
if (topic_name == topic)
|
||||
{
|
||||
/// Found it! Now read the per-topic configuration into cppkafka.
|
||||
Poco::Util::AbstractConfiguration::Keys inner_tags;
|
||||
config.keys(kafka_topic_path, inner_tags);
|
||||
for (const auto & inner_tag : inner_tags)
|
||||
{
|
||||
if (inner_tag == CONFIG_NAME_TAG)
|
||||
continue; // ignore <name>
|
||||
|
||||
/// "log_level" has valid underscore, the remaining librdkafka setting use dot.separated.format which isn't acceptable for XML.
|
||||
/// See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
|
||||
const String setting_path = kafka_topic_path + "." + inner_tag;
|
||||
const String setting_value = config.getString(setting_path);
|
||||
|
||||
const String setting_name_in_kafka_config = (inner_tag == "log_level") ? inner_tag : boost::replace_all_copy(inner_tag, "_", ".");
|
||||
kafka_config.set(setting_name_in_kafka_config, setting_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
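Both helpers rely on the same naming rule spelled out in the comments: XML-friendly tags use underscores, librdkafka expects dotted option names, and "log_level" is the one key kept verbatim. A standalone sketch of that mapping without Poco or boost (function name is mine):

    #include <algorithm>
    #include <cassert>
    #include <string>

    // Mirrors the rule above: underscores become dots, except for "log_level".
    std::string toLibrdkafkaName(std::string tag)
    {
        if (tag == "log_level")
            return tag;
        std::replace(tag.begin(), tag.end(), '_', '.');
        return tag;
    }

    int main()
    {
        assert(toLibrdkafkaName("retry_backoff_ms") == "retry.backoff.ms");
        assert(toLibrdkafkaName("fetch_min_bytes") == "fetch.min.bytes");
        assert(toLibrdkafkaName("log_level") == "log_level");
    }

So a <retry_backoff_ms>250</retry_backoff_ms> tag under <kafka> ends up as retry.backoff.ms=250 in the librdkafka configuration, as the XML examples in the comments suggest.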
@ -509,29 +556,33 @@ size_t StorageKafka::getPollTimeoutMillisecond() const
|
||||
String StorageKafka::getConfigPrefix() const
|
||||
{
|
||||
if (!collection_name.empty())
|
||||
return "named_collections." + collection_name + "." + CONFIG_PREFIX; /// Add one more level to separate librdkafka configuration.
|
||||
return CONFIG_PREFIX;
|
||||
return "named_collections." + collection_name + "." + CONFIG_KAFKA_TAG; /// Add one more level to separate librdkafka configuration.
|
||||
return CONFIG_KAFKA_TAG;
|
||||
}
|
||||
|
||||
void StorageKafka::updateConfiguration(cppkafka::Configuration & conf)
|
||||
void StorageKafka::updateConfiguration(cppkafka::Configuration & kafka_config)
|
||||
{
|
||||
// Update consumer configuration from the configuration
|
||||
// Update consumer configuration from the configuration. Example:
|
||||
// <kafka>
|
||||
// <retry_backoff_ms>250</retry_backoff_ms>
|
||||
// <fetch_min_bytes>100000</fetch_min_bytes>
|
||||
// </kafka>
|
||||
const auto & config = getContext()->getConfigRef();
|
||||
auto config_prefix = getConfigPrefix();
|
||||
if (config.has(config_prefix))
|
||||
loadFromConfig(conf, config, config_prefix);
|
||||
loadFromConfig(kafka_config, config, config_prefix);
|
||||
|
||||
#if USE_KRB5
|
||||
if (conf.has_property("sasl.kerberos.kinit.cmd"))
|
||||
#if USE_KRB5
|
||||
if (kafka_config.has_property("sasl.kerberos.kinit.cmd"))
|
||||
LOG_WARNING(log, "sasl.kerberos.kinit.cmd configuration parameter is ignored.");
|
||||
|
||||
conf.set("sasl.kerberos.kinit.cmd","");
|
||||
conf.set("sasl.kerberos.min.time.before.relogin","0");
|
||||
kafka_config.set("sasl.kerberos.kinit.cmd","");
|
||||
kafka_config.set("sasl.kerberos.min.time.before.relogin","0");
|
||||
|
||||
if (conf.has_property("sasl.kerberos.keytab") && conf.has_property("sasl.kerberos.principal"))
|
||||
if (kafka_config.has_property("sasl.kerberos.keytab") && kafka_config.has_property("sasl.kerberos.principal"))
|
||||
{
|
||||
String keytab = conf.get("sasl.kerberos.keytab");
|
||||
String principal = conf.get("sasl.kerberos.principal");
|
||||
String keytab = kafka_config.get("sasl.kerberos.keytab");
|
||||
String principal = kafka_config.get("sasl.kerberos.principal");
|
||||
LOG_DEBUG(log, "Running KerberosInit");
|
||||
try
|
||||
{
|
||||
@ -543,21 +594,47 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & conf)
|
||||
}
|
||||
LOG_DEBUG(log, "Finished KerberosInit");
|
||||
}
|
||||
#else // USE_KRB5
|
||||
if (conf.has_property("sasl.kerberos.keytab") || conf.has_property("sasl.kerberos.principal"))
|
||||
LOG_WARNING(log, "Kerberos-related parameters are ignored because ClickHouse was built without support of krb5 library.");
|
||||
#endif // USE_KRB5
|
||||
#else // USE_KRB5
|
||||
if (kafka_config.has_property("sasl.kerberos.keytab") || kafka_config.has_property("sasl.kerberos.principal"))
|
||||
LOG_WARNING(log, "Ignoring Kerberos-related parameters because ClickHouse was built without krb5 library support.");
|
||||
#endif // USE_KRB5
|
||||
|
||||
// Update consumer topic-specific configuration
|
||||
// Update consumer topic-specific configuration (legacy syntax, retained for compatibility). Example with topic "football":
|
||||
// <kafka_football>
|
||||
// <retry_backoff_ms>250</retry_backoff_ms>
|
||||
// <fetch_min_bytes>100000</fetch_min_bytes>
|
||||
// </kafka_football>
|
||||
// The legacy syntax has the problem that periods in topic names (e.g. "sports.football") are not supported because the Poco
|
||||
// configuration framework hierarchy is based on periods as level separators. Besides that, per-topic tags at the same level
|
||||
// as <kafka> are ugly.
|
||||
for (const auto & topic : topics)
|
||||
{
|
||||
const auto topic_config_key = config_prefix + "_" + topic;
|
||||
if (config.has(topic_config_key))
|
||||
loadFromConfig(conf, config, topic_config_key);
|
||||
loadFromConfig(kafka_config, config, topic_config_key);
|
||||
}
|
||||
|
||||
// Update consumer topic-specific configuration (new syntax). Example with topics "football" and "baseball":
|
||||
// <kafka>
|
||||
// <kafka_topic>
|
||||
// <name>football</name>
|
||||
// <retry_backoff_ms>250</retry_backoff_ms>
|
||||
// <fetch_min_bytes>5000</fetch_min_bytes>
|
||||
// </kafka_topic>
|
||||
// <kafka_topic>
|
||||
// <name>baseball</name>
|
||||
// <retry_backoff_ms>300</retry_backoff_ms>
|
||||
// <fetch_min_bytes>2000</fetch_min_bytes>
|
||||
// </kafka_topic>
|
||||
// </kafka>
|
||||
// Advantages: The period restriction no longer applies (e.g. <name>sports.football</name> will work), everything
|
||||
// Kafka-related is below <kafka>.
|
||||
for (const auto & topic : topics)
|
||||
if (config.has(config_prefix))
|
||||
loadTopicConfig(kafka_config, config, config_prefix, topic);
|
||||
|
||||
// No need to add any prefix, messages can be distinguished
|
||||
conf.set_log_callback([this](cppkafka::KafkaHandleBase &, int level, const std::string & facility, const std::string & message)
|
||||
kafka_config.set_log_callback([this](cppkafka::KafkaHandleBase &, int level, const std::string & facility, const std::string & message)
|
||||
{
|
||||
auto [poco_level, client_logs_level] = parseSyslogLevel(level);
|
||||
LOG_IMPL(log, client_logs_level, poco_level, "[rdk:{}] {}", facility, message);
|
||||
@ -573,13 +650,13 @@ void StorageKafka::updateConfiguration(cppkafka::Configuration & conf)
|
||||
|
||||
int status;
|
||||
|
||||
status = rd_kafka_conf_interceptor_add_on_new(conf.get_handle(),
|
||||
status = rd_kafka_conf_interceptor_add_on_new(kafka_config.get_handle(),
|
||||
"init", StorageKafkaInterceptors::rdKafkaOnNew, self);
|
||||
if (status != RD_KAFKA_RESP_ERR_NO_ERROR)
|
||||
LOG_ERROR(log, "Cannot set new interceptor due to {} error", status);
|
||||
|
||||
// cppkafka always copy the configuration
|
||||
status = rd_kafka_conf_interceptor_add_on_conf_dup(conf.get_handle(),
|
||||
status = rd_kafka_conf_interceptor_add_on_conf_dup(kafka_config.get_handle(),
|
||||
"init", StorageKafkaInterceptors::rdKafkaOnConfDup, self);
|
||||
if (status != RD_KAFKA_RESP_ERR_NO_ERROR)
|
||||
LOG_ERROR(log, "Cannot set dup conf interceptor due to {} error", status);
|
||||
|
@ -126,7 +126,7 @@ private:
|
||||
std::atomic<bool> shutdown_called = false;
|
||||
|
||||
// Update Kafka configuration with values from CH user configuration.
|
||||
void updateConfiguration(cppkafka::Configuration & conf);
|
||||
void updateConfiguration(cppkafka::Configuration & kafka_config);
|
||||
String getConfigPrefix() const;
|
||||
void threadFunc(size_t idx);
|
||||
|
||||
|
@ -684,12 +684,6 @@ void DataPartStorageOnDiskBase::clearDirectory(
|
||||
request.emplace_back(fs::path(dir) / "delete-on-destroy.txt", true);
|
||||
request.emplace_back(fs::path(dir) / "txn_version.txt", true);
|
||||
|
||||
/// Inverted index
|
||||
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_dict", true);
|
||||
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_post", true);
|
||||
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_seg", true);
|
||||
request.emplace_back(fs::path(dir) / "skp_idx_af.gin_sid", true);
|
||||
|
||||
disk->removeSharedFiles(request, !can_remove_shared_data, names_not_to_remove);
|
||||
disk->removeDirectory(dir);
|
||||
}
|
||||
|
@ -880,7 +880,7 @@ void IMergeTreeDataPart::writeMetadata(const String & filename, const WriteSetti
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException("DataPartStorageOnDiskFull");
|
||||
tryLogCurrentException("IMergeTreeDataPart");
|
||||
}
|
||||
|
||||
throw;
|
||||
|
@ -1893,11 +1893,11 @@ void MergeTreeData::stopOutdatedDataPartsLoadingTask()
|
||||
/// (Only files on the first level of nesting are considered).
|
||||
static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_path, time_t threshold)
|
||||
{
|
||||
if (!disk->isDirectory(directory_path) || disk->getLastModified(directory_path).epochTime() >= threshold)
|
||||
if (!disk->isDirectory(directory_path) || disk->getLastModified(directory_path).epochTime() > threshold)
|
||||
return false;
|
||||
|
||||
for (auto it = disk->iterateDirectory(directory_path); it->isValid(); it->next())
|
||||
if (disk->getLastModified(it->path()).epochTime() >= threshold)
|
||||
if (disk->getLastModified(it->path()).epochTime() > threshold)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
|
@ -75,6 +75,10 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r
|
||||
{
|
||||
const String & name = it.first;
|
||||
|
||||
/// Exclude files written by inverted index from check. No correct checksums are available for them currently.
|
||||
if (name.ends_with(".gin_dict") || name.ends_with(".gin_post") || name.ends_with(".gin_seg") || name.ends_with(".gin_sid"))
|
||||
continue;
|
||||
|
||||
auto jt = rhs.files.find(name);
|
||||
if (jt == rhs.files.end())
|
||||
throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No file {} in data part", name);
|
||||
|
@ -208,26 +208,26 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices()
|
||||
auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(settings.marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
CompressionCodecPtr marks_compression_codec = CompressionCodecFactory::instance().get(ast, nullptr);
|
||||
|
||||
for (const auto & index_helper : skip_indices)
|
||||
for (const auto & skip_index : skip_indices)
|
||||
{
|
||||
String stream_name = index_helper->getFileName();
|
||||
String stream_name = skip_index->getFileName();
|
||||
skip_indices_streams.emplace_back(
|
||||
std::make_unique<MergeTreeDataPartWriterOnDisk::Stream>(
|
||||
stream_name,
|
||||
data_part->getDataPartStoragePtr(),
|
||||
stream_name, index_helper->getSerializedFileExtension(),
|
||||
stream_name, skip_index->getSerializedFileExtension(),
|
||||
stream_name, marks_file_extension,
|
||||
default_codec, settings.max_compress_block_size,
|
||||
marks_compression_codec, settings.marks_compress_block_size,
|
||||
settings.query_write_settings));
|
||||
|
||||
GinIndexStorePtr store = nullptr;
|
||||
if (dynamic_cast<const MergeTreeIndexInverted *>(&*index_helper) != nullptr)
|
||||
if (typeid_cast<const MergeTreeIndexInverted *>(&*skip_index) != nullptr)
|
||||
{
|
||||
store = std::make_shared<GinIndexStore>(stream_name, data_part->getDataPartStoragePtr(), data_part->getDataPartStoragePtr(), storage.getSettings()->max_digestion_size_per_segment);
|
||||
gin_index_stores[stream_name] = store;
|
||||
}
|
||||
skip_indices_aggregators.push_back(index_helper->createIndexAggregatorForPart(store));
|
||||
skip_indices_aggregators.push_back(skip_index->createIndexAggregatorForPart(store));
|
||||
skip_index_accumulated_marks.push_back(0);
|
||||
}
|
||||
}
|
||||
@ -284,7 +284,7 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block
|
||||
WriteBuffer & marks_out = stream.compress_marks ? stream.marks_compressed_hashing : stream.marks_hashing;
|
||||
|
||||
GinIndexStorePtr store;
|
||||
if (dynamic_cast<const MergeTreeIndexInverted *>(&*index_helper) != nullptr)
|
||||
if (typeid_cast<const MergeTreeIndexInverted *>(&*index_helper) != nullptr)
|
||||
{
|
||||
String stream_name = index_helper->getFileName();
|
||||
auto it = gin_index_stores.find(stream_name);
|
||||
@ -388,6 +388,18 @@ void MergeTreeDataPartWriterOnDisk::fillSkipIndicesChecksums(MergeTreeData::Data
|
||||
auto & stream = *skip_indices_streams[i];
|
||||
if (!skip_indices_aggregators[i]->empty())
|
||||
skip_indices_aggregators[i]->getGranuleAndReset()->serializeBinary(stream.compressed_hashing);
|
||||
|
||||
/// Register additional files written only by the inverted index. Required because otherwise DROP TABLE complains about unknown
|
||||
/// files. Note that the provided actual checksums are bogus. The problem is that at this point the file writes happened already and
|
||||
/// we'd need to re-open + hash the files (fixing this is TODO). For now, CHECK TABLE skips these four files.
|
||||
if (typeid_cast<const MergeTreeIndexInverted *>(&*skip_indices[i]) != nullptr)
|
||||
{
|
||||
String filename_without_extension = skip_indices[i]->getFileName();
|
||||
checksums.files[filename_without_extension + ".gin_dict"] = MergeTreeDataPartChecksums::Checksum();
|
||||
checksums.files[filename_without_extension + ".gin_post"] = MergeTreeDataPartChecksums::Checksum();
|
||||
checksums.files[filename_without_extension + ".gin_seg"] = MergeTreeDataPartChecksums::Checksum();
|
||||
checksums.files[filename_without_extension + ".gin_sid"] = MergeTreeDataPartChecksums::Checksum();
|
||||
}
|
||||
}
|
||||
|
||||
for (auto & stream : skip_indices_streams)
|
||||
|
@ -157,25 +157,29 @@ IMergeTreeDataPart::Checksums checkDataPart(
|
||||
}
|
||||
|
||||
NameSet projections_on_disk;
|
||||
const auto & checksum_files_txt = checksums_txt.files;
|
||||
const auto & checksums_txt_files = checksums_txt.files;
|
||||
for (auto it = data_part_storage.iterate(); it->isValid(); it->next())
|
||||
{
|
||||
auto file_name = it->name();
|
||||
|
||||
/// We will check projections later.
|
||||
if (data_part_storage.isDirectory(file_name) && endsWith(file_name, ".proj"))
|
||||
if (data_part_storage.isDirectory(file_name) && file_name.ends_with(".proj"))
|
||||
{
|
||||
projections_on_disk.insert(file_name);
|
||||
continue;
|
||||
}
|
||||
|
||||
/// Exclude files written by inverted index from check. No correct checksums are available for them currently.
|
||||
if (file_name.ends_with(".gin_dict") || file_name.ends_with(".gin_post") || file_name.ends_with(".gin_seg") || file_name.ends_with(".gin_sid"))
|
||||
continue;
|
||||
|
||||
auto checksum_it = checksums_data.files.find(file_name);
|
||||
|
||||
/// Skip files that we already calculated. Also skip metadata files that are not checksummed.
|
||||
if (checksum_it == checksums_data.files.end() && !files_without_checksums.contains(file_name))
|
||||
{
|
||||
auto txt_checksum_it = checksum_files_txt.find(file_name);
|
||||
if (txt_checksum_it == checksum_files_txt.end() || txt_checksum_it->second.uncompressed_size == 0)
|
||||
auto txt_checksum_it = checksums_txt_files.find(file_name);
|
||||
if (txt_checksum_it == checksums_txt_files.end() || txt_checksum_it->second.uncompressed_size == 0)
|
||||
{
|
||||
/// The file is not compressed.
|
||||
checksum_file(file_name);
|
||||
|
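Both the checksum comparison and CHECK TABLE now skip the four auxiliary files an inverted (gin) index writes, identified purely by extension. A self-contained version of that predicate (the helper name is mine, not from the codebase):

    #include <array>
    #include <cassert>
    #include <string_view>

    // Hypothetical helper mirroring the extension checks used above for checksum verification.
    bool isInvertedIndexAuxiliaryFile(std::string_view file_name)
    {
        constexpr std::array<std::string_view, 4> extensions{".gin_dict", ".gin_post", ".gin_seg", ".gin_sid"};
        for (auto ext : extensions)
            if (file_name.ends_with(ext))
                return true;
        return false;
    }

    int main()
    {
        assert(isInvertedIndexAuxiliaryFile("skp_idx_af.gin_dict"));
        assert(isInvertedIndexAuxiliaryFile("skp_idx_af.gin_sid"));
        assert(!isInvertedIndexAuxiliaryFile("data.bin"));
    }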
@ -44,6 +44,27 @@
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
using namespace DB;
|
||||
bool columnIsPhysical(ColumnDefaultKind kind)
|
||||
{
|
||||
return kind == ColumnDefaultKind::Default || kind == ColumnDefaultKind::Materialized;
|
||||
}
|
||||
bool columnDefaultKindHasSameType(ColumnDefaultKind lhs, ColumnDefaultKind rhs)
|
||||
{
|
||||
if (lhs == rhs)
|
||||
return true;
|
||||
|
||||
if (columnIsPhysical(lhs) == columnIsPhysical(rhs))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -172,11 +193,13 @@ std::optional<NameSet> StorageMerge::supportedPrewhereColumns() const
|
||||
|
||||
NameSet supported_columns;
|
||||
|
||||
std::unordered_map<std::string, std::pair<const IDataType *, std::optional<ColumnDefault>>> column_type_default;
|
||||
std::unordered_map<std::string, std::pair<const IDataType *, ColumnDefaultKind>> column_info;
|
||||
for (const auto & name_type : columns.getAll())
|
||||
{
|
||||
column_type_default.emplace(name_type.name, std::make_pair(
|
||||
name_type.type.get(), columns.getDefault(name_type.name)));
|
||||
const auto & column_default = columns.getDefault(name_type.name).value_or(ColumnDefault{});
|
||||
column_info.emplace(name_type.name, std::make_pair(
|
||||
name_type.type.get(),
|
||||
column_default.kind));
|
||||
supported_columns.emplace(name_type.name);
|
||||
}
|
||||
|
||||
@ -191,11 +214,10 @@ std::optional<NameSet> StorageMerge::supportedPrewhereColumns() const
|
||||
const auto & table_columns = table_metadata_ptr->getColumns();
|
||||
for (const auto & column : table_columns.getAll())
|
||||
{
|
||||
const auto & root_type_default = column_type_default[column.name];
|
||||
const IDataType * root_type = root_type_default.first;
|
||||
const std::optional<ColumnDefault> & src_default = root_type_default.second;
|
||||
const auto & column_default = table_columns.getDefault(column.name).value_or(ColumnDefault{});
|
||||
const auto & [root_type, src_default_kind] = column_info[column.name];
|
||||
if ((root_type && !root_type->equals(*column.type)) ||
|
||||
src_default != table_columns.getDefault(column.name))
|
||||
!columnDefaultKindHasSameType(src_default_kind, column_default.kind))
|
||||
{
|
||||
supported_columns.erase(column.name);
|
||||
}
|
||||
|
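The net effect of the two helpers above is that PREWHERE support over a Merge table no longer requires identical column defaults, only that both sides are equally "physical" (DEFAULT or MATERIALIZED) or equally not. A compileable sketch with an assumed ColumnDefaultKind enum (the real enum lives elsewhere in the codebase and may differ):

    #include <cassert>

    // Assumed to approximate ClickHouse's ColumnDefaultKind.
    enum class ColumnDefaultKind { Default, Materialized, Alias, Ephemeral };

    bool columnIsPhysical(ColumnDefaultKind kind)
    {
        return kind == ColumnDefaultKind::Default || kind == ColumnDefaultKind::Materialized;
    }

    bool columnDefaultKindHasSameType(ColumnDefaultKind lhs, ColumnDefaultKind rhs)
    {
        return lhs == rhs || columnIsPhysical(lhs) == columnIsPhysical(rhs);
    }

    int main()
    {
        // DEFAULT and MATERIALIZED are both physical, so the column stays in the supported set.
        assert(columnDefaultKindHasSameType(ColumnDefaultKind::Default, ColumnDefaultKind::Materialized));
        // DEFAULT vs ALIAS mixes a physical and a non-physical column, so the column is dropped from the set.
        assert(!columnDefaultKindHasSameType(ColumnDefaultKind::Default, ColumnDefaultKind::Alias));
    }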
@ -905,17 +905,16 @@ void StorageReplicatedMergeTree::drop()
|
||||
/// in this case, has_metadata_in_zookeeper = false, and we also permit to drop the table.
|
||||
|
||||
bool maybe_has_metadata_in_zookeeper = !has_metadata_in_zookeeper.has_value() || *has_metadata_in_zookeeper;
|
||||
zkutil::ZooKeeperPtr zookeeper;
|
||||
if (maybe_has_metadata_in_zookeeper)
|
||||
{
|
||||
/// Table can be shut down, restarting thread is not active
|
||||
/// and calling StorageReplicatedMergeTree::getZooKeeper()/getAuxiliaryZooKeeper() won't suffice.
|
||||
zkutil::ZooKeeperPtr zookeeper = getZooKeeperIfTableShutDown();
|
||||
zookeeper = getZooKeeperIfTableShutDown();
|
||||
|
||||
/// If there might still be metadata in ZooKeeper, we don't allow dropping the table.
|
||||
if (!zookeeper)
|
||||
throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Can't drop readonly replicated table (need to drop data in ZooKeeper as well)");
|
||||
|
||||
dropReplica(zookeeper, zookeeper_path, replica_name, log, getSettings());
|
||||
}
|
||||
|
||||
/// Wait for loading of all outdated parts because
|
||||
@ -929,10 +928,17 @@ void StorageReplicatedMergeTree::drop()
|
||||
}
|
||||
|
||||
dropAllData();
|
||||
|
||||
if (maybe_has_metadata_in_zookeeper)
|
||||
{
|
||||
/// Session could expire, get it again
|
||||
zookeeper = getZooKeeperIfTableShutDown();
|
||||
dropReplica(zookeeper, zookeeper_path, replica_name, log, getSettings(), &has_metadata_in_zookeeper);
|
||||
}
|
||||
}
|
||||
|
||||
void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica,
|
||||
Poco::Logger * logger, MergeTreeSettingsPtr table_settings)
|
||||
Poco::Logger * logger, MergeTreeSettingsPtr table_settings, std::optional<bool> * has_metadata_out)
|
||||
{
|
||||
if (zookeeper->expired())
|
||||
throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Table was not dropped because ZooKeeper session has expired.");
|
||||
@ -990,12 +996,16 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con
|
||||
Coordination::errorMessage(code), remote_replica_path);
|
||||
|
||||
/// And finally remove everything else recursively
|
||||
zookeeper->tryRemoveRecursive(remote_replica_path);
|
||||
}
|
||||
/// It may leave some garbage if the replica_path subtree is concurrently modified
|
||||
zookeeper->tryRemoveChildrenRecursive(remote_replica_path);
|
||||
|
||||
/// It may left some garbage if replica_path subtree are concurrently modified
|
||||
if (zookeeper->exists(remote_replica_path))
|
||||
LOG_ERROR(logger, "Replica was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", remote_replica_path);
|
||||
/// Update has_metadata_in_zookeeper to avoid retries. Otherwise we can accidentally remove metadata of a new table on retries
|
||||
if (has_metadata_out)
|
||||
*has_metadata_out = false;
|
||||
|
||||
if (zookeeper->tryRemove(remote_replica_path) != Coordination::Error::ZOK)
|
||||
LOG_ERROR(logger, "Replica was not completely removed from ZooKeeper, {} still exists and may contain some garbage.", remote_replica_path);
|
||||
}
|
||||
|
||||
/// Check that `zookeeper_path` exists: it could have been deleted by another replica after execution of previous line.
|
||||
Strings replicas;
|
||||
@ -8183,6 +8193,12 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co
|
||||
auto shared_id = getTableSharedID();
|
||||
if (shared_id == toString(UUIDHelpers::Nil))
|
||||
{
|
||||
if (zookeeper->exists(zookeeper_path))
|
||||
{
|
||||
LOG_WARNING(log, "Not removing shared data for part {} because replica does not have metadata in ZooKeeper, "
|
||||
"but table path exist and other replicas may exist. It may leave some garbage on S3", part.name);
|
||||
return std::make_pair(false, NameSet{});
|
||||
}
|
||||
LOG_TRACE(log, "Part {} blobs can be removed, because table {} completely dropped", part.name, getStorageID().getNameForLogs());
|
||||
return std::make_pair(true, NameSet{});
|
||||
}
|
||||
@ -8208,9 +8224,18 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co
|
||||
return std::make_pair(true, NameSet{});
|
||||
}
|
||||
|
||||
/// If table was completely dropped (no meta in zookeeper) we can safely remove parts
|
||||
if (has_metadata_in_zookeeper.has_value() && !has_metadata_in_zookeeper)
|
||||
{
|
||||
if (zookeeper->exists(zookeeper_path))
|
||||
{
|
||||
LOG_WARNING(log, "Not removing shared data for part {} because replica does not have metadata in ZooKeeper, "
|
||||
"but table path exist and other replicas may exist. It may leave some garbage on S3", part.name);
|
||||
return std::make_pair(false, NameSet{});
|
||||
}
|
||||
|
||||
/// If table was completely dropped (no meta in zookeeper) we can safely remove parts
|
||||
return std::make_pair(true, NameSet{});
|
||||
}
|
||||
|
||||
/// We remove parts during table shutdown. If exception happen, restarting thread will be already turned
|
||||
/// off and nobody will reconnect our zookeeper connection. In this case we use zookeeper connection from
|
||||
|
@ -229,7 +229,7 @@ public:
|
||||
/** Remove a specific replica from zookeeper.
|
||||
*/
|
||||
static void dropReplica(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path, const String & replica,
|
||||
Poco::Logger * logger, MergeTreeSettingsPtr table_settings = nullptr);
|
||||
Poco::Logger * logger, MergeTreeSettingsPtr table_settings = nullptr, std::optional<bool> * has_metadata_out = nullptr);
|
||||
|
||||
/// Removes table from ZooKeeper after the last replica was dropped
|
||||
static bool removeTableNodesFromZooKeeper(zkutil::ZooKeeperPtr zookeeper, const String & zookeeper_path,
|
||||
|
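
The hunks above thread an optional has_metadata_out flag out of dropReplica() so that drop() can remember that this replica's ZooKeeper metadata is already gone; a retried DROP TABLE then skips the ZooKeeper step instead of touching nodes that may already belong to a re-created table. A minimal, self-contained Python sketch of that control flow (every name here is a hypothetical stand-in, not the ClickHouse or ZooKeeper API):

    class FakeKeeper:
        """In-memory stand-in for a ZooKeeper-like node store."""
        def __init__(self, nodes):
            self.nodes = set(nodes)

        def try_remove_recursive(self, prefix):
            self.nodes = {n for n in self.nodes if not n.startswith(prefix)}

    class TableState:
        def __init__(self):
            self.maybe_has_metadata_in_zookeeper = True

    def drop_replica(keeper, replica_path, state=None):
        keeper.try_remove_recursive(replica_path)
        if state is not None:
            # Record "metadata gone" so a retry of DROP TABLE does not try to
            # delete nodes that may belong to a freshly re-created table.
            state.maybe_has_metadata_in_zookeeper = False

    def drop(state, keeper, replica_path):
        # Local data would be dropped first; ZooKeeper is touched only while
        # we still believe our metadata is there.
        if state.maybe_has_metadata_in_zookeeper:
            drop_replica(keeper, replica_path, state)

    keeper = FakeKeeper({"/t/replicas/r1/parts", "/t/replicas/r1/queue"})
    state = TableState()
    drop(state, keeper, "/t/replicas/r1")  # removes the replica's nodes
    drop(state, keeper, "/t/replicas/r1")  # retry: ZooKeeper is not touched again
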
@ -9,7 +9,7 @@ from github import Github

from build_download_helper import get_build_name_for_check, read_build_urls
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import format_description, post_commit_status
from docker_pull_helper import get_image_with_version
from env_helper import (
GITHUB_REPOSITORY,

@ -145,11 +145,13 @@ if __name__ == "__main__":
with open(
os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8"
) as desc_f:
description = desc_f.readline().rstrip("\n")[:140]
description = desc_f.readline().rstrip("\n")
except:
status = "failure"
description = "Task failed: $?=" + str(retcode)

description = format_description(description)

test_result = TestResult(description, "OK")
if "fail" in status:
test_result.status = "FAIL"

@ -9,6 +9,10 @@ import time
from shutil import rmtree
from typing import List, Tuple

from ccache_utils import get_ccache_if_not_exists, upload_ccache
from ci_config import CI_CONFIG, BuildConfig
from commit_status_helper import get_commit_filtered_statuses, get_commit
from docker_pull_helper import get_image_with_version
from env_helper import (
CACHES_PATH,
GITHUB_JOB,

@ -18,18 +22,17 @@ from env_helper import (
S3_DOWNLOAD,
TEMP_PATH,
)
from s3_helper import S3Helper
from get_robot_token import get_best_robot_token
from github_helper import GitHub
from pr_info import PRInfo
from s3_helper import S3Helper
from tee_popen import TeePopen
from version_helper import (
ClickHouseVersion,
Git,
get_version_from_repo,
update_version_local,
)
from ccache_utils import get_ccache_if_not_exists, upload_ccache
from ci_config import CI_CONFIG, BuildConfig
from docker_pull_helper import get_image_with_version
from tee_popen import TeePopen

IMAGE_NAME = "clickhouse/binary-builder"
BUILD_LOG_NAME = "build_log.log"

@ -122,8 +125,7 @@ def check_for_success_run(
logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix, "")
logging.info("Checking for artifacts in %s", logged_prefix)
try:
# TODO: theoretically, it would miss performance artifact for pr==0,
# but luckily we rerun only really failed tasks now, so we're safe
# Performance artifacts are now part of regular build, so we're safe
build_results = s3_helper.list_prefix(s3_prefix)
except Exception as ex:
logging.info("Got exception while listing %s: %s\nRerun", logged_prefix, ex)

@ -231,6 +233,29 @@ def upload_master_static_binaries(
print(f"::notice ::Binary static URL: {url}")


def mark_failed_reports_pending(build_name: str, sha: str) -> None:
try:
gh = GitHub(get_best_robot_token())
commit = get_commit(gh, sha)
statuses = get_commit_filtered_statuses(commit)
report_status = [
name
for name, builds in CI_CONFIG["builds_report_config"].items()
if build_name in builds
][0]
for status in statuses:
if status.context == report_status and status.state in ["failure", "error"]:
logging.info(
"Commit already have failed status for '%s', setting it to 'pending'",
report_status,
)
commit.create_status(
"pending", status.url, "Set to pending on rerun", report_status
)
except: # we do not care about any exception here
logging.info("Failed to get or mark the reports status as pending, continue")


def main():
logging.basicConfig(level=logging.INFO)

@ -260,6 +285,9 @@ def main():
# put them as github actions artifact (result)
check_for_success_run(s3_helper, s3_path_prefix, build_name, build_config)

# If it's a latter running, we need to mark possible failed status
mark_failed_reports_pending(build_name, pr_info.sha)

docker_image = get_image_with_version(IMAGES_PATH, IMAGE_NAME)
image_version = docker_image.version
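
mark_failed_reports_pending() above resolves which builds report a given build feeds by scanning CI_CONFIG["builds_report_config"]. A tiny sketch of that lookup with a hypothetical, cut-down config dict (the real mapping lives in ci_config.py and is much larger):

    # Hypothetical stand-in: report name -> names of builds aggregated by it.
    builds_report_config = {
        "ClickHouse build check": ["package_release", "package_asan"],
        "ClickHouse special build check": ["binary_tidy"],
    }

    def report_for_build(build_name: str) -> str:
        # Same list-comprehension lookup as in mark_failed_reports_pending().
        return [
            name
            for name, builds in builds_report_config.items()
            if build_name in builds
        ][0]

    print(report_for_build("package_asan"))  # -> ClickHouse build check
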
@ -508,7 +508,7 @@ def main():
logging.getLogger("git_helper").setLevel(logging.DEBUG)
token = args.token or get_best_robot_token()

gh = GitHub(token, create_cache_dir=False, per_page=100)
gh = GitHub(token, create_cache_dir=False)
bp = Backport(gh, args.repo, args.dry_run)
# https://github.com/python/mypy/issues/3004
bp.gh.cache_path = f"{TEMP_PATH}/gh_cache" # type: ignore

@ -3,18 +3,20 @@
import csv
import os
import time
from typing import List
from typing import List, Literal
import logging

from ci_config import CI_CONFIG, REQUIRED_CHECKS
from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL
from github import Github
from github.Commit import Commit
from github.CommitStatus import CommitStatus

from ci_config import CI_CONFIG, REQUIRED_CHECKS
from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL
from pr_info import PRInfo, SKIP_MERGEABLE_CHECK_LABEL

RETRY = 5
CommitStatuses = List[CommitStatus]
MERGEABLE_NAME = "Mergeable Check"


def override_status(status: str, check_name: str, invert: bool = False) -> str:

@ -102,59 +104,69 @@ def post_labels(gh: Github, pr_info: PRInfo, labels_names: List[str]) -> None:
pull_request.add_to_labels(label)


def fail_mergeable_check(commit: Commit, description: str) -> None:
commit.create_status(
context="Mergeable Check",
description=description,
state="failure",
target_url=GITHUB_RUN_URL,
)
def format_description(description: str) -> str:
if len(description) > 140:
description = description[:137] + "..."
return description


def reset_mergeable_check(commit: Commit, description: str = "") -> None:
def set_mergeable_check(
commit: Commit,
description: str = "",
state: Literal["success", "failure"] = "success",
) -> None:
commit.create_status(
context="Mergeable Check",
context=MERGEABLE_NAME,
description=description,
state="success",
state=state,
target_url=GITHUB_RUN_URL,
)


def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None:
if SKIP_MERGEABLE_CHECK_LABEL in pr_info.labels:
not_run = (
pr_info.labels.intersection({SKIP_MERGEABLE_CHECK_LABEL, "release"})
or check_name not in REQUIRED_CHECKS
or pr_info.release_pr
or pr_info.number == 0
)
if not_run:
# Let's avoid unnecessary work
return

logging.info("Update Mergeable Check by %s", check_name)

commit = get_commit(gh, pr_info.sha)
checks = {
check.context: check.state
for check in filter(
lambda check: (check.context in REQUIRED_CHECKS),
# get_statuses() returns generator, which cannot be reversed - we need comprehension
# pylint: disable=unnecessary-comprehension
reversed([status for status in commit.get_statuses()]),
)
}
statuses = get_commit_filtered_statuses(commit)

required_checks = [
status for status in statuses if status.context in REQUIRED_CHECKS
]

mergeable_status = None
for status in statuses:
if status.context == MERGEABLE_NAME:
mergeable_status = status
break

success = []
fail = []
for name, state in checks.items():
if state == "success":
success.append(name)
for status in required_checks:
if status.state == "success":
success.append(status.context)
else:
fail.append(name)
fail.append(status.context)

if fail:
description = "failed: " + ", ".join(fail)
if success:
description += "; succeeded: " + ", ".join(success)
if len(description) > 140:
description = description[:137] + "..."
fail_mergeable_check(commit, description)
description = format_description(description)
if mergeable_status is None or mergeable_status.description != description:
set_mergeable_check(commit, description, "failure")
return

description = ", ".join(success)
if len(description) > 140:
description = description[:137] + "..."
reset_mergeable_check(commit, description)
description = format_description(description)
if mergeable_status is None or mergeable_status.description != description:
set_mergeable_check(commit, description)
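
Several scripts in this diff swap their ad-hoc description[:136] + "..." / [:139] slicing for the format_description() helper introduced above, so the length cap for commit-status descriptions is enforced in one place. A small usage sketch (the helper body is copied from the hunk above; the 140-character cap is simply what these scripts treat as the limit):

    def format_description(description: str) -> str:
        # Copied from the hunk above: cap at 140 characters, marking the cut
        # with an ellipsis.
        if len(description) > 140:
            description = description[:137] + "..."
        return description

    short = "failed: Stateless tests (asan)"
    long = "failed: " + ", ".join(f"check {i}" for i in range(40))
    assert format_description(short) == short       # short text is unchanged
    assert len(format_description(long)) == 140     # long text is capped
    assert format_description(long).endswith("...")
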
@ -14,7 +14,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union
from github import Github

from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import format_description, post_commit_status
from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from pr_info import PRInfo

@ -456,8 +456,7 @@ def main():
else:
description = "Nothing to update"

if len(description) >= 140:
description = description[:136] + "..."
description = format_description(description)

with open(changed_json, "w", encoding="utf-8") as images_file:
json.dump(result_images, images_file)

@ -10,7 +10,7 @@ from typing import List, Dict, Tuple
from github import Github

from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import format_description, post_commit_status
from env_helper import RUNNER_TEMP
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from pr_info import PRInfo

@ -218,8 +218,7 @@ def main():
else:
description = "Nothing to update"

if len(description) >= 140:
description = description[:136] + "..."
description = format_description(description)

gh = Github(get_best_robot_token(), per_page=100)
post_commit_status(gh, pr_info.sha, NAME, description, status, url)

@ -15,7 +15,7 @@ from github import Github

from build_check import get_release_or_pr
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import format_description, post_commit_status
from docker_images_check import DockerImage
from env_helper import CI, GITHUB_RUN_URL, RUNNER_TEMP, S3_BUILDS_BUCKET, S3_DOWNLOAD
from get_robot_token import get_best_robot_token, get_parameter_from_ssm

@ -369,8 +369,7 @@ def main():

description = f"Processed tags: {', '.join(tags)}"

if len(description) >= 140:
description = description[:136] + "..."
description = format_description(description)

gh = Github(get_best_robot_token(), per_page=100)
post_commit_status(gh, pr_info.sha, NAME, description, status, url)

@ -35,6 +35,8 @@ class GitHub(github.Github):
self._cache_path = Path(CACHE_PATH)
if create_cache_dir:
self.cache_path = self.cache_path
if not kwargs.get("per_page"):
kwargs["per_page"] = 100
# And set Path
super().__init__(*args, **kwargs)
self._retries = 0
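
The github_helper change above defaults per_page to 100 for the wrapped PyGithub client, which is why several callers in this diff drop their explicit per_page=100 argument. A minimal sketch of that keyword-defaulting pattern, using a stand-in base class rather than PyGithub:

    class BaseClient:
        def __init__(self, *args, **kwargs):
            self.per_page = kwargs.get("per_page", 30)

    class Client(BaseClient):
        def __init__(self, *args, **kwargs):
            # Same idea as GitHub.__init__ above: fill in the default only if
            # the caller did not pass one, then forward everything onward.
            if not kwargs.get("per_page"):
                kwargs["per_page"] = 100
            super().__init__(*args, **kwargs)

    assert Client().per_page == 100            # default applied
    assert Client(per_page=25).per_page == 25  # explicit value still wins
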
@ -18,7 +18,11 @@ from clickhouse_helper import (
mark_flaky_tests,
prepare_tests_results_for_clickhouse,
)
from commit_status_helper import post_commit_status, update_mergeable_check
from commit_status_helper import (
format_description,
post_commit_status,
update_mergeable_check,
)
from compress_files import compress_fast
from docker_pull_helper import get_image_with_version, DockerImage
from env_helper import CI, TEMP_PATH as TEMP, REPORTS_PATH

@ -341,8 +345,7 @@ def main():
ch_helper = ClickHouseHelper()
mark_flaky_tests(ch_helper, args.check_name, test_results)

if len(description) >= 140:
description = description[:136] + "..."
description = format_description(description)

post_commit_status(gh, pr_info.sha, args.check_name, description, state, report_url)

@ -43,7 +43,7 @@ def main():
description = "the release can be created from the commit"
args.token = args.token or get_best_robot_token()

gh = GitHub(args.token, create_cache_dir=False, per_page=100)
gh = GitHub(args.token, create_cache_dir=False)
# Get the rate limits for a quick fail
gh.get_rate_limit()
commit = get_commit(gh, args.commit)

@ -217,7 +217,7 @@ def main():
args = parse_args()
logging.info("Going to process PR #%s in repo %s", args.pr, args.repo)
token = args.token or get_best_robot_token()
gh = GitHub(token, per_page=100)
gh = GitHub(token)
repo = gh.get_repo(args.repo)
# An ugly and not nice fix to patch the wrong organization URL,
# see https://github.com/PyGithub/PyGithub/issues/2395#issuecomment-1378629710

@ -7,10 +7,11 @@ from typing import Tuple
from github import Github

from commit_status_helper import (
format_description,
get_commit,
post_labels,
remove_labels,
reset_mergeable_check,
set_mergeable_check,
)
from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL
from get_robot_token import get_best_robot_token

@ -157,7 +158,7 @@ def check_pr_description(pr_info: PRInfo) -> Tuple[str, str]:
+ second_category
+ "'"
)
return result_status[:140], category
return result_status, category

elif re.match(
r"(?i)^[#>*_ ]*(short\s*description|change\s*log\s*entry)", lines[i]

@ -199,6 +200,7 @@ if __name__ == "__main__":

pr_info = PRInfo(need_orgs=True, pr_event_from_api=True, need_changed_files=True)
can_run, description, labels_state = should_run_checks_for_pr(pr_info)
description = format_description(description)
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)

@ -231,7 +233,7 @@ if __name__ == "__main__":
if pr_labels_to_remove:
remove_labels(gh, pr_info, pr_labels_to_remove)

reset_mergeable_check(commit, "skipped")
set_mergeable_check(commit, "skipped")

if description_error:
print(

@ -249,7 +251,7 @@ if __name__ == "__main__":
)
commit.create_status(
context=NAME,
description=description_error[:139],
description=format_description(description_error),
state="failure",
target_url=url,
)

@ -10,7 +10,7 @@ from github import Github

from build_download_helper import get_build_name_for_check, read_build_urls
from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse
from commit_status_helper import post_commit_status
from commit_status_helper import format_description, post_commit_status
from docker_pull_helper import get_image_with_version
from env_helper import (
GITHUB_REPOSITORY,

@ -171,11 +171,13 @@ def main():
with open(
os.path.join(workspace_path, "description.txt"), "r", encoding="utf-8"
) as desc_f:
description = desc_f.readline().rstrip("\n")[:140]
description = desc_f.readline().rstrip("\n")
except:
# status = "failure"
description = "Task failed: $?=" + str(retcode)

description = format_description(description)

report_url = upload_results(
s3_helper,
pr_info.number,

@ -148,7 +148,7 @@ def main():
if args.push:
checkout_head(pr_info)

gh = GitHub(get_best_robot_token(), per_page=100, create_cache_dir=False)
gh = GitHub(get_best_robot_token(), create_cache_dir=False)

atexit.register(update_mergeable_check, gh, pr_info, NAME)

@ -3264,7 +3264,7 @@ class ClickHouseInstance:
sleep_time=0.5,
check_callback=lambda x: True,
):
logging.debug(f"Executing query {sql} on {self.name}")
# logging.debug(f"Executing query {sql} on {self.name}")
result = None
for i in range(retry_count):
try:

@ -3283,7 +3283,7 @@ class ClickHouseInstance:
return result
time.sleep(sleep_time)
except Exception as ex:
logging.debug("Retry {} got exception {}".format(i + 1, ex))
# logging.debug("Retry {} got exception {}".format(i + 1, ex))
time.sleep(sleep_time)

if result is not None:

@ -648,24 +648,15 @@ def test_async_backups_to_same_destination(interface):
"",
)

ids_succeeded = (
instance.query(
f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_CREATED'"
)
.rstrip("\n")
.split("\n")
)
ids_succeeded = instance.query(
f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_CREATED'"
).splitlines()

ids_failed = (
instance.query(
f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_FAILED'"
)
.rstrip("\n")
.split("\n")
)
ids_failed = instance.query(
f"SELECT id FROM system.backups WHERE id IN {ids_for_query} AND status == 'BACKUP_FAILED'"
).splitlines()

assert len(ids_succeeded) == 1
assert len(ids_failed) <= 1
assert set(ids_succeeded + ids_failed) == set(ids)

# Check that the first backup is all right.
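
The test refactor just above replaces .rstrip("\n").split("\n") with .splitlines() when collecting backup ids. Besides being shorter, the two behave differently on an empty query result, which is easy to check:

    # splitlines() yields no elements for an empty string, while split("\n")
    # yields a single empty element; the former is closer to what assertions
    # like `assert len(ids_failed) <= 1` intend for "no rows returned".
    assert "".rstrip("\n").split("\n") == [""]
    assert "".splitlines() == []
    assert "id1\nid2\n".splitlines() == ["id1", "id2"]
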
@ -82,8 +82,12 @@ def drop_after_test():
try:
yield
finally:
node0.query("DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY")
node0.query("DROP DATABASE IF EXISTS mydb ON CLUSTER 'cluster' NO DELAY")
node0.query(
"DROP TABLE IF EXISTS tbl ON CLUSTER 'cluster' NO DELAY",
settings={
"distributed_ddl_task_timeout": 360,
},
)


backup_id_counter = 0

@ -138,7 +142,12 @@ def test_concurrent_backups_on_same_node():

# This restore part is added to confirm creating an internal backup & restore work
# even when a concurrent backup is stopped
nodes[0].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY")
nodes[0].query(
f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY",
settings={
"distributed_ddl_task_timeout": 360,
},
)
nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}")
nodes[0].query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' tbl")

@ -158,9 +167,14 @@ def test_concurrent_backups_on_different_nodes():
f"SELECT status FROM system.backups WHERE status == 'CREATING_BACKUP' AND id = '{id}'",
"CREATING_BACKUP",
)
assert "Concurrent backups not supported" in nodes[2].query_and_get_error(
assert "Concurrent backups not supported" in nodes[0].query_and_get_error(
f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}"
)
assert_eq_with_retry(
nodes[1],
f"SELECT status FROM system.backups WHERE status == 'BACKUP_CREATED' AND id = '{id}'",
"BACKUP_CREATED",
)


def test_concurrent_restores_on_same_node():

@ -185,11 +199,20 @@ def test_concurrent_restores_on_same_node():
"BACKUP_CREATED",
)

nodes[0].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY")
nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC")
nodes[0].query(
f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY",
settings={
"distributed_ddl_task_timeout": 360,
},
)
restore_id = (
nodes[0]
.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC")
.split("\t")[0]
)
assert_eq_with_retry(
nodes[0],
f"SELECT status FROM system.backups WHERE status == 'RESTORING'",
f"SELECT status FROM system.backups WHERE status == 'RESTORING' AND id == '{restore_id}'",
"RESTORING",
)
assert "Concurrent restores not supported" in nodes[0].query_and_get_error(

@ -219,8 +242,17 @@ def test_concurrent_restores_on_different_node():
"BACKUP_CREATED",
)

nodes[0].query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY")
nodes[0].query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC")
nodes[0].query(
f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY",
settings={
"distributed_ddl_task_timeout": 360,
},
)
restore_id = (
nodes[0]
.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name} ASYNC")
.split("\t")[0]
)
assert_eq_with_retry(
nodes[0],
f"SELECT status FROM system.backups WHERE status == 'RESTORING'",

@ -229,3 +261,9 @@ def test_concurrent_restores_on_different_node():
assert "Concurrent restores not supported" in nodes[1].query_and_get_error(
f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}"
)

assert_eq_with_retry(
nodes[0],
f"SELECT status FROM system.backups WHERE status == 'RESTORED' AND id == '{restore_id}'",
"RESTORED",
)
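
The concurrency tests above start capturing the id that BACKUP/RESTORE ... ASYNC returns and then poll system.backups by that id, so an unrelated concurrent operation cannot satisfy the status check by accident. A condensed, self-contained sketch of the extraction step (query() here is a stand-in for the test harness' node.query(); the tab-separated "<id>\t<status>" row shape is what the .split("\t")[0] in the tests relies on):

    def query(sql: str) -> str:
        # Stand-in: an ASYNC BACKUP/RESTORE returns one row "<id>\t<status>\n".
        return "123e4567-e89b-12d3-a456-426614174000\tRESTORING\n"

    restore_id = query(
        "RESTORE TABLE tbl ON CLUSTER 'cluster' FROM Disk('backups', '1') ASYNC"
    ).split("\t")[0]
    assert restore_id == "123e4567-e89b-12d3-a456-426614174000"
    # The test then retries: SELECT status FROM system.backups WHERE id == '{restore_id}'
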
Some files were not shown because too many files have changed in this diff.