Merge branch 'master' into trying_actions

commit a94b8902dd
Author: mergify[bot]
Date: 2021-10-28 15:57:28 +00:00
Committed by: GitHub
491 changed files with 5300 additions and 2226 deletions

View File

@ -149,8 +149,6 @@ if (ENABLE_FUZZING)
set (ENABLE_JEMALLOC 0)
set (ENABLE_CHECK_HEAVY_BUILDS 1)
set (GLIBC_COMPATIBILITY OFF)
set (ENABLE_PROTOBUF ON)
set (USE_INTERNAL_PROTOBUF_LIBRARY ON)
endif()
# Global libraries

View File

@ -177,8 +177,6 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_)
}
#if !defined(ARCADIA_BUILD) /// Arcadia's variant of CCTZ already has the same implementation.
/// Prefer to load timezones from blobs linked to the binary.
/// The blobs are provided by "tzdata" library.
/// This allows to avoid dependency on system tzdata.
@ -234,5 +232,3 @@ namespace cctz_extension
ZoneInfoSourceFactory zone_info_source_factory = custom_factory;
}
#endif

View File

@ -103,7 +103,6 @@ String LineReader::readLine(const String & first_prompt, const String & second_p
continue;
}
#if !defined(ARCADIA_BUILD) /// C++20
const char * has_extender = nullptr;
for (const auto * extender : extenders)
{
@ -133,7 +132,6 @@ String LineReader::readLine(const String & first_prompt, const String & second_p
if (input.empty())
continue;
}
#endif
line += (line.empty() ? "" : "\n") + input;

View File

@ -83,10 +83,6 @@
# define BOOST_USE_UCONTEXT 1
#endif
#if defined(ARCADIA_BUILD) && defined(BOOST_USE_UCONTEXT)
# undef BOOST_USE_UCONTEXT
#endif
/// TODO: Strange enough, there is no way to detect UB sanitizer.
/// Explicitly allow undefined behaviour for certain functions. Use it as a function attribute.

View File

@ -49,12 +49,3 @@ namespace
#define LOG_WARNING(logger, ...) LOG_IMPL(logger, DB::LogsLevel::warning, Poco::Message::PRIO_WARNING, __VA_ARGS__)
#define LOG_ERROR(logger, ...) LOG_IMPL(logger, DB::LogsLevel::error, Poco::Message::PRIO_ERROR, __VA_ARGS__)
#define LOG_FATAL(logger, ...) LOG_IMPL(logger, DB::LogsLevel::error, Poco::Message::PRIO_FATAL, __VA_ARGS__)
/// Compatibility for external projects.
#if defined(ARCADIA_BUILD)
using Poco::Logger;
using Poco::Message;
using DB::LogsLevel;
using DB::CurrentThread;
#endif

View File

@ -3,41 +3,24 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
#if !defined(ARCADIA_BUILD)
# include <miniselect/floyd_rivest_select.h> // Y_IGNORE
#else
# include <algorithm>
#endif
#include <miniselect/floyd_rivest_select.h>
template <class RandomIt>
void nth_element(RandomIt first, RandomIt nth, RandomIt last)
{
#if !defined(ARCADIA_BUILD)
::miniselect::floyd_rivest_select(first, nth, last);
#else
::std::nth_element(first, nth, last);
#endif
}
template <class RandomIt>
void partial_sort(RandomIt first, RandomIt middle, RandomIt last)
{
#if !defined(ARCADIA_BUILD)
::miniselect::floyd_rivest_partial_sort(first, middle, last);
#else
::std::partial_sort(first, middle, last);
#endif
}
template <class RandomIt, class Compare>
void partial_sort(RandomIt first, RandomIt middle, RandomIt last, Compare compare)
{
#if !defined(ARCADIA_BUILD)
::miniselect::floyd_rivest_partial_sort(first, middle, last, compare);
#else
::std::partial_sort(first, middle, last, compare);
#endif
}
#pragma GCC diagnostic pop
}

View File

@ -63,9 +63,7 @@
#include <Common/Elf.h>
#include <filesystem>
#if !defined(ARCADIA_BUILD)
# include <Common/config_version.h>
#endif
#include <Common/config_version.h>
#if defined(OS_DARWIN)
# pragma GCC diagnostic ignored "-Wunused-macros"

View File

@ -15,14 +15,12 @@
#include <Core/ServerUUID.h>
#include <Common/hex.h>
#if !defined(ARCADIA_BUILD)
# include "Common/config_version.h"
# include <Common/config.h>
#endif
#include "Common/config_version.h"
#include <Common/config.h>
#if USE_SENTRY
# include <sentry.h> // Y_IGNORE
# include <sentry.h>
# include <stdio.h>
# include <filesystem>

View File

@ -2,7 +2,7 @@
#include <errmsg.h>
#include <mysql.h>
#else
#include <mysql/errmsg.h> //Y_IGNORE
#include <mysql/errmsg.h>
#include <mysql/mysql.h>
#endif

View File

@ -179,7 +179,7 @@ else ()
set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mbmi")
endif ()
if (HAVE_AVX512)
set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mavx512f -mavx512bw")
set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mavx512f -mavx512bw -mprefer-vector-width=256")
endif ()
endif ()
endif ()

contrib/aws vendored

@ -1 +1 @@
Subproject commit 06aa8759d17f2032ffd5efa83969270ca9ac727b
Subproject commit 00b03604543367d7e310cb0993973fdcb723ea79

View File

@ -17,6 +17,16 @@ endif ()
add_subdirectory("${protobuf_SOURCE_DIR}/cmake" "${protobuf_BINARY_DIR}")
if (ENABLE_FUZZING)
# `protoc` will be built with the sanitizer, and it could fail during the ClickHouse build.
# This is easily reproduced in the oss-fuzz build pipeline.
# To avoid it we could try to build `protoc` without any sanitizer using `-fno-sanitize=all`, but
# in that case we would face linker errors, because libcxx would still be built with the sanitizer.
# So we simply suppress all of these failures with a combination of this flag and an environment variable:
# export MSAN_OPTIONS=exit_code=0
target_compile_options(protoc PRIVATE "-fsanitize-recover=all")
endif()
# We don't want to stop compilation on warnings in protobuf's headers.
# The following line overrides the value assigned by the command target_include_directories() in libprotobuf.cmake
set_property(TARGET libprotobuf PROPERTY INTERFACE_SYSTEM_INCLUDE_DIRECTORIES "${protobuf_SOURCE_DIR}/src")

View File

@ -12,19 +12,19 @@ printenv
rm -f CMakeCache.txt
read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
# Hopefully most of the files will be in the cache, so we just link the new executables
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_C_COMPILER="$CC" -DCMAKE_CXX_COMPILER="$CXX" -DENABLE_CLICKHOUSE_ODBC_BRIDGE=OFF \
-DENABLE_LIBRARIES=0 -DENABLE_SSL=1 -DUSE_INTERNAL_SSL_LIBRARY=1 -DUSE_UNWIND=ON -DENABLE_EMBEDDED_COMPILER=0 \
-DENABLE_EXAMPLES=0 -DENABLE_UTILS=0 -DENABLE_THINLTO=0 "-DSANITIZE=$SANITIZER" \
-DENABLE_FUZZING=1 -DFUZZER='libfuzzer' -DENABLE_TCMALLOC=0 -DENABLE_JEMALLOC=0 \
-DENABLE_CHECK_HEAVY_BUILDS=1 -DGLIBC_COMPATIBILITY=OFF "${CMAKE_FLAGS[@]}" ..
# Please add or change flags directly in CMake
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_C_COMPILER="$CC" -DCMAKE_CXX_COMPILER="$CXX" \
-DSANITIZE="$SANITIZER" -DENABLE_FUZZING=1 -DFUZZER='libfuzzer' -DENABLE_PROTOBUF=1 -DUSE_INTERNAL_PROTOBUF_LIBRARY=1 "${CMAKE_FLAGS[@]}" ..
FUZZER_TARGETS=$(find ../src -name '*_fuzzer.cpp' -execdir basename {} .cpp ';' | tr '\n' ' ')
NUM_JOBS=$(($(nproc || grep -c ^processor /proc/cpuinfo)))
mkdir -p /output/fuzzers
for FUZZER_TARGET in $FUZZER_TARGETS
do
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
ninja $NINJA_FLAGS $FUZZER_TARGET
ninja $NINJA_FLAGS $FUZZER_TARGET -j $NUM_JOBS
# Find this binary in build directory and strip it
FUZZER_PATH=$(find ./src -name "$FUZZER_TARGET")
strip --strip-unneeded "$FUZZER_PATH"

View File

@ -10,7 +10,7 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
python3-pip \
pylint \
yamllint \
&& pip3 install codespell
&& pip3 install codespell PyGithub boto3 unidiff
COPY run.sh /
COPY process_style_check_result.py /

View File

@ -35,6 +35,8 @@ The [system.clusters](../../operations/system-tables/clusters.md) system table c
When creating a new replica of the database, this replica creates tables by itself. If the replica has been unavailable for a long time and has lagged behind the replication log, it checks its local metadata against the current metadata in ZooKeeper, moves the extra tables with data to a separate non-replicated database (so as not to accidentally delete anything superfluous), creates the missing tables, and updates the table names if they have been renamed. The data is replicated at the `ReplicatedMergeTree` level, i.e. if the table is not replicated, the data will not be replicated (the database is responsible only for metadata).
[`ALTER TABLE ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md) queries are allowed but not replicated. The database engine will only add/fetch/remove the partition/part to the current replica. However, if the table itself uses a Replicated table engine, then the data will be replicated after using `ATTACH`.
## Usage Example {#usage-example}
Creating a cluster with three hosts:
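The `CREATE` statements of the example are cut off in this hunk. A minimal sketch of what such a statement might look like (the ZooKeeper path, shard, and replica names are illustrative):

``` sql
CREATE DATABASE r ENGINE = Replicated('/clickhouse/databases/r', 'shard1', 'replica1');
```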

View File

@ -164,8 +164,7 @@ This format is also available under the name `TSVRaw`.
## TabSeparatedWithNames {#tabseparatedwithnames}
Differs from the `TabSeparated` format in that the column names are written in the first row.
During parsing, the first row is completely ignored. You can't use column names to determine their position or to check their correctness.
(Support for parsing the header row may be added in the future.)
During parsing, the first row is expected to contain the column names. You can use column names to determine their position and to check their correctness.
This format is also available under the name `TSVWithNames`.
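For instance (a sketch with illustrative column names), the first row of the output carries the header, and on input the same row is parsed as column names:

``` sql
SELECT 1 AS x, 'hello' AS s FORMAT TSVWithNames
```

``` text
x	s
1	hello
```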

View File

@ -24,7 +24,7 @@ toc_title: Adopters
| <a href="https://arenadata.tech/" class="favicon">ArenaData</a> | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) |
| <a href="https://www.argedor.com/en/clickhouse/" class="favicon">Argedor</a> | ClickHouse support | — | — | — | [Official website](https://www.argedor.com/en/clickhouse/) |
| <a href="https://avito.ru/" class="favicon">Avito</a> | Classifieds | Monitoring | — | — | [Meetup, April 2020](https://www.youtube.com/watch?v=n1tm4j4W8ZQ) |
| <a href="https://badoo.com" class="favicon">Badoo</a> | Dating | Timeseries | — | | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/forecast.pdf) |
| <a href="https://badoo.com" class="favicon">Badoo</a> | Dating | Timeseries | — | 1.6 mln events/sec (2018) | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/forecast.pdf) |
| <a href="https://beeline.ru/" class="favicon">Beeline</a> | Telecom | Data Platform | — | — | [Blog post, July 2021](https://habr.com/en/company/beeline/blog/567508/) |
| <a href="https://www.benocs.com/" class="favicon">Benocs</a> | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
| <a href="https://www.bigo.sg/" class="favicon">BIGO</a> | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) |

View File

@ -3830,3 +3830,86 @@ Default value: `0`.
**See Also**
- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting
## async_insert {#async-insert}
Enables or disables asynchronous inserts. This makes sense only for insertion over the HTTP protocol. Note that deduplication does not work for such inserts.
If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables.
The data is inserted either after the [async_insert_max_data_size](#async-insert-max-data-size) is exceeded or after [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds since the first `INSERT` query. If the [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted after `async_insert_stale_timeout_ms` milliseconds since the last query.
If [wait_for_async_insert](#wait-for-async-insert) is enabled, every client will wait for the data to be processed and flushed to the table. Otherwise, the query would be processed almost instantly, even if the data is not inserted.
Possible values:
- 0 — Insertions are made synchronously, one after another.
- 1 — Multiple asynchronous insertions enabled.
Default value: `0`.
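A minimal usage sketch (the table name is illustrative; over the HTTP protocol these settings are usually passed as URL parameters):

``` sql
SET async_insert = 1;           -- enable asynchronous inserts
SET wait_for_async_insert = 1;  -- return OK only after the data is flushed to the table
INSERT INTO my_table VALUES (1, 'a'), (2, 'b');
```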
## async_insert_threads {#async-insert-threads}
The maximum number of threads for background data parsing and insertion.
Possible values:
- Positive integer.
- 0 — Asynchronous insertions are disabled.
Default value: `16`.
## wait_for_async_insert {#wait-for-async-insert}
Enables or disables waiting for processing of asynchronous insertion. If enabled, the server returns `OK` only after the data is inserted. Otherwise, it returns `OK` even if the data wasn't inserted.
Possible values:
- 0 — Server returns `OK` even if the data is not yet inserted.
- 1 — Server returns `OK` only after the data is inserted.
Default value: `1`.
## wait_for_async_insert_timeout {#wait-for-async-insert-timeout}
The timeout in seconds for waiting for processing of asynchronous insertion.
Possible values:
- Positive integer.
- 0 — Disabled.
Default value: [lock_acquire_timeout](#lock_acquire_timeout).
## async_insert_max_data_size {#async-insert-max-data-size}
The maximum size of the unparsed data in bytes collected per query before being inserted.
Possible values:
- Positive integer.
- 0 — Asynchronous insertions are disabled.
Default value: `1000000`.
## async_insert_busy_timeout_ms {#async-insert-busy-timeout-ms}
The maximum timeout in milliseconds since the first `INSERT` query before inserting collected data.
Possible values:
- Positive integer.
- 0 — Timeout disabled.
Default value: `200`.
## async_insert_stale_timeout_ms {#async-insert-stale-timeout-ms}
The maximum timeout in milliseconds since the last `INSERT` query before dumping collected data. If enabled, the setting prolongs [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) with every `INSERT` query as long as [async_insert_max_data_size](#async-insert-max-data-size) is not exceeded.
Possible values:
- Positive integer.
- 0 — Timeout disabled.
Default value: `0`.
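A sketch of how the two timeouts interact (the values are illustrative):

``` sql
SET async_insert = 1;
SET async_insert_busy_timeout_ms = 200;  -- flush at most 200 ms after the first INSERT
SET async_insert_stale_timeout_ms = 50;  -- but flush 50 ms after the last INSERT if no new queries arrive
```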

View File

@ -8,43 +8,48 @@ Example:
``` sql
SELECT *
FROM system.replicas
WHERE table = 'visits'
WHERE table = 'test_table'
FORMAT Vertical
```
``` text
Query id: dc6dcbcb-dc28-4df9-ae27-4354f5b3b13e
Row 1:
──────
database: merge
table: visits
engine: ReplicatedCollapsingMergeTree
is_leader: 1
can_become_leader: 1
is_readonly: 0
is_session_expired: 0
future_parts: 1
parts_to_check: 0
zookeeper_path: /clickhouse/tables/01-06/visits
replica_name: example01-06-1.yandex.ru
replica_path: /clickhouse/tables/01-06/visits/replicas/example01-06-1.yandex.ru
columns_version: 9
queue_size: 1
inserts_in_queue: 0
merges_in_queue: 1
part_mutations_in_queue: 0
queue_oldest_time: 2020-02-20 08:34:30
inserts_oldest_time: 1970-01-01 00:00:00
merges_oldest_time: 2020-02-20 08:34:30
part_mutations_oldest_time: 1970-01-01 00:00:00
oldest_part_to_get:
oldest_part_to_merge_to: 20200220_20284_20840_7
oldest_part_to_mutate_to:
log_max_index: 596273
log_pointer: 596274
last_queue_update: 2020-02-20 08:34:32
absolute_delay: 0
total_replicas: 2
active_replicas: 2
───────
database: db
table: test_table
engine: ReplicatedMergeTree
is_leader: 1
can_become_leader: 1
is_readonly: 0
is_session_expired: 0
future_parts: 0
parts_to_check: 0
zookeeper_path: /test/test_table
replica_name: r1
replica_path: /test/test_table/replicas/r1
columns_version: -1
queue_size: 27
inserts_in_queue: 27
merges_in_queue: 0
part_mutations_in_queue: 0
queue_oldest_time: 2021-10-12 14:48:48
inserts_oldest_time: 2021-10-12 14:48:48
merges_oldest_time: 1970-01-01 03:00:00
part_mutations_oldest_time: 1970-01-01 03:00:00
oldest_part_to_get: 1_17_17_0
oldest_part_to_merge_to:
oldest_part_to_mutate_to:
log_max_index: 206
log_pointer: 207
last_queue_update: 2021-10-12 14:50:08
absolute_delay: 99
total_replicas: 5
active_replicas: 5
last_queue_update_exception:
zookeeper_exception:
replica_is_active: {'r1':1,'r2':1}
```
Columns:
@ -82,6 +87,8 @@ The next 4 columns have a non-zero value only where there is an active session w
- `absolute_delay` (`UInt64`) - How big lag in seconds the current replica has.
- `total_replicas` (`UInt8`) - The total number of known replicas of this table.
- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ZooKeeper (i.e., the number of functioning replicas).
- `last_queue_update_exception` (`String`) - The last exception raised when the queue contains broken entries. Especially important when ClickHouse breaks backward compatibility between versions and log entries written by newer versions aren't parseable by old versions.
- `zookeeper_exception` (`String`) - The last exception message, received if an error happened when fetching the info from ZooKeeper.
- `replica_is_active` ([Map(String, UInt8)](../../sql-reference/data-types/map.md)) — Map between replica name and whether the replica is active.
If you request all the columns, the table may work a bit slowly, since several reads from ZooKeeper are made for each row.
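A quicker-check sketch that reads only a few columns instead of all of them (the delay threshold is illustrative):

``` sql
SELECT database, table, is_readonly, absolute_delay
FROM system.replicas
WHERE is_readonly OR absolute_delay > 300;
```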

View File

@ -810,6 +810,150 @@ Result:
└─────┘
```
## normalizeUTF8NFC {#normalizeutf8nfc}
Converts a string to [NFC normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string contains a set of bytes that make up a UTF-8 encoded text.
**Syntax**
``` sql
normalizeUTF8NFC(words)
```
**Arguments**
- `words` — Input string that contains UTF-8 encoded text. [String](../../sql-reference/data-types/string.md).
**Returned value**
- String transformed to NFC normalization form.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT length('â'), normalizeUTF8NFC('â') AS nfc, length(nfc) AS nfc_len;
```
Result:
``` text
┌─length('â')─┬─nfc─┬─nfc_len─┐
│ 2 │ â │ 2 │
└─────────────┴─────┴─────────┘
```
## normalizeUTF8NFD {#normalizeutf8nfd}
Converts a string to [NFD normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string contains a set of bytes that make up a UTF-8 encoded text.
**Syntax**
``` sql
normalizeUTF8NFD(words)
```
**Arguments**
- `words` — Input string that contains UTF-8 encoded text. [String](../../sql-reference/data-types/string.md).
**Returned value**
- String transformed to NFD normalization form.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT length('â'), normalizeUTF8NFD('â') AS nfd, length(nfd) AS nfd_len;
```
Result:
``` text
┌─length('â')─┬─nfd─┬─nfd_len─┐
│ 2 │ â │ 3 │
└─────────────┴─────┴─────────┘
```
## normalizeUTF8NFKC {#normalizeutf8nfkc}
Converts a string to [NFKC normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string contains a set of bytes that make up a UTF-8 encoded text.
**Syntax**
``` sql
normalizeUTF8NFKC(words)
```
**Arguments**
- `words` — Input string that contains UTF-8 encoded text. [String](../../sql-reference/data-types/string.md).
**Returned value**
- String transformed to NFKC normalization form.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT length('â'), normalizeUTF8NFKC('â') AS nfkc, length(nfkc) AS nfkc_len;
```
Result:
``` text
┌─length('â')─┬─nfkc─┬─nfkc_len─┐
│ 2 │ â │ 2 │
└─────────────┴──────┴──────────┘
```
## normalizeUTF8NFKD {#normalizeutf8nfkd}
Converts a string to [NFKD normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string contains a set of bytes that make up a UTF-8 encoded text.
**Syntax**
``` sql
normalizeUTF8NFKD(words)
```
**Arguments**
- `words` — Input string that contains UTF-8 encoded text. [String](../../sql-reference/data-types/string.md).
**Returned value**
- String transformed to NFKD normalization form.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT length('â'), normalizeUTF8NFKD('â') AS nfkd, length(nfkd) AS nfkd_len;
```
Result:
``` text
┌─length('â')─┬─nfkd─┬─nfkd_len─┐
│ 2 │ â │ 3 │
└─────────────┴──────┴──────────┘
```
## encodeXMLComponent {#encode-xml-component}
Escapes characters to place a string into an XML text node or attribute.

View File

@ -350,6 +350,45 @@ Result:
│ ['eleven','11'] │
│ ['twelve','6.0'] │
└──────────────────┘
```
## mapContainsKeyLike {#mapContainsKeyLike}
Checks whether the map contains a key that matches the specified LIKE pattern.
**Syntax**
```sql
mapContainsKeyLike(map, pattern)
```
**Parameters**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `pattern` - String pattern to match.
**Returned value**
- `1` if `map` contains a key matching the specified pattern, `0` if not.
**Example**
Query:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
SELECT mapContainsKeyLike(a, 'a%') FROM test;
```
Result:
```text
┌─mapContainsKeyLike(a, 'a%')─┐
│ 1 │
│ 0 │
└─────────────────────────────┘
```
[Original article](https://clickhouse.com/docs/en/sql-reference/functions/tuple-map-functions/) <!--hide-->

View File

@ -81,6 +81,53 @@ For tuple subtraction: [tupleMinus](../../sql-reference/functions/tuple-function
`a GLOBAL NOT IN ...` The `globalNotIn(a, b)` function.
`a = ANY (subquery)` The `in(a, subquery)` function.
`a != ANY (subquery)` The same as `a NOT IN (SELECT singleValueOrNull(*) FROM subquery)`.
`a = ALL (subquery)` The same as `a IN (SELECT singleValueOrNull(*) FROM subquery)`.
`a != ALL (subquery)` The `notIn(a, subquery)` function.
**Examples**
Query with ALL:
``` sql
SELECT number AS a FROM numbers(10) WHERE a > ALL (SELECT number FROM numbers(3, 3));
```
Result:
``` text
┌─a─┐
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└───┘
```
Query with ANY:
``` sql
SELECT number AS a FROM numbers(10) WHERE a > ANY (SELECT number FROM numbers(3, 3));
```
Result:
``` text
┌─a─┐
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└───┘
```
## Operators for Working with Dates and Times {#operators-datetime}
### EXTRACT {#operator-extract}

View File

@ -119,3 +119,14 @@ Performance will not decrease if:
- Data is added in real time.
- You upload data that is usually sorted by time.
It is also possible to insert data asynchronously in small but frequent inserts. The data from such insertions is combined into batches and then safely inserted into a table. To enable the asynchronous mode, switch on the [async_insert](../../operations/settings/settings.md#async-insert) setting. Note that asynchronous insertions are supported only over the HTTP protocol, and deduplication is not supported for them.
**See Also**
- [async_insert](../../operations/settings/settings.md#async-insert)
- [async_insert_threads](../../operations/settings/settings.md#async-insert-threads)
- [wait_for_async_insert](../../operations/settings/settings.md#wait-for-async-insert)
- [wait_for_async_insert_timeout](../../operations/settings/settings.md#wait-for-async-insert-timeout)
- [async_insert_max_data_size](../../operations/settings/settings.md#async-insert-max-data-size)
- [async_insert_busy_timeout_ms](../../operations/settings/settings.md#async-insert-busy-timeout-ms)
- [async_insert_stale_timeout_ms](../../operations/settings/settings.md#async-insert-stale-timeout-ms)

View File

@ -0,0 +1,69 @@
---
toc_title: EXCEPT
---
# EXCEPT Clause {#except-clause}
The `EXCEPT` clause returns only those rows that result from the first query without the second. The queries must match the number of columns, order, and type. The result of `EXCEPT` can contain duplicate rows.
Multiple `EXCEPT` statements are executed left to right if parentheses are not specified. The `EXCEPT` operator has the same priority as the `UNION` clause and lower priority than the `INTERSECT` clause.
``` sql
SELECT column1 [, column2 ]
FROM table1
[WHERE condition]
EXCEPT
SELECT column1 [, column2 ]
FROM table2
[WHERE condition]
```
The condition could be any expression based on your requirements.
**Examples**
Query:
``` sql
SELECT number FROM numbers(1,10) EXCEPT SELECT number FROM numbers(3,6);
```
Result:
``` text
┌─number─┐
│ 1 │
│ 2 │
│ 9 │
│ 10 │
└────────┘
```
Query:
``` sql
CREATE TABLE t1(one String, two String, three String) ENGINE=Memory();
CREATE TABLE t2(four String, five String, six String) ENGINE=Memory();
INSERT INTO t1 VALUES ('q', 'm', 'b'), ('s', 'd', 'f'), ('l', 'p', 'o'), ('s', 'd', 'f'), ('s', 'd', 'f'), ('k', 't', 'd'), ('l', 'p', 'o');
INSERT INTO t2 VALUES ('q', 'm', 'b'), ('b', 'd', 'k'), ('s', 'y', 't'), ('s', 'd', 'f'), ('m', 'f', 'o'), ('k', 'k', 'd');
SELECT * FROM t1 EXCEPT SELECT * FROM t2;
```
Result:
``` text
┌─one─┬─two─┬─three─┐
│ l │ p │ o │
│ k │ t │ d │
│ l │ p │ o │
└─────┴─────┴───────┘
```
**See Also**
- [UNION](union.md#union-clause)
- [INTERSECT](intersect.md#intersect-clause)

View File

@ -49,6 +49,8 @@ Specifics of each optional clause are covered in separate sections, which are li
- [LIMIT clause](../../../sql-reference/statements/select/limit.md)
- [OFFSET clause](../../../sql-reference/statements/select/offset.md)
- [UNION clause](../../../sql-reference/statements/select/union.md)
- [INTERSECT clause](../../../sql-reference/statements/select/intersect.md)
- [EXCEPT clause](../../../sql-reference/statements/select/except.md)
- [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md)
- [FORMAT clause](../../../sql-reference/statements/select/format.md)

View File

@ -0,0 +1,73 @@
---
toc_title: INTERSECT
---
# INTERSECT Clause {#intersect-clause}
The `INTERSECT` clause returns only those rows that result from both the first and the second queries. The queries must match the number of columns, order, and type. The result of `INTERSECT` can contain duplicate rows.
Multiple `INTERSECT` statements are executed left to right if parentheses are not specified. The `INTERSECT` operator has a higher priority than the `UNION` and `EXCEPT` clauses.
``` sql
SELECT column1 [, column2 ]
FROM table1
[WHERE condition]
INTERSECT
SELECT column1 [, column2 ]
FROM table2
[WHERE condition]
```
The condition could be any expression based on your requirements.
**Examples**
Query:
``` sql
SELECT number FROM numbers(1,10) INTERSECT SELECT number FROM numbers(3,6);
```
Result:
``` text
┌─number─┐
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
└────────┘
```
Query:
``` sql
CREATE TABLE t1(one String, two String, three String) ENGINE=Memory();
CREATE TABLE t2(four String, five String, six String) ENGINE=Memory();
INSERT INTO t1 VALUES ('q', 'm', 'b'), ('s', 'd', 'f'), ('l', 'p', 'o'), ('s', 'd', 'f'), ('s', 'd', 'f'), ('k', 't', 'd'), ('l', 'p', 'o');
INSERT INTO t2 VALUES ('q', 'm', 'b'), ('b', 'd', 'k'), ('s', 'y', 't'), ('s', 'd', 'f'), ('m', 'f', 'o'), ('k', 'k', 'd');
SELECT * FROM t1 INTERSECT SELECT * FROM t2;
```
Result:
``` text
┌─one─┬─two─┬─three─┐
│ q │ m │ b │
│ s │ d │ f │
│ s │ d │ f │
│ s │ d │ f │
└─────┴─────┴───────┘
```
**See Also**
- [UNION](union.md#union-clause)
- [EXCEPT](except.md#except-clause)

View File

@ -3640,3 +3640,87 @@ SELECT * FROM positional_arguments ORDER BY 2,3;
**See Also**
- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting
## async_insert {#async-insert}
Enables or disables asynchronous inserts. Works only for inserts over the HTTP protocol. Note that deduplication does not work for such inserts.
If enabled, the data is collected into batches before insertion into the table. This allows small and frequent inserts into ClickHouse (up to 15000 queries per second) without intermediate tables.
The data is inserted either once the amount of inserted data exceeds [async_insert_max_data_size](#async-insert-max-data-size), or [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds after the first `INSERT` query. If [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted `async_insert_stale_timeout_ms` milliseconds after the last query.
If the [wait_for_async_insert](#wait-for-async-insert) setting is enabled, every client waits until the data is flushed to the table. Otherwise, the query is processed almost instantly, even if the data has not been inserted yet.
Possible values:
- 0 — inserts are made synchronously, one after another.
- 1 — multiple asynchronous inserts are enabled.
Default value: `0`.
## async_insert_threads {#async-insert-threads}
The maximum number of threads for background parsing and insertion of data.
Possible values:
- Positive integer.
- 0 — asynchronous inserts are disabled.
Default value: `16`.
## wait_for_async_insert {#wait-for-async-insert}
Enables or disables waiting for processing of asynchronous inserts. If enabled, the client sees `OK` only after the data is inserted. Otherwise, `OK` is returned even if the insert has not happened.
Possible values:
- 0 — the server returns `OK` even if the data insertion is not yet finished.
- 1 — the server returns `OK` only after the data insertion is finished.
Default value: `1`.
## wait_for_async_insert_timeout {#wait-for-async-insert-timeout}
The timeout in seconds allotted for waiting for processing of an asynchronous insert.
Possible values:
- Positive integer.
- 0 — waiting is disabled.
Default value: [lock_acquire_timeout](#lock_acquire_timeout).
## async_insert_max_data_size {#async-insert-max-data-size}
The maximum size of unparsed data (in bytes) collected per query before it is inserted.
Possible values:
- Positive integer.
- 0 — asynchronous inserts are disabled.
Default value: `1000000`.
## async_insert_busy_timeout_ms {#async-insert-busy-timeout-ms}
The maximum wait time in milliseconds after the first `INSERT` query before inserting the data.
Possible values:
- Positive integer.
- 0 — waiting is disabled.
Default value: `200`.
## async_insert_stale_timeout_ms {#async-insert-stale-timeout-ms}
The maximum wait time in milliseconds after the last `INSERT` query before inserting the data. If a non-zero value is set, [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) is prolonged with every `INSERT` query as long as [async_insert_max_data_size](#async-insert-max-data-size) is not exceeded.
Possible values:
- Positive integer.
- 0 — waiting is disabled.
Default value: `0`.

View File

@ -8,43 +8,48 @@
``` sql
SELECT *
FROM system.replicas
WHERE table = 'visits'
WHERE table = 'test_table'
FORMAT Vertical
```
``` text
Query id: dc6dcbcb-dc28-4df9-ae27-4354f5b3b13e
Row 1:
──────
database: merge
table: visits
engine: ReplicatedCollapsingMergeTree
is_leader: 1
can_become_leader: 1
is_readonly: 0
is_session_expired: 0
future_parts: 1
parts_to_check: 0
zookeeper_path: /clickhouse/tables/01-06/visits
replica_name: example01-06-1.yandex.ru
replica_path: /clickhouse/tables/01-06/visits/replicas/example01-06-1.yandex.ru
columns_version: 9
queue_size: 1
inserts_in_queue: 0
merges_in_queue: 1
part_mutations_in_queue: 0
queue_oldest_time: 2020-02-20 08:34:30
inserts_oldest_time: 0000-00-00 00:00:00
merges_oldest_time: 2020-02-20 08:34:30
part_mutations_oldest_time: 0000-00-00 00:00:00
oldest_part_to_get:
oldest_part_to_merge_to: 20200220_20284_20840_7
oldest_part_to_mutate_to:
log_max_index: 596273
log_pointer: 596274
last_queue_update: 2020-02-20 08:34:32
absolute_delay: 0
total_replicas: 2
active_replicas: 2
───────
database: db
table: test_table
engine: ReplicatedMergeTree
is_leader: 1
can_become_leader: 1
is_readonly: 0
is_session_expired: 0
future_parts: 0
parts_to_check: 0
zookeeper_path: /test/test_table
replica_name: r1
replica_path: /test/test_table/replicas/r1
columns_version: -1
queue_size: 27
inserts_in_queue: 27
merges_in_queue: 0
part_mutations_in_queue: 0
queue_oldest_time: 2021-10-12 14:48:48
inserts_oldest_time: 2021-10-12 14:48:48
merges_oldest_time: 1970-01-01 03:00:00
part_mutations_oldest_time: 1970-01-01 03:00:00
oldest_part_to_get: 1_17_17_0
oldest_part_to_merge_to:
oldest_part_to_mutate_to:
log_max_index: 206
log_pointer: 207
last_queue_update: 2021-10-12 14:50:08
absolute_delay: 99
total_replicas: 5
active_replicas: 5
last_queue_update_exception:
zookeeper_exception:
replica_is_active: {'r1':1,'r2':1}
```
Columns:
@ -82,6 +87,8 @@ active_replicas: 2
- `absolute_delay` (`UInt64`) - How big a lag in seconds the current replica has.
- `total_replicas` (`UInt8`) - The total number of known replicas of this table.
- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ZooKeeper (i.e., the number of functioning replicas).
- `last_queue_update_exception` (`String`) - The last exception raised when the queue contains broken entries. Especially important when ClickHouse breaks backward compatibility between versions and log entries written by newer versions aren't parseable by old versions.
- `zookeeper_exception` (`String`) - The last exception message, received if an error happened when fetching the info from ZooKeeper.
- `replica_is_active` ([Map(String, UInt8)](../../sql-reference/data-types/map.md)) — Map between replica name and whether the replica is active.
If you request all the columns, the table may work a bit slowly, since several reads from ZooKeeper are made for each row.

View File

@ -781,6 +781,150 @@ SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1
└─────┘
```
## normalizeUTF8NFC {#normalizeutf8nfc}
Converts a string to [NFC normalized form](https://ru.wikipedia.org/wiki/Юникод#Алгоритмы_нормализации), assuming the string contains a set of bytes that make up UTF-8 encoded text.
**Syntax**
``` sql
normalizeUTF8NFC(words)
```
**Arguments**
- `words` — Input string that contains UTF-8 encoded text. [String](../../sql-reference/data-types/string.md).
**Returned value**
- String transformed to NFC normalization form.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT length('â'), normalizeUTF8NFC('â') AS nfc, length(nfc) AS nfc_len;
```
Result:
``` text
┌─length('â')─┬─nfc─┬─nfc_len─┐
│ 2 │ â │ 2 │
└─────────────┴─────┴─────────┘
```
## normalizeUTF8NFD {#normalizeutf8nfd}
Converts a string to [NFD normalized form](https://ru.wikipedia.org/wiki/Юникод#Алгоритмы_нормализации), assuming the string contains a set of bytes that make up UTF-8 encoded text.
**Syntax**
``` sql
normalizeUTF8NFD(words)
```
**Arguments**
- `words` — Input string that contains UTF-8 encoded text. [String](../../sql-reference/data-types/string.md).
**Returned value**
- String transformed to NFD normalization form.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT length('â'), normalizeUTF8NFD('â') AS nfd, length(nfd) AS nfd_len;
```
Result:
``` text
┌─length('â')─┬─nfd─┬─nfd_len─┐
│ 2 │ â │ 3 │
└─────────────┴─────┴─────────┘
```
## normalizeUTF8NFKC {#normalizeutf8nfkc}
Converts a string to [NFKC normalized form](https://ru.wikipedia.org/wiki/Юникод#Алгоритмы_нормализации), assuming the string contains a set of bytes that make up UTF-8 encoded text.
**Syntax**
``` sql
normalizeUTF8NFKC(words)
```
**Arguments**
- `words` — Input string that contains UTF-8 encoded text. [String](../../sql-reference/data-types/string.md).
**Returned value**
- String transformed to NFKC normalization form.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT length('â'), normalizeUTF8NFKC('â') AS nfkc, length(nfkc) AS nfkc_len;
```
Result:
``` text
┌─length('â')─┬─nfkc─┬─nfkc_len─┐
│ 2 │ â │ 2 │
└─────────────┴──────┴──────────┘
```
## normalizeUTF8NFKD {#normalizeutf8nfkd}
Converts a string to [NFKD normalized form](https://ru.wikipedia.org/wiki/Юникод#Алгоритмы_нормализации), assuming the string contains a set of bytes that make up UTF-8 encoded text.
**Syntax**
``` sql
normalizeUTF8NFKD(words)
```
**Arguments**
- `words` — Input string that contains UTF-8 encoded text. [String](../../sql-reference/data-types/string.md).
**Returned value**
- String transformed to NFKD normalization form.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT length('â'), normalizeUTF8NFKD('â') AS nfkd, length(nfkd) AS nfkd_len;
```
Result:
``` text
┌─length('â')─┬─nfkd─┬─nfkd_len─┐
│ 2 │ â │ 3 │
└─────────────┴──────┴──────────┘
```
## encodeXMLComponent {#encode-xml-component}
Escapes characters to place a string into an XML text node or attribute.

View File

@ -82,6 +82,53 @@ toc_title: "Операторы"
`a GLOBAL NOT IN ...` - the `globalNotIn(a, b)` function.
`a = ANY (subquery)` - the `in(a, subquery)` function.
`a != ANY (subquery)` - the same as `a NOT IN (SELECT singleValueOrNull(*) FROM subquery)`.
`a = ALL (subquery)` - the same as `a IN (SELECT singleValueOrNull(*) FROM subquery)`.
`a != ALL (subquery)` - the `notIn(a, subquery)` function.
**Examples**
Query with ALL:
``` sql
SELECT number AS a FROM numbers(10) WHERE a > ALL (SELECT number FROM numbers(3, 3));
```
Result:
``` text
┌─a─┐
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└───┘
```
Query with ANY:
``` sql
SELECT number AS a FROM numbers(10) WHERE a > ANY (SELECT number FROM numbers(3, 3));
```
Result:
``` text
┌─a─┐
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└───┘
```
## Operators for Working with Dates and Times {#operators-datetime}
### EXTRACT {#extract}

View File

@ -121,3 +121,14 @@ INSERT INTO [db.]table [(c1, c2, c3)] SELECT ...
- Data is added in real time.
- You upload data that is usually sorted by time.
It is also possible to insert data asynchronously in small but frequent inserts. The data from such insertions is combined into batches and then safely inserted into a table. To enable the asynchronous mode, switch on the [async_insert](../../operations/settings/settings.md#async-insert) setting. Note that asynchronous insertions are supported only over the HTTP protocol, and deduplication is not supported for them.
**See Also**
- [async_insert](../../operations/settings/settings.md#async-insert)
- [async_insert_threads](../../operations/settings/settings.md#async-insert-threads)
- [wait_for_async_insert](../../operations/settings/settings.md#wait-for-async-insert)
- [wait_for_async_insert_timeout](../../operations/settings/settings.md#wait-for-async-insert-timeout)
- [async_insert_max_data_size](../../operations/settings/settings.md#async-insert-max-data-size)
- [async_insert_busy_timeout_ms](../../operations/settings/settings.md#async-insert-busy-timeout-ms)
- [async_insert_stale_timeout_ms](../../operations/settings/settings.md#async-insert-stale-timeout-ms)

View File

@ -0,0 +1,69 @@
---
toc_title: EXCEPT
---
# EXCEPT Clause {#except-clause}
The `EXCEPT` clause returns only those rows that result from the first query without the second. The queries must match in the number of columns, their order, and type. The result of `EXCEPT` can contain duplicate rows.
Multiple `EXCEPT` statements are executed left to right if parentheses are not specified. The `EXCEPT` operator has the same priority as the `UNION` clause and lower priority than the `INTERSECT` clause.
``` sql
SELECT column1 [, column2 ]
FROM table1
[WHERE condition]
EXCEPT
SELECT column1 [, column2 ]
FROM table2
[WHERE condition]
```
The condition in the `WHERE` clause can be any expression based on your requirements.
**Examples**
Query:
``` sql
SELECT number FROM numbers(1,10) EXCEPT SELECT number FROM numbers(3,6);
```
Result:
``` text
┌─number─┐
│ 1 │
│ 2 │
│ 9 │
│ 10 │
└────────┘
```
Query:
``` sql
CREATE TABLE t1(one String, two String, three String) ENGINE=Memory();
CREATE TABLE t2(four String, five String, six String) ENGINE=Memory();
INSERT INTO t1 VALUES ('q', 'm', 'b'), ('s', 'd', 'f'), ('l', 'p', 'o'), ('s', 'd', 'f'), ('s', 'd', 'f'), ('k', 't', 'd'), ('l', 'p', 'o');
INSERT INTO t2 VALUES ('q', 'm', 'b'), ('b', 'd', 'k'), ('s', 'y', 't'), ('s', 'd', 'f'), ('m', 'f', 'o'), ('k', 'k', 'd');
SELECT * FROM t1 EXCEPT SELECT * FROM t2;
```
Result:
``` text
┌─one─┬─two─┬─three─┐
│ l │ p │ o │
│ k │ t │ d │
│ l │ p │ o │
└─────┴─────┴───────┘
```
**See Also**
- [UNION](union.md#union-clause)
- [INTERSECT](intersect.md#intersect-clause)

View File

@ -45,8 +45,10 @@ SELECT [DISTINCT [ON (column1, column2, ...)]] expr_list
- [LIMIT BY clause](limit-by.md)
- [HAVING clause](having.md)
- [LIMIT clause](limit.md)
[OFFSET clause](offset.md)
- [OFFSET clause](offset.md)
- [UNION ALL clause](union.md)
- [INTERSECT clause](intersect.md)
- [EXCEPT clause](except.md)
- [INTO OUTFILE clause](into-outfile.md)
- [FORMAT clause](format.md)

View File

@ -0,0 +1,73 @@
---
toc_title: INTERSECT
---
# INTERSECT Clause {#intersect-clause}
The `INTERSECT` clause returns only those rows that appear in the results of both the first and the second queries. The queries must match in the number of columns, their order, and type. The result of `INTERSECT` can contain duplicate rows.
Multiple `INTERSECT` statements are executed left to right if parentheses are not specified. The `INTERSECT` operator has a higher execution priority than `UNION` and `EXCEPT`.
``` sql
SELECT column1 [, column2 ]
FROM table1
[WHERE condition]
INTERSECT
SELECT column1 [, column2 ]
FROM table2
[WHERE condition]
```
The condition can be any expression based on your requirements.
**Examples**
Query:
``` sql
SELECT number FROM numbers(1,10) INTERSECT SELECT number FROM numbers(3,6);
```
Result:
``` text
┌─number─┐
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
└────────┘
```
Query:
``` sql
CREATE TABLE t1(one String, two String, three String) ENGINE=Memory();
CREATE TABLE t2(four String, five String, six String) ENGINE=Memory();
INSERT INTO t1 VALUES ('q', 'm', 'b'), ('s', 'd', 'f'), ('l', 'p', 'o'), ('s', 'd', 'f'), ('s', 'd', 'f'), ('k', 't', 'd'), ('l', 'p', 'o');
INSERT INTO t2 VALUES ('q', 'm', 'b'), ('b', 'd', 'k'), ('s', 'y', 't'), ('s', 'd', 'f'), ('m', 'f', 'o'), ('k', 'k', 'd');
SELECT * FROM t1 INTERSECT SELECT * FROM t2;
```
Result:
``` text
┌─one─┬─two─┬─three─┐
│ q │ m │ b │
│ s │ d │ f │
│ s │ d │ f │
│ s │ d │ f │
└─────┴─────┴───────┘
```
**See Also**
- [UNION](union.md#union-clause)
- [EXCEPT](except.md#except-clause)

View File

@ -1 +0,0 @@
../../en/development/continuous-integration.md

View File

@ -0,0 +1,155 @@
# Continuous Integration Checks {#continuous-integration-checks}
When you submit a pull request, some automated checks are run for your code by the ClickHouse [continuous integration (CI) system](https://clickhouse.com/docs/en/development/tests/#test-automation).
This happens after a repository maintainer (someone from the ClickHouse team) has screened your code and added the can-be-tested label to your pull request.
The results of the checks are listed on the GitHub pull request page as described in the [GitHub checks documentation](https://docs.github.com/en/github/collaborating-with-pull-requests/collaborating-on-repositories-with-code-quality-features/about-status-checks).
If a check is failing, you might be required to fix it. This page gives an overview of the checks you may encounter and what you can do to fix them.
If it looks like the check failure is not related to your changes, it may be some transient failure or an infrastructure problem. Push an empty commit to the pull request to restart the CI checks:
```
git reset
git commit --allow-empty
git push
```
If you are not sure what to do, ask a maintainer for help.
## Merge With Master {#merge-with-master}
Verifies that the PR can be merged to master. If not, it will fail with the message 'Cannot fetch mergecommit'. To fix this check, resolve the conflict as described in the [GitHub documentation](https://docs.github.com/en/github/collaborating-with-pull-requests/addressing-merge-conflicts/resolving-a-merge-conflict-on-github), or merge the master branch into your pull request branch using git.
## Docs Check {#docs-check}
Tries to build the ClickHouse documentation website. It can fail if you changed something in the documentation. The most probable reason is that some cross-link in the documentation is wrong. Go to the check report and look for `ERROR` and `WARNING` messages.
### Report Details {#report-details}
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/eabcc293eb02214caa6826b7c15f101643f67a6b/docs_check.html)
- `docs_output.txt` contains the build log. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/eabcc293eb02214caa6826b7c15f101643f67a6b/docs_check/docs_output.txt)
## Description Check {#description-check}
Checks that the description of your pull request conforms to the [PULL_REQUEST_TEMPLATE.md](https://github.com/ClickHouse/ClickHouse/blob/master/.github/PULL_REQUEST_TEMPLATE.md) template.
You have to specify a changelog category for your change (e.g., Bug Fix), and write a user-readable message describing the change for [CHANGELOG.md](../whats-new/changelog/).
## Push To DockerHub {#push-to-dockerhub}
Builds the docker images used for build and tests, then pushes them to DockerHub.
## Marker Check {#marker-check}
This check means that the CI system has started processing the PR. When it has 'pending' status, it means that not all checks have been started yet. After all checks have been started, the status changes to 'success'.
## Style Check {#style-check}
Performs some simple regex-based checks of code style, using the `utils/check-style/check-style` binary (note that it can be run locally).
If it fails, fix the style errors following the [code style guide](./style.md).
### Report Details {#report-details}
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check.html)
- `output.txt` lists the resulting check errors (invalid tabulation etc.); a blank page means no errors. [Successful result example](https://clickhouse-test-reports.s3.yandex.net/12550/659c78c7abb56141723af6a81bfae39335aa8cb2/style_check/output.txt)
## PVS Check {#pvs-check}
Checks the code with the static analysis tool [PVS-studio](https://www.viva64.com/en/pvs-studio/). Look at the report to see the exact errors. Fix them if you can; if not, ask a ClickHouse maintainer for help.
### Report Details {#report-details}
- [Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/pvs_check.html)
- `test_run.txt.out.log` contains the build and analyzer log files. It includes only parsing or not-found errors.
- `HTML report` contains the analysis results. For its description visit the PVS [official site](https://www.viva64.com/en/m/0036/#ID14E9A2B2CD).
## Fast Test {#fast-test}
Normally this is the first check run for a PR. It builds ClickHouse and runs most of the stateless functional tests, omitting some of them. If it fails, further checks are not started until it is fixed. Look at the report to see which tests failed, then reproduce the failure locally as described [here](./tests.md#functional-test-locally).
### Report Details {#report-details}
[Status page example](https://clickhouse-test-reports.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/fast_test.html)
#### Status Page Files {#status-page-files}
- `runlog.out.log` is the general log that includes all other logs.
- `test_log.txt`
- `submodule_log.txt` contains the messages about cloning and checking out the needed submodules.
- `stderr.log`
- `stdout.log`
- `clickhouse-server.log`
- `clone_log.txt`
- `install_log.txt`
- `clickhouse-server.err.log`
- `build_log.txt`
- `cmake_log.txt` contains messages about the C/C++ and Linux flags checks.
#### Status Page Columns {#status-page-columns}
- Test name -- contains the name of the test (without the path, e.g., all types of tests will be stripped to the name).
- Test status -- one of Skipped, Success, or Fail.
- Test time, sec. -- empty for this test.
## Build Check {#build-check}
Builds ClickHouse in various configurations for use in further steps. You have to fix the builds that fail. Build logs often have enough information to fix the error, but you might have to reproduce the failure locally. The `cmake` options can be found in the build log by grepping for `cmake`. Use these options and follow the [general build process](./build.md).
### Report Details {#report-details}
[Status page example](https://clickhouse-builds.s3.yandex.net/12550/67d716b5cc3987801996c31a67b31bf141bc3486/clickhouse_build_check/report.html)
- **Compiler**: `gcc-9` or `clang-10` (or `clang-10-xx` for other architectures, e.g. `clang-10-freebsd`).
- **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Bundled**: `bundled` build uses libraries from `contrib`, while `unbundled` build uses system libraries.
- **Splitted**: `splitted` is a [split build](https://clickhouse.com/docs/en/development/build/#split-build)
- **Status**: `success` or `fail`.
- **Build log**: link to the building and file copying log, useful when the build failed.
- **Build time**.
- **Artifacts**: build result files (with `XXX` being the server version, e.g. `20.8.1.4344`).
- `clickhouse-client_XXX_all.deb`
- `clickhouse-common-static-dbg_XXX[+asan, +msan, +ubsan, +tsan]_amd64.deb`
- `clickhouse-common-staticXXX_amd64.deb`
- `clickhouse-server_XXX_all.deb`
- `clickhouse-test_XXX_all.deb`
- `clickhouse_XXX_amd64.buildinfo`
- `clickhouse_XXX_amd64.changes`
- `clickhouse`: Main built binary.
- `clickhouse-odbc-bridge`
- `unit_tests_dbms`: GoogleTest binary with ClickHouse unit tests.
- `shared_build.tgz`: build with shared libraries.
- `performance.tgz`: special package for performance tests.
## Special Build Check {#special-buildcheck}
Performs static analysis and code style checks using clang-tidy. The report is similar to the build check. Fix the errors found in the build log.
## Functional Stateless Tests {#functional-stateless-tests}
Runs [stateless functional tests](./tests.md#functional-tests) for ClickHouse binaries built in various configurations -- release, debug, with sanitizers, etc. Look at the report to see which tests fail, then reproduce the failure locally as described [here](./tests.md#functional-test-locally). Note that you have to use the correct build configuration to reproduce -- a test might fail under AddressSanitizer but pass in Debug. Download the binary from the [CI build checks page](./build.md#you-dont-have-to-build-clickhouse), or build it locally.
## Functional Stateful Tests {#functional-stateful-tests}
Runs [stateful functional tests](./tests.md#functional-tests). Treat them in the same way as the stateless functional tests. The difference is that they require the `hits` and `visits` tables from the [Yandex.Metrica dataset](https://clickhouse.com/docs/en/getting-started/example-datasets/metrica/) to run.
## Integration Tests {#integration-tests}
Runs [integration tests](./tests.md#integration-tests).
## Testflows Check {#testflows-check}
Runs some tests using the Testflows test system. See [here](https://github.com/ClickHouse/ClickHouse/tree/master/tests/testflows#running-tests-locally) how to run them locally.
## Stress Test {#stress-test}
Runs stateless functional tests concurrently from several clients to detect concurrency-related errors. If it fails:
* Fix all other test failures first;
* Look at the report to find the server logs and check them for possible causes of error.
## Split Build Smoke Test {#split-build-smoke-test}
Checks that the server build in the [split build](./build.md#split-build) configuration can start and run simple queries. If it fails:
* Fix other test errors first;
* Build the server in the [split build](./build.md#split-build) configuration locally and check whether it can start and run `select 1`.
## Compatibility Check {#compatibility-check}
Checks that the `clickhouse` binary can run on distributions with old libc versions. If it fails, ask a maintainer for help.
## AST Fuzzer {#ast-fuzzer}
Runs randomly generated queries to catch program errors. If it fails, ask a maintainer for help.
## Performance Tests {#performance-tests}
Measures changes in query performance. This is the longest check, taking just below 6 hours to run. The performance test report is described in detail [here](https://github.com/ClickHouse/ClickHouse/tree/master/docker/test/performance-comparison#how-to-read-the-report).
## QA {#qa}
What is a Task (private network) item on the status pages?
It's a link to the Yandex internal job system. Yandex employees can see the check's start time and its more verbose status.
Where the tests are run
Somewhere in the Yandex internal infrastructure.

View File

@ -1 +0,0 @@
../../../en/operations/external-authenticators/kerberos.md

View File

@ -0,0 +1,105 @@
# Kerberos Authentication {#external-authenticators-kerberos}
Existing and properly configured ClickHouse users can be authenticated via the Kerberos authentication protocol.
Currently, Kerberos can only be used as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths.
Those users may only use HTTP requests and must be able to authenticate using the GSS-SPNEGO mechanism.
For this approach, Kerberos must be configured in the system and must be enabled in the ClickHouse config.
## Enabling Kerberos in ClickHouse {#enabling-kerberos-in-clickHouse}
To enable Kerberos, one should include a `kerberos` section in `config.xml`. This section may contain additional parameters.
#### Parameters: {#parameters}
- `principal` - canonical service principal name that will be acquired and used when accepting security contexts.
  - This parameter is optional; if omitted, the default principal will be used.
- `realm` - a realm that will be used to restrict authentication to only those requests whose initiator's realm matches it.
  - This parameter is optional; if omitted, no additional filtering by realm will be applied.
Example (goes into `config.xml`):
```xml
<yandex>
    <!-- ... -->
    <kerberos />
</yandex>
```
Principal specification:
```xml
<yandex>
    <!-- ... -->
    <kerberos>
        <principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
    </kerberos>
</yandex>
```
Filtering by realm:
```xml
<yandex>
    <!-- ... -->
    <kerberos>
        <realm>EXAMPLE.COM</realm>
    </kerberos>
</yandex>
```
!!! warning "Note"
    You can define only one `kerberos` section. The presence of multiple `kerberos` sections will force ClickHouse to disable Kerberos authentication.
!!! warning "Note"
    `principal` and `realm` sections cannot be specified at the same time. The presence of both will force ClickHouse to disable Kerberos authentication.
## Kerberos as an External Authenticator for Existing Users {#kerberos-as-an-external-authenticator-for-existing-users}
Kerberos can be used as a method for verifying the identity of locally defined users (users defined in `users.xml` or in local access control paths). Currently, **only** requests over the HTTP interface can be authenticated (via the GSS-SPNEGO mechanism).
The Kerberos principal name format usually follows this pattern:
- *primary/instance@REALM*
The */instance* part may occur zero or more times. **The primary part of the canonical principal name of the initiator is expected to match the authenticated user name for authentication to succeed**.
### Enabling Kerberos in `users.xml` {#enabling-kerberos-in-users-xml}
In order to enable Kerberos authentication for a user, specify a `kerberos` section instead of a `password` or similar sections in the user definition.
Parameters:
- `realm` - a realm that will be used to restrict authentication to only those requests whose initiator's realm matches it.
  - This parameter is optional; if omitted, no additional filtering by realm will be applied.
Example (goes into `users.xml`):
```xml
<yandex>
    <!-- ... -->
    <users>
        <!-- ... -->
        <my_user>
            <!-- ... -->
            <kerberos>
                <realm>EXAMPLE.COM</realm>
            </kerberos>
        </my_user>
    </users>
</yandex>
```
!!! warning "Warning"
    Note that Kerberos authentication cannot be used alongside any other authentication mechanism. The presence of any other sections like `password` alongside `kerberos` will force ClickHouse to shut down.
!!! info "Reminder"
    Note that now, once the user `my_user` uses `kerberos`, Kerberos must be enabled in the main `config.xml` file as described previously.
### Enabling Kerberos using SQL {#enabling-kerberos-using-sql}
When [SQL-driven Access Control and Account Management](https://clickhouse.com/docs/en/operations/access-rights/#access-control) is enabled in ClickHouse, users identified by Kerberos can also be created using SQL statements.
```sql
CREATE USER my_user IDENTIFIED WITH kerberos REALM 'EXAMPLE.COM'
```
...or, without filtering by realm:
```sql
CREATE USER my_user IDENTIFIED WITH kerberos
```

View File

@ -5,4 +5,34 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
## system.asynchronous_metric_log {#system-tables-async-log}
Contains the historical values of `system.asynchronous_metrics` (see [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics))
Contains the historical values of `system.asynchronous_metrics`, which are recorded once per minute. Enabled by default.
Columns:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution.
- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.
**Example**
``` sql
SELECT * FROM system.asynchronous_metric_log LIMIT 10
```
``` text
┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │
└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘
```
**See Also**
- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains metrics that are calculated periodically in the background.
- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains the history of metric values from the tables `system.metrics` and `system.events`, periodically flushed to disk.

View File

@ -18,7 +18,9 @@
#include <Common/Macros.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/ThreadStatus.h>
#include <Common/TLDListsHolder.h>
#include <Common/quoteString.h>
#include <Common/randomSeed.h>
#include <loggers/Loggers.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromString.h>
@ -35,7 +37,6 @@
#include <Formats/registerFormats.h>
#include <boost/program_options/options_description.hpp>
#include <base/argsToConfig.h>
#include <Common/randomSeed.h>
#include <filesystem>
namespace fs = std::filesystem;
@ -179,7 +180,6 @@ void LocalServer::initialize(Poco::Util::Application & self)
ConfigProcessor config_processor(config_path, false, true);
config_processor.setConfigPath(fs::path(config_path).parent_path());
auto loaded_config = config_processor.loadConfig();
config_processor.savePreprocessedConfig(loaded_config, loaded_config.configuration->getString("path", "."));
config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
}
@ -284,6 +284,11 @@ void LocalServer::tryInitPath()
global_context->setFlagsPath(path + "flags");
global_context->setUserFilesPath(""); // user's files are everywhere
/// top_level_domains_lists
const std::string & top_level_domains_path = config().getString("top_level_domains_path", path + "top_level_domains/");
if (!top_level_domains_path.empty())
TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", config());
}
@ -380,7 +385,6 @@ void LocalServer::setupUsers()
const auto users_config_path = config().getString("users_config", config().getString("config-file", "config.xml"));
ConfigProcessor config_processor(users_config_path);
const auto loaded_config = config_processor.loadConfig();
config_processor.savePreprocessedConfig(loaded_config, config().getString("path", DBMS_DEFAULT_PATH));
users_config = loaded_config.configuration;
}
else
@ -673,6 +677,7 @@ void LocalServer::addOptions(OptionsDescription & options_description)
("no-system-tables", "do not attach system tables (better startup time)")
("path", po::value<std::string>(), "Storage path")
("top_level_domains_path", po::value<std::string>(), "Path to lists with custom TLDs")
;
}

View File

@ -960,9 +960,14 @@ if (ThreadFuzzer::instance().isEffective())
global_context->setMMappedFileCache(mmap_cache_size);
#if USE_EMBEDDED_COMPILER
/// 128 MB
constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128;
size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size);
constexpr size_t compiled_expression_cache_elements_size_default = 10000;
size_t compiled_expression_cache_elements_size = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default);
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size);
#endif
/// Set path for format schema files

View File

@ -351,9 +351,12 @@
-->
<mmap_cache_size>1000</mmap_cache_size>
<!-- Cache size for compiled expressions.-->
<!-- Cache size in bytes for compiled expressions.-->
<compiled_expression_cache_size>134217728</compiled_expression_cache_size>
<!-- Cache size in elements for compiled expressions.-->
<compiled_expression_cache_elements_size>10000</compiled_expression_cache_elements_size>
<!-- Path to data directory, with trailing slash. -->
<path>/var/lib/clickhouse/</path>

View File

@ -279,9 +279,12 @@ mark_cache_size: 5368709120
# also it can be dropped manually by the SYSTEM DROP MMAP CACHE query.
mmap_cache_size: 1000
# Cache size for compiled expressions.
# Cache size in bytes for compiled expressions.
compiled_expression_cache_size: 134217728
# Cache size in elements for compiled expressions.
compiled_expression_cache_elements_size: 10000
# Path to data directory, with trailing slash.
path: /var/lib/clickhouse/

View File

@ -110,7 +110,7 @@ enum class AccessType
(anyone can kill his own queries) */\
\
M(MOVE_PARTITION_BETWEEN_SHARDS, "", GLOBAL, ALL) /* required to be able to move a part/partition to a table
identified by it's ZooKeeper path */\
identified by its ZooKeeper path */\
\
M(CREATE_USER, "", GLOBAL, ACCESS_MANAGEMENT) \
M(ALTER_USER, "", GLOBAL, ACCESS_MANAGEMENT) \

View File

@ -1,8 +1,6 @@
#pragma once
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
#endif
#include "config_core.h"
#include <Access/Credentials.h>
#include <base/types.h>

View File

@ -1,8 +1,6 @@
#pragma once
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
#endif
#include "config_core.h"
#include <base/types.h>

View File

@ -9,9 +9,7 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <Core/DecimalFunctions.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>

View File

@ -9,9 +9,7 @@
#include <AggregateFunctions/IAggregateFunction.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>

View File

@ -10,9 +10,7 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/assert_cast.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>

View File

@ -5,9 +5,7 @@
#include <Common/assert_cast.h>
#include <AggregateFunctions/IAggregateFunction.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>

View File

@ -14,9 +14,7 @@
#include <DataTypes/DataTypeNullable.h>
#include <AggregateFunctions/IAggregateFunction.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>

View File

@ -10,9 +10,7 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>

View File

@ -14,9 +14,7 @@
#include <AggregateFunctions/IAggregateFunction.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
#if USE_EMBEDDED_COMPILER
# include <llvm/IR/IRBuilder.h>

View File

@ -395,9 +395,7 @@ private:
using Self = AggregateFunctionSumMapFiltered<T, overflow, tuple_argument>;
using Base = AggregateFunctionMapBase<T, Self, FieldVisitorSum, overflow, tuple_argument, true>;
/// ARCADIA_BUILD disallow unordered_set for big ints for some reason
static constexpr const bool allow_hash = !is_over_big_int<T>;
using ContainerT = std::conditional_t<allow_hash, std::unordered_set<T>, std::set<T>>;
using ContainerT = std::unordered_set<T>;
ContainerT keys_to_keep;
@ -418,13 +416,10 @@ public:
"Aggregate function {} requires an Array as a parameter",
getName());
if constexpr (allow_hash)
keys_to_keep.reserve(keys_to_keep_.size());
keys_to_keep.reserve(keys_to_keep_.size());
for (const Field & f : keys_to_keep_)
{
keys_to_keep.emplace(f.safeGet<T>());
}
}
String getName() const override

View File

@ -9,9 +9,7 @@
#include <Common/Exception.h>
#include <base/types.h>
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
#endif
#include "config_core.h"
#include <cstddef>
#include <memory>

View File

@ -1,15 +1,13 @@
#pragma once
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
#if USE_DATASKETCHES
#include <boost/noncopyable.hpp>
#include <memory>
#include <theta_sketch.hpp> // Y_IGNORE
#include <theta_union.hpp> // Y_IGNORE
#include <theta_sketch.hpp>
#include <theta_union.hpp>
namespace DB

View File

@ -98,9 +98,7 @@ void registerAggregateFunctions()
registerAggregateFunctionUniqUpTo(factory);
registerAggregateFunctionTopK(factory);
registerAggregateFunctionsBitwise(factory);
#if !defined(ARCADIA_BUILD)
registerAggregateFunctionsBitmap(factory);
#endif
registerAggregateFunctionsMaxIntersections(factory);
registerAggregateFunctionHistogram(factory);
registerAggregateFunctionRetention(factory);

View File

@ -15,9 +15,7 @@
#include <base/range.h>
#include <Bridge/IBridgeHelper.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
namespace DB

View File

@ -294,6 +294,7 @@ set_source_files_properties(
Columns/ColumnFixedString.cpp
Columns/ColumnsCommon.cpp
Columns/ColumnVector.cpp
Columns/ColumnDecimal.cpp
PROPERTIES COMPILE_FLAGS "${X86_INTRINSICS_FLAGS}")
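ColumnDecimal.cpp joins this list presumably because its filter() now shares the SIMD fast path (see the ColumnDecimal changes below) and therefore needs the same x86 intrinsics flags.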
if(RE2_LIBRARY)

View File

@ -14,9 +14,7 @@
#include "Core/Block.h"
#include "Core/Protocol.h"
#if !defined(ARCADIA_BUILD)
# include <Common/config_version.h>
#endif
#include <Common/config_version.h>
#include <Common/UTF8Helpers.h>
#include <Common/TerminalSize.h>
#include <Common/clearPasswordFromCommandLine.h>

View File

@ -31,10 +31,8 @@
#include <Processors/Executors/PipelineExecutor.h>
#include <pcg_random.hpp>
#if !defined(ARCADIA_BUILD)
# include <Common/config_version.h>
# include <Common/config.h>
#endif
#include <Common/config_version.h>
#include <Common/config.h>
#if USE_SSL
# include <Poco/Net/SecureStreamSocket.h>

View File

@ -4,9 +4,7 @@
#include <Poco/Net/StreamSocket.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
#include <Client/IServerConnection.h>
#include <Core/Defines.h>

View File

@ -12,9 +12,7 @@
#include <base/setTerminalEcho.h>
#include <base/scope_guard.h>
#if !defined(ARCADIA_BUILD)
#include <readpassphrase/readpassphrase.h> // Y_IGNORE
#endif
#include <readpassphrase/readpassphrase.h>
namespace DB
@ -55,12 +53,10 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
}
if (password_prompt)
{
#if !defined(ARCADIA_BUILD)
std::string prompt{"Password for user (" + user + "): "};
char buf[1000] = {};
if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0))
password = result;
#endif
}
/// By default compression is disabled if address looks like localhost.

View File

@ -1,8 +1,6 @@
#include <Columns/Collator.h>
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
#endif
#include "config_core.h"
#if USE_ICU
# include <unicode/locid.h>

View File

@ -237,25 +237,39 @@ ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter & filt, ssize_t result_
const UInt8 * filt_end = filt_pos + size;
const T * data_pos = data.data();
#ifdef __SSE2__
static constexpr size_t SIMD_BYTES = 16;
const __m128i zero16 = _mm_setzero_si128();
const UInt8 * filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
/** A slightly more optimized version.
* Based on the assumption that often pieces of consecutive values
* completely pass or do not pass the filter.
* Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
*/
static constexpr size_t SIMD_BYTES = 64;
const UInt8 * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
while (filt_pos < filt_end_sse)
while (filt_pos < filt_end_aligned)
{
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
mask = ~mask;
while (mask)
UInt64 mask = Bytes64MaskToBits64Mask(filt_pos);
if (0xffffffffffffffff == mask)
{
size_t index = __builtin_ctz(mask);
res_data.push_back(*(data_pos + index));
mask = mask & (mask - 1);
res_data.insert(data_pos, data_pos + SIMD_BYTES);
}
else
{
while (mask)
{
size_t index = __builtin_ctzll(mask);
res_data.push_back(data_pos[index]);
#ifdef __BMI__
mask = _blsr_u64(mask);
#else
mask = mask & (mask-1);
#endif
}
}
filt_pos += SIMD_BYTES;
data_pos += SIMD_BYTES;
}
#endif
while (filt_pos < filt_end)
{

View File

@ -236,17 +236,15 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
* completely pass or do not pass the filter.
* Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
*/
#if defined(__AVX512F__) && defined(__AVX512BW__)
static constexpr size_t SIMD_BYTES = 64;
const __m512i zero64 = _mm512_setzero_epi32();
const UInt8 * filt_end_avx512 = filt_pos + col_size / SIMD_BYTES * SIMD_BYTES;
const UInt8 * filt_end_aligned = filt_pos + col_size / SIMD_BYTES * SIMD_BYTES;
const size_t chars_per_simd_elements = SIMD_BYTES * n;
while (filt_pos < filt_end_avx512)
while (filt_pos < filt_end_aligned)
{
uint64_t mask = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast<const __m512i *>(filt_pos)), zero64, _MM_CMPINT_GT);
uint64_t mask = Bytes64MaskToBits64Mask(filt_pos);
if (0xFFFFFFFFFFFFFFFF == mask)
if (0xffffffffffffffff == mask)
{
res->chars.insert(data_pos, data_pos + chars_per_simd_elements);
}
@ -269,73 +267,6 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
data_pos += chars_per_simd_elements;
filt_pos += SIMD_BYTES;
}
#elif defined(__AVX__) && defined(__AVX2__)
static constexpr size_t SIMD_BYTES = 32;
const __m256i zero32 = _mm256_setzero_si256();
const UInt8 * filt_end_avx2 = filt_pos + col_size / SIMD_BYTES * SIMD_BYTES;
const size_t chars_per_simd_elements = SIMD_BYTES * n;
while (filt_pos < filt_end_avx2)
{
uint32_t mask = _mm256_movemask_epi8(_mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast<const __m256i *>(filt_pos)), zero32));
if (0xFFFFFFFF == mask)
{
res->chars.insert(data_pos, data_pos + chars_per_simd_elements);
}
else
{
size_t res_chars_size = res->chars.size();
while (mask)
{
size_t index = __builtin_ctz(mask);
res->chars.resize(res_chars_size + n);
memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos + index * n, n);
res_chars_size += n;
#ifdef __BMI__
mask = _blsr_u32(mask);
#else
mask = mask & (mask-1);
#endif
}
}
data_pos += chars_per_simd_elements;
filt_pos += SIMD_BYTES;
}
#elif defined(__SSE2__)
static constexpr size_t SIMD_BYTES = 16;
const __m128i zero16 = _mm_setzero_si128();
const UInt8 * filt_end_sse = filt_pos + col_size / SIMD_BYTES * SIMD_BYTES;
const size_t chars_per_simd_elements = SIMD_BYTES * n;
while (filt_pos < filt_end_sse)
{
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
mask = ~mask;
if (0xFFFF == mask)
{
res->chars.insert(data_pos, data_pos + chars_per_simd_elements);
}
else
{
size_t res_chars_size = res->chars.size();
while (mask)
{
size_t index = __builtin_ctz(mask);
res->chars.resize(res_chars_size + n);
memcpySmallAllowReadWriteOverflow15(&res->chars[res_chars_size], data_pos + index * n, n);
res_chars_size += n;
mask = mask & (mask - 1);
}
}
data_pos += chars_per_simd_elements;
filt_pos += SIMD_BYTES;
}
#endif
size_t res_chars_size = res->chars.size();
while (filt_pos < filt_end)

View File

@ -310,21 +310,20 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
const UInt8 * filt_pos = filt.data();
const UInt8 * filt_end = filt_pos + size;
const T * data_pos = data.data();
/** A slightly more optimized version.
* Based on the assumption that often pieces of consecutive values
* completely pass or do not pass the filter.
* Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
*/
#if defined(__AVX512F__) && defined(__AVX512BW__)
static constexpr size_t SIMD_BYTES = 64;
const __m512i zero64 = _mm512_setzero_epi32();
const UInt8 * filt_end_avx512 = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
const UInt8 * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
while (filt_pos < filt_end_avx512)
while (filt_pos < filt_end_aligned)
{
UInt64 mask = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast<const __m512i *>(filt_pos)), zero64, _MM_CMPINT_GT);
UInt64 mask = Bytes64MaskToBits64Mask(filt_pos);
if (0xFFFFFFFFFFFFFFFF == mask)
if (0xffffffffffffffff == mask)
{
res_data.insert(data_pos, data_pos + SIMD_BYTES);
}
@ -346,67 +345,6 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
data_pos += SIMD_BYTES;
}
#elif defined(__AVX__) && defined(__AVX2__)
static constexpr size_t SIMD_BYTES = 32;
const __m256i zero32 = _mm256_setzero_si256();
const UInt8 * filt_end_avx2 = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
while (filt_pos < filt_end_avx2)
{
UInt32 mask = _mm256_movemask_epi8(_mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast<const __m256i *>(filt_pos)), zero32));
if (0xFFFFFFFF == mask)
{
res_data.insert(data_pos, data_pos + SIMD_BYTES);
}
else
{
while (mask)
{
size_t index = __builtin_ctz(mask);
res_data.push_back(data_pos[index]);
#ifdef __BMI__
mask = _blsr_u32(mask);
#else
mask = mask & (mask-1);
#endif
}
}
filt_pos += SIMD_BYTES;
data_pos += SIMD_BYTES;
}
#elif defined(__SSE2__)
static constexpr size_t SIMD_BYTES = 16;
const __m128i zero16 = _mm_setzero_si128();
const UInt8 * filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
while (filt_pos < filt_end_sse)
{
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)), zero16));
mask = ~mask;
if (0xFFFF == mask)
{
res_data.insert(data_pos, data_pos + SIMD_BYTES);
}
else
{
while (mask)
{
size_t index = __builtin_ctz(mask);
res_data.push_back(data_pos[index]);
mask = mask & (mask - 1);
}
}
filt_pos += SIMD_BYTES;
data_pos += SIMD_BYTES;
}
#endif
while (filt_pos < filt_end)
{
if (*filt_pos)

View File

@ -1,7 +1,3 @@
#ifdef __SSE2__
#include <emmintrin.h>
#endif
#include <Columns/IColumn.h>
#include <Columns/ColumnVector.h>
#include <Common/typeid_cast.h>
@ -229,16 +225,19 @@ namespace
memcpy(&res_elems[elems_size_old], &src_elems[arr_offset], arr_size * sizeof(T));
};
#if defined(__AVX512F__) && defined(__AVX512BW__)
const __m512i zero_vec = _mm512_setzero_epi32();
/** A slightly more optimized version.
* Based on the assumption that often pieces of consecutive values
* completely pass or do not pass the filter.
* Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
*/
static constexpr size_t SIMD_BYTES = 64;
const auto * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
while (filt_pos < filt_end_aligned)
{
uint64_t mask = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast<const __m512i *>(filt_pos)), zero_vec, _MM_CMPINT_GT);
uint64_t mask = Bytes64MaskToBits64Mask(filt_pos);
if (mask == 0xffffffffffffffff)
if (0xffffffffffffffff == mask)
{
/// SIMD_BYTES consecutive rows pass the filter
const auto first = offsets_pos == offsets_begin;
@ -270,88 +269,6 @@ namespace
filt_pos += SIMD_BYTES;
offsets_pos += SIMD_BYTES;
}
#elif defined(__AVX__) && defined(__AVX2__)
const __m256i zero_vec = _mm256_setzero_si256();
static constexpr size_t SIMD_BYTES = 32;
const auto * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
while (filt_pos < filt_end_aligned)
{
uint32_t mask = _mm256_movemask_epi8(_mm256_cmpgt_epi8(_mm256_loadu_si256(reinterpret_cast<const __m256i *>(filt_pos)), zero_vec));
if (mask == 0xffffffff)
{
/// SIMD_BYTES consecutive rows pass the filter
const auto first = offsets_pos == offsets_begin;
const auto chunk_offset = first ? 0 : offsets_pos[-1];
const auto chunk_size = offsets_pos[SIMD_BYTES - 1] - chunk_offset;
result_offsets_builder.template insertChunk<SIMD_BYTES>(offsets_pos, first, chunk_offset, chunk_size);
/// copy elements for SIMD_BYTES arrays at once
const auto elems_size_old = res_elems.size();
res_elems.resize(elems_size_old + chunk_size);
memcpy(&res_elems[elems_size_old], &src_elems[chunk_offset], chunk_size * sizeof(T));
}
else
{
while (mask)
{
size_t index = __builtin_ctz(mask);
copy_array(offsets_pos + index);
#ifdef __BMI__
mask = _blsr_u32(mask);
#else
mask = mask & (mask-1);
#endif
}
}
filt_pos += SIMD_BYTES;
offsets_pos += SIMD_BYTES;
}
#elif defined(__SSE2__)
const __m128i zero_vec = _mm_setzero_si128();
static constexpr size_t SIMD_BYTES = 16;
const auto * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
while (filt_pos < filt_end_aligned)
{
UInt16 mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(filt_pos)),
zero_vec));
mask = ~mask;
if (mask == 0xffff)
{
/// SIMD_BYTES consecutive rows pass the filter
const auto first = offsets_pos == offsets_begin;
const auto chunk_offset = first ? 0 : offsets_pos[-1];
const auto chunk_size = offsets_pos[SIMD_BYTES - 1] - chunk_offset;
result_offsets_builder.template insertChunk<SIMD_BYTES>(offsets_pos, first, chunk_offset, chunk_size);
/// copy elements for SIMD_BYTES arrays at once
const auto elems_size_old = res_elems.size();
res_elems.resize(elems_size_old + chunk_size);
memcpy(&res_elems[elems_size_old], &src_elems[chunk_offset], chunk_size * sizeof(T));
}
else
{
while (mask)
{
size_t index = __builtin_ctz(mask);
copy_array(offsets_pos + index);
mask = mask & (mask - 1);
}
}
filt_pos += SIMD_BYTES;
offsets_pos += SIMD_BYTES;
}
#endif
while (filt_pos < filt_end)
{

View File

@ -2,7 +2,12 @@
#include <Columns/IColumn.h>
#include <Common/PODArray.h>
#ifdef __SSE2__
#include <emmintrin.h>
#endif
#if defined(__AVX512F__) || defined(__AVX512BW__) || defined(__AVX__) || defined(__AVX2__)
#include <immintrin.h>
#endif
/// Common helper methods for implementation of different columns.
@ -15,6 +20,40 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
/// Transform a 64-byte mask into a 64-bit mask: bit i of the result is set iff bytes64[i] is non-zero.
inline UInt64 Bytes64MaskToBits64Mask(const UInt8 * bytes64)
{
#if defined(__AVX512F__) && defined(__AVX512BW__)
static const __m512i zero64 = _mm512_setzero_epi32();
UInt64 res = _mm512_cmp_epi8_mask(_mm512_loadu_si512(reinterpret_cast<const __m512i *>(bytes64)), zero64, _MM_CMPINT_EQ);
#elif defined(__AVX__) && defined(__AVX2__)
static const __m256i zero32 = _mm256_setzero_si256();
UInt64 res =
(static_cast<UInt64>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(bytes64)), zero32))) & 0xffffffff)
| (static_cast<UInt64>(_mm256_movemask_epi8(_mm256_cmpeq_epi8(
_mm256_loadu_si256(reinterpret_cast<const __m256i *>(bytes64+32)), zero32))) << 32);
#elif defined(__SSE2__) && defined(__POPCNT__)
static const __m128i zero16 = _mm_setzero_si128();
UInt64 res =
(static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16))) & 0xffff)
| ((static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16))) << 16) & 0xffff0000)
| ((static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16))) << 32) & 0xffff00000000)
| ((static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpeq_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16))) << 48) & 0xffff000000000000);
#else
UInt64 res = 0;
const UInt8 * pos = bytes64;
const UInt8 * end = pos + 64;
for (; pos < end; ++pos)
res |= (static_cast<UInt64>(*pos == 0) << (pos - bytes64)); /// widen before shifting: an int shifted by up to 63 would be UB
#endif
return ~res;
}
/// Counts how many bytes of `filt` are greater than zero.
size_t countBytesInFilter(const UInt8 * filt, size_t sz);
size_t countBytesInFilter(const IColumn::Filter & filt);
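The helper above is what the rewritten filter() implementations consume. A minimal standalone sketch of the consuming pattern (function and parameter names hypothetical, not the ClickHouse API; integer aliases as in the surrounding header):

#include <vector>

/// Filter one 64-byte chunk: `filt` points at 64 pass/reject bytes matching `data`.
/// The fast path copies the whole chunk when every row passes; otherwise the bit
/// mask is walked with count-trailing-zeros, clearing one bit per emitted row.
inline void filterChunk64(const UInt8 * filt, const Int32 * data, std::vector<Int32> & out)
{
    UInt64 mask = Bytes64MaskToBits64Mask(filt);
    if (mask == 0xffffffffffffffff)
    {
        out.insert(out.end(), data, data + 64);
    }
    else
    {
        while (mask)
        {
            size_t index = __builtin_ctzll(mask); /// position of the lowest passing row
            out.push_back(data[index]);
            mask &= mask - 1; /// clear the lowest set bit
        }
    }
}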

View File

@ -1,8 +1,5 @@
#include <Common/ClickHouseRevision.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config_version.h>
#endif
#include <Common/config_version.h>
namespace ClickHouseRevision
{

View File

@ -55,28 +55,6 @@ public:
return locus;
}
/// Used only in arcadia/metrika
void readText(ReadBuffer & in)
{
for (size_t i = 0; i < BITSET_SIZE; ++i)
{
if (i != 0)
assertChar(',', in);
readIntText(bitset[i], in);
}
}
/// Used only in arcadia/metrika
void writeText(WriteBuffer & out) const
{
for (size_t i = 0; i < BITSET_SIZE; ++i)
{
if (i != 0)
writeCString(",", out);
writeIntText(bitset[i], out);
}
}
private:
/// number of bytes in bitset
static constexpr size_t BITSET_SIZE = (static_cast<size_t>(bucket_count) * content_width + 7) / 8;

View File

@ -1,6 +1,4 @@
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#include <Common/config.h>
#include "ConfigProcessor.h"
#include "YAMLParser.h"

View File

@ -1,8 +1,6 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#include <Common/config.h>
#include <string>
#include <unordered_set>

View File

@ -1,6 +1,4 @@
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#include <Common/config.h>
#if USE_YAML_CPP
#include "YAMLParser.h"
@ -19,7 +17,7 @@
#include <Poco/DOM/Text.h>
#include <Common/Exception.h>
#include <yaml-cpp/yaml.h> // Y_IGNORE
#include <yaml-cpp/yaml.h>
#include <base/logger_useful.h>

View File

@ -1,8 +1,6 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif
#include <Common/config.h>
#include <string>

View File

@ -114,11 +114,7 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host)
try
{
#if defined(ARCADIA_BUILD)
addresses = Poco::Net::DNS::hostByName(host, &Poco::Net::DNS::DEFAULT_DNS_TIMEOUT, flags).addresses();
#else
addresses = Poco::Net::DNS::hostByName(host, flags).addresses();
#endif
}
catch (const Poco::Net::DNSException & e)
{

View File

@ -577,7 +577,7 @@
M(607, BACKUP_ELEMENT_DUPLICATE) \
M(608, CANNOT_RESTORE_TABLE) \
M(609, FUNCTION_ALREADY_EXISTS) \
M(610, CANNOT_DROP_SYSTEM_FUNCTION) \
M(610, CANNOT_DROP_FUNCTION) \
M(611, CANNOT_CREATE_RECURSIVE_FUNCTION) \
M(612, OBJECT_ALREADY_STORED_ON_DISK) \
M(613, OBJECT_WAS_NOT_STORED_ON_DISK) \

View File

@ -17,9 +17,7 @@
#include <Common/ErrorCodes.h>
#include <filesystem>
#if !defined(ARCADIA_BUILD)
# include <Common/config_version.h>
#endif
#include <Common/config_version.h>
namespace fs = std::filesystem;

View File

@ -36,12 +36,13 @@ public:
using Mapped = TMapped;
using MappedPtr = std::shared_ptr<Mapped>;
private:
using Clock = std::chrono::steady_clock;
public:
LRUCache(size_t max_size_)
: max_size(std::max(static_cast<size_t>(1), max_size_)) {}
/** Initialize LRUCache with max_size and max_elements_size.
* max_elements_size == 0 means no elements size restrictions.
*/
LRUCache(size_t max_size_, size_t max_elements_size_ = 0)
: max_size(std::max(static_cast<size_t>(1), max_size_))
, max_elements_size(max_elements_size_)
{}
MappedPtr get(const Key & key)
{
@ -252,6 +253,7 @@ private:
/// Total weight of values.
size_t current_size = 0;
const size_t max_size;
const size_t max_elements_size;
std::atomic<size_t> hits {0};
std::atomic<size_t> misses {0};
@ -311,7 +313,8 @@ private:
{
size_t current_weight_lost = 0;
size_t queue_size = cells.size();
while ((current_size > max_size) && (queue_size > 1))
while ((current_size > max_size || (max_elements_size != 0 && queue_size > max_elements_size)) && (queue_size > 1))
{
const Key & key = queue.front();

View File

@ -1,6 +1,4 @@
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
#if USE_SSL
#include "OpenSSLHelpers.h"

View File

@ -1,8 +1,6 @@
#pragma once
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
#if USE_SSL
# include <base/types.h>

View File

@ -7,9 +7,7 @@
#include <Common/StringSearcher.h>
#include <re2/re2.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
#if USE_RE2_ST
# include <re2_st/re2.h>

View File

@ -227,7 +227,8 @@
M(CreatedHTTPConnections, "Total amount of created HTTP connections (counter increase every time connection is created).") \
\
M(CannotWriteToWriteBufferDiscard, "Number of stack traces dropped by query profiler or signal handler because pipe is full or cannot write to pipe.") \
M(QueryProfilerSignalOverruns, "Number of times we drop processing of a signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \
M(QueryProfilerSignalOverruns, "Number of times we drop processing of a query profiler signal due to overrun plus the number of signals that OS has not delivered due to overrun.") \
M(QueryProfilerRuns, "Number of times QueryProfiler had been run.") \
\
M(CreatedLogEntryForMerge, "Successfully created log entry to merge parts in ReplicatedMergeTree.") \
M(NotCreatedLogEntryForMerge, "Log entry to merge parts in ReplicatedMergeTree is not created due to concurrent log update by another replica.") \

View File

@ -14,17 +14,19 @@
namespace
{
constexpr UInt64 ZERO = 0;
constexpr UInt64 ALL_THREADS = 0;
UInt64 calculateNewCoresNumber(DB::ThreadIdToTimeMap const & prev, DB::ThreadIdToTimeMap const& next)
{
if (next.find(ZERO) == next.end())
return ZERO;
auto accumulated = std::accumulate(next.cbegin(), next.cend(), ZERO,
[&prev](UInt64 acc, auto const & elem)
if (next.find(ALL_THREADS) == next.end())
return 0;
auto accumulated = std::accumulate(next.cbegin(), next.cend(), UInt64{0},
[&prev](UInt64 acc, const auto & elem)
{
if (elem.first == ZERO)
if (elem.first == ALL_THREADS)
return acc;
auto thread_time = elem.second.time();
auto it = prev.find(elem.first);
if (it != prev.end())
@ -32,9 +34,9 @@ namespace
return acc + thread_time;
});
auto elapsed = next.at(ZERO).time() - (prev.contains(ZERO) ? prev.at(ZERO).time() : ZERO);
if (elapsed == ZERO)
return ZERO;
auto elapsed = next.at(ALL_THREADS).time() - (prev.contains(ALL_THREADS) ? prev.at(ALL_THREADS).time() : 0);
if (elapsed == 0)
return 0;
return (accumulated + elapsed - 1) / elapsed;
}
}
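The final expression is a ceiling division: for example, if a query's threads accumulated 350 ms of CPU time while 100 ms of wall-clock time elapsed between two ProfileEvents packets, (350 + 100 - 1) / 100 = 4 approximate cores.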
@ -109,7 +111,7 @@ size_t ProgressIndication::getUsedThreadsCount() const
UInt64 ProgressIndication::getApproximateCoresNumber() const
{
return std::accumulate(host_active_cores.cbegin(), host_active_cores.cend(), ZERO,
return std::accumulate(host_active_cores.cbegin(), host_active_cores.cend(), UInt64{0},
[](UInt64 acc, auto const & elem)
{
return acc + elem.second;
@ -121,11 +123,12 @@ ProgressIndication::MemoryUsage ProgressIndication::getMemoryUsage() const
return std::accumulate(thread_data.cbegin(), thread_data.cend(), MemoryUsage{},
[](MemoryUsage const & acc, auto const & host_data)
{
auto host_usage = std::accumulate(host_data.second.cbegin(), host_data.second.cend(), ZERO,
[](UInt64 memory, auto const & data)
{
return memory + data.second.memory_usage;
});
UInt64 host_usage = 0;
// In ProfileEvents packets, thread id 0 carries the common profiling information
// for all threads executing the current query on a given host. So instead of
// summing per-thread memory consumption, it is enough to look at the entry with thread id 0.
if (auto it = host_data.second.find(ALL_THREADS); it != host_data.second.end())
host_usage = it->second.memory_usage;
return MemoryUsage{.total = acc.total + host_usage, .max = std::max(acc.max, host_usage)};
});
}

View File

@ -15,6 +15,7 @@
namespace ProfileEvents
{
extern const Event QueryProfilerSignalOverruns;
extern const Event QueryProfilerRuns;
}
namespace DB
@ -60,6 +61,7 @@ namespace
const StackTrace stack_trace(signal_context);
TraceCollector::collect(trace_type, stack_trace, 0);
ProfileEvents::increment(ProfileEvents::QueryProfilerRuns);
errno = saved_errno;
}
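Like other ProfileEvents counters, QueryProfilerRuns should be observable through the system.events table, giving a direct measure of how often the profiler actually fired versus how often it was overrun.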

View File

@ -4,9 +4,7 @@
#include <signal.h>
#include <time.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
namespace Poco

View File

@ -1,24 +1,10 @@
#pragma once
/// SparseHashMap is a wrapper for google::sparse_hash_map.
#if defined(ARCADIA_BUILD)
#define HASH_FUN_H <unordered_map>
template <typename T>
struct THash;
#endif
#include <sparsehash/sparse_hash_map>
#if !defined(ARCADIA_BUILD)
template <class Key, class T, class HashFcn = std::hash<Key>,
class EqualKey = std::equal_to<Key>,
class Alloc = google::libc_allocator_with_realloc<std::pair<const Key, T>>>
using SparseHashMap = google::sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>;
#else
template <class Key, class T, class HashFcn = std::hash<Key>,
class EqualKey = std::equal_to<Key>,
class Alloc = google::sparsehash::libc_allocator_with_realloc<std::pair<const Key, T>>>
using SparseHashMap = google::sparsehash::sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>;
#undef THash
#endif
template <class Key, class T, class HashFcn = std::hash<Key>,
class EqualKey = std::equal_to<Key>,
class Alloc = google::libc_allocator_with_realloc<std::pair<const Key, T>>>
using SparseHashMap = google::sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>;
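A minimal usage sketch of the now-unconditional alias (the word-count use case is hypothetical):

#include <cstdint>
#include <string>

/// Defaults spelled out above: std::hash, std::equal_to and google's
/// realloc-aware allocator.
SparseHashMap<std::string, uint64_t> word_counts;

inline void addWord(const std::string & word)
{
    ++word_counts[word]; /// sparse_hash_map supports operator[] like std::unordered_map
}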

View File

@ -13,9 +13,7 @@
#include <sstream>
#include <unordered_map>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
#if USE_UNWIND
# include <libunwind.h>
@ -201,7 +199,7 @@ void StackTrace::symbolize(
const StackTrace::FramePointers & frame_pointers, [[maybe_unused]] size_t offset,
size_t size, StackTrace::Frames & frames)
{
#if defined(__ELF__) && !defined(__FreeBSD__) && !defined(ARCADIA_BUILD)
#if defined(__ELF__) && !defined(__FreeBSD__)
auto symbol_index_ptr = DB::SymbolIndex::instance();
const DB::SymbolIndex & symbol_index = *symbol_index_ptr;

View File

@ -116,9 +116,9 @@ public:
/// lower and uppercase variants of the first octet of the first character in `needle`
size_t length_l = UTF8::convertCodePointToUTF8(first_l_u32, l_seq, sizeof(l_seq));
size_t length_r = UTF8::convertCodePointToUTF8(first_u_u32, u_seq, sizeof(u_seq));
size_t length_u = UTF8::convertCodePointToUTF8(first_u_u32, u_seq, sizeof(u_seq));
if (length_l != length_r)
if (length_l != length_u)
throw Exception{"UTF8 sequences with different lowercase and uppercase lengths are not supported", ErrorCodes::UNSUPPORTED_PARAMETER};
}
@ -183,6 +183,31 @@ public:
#endif
}
template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
ALWAYS_INLINE bool compareTrivial(const CharT * haystack_pos, const CharT * const haystack_end, const uint8_t * needle_pos) const
{
while (haystack_pos < haystack_end && needle_pos < needle_end)
{
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
/// Invalid UTF-8, should not compare equals
if (!haystack_code_point || !needle_code_point)
break;
/// Not equals case insensitive.
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
break;
/// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true)
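/// (for example, the Kelvin sign U+212A occupies three bytes in UTF-8
/// while its lowercase equivalent 'k' is a single byte)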
const auto len = UTF8::seqLength(*haystack_pos);
haystack_pos += len;
needle_pos += len;
}
return needle_pos == needle_end;
}
template <typename CharT, typename = std::enable_if_t<sizeof(CharT) == 1>>
ALWAYS_INLINE bool compare(const CharT * /*haystack*/, const CharT * haystack_end, const CharT * pos) const
{
@ -200,34 +225,15 @@ public:
{
if (mask == cachemask)
{
pos += cache_valid_len;
auto needle_pos = needle + cache_valid_len;
while (needle_pos < needle_end)
{
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(pos, haystack_end - pos);
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
/// Invalid UTF-8, should not compare equals
if (!haystack_code_point || !needle_code_point)
break;
/// Not equals case insensitive.
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
break;
/// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true)
const auto len = UTF8::seqLength(*pos);
pos += len;
needle_pos += len;
}
if (needle_pos == needle_end)
if (compareTrivial(pos, haystack_end, needle))
return true;
}
}
else if ((mask & cachemask) == cachemask)
return true;
{
if (compareTrivial(pos, haystack_end, needle))
return true;
}
return false;
}
@ -238,25 +244,7 @@ public:
pos += first_needle_symbol_is_ascii;
auto needle_pos = needle + first_needle_symbol_is_ascii;
while (needle_pos < needle_end)
{
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(pos, haystack_end - pos);
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
/// Invalid UTF-8, should not compare equals
if (!haystack_code_point || !needle_code_point)
break;
/// Not equals case insensitive.
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
break;
const auto len = UTF8::seqLength(*pos);
pos += len;
needle_pos += len;
}
if (needle_pos == needle_end)
if (compareTrivial(pos, haystack_end, needle_pos))
return true;
}
@ -299,40 +287,21 @@ public:
const auto v_against_l_offset = _mm_cmpeq_epi8(v_haystack_offset, cachel);
const auto v_against_u_offset = _mm_cmpeq_epi8(v_haystack_offset, cacheu);
const auto v_against_l_or_u_offset = _mm_or_si128(v_against_l_offset, v_against_u_offset);
const auto mask_offset = _mm_movemask_epi8(v_against_l_or_u_offset);
const auto mask_offset_both = _mm_movemask_epi8(v_against_l_or_u_offset);
if (0xffff == cachemask)
{
if (mask_offset == cachemask)
if (mask_offset_both == cachemask)
{
auto haystack_pos = haystack + cache_valid_len;
auto needle_pos = needle + cache_valid_len;
while (haystack_pos < haystack_end && needle_pos < needle_end)
{
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
/// Invalid UTF-8, should not compare equals
if (!haystack_code_point || !needle_code_point)
break;
/// Not equals case insensitive.
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
break;
/// @note assuming sequences for lowercase and uppercase have exact same length (that is not always true)
const auto len = UTF8::seqLength(*haystack_pos);
haystack_pos += len;
needle_pos += len;
}
if (needle_pos == needle_end)
if (compareTrivial(haystack, haystack_end, needle))
return haystack;
}
}
else if ((mask_offset & cachemask) == cachemask)
return haystack;
else if ((mask_offset_both & cachemask) == cachemask)
{
if (compareTrivial(haystack, haystack_end, needle))
return haystack;
}
/// first octet was ok, but not the first 16, move to start of next sequence and reapply
haystack += UTF8::seqLength(*haystack);
@ -349,25 +318,7 @@ public:
auto haystack_pos = haystack + first_needle_symbol_is_ascii;
auto needle_pos = needle + first_needle_symbol_is_ascii;
while (haystack_pos < haystack_end && needle_pos < needle_end)
{
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
/// Invalid UTF-8, should not compare equals
if (!haystack_code_point || !needle_code_point)
break;
/// Not equals case insensitive.
if (Poco::Unicode::toLower(*haystack_code_point) != Poco::Unicode::toLower(*needle_code_point))
break;
const auto len = UTF8::seqLength(*haystack_pos);
haystack_pos += len;
needle_pos += len;
}
if (needle_pos == needle_end)
if (compareTrivial(haystack_pos, haystack_end, needle_pos))
return haystack;
}

View File

@ -26,7 +26,7 @@ namespace ErrorCodes
thread_local ThreadStatus * current_thread = nullptr;
thread_local ThreadStatus * main_thread = nullptr;
#if !defined(SANITIZER) && !defined(ARCADIA_BUILD)
#if !defined(SANITIZER)
namespace
{
@ -88,7 +88,7 @@ ThreadStatus::ThreadStatus()
/// Will set alternative signal stack to provide diagnostics for stack overflow errors.
/// If not already installed for current thread.
/// Sanitizer makes larger stack usage and also it's incompatible with alternative stack by default (it sets up and relies on its own).
#if !defined(SANITIZER) && !defined(ARCADIA_BUILD)
#if !defined(SANITIZER)
if (!has_alt_stack)
{
/// Don't repeat tries even if not installed successfully.
@ -147,11 +147,9 @@ ThreadStatus::~ThreadStatus()
thread_group->threads.erase(this);
}
#if !defined(ARCADIA_BUILD)
/// It may cause segfault if query_context was destroyed, but was not detached
auto query_context_ptr = query_context.lock();
assert((!query_context_ptr && query_id.empty()) || (query_context_ptr && query_id == query_context_ptr->getCurrentQueryId()));
#endif
if (deleter)
deleter();

View File

@ -730,7 +730,10 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &
}
};
while (!condition || !condition())
/// Use a do-while so the node is checked at least once: this allows calling
/// the function in a non-blocking fashion with a wait condition that is
/// already satisfied by the time this method is called.
do
{
/// Use getData instead of exists to avoid a watch leak.
impl->get(path, callback, watch);
@ -746,7 +749,8 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &
if (state->event_type == Coordination::DELETED)
return true;
}
} while (!condition || !condition());
return false;
}
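A minimal illustration of the control-flow difference (helper names hypothetical, not the ZooKeeper API): with a plain while-loop, a condition that is already satisfied at entry would skip the body entirely, so the node would never be checked even once.

#include <functional>

/// Runs `check` at least once; keeps retrying until `stop` (if provided)
/// returns true. A `while` loop instead of do-while would return immediately
/// when `stop` is already true, without a single check.
inline bool checkAtLeastOnce(const std::function<bool()> & check,
                             const std::function<bool()> & stop)
{
    do
    {
        if (check())
            return true;
    } while (!stop || !stop());
    return false;
}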

View File

@ -9,9 +9,7 @@
#include <IO/WriteBufferFromString.h>
#include <base/logger_useful.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
#if USE_SSL
# include <Poco/Net/SecureStreamSocket.h>

View File

@ -1,6 +1,4 @@
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#include <Common/config.h>
#include <Common/Exception.h>
#include <base/types.h>
#include <IO/VarInt.h>
@ -13,10 +11,10 @@
// This depends on BoringSSL-specific API, notably <openssl/aead.h>.
#if USE_SSL && USE_INTERNAL_SSL_LIBRARY
#include <Parsers/ASTLiteral.h>
#include <openssl/digest.h> // Y_IGNORE
#include <openssl/digest.h>
#include <openssl/err.h>
#include <boost/algorithm/hex.hpp>
#include <openssl/aead.h> // Y_IGNORE
#include <openssl/aead.h>
#endif
// Common part for both parts (with SSL and without)

View File

@ -1,6 +1,5 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include <string_view>
#include <unordered_map>
#include <base/types.h>
@ -134,5 +133,3 @@ private:
};
}
#endif /* NOT Arcadia_build */

View File

@ -1,6 +1,4 @@
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
#endif
#include "config_core.h"
#include <Compression/CompressionFactory.h>
#include <Parsers/ASTFunction.h>

View File

@ -1,6 +1,6 @@
#pragma once
#include <libnuraft/nuraft.hxx> // Y_IGNORE
#include <libnuraft/nuraft.hxx>
#include <city.h>
#include <optional>
#include <IO/WriteBufferFromFile.h>

View File

@ -4,7 +4,7 @@
#include <map>
#include <mutex>
#include <Core/Types.h>
#include <libnuraft/log_store.hxx> // Y_IGNORE
#include <libnuraft/log_store.hxx>
namespace DB
{

View File

@ -1,9 +1,7 @@
#pragma once
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
# include "config_core.h"
#endif
#include <Common/config.h>
#include "config_core.h"
#if USE_NURAFT

View File

@ -1,5 +1,5 @@
#pragma once
#include <libnuraft/log_store.hxx> // Y_IGNORE
#include <libnuraft/log_store.hxx>
#include <map>
#include <mutex>
#include <Core/Types.h>

View File

@ -1,9 +1,7 @@
#include <Coordination/KeeperServer.h>
#include <Coordination/Defines.h>
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
#endif
#include "config_core.h"
#include <Coordination/LoggerWrapper.h>
#include <Coordination/KeeperStateMachine.h>

Some files were not shown because too many files have changed in this diff.