Merge branch 'master' into bobrik-parallel-randes

This commit is contained in:
Nikolai Kochetov 2020-07-20 19:05:28 +03:00
commit d6583698a9
39 changed files with 1057 additions and 848 deletions

View File

@ -30,7 +30,7 @@ struct StringRef
constexpr StringRef(const CharT * data_, size_t size_) : data(reinterpret_cast<const char *>(data_)), size(size_) {}
StringRef(const std::string & s) : data(s.data()), size(s.size()) {}
constexpr StringRef(const std::string_view & s) : data(s.data()), size(s.size()) {}
constexpr explicit StringRef(const std::string_view & s) : data(s.data()), size(s.size()) {}
constexpr StringRef(const char * data_) : StringRef(std::string_view{data_}) {}
constexpr StringRef() = default;

View File

@ -1,17 +1,8 @@
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include/simdjson/jsonparser.h")
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include/simdjson.h")
message (WARNING "submodule contrib/simdjson is missing. to fix try run: \n git submodule update --init --recursive")
return()
endif ()
if (NOT HAVE_SSE42)
message (WARNING "submodule contrib/simdjson requires support of SSE4.2 instructions")
return()
elseif (NOT HAVE_PCLMULQDQ)
message (WARNING "submodule contrib/simdjson requires support of PCLMULQDQ instructions")
return()
endif ()
option (USE_SIMDJSON "Use simdjson" ON)
set (SIMDJSON_LIBRARY "simdjson")
message(STATUS "Using simdjson=${USE_SIMDJSON}: ${SIMDJSON_LIBRARY}")
message(STATUS "Using simdjson=${USE_SIMDJSON}")

2
contrib/simdjson vendored

@ -1 +1 @@
Subproject commit 560f0742cc0895d00d78359dbdeb82064a24adb8
Subproject commit 1e4aa116e5a39e4ba23b9a93e6c7f048c5105b20

View File

@ -1,14 +1,6 @@
set(SIMDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include")
set(SIMDJSON_SRC_DIR "${SIMDJSON_INCLUDE_DIR}/../src")
set(SIMDJSON_SRC
${SIMDJSON_SRC_DIR}/document.cpp
${SIMDJSON_SRC_DIR}/error.cpp
${SIMDJSON_SRC_DIR}/implementation.cpp
${SIMDJSON_SRC_DIR}/jsonioutil.cpp
${SIMDJSON_SRC_DIR}/jsonminifier.cpp
${SIMDJSON_SRC_DIR}/stage1_find_marks.cpp
${SIMDJSON_SRC_DIR}/stage2_build_tape.cpp
)
set(SIMDJSON_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/src")
set(SIMDJSON_SRC ${SIMDJSON_SRC_DIR}/simdjson.cpp)
add_library(${SIMDJSON_LIBRARY} ${SIMDJSON_SRC})
target_include_directories(${SIMDJSON_LIBRARY} SYSTEM PUBLIC "${SIMDJSON_INCLUDE_DIR}" PRIVATE "${SIMDJSON_SRC_DIR}")
add_library(simdjson ${SIMDJSON_SRC})
target_include_directories(simdjson SYSTEM PUBLIC "${SIMDJSON_INCLUDE_DIR}" PRIVATE "${SIMDJSON_SRC_DIR}")

View File

@ -67,19 +67,7 @@ function watchdog
sleep 1
done
./clickhouse-client --query "select elapsed, query from system.processes" ||:
killall clickhouse-server ||:
for x in {1..10}
do
if ! pgrep -f clickhouse-server
then
break
fi
sleep 1
done
killall -9 clickhouse-server clickhouse-client ||:
killall -9 clickhouse-client ||:
}
function fuzz
@ -100,8 +88,18 @@ function fuzz
|| fuzzer_exit_code=$?
echo "Fuzzer exit code is $fuzzer_exit_code"
./clickhouse-client --query "select elapsed, query from system.processes" ||:
kill -9 $server_pid ||:
killall clickhouse-server ||:
for x in {1..10}
do
if ! pgrep -f clickhouse-server
then
break
fi
sleep 1
done
killall -9 clickhouse-server ||:
if [ "$fuzzer_exit_code" == "143" ]
then

View File

@ -1,25 +0,0 @@
{
"checkYo": false,
"excludeFiles": [],
"fileExtensions": [],
"format": "auto",
"ignoreTags": [
"code",
"kbd",
"object",
"samp",
"script",
"style",
"var"
],
"maxRequests": 2,
"lang": "en,ru",
"report": ["console"],
"dictionary": [
"(C|c)lick(H|h)ouse",
"CatBoost",
"(Ш|ш)ард(ы|ов|а|у|е|ам|ирование|ированы|ах)?",
"логир(ование|уются|ования)?",
"конфиг(а|е|ом|у)"
]
}

View File

@ -1,6 +1,6 @@
---
toc_priority: 71
toc_title: Source Code
toc_title: Source Code Browser
---
# Browse ClickHouse Source Code {#browse-clickhouse-source-code}

View File

@ -1,6 +1,6 @@
---
toc_priority: 67
toc_title: How to Build ClickHouse on Linux for AARCH64 (ARM64)
toc_title: Build on Linux for AARCH64 (ARM64)
---
# How to Build ClickHouse on Linux for AARCH64 (ARM64) Architecture {#how-to-build-clickhouse-on-linux-for-aarch64-arm64-architecture}
@ -9,7 +9,7 @@ This is for the case when you have Linux machine and want to use it to build `cl
The cross-build for AARCH64 is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8}
## Install Clang-8 {#install-clang-8}
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example, in Ubuntu Bionic you can use the following commands:
@ -20,7 +20,7 @@ sudo apt-get update
sudo apt-get install clang-8
```
# Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
``` bash
cd ClickHouse
@ -29,7 +29,7 @@ wget 'https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build-aarch64/cmake/toolchain/linux-aarch64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}
## Build ClickHouse {#build-clickhouse}
``` bash
cd ClickHouse

View File

@ -1,6 +1,6 @@
---
toc_priority: 66
toc_title: How to Build ClickHouse on Linux for Mac OS X
toc_title: Build on Linux for Mac OS X
---
# How to Build ClickHouse on Linux for Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x}
@ -9,7 +9,7 @@ This is for the case when you have Linux machine and want to use it to build `cl
The cross-build for Mac OS X is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8}
## Install Clang-8 {#install-clang-8}
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example the commands for Bionic are like:
@ -19,7 +19,7 @@ sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8
sudo apt-get install clang-8
```
# Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
Lets remember the path where we install `cctools` as ${CCTOOLS}
@ -47,7 +47,7 @@ mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}
## Build ClickHouse {#build-clickhouse}
``` bash
cd ClickHouse

View File

@ -1,6 +1,6 @@
---
toc_priority: 65
toc_title: How to Build ClickHouse on Mac OS X
toc_title: Build on Mac OS X
---
# How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
@ -45,14 +45,12 @@ $ cd ..
## Caveats {#caveats}
If you intend to run clickhouse-server, make sure to increase the systems maxfiles variable.
If you intend to run `clickhouse-server`, make sure to increase the systems maxfiles variable.
!!! info "Note"
Youll need to use sudo.
To do so, create the following file:
/Library/LaunchDaemons/limit.maxfiles.plist:
To do so, create the `/Library/LaunchDaemons/limit.maxfiles.plist` file with the following content:
``` xml
<?xml version="1.0" encoding="UTF-8"?>

View File

@ -35,6 +35,7 @@ toc_title: Third-Party Libraries Used
| poco | [Boost Software License - Version 1.0](https://github.com/ClickHouse-Extras/poco/blob/fe5505e56c27b6ecb0dcbc40c49dc2caf4e9637f/LICENSE) |
| protobuf | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/protobuf/blob/12735370922a35f03999afff478e1c6d7aa917a4/LICENSE) |
| re2 | [BSD 3-Clause License](https://github.com/google/re2/blob/7cf8b88e8f70f97fd4926b56aa87e7f53b2717e0/LICENSE) |
| sentry-native | [MIT License](https://github.com/getsentry/sentry-native/blob/master/LICENSE) |
| UnixODBC | [LGPL v2.1](https://github.com/ClickHouse-Extras/UnixODBC/tree/b0ad30f7f6289c12b76f04bfb9d466374bb32168) |
| zlib-ng | [Zlib License](https://github.com/ClickHouse-Extras/zlib-ng/blob/develop/LICENSE.md) |
| zstd | [BSD 3-Clause License](https://github.com/facebook/zstd/blob/dev/LICENSE) |

View File

@ -1,6 +1,6 @@
---
toc_priority: 68
toc_title: How to Write C++ Code
toc_title: C++ Guide
---
# How to Write C++ Code {#how-to-write-c-code}

View File

@ -1,6 +1,6 @@
---
toc_priority: 69
toc_title: How to Run ClickHouse Tests
toc_title: Testing
---
# ClickHouse Testing {#clickhouse-testing}
@ -25,12 +25,7 @@ Tests should use (create, drop, etc) only tables in `test` database that is assu
If you want to use distributed queries in functional tests, you can leverage `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in server configuration file like `test_shard_localhost`.
Some tests are marked with `zookeeper`, `shard` or `long` in their names.
`zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that
requires server to listen `127.0.0.*`; `distributed` or `global` have the same
meaning. `long` is for tests that run slightly longer that one second. You can
disable these groups of tests using `--no-zookeeper`, `--no-shard` and
`--no-long` options, respectively.
Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that requires server to listen `127.0.0.*`; `distributed` or `global` have the same meaning. `long` is for tests that run slightly longer that one second. You can disable these groups of tests using `--no-zookeeper`, `--no-shard` and `--no-long` options, respectively.
## Known Bugs {#known-bugs}
@ -153,11 +148,11 @@ Motivation:
Normally we release and run all tests on a single variant of ClickHouse build. But there are alternative build variants that are not thoroughly tested. Examples:
- build on FreeBSD;
- build on Debian with libraries from system packages;
- build with shared linking of libraries;
- build on AArch64 platform;
- build on PowerPc platform.
- build on FreeBSD
- build on Debian with libraries from system packages
- build with shared linking of libraries
- build on AArch64 platform
- build on PowerPc platform
For example, build with system packages is bad practice, because we cannot guarantee what exact version of packages a system will have. But this is really needed by Debian maintainers. For this reason we at least have to support this variant of build. Another example: shared linking is a common source of trouble, but it is needed for some enthusiasts.
@ -177,22 +172,22 @@ For production builds, gcc is used (it still generates slightly more efficient c
## Sanitizers {#sanitizers}
**Address sanitizer**.
### Address sanitizer
We run functional and integration tests under ASan on per-commit basis.
**Valgrind (Memcheck)**.
### Valgrind (Memcheck)
We run functional tests under Valgrind overnight. It takes multiple hours. Currently there is one known false positive in `re2` library, see [this article](https://research.swtch.com/sparse).
**Undefined behaviour sanitizer.**
### Undefined behaviour sanitizer
We run functional and integration tests under ASan on per-commit basis.
**Thread sanitizer**.
### Thread sanitizer
We run functional tests under TSan on per-commit basis. We still dont run integration tests under TSan on per-commit basis.
**Memory sanitizer**.
### Memory sanitizer
Currently we still dont use MSan.
**Debug allocator.**
### Debug allocator
Debug version of `jemalloc` is used for debug build.
## Fuzzing {#fuzzing}
@ -227,7 +222,7 @@ If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of t
## Code Style {#code-style}
Code style rules are described [here](https://clickhouse.tech/docs/en/development/style/).
Code style rules are described [here](style.md).
To check for some common style violations, you can use `utils/check-style` script.

View File

@ -1,13 +1,14 @@
---
toc_folder_title: Example Datasets
toc_priority: 12
toc_priority: 15
toc_title: Introduction
---
# Example Datasets {#example-datasets}
This section describes how to obtain example datasets and import them into ClickHouse.
For some datasets example queries are also available.
This section describes how to obtain example datasets and import them into ClickHouse. For some datasets example queries are also available.
The list of documented datasets:
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)

View File

@ -37,6 +37,7 @@ The queries are executed as a read-only user. It implies some limitations:
- INSERT queries are not allowed
The following settings are also enforced:
- [max\_result\_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
- [max\_result\_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
- [result\_overflow\_mode=break](../operations/settings/query_complexity/#result-overflow-mode)

View File

@ -397,7 +397,6 @@ The cache is shared for the server and memory is allocated as needed. The cache
``` xml
<mark_cache_size>5368709120</mark_cache_size>
```
## max\_server\_memory\_usage {#max_server_memory_usage}
Limits total RAM usage by the ClickHouse server. You can specify it only for the default profile.
@ -411,11 +410,37 @@ Default value: `0`.
**Additional Info**
On hosts with low RAM and swap, you possibly need setting `max_server_memory_usage_to_ram_ratio > 1`.
The default `max_server_memory_usage` value is calculated as `memory_amount * max_server_memory_usage_to_ram_ratio`.
**See also**
- [max\_memory\_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage)
- [max_server_memory_usage_to_ram_ratio](#max_server_memory_usage_to_ram_ratio)
## max_server_memory_usage_to_ram_ratio {#max_server_memory_usage_to_ram_ratio}
Defines the fraction of total physical RAM amount, available to the Clickhouse server. If the server tries to utilize more, the memory is cut down to the appropriate amount.
Possible values:
- Positive double.
- 0 — The Clickhouse server can use all available RAM.
Default value: `0`.
**Usage**
On hosts with low RAM and swap, you possibly need setting `max_server_memory_usage_to_ram_ratio` larger than 1.
**Example**
``` xml
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
```
**See Also**
- [max_server_memory_usage](#max_server_memory_usage)
## max\_concurrent\_queries {#max-concurrent-queries}

View File

@ -585,6 +585,31 @@ Possible values:
Default value: 0.
## network_compression_method {#network_compression_method}
Sets the method of data compression that is used for communication between servers and between server and [clickhouse-client](../../interfaces/cli.md).
Possible values:
- `LZ4` — sets LZ4 compression method.
- `ZSTD` — sets ZSTD compression method.
Default value: `LZ4`.
**See Also**
- [network_zstd_compression_level](#network_zstd_compression_level)
## network_zstd_compression_level {#network_zstd_compression_level}
Adjusts the level of ZSTD compression. Used only when [network_compression_method](#network_compression_method) is set to `ZSTD`.
Possible values:
- Positive integer from 1 to 15.
Default value: `1`.
## log\_queries {#settings-log-queries}
Setting up query logging.

View File

@ -16,6 +16,8 @@ By default `clickhouse-local` does not have access to data on the same host, but
!!! warning "Warning"
It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error.
For temporary data an unique temporary data directory is created by default. If you want to override this behavior the data directory can be explicitly specified with the `-- --path` option.
## Usage {#usage}
Basic usage:
@ -40,6 +42,7 @@ Arguments:
Also there are arguments for each ClickHouse configuration variable which are more commonly used instead of `--config-file`.
## Examples {#examples}
``` bash

View File

@ -111,4 +111,43 @@ SELECT alphaTokens('abca1abc')
└─────────────────────────┘
```
## extractAllGroups(text, regexp) {#extractallgroups}
Extracts all groups from non-overlapping substrings matched by a regular expression.
**Syntax**
``` sql
extractAllGroups(text, regexp)
```
**Parameters**
- `text` — [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned values**
- If the function finds at least one matching group, it returns `Array(Array(String))` column, clustered by group_id (1 to N, where N is number of capturing groups in `regexp`).
- If there is no matching group, returns an empty array.
Type: [Array](../data-types/array.md).
**Example**
Query:
``` sql
SELECT extractAllGroups('abc=123, 8="hkl"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)');
```
Result:
``` text
┌─extractAllGroups('abc=123, 8="hkl"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','123'],['8','"hkl"']] │
└───────────────────────────────────────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/splitting_merging_functions/) <!--hide-->

View File

@ -385,12 +385,37 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat
**Дополнительная информация**
На серверах с небольшим объёмом RAM и файла подкачки может потребоваться настройка `max_server_memory_usage_to_ram_ratio > 1`.
Значение по умолчанию для `max_server_memory_usage` рассчитывается как `memory_amount * max_server_memory_usage_to_ram_ratio`.
**См. также**
- [max_memory_usage](../settings/query-complexity.md#settings_max_memory_usage)
## max_server_memory_usage_to_ram_ratio {#max_server_memory_usage_to_ram_ratio}
Определяет долю оперативной памяти, доступную для использования сервером Clickhouse. Если сервер попытается использовать больше, предоставляемый ему объём памяти будет ограничен до расчётного значения.
Возможные значения:
- Положительное число с плавающей запятой.
- 0 — сервер Clickhouse может использовать всю оперативную память.
Значение по умолчанию: `0`.
**Использование**
На серверах с небольшим объёмом оперативной памяти и файла подкачки может потребоваться установить настройку `max_server_memory_usage_to_ram_ratio` в значение, большее 1.
**Пример**
``` xml
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
```
**См. также**
- [max_server_memory_usage](#max_server_memory_usage)
## max\_connections {#max-connections}
Максимальное количество входящих соединений.

View File

@ -520,6 +520,31 @@ ClickHouse использует этот параметр при чтении д
Значение по умолчанию: 0.
## network_compression_method {#network_compression_method}
Задает метод сжатия данных, используемый при обмене данными между серверами и при обмене между сервером и [clickhouse-client](../../interfaces/cli.md).
Возможные значения:
- `LZ4` — устанавливает метод сжатия LZ4.
- `ZSTD` — устанавливает метод сжатия ZSTD.
Значение по умолчанию: `LZ4`.
См. также:
- [network_zstd_compression_level](#network_zstd_compression_level)
## network_zstd_compression_level {#network_zstd_compression_level}
Регулирует уровень сжатия ZSTD. Используется только тогда, когда [network_compression_method](#network_compression_method) имеет значение `ZSTD`.
Возможные значения:
- Положительное целое число от 1 до 15.
Значение по умолчанию: `1`.
## log\_queries {#settings-log-queries}
Установка логирования запроса.

View File

@ -33,4 +33,42 @@ SELECT alphaTokens('abca1abc')
└─────────────────────────┘
```
## extractAllGroups(text, regexp) {#extractallgroups}
Выделяет все группы из неперекрывающихся подстрок, которые соответствуют регулярному выражению.
**Синтаксис**
``` sql
extractAllGroups(text, regexp)
```
**Параметры**
- `text` — [String](../data-types/string.md) или [FixedString](../data-types/fixedstring.md).
- `regexp` — Регулярное выражение. Константа. [String](../data-types/string.md) или [FixedString](../data-types/fixedstring.md).
**Возвращаемые значения**
- Если найдена хотя бы одна подходящая группа, функция возвращает столбец вида `Array(Array(String))`, сгруппированный по идентификатору группы (от 1 до N, где N — количество групп с захватом содержимого в `regexp`).
- Если подходящих групп не найдено, возвращает пустой массив.
Тип: [Array](../data-types/array.md).
**Пример использования**
Запрос:
``` sql
SELECT extractAllGroups('abc=123, 8="hkl"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)');
```
Результат:
``` text
┌─extractAllGroups('abc=123, 8="hkl"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','123'],['8','"hkl"']] │
└───────────────────────────────────────────────────────────────────────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/splitting_merging_functions/) <!--hide-->

View File

@ -293,7 +293,7 @@ Examples of how this hierarchy is treated:
- The `MODIFY SETTING` privilege allows modifying table engine settings. It doesnt affect settings or server configuration parameters.
- The `ATTACH` operation needs the [CREATE](#grant-create) privilege.
- The `DETACH` operation needs the [DROP](#grant-drop) privilege.
- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/misc.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege.
- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/misc.md#kill-mutation-statement) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege.
### CREATE {#grant-create}
@ -312,7 +312,7 @@ Allows executing [CREATE](../../sql-reference/statements/create.md) and [ATTACH]
### DROP {#grant-drop}
Allows executing [DROP](../../sql-reference/statements/misc.md#drop) and [DETACH](../../sql-reference/statements/misc.md#detach) queries according to the following hierarchy of privileges:
Allows executing [DROP](../../sql-reference/statements/misc.md#drop) and [DETACH](../../sql-reference/statements/misc.md#detach-statement) queries according to the following hierarchy of privileges:
- `DROP`. Level:
- `DROP DATABASE`. Level: `DATABASE`

View File

@ -126,7 +126,7 @@ def adjust_markdown_html(content):
def minify_html(content):
return htmlmin.minify(content,
remove_comments=False,
remove_empty_space=True,
remove_empty_space=False,
remove_all_empty_space=False,
reduce_empty_attributes=True,
reduce_boolean_attributes=False,

View File

@ -62,8 +62,8 @@ Block CheckSortedBlockInputStream::readImpl()
else if (res > 0)
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Sort order of blocks violated for column {}, left: {}, right: {}.",
backQuoteIfNeed(elem.column_name),
"Sort order of blocks violated for column number {}, left: {}, right: {}.",
column_number,
applyVisitor(FieldVisitorDump(), (*left_col)[left_index]),
applyVisitor(FieldVisitorDump(), (*right_col)[right_index]));
}

View File

@ -89,7 +89,7 @@ endif()
target_link_libraries(clickhouse_functions PRIVATE hyperscan)
if(USE_SIMDJSON)
target_link_libraries(clickhouse_functions PRIVATE ${SIMDJSON_LIBRARY})
target_link_libraries(clickhouse_functions PRIVATE simdjson)
endif()
if(USE_RAPIDJSON)

View File

@ -1,6 +1,5 @@
#pragma once
#include <common/StringRef.h>
#include <Common/Exception.h>
#include <Core/Types.h>
@ -15,43 +14,73 @@ namespace ErrorCodes
/// It can't do anything useful and just throws an exception.
struct DummyJSONParser
{
static constexpr bool need_preallocate = false;
void preallocate(size_t) {}
class Array;
class Object;
bool parse(const StringRef &) { throw Exception{"Functions JSON* are not supported without AVX2", ErrorCodes::NOT_IMPLEMENTED}; }
class Element
{
public:
Element() {}
bool isInt64() const { return false; }
bool isUInt64() const { return false; }
bool isDouble() const { return false; }
bool isString() const { return false; }
bool isArray() const { return false; }
bool isObject() const { return false; }
bool isBool() const { return false; }
bool isNull() const { return false; }
using Iterator = std::nullptr_t;
Iterator getRoot() const { return nullptr; }
Int64 getInt64() const { return 0; }
UInt64 getUInt64() const { return 0; }
double getDouble() const { return 0; }
bool getBool() const { return false; }
std::string_view getString() const { return {}; }
Array getArray() const;
Object getObject() const;
};
static bool isInt64(const Iterator &) { return false; }
static bool isUInt64(const Iterator &) { return false; }
static bool isDouble(const Iterator &) { return false; }
static bool isString(const Iterator &) { return false; }
static bool isArray(const Iterator &) { return false; }
static bool isObject(const Iterator &) { return false; }
static bool isBool(const Iterator &) { return false; }
static bool isNull(const Iterator &) { return true; }
class Array
{
public:
class Iterator
{
public:
Element operator*() const { return {}; }
Iterator & operator ++() { return *this; }
Iterator operator ++(int) { return *this; }
friend bool operator ==(const Iterator &, const Iterator &) { return true; }
friend bool operator !=(const Iterator &, const Iterator &) { return false; }
};
static Int64 getInt64(const Iterator &) { return 0; }
static UInt64 getUInt64(const Iterator &) { return 0; }
static double getDouble(const Iterator &) { return 0; }
static bool getBool(const Iterator &) { return false; }
static StringRef getString(const Iterator &) { return {}; }
Iterator begin() const { return {}; }
Iterator end() const { return {}; }
size_t size() const { return 0; }
Element operator[](size_t) const { return {}; }
};
static size_t sizeOfArray(const Iterator &) { return 0; }
static bool firstArrayElement(Iterator &) { return false; }
static bool arrayElementByIndex(Iterator &, size_t) { return false; }
static bool nextArrayElement(Iterator &) { return false; }
class Object
{
public:
using KeyValuePair = std::pair<std::string_view, Element>;
static size_t sizeOfObject(const Iterator &) { return 0; }
static bool firstObjectMember(Iterator &) { return false; }
static bool firstObjectMember(Iterator &, StringRef &) { return false; }
static bool objectMemberByIndex(Iterator &, size_t) { return false; }
static bool objectMemberByName(Iterator &, const StringRef &) { return false; }
static bool nextObjectMember(Iterator &) { return false; }
static bool nextObjectMember(Iterator &, StringRef &) { return false; }
static bool isObjectMember(const Iterator &) { return false; }
static StringRef getKey(const Iterator &) { return {}; }
class Iterator
{
public:
KeyValuePair operator *() const { return {}; }
Iterator & operator ++() { return *this; }
Iterator operator ++(int) { return *this; }
friend bool operator ==(const Iterator &, const Iterator &) { return true; }
friend bool operator !=(const Iterator &, const Iterator &) { return false; }
};
Iterator begin() const { return {}; }
Iterator end() const { return {}; }
size_t size() const { return 0; }
KeyValuePair operator[](size_t) const { return {}; }
bool find(const std::string_view &, Element &) const { return false; }
};
bool parse(const std::string_view &, Element &) { throw Exception{"Functions JSON* are not supported", ErrorCodes::NOT_IMPLEMENTED}; }
};
}

View File

@ -4,6 +4,57 @@
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
std::vector<FunctionJSONHelpers::Move> FunctionJSONHelpers::prepareMoves(const char * function_name, Block & block, const ColumnNumbers & arguments, size_t first_index_argument, size_t num_index_arguments)
{
std::vector<Move> moves;
moves.reserve(num_index_arguments);
for (const auto i : ext::range(first_index_argument, first_index_argument + num_index_arguments))
{
const auto & column = block.getByPosition(arguments[i]);
if (!isString(column.type) && !isInteger(column.type))
throw Exception{"The argument " + std::to_string(i + 1) + " of function " + String(function_name)
+ " should be a string specifying key or an integer specifying index, illegal type: " + column.type->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
if (column.column && isColumnConst(*column.column))
{
const auto & column_const = assert_cast<const ColumnConst &>(*column.column);
if (isString(column.type))
moves.emplace_back(MoveType::ConstKey, column_const.getValue<String>());
else
moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0));
}
else
{
if (isString(column.type))
moves.emplace_back(MoveType::Key, "");
else
moves.emplace_back(MoveType::Index, 0);
}
}
return moves;
}
size_t FunctionJSONHelpers::calculateMaxSize(const ColumnString::Offsets & offsets)
{
size_t max_size = 0;
for (const auto i : ext::range(0, offsets.size()))
{
size_t size = offsets[i] - offsets[i - 1];
if (max_size < size)
max_size = size;
}
if (max_size)
--max_size;
return max_size;
}
void registerFunctionsJSON(FunctionFactory & factory)
{

File diff suppressed because it is too large Load Diff

View File

@ -6,9 +6,7 @@
#if USE_RAPIDJSON
# include <Core/Types.h>
# include <Common/Exception.h>
# include <common/StringRef.h>
# include <common/defines.h>
# include <rapidjson/document.h>
@ -19,197 +17,130 @@ namespace DB
/// It provides ability to parse JSONs using rapidjson library.
struct RapidJSONParser
{
static constexpr bool need_preallocate = false;
void preallocate(size_t) {}
class Array;
class Object;
bool parse(const StringRef & json)
{
rapidjson::MemoryStream ms(json.data, json.size);
rapidjson::EncodedInputStream<rapidjson::UTF8<>, rapidjson::MemoryStream> is(ms);
document.ParseStream(is);
return !document.HasParseError() && (ms.Tell() == json.size);
}
struct Iterator
class Element
{
public:
Iterator() {}
Iterator(const rapidjson::Document & document_) : value(&document_) {}
Iterator(const Iterator & src)
: value(src.value)
, is_object_member(src.is_object_member)
, current_in_array(src.current_in_array)
, end_of_array(src.end_of_array) {}
ALWAYS_INLINE Element() {}
ALWAYS_INLINE Element(const rapidjson::Value & value_) : ptr(&value_) {}
Iterator & operator =(const Iterator & src)
ALWAYS_INLINE bool isInt64() const { return ptr->IsInt64(); }
ALWAYS_INLINE bool isUInt64() const { return ptr->IsUint64(); }
ALWAYS_INLINE bool isDouble() const { return ptr->IsDouble(); }
ALWAYS_INLINE bool isString() const { return ptr->IsString(); }
ALWAYS_INLINE bool isArray() const { return ptr->IsArray(); }
ALWAYS_INLINE bool isObject() const { return ptr->IsObject(); }
ALWAYS_INLINE bool isBool() const { return ptr->IsBool(); }
ALWAYS_INLINE bool isNull() const { return ptr->IsNull(); }
ALWAYS_INLINE Int64 getInt64() const { return ptr->GetInt64(); }
ALWAYS_INLINE UInt64 getUInt64() const { return ptr->GetUint64(); }
ALWAYS_INLINE double getDouble() const { return ptr->GetDouble(); }
ALWAYS_INLINE bool getBool() const { return ptr->GetBool(); }
ALWAYS_INLINE std::string_view getString() const { return {ptr->GetString(), ptr->GetStringLength()}; }
Array getArray() const;
Object getObject() const;
private:
const rapidjson::Value * ptr = nullptr;
};
class Array
{
public:
class Iterator
{
value = src.value;
is_object_member = src.is_object_member;
current_in_array = src.current_in_array;
end_of_array = src.end_of_array;
return *this;
public:
ALWAYS_INLINE Iterator(const rapidjson::Value::ConstValueIterator & it_) : it(it_) {}
ALWAYS_INLINE Element operator*() const { return *it; }
ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; }
ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; }
ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return left.it == right.it; }
ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return !(left == right); }
private:
rapidjson::Value::ConstValueIterator it;
};
ALWAYS_INLINE Array(const rapidjson::Value & value_) : ptr(&value_) {}
ALWAYS_INLINE Iterator begin() const { return ptr->Begin(); }
ALWAYS_INLINE Iterator end() const { return ptr->End(); }
ALWAYS_INLINE size_t size() const { return ptr->Size(); }
ALWAYS_INLINE Element operator[](size_t index) const { return *(ptr->Begin() + index); }
private:
const rapidjson::Value * ptr = nullptr;
};
class Object
{
public:
using KeyValuePair = std::pair<std::string_view, Element>;
class Iterator
{
public:
ALWAYS_INLINE Iterator(const rapidjson::Value::ConstMemberIterator & it_) : it(it_) {}
ALWAYS_INLINE KeyValuePair operator *() const { std::string_view key{it->name.GetString(), it->name.GetStringLength()}; return {key, it->value}; }
ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; }
ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; }
ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return left.it == right.it; }
ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return !(left == right); }
private:
rapidjson::Value::ConstMemberIterator it;
};
ALWAYS_INLINE Object(const rapidjson::Value & value_) : ptr(&value_) {}
ALWAYS_INLINE Iterator begin() const { return ptr->MemberBegin(); }
ALWAYS_INLINE Iterator end() const { return ptr->MemberEnd(); }
ALWAYS_INLINE size_t size() const { return ptr->MemberCount(); }
ALWAYS_INLINE KeyValuePair operator[](size_t index) const
{
auto it = ptr->MemberBegin() + index;
std::string_view key{it->name.GetString(), it->name.GetStringLength()};
return KeyValuePair{key, it->value};
}
bool isInt64() const { return value->IsInt64(); }
bool isUInt64() const { return value->IsUint64(); }
bool isDouble() const { return value->IsDouble(); }
bool isBool() const { return value->IsBool(); }
bool isString() const { return value->IsString(); }
bool isArray() const { return value->IsArray(); }
bool isObject() const { return value->IsObject(); }
bool isNull() const { return value->IsNull(); }
Int64 getInt64() const { return value->GetInt64(); }
UInt64 getUInt64() const { return value->GetUint64(); }
double getDouble() const { return value->GetDouble(); }
bool getBool() const { return value->GetBool(); }
StringRef getString() const { return {value->GetString(), value->GetStringLength()}; }
size_t sizeOfArray() const { return value->Size(); }
bool arrayElementByIndex(size_t index)
ALWAYS_INLINE bool find(const std::string_view & key, Element & result) const
{
if (index >= value->Size())
auto it = ptr->FindMember(rapidjson::StringRef(key.data(), key.length()));
if (it == ptr->MemberEnd())
return false;
setRange(value->Begin() + index, value->End());
value = current_in_array++;
result = it->value;
return true;
}
bool nextArrayElement()
{
if (current_in_array == end_of_array)
return false;
value = current_in_array++;
return true;
}
size_t sizeOfObject() const { return value->MemberCount(); }
bool objectMemberByIndex(size_t index)
{
if (index >= value->MemberCount())
return false;
setRange(value->MemberBegin() + index, value->MemberEnd());
value = &(current_in_object++)->value;
return true;
}
bool objectMemberByIndex(size_t index, StringRef & key)
{
if (index >= value->MemberCount())
return false;
setRange(value->MemberBegin() + index, value->MemberEnd());
key = getKeyImpl(current_in_object);
value = &(current_in_object++)->value;
return true;
}
bool objectMemberByName(const StringRef & name)
{
auto it = value->FindMember(name.data);
if (it == value->MemberEnd())
return false;
setRange(it, value->MemberEnd());
value = &(current_in_object++)->value;
return true;
}
bool nextObjectMember()
{
if (current_in_object == end_of_object)
return false;
value = &(current_in_object++)->value;
return true;
}
bool nextObjectMember(StringRef & key)
{
if (current_in_object == end_of_object)
return false;
key = getKeyImpl(current_in_object);
value = &(current_in_object++)->value;
return true;
}
bool isObjectMember() const { return is_object_member; }
StringRef getKey() const
{
return getKeyImpl(current_in_object - 1);
}
private:
void setRange(rapidjson::Value::ConstValueIterator current, rapidjson::Value::ConstValueIterator end)
{
current_in_array = &*current;
end_of_array = &*end;
is_object_member = false;
}
void setRange(rapidjson::Value::ConstMemberIterator current, rapidjson::Value::ConstMemberIterator end)
{
current_in_object = &*current;
end_of_object = &*end;
is_object_member = true;
}
static StringRef getKeyImpl(const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * member)
{
const auto & name = member->name;
return {name.GetString(), name.GetStringLength()};
}
const rapidjson::Value * value = nullptr;
bool is_object_member = false;
union
{
const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * current_in_object;
const rapidjson::Value * current_in_array;
};
union
{
const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * end_of_object;
const rapidjson::Value * end_of_array;
};
const rapidjson::Value * ptr = nullptr;
};
Iterator getRoot() { return Iterator{document}; }
static bool isInt64(const Iterator & it) { return it.isInt64(); }
static bool isUInt64(const Iterator & it) { return it.isUInt64(); }
static bool isDouble(const Iterator & it) { return it.isDouble(); }
static bool isBool(const Iterator & it) { return it.isBool(); }
static bool isString(const Iterator & it) { return it.isString(); }
static bool isArray(const Iterator & it) { return it.isArray(); }
static bool isObject(const Iterator & it) { return it.isObject(); }
static bool isNull(const Iterator & it) { return it.isNull(); }
static Int64 getInt64(const Iterator & it) { return it.getInt64(); }
static UInt64 getUInt64(const Iterator & it) { return it.getUInt64(); }
static double getDouble(const Iterator & it) { return it.getDouble(); }
static bool getBool(const Iterator & it) { return it.getBool(); }
static StringRef getString(const Iterator & it) { return it.getString(); }
static size_t sizeOfArray(const Iterator & it) { return it.sizeOfArray(); }
static bool firstArrayElement(Iterator & it) { return it.arrayElementByIndex(0); }
static bool arrayElementByIndex(Iterator & it, size_t index) { return it.arrayElementByIndex(index); }
static bool nextArrayElement(Iterator & it) { return it.nextArrayElement(); }
static size_t sizeOfObject(const Iterator & it) { return it.sizeOfObject(); }
static bool firstObjectMember(Iterator & it) { return it.objectMemberByIndex(0); }
static bool firstObjectMember(Iterator & it, StringRef & first_key) { return it.objectMemberByIndex(0, first_key); }
static bool objectMemberByIndex(Iterator & it, size_t index) { return it.objectMemberByIndex(index); }
static bool objectMemberByName(Iterator & it, const StringRef & name) { return it.objectMemberByName(name); }
static bool nextObjectMember(Iterator & it) { return it.nextObjectMember(); }
static bool nextObjectMember(Iterator & it, StringRef & next_key) { return it.nextObjectMember(next_key); }
static bool isObjectMember(const Iterator & it) { return it.isObjectMember(); }
static StringRef getKey(const Iterator & it) { return it.getKey(); }
/// Parses the whole `json` buffer and stores the document root in `result`.
/// Returns false on a parse error or if bytes remain after the JSON value
/// (the `ms.Tell() != json.size()` check rejects trailing garbage).
/// NOTE(review): reuses the member `document`, so a previous parse result
/// (and any Element referring to it) is invalidated by this call.
bool parse(const std::string_view & json, Element & result)
{
    rapidjson::MemoryStream ms(json.data(), json.size());
    /// Encoded input stream over the raw memory stream; presumably used so a
    /// leading UTF-8 BOM is handled — confirm against RapidJSON stream docs.
    rapidjson::EncodedInputStream<rapidjson::UTF8<>, rapidjson::MemoryStream> is(ms);
    document.ParseStream(is);
    if (document.HasParseError() || (ms.Tell() != json.size()))
        return false;
    result = document;
    return true;
}
private:
rapidjson::Document document;
};
/// Conversions from Element to the typed wrappers, defined out-of-line
/// because Array/Object are declared after Element.
/// No type check is performed here — callers are presumably expected to
/// check isArray()/isObject() first; confirm at call sites.
inline ALWAYS_INLINE RapidJSONParser::Array RapidJSONParser::Element::getArray() const
{
    return *ptr;
}

inline ALWAYS_INLINE RapidJSONParser::Object RapidJSONParser::Element::getObject() const
{
    return *ptr;
}
}
#endif

View File

@ -7,9 +7,8 @@
#if USE_SIMDJSON
# include <Core/Types.h>
# include <Common/Exception.h>
# include <common/StringRef.h>
# include <simdjson/jsonparser.h>
# include <common/defines.h>
# include <simdjson.h>
namespace DB
@ -23,121 +22,138 @@ namespace ErrorCodes
/// It provides ability to parse JSONs using simdjson library.
struct SimdJSONParser
{
static constexpr bool need_preallocate = true;
class Array;
class Object;
void preallocate(size_t max_size)
class Element
{
if (!pj.allocate_capacity(max_size))
throw Exception{"Can not allocate memory for " + std::to_string(max_size) + " units when parsing JSON",
public:
ALWAYS_INLINE Element() {}
ALWAYS_INLINE Element(const simdjson::dom::element & element_) : element(element_) {}
ALWAYS_INLINE bool isInt64() const { return element.type() == simdjson::dom::element_type::INT64; }
ALWAYS_INLINE bool isUInt64() const { return element.type() == simdjson::dom::element_type::UINT64; }
ALWAYS_INLINE bool isDouble() const { return element.type() == simdjson::dom::element_type::DOUBLE; }
ALWAYS_INLINE bool isString() const { return element.type() == simdjson::dom::element_type::STRING; }
ALWAYS_INLINE bool isArray() const { return element.type() == simdjson::dom::element_type::ARRAY; }
ALWAYS_INLINE bool isObject() const { return element.type() == simdjson::dom::element_type::OBJECT; }
ALWAYS_INLINE bool isBool() const { return element.type() == simdjson::dom::element_type::BOOL; }
ALWAYS_INLINE bool isNull() const { return element.type() == simdjson::dom::element_type::NULL_VALUE; }
ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().first; }
ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().first; }
ALWAYS_INLINE double getDouble() const { return element.get_double().first; }
ALWAYS_INLINE bool getBool() const { return element.get_bool().first; }
ALWAYS_INLINE std::string_view getString() const { return element.get_string().first; }
ALWAYS_INLINE Array getArray() const;
ALWAYS_INLINE Object getObject() const;
private:
simdjson::dom::element element;
};
/// Lightweight view over simdjson::dom::array with an STL-like interface
/// (begin/end/size/operator[]) so callers can use range-for iteration.
class Array
{
public:
    /// Forward iterator over array elements; wraps simdjson's array iterator.
    class Iterator
    {
    public:
        ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator & it_) : it(it_) {}
        ALWAYS_INLINE Element operator *() const { return *it; }
        ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; }
        ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; }
        ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return left.it != right.it; }
        ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return !(left != right); }
    private:
        simdjson::dom::array::iterator it;
    };

    ALWAYS_INLINE Array(const simdjson::dom::array & array_) : array(array_) {}
    ALWAYS_INLINE Iterator begin() const { return array.begin(); }
    ALWAYS_INLINE Iterator end() const { return array.end(); }
    ALWAYS_INLINE size_t size() const { return array.size(); }
    /// NOTE(review): at(index)'s error code is discarded (.first) — callers are
    /// presumably expected to keep index < size(); confirm before relying on it.
    ALWAYS_INLINE Element operator[](size_t index) const { return array.at(index).first; }

private:
    simdjson::dom::array array;
};
/// Lightweight view over simdjson::dom::object exposing iteration over
/// (key, value) pairs and lookup by key.
class Object
{
public:
    using KeyValuePair = std::pair<std::string_view, Element>;

    /// Forward iterator yielding KeyValuePair; wraps simdjson's object iterator.
    class Iterator
    {
    public:
        ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator & it_) : it(it_) {}
        ALWAYS_INLINE KeyValuePair operator *() const { const auto & res = *it; return {res.key, res.value}; }
        ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; }
        ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; }
        ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return left.it != right.it; }
        ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return !(left != right); }
    private:
        simdjson::dom::object::iterator it;
    };

    ALWAYS_INLINE Object(const simdjson::dom::object & object_) : object(object_) {}
    ALWAYS_INLINE Iterator begin() const { return object.begin(); }
    ALWAYS_INLINE Iterator end() const { return object.end(); }
    ALWAYS_INLINE size_t size() const { return object.size(); }

    /// Linear in `index`: advances one step at a time from begin().
    KeyValuePair operator [](size_t index) const
    {
        Iterator it = begin();
        while (index--)
            ++it;
        return *it;
    }

    /// Looks up `key`; on success stores the value in `result` and returns true.
    /// Returns false (does not throw) when the key is absent.
    ALWAYS_INLINE bool find(const std::string_view & key, Element & result) const
    {
        auto x = object.at_key(key);
        if (x.error())
            return false;
        result = x.first;
        return true;
    }

private:
    simdjson::dom::object object;
};
/// Pre-allocates the simdjson parser's internal buffers for documents of up
/// to `max_size` bytes. Throws CANNOT_ALLOCATE_MEMORY if allocation fails.
void reserve(size_t max_size)
{
    if (parser.allocate(max_size) != simdjson::error_code::SUCCESS)
        throw Exception{"Couldn't allocate " + std::to_string(max_size) + " bytes when parsing JSON",
                        ErrorCodes::CANNOT_ALLOCATE_MEMORY};
}
bool parse(const StringRef & json) { return !json_parse(json.data, json.size, pj); }
using Iterator = simdjson::ParsedJson::Iterator;
Iterator getRoot() { return Iterator{pj}; }
static bool isInt64(const Iterator & it) { return it.is_integer(); }
static bool isUInt64(const Iterator &) { return false; /* See https://github.com/lemire/simdjson/issues/68 */ }
static bool isDouble(const Iterator & it) { return it.is_double(); }
static bool isString(const Iterator & it) { return it.is_string(); }
static bool isArray(const Iterator & it) { return it.is_array(); }
static bool isObject(const Iterator & it) { return it.is_object(); }
static bool isBool(const Iterator & it) { return it.get_type() == 't' || it.get_type() == 'f'; }
static bool isNull(const Iterator & it) { return it.is_null(); }
static Int64 getInt64(const Iterator & it) { return it.get_integer(); }
static UInt64 getUInt64(const Iterator &) { return 0; /* isUInt64() never returns true */ }
static double getDouble(const Iterator & it) { return it.get_double(); }
static bool getBool(const Iterator & it) { return it.get_type() == 't'; }
static StringRef getString(const Iterator & it) { return StringRef{it.get_string(), it.get_string_length()}; }
static size_t sizeOfArray(const Iterator & it)
bool parse(const std::string_view & json, Element & result)
{
size_t size = 0;
Iterator it2 = it;
if (it2.down())
{
do
++size;
while (it2.next());
}
return size;
}
static bool firstArrayElement(Iterator & it) { return it.down(); }
static bool arrayElementByIndex(Iterator & it, size_t index)
{
if (!it.down())
auto document = parser.parse(json.data(), json.size());
if (document.error())
return false;
while (index--)
if (!it.next())
return false;
result = document.first;
return true;
}
static bool nextArrayElement(Iterator & it) { return it.next(); }
static size_t sizeOfObject(const Iterator & it)
{
size_t size = 0;
Iterator it2 = it;
if (it2.down())
{
do
++size;
while (it2.next() && it2.next()); //-V501
}
return size;
}
static bool firstObjectMember(Iterator & it) { return it.down() && it.next(); }
static bool firstObjectMember(Iterator & it, StringRef & first_key)
{
if (!it.down())
return false;
first_key.data = it.get_string();
first_key.size = it.get_string_length();
return it.next();
}
static bool objectMemberByIndex(Iterator & it, size_t index)
{
if (!it.down())
return false;
while (index--)
if (!it.next() || !it.next()) //-V501
return false;
return it.next();
}
static bool objectMemberByName(Iterator & it, const StringRef & name) { return it.move_to_key(name.data); }
static bool nextObjectMember(Iterator & it) { return it.next() && it.next(); } //-V501
static bool nextObjectMember(Iterator & it, StringRef & next_key)
{
if (!it.next())
return false;
next_key.data = it.get_string();
next_key.size = it.get_string_length();
return it.next();
}
static bool isObjectMember(const Iterator & it) { return it.get_scope_type() == '{'; }
static StringRef getKey(const Iterator & it)
{
Iterator it2 = it;
it2.prev();
return StringRef{it2.get_string(), it2.get_string_length()};
}
private:
simdjson::ParsedJson pj;
simdjson::dom::parser parser;
};
/// Conversions from Element to the typed wrappers, defined out-of-line
/// because Array/Object are declared after Element.
/// NOTE(review): the simdjson error code is discarded (.first), so the element
/// must already be known to hold an array/object — check isArray()/isObject()
/// at the call site first.
inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const
{
    return element.get_array().first;
}

inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const
{
    return element.get_object().first;
}
}
#endif

View File

@ -89,18 +89,22 @@ inline void writeStringBinary(const std::string & s, WriteBuffer & buf)
buf.write(s.data(), s.size());
}
inline void writeStringBinary(const char * s, WriteBuffer & buf)
{
writeVarUInt(strlen(s), buf);
buf.write(s, strlen(s));
}
inline void writeStringBinary(const StringRef & s, WriteBuffer & buf)
{
writeVarUInt(s.size, buf);
buf.write(s.data, s.size);
}
inline void writeStringBinary(const char * s, WriteBuffer & buf)
{
writeStringBinary(StringRef{s}, buf);
}
inline void writeStringBinary(const std::string_view & s, WriteBuffer & buf)
{
writeStringBinary(StringRef{s}, buf);
}
template <typename T>
void writeVectorBinary(const std::vector<T> & v, WriteBuffer & buf)
@ -413,15 +417,19 @@ void writeAnyEscapedString(const char * begin, const char * end, WriteBuffer & b
}
inline void writeJSONString(const String & s, WriteBuffer & buf, const FormatSettings & settings)
inline void writeJSONString(const StringRef & s, WriteBuffer & buf, const FormatSettings & settings)
{
writeJSONString(s.data(), s.data() + s.size(), buf, settings);
writeJSONString(s.data, s.data + s.size, buf, settings);
}
inline void writeJSONString(const StringRef & ref, WriteBuffer & buf, const FormatSettings & settings)
inline void writeJSONString(const std::string_view & s, WriteBuffer & buf, const FormatSettings & settings)
{
writeJSONString(ref.data, ref.data + ref.size, buf, settings);
writeJSONString(StringRef{s}, buf, settings);
}
inline void writeJSONString(const String & s, WriteBuffer & buf, const FormatSettings & settings)
{
writeJSONString(StringRef{s}, buf, settings);
}

View File

@ -47,6 +47,10 @@ void MarkTableIdentifiersMatcher::visit(const ASTFunction & func, ASTPtr &, Data
// First argument of dictGet can be a dictionary name, perhaps with a database.
if (functionIsJoinGet(func.name) || functionIsDictGet(func.name))
{
if (func.arguments->children.empty())
{
return;
}
auto & ast = func.arguments->children.at(0);
auto opt_name = tryGetIdentifierName(ast);
if (opt_name && !data.aliases.count(*opt_name))

View File

@ -313,6 +313,18 @@ void TCPHandler::runImpl()
state.io.onException();
exception.emplace(Exception::CreateFromPocoTag{}, e);
}
// Server should die on std logic errors in debug, like with assert()
// or ErrorCodes::LOGICAL_ERROR. This helps catch these errors in
// tests.
#ifndef NDEBUG
catch (const std::logic_error & e)
{
state.io.onException();
exception.emplace(Exception::CreateFromSTDTag{}, e);
sendException(*exception, send_exception_with_stack_trace);
std::abort();
}
#endif
catch (const std::exception & e)
{
state.io.onException();

View File

@ -8,6 +8,7 @@
#include <IO/WriteHelpers.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/localBackup.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/escapeForFileName.h>
#include <common/JSON.h>
@ -521,7 +522,18 @@ void IMergeTreeDataPart::loadChecksums(bool require)
if (require)
throw Exception("No checksums.txt in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);
bytes_on_disk = calculateTotalSizeOnDisk(volume->getDisk(), getFullRelativePath());
/// If the checksums file is not present, calculate the checksums and write them to disk.
/// Check the data while we are at it.
LOG_WARNING(storage.log, "Checksums for part {} not found. Will calculate them from data on disk.", name);
checksums = checkDataPart(shared_from_this(), false);
{
auto out = volume->getDisk()->writeFile(getFullRelativePath() + "checksums.txt.tmp", 4096);
checksums.write(*out);
}
volume->getDisk()->moveFile(getFullRelativePath() + "checksums.txt.tmp", getFullRelativePath() + "checksums.txt");
bytes_on_disk = checksums.getTotalSizeOnDisk();
}
}

View File

@ -2453,19 +2453,6 @@ static void loadPartAndFixMetadataImpl(MergeTreeData::MutableDataPartPtr part)
part->loadColumnsChecksumsIndexes(false, true);
part->modification_time = disk->getLastModified(full_part_path).epochTime();
/// If the checksums file is not present, calculate the checksums and write them to disk.
/// Check the data while we are at it.
if (part->checksums.empty())
{
part->checksums = checkDataPart(part, false);
{
auto out = disk->writeFile(full_part_path + "checksums.txt.tmp", 4096);
part->checksums.write(*out);
}
disk->moveFile(full_part_path + "checksums.txt.tmp", full_part_path + "checksums.txt");
}
}
MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const VolumePtr & volume, const String & relative_path) const

View File

@ -0,0 +1,39 @@
import pytest
from helpers.cluster import ClickHouseCluster

# Single-node cluster used by the test below.
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1')


@pytest.fixture(scope="module")
def start_cluster():
    """Start the cluster once per module and always shut it down afterwards."""
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()


def test_attach_without_checksums(start_cluster):
    """ATTACH PARTITION must succeed even when checksums.txt was deleted from
    the detached part: the server recalculates checksums from data on disk."""
    node1.query("CREATE TABLE test (date Date, key Int32, value String) Engine=MergeTree ORDER BY key PARTITION by date")
    node1.query("INSERT INTO test SELECT toDate('2019-10-01'), number, toString(number) FROM numbers(100)")
    assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n"

    node1.query("ALTER TABLE test DETACH PARTITION '2019-10-01'")
    assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "0\n"
    assert node1.query("SELECT COUNT() FROM test") == "0\n"

    # `grep -e ".*"` fails when `find` produces no output, so this also asserts
    # that checksums.txt files exist in the detached part before we delete them.
    node1.exec_in_container(['bash', '-c', 'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" | grep -e ".*" '], privileged=True, user='root')
    node1.exec_in_container(['bash', '-c', 'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" -delete'], privileged=True, user='root')

    node1.query("ALTER TABLE test ATTACH PARTITION '2019-10-01'")
    assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n"
    assert node1.query("SELECT COUNT() FROM test") == "100\n"

View File

@ -74,7 +74,7 @@ body[data-spy] #content {
#content pre {
background: #eee;
background: #efefef;
padding: 1rem;
}