Merge branch 'master' into database_atomic_improvements

This commit is contained in:
Alexander Tokmakov 2020-07-22 05:24:48 +03:00
commit 9bcaaea3e0
135 changed files with 3128 additions and 1513 deletions

View File

@ -16,4 +16,4 @@ ClickHouse is an open-source column-oriented database management system that all
## Upcoming Events
* [ClickHouse at ByteDance (in Chinese)](https://mp.weixin.qq.com/s/Em-HjPylO8D7WPui4RREAQ) on July 17, 2020.
* [ClickHouse at ByteDance (in Chinese)](https://mp.weixin.qq.com/s/Em-HjPylO8D7WPui4RREAQ) on July 31, 2020.

View File

@ -30,7 +30,7 @@ struct StringRef
constexpr StringRef(const CharT * data_, size_t size_) : data(reinterpret_cast<const char *>(data_)), size(size_) {}
StringRef(const std::string & s) : data(s.data()), size(s.size()) {}
constexpr StringRef(const std::string_view & s) : data(s.data()), size(s.size()) {}
constexpr explicit StringRef(const std::string_view & s) : data(s.data()), size(s.size()) {}
constexpr StringRef(const char * data_) : StringRef(std::string_view{data_}) {}
constexpr StringRef() = default;

View File

@ -1,17 +1,8 @@
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include/simdjson/jsonparser.h")
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include/simdjson.h")
message (WARNING "submodule contrib/simdjson is missing. to fix try run: \n git submodule update --init --recursive")
return()
endif ()
if (NOT HAVE_SSE42)
message (WARNING "submodule contrib/simdjson requires support of SSE4.2 instructions")
return()
elseif (NOT HAVE_PCLMULQDQ)
message (WARNING "submodule contrib/simdjson requires support of PCLMULQDQ instructions")
return()
endif ()
option (USE_SIMDJSON "Use simdjson" ON)
set (SIMDJSON_LIBRARY "simdjson")
message(STATUS "Using simdjson=${USE_SIMDJSON}: ${SIMDJSON_LIBRARY}")
message(STATUS "Using simdjson=${USE_SIMDJSON}")

2
contrib/simdjson vendored

@ -1 +1 @@
Subproject commit 560f0742cc0895d00d78359dbdeb82064a24adb8
Subproject commit 1e4aa116e5a39e4ba23b9a93e6c7f048c5105b20

View File

@ -1,14 +1,6 @@
set(SIMDJSON_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/include")
set(SIMDJSON_SRC_DIR "${SIMDJSON_INCLUDE_DIR}/../src")
set(SIMDJSON_SRC
${SIMDJSON_SRC_DIR}/document.cpp
${SIMDJSON_SRC_DIR}/error.cpp
${SIMDJSON_SRC_DIR}/implementation.cpp
${SIMDJSON_SRC_DIR}/jsonioutil.cpp
${SIMDJSON_SRC_DIR}/jsonminifier.cpp
${SIMDJSON_SRC_DIR}/stage1_find_marks.cpp
${SIMDJSON_SRC_DIR}/stage2_build_tape.cpp
)
set(SIMDJSON_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/simdjson/src")
set(SIMDJSON_SRC ${SIMDJSON_SRC_DIR}/simdjson.cpp)
add_library(${SIMDJSON_LIBRARY} ${SIMDJSON_SRC})
target_include_directories(${SIMDJSON_LIBRARY} SYSTEM PUBLIC "${SIMDJSON_INCLUDE_DIR}" PRIVATE "${SIMDJSON_SRC_DIR}")
add_library(simdjson ${SIMDJSON_SRC})
target_include_directories(simdjson SYSTEM PUBLIC "${SIMDJSON_INCLUDE_DIR}" PRIVATE "${SIMDJSON_SRC_DIR}")

View File

@ -90,7 +90,7 @@ do
sleep 0.1
done
TESTS_TO_SKIP="parquet avro h3 odbc mysql sha256 _orc_ arrow 01098_temporary_and_external_tables 01083_expressions_in_engine_arguments hdfs 00911_tautological_compare protobuf capnproto java_hash hashing secure 00490_special_line_separators_and_characters_outside_of_bmp 00436_convert_charset 00105_shard_collations 01354_order_by_tuple_collate_const 01292_create_user 01098_msgpack_format 00929_multi_match_edit_distance 00926_multimatch 00834_cancel_http_readonly_queries_on_client_close brotli parallel_alter 00302_http_compression 00417_kill_query 01294_lazy_database_concurrent 01193_metadata_loading base64 01031_mutations_interpreter_and_context json client 01305_replica_create_drop_zookeeper 01092_memory_profiler 01355_ilike 01281_unsucceeded_insert_select_queries_counter live_view limit_memory memory_limit memory_leak 00110_external_sort 00682_empty_parts_merge 00701_rollup 00109_shard_totals_after_having"
TESTS_TO_SKIP="parquet avro h3 odbc mysql sha256 _orc_ arrow 01098_temporary_and_external_tables 01083_expressions_in_engine_arguments hdfs 00911_tautological_compare protobuf capnproto java_hash hashing secure 00490_special_line_separators_and_characters_outside_of_bmp 00436_convert_charset 00105_shard_collations 01354_order_by_tuple_collate_const 01292_create_user 01098_msgpack_format 00929_multi_match_edit_distance 00926_multimatch 00834_cancel_http_readonly_queries_on_client_close brotli parallel_alter 00302_http_compression 00417_kill_query 01294_lazy_database_concurrent 01193_metadata_loading base64 01031_mutations_interpreter_and_context json client 01305_replica_create_drop_zookeeper 01092_memory_profiler 01355_ilike 01281_unsucceeded_insert_select_queries_counter live_view limit_memory memory_limit memory_leak 00110_external_sort 00682_empty_parts_merge 00701_rollup 00109_shard_totals_after_having ddl_dictionaries 01251_dict_is_in_infinite_loop 01259_dictionary_custom_settings_ddl 01268_dictionary_direct_layout 01280_ssd_complex_key_dictionary 00652_replicated_mutations_zookeeper"
clickhouse-test -j 4 --no-long --testname --shard --zookeeper --skip $TESTS_TO_SKIP 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee /test_output/test_log.txt

View File

@ -37,6 +37,8 @@ function download
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-10_debug_none_bundled_unsplitted_disable_False_binary/clickhouse"
chmod +x clickhouse
ln -s ./clickhouse ./clickhouse-server
ln -s ./clickhouse ./clickhouse-client
}
function configure
@ -45,7 +47,8 @@ function configure
mkdir db ||:
cp -av "$repo_dir"/programs/server/config* db
cp -av "$repo_dir"/programs/server/user* db
cp -av "$repo_dir"/tests/config db/config.d
# TODO figure out which ones are needed
cp -av "$repo_dir"/tests/config/listen.xml db/config.d
cp -av "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d
}
@ -54,31 +57,55 @@ function watchdog
sleep 3600
echo "Fuzzing run has timed out"
./clickhouse client --query "select elapsed, query from system.processes" ||:
killall -9 clickhouse clickhouse-server clickhouse-client ||:
killall clickhouse-client ||:
for x in {1..10}
do
if ! pgrep -f clickhouse-client
then
break
fi
sleep 1
done
killall -9 clickhouse-client ||:
}
function fuzz
{
./clickhouse server --config-file db/config.xml -- --path db 2>&1 | tail -100000 > server.log &
./clickhouse-server --config-file db/config.xml -- --path db 2>&1 | tail -100000 > server.log &
server_pid=$!
kill -0 $server_pid
while ! ./clickhouse client --query "select 1" && kill -0 $server_pid ; do echo . ; sleep 1 ; done
./clickhouse client --query "select 1"
while ! ./clickhouse-client --query "select 1" && kill -0 $server_pid ; do echo . ; sleep 1 ; done
./clickhouse-client --query "select 1"
kill -0 $server_pid
echo Server started
fuzzer_exit_code=0
./clickhouse client --query-fuzzer-runs=1000 \
./clickhouse-client --query-fuzzer-runs=1000 \
< <(for f in $(ls ch/tests/queries/0_stateless/*.sql | sort -R); do cat "$f"; echo ';'; done) \
> >(tail -100000 > fuzzer.log) \
2>&1 \
|| fuzzer_exit_code=$?
echo "Fuzzer exit code is $fuzzer_exit_code"
./clickhouse client --query "select elapsed, query from system.processes" ||:
kill -9 $server_pid ||:
return $fuzzer_exit_code
./clickhouse-client --query "select elapsed, query from system.processes" ||:
killall clickhouse-server ||:
for x in {1..10}
do
if ! pgrep -f clickhouse-server
then
break
fi
sleep 1
done
killall -9 clickhouse-server ||:
if [ "$fuzzer_exit_code" == "143" ]
then
# Killed by watchdog, meaning, no errors.
fuzzer_exit_code=0
fi
}
case "$stage" in
@ -106,11 +133,19 @@ case "$stage" in
time configure
;&
"fuzz")
# Start a watchdog that should kill the fuzzer on timeout.
# The shell won't kill the child sleep when we kill it, so we have to put it
# into a separate process group so that we can kill them all.
set -m
watchdog &
watchdog_pid=$!
set +m
# Check that the watchdog has started
kill -0 $watchdog_pid
fuzzer_exit_code=0
time fuzz || fuzzer_exit_code=$?
kill $watchdog_pid ||:
kill -- -$watchdog_pid ||:
# Debug
date
@ -118,6 +153,19 @@ case "$stage" in
jobs
pstree -aspgT
# Make files with status and description we'll show for this check on Github
if [ "$fuzzer_exit_code" == 0 ]
then
echo "OK" > description.txt
echo "success" > status.txt
else
echo "failure" > status.txt
if ! grep -a "received signal \|Logical error" server.log > description.txt
then
echo "Fuzzer exit code $fuzzer_exit_code. See the logs" > description.txt
fi
fi
exit $fuzzer_exit_code
;&
esac

View File

@ -1,25 +0,0 @@
{
"checkYo": false,
"excludeFiles": [],
"fileExtensions": [],
"format": "auto",
"ignoreTags": [
"code",
"kbd",
"object",
"samp",
"script",
"style",
"var"
],
"maxRequests": 2,
"lang": "en,ru",
"report": ["console"],
"dictionary": [
"(C|c)lick(H|h)ouse",
"CatBoost",
"(Ш|ш)ард(ы|ов|а|у|е|ам|ирование|ированы|ах)?",
"логир(ование|уются|ования)?",
"конфиг(а|е|ом|у)"
]
}

View File

@ -1,6 +1,6 @@
---
toc_priority: 71
toc_title: Source Code
toc_title: Source Code Browser
---
# Browse ClickHouse Source Code {#browse-clickhouse-source-code}

View File

@ -1,6 +1,6 @@
---
toc_priority: 67
toc_title: How to Build ClickHouse on Linux for AARCH64 (ARM64)
toc_title: Build on Linux for AARCH64 (ARM64)
---
# How to Build ClickHouse on Linux for AARCH64 (ARM64) Architecture {#how-to-build-clickhouse-on-linux-for-aarch64-arm64-architecture}
@ -9,7 +9,7 @@ This is for the case when you have Linux machine and want to use it to build `cl
The cross-build for AARCH64 is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8}
## Install Clang-8 {#install-clang-8}
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example, in Ubuntu Bionic you can use the following commands:
@ -20,7 +20,7 @@ sudo apt-get update
sudo apt-get install clang-8
```
# Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
``` bash
cd ClickHouse
@ -29,7 +29,7 @@ wget 'https://developer.arm.com/-/media/Files/downloads/gnu-a/8.3-2019.03/binrel
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build-aarch64/cmake/toolchain/linux-aarch64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}
## Build ClickHouse {#build-clickhouse}
``` bash
cd ClickHouse

View File

@ -1,6 +1,6 @@
---
toc_priority: 66
toc_title: How to Build ClickHouse on Linux for Mac OS X
toc_title: Build on Linux for Mac OS X
---
# How to Build ClickHouse on Linux for Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x}
@ -9,7 +9,7 @@ This is for the case when you have Linux machine and want to use it to build `cl
The cross-build for Mac OS X is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8}
## Install Clang-8 {#install-clang-8}
Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup.
For example, the commands for Bionic are:
@ -19,7 +19,7 @@ sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8
sudo apt-get install clang-8
```
# Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
## Install Cross-Compilation Toolset {#install-cross-compilation-toolset}
Let's remember the path where we install `cctools` as ${CCTOOLS}
@ -47,7 +47,7 @@ mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}
## Build ClickHouse {#build-clickhouse}
``` bash
cd ClickHouse

View File

@ -1,6 +1,6 @@
---
toc_priority: 65
toc_title: How to Build ClickHouse on Mac OS X
toc_title: Build on Mac OS X
---
# How to Build ClickHouse on Mac OS X {#how-to-build-clickhouse-on-mac-os-x}
@ -45,14 +45,12 @@ $ cd ..
## Caveats {#caveats}
If you intend to run clickhouse-server, make sure to increase the system's maxfiles variable.
If you intend to run `clickhouse-server`, make sure to increase the system's maxfiles variable.
!!! info "Note"
You'll need to use sudo.
To do so, create the following file:
/Library/LaunchDaemons/limit.maxfiles.plist:
To do so, create the `/Library/LaunchDaemons/limit.maxfiles.plist` file with the following content:
``` xml
<?xml version="1.0" encoding="UTF-8"?>

View File

@ -1,11 +1,9 @@
---
toc_priority: 64
toc_title: How to Build ClickHouse on Linux
toc_title: Build on Linux
---
# How to Build ClickHouse for Development {#how-to-build-clickhouse-for-development}
The following tutorial is based on the Ubuntu Linux system. With appropriate changes, it should also work on any other Linux distribution.
# How to Build ClickHouse on Linux {#how-to-build-clickhouse-for-development}
Supported platforms:
@ -13,7 +11,11 @@ Supported platforms:
- AArch64
- Power9 (experimental)
## Install Git, CMake, Python and Ninja {#install-git-cmake-python-and-ninja}
## Normal Build for Development on Ubuntu
The following tutorial is based on the Ubuntu Linux system. With appropriate changes, it should also work on any other Linux distribution.
### Install Git, CMake, Python and Ninja {#install-git-cmake-python-and-ninja}
``` bash
$ sudo apt-get install git cmake python ninja-build
@ -21,18 +23,18 @@ $ sudo apt-get install git cmake python ninja-build
Or cmake3 instead of cmake on older systems.
## Install GCC 9 {#install-gcc-9}
### Install GCC 9 {#install-gcc-9}
There are several ways to do this.
### Install from Repository {#install-from-repository}
#### Install from Repository {#install-from-repository}
On Ubuntu 19.10 or newer:
$ sudo apt-get update
$ sudo apt-get install gcc-9 g++-9
### Install from a PPA Package {#install-from-a-ppa-package}
#### Install from a PPA Package {#install-from-a-ppa-package}
On older Ubuntu:
@ -43,18 +45,18 @@ $ sudo apt-get update
$ sudo apt-get install gcc-9 g++-9
```
### Install from Sources {#install-from-sources}
#### Install from Sources {#install-from-sources}
See [utils/ci/build-gcc-from-sources.sh](https://github.com/ClickHouse/ClickHouse/blob/master/utils/ci/build-gcc-from-sources.sh)
## Use GCC 9 for Builds {#use-gcc-9-for-builds}
### Use GCC 9 for Builds {#use-gcc-9-for-builds}
``` bash
$ export CC=gcc-9
$ export CXX=g++-9
```
## Checkout ClickHouse Sources {#checkout-clickhouse-sources}
### Checkout ClickHouse Sources {#checkout-clickhouse-sources}
``` bash
$ git clone --recursive git@github.com:ClickHouse/ClickHouse.git
@ -66,7 +68,7 @@ or
$ git clone --recursive https://github.com/ClickHouse/ClickHouse.git
```
## Build ClickHouse {#build-clickhouse}
### Build ClickHouse {#build-clickhouse}
``` bash
$ cd ClickHouse
@ -79,7 +81,7 @@ $ ninja
To create an executable, run `ninja clickhouse`.
This will create the `programs/clickhouse` executable, which can be used with `client` or `server` arguments.
# How to Build ClickHouse on Any Linux {#how-to-build-clickhouse-on-any-linux}
## How to Build ClickHouse on Any Linux {#how-to-build-clickhouse-on-any-linux}
The build requires the following components:
@ -93,32 +95,58 @@ The build requires the following components:
If all the components are installed, you may build in the same way as the steps above.
Example for Ubuntu Eoan:
sudo apt update
sudo apt install git cmake ninja-build g++ python
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build
cmake ../ClickHouse
ninja
``` bash
sudo apt update
sudo apt install git cmake ninja-build g++ python
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build
cmake ../ClickHouse
ninja
```
Example for OpenSUSE Tumbleweed:
sudo zypper install git cmake ninja gcc-c++ python lld
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build
cmake ../ClickHouse
ninja
``` bash
sudo zypper install git cmake ninja gcc-c++ python lld
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build
cmake ../ClickHouse
ninja
```
Example for Fedora Rawhide:
``` bash
sudo yum update
yum --nogpg install git cmake make gcc-c++ python2
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build
cmake ../ClickHouse
make -j $(nproc)
```
sudo yum update
yum --nogpg install git cmake make gcc-c++ python2
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build && cd build
cmake ../ClickHouse
make -j $(nproc)
# You Don't Have to Build ClickHouse {#you-dont-have-to-build-clickhouse}
## How to Build ClickHouse Debian Package {#how-to-build-clickhouse-debian-package}
### Install Git and Pbuilder {#install-git-and-pbuilder}
``` bash
$ sudo apt-get update
$ sudo apt-get install git python pbuilder debhelper lsb-release fakeroot sudo debian-archive-keyring debian-keyring
```
### Checkout ClickHouse Sources {#checkout-clickhouse-sources-1}
``` bash
$ git clone --recursive --branch master https://github.com/ClickHouse/ClickHouse.git
$ cd ClickHouse
```
### Run Release Script {#run-release-script}
``` bash
$ ./release
```
## You Don't Have to Build ClickHouse {#you-dont-have-to-build-clickhouse}
ClickHouse is available in pre-built binaries and packages. Binaries are portable and can be run on any Linux flavour.
@ -126,26 +154,4 @@ They are built for stable, prestable and testing releases as long as for every c
To find the freshest build from `master`, go to [commits page](https://github.com/ClickHouse/ClickHouse/commits/master), click on the first green checkmark or red cross near the commit, and click the “Details” link right after “ClickHouse Build Check”.
# How to Build ClickHouse Debian Package {#how-to-build-clickhouse-debian-package}
## Install Git and Pbuilder {#install-git-and-pbuilder}
``` bash
$ sudo apt-get update
$ sudo apt-get install git python pbuilder debhelper lsb-release fakeroot sudo debian-archive-keyring debian-keyring
```
## Checkout ClickHouse Sources {#checkout-clickhouse-sources-1}
``` bash
$ git clone --recursive --branch master https://github.com/ClickHouse/ClickHouse.git
$ cd ClickHouse
```
## Run Release Script {#run-release-script}
``` bash
$ ./release
```
[Original article](https://clickhouse.tech/docs/en/development/build/) <!--hide-->

View File

@ -35,6 +35,7 @@ toc_title: Third-Party Libraries Used
| poco | [Boost Software License - Version 1.0](https://github.com/ClickHouse-Extras/poco/blob/fe5505e56c27b6ecb0dcbc40c49dc2caf4e9637f/LICENSE) |
| protobuf | [BSD 3-Clause License](https://github.com/ClickHouse-Extras/protobuf/blob/12735370922a35f03999afff478e1c6d7aa917a4/LICENSE) |
| re2 | [BSD 3-Clause License](https://github.com/google/re2/blob/7cf8b88e8f70f97fd4926b56aa87e7f53b2717e0/LICENSE) |
| sentry-native | [MIT License](https://github.com/getsentry/sentry-native/blob/master/LICENSE) |
| UnixODBC | [LGPL v2.1](https://github.com/ClickHouse-Extras/UnixODBC/tree/b0ad30f7f6289c12b76f04bfb9d466374bb32168) |
| zlib-ng | [Zlib License](https://github.com/ClickHouse-Extras/zlib-ng/blob/develop/LICENSE.md) |
| zstd | [BSD 3-Clause License](https://github.com/facebook/zstd/blob/dev/LICENSE) |

View File

@ -1,6 +1,6 @@
---
toc_priority: 68
toc_title: How to Write C++ Code
toc_title: C++ Guide
---
# How to Write C++ Code {#how-to-write-c-code}

View File

@ -1,6 +1,6 @@
---
toc_priority: 69
toc_title: How to Run ClickHouse Tests
toc_title: Testing
---
# ClickHouse Testing {#clickhouse-testing}
@ -25,12 +25,7 @@ Tests should use (create, drop, etc) only tables in `test` database that is assu
If you want to use distributed queries in functional tests, you can leverage the `remote` table function with `127.0.0.{1..2}` addresses for the server to query itself; or you can use predefined test clusters in the server configuration file like `test_shard_localhost`.
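For example, a query along these lines (illustrative, not taken from the test suite) makes the server query itself as if it were two shards:
``` sql
-- system.one has exactly one row per queried shard, so this returns 2
SELECT count() FROM remote('127.0.0.{1..2}', system.one);
```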
Some tests are marked with `zookeeper`, `shard` or `long` in their names.
`zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that
requires server to listen `127.0.0.*`; `distributed` or `global` have the same
meaning. `long` is for tests that run slightly longer that one second. You can
disable these groups of tests using `--no-zookeeper`, `--no-shard` and
`--no-long` options, respectively.
Some tests are marked with `zookeeper`, `shard` or `long` in their names. `zookeeper` is for tests that are using ZooKeeper. `shard` is for tests that require the server to listen on `127.0.0.*`; `distributed` or `global` have the same meaning. `long` is for tests that run slightly longer than one second. You can disable these groups of tests using `--no-zookeeper`, `--no-shard` and `--no-long` options, respectively.
## Known Bugs {#known-bugs}
@ -153,11 +148,11 @@ Motivation:
Normally we release and run all tests on a single variant of ClickHouse build. But there are alternative build variants that are not thoroughly tested. Examples:
- build on FreeBSD;
- build on Debian with libraries from system packages;
- build with shared linking of libraries;
- build on AArch64 platform;
- build on PowerPc platform.
- build on FreeBSD
- build on Debian with libraries from system packages
- build with shared linking of libraries
- build on AArch64 platform
- build on PowerPc platform
For example, build with system packages is bad practice, because we cannot guarantee what exact version of packages a system will have. But this is really needed by Debian maintainers. For this reason we at least have to support this variant of build. Another example: shared linking is a common source of trouble, but it is needed for some enthusiasts.
@ -177,22 +172,22 @@ For production builds, gcc is used (it still generates slightly more efficient c
## Sanitizers {#sanitizers}
**Address sanitizer**.
### Address sanitizer
We run functional and integration tests under ASan on a per-commit basis.
**Valgrind (Memcheck)**.
### Valgrind (Memcheck)
We run functional tests under Valgrind overnight. It takes multiple hours. Currently there is one known false positive in `re2` library, see [this article](https://research.swtch.com/sparse).
**Undefined behaviour sanitizer.**
### Undefined behaviour sanitizer
We run functional and integration tests under UBSan on a per-commit basis.
**Thread sanitizer**.
### Thread sanitizer
We run functional tests under TSan on a per-commit basis. We still don't run integration tests under TSan on a per-commit basis.
**Memory sanitizer**.
### Memory sanitizer
Currently we still don't use MSan.
**Debug allocator.**
### Debug allocator
Debug version of `jemalloc` is used for debug build.
## Fuzzing {#fuzzing}
@ -227,7 +222,7 @@ If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of t
## Code Style {#code-style}
Code style rules are described [here](https://clickhouse.tech/docs/en/development/style/).
Code style rules are described [here](style.md).
To check for some common style violations, you can use `utils/check-style` script.

View File

@ -96,6 +96,7 @@ For a description of parameters, see the [CREATE query description](../../../sql
- `write_final_mark` — Enables or disables writing the final index mark at the end of data part (after the last byte). Default value: 1. Don't turn it off.
- `merge_max_block_size` — Maximum number of rows in block for merge operations. Default value: 8192.
- `storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes).
- `min_bytes_for_wide_part`, `min_rows_for_wide_part` — Minimum number of bytes/rows in a data part that can be stored in `Wide` format. You can set one, both or none of these settings. See [Data Storage](#mergetree-data-storage).
**Example of Sections Setting**
@ -149,6 +150,10 @@ When data is inserted in a table, separate data parts are created and each of th
Data belonging to different partitions are separated into different parts. In the background, ClickHouse merges data parts for more efficient storage. Parts belonging to different partitions are not merged. The merge mechanism does not guarantee that all rows with the same primary key will be in the same data part.
Data parts can be stored in `Wide` or `Compact` format. In `Wide` format each column is stored in a separate file in a filesystem; in `Compact` format all columns are stored in one file. The `Compact` format can be used to increase the performance of small and frequent inserts.
The data storing format is controlled by the `min_bytes_for_wide_part` and `min_rows_for_wide_part` settings of the table engine. If the number of bytes or rows in a data part is less than the corresponding setting's value, the part is stored in `Compact` format. Otherwise, it is stored in `Wide` format. If neither of these settings is set, data parts are stored in `Wide` format.
Each data part is logically divided into granules. A granule is the smallest indivisible data set that ClickHouse reads when selecting data. ClickHouse doesn't split rows or values, so each granule always contains an integer number of rows. The first row of a granule is marked with the value of the primary key for the row. For each data part, ClickHouse creates an index file that stores the marks. For each column, whether it's in the primary key or not, ClickHouse also stores the same marks. These marks let you find data directly in column files.
The granule size is restricted by the `index_granularity` and `index_granularity_bytes` settings of the table engine. The number of rows in a granule lays in the `[1, index_granularity]` range, depending on the size of the rows. The size of a granule can exceed `index_granularity_bytes` if the size of a single row is greater than the value of the setting. In this case, the size of the granule equals the size of the row.
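As an illustration, a table definition along these lines (the table name and the threshold values are arbitrary examples, not recommendations) sets both the part-format thresholds and the granule size:
``` sql
CREATE TABLE example_hits
(
    EventDate Date,
    UserID UInt64,
    URL String
)
ENGINE = MergeTree()
ORDER BY (EventDate, UserID)
SETTINGS
    index_granularity = 8192,            -- at most 8192 rows per granule
    min_bytes_for_wide_part = 10485760,  -- ~10 MiB threshold for the Wide format
    min_rows_for_wide_part = 100000;     -- row-count threshold for the Wide format
```
The resulting format of each part can be checked in the `part_type` column of the `system.parts` table.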

View File

@ -22,7 +22,7 @@ The Distributed engine accepts parameters:
See also:
- `insert_distributed_sync` setting
- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
Example:

View File

@ -16,7 +16,7 @@ One of the following batches of those t-shirts was supposed to be given away on
So, what does it mean? Here are some ways to translate *“не тормозит”*:
- If you translate it literally, it'd be something like *“ClickHouse doesn't press the brake pedal”*.
- If you'd want to express it as close to how it sounds to a Russian person with IT background, it'd be something like *“If you larger system lags, it's not because it uses ClickHouse”*.
- If you'd want to express it as close to how it sounds to a Russian person with IT background, it'd be something like *“If your larger system lags, it's not because it uses ClickHouse”*.
- Shorter, but not so precise versions could be *“ClickHouse is not slow”*, *“ClickHouse doesn't lag”* or just *“ClickHouse is fast”*.
If you haven't seen one of those t-shirts in person, you can check them out online in many ClickHouse-related videos. For example, this one:

View File

@ -30,7 +30,7 @@ See [File](../../engines/table-engines/special/file.md) table engine.
## Using Command-Line Redirection {#using-command-line-redirection}
``` sql
``` bash
$ clickhouse-client --query "SELECT * from table" --format FormatName > result.txt
```

View File

@ -1,13 +1,14 @@
---
toc_folder_title: Example Datasets
toc_priority: 12
toc_priority: 15
toc_title: Introduction
---
# Example Datasets {#example-datasets}
This section describes how to obtain example datasets and import them into ClickHouse.
For some datasets example queries are also available.
This section describes how to obtain example datasets and import them into ClickHouse. For some datasets example queries are also available.
The list of documented datasets:
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)

View File

@ -37,6 +37,7 @@ The queries are executed as a read-only user. It implies some limitations:
- INSERT queries are not allowed
The following settings are also enforced:
- [max\_result\_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
- [max\_result\_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
- [result\_overflow\_mode=break](../operations/settings/query_complexity/#result-overflow-mode)

View File

@ -8,73 +8,54 @@ toc_title: Testing Hardware
With this instruction, you can run a basic ClickHouse performance test on any server without installing ClickHouse packages.
1. Go to “commits” page: https://github.com/ClickHouse/ClickHouse/commits/master
2. Click on the first green check mark or red cross with green “ClickHouse Build Check” and click on the “Details” link near “ClickHouse Build Check”. There is no such link in some commits, for example commits with documentation. In this case, choose the nearest commit having this link.
3. Copy the link to “clickhouse” binary for amd64 or aarch64.
3. Copy the link to `clickhouse` binary for amd64 or aarch64.
4. ssh to the server and download it with wget:
<!-- -->
# For amd64:
wget https://clickhouse-builds.s3.yandex.net/0/00ba767f5d2a929394ea3be193b1f79074a1c4bc/1578163263_binary/clickhouse
# For aarch64:
wget https://clickhouse-builds.s3.yandex.net/0/00ba767f5d2a929394ea3be193b1f79074a1c4bc/1578161264_binary/clickhouse
# Then do:
chmod a+x clickhouse
1. Download configs:
<!-- -->
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.xml
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/users.xml
mkdir config.d
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/path.xml -O config.d/path.xml
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/log_to_console.xml -O config.d/log_to_console.xml
1. Download benchmark files:
<!-- -->
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/benchmark-new.sh
chmod a+x benchmark-new.sh
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql
1. Download test data according to the [Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md) instruction (“hits” table containing 100 million rows).
<!-- -->
wget https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz
tar xvf hits_100m_obfuscated_v1.tar.xz -C .
mv hits_100m_obfuscated_v1/* .
1. Run the server:
<!-- -->
./clickhouse server
1. Check the data: ssh to the server in another terminal
<!-- -->
./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated"
100000000
1. Edit the benchmark-new.sh, change `clickhouse-client` to `./clickhouse client` and add `-max_memory_usage 100000000000` parameter.
<!-- -->
mcedit benchmark-new.sh
1. Run the benchmark:
<!-- -->
./benchmark-new.sh hits_100m_obfuscated
1. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com
```bash
# For amd64:
wget https://clickhouse-builds.s3.yandex.net/0/00ba767f5d2a929394ea3be193b1f79074a1c4bc/1578163263_binary/clickhouse
# For aarch64:
wget https://clickhouse-builds.s3.yandex.net/0/00ba767f5d2a929394ea3be193b1f79074a1c4bc/1578161264_binary/clickhouse
# Then do:
chmod a+x clickhouse
```
5. Download configs:
```bash
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.xml
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/users.xml
mkdir config.d
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/path.xml -O config.d/path.xml
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/programs/server/config.d/log_to_console.xml -O config.d/log_to_console.xml
```
6. Download benchmark files:
```bash
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/benchmark-new.sh
chmod a+x benchmark-new.sh
wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql
```
7. Download test data according to the [Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md) instruction (“hits” table containing 100 million rows).
```bash
wget https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz
tar xvf hits_100m_obfuscated_v1.tar.xz -C .
mv hits_100m_obfuscated_v1/* .
```
8. Run the server:
```bash
./clickhouse server
```
9. Check the data: ssh to the server in another terminal
```bash
./clickhouse client --query "SELECT count() FROM hits_100m_obfuscated"
100000000
```
10. Edit the benchmark-new.sh, change `clickhouse-client` to `./clickhouse client` and add `--max_memory_usage 100000000000` parameter.
```bash
mcedit benchmark-new.sh
```
11. Run the benchmark:
```bash
./benchmark-new.sh hits_100m_obfuscated
```
12. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com
All the results are published here: https://clickhouse.tech/benchmark/hardware/

View File

@ -397,7 +397,6 @@ The cache is shared for the server and memory is allocated as needed. The cache
``` xml
<mark_cache_size>5368709120</mark_cache_size>
```
## max\_server\_memory\_usage {#max_server_memory_usage}
Limits total RAM usage by the ClickHouse server. You can specify it only for the default profile.
@ -411,11 +410,37 @@ Default value: `0`.
**Additional Info**
On hosts with low RAM and swap, you may need to set `max_server_memory_usage_to_ram_ratio` to a value larger than 1.
The default `max_server_memory_usage` value is calculated as `memory_amount * max_server_memory_usage_to_ram_ratio`.
**See also**
- [max\_memory\_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage)
- [max_server_memory_usage_to_ram_ratio](#max_server_memory_usage_to_ram_ratio)
## max_server_memory_usage_to_ram_ratio {#max_server_memory_usage_to_ram_ratio}
Defines the fraction of the total physical RAM available to the ClickHouse server. If the server tries to utilize more, the memory is cut down to the appropriate amount.
Possible values:
- Positive double.
- 0 — The ClickHouse server can use all available RAM.
Default value: `0`.
**Usage**
On hosts with low RAM and swap, you may need to set `max_server_memory_usage_to_ram_ratio` to a value larger than 1.
**Example**
``` xml
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
```
**See Also**
- [max_server_memory_usage](#max_server_memory_usage)
## max\_concurrent\_queries {#max-concurrent-queries}

View File

@ -585,6 +585,31 @@ Possible values:
Default value: 0.
## network_compression_method {#network_compression_method}
Sets the method of data compression that is used for communication between servers and between the server and [clickhouse-client](../../interfaces/cli.md).
Possible values:
- `LZ4` — sets LZ4 compression method.
- `ZSTD` — sets ZSTD compression method.
Default value: `LZ4`.
**See Also**
- [network_zstd_compression_level](#network_zstd_compression_level)
## network_zstd_compression_level {#network_zstd_compression_level}
Adjusts the level of ZSTD compression. Used only when [network_compression_method](#network_compression_method) is set to `ZSTD`.
Possible values:
- Positive integer from 1 to 15.
Default value: `1`.
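**Example**
A minimal sketch of enabling ZSTD with a custom level for the current session (the level value is illustrative):
``` sql
SET network_compression_method = 'ZSTD';
SET network_zstd_compression_level = 3;
```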
## log\_queries {#settings-log-queries}
Setting up query logging.
@ -783,6 +808,17 @@ If unsuccessful, several attempts are made to connect to various replicas.
Default value: 50.
## connection\_pool\_max\_wait\_ms {#connection-pool-max-wait-ms}
The wait time in milliseconds for a connection when the connection pool is full.
Possible values:
- Positive integer.
- 0 — Infinite timeout.
Default value: 0.
## connections\_with\_failover\_max\_tries {#connections-with-failover-max-tries}
The maximum number of connection attempts with each replica for the Distributed table engine.
@ -794,6 +830,21 @@ Default value: 3.
Whether to count extreme values (the minimums and maximums in columns of a query result). Accepts 0 or 1. By default, 0 (disabled).
For more information, see the section “Extreme values”.
## kafka\_max\_wait\_ms {#kafka-max-wait-ms}
The wait time in milliseconds for reading messages from [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) before retry.
Possible values:
- Positive integer.
- 0 — Infinite timeout.
Default value: 5000.
See also:
- [Apache Kafka](https://kafka.apache.org/)
## use\_uncompressed\_cache {#setting-use_uncompressed_cache}
Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled).
@ -812,6 +863,17 @@ If a query from the same user with the same query\_id already exists at th
Yandex.Metrica uses this parameter set to 1 for implementing suggestions for segmentation conditions. After entering the next character, if the old query hasn't finished yet, it should be cancelled.
## replace\_running\_query\_max\_wait\_ms {#replace-running-query-max-wait-ms}
The wait time for a running query with the same `query_id` to finish, when the [replace_running_query](#replace-running-query) setting is active.
Possible values:
- Positive integer.
- 0 — Throwing an exception that does not allow running a new query if the server is already executing a query with the same `query_id`.
Default value: 5000.
## stream\_flush\_interval\_ms {#stream-flush-interval-ms}
Works for tables with streaming in the case of a timeout, or when a thread generates [max\_insert\_block\_size](#settings-max_insert_block_size) rows.
@ -1397,6 +1459,23 @@ Possible values:
Default value: 16.
## insert_distributed_sync {#insert_distributed_sync}
Enables or disables synchronous data insertion into a [Distributed](../../engines/table-engines/special/distributed.md#distributed) table.
By default, when inserting data into a `Distributed` table, the ClickHouse server sends data to cluster nodes in asynchronous mode. When `insert_distributed_sync=1`, the data is processed synchronously, and the `INSERT` operation succeeds only after all the data is saved on all shards (at least one replica for each shard if `internal_replication` is true).
Possible values:
- 0 — Data is inserted in asynchronous mode.
- 1 — Data is inserted in synchronous mode.
Default value: `0`.
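**Example**
A minimal sketch of a synchronous insert; `dist_table` is a placeholder for an existing `Distributed` table with two columns:
``` sql
SET insert_distributed_sync = 1;
INSERT INTO dist_table VALUES (1, 'a'), (2, 'b');
```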
**See Also**
- [Distributed Table Engine](../../engines/table-engines/special/distributed.md#distributed)
- [Managing Distributed Tables](../../sql-reference/statements/system.md#query-language-system-distributed)
## background\_buffer\_flush\_schedule\_pool\_size {#background_buffer_flush_schedule_pool_size}
Sets the number of threads performing background flush in [Buffer](../../engines/table-engines/special/buffer.md)-engine tables. This setting is applied at ClickHouse server start and can't be changed in a user session.
@ -1454,6 +1533,17 @@ Possible values:
Default value: 16.
## validate\_polygons {#validate_polygons}
Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent.
Possible values:
- 0 — Throwing an exception is disabled. `pointInPolygon` accepts invalid polygons and returns possibly incorrect results for them.
- 1 — Throwing an exception is enabled.
Default value: 1.
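**Example**
For illustration, the polygon below is self-intersecting, so with the default setting the query throws an exception; with `validate_polygons = 0` it runs and may return an incorrect result:
``` sql
SELECT pointInPolygon((2., 2.), [(0., 0.), (4., 4.), (4., 0.), (0., 4.)])
SETTINGS validate_polygons = 0;
```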
## transform\_null\_in {#transform_null_in}
Enables equality of [NULL](../../sql-reference/syntax.md#null-literal) values for [IN](../../sql-reference/operators/in.md) operator.

View File

@ -0,0 +1,10 @@
# system.current_roles {#system_tables-current_roles}
Contains active roles of the current user. `SET ROLE` changes the contents of this table.
Columns:
- `role_name` ([String](../../sql-reference/data-types/string.md)) — Role name.
- `with_admin_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `current_role` is a role with `ADMIN OPTION` privilege.
- `is_default` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `current_role` is a default role.
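**Example**
The table can be inspected with a plain query; the output depends on the roles assigned to the current user:
``` sql
SELECT * FROM system.current_roles;
```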
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/current-roles) <!--hide-->

View File

@ -0,0 +1,11 @@
# system.enabled_roles {#system_tables-enabled_roles}
Contains all active roles at the moment, including the current role of the current user and the roles granted to the current role.
Columns:
- `role_name` ([String](../../sql-reference/data-types/string.md)) — Role name.
- `with_admin_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `enabled_role` is a role with `ADMIN OPTION` privilege.
- `is_current` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `enabled_role` is a current role of a current user.
- `is_default` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `enabled_role` is a default role.
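**Example**
The table can be inspected with a plain query; the output depends on the granted roles:
``` sql
SELECT * FROM system.enabled_roles;
```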
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/enabled-roles) <!--hide-->

View File

@ -0,0 +1,39 @@
# system.licenses {#system-tables_system.licenses}
Contains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources.
Columns:
- `library_name` ([String](../../sql-reference/data-types/string.md)) — Name of the library the license is connected with.
- `license_type` ([String](../../sql-reference/data-types/string.md)) — License type — e.g. Apache, MIT.
- `license_path` ([String](../../sql-reference/data-types/string.md)) — Path to the file with the license text.
- `license_text` ([String](../../sql-reference/data-types/string.md)) — License text.
**Example**
``` sql
SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15
```
``` text
┌─library_name───────┬─license_type─┬─license_path────────────────────────┐
│ FastMemcpy │ MIT │ /contrib/FastMemcpy/LICENSE │
│ arrow │ Apache │ /contrib/arrow/LICENSE.txt │
│ avro │ Apache │ /contrib/avro/LICENSE.txt │
│ aws-c-common │ Apache │ /contrib/aws-c-common/LICENSE │
│ aws-c-event-stream │ Apache │ /contrib/aws-c-event-stream/LICENSE │
│ aws-checksums │ Apache │ /contrib/aws-checksums/LICENSE │
│ aws │ Apache │ /contrib/aws/LICENSE.txt │
│ base64 │ BSD 2-clause │ /contrib/base64/LICENSE │
│ boost │ Boost │ /contrib/boost/LICENSE_1_0.txt │
│ brotli │ MIT │ /contrib/brotli/LICENSE │
│ capnproto │ MIT │ /contrib/capnproto/LICENSE │
│ cassandra │ Apache │ /contrib/cassandra/LICENSE.txt │
│ cctz │ Apache │ /contrib/cctz/LICENSE.txt │
│ cityhash102 │ MIT │ /contrib/cityhash102/COPYING │
│ cppkafka │ BSD 2-clause │ /contrib/cppkafka/LICENSE │
└────────────────────┴──────────────┴─────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/licenses) <!--hide-->

View File

@ -1,25 +1,46 @@
# system.mutations {#system_tables-mutations}
The table contains information about [mutations](../../sql-reference/statements/alter/index.md#alter-mutations) of MergeTree tables and their progress. Each mutation command is represented by a single row. The table has the following columns:
The table contains information about [mutations](../../sql-reference/statements/alter/index.md#mutations) of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables and their progress. Each mutation command is represented by a single row.
**database**, **table** - The name of the database and table to which the mutation was applied.
Columns:
**mutation\_id** - The ID of the mutation. For replicated tables these IDs correspond to znode names in the `<table_path_in_zookeeper>/mutations/` directory in ZooKeeper. For unreplicated tables the IDs correspond to file names in the data directory of the table.
- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database to which the mutation was applied.
**command** - The mutation command string (the part of the query after `ALTER TABLE [db.]table`).
- `table` ([String](../../sql-reference/data-types/string.md)) — The name of the table to which the mutation was applied.
**create\_time** - When this mutation command was submitted for execution.
- `mutation_id` ([String](../../sql-reference/data-types/string.md)) — The ID of the mutation. For replicated tables these IDs correspond to znode names in the `<table_path_in_zookeeper>/mutations/` directory in ZooKeeper. For non-replicated tables the IDs correspond to file names in the data directory of the table.
**block\_numbers.partition\_id**, **block\_numbers.number** - A nested column. For mutations of replicated tables, it contains one record for each partition: the partition ID and the block number that was acquired by the mutation (in each partition, only parts that contain blocks with numbers less than the block number acquired by the mutation in that partition will be mutated). In non-replicated tables, block numbers in all partitions form a single sequence. This means that for mutations of non-replicated tables, the column will contain one record with a single block number acquired by the mutation.
- `command` ([String](../../sql-reference/data-types/string.md)) — The mutation command string (the part of the query after `ALTER TABLE [db.]table`).
**parts\_to\_do** - The number of data parts that need to be mutated for the mutation to finish.
- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time when the mutation command was submitted for execution.
**is\_done** - Is the mutation done? Note that even if `parts_to_do = 0` it is possible that a mutation of a replicated table is not done yet because of a long-running INSERT that will create a new data part that will need to be mutated.
- `block_numbers.partition_id` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — For mutations of replicated tables, the array contains the partitions' IDs (one record for each partition). For mutations of non-replicated tables the array is empty.
If there were problems with mutating some parts, the following columns contain additional information:
- `block_numbers.number` ([Array](../../sql-reference/data-types/array.md)([Int64](../../sql-reference/data-types/int-uint.md))) — For mutations of replicated tables, the array contains one record for each partition, with the block number that was acquired by the mutation. Only parts that contain blocks with numbers less than this number will be mutated in the partition.
In non-replicated tables, block numbers in all partitions form a single sequence. This means that for mutations of non-replicated tables, the column will contain one record with a single block number acquired by the mutation.
**latest\_failed\_part** - The name of the most recent part that could not be mutated.
- `parts_to_do_names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — An array of names of data parts that need to be mutated for the mutation to complete.
**latest\_fail\_time** - The time of the most recent part mutation failure.
- `parts_to_do` ([Int64](../../sql-reference/data-types/int-uint.md)) — The number of data parts that need to be mutated for the mutation to complete.
**latest\_fail\_reason** - The exception message that caused the most recent part mutation failure.
- `is_done` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag whether the mutation is done or not. Possible values:
- `1` if the mutation is completed,
- `0` if the mutation is still in process.
!!! info "Note"
Even if `parts_to_do = 0`, it is possible that a mutation of a replicated table is not completed yet because of a long-running `INSERT` query that will create a new data part that needs to be mutated.
If there were problems with mutating some data parts, the following columns contain additional information:
- `latest_failed_part` ([String](../../sql-reference/data-types/string.md)) — The name of the most recent part that could not be mutated.
- `latest_fail_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — The date and time of the most recent part mutation failure.
- `latest_fail_reason` ([String](../../sql-reference/data-types/string.md)) — The exception message that caused the most recent part mutation failure.
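**Example**
For instance, a sketch of a query that lists mutations still in progress (filter by `database` or `table` as needed):
``` sql
SELECT database, table, mutation_id, command, parts_to_do, is_done
FROM system.mutations
WHERE is_done = 0;
```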
**See Also**
- [Mutations](../../sql-reference/statements/alter/index.md#mutations)
- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine
- [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family

View File

@ -6,75 +6,151 @@ Each row describes one data part.
Columns:
- `partition` (String) The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query.
- `partition` ([String](../../sql-reference/data-types/string.md)) The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query.
Formats:
- `YYYYMM` for automatic partitioning by month.
- `any_string` when partitioning manually.
- `name` (`String`) Name of the data part.
- `name` ([String](../../sql-reference/data-types/string.md)) Name of the data part.
- `active` (`UInt8`) Flag that indicates whether the data part is active. If a data part is active, it's used in a table. Otherwise, it's deleted. Inactive data parts remain after merging.
- `part_type` ([String](../../sql-reference/data-types/string.md)) — The data part storing format.
- `marks` (`UInt64`) The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint doesn't work for adaptive granularity).
Possible Values:
- `rows` (`UInt64`) The number of rows.
- `Wide` — Each column is stored in a separate file in a filesystem.
- `Compact` — All columns are stored in one file in a filesystem.
- `bytes_on_disk` (`UInt64`) Total size of all the data part files in bytes.
Data storing format is controlled by the `min_bytes_for_wide_part` and `min_rows_for_wide_part` settings of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table.
- `data_compressed_bytes` (`UInt64`) Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
- `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) Flag that indicates whether the data part is active. If a data part is active, it's used in a table. Otherwise, it's deleted. Inactive data parts remain after merging.
- `data_uncompressed_bytes` (`UInt64`) Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint doesn't work for adaptive granularity).
- `marks_bytes` (`UInt64`) The size of the file with marks.
- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) The number of rows.
- `modification_time` (`DateTime`) The time the directory with the data part was modified. This usually corresponds to the time of data part creation.
- `bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) Total size of all the data part files in bytes.
- `remove_time` (`DateTime`) The time when the data part became inactive.
- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
- `refcount` (`UInt32`) The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges.
- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
- `min_date` (`Date`) The minimum value of the date key in the data part.
- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) The size of the file with marks.
- `max_date` (`Date`) The maximum value of the date key in the data part.
- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) The time the directory with the data part was modified. This usually corresponds to the time of data part creation.
- `min_time` (`DateTime`) The minimum value of the date and time key in the data part.
- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) The time when the data part became inactive.
- `max_time` (`DateTime`) The maximum value of the date and time key in the data part.
- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges.
- `partition_id` (`String`) ID of the partition.
- `min_date` ([Date](../../sql-reference/data-types/date.md)) The minimum value of the date key in the data part.
- `min_block_number` (`UInt64`) The minimum number of data parts that make up the current part after merging.
- `max_date` ([Date](../../sql-reference/data-types/date.md)) The maximum value of the date key in the data part.
- `max_block_number` (`UInt64`) The maximum number of data parts that make up the current part after merging.
- `min_time` ([DateTime](../../sql-reference/data-types/datetime.md)) The minimum value of the date and time key in the data part.
- `level` (`UInt32`) Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts.
- `max_time` ([DateTime](../../sql-reference/data-types/datetime.md)) The maximum value of the date and time key in the data part.
- `data_version` (`UInt64`) Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`).
- `partition_id` ([String](../../sql-reference/data-types/string.md)) ID of the partition.
- `primary_key_bytes_in_memory` (`UInt64`) The amount of memory (in bytes) used by primary key values.
- `min_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) The minimum number of data parts that make up the current part after merging.
- `primary_key_bytes_in_memory_allocated` (`UInt64`) The amount of memory (in bytes) reserved for primary key values.
- `max_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) The maximum number of data parts that make up the current part after merging.
- `is_frozen` (`UInt8`) Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup doesn't exist. For more details, see [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md#alter_freeze-partition)
- `level` ([UInt32](../../sql-reference/data-types/int-uint.md)) Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts.
- `database` (`String`) Name of the database.
- `data_version` ([UInt64](../../sql-reference/data-types/int-uint.md)) Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`).
- `table` (`String`) Name of the table.
- `primary_key_bytes_in_memory` ([UInt64](../../sql-reference/data-types/int-uint.md)) The amount of memory (in bytes) used by primary key values.
- `engine` (`String`) Name of the table engine without parameters.
- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) The amount of memory (in bytes) reserved for primary key values.
- `path` (`String`) Absolute path to the folder with data part files.
- `is_frozen` ([UInt8](../../sql-reference/data-types/int-uint.md)) Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup doesn't exist. For more details, see [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md#alter_freeze-partition)
- `disk` (`String`) Name of a disk that stores the data part.
- `database` ([String](../../sql-reference/data-types/string.md)) Name of the database.
- `hash_of_all_files` (`String`) [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of compressed files.
- `table` ([String](../../sql-reference/data-types/string.md)) Name of the table.
- `hash_of_uncompressed_files` (`String`) [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of uncompressed files (files with marks, index file etc.).
- `engine` ([String](../../sql-reference/data-types/string.md)) Name of the table engine without parameters.
- `uncompressed_hash_of_compressed_files` (`String`) [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of data in the compressed files as if they were uncompressed.
- `path` ([String](../../sql-reference/data-types/string.md)) Absolute path to the folder with data part files.
- `bytes` (`UInt64`) Alias for `bytes_on_disk`.
- `disk` ([String](../../sql-reference/data-types/string.md)) Name of a disk that stores the data part.
- `marks_size` (`UInt64`) Alias for `marks_bytes`.
- `hash_of_all_files` ([String](../../sql-reference/data-types/string.md)) [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of compressed files.
- `hash_of_uncompressed_files` ([String](../../sql-reference/data-types/string.md)) [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of uncompressed files (files with marks, index file etc.).
- `uncompressed_hash_of_compressed_files` ([String](../../sql-reference/data-types/string.md)) [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of data in the compressed files as if they were uncompressed.
- `delete_ttl_info_min` ([DateTime](../../sql-reference/data-types/datetime.md)) — The minimum value of the date and time key for [TTL DELETE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `delete_ttl_info_max` ([DateTime](../../sql-reference/data-types/datetime.md)) — The maximum value of the date and time key for [TTL DELETE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
!!! note "Warning"
The `move_ttl_info.expression` array is kept mostly for backward compatibility; the simplest way to check a `TTL MOVE` rule now is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.
- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `move_ttl_info.max` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the maximum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) Alias for `bytes_on_disk`.
- `marks_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) Alias for `marks_bytes`.
**Example**
``` sql
SELECT * FROM system.parts LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
partition: tuple()
name: all_1_4_1_6
part_type: Wide
active: 1
marks: 2
rows: 6
bytes_on_disk: 310
data_compressed_bytes: 157
data_uncompressed_bytes: 91
marks_bytes: 144
modification_time: 2020-06-18 13:01:49
remove_time: 0000-00-00 00:00:00
refcount: 1
min_date: 0000-00-00
max_date: 0000-00-00
min_time: 0000-00-00 00:00:00
max_time: 0000-00-00 00:00:00
partition_id: all
min_block_number: 1
max_block_number: 4
level: 1
data_version: 6
primary_key_bytes_in_memory: 8
primary_key_bytes_in_memory_allocated: 64
is_frozen: 0
database: default
table: months
engine: MergeTree
disk_name: default
path: /var/lib/clickhouse/data/default/months/all_1_4_1_6/
hash_of_all_files: 2d0657a16d9430824d35e327fcbd87bf
hash_of_uncompressed_files: 84950cc30ba867c77a408ae21332ba29
uncompressed_hash_of_compressed_files: 1ad78f1c6843bbfb99a2c931abe7df7d
delete_ttl_info_min: 0000-00-00 00:00:00
delete_ttl_info_max: 0000-00-00 00:00:00
move_ttl_info.expression: []
move_ttl_info.min: []
move_ttl_info.max: []
```
**See Also**
- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md)
- [TTL for Columns and Tables](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl)

View File

@ -0,0 +1,16 @@
# system.role_grants {#system_tables-role_grants}
Contains the role grants for users and roles. To add entries to this table, use `GRANT role TO user`.
Columns:
- `user_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — User name.
- `role_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Role name.
- `granted_role_name` ([String](../../sql-reference/data-types/string.md)) — Name of role granted to the `role_name` role. To grant one role to another one use `GRANT role1 TO role2`.
- `granted_role_is_default` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `granted_role` is a default role. Possible values:
- 1 — `granted_role` is a default role.
- 0 — `granted_role` is not a default role.
- `with_admin_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `granted_role` is a role with [ADMIN OPTION](../../sql-reference/statements/grant.md#admin-option-privilege) privilege. Possible values:
- 1 — The role has `ADMIN OPTION` privilege.
- 0 — The role without `ADMIN OPTION` privilege.
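For example, one way to populate and then inspect this table might look as follows (the `accountant` role and the `john` user are illustrative names, not part of any default setup):

``` sql
-- illustrative role and user names
CREATE ROLE accountant;
CREATE USER john;
GRANT accountant TO john;

SELECT user_name, granted_role_name, granted_role_is_default
FROM system.role_grants
WHERE user_name = 'john';
```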
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/role-grants) <!--hide-->

View File

@ -0,0 +1,10 @@
# system.roles {#system_tables-roles}
Contains information about configured [roles](../../operations/access-rights.md#role-management).
Columns:
- `name` ([String](../../sql-reference/data-types/string.md)) — Role name.
- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Role ID.
- `storage` ([String](../../sql-reference/data-types/string.md)) — Path to the storage of roles. Configured in the `access_control_path` parameter.
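For example, the configured roles can be listed with a query like the following (a minimal sketch):

``` sql
-- list every configured role and where it is stored
SELECT name, id, storage FROM system.roles;
```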
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/roles) <!--hide-->

View File

@ -16,6 +16,8 @@ By default `clickhouse-local` does not have access to data on the same host, but
!!! warning "Warning"
It is not recommended to load production server configuration into `clickhouse-local` because data can be damaged in case of human error.
For temporary data, a unique temporary data directory is created by default. To override this behavior, the data directory can be explicitly specified with the `-- --path` option.
## Usage {#usage}
Basic usage:
@ -40,6 +42,7 @@ Arguments:
Also there are arguments for each ClickHouse configuration variable which are more commonly used instead of `--config-file`.
## Examples {#examples}
``` bash

View File

@ -1350,4 +1350,42 @@ len: 30
- [generateRandom](../../sql-reference/table-functions/generate.md#generaterandom)
- [randomPrintableASCII](../../sql-reference/functions/other-functions.md#randomascii)
## randomStringUTF8 {#randomstringutf8}
Generates a random string of a specified length. The result string contains valid UTF-8 code points, whose values may lie outside the range of assigned Unicode code points.
**Syntax**
``` sql
randomStringUTF8(length);
```
**Parameters**
- `length` — Required length of the resulting string in code points. [UInt64](../../sql-reference/data-types/int-uint.md).
**Returned value(s)**
- UTF-8 random string.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
```sql
SELECT randomStringUTF8(13)
```
Result:
```text
┌─randomStringUTF8(13)─┐
│ 𘤗𙉝д兠庇󡅴󱱎󦐪􂕌𔊹𓰛 │
└──────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/other_functions/) <!--hide-->

View File

@ -111,4 +111,43 @@ SELECT alphaTokens('abca1abc')
└─────────────────────────┘
```
## extractAllGroups(text, regexp) {#extractallgroups}
Extracts all groups from non-overlapping substrings matched by a regular expression.
**Syntax**
``` sql
extractAllGroups(text, regexp)
```
**Parameters**
- `text` — [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
**Returned values**
- If the function finds at least one matching group, it returns an `Array(Array(String))` column, clustered by group_id (1 to N, where N is the number of capturing groups in `regexp`).
- If there is no matching group, returns an empty array.
Type: [Array](../data-types/array.md).
**Example**
Query:
``` sql
SELECT extractAllGroups('abc=123, 8="hkl"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)');
```
Result:
``` text
┌─extractAllGroups('abc=123, 8="hkl"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','123'],['8','"hkl"']] │
└───────────────────────────────────────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/splitting_merging_functions/) <!--hide-->

View File

@ -7,14 +7,14 @@ toc_title: VIEW
Creates a new view. There are two types of views: normal and materialized.
## Normal {#normal}
Syntax:
``` sql
CREATE [MATERIALIZED] VIEW [IF NOT EXISTS] [db.]table_name [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ...
```
## Normal {#normal}
Normal views don't store any data; they just perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.
As an example, assume you've created a view:
@ -37,6 +37,11 @@ SELECT a, b, c FROM (SELECT ...)
## Materialized {#materialized}
``` sql
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
```
Materialized views store data transformed by the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query.
When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE`, the table engine for storing data.
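For example, a materialized view that maintains a daily aggregate might be declared roughly like this (the `hits` table and its columns are illustrative):

``` sql
-- illustrative source table and columns; the ENGINE clause is required
-- because no TO [db].[table] target is given
CREATE MATERIALIZED VIEW IF NOT EXISTS hits_daily
ENGINE = SummingMergeTree()
ORDER BY day
AS SELECT
    toDate(event_time) AS day,
    count() AS hits
FROM hits
GROUP BY day;
```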

View File

@ -77,3 +77,11 @@ DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name]
Deletes a settings profile.
Deleted settings profile is revoked from all the entities where it was assigned.
## DROP VIEW {#drop-view}
``` sql
DROP VIEW [IF EXISTS] [db.]name [ON CLUSTER cluster]
```
Deletes a view. Views can be deleted by a `DROP TABLE` command as well but `DROP VIEW` checks that `[db.]name` is a view.

View File

@ -115,7 +115,7 @@ Aborts ClickHouse process (like `kill -9 {$ pid_clickhouse-server}`)
## Managing Distributed Tables {#query-language-system-distributed}
ClickHouse can manage [distributed](../../engines/table-engines/special/distributed.md) tables. When a user inserts data into these tables, ClickHouse first creates a queue of the data that should be sent to cluster nodes, then asynchronously sends it. You can manage queue processing with the [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [FLUSH DISTRIBUTED](#query_language-system-flush-distributed), and [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) queries. You can also synchronously insert distributed data with the `insert_distributed_sync` setting.
ClickHouse can manage [distributed](../../engines/table-engines/special/distributed.md) tables. When a user inserts data into these tables, ClickHouse first creates a queue of the data that should be sent to cluster nodes, then asynchronously sends it. You can manage queue processing with the [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [FLUSH DISTRIBUTED](#query_language-system-flush-distributed), and [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) queries. You can also synchronously insert distributed data with the [insert_distributed_sync](../../operations/settings/settings.md#insert_distributed_sync) setting.
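For example, sends for a particular distributed table can be paused, force-flushed, and resumed like this (the table name is illustrative):

``` sql
-- db.distributed_table is an illustrative name
SYSTEM STOP DISTRIBUTED SENDS db.distributed_table;
-- inserted data is queued locally while sends are stopped
SYSTEM FLUSH DISTRIBUTED db.distributed_table;
SYSTEM START DISTRIBUTED SENDS db.distributed_table;
```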
### STOP DISTRIBUTED SENDS {#query_language-system-stop-distributed-sends}

View File

@ -67,8 +67,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
- `min_merge_bytes_to_use_direct_io` — минимальный объём данных при слиянии, необходимый для прямого (небуферизованного) чтения/записи (direct I/O) на диск. При слиянии частей данных ClickHouse вычисляет общий объём хранения всех данных, подлежащих слиянию. Если общий объём хранения всех данных для чтения превышает `min_bytes_to_use_direct_io` байт, тогда ClickHouse использует флаг `O_DIRECT` при чтении данных с диска. Если `min_merge_bytes_to_use_direct_io = 0`, тогда прямой ввод-вывод отключен. Значение по умолчанию: `10 * 1024 * 1024 * 1024` байтов.
- <a name="mergetree_setting-merge_with_ttl_timeout"></a>`merge_with_ttl_timeout` — минимальное время в секундах перед повторным слиянием с TTL. По умолчанию — 86400 (1 день).
- `write_final_mark` — включает или отключает запись последней засечки индекса в конце куска данных, указывающей за последний байт. По умолчанию — 1. Не отключайте её.
- `merge_max_block_size`Максимальное количество строк в блоке для операций слияния. Значение по умолчанию: 8192.
- `merge_max_block_size`максимальное количество строк в блоке для операций слияния. Значение по умолчанию: 8192.
- `storage_policy` — политика хранения данных. Смотрите [Хранение данных таблицы на нескольких блочных устройствах](#table_engine-mergetree-multiple-volumes).
- `min_bytes_for_wide_part`, `min_rows_for_wide_part` — минимальное количество байт/строк в куске данных для хранения в формате `Wide`. Можно задать одну или обе настройки или не задавать ни одной. Подробнее см. в разделе [Хранение данных](#mergetree-data-storage).
**Пример задания секций**
@ -123,6 +124,10 @@ MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID)
Данные, относящиеся к разным партициям, разбиваются на разные куски. В фоновом режиме ClickHouse выполняет слияния (merge) кусков данных для более эффективного хранения. Куски, относящиеся к разным партициям не объединяются. Механизм слияния не гарантирует, что все строки с одинаковым первичным ключом окажутся в одном куске.
Куски данных могут храниться в формате `Wide` или `Compact`. В формате `Wide` каждый столбец хранится в отдельном файле, а в формате `Compact` все столбцы хранятся в одном файле. Формат `Compact` может быть полезен для повышения производительности при частом добавлении небольших объемов данных.
Формат хранения определяется настройками движка `min_bytes_for_wide_part` и `min_rows_for_wide_part`. Если число байт или строк в куске данных меньше значения, указанного в соответствующей настройке, тогда этот кусок данных хранится в формате `Compact`. В противном случае кусок данных хранится в формате `Wide`. Если ни одна из настроек не задана, куски данных хранятся в формате `Wide`.
Каждый кусок данных логически делится на гранулы. Гранула — это минимальный неделимый набор данных, который ClickHouse считывает при выборке данных. ClickHouse не разбивает строки и значения и гранула всегда содержит целое число строк. Первая строка гранулы помечается значением первичного ключа для этой строки (засечка). Для каждого куска данных ClickHouse создаёт файл с засечками (индексный файл). Для каждого столбца, независимо от того, входит он в первичный ключ или нет, ClickHouse также сохраняет эти же засечки. Засечки используются для поиска данных напрямую в файлах столбцов.
Размер гранул ограничен настройками движка `index_granularity` и `index_granularity_bytes`. Количество строк в грануле лежит в диапазоне `[1, index_granularity]`, в зависимости от размера строк. Размер гранулы может превышать `index_granularity_bytes` в том случае, когда размер единственной строки в грануле превышает значение настройки. В этом случае, размер гранулы равен размеру строки.

View File

@ -385,12 +385,37 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat
**Дополнительная информация**
На серверах с небольшим объёмом RAM и файла подкачки может потребоваться настройка `max_server_memory_usage_to_ram_ratio > 1`.
Значение по умолчанию для `max_server_memory_usage` рассчитывается как `memory_amount * max_server_memory_usage_to_ram_ratio`.
**См. также**
- [max_memory_usage](../settings/query-complexity.md#settings_max_memory_usage)
## max_server_memory_usage_to_ram_ratio {#max_server_memory_usage_to_ram_ratio}
Определяет долю оперативной памяти, доступную для использования сервером Clickhouse. Если сервер попытается использовать больше, предоставляемый ему объём памяти будет ограничен до расчётного значения.
Возможные значения:
- Положительное число с плавающей запятой.
- 0 — сервер Clickhouse может использовать всю оперативную память.
Значение по умолчанию: `0`.
**Использование**
На серверах с небольшим объёмом оперативной памяти и файла подкачки может потребоваться установить настройку `max_server_memory_usage_to_ram_ratio` в значение, большее 1.
**Пример**
``` xml
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>
```
**См. также**
- [max_server_memory_usage](#max_server_memory_usage)
## max\_connections {#max-connections}
Максимальное количество входящих соединений.

View File

@ -520,6 +520,31 @@ ClickHouse использует этот параметр при чтении д
Значение по умолчанию: 0.
## network_compression_method {#network_compression_method}
Задает метод сжатия данных, используемый при обмене данными между серверами и при обмене между сервером и [clickhouse-client](../../interfaces/cli.md).
Возможные значения:
- `LZ4` — устанавливает метод сжатия LZ4.
- `ZSTD` — устанавливает метод сжатия ZSTD.
Значение по умолчанию: `LZ4`.
См. также:
- [network_zstd_compression_level](#network_zstd_compression_level)
## network_zstd_compression_level {#network_zstd_compression_level}
Регулирует уровень сжатия ZSTD. Используется только тогда, когда [network_compression_method](#network_compression_method) имеет значение `ZSTD`.
Возможные значения:
- Положительное целое число от 1 до 15.
Значение по умолчанию: `1`.
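For example, both settings can be adjusted for the current session (the compression level below is an illustrative value):

``` sql
-- switch session-level network compression to ZSTD with an illustrative level
SET network_compression_method = 'ZSTD';
SET network_zstd_compression_level = 5;
```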
## log\_queries {#settings-log-queries}
Установка логирования запроса.
@ -700,6 +725,17 @@ log_query_threads=1
Значение по умолчанию: 50.
## connection\_pool\_max\_wait\_ms {#connection-pool-max-wait-ms}
Время ожидания соединения в миллисекундах, когда пул соединений заполнен.
Возможные значения:
- Положительное целое число.
- 0 — Бесконечный таймаут.
Значение по умолчанию: 0.
## connections\_with\_failover\_max\_tries {#connections-with-failover-max-tries}
Максимальное количество попыток соединения с каждой репликой, для движка таблиц Distributed.
@ -711,6 +747,21 @@ log_query_threads=1
Считать ли экстремальные значения (минимумы и максимумы по столбцам результата запроса). Принимает 0 или 1. По умолчанию - 0 (выключено).
Подробнее смотрите раздел «Экстремальные значения».
## kafka\_max\_wait\_ms {#kafka-max-wait-ms}
Время ожидания в миллисекундах для чтения сообщений из [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) перед повторной попыткой.
Возможные значения:
- Положительное целое число.
- 0 — Бесконечный таймаут.
Значение по умолчанию: 5000.
См. также:
- [Apache Kafka](https://kafka.apache.org/)
## use\_uncompressed\_cache {#setting-use_uncompressed_cache}
Использовать ли кэш разжатых блоков. Принимает 0 или 1. По умолчанию - 0 (выключено).
@ -730,6 +781,17 @@ log_query_threads=1
Эта настройка, выставленная в 1, используется в Яндекс.Метрике для реализации suggest-а значений для условий сегментации. После ввода очередного символа, если старый запрос ещё не выполнился, его следует отменить.
## replace\_running\_query\_max\_wait\_ms {#replace-running-query-max-wait-ms}
Время ожидания завершения выполнения запроса с тем же `query_id`, когда активирована настройка [replace_running_query](#replace-running-query).
Возможные значения:
- Положительное целое число.
- 0 — Создание исключения, которое не позволяет выполнить новый запрос, если сервер уже выполняет запрос с тем же `query_id`.
Значение по умолчанию: 5000.
## stream\_flush\_interval\_ms {#stream-flush-interval-ms}
Работает для таблиц со стриммингом в случае тайм-аута, или когда поток генерирует [max\_insert\_block\_size](#settings-max_insert_block_size) строк.
@ -1216,6 +1278,34 @@ Default value: 0.
Значение по умолчанию: 16.
## insert_distributed_sync {#insert_distributed_sync}
Включает или отключает режим синхронного добавления данных в распределенные таблицы (таблицы с движком [Distributed](../../engines/table-engines/special/distributed.md#distributed)).
По умолчанию ClickHouse вставляет данные в распределённую таблицу в асинхронном режиме. Если `insert_distributed_sync=1`, то данные вставляются синхронно, а запрос `INSERT` считается выполненным успешно, когда данные записаны на все шарды (по крайней мере на одну реплику для каждого шарда, если `internal_replication = true`).
Возможные значения:
- 0 — Данные добавляются в асинхронном режиме.
- 1 — Данные добавляются в синхронном режиме.
Значение по умолчанию: `0`.
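For example, an `INSERT` can be made synchronous for the current session like this (the table name and values are illustrative):

``` sql
-- distributed_table is an illustrative Distributed table
SET insert_distributed_sync = 1;
INSERT INTO distributed_table VALUES (1, 'a'), (2, 'b');
```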
**См. также**
- [Движок Distributed](../../engines/table-engines/special/distributed.md#distributed)
- [Управление распределёнными таблицами](../../sql-reference/statements/system.md#query-language-system-distributed)
## validate\_polygons {#validate_polygons}
Включает или отключает генерирование исключения в функции [pointInPolygon](../../sql-reference/functions/geo.md#pointinpolygon), если многоугольник самопересекающийся или самокасающийся.
Допустимые значения:
- 0 — генерирование исключения отключено. `pointInPolygon` принимает недопустимые многоугольники и возвращает для них, возможно, неверные результаты.
- 1 — генерирование исключения включено.
Значение по умолчанию: 1.
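For example, with exception generation disabled, a self-intersecting polygon is accepted and the result may be incorrect (the coordinates below are illustrative):

``` sql
-- the polygon below is self-intersecting; with validate_polygons = 1 this query would throw
SET validate_polygons = 0;
SELECT pointInPolygon((0.5, 0.5), [(0., 0.), (2., 0.), (0., 2.), (2., 2.)]);
```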
## always_fetch_merged_part {#always_fetch_merged_part}
Запрещает слияние данных для таблиц семейства [Replicated*MergeTree](../../engines/table-engines/mergetree-family/replication.md).

View File

@ -126,6 +126,44 @@ SELECT * FROM system.contributors WHERE name='Olga Khvostikova'
└──────────────────┘
```
## system.licenses {#system-tables_system.licenses}
Содержит информацию о лицензиях сторонних библиотек, которые находятся в директории [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) исходных кодов ClickHouse.
Столбцы:
- `library_name` ([String](../sql-reference/data-types/string.md)) — Название библиотеки, к которой относится лицензия.
- `license_type` ([String](../sql-reference/data-types/string.md)) — Тип лицензии, например, Apache, MIT.
- `license_path` ([String](../sql-reference/data-types/string.md)) — Путь к файлу с текстом лицензии.
- `license_text` ([String](../sql-reference/data-types/string.md)) — Текст лицензии.
**Пример**
``` sql
SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15
```
``` text
┌─library_name───────┬─license_type─┬─license_path────────────────────────┐
│ FastMemcpy │ MIT │ /contrib/FastMemcpy/LICENSE │
│ arrow │ Apache │ /contrib/arrow/LICENSE.txt │
│ avro │ Apache │ /contrib/avro/LICENSE.txt │
│ aws-c-common │ Apache │ /contrib/aws-c-common/LICENSE │
│ aws-c-event-stream │ Apache │ /contrib/aws-c-event-stream/LICENSE │
│ aws-checksums │ Apache │ /contrib/aws-checksums/LICENSE │
│ aws │ Apache │ /contrib/aws/LICENSE.txt │
│ base64 │ BSD 2-clause │ /contrib/base64/LICENSE │
│ boost │ Boost │ /contrib/boost/LICENSE_1_0.txt │
│ brotli │ MIT │ /contrib/brotli/LICENSE │
│ capnproto │ MIT │ /contrib/capnproto/LICENSE │
│ cassandra │ Apache │ /contrib/cassandra/LICENSE.txt │
│ cctz │ Apache │ /contrib/cctz/LICENSE.txt │
│ cityhash102 │ MIT │ /contrib/cityhash102/COPYING │
│ cppkafka │ BSD 2-clause │ /contrib/cppkafka/LICENSE │
└────────────────────┴──────────────┴─────────────────────────────────────┘
```
## system.databases {#system-databases}
Таблица содержит один столбец name типа String - имя базы данных.
@ -433,78 +471,154 @@ CurrentMetric_ReplicatedChecks: 0
Столбцы:
- `partition` (`String`) Имя партиции. Что такое партиция можно узнать из описания запроса [ALTER](../sql-reference/statements/alter.md#query_language_queries_alter).
- `partition` ([String](../sql-reference/data-types/string.md)) имя партиции. Что такое партиция можно узнать из описания запроса [ALTER](../sql-reference/statements/alter.md#query_language_queries_alter).
Форматы:
- `YYYYMM` для автоматической схемы партиционирования по месяцам.
- `any_string` при партиционировании вручную.
- `name` (`String`) имя куска.
- `name` ([String](../sql-reference/data-types/string.md)) имя куска.
- `active` (`UInt8`) признак активности. Если кусок активен, то он используется таблицей, в противном случает он будет удален. Неактивные куски остаются после слияний.
- `part_type` ([String](../sql-reference/data-types/string.md)) — формат хранения данных.
- `marks` (`UInt64`) количество засечек. Чтобы получить примерное количество строк в куске, умножьте `marks` на гранулированность индекса (обычно 8192).
Возможные значения:
- `rows` (`UInt64`) количество строк.
- `Wide` — каждая колонка хранится в отдельном файле.
- `Compact` — все колонки хранятся в одном файле.
- `bytes_on_disk` (`UInt64`) общий размер всех файлов кусков данных в байтах.
Формат хранения данных определяется настройками `min_bytes_for_wide_part` и `min_rows_for_wide_part` таблицы [MergeTree](../engines/table-engines/mergetree-family/mergetree.md).
- `data_compressed_bytes` (`UInt64`) общий размер сжатой информации в куске данных. Размер всех дополнительных файлов (например, файлов с засечками) не учитывается.
- `active` ([UInt8](../sql-reference/data-types/int-uint.md)) признак активности. Если кусок активен, то он используется таблицей, в противном случае он будет удален. Неактивные куски остаются после слияний.
- `data_uncompressed_bytes` (`UInt64`) общий размер распакованной информации куска данных. Размер всех дополнительных файлов (например, файлов с засечками) не учитывается.
- `marks` ([UInt64](../sql-reference/data-types/int-uint.md)) количество засечек. Чтобы получить примерное количество строк в куске, умножьте `marks` на гранулированность индекса (обычно 8192).
- `marks_bytes` (`UInt64`) размер файла с засечками.
- `rows` ([UInt64](../sql-reference/data-types/int-uint.md)) количество строк.
- `modification_time` (`DateTime`) время модификации директории с куском данных. Обычно соответствует времени создания куска.
- `bytes_on_disk` ([UInt64](../sql-reference/data-types/int-uint.md)) общий размер всех файлов кусков данных в байтах.
- `remove_time` (`DateTime`) время, когда кусок стал неактивным.
- `data_compressed_bytes` ([UInt64](../sql-reference/data-types/int-uint.md)) общий размер сжатой информации в куске данных. Размер всех дополнительных файлов (например, файлов с засечками) не учитывается.
- `refcount` (`UInt32`) количество мест, в котором кусок используется. Значение больше 2 говорит о том, что кусок участвует в запросах или в слияниях.
- `data_uncompressed_bytes` ([UInt64](../sql-reference/data-types/int-uint.md)) общий размер распакованной информации куска данных. Размер всех дополнительных файлов (например, файлов с засечками) не учитывается.
- `min_date` (`Date`) минимальное значение ключа даты в куске данных.
- `marks_bytes` ([UInt64](../sql-reference/data-types/int-uint.md)) размер файла с засечками.
- `max_date` (`Date`) максимальное значение ключа даты в куске данных.
- `modification_time` ([DateTime](../sql-reference/data-types/datetime.md)) время модификации директории с куском данных. Обычно соответствует времени создания куска.
- `min_time` (`DateTime`) минимальное значение даты и времени в куске данных.
- `remove_time` ([DateTime](../sql-reference/data-types/datetime.md)) время, когда кусок стал неактивным.
- `max_time`(`DateTime`) максимальное значение даты и времени в куске данных.
- `refcount` ([UInt32](../sql-reference/data-types/int-uint.md)) количество мест, в котором кусок используется. Значение больше 2 говорит о том, что кусок участвует в запросах или в слияниях.
- `partition_id` (`String`) ID партиции.
- `min_date` ([Date](../sql-reference/data-types/date.md)) минимальное значение ключа даты в куске данных.
- `min_block_number` (`UInt64`) минимальное число кусков, из которых состоит текущий после слияния.
- `max_date` ([Date](../sql-reference/data-types/date.md)) максимальное значение ключа даты в куске данных.
- `max_block_number` (`UInt64`) максимальное число кусков, из которых состоит текущий после слияния.
- `min_time` ([DateTime](../sql-reference/data-types/datetime.md)) минимальное значение даты и времени в куске данных.
- `level` (`UInt32`) - глубина дерева слияний. Если слияний не было, то `level=0`.
- `max_time`([DateTime](../sql-reference/data-types/datetime.md)) максимальное значение даты и времени в куске данных.
- `data_version` (`UInt64`) число, которое используется для определения того, какие мутации необходимо применить к куску данных (мутации с версией большей, чем `data_version`).
- `partition_id` ([String](../sql-reference/data-types/string.md)) ID партиции.
- `primary_key_bytes_in_memory` (`UInt64`) объём памяти (в байтах), занимаемой значениями первичных ключей.
- `min_block_number` ([UInt64](../sql-reference/data-types/int-uint.md)) минимальное число кусков, из которых состоит текущий после слияния.
- `primary_key_bytes_in_memory_allocated` (`UInt64`) объём памяти (в байтах) выделенный для размещения первичных ключей.
- `max_block_number` ([UInt64](../sql-reference/data-types/int-uint.md)) максимальное число кусков, из которых состоит текущий после слияния.
- `is_frozen` (`UInt8`) Признак, показывающий существование бэкапа партиции. 1, бэкап есть. 0, бэкапа нет. Смотрите раздел [FREEZE PARTITION](../sql-reference/statements/alter.md#alter_freeze-partition).
- `level` ([UInt32](../sql-reference/data-types/int-uint.md)) - глубина дерева слияний. Если слияний не было, то `level=0`.
- `database` (`String`) имя базы данных.
- `data_version` ([UInt64](../sql-reference/data-types/int-uint.md)) число, которое используется для определения того, какие мутации необходимо применить к куску данных (мутации с версией большей, чем `data_version`).
- `table` (`String`) имя таблицы.
- `primary_key_bytes_in_memory` ([UInt64](../sql-reference/data-types/int-uint.md)) объём памяти (в байтах), занимаемой значениями первичных ключей.
- `engine` (`String`) имя движка таблицы, без параметров.
- `primary_key_bytes_in_memory_allocated` ([UInt64](../sql-reference/data-types/int-uint.md)) объём памяти (в байтах) выделенный для размещения первичных ключей.
- `path` (`String`) абсолютный путь к папке с файлами кусков данных.
- `is_frozen` ([UInt8](../sql-reference/data-types/int-uint.md)) Признак, показывающий существование бэкапа партиции. 1, бэкап есть. 0, бэкапа нет. Смотрите раздел [FREEZE PARTITION](../sql-reference/statements/alter.md#alter_freeze-partition).
- `disk` (`String`) имя диска, на котором находится кусок данных.
- `database` ([String](../sql-reference/data-types/string.md)) имя базы данных.
- `hash_of_all_files` (`String`) значение [sipHash128](../sql-reference/functions/hash-functions.md#hash_functions-siphash128) для сжатых файлов.
- `table` ([String](../sql-reference/data-types/string.md)) имя таблицы.
- `hash_of_uncompressed_files` (`String`) значение [sipHash128](../sql-reference/functions/hash-functions.md#hash_functions-siphash128) несжатых файлов (файлы с засечками, первичным ключом и пр.)
- `engine` ([String](../sql-reference/data-types/string.md)) имя движка таблицы, без параметров.
- `uncompressed_hash_of_compressed_files` (`String`) значение [sipHash128](../sql-reference/functions/hash-functions.md#hash_functions-siphash128) данных в сжатых файлах как если бы они были разжатыми.
- `path` ([String](../sql-reference/data-types/string.md)) абсолютный путь к папке с файлами кусков данных.
- `bytes` (`UInt64`) алиас для `bytes_on_disk`.
- `disk` ([String](../sql-reference/data-types/string.md)) имя диска, на котором находится кусок данных.
- `marks_size` (`UInt64`) алиас для `marks_bytes`.
- `hash_of_all_files` ([String](../sql-reference/data-types/string.md)) значение [sipHash128](../sql-reference/functions/hash-functions.md#hash_functions-siphash128) для сжатых файлов.
- `hash_of_uncompressed_files` ([String](../sql-reference/data-types/string.md)) значение [sipHash128](../sql-reference/functions/hash-functions.md#hash_functions-siphash128) несжатых файлов (файлы с засечками, первичным ключом и пр.)
- `uncompressed_hash_of_compressed_files` ([String](../sql-reference/data-types/string.md)) значение [sipHash128](../sql-reference/functions/hash-functions.md#hash_functions-siphash128) данных в сжатых файлах как если бы они были разжатыми.
- `delete_ttl_info_min` ([DateTime](../sql-reference/data-types/datetime.md)) — Минимальное значение ключа даты и времени для правила [TTL DELETE](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `delete_ttl_info_max` ([DateTime](../sql-reference/data-types/datetime.md)) — Максимальное значение ключа даты и времени для правила [TTL DELETE](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `move_ttl_info.expression` ([Array](../sql-reference/data-types/array.md)([String](../sql-reference/data-types/string.md))) — Массив выражений. Каждое выражение задаёт правило [TTL MOVE](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
!!! note "Предупреждение"
Массив выражений `move_ttl_info.expression` используется, в основном, для обратной совместимости. Для работы с правилами `TTL MOVE` лучше использовать поля `move_ttl_info.min` и `move_ttl_info.max`.
- `move_ttl_info.min` ([Array](../sql-reference/data-types/array.md)([DateTime](../sql-reference/data-types/datetime.md))) — Массив значений. Каждый элемент массива задаёт минимальное значение ключа даты и времени для правила [TTL MOVE](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `move_ttl_info.max` ([Array](../sql-reference/data-types/array.md)([DateTime](../sql-reference/data-types/datetime.md))) — Массив значений. Каждый элемент массива задаёт максимальное значение ключа даты и времени для правила [TTL MOVE](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `bytes` ([UInt64](../sql-reference/data-types/int-uint.md)) алиас для `bytes_on_disk`.
- `marks_size` ([UInt64](../sql-reference/data-types/int-uint.md)) алиас для `marks_bytes`.
**Пример**
``` sql
SELECT * FROM system.parts LIMIT 1 FORMAT Vertical;
```
``` text
Row 1:
──────
partition: tuple()
name: all_1_4_1_6
part_type: Wide
active: 1
marks: 2
rows: 6
bytes_on_disk: 310
data_compressed_bytes: 157
data_uncompressed_bytes: 91
marks_bytes: 144
modification_time: 2020-06-18 13:01:49
remove_time: 0000-00-00 00:00:00
refcount: 1
min_date: 0000-00-00
max_date: 0000-00-00
min_time: 0000-00-00 00:00:00
max_time: 0000-00-00 00:00:00
partition_id: all
min_block_number: 1
max_block_number: 4
level: 1
data_version: 6
primary_key_bytes_in_memory: 8
primary_key_bytes_in_memory_allocated: 64
is_frozen: 0
database: default
table: months
engine: MergeTree
disk_name: default
path: /var/lib/clickhouse/data/default/months/all_1_4_1_6/
hash_of_all_files: 2d0657a16d9430824d35e327fcbd87bf
hash_of_uncompressed_files: 84950cc30ba867c77a408ae21332ba29
uncompressed_hash_of_compressed_files: 1ad78f1c6843bbfb99a2c931abe7df7d
delete_ttl_info_min: 0000-00-00 00:00:00
delete_ttl_info_max: 0000-00-00 00:00:00
move_ttl_info.expression: []
move_ttl_info.min: []
move_ttl_info.max: []
```
**См. также**
- [Движок MergeTree](../engines/table-engines/mergetree-family/mergetree.md)
- [TTL для столбцов и таблиц](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl)
## system.part\_log {#system_tables-part-log}
@ -1210,29 +1324,50 @@ path: /clickhouse/tables/01-08/visits/replicas
## system.mutations {#system_tables-mutations}
Таблица содержит информацию о ходе выполнения [мутаций](../sql-reference/statements/alter.md#alter-mutations) MergeTree-таблиц. Каждой команде мутации соответствует одна строка. В таблице есть следующие столбцы:
Таблица содержит информацию о ходе выполнения [мутаций](../sql-reference/statements/alter.md#alter-mutations) таблиц семейства MergeTree. Каждой команде мутации соответствует одна строка таблицы.
**database**, **table** - имя БД и таблицы, к которой была применена мутация.
Столбцы:
**mutation\_id** - ID запроса. Для реплицированных таблиц эти ID соответствуют именам записей в директории `<table_path_in_zookeeper>/mutations/` в ZooKeeper, для нереплицированных - именам файлов в директории с данными таблицы.
- `database` ([String](../sql-reference/data-types/string.md)) — имя БД, к которой была применена мутация.
**command** - Команда мутации (часть запроса после `ALTER TABLE [db.]table`).
- `table` ([String](../sql-reference/data-types/string.md)) — имя таблицы, к которой была применена мутация.
**create\_time** - Время создания мутации.
- `mutation_id` ([String](../sql-reference/data-types/string.md)) — ID запроса. Для реплицированных таблиц эти ID соответствуют именам записей в директории `<table_path_in_zookeeper>/mutations/` в ZooKeeper, для нереплицированных — именам файлов в директории с данными таблицы.
**block\_numbers.partition\_id**, **block\_numbers.number** - Nested-столбец. Для мутаций реплицированных таблиц для каждой партиции содержит номер блока, полученный этой мутацией (в каждой партиции будут изменены только куски, содержащие блоки с номерами, меньшими номера, полученного мутацией в этой партиции). Для нереплицированных таблиц нумерация блоков сквозная по партициям, поэтому столбец содержит одну запись с единственным номером блока, полученным мутацией.
- `command` ([String](../sql-reference/data-types/string.md)) — команда мутации (часть запроса после `ALTER TABLE [db.]table`).
**parts\_to\_do** - Количество кусков таблицы, которые ещё предстоит изменить.
- `create_time` ([Datetime](../sql-reference/data-types/datetime.md)) — дата и время создания мутации.
**is\_done** - Завершена ли мутация. Замечание: даже если `parts_to_do = 0`, для реплицированной таблицы возможна ситуация, когда мутация ещё не завершена из-за долго выполняющейся вставки, которая добавляет данные, которые нужно будет мутировать.
- `block_numbers.partition_id` ([Array](../sql-reference/data-types/array.md)([String](../sql-reference/data-types/string.md))) — Для мутаций реплицированных таблиц массив содержит номера партиций (по одной записи для каждой партиции). Для мутаций нереплицированных таблиц массив пустой.
- `block_numbers.number` ([Array](../sql-reference/data-types/array.md)([Int64](../sql-reference/data-types/int-uint.md))) — Для мутаций реплицированных таблиц массив содержит по одной записи для каждой партиции, с номером блока, полученным этой мутацией. В каждой партиции будут изменены только куски, содержащие блоки с номерами меньше чем данный номер.
Для нереплицированных таблиц нумерация блоков сквозная по партициям. Поэтому массив содержит единственную запись с номером блока, полученным мутацией.
- `parts_to_do_names` ([Array](../sql-reference/data-types/array.md)([String](../sql-reference/data-types/string.md))) — массив с именами кусков данных, которые должны быть изменены для завершения мутации.
- `parts_to_do` ([Int64](../sql-reference/data-types/int-uint.md)) — количество кусков данных, которые должны быть изменены для завершения мутации.
- `is_done` ([UInt8](../sql-reference/data-types/int-uint.md)) — Признак, завершена ли мутация. Возможные значения:
- `1` — мутация завершена,
- `0` — мутация еще продолжается.
!!! info "Замечание"
Даже если `parts_to_do = 0`, для реплицированной таблицы возможна ситуация, когда мутация ещё не завершена из-за долго выполняющейся операции `INSERT`, которая добавляет данные, которые нужно будет мутировать.
Если во время мутации какого-либо куска возникли проблемы, заполняются следующие столбцы:
**latest\_failed\_part** - Имя последнего куска, мутация которого не удалась.
- `latest_failed_part` ([String](../sql-reference/data-types/string.md)) — имя последнего куска, мутация которого не удалась.
**latest\_fail\_time** — время последней ошибки мутации.
- `latest_fail_time` ([Datetime](../sql-reference/data-types/datetime.md)) — дата и время последней ошибки мутации.
**latest\_fail\_reason** — причина последней ошибки мутации.
- `latest_fail_reason` ([String](../sql-reference/data-types/string.md)) — причина последней ошибки мутации.
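For example, mutations that are still running can be listed with a query like the following (a minimal sketch using the columns described above):

``` sql
-- show unfinished mutations and how many parts are left to rewrite
SELECT mutation_id, command, parts_to_do, is_done
FROM system.mutations
WHERE is_done = 0;
```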
**См. также**
- [Мутации](../sql-reference/statements/alter.md#alter-mutations)
- [Движок MergeTree](../engines/table-engines/mergetree-family/mergetree.md)
- [Репликация данных](../engines/table-engines/mergetree-family/replication.md) (семейство ReplicatedMergeTree)
## system.disks {#system_tables-disks}
@ -1261,10 +1396,56 @@ Cодержит информацию о дисках, заданных в [ко
Если политика хранения содержит несколько томов, то каждому тому соответствует отдельная запись в таблице.
## system.roles {#system_tables-roles}
Содержит сведения о [ролях](../operations/access-rights.md#role-management).
Столбцы:
- `name` ([String](../sql-reference/data-types/string.md)) — Имя роли.
- `id` ([UUID](../sql-reference/data-types/uuid.md)) — ID роли.
- `storage` ([String](../sql-reference/data-types/string.md)) — Путь к хранилищу ролей. Настраивается в параметре `access_control_path`.
## system.role_grants {#system_tables-role_grants}
Содержит [гранты](../sql-reference/statements/grant.md) ролей для пользователей и ролей. Чтобы добавить записи в эту таблицу, используйте команду `GRANT role TO user`.
Столбцы:
- `user_name` ([Nullable](../sql-reference/data-types/nullable.md)([String](../sql-reference/data-types/string.md))) — Имя пользователя.
- `role_name` ([Nullable](../sql-reference/data-types/nullable.md)([String](../sql-reference/data-types/string.md))) — Имя роли.
- `granted_role_name` ([String](../sql-reference/data-types/string.md)) — Имя роли, назначенной для роли `role_name`. Чтобы назначить одну роль другой используйте `GRANT role1 TO role2`.
- `granted_role_is_default` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Флаг, который показывает, является ли `granted_role` ролью по умолчанию. Возможные значения:
- 1 — `granted_role` является ролью по умолчанию.
- 0 — `granted_role` не является ролью по умолчанию.
- `with_admin_option` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Флаг, который показывает, обладает ли роль `granted_role` привилегией `ADMIN OPTION`. Возможные значения:
- 1 — Роль обладает привилегией `ADMIN OPTION`.
- 0 — Роль не обладает привилегией `ADMIN OPTION`.
## system.current_roles {#system_tables-current_roles}
Содержит активные роли текущего пользователя. `SET ROLE` изменяет содержимое этой таблицы.
Столбцы:
- `role_name` ([String](../sql-reference/data-types/string.md)) — Имя роли.
- `with_admin_option` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Флаг, который показывает, обладает ли роль `current_role` привилегией `ADMIN OPTION`.
- `is_default` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Флаг, который показывает, является ли `current_role` ролью по умолчанию.
## system.enabled_roles {#system_tables-enabled_roles}
Содержит все активные роли на данный момент, включая текущую роль текущего пользователя и роли, назначенные для текущей роли.
Столбцы:
- `role_name` ([String](../sql-reference/data-types/string.md)) — Имя роли.
- `with_admin_option` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Флаг, который показывает, обладает ли роль `enabled_role` привилегией `ADMIN OPTION`.
- `is_current` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Флаг, который показывает, является ли `enabled_role` текущей ролью текущего пользователя.
- `is_default` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Флаг, который показывает, является ли `enabled_role` ролью по умолчанию.
## system.quotas {#system_tables-quotas}
Содержит информацию о [квотах](quotas.md).
Столбцы:
- `name` ([String](../sql-reference/data-types/string.md)) — Имя квоты.
- `id` ([UUID](../sql-reference/data-types/uuid.md)) — ID квоты.
- `storage`([String](../sql-reference/data-types/string.md)) — Хранилище квот. Возможные значения: "users.xml", если квота задана в файле users.xml, "disk" — если квота задана в SQL-запросе.
@ -1286,6 +1467,7 @@ Cодержит информацию о дисках, заданных в [ко
Содержит информацию о максимумах для всех интервалов всех квот. Одной квоте могут соответствовать любое количество строк или ноль.
Столбцы:
- `quota_name` ([String](../sql-reference/data-types/string.md)) — Имя квоты.
- `duration` ([UInt32](../sql-reference/data-types/int-uint.md)) — Длина временного интервала для расчета потребления ресурсов, в секундах.
- `is_randomized_interval` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Логическое значение. Оно показывает, является ли интервал рандомизированным. Интервал всегда начинается в одно и то же время, если он не рандомизирован. Например, интервал в 1 минуту всегда начинается с целого числа минут (то есть он может начинаться в 11:20:00, но никогда не начинается в 11:20:01), интервал в один день всегда начинается в полночь UTC. Если интервал рандомизирован, то самый первый интервал начинается в произвольное время, а последующие интервалы начинаются один за другим. Значения:
@ -1303,6 +1485,7 @@ Cодержит информацию о дисках, заданных в [ко
Использование квоты текущим пользователем: сколько используется и сколько осталось.
Столбцы:
- `quota_name` ([String](../sql-reference/data-types/string.md)) — Имя квоты.
- `quota_key`([String](../sql-reference/data-types/string.md)) — Значение ключа. Например, если keys = `ip_address`, `quota_key` может иметь значение '192.168.1.1'.
- `start_time`([Nullable](../sql-reference/data-types/nullable.md)([DateTime](../sql-reference/data-types/datetime.md))) — Время начала расчета потребления ресурсов.
@ -1327,6 +1510,7 @@ Cодержит информацию о дисках, заданных в [ко
Использование квот всеми пользователями.
Столбцы:
- `quota_name` ([String](../sql-reference/data-types/string.md)) — Имя квоты.
- `quota_key` ([String](../sql-reference/data-types/string.md)) — Ключ квоты.
- `is_current` ([UInt8](../sql-reference/data-types/int-uint.md#uint-ranges)) — Квота используется для текущего пользователя.

View File

@ -1334,4 +1334,42 @@ len: 30
- [randomPrintableASCII](../../sql-reference/functions/other-functions.md#randomascii)
## randomStringUTF8 {#randomstringutf8}
Генерирует строку определенной длины со случайной строкой в кодировке UTF-8.
**Синтаксис**
``` sql
randomStringUTF8(length);
```
**Параметры**
- `length` — Длина итоговой строки в кодовых точках. [UInt64](../../sql-reference/data-types/int-uint.md).
**Возвращаемое значение**
- Случайная строка в кодировке UTF-8.
Тип: [String](../../sql-reference/data-types/string.md).
**Пример**
Запрос:
```sql
SELECT randomStringUTF8(13)
```
Результат:
```text
┌─randomStringUTF8(13)─┐
│ 𘤗𙉝д兠庇󡅴󱱎󦐪􂕌𔊹𓰛 │
└──────────────────────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/other_functions/) <!--hide-->

View File

@ -33,4 +33,42 @@ SELECT alphaTokens('abca1abc')
└─────────────────────────┘
```
## extractAllGroups(text, regexp) {#extractallgroups}
Выделяет все группы из неперекрывающихся подстрок, которые соответствуют регулярному выражению.
**Синтаксис**
``` sql
extractAllGroups(text, regexp)
```
**Параметры**
- `text` — [String](../data-types/string.md) или [FixedString](../data-types/fixedstring.md).
- `regexp` — Регулярное выражение. Константа. [String](../data-types/string.md) или [FixedString](../data-types/fixedstring.md).
**Возвращаемые значения**
- Если найдена хотя бы одна подходящая группа, функция возвращает столбец вида `Array(Array(String))`, сгруппированный по идентификатору группы (от 1 до N, где N — количество групп с захватом содержимого в `regexp`).
- Если подходящих групп не найдено, возвращает пустой массив.
Тип: [Array](../data-types/array.md).
**Пример использования**
Запрос:
``` sql
SELECT extractAllGroups('abc=123, 8="hkl"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)');
```
Результат:
``` text
┌─extractAllGroups('abc=123, 8="hkl"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','123'],['8','"hkl"']] │
└───────────────────────────────────────────────────────────────────────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/splitting_merging_functions/) <!--hide-->

View File

@ -293,7 +293,7 @@ Examples of how this hierarchy is treated:
- The `MODIFY SETTING` privilege allows modifying table engine settings. It doesn't affect settings or server configuration parameters.
- The `ATTACH` operation needs the [CREATE](#grant-create) privilege.
- The `DETACH` operation needs the [DROP](#grant-drop) privilege.
- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/misc.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege.
- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/misc.md#kill-mutation-statement) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege.
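For instance, granting just the privilege needed to run (and later kill) an `UPDATE` mutation on a single table might look like this (the user, database, and table names are illustrative):

``` sql
-- illustrative names: user 'john', table db.table
GRANT ALTER UPDATE ON db.table TO john;
-- john can now both start UPDATE mutations on db.table and stop them:
KILL MUTATION WHERE database = 'db' AND table = 'table';
```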
### CREATE {#grant-create}
@ -312,7 +312,7 @@ Allows executing [CREATE](../../sql-reference/statements/create.md) and [ATTACH]
### DROP {#grant-drop}
Allows executing [DROP](../../sql-reference/statements/misc.md#drop) and [DETACH](../../sql-reference/statements/misc.md#detach) queries according to the following hierarchy of privileges:
Allows executing [DROP](../../sql-reference/statements/misc.md#drop) and [DETACH](../../sql-reference/statements/misc.md#detach-statement) queries according to the following hierarchy of privileges:
- `DROP`. Level:
- `DROP DATABASE`. Level: `DATABASE`

View File

@ -15,7 +15,7 @@ FROM <left_table>
## Поддерживаемые типы соединения {#select-join-types}
Весе типы из стандартого [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) поддерживаются:
Все типы из стандартного [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) поддерживаются:
- `INNER JOIN`, возвращаются только совпадающие строки.
- `LEFT OUTER JOIN`, не совпадающие строки из левой таблицы возвращаются в дополнение к совпадающим строкам.

View File

@ -90,7 +90,7 @@ SELECT name, status FROM system.dictionaries;
## Управление распределёнными таблицами {#query-language-system-distributed}
ClickHouse может оперировать [распределёнными](../../sql-reference/statements/system.md) таблицами. Когда пользователь вставляет данные в эти таблицы, ClickHouse сначала формирует очередь из данных, которые должны быть отправлены на узлы кластера, а затем асинхронно отправляет подготовленные данные. Вы можете управлять очередью с помощью запросов [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) и [FLUSH DISTRIBUTED](#query_language-system-flush-distributed). Также есть возможность синхронно вставлять распределенные данные с помощью настройки `insert_distributed_sync`.
ClickHouse может оперировать [распределёнными](../../sql-reference/statements/system.md) таблицами. Когда пользователь вставляет данные в эти таблицы, ClickHouse сначала формирует очередь из данных, которые должны быть отправлены на узлы кластера, а затем асинхронно отправляет подготовленные данные. Вы можете управлять очередью с помощью запросов [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) и [FLUSH DISTRIBUTED](#query_language-system-flush-distributed). Также есть возможность синхронно вставлять распределенные данные с помощью настройки [insert_distributed_sync](../../operations/settings/settings.md#insert_distributed_sync).
### STOP DISTRIBUTED SENDS {#query_language-system-stop-distributed-sends}

View File

@ -87,7 +87,10 @@ def build_blog_nav(lang, args):
posts = []
post_meta_items = []
for post in os.listdir(year_dir):
meta, _ = util.read_md_file(os.path.join(year_dir, post))
post_path = os.path.join(year_dir, post)
if not post.endswith('.md'):
raise RuntimeError(f'Unexpected non-md file in posts folder: {post_path}')
meta, _ = util.read_md_file(post_path)
post_date = meta['date']
post_title = meta['title']
if datetime.date.fromisoformat(post_date) > datetime.date.today():

View File

@ -44,7 +44,7 @@ then
if [[ ! -z "${CLOUDFLARE_TOKEN}" ]]
then
sleep 1m
git diff --stat="9999,9999" --diff-filter=M HEAD~1 | grep '|' | awk '$1 ~ /\.html$/ { if ($3>8) { url="https://content.clickhouse.tech/"$1; sub(/\/index.html/, "/", url); print "\""url"\""; }}' | split -l 25 /dev/stdin PURGE
git diff --stat="9999,9999" --diff-filter=M HEAD~1 | grep '|' | awk '$1 ~ /\.html$/ { if ($3>6) { url="https://content.clickhouse.tech/"$1; sub(/index.html/, "", url); print "\""url"\""; }}' | split -l 25 /dev/stdin PURGE
for FILENAME in $(ls PURGE*)
do
POST_DATA=$(cat "${FILENAME}" | sed -n -e 'H;${x;s/\n/,/g;s/^,//;p;}' | awk '{print "{\"files\":["$0"]}";}')

View File

@ -124,15 +124,7 @@ def adjust_markdown_html(content):
def minify_html(content):
return htmlmin.minify(content,
remove_comments=False,
remove_empty_space=True,
remove_all_empty_space=False,
reduce_empty_attributes=True,
reduce_boolean_attributes=False,
remove_optional_attribute_quotes=True,
convert_charrefs=False,
keep_pre=True)
return htmlmin.minify(content)
def build_website(args):

View File

@ -4,9 +4,9 @@
功能性测试是最简便使用的。绝大部分 ClickHouse 的功能可以通过功能性测试来测试,任何代码的更改都必须通过该测试。
每个功能测试会向正在运行的 ClickHouse服 务器发送一个或多个查询,并将结果与预期结果进行比较。
每个功能测试会向正在运行的 ClickHouse服务器发送一个或多个查询并将结果与预期结果进行比较。
测试用例在 `tests/queries` 目录中。这里有两个子目录:`stateless` 和 `stateful`目录。 无状态的测试无需预加载测试数据集 - 通常是在测试运行期间动态创建小量的数据集。有状态测试需要来自 Yandex.Metrica 的预加载测试数据,而不向一般公众提供。 我们倾向于仅使用«无状态»测试并避免添加新的«有状态»测试。
测试用例在 `tests/queries` 目录中。这里有两个子目录:`stateless` 和 `stateful`目录。无状态的测试无需预加载测试数据集 - 通常是在测试运行期间动态创建小量的数据集。有状态测试需要来自 Yandex.Metrica 的预加载测试数据,而不向一般公众提供。我们倾向于仅使用«无状态»测试并避免添加新的«有状态»测试。
每个测试用例可以是两种类型之一:`.sql` 和 `.sh`。`.sql` 测试文件是用于管理`clickhouse-client --multiquery --testmode`的简单SQL脚本。`.sh` 测试文件是一个可以自己运行的脚本。
@ -28,7 +28,7 @@
## 集成测试 {#ji-cheng-ce-shi}
集成测试允许在集群配置中测试 ClickHouse并与其他服务器如MySQLPostgresMongoDB进行 ClickHouse 交互。它们可用于模拟网络拆分数据包丢弃等。这些测试在Docker 下运行,并使用各种软件创建多个容器。
集成测试允许在集群配置中测试 ClickHouse并与其他服务器如MySQLPostgresMongoDB进行 ClickHouse 交互。它们可用于模拟网络拆分数据包丢弃等。这些测试在Docker下运行并使用各种软件创建多个容器。
参考 `tests/integration/README.md` 文档关于如何使用集成测试。
@ -93,7 +93,7 @@
## 测试环境 {#ce-shi-huan-jing}
在将版本发布为稳定之前,我们将其部署在测试环境中 测试环境是一个处理\[Yandex.Metrica\]https://metrica.yandex.com/总数据的1/39部分大小的集群。 我们与 Yandex.Metrica 团队公用我们的测试环境。ClickHouse 在现有数据的基础上无需停机即可升级。 我们首先看到数据处理成功而不会实时滞后,复制继续工作,并且 Yandex.Metrica 团队无法看到问题。 首先的检查可以通过以下方式完成:
在将版本发布为稳定之前,我们将其部署在测试环境中。测试环境是一个处理\[Yandex.Metrica\](https://metrica.yandex.com/)总数据的1/39部分大小的集群。我们与 Yandex.Metrica 团队公用我们的测试环境。ClickHouse 在现有数据的基础上无需停机即可升级。我们首先看到数据处理成功而不会实时滞后,复制继续工作,并且 Yandex.Metrica 团队无法看到问题。首先的检查可以通过以下方式完成:
SELECT hostName() AS h, any(version()), any(uptime()), max(UTCEventTime), count() FROM remote('example01-01-{1..3}t', merge, hits) WHERE EventDate >= today() - 2 GROUP BY h ORDER BY h;
@ -101,7 +101,7 @@
## 负载测试 {#fu-zai-ce-shi}
部署到测试环境后,我们使用生产群集中的查询运行负载测试。 这是手动完成的。
部署到测试环境后,我们使用生产群集中的查询运行负载测试。这是手动完成的。
确保在生产集群中开启了 `query_log` 选项。
@ -125,11 +125,11 @@
## 编译测试 {#bian-yi-ce-shi}
构建测试允许检查构建在各种替代配置和某些外部系统上是否被破坏。测试位于`ci`目录。 它们从 DockerVagrant 中的源代码运行构建,有时在 Docker 中运行 `qemu-user-static`。这些测试正在开发中,测试运行不是自动化的。
构建测试允许检查构建在各种替代配置和某些外部系统上是否被破坏。测试位于`ci`目录。它们从 DockerVagrant 中的源代码运行构建,有时在 Docker 中运行 `qemu-user-static`。这些测试正在开发中,测试运行不是自动化的。
动机:
通常我们会在 ClickHouse 构建的单个版本上发布并运行所有测试。 但是有一些未经过彻底测试的替代构建版本。 例子:
通常我们会在 ClickHouse 构建的单个版本上发布并运行所有测试。但是有一些未经过彻底测试的替代构建版本。例子:
- 在 FreeBSD 中的构建;
- 在 Debian 中使用系统包中的库进行构建;
@ -152,33 +152,41 @@ Clang 有更多有用的警告 - 您可以使用 `-Weverything` 查找它们并
对于生产构建,使用 gcc它仍然生成比 clang 稍高效的代码。对于开发来说clang 通常更方便使用。您可以使用调试模式在自己的机器上构建(以节省笔记本电脑的电量),但请注意,由于更好的控制流程和过程分析,编译器使用 `-O3` 会生成更多警告。 当使用 clang 构建时,使用 `libc++` 而不是 `libstdc++`,并且在使用调试模式构建时,使用调试版本的 `libc++`,它允许在运行时捕获更多错误。
## 消毒剂 {#sanitizers}
## Sanitizers {#sanitizers}
**地址消毒剂**.
我们在每个提交的基础上在 ASan 下运行功能和集成测试。
### Address sanitizer
我们使用Asan对每个提交进行功能和集成测试。
**ツ暗ェツ氾环催ツ団ツ法ツ人)**.
我们在 Valgrind 过夜进行功能测试。 这需要几个小时。 目前在 `re2` 库中有一个已知的误报,请参阅 [文章](https://research.swtch.com/sparse)。
### Valgrind (Memcheck)
我们在夜间使用Valgrind进行功能测试。这需要几个小时。目前在 `re2` 库中有一个已知的误报,请参阅[文章](https://research.swtch.com/sparse)。
**螺纹消毒剂**.
我们在 TSan 下进行功能测试。ClickHouse 必须通过所有测试。在 TSan 下运行不是自动化的,只是偶尔执行
### Undefined behaviour sanitizer
我们使用UBSan对每个提交进行功能和集成测试。
**记忆消毒剂**.
### Thread sanitizer
我们使用TSan对每个提交进行功能测试。目前不使用TSan对每个提交进行集成测试。
### Memory sanitizer
目前我们不使用 MSan。
**未定义的行为消毒剂。**
我们仍然不会在每次提交的基础上使用 UBSan。 有一些地方需要解决。
**调试分alloc。**
### Debug allocator
您可以使用 `DEBUG_TCMALLOC` CMake 选项启用 `tcmalloc` 的调试版本。我们在每次提交的基础上使用调试分配器运行测试。
更多请参阅 `tests/instructions/sanitizers.txt`
## Fuzzing {#mo-hu-ce-shi}
We use a simple fuzz test that generates random SQL queries and checks that the server does not die; fuzzing is performed with the Address sanitizer. You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer).
ClickHouse fuzzing is implemented both with [libFuzzer](https://llvm.org/docs/LibFuzzer.html) and with random SQL queries.
All fuzz testing should be performed with sanitizers (Address and Undefined).
As of December 2018 we still do not use isolated fuzz testing of library code.
LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of the test code and have the "_fuzzer" name suffix.
A fuzzer example can be found in `src/Parsers/tests/lexer_fuzzer.cpp`. LibFuzzer configs, dictionaries and the corpus are stored in `tests/fuzz`.
We encourage you to write fuzz tests for every functionality that handles user input.
Fuzzers are not built by default. To build them, set both `-DENABLE_FUZZING=1` and `-DENABLE_TESTS=1`. We recommend disabling Jemalloc while building fuzzers.
The configuration used to integrate ClickHouse fuzzing into Google OSS-Fuzz can be found at `docker/fuzz`.
In addition, we use a simple fuzz test that generates random SQL queries and checks that the server does not die. You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer).
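As an illustration of the libFuzzer workflow described above, here is a minimal hedged sketch of an isolated fuzz target. The `LLVMFuzzerTestOneInput` entry point and the `-fsanitize=fuzzer` flag are standard libFuzzer, while `parseSomething` is a made-up stand-in for whatever library entry point is under test (the real in-tree example is `src/Parsers/tests/lexer_fuzzer.cpp`):

```cpp
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for the library function under test; a real fuzzer
// would call an actual parsing entry point such as the ClickHouse Lexer.
static void parseSomething(const std::string & input)
{
    if (!input.empty() && input.front() == '{' && input.back() != '}')
        throw std::runtime_error("unterminated object");
}

// libFuzzer calls this entry point repeatedly with mutated inputs.
// Build with something like: clang++ -g -O1 -fsanitize=fuzzer,address fuzz_target.cpp
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
{
    try
    {
        parseSomething(std::string(reinterpret_cast<const char *>(data), size));
    }
    catch (...)
    {
        // Exceptions on malformed input are expected; only crashes, sanitizer
        // reports and hangs count as findings.
    }
    return 0;
}
```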
## Security Audit {#an-quan-shen-ji}
@ -208,7 +216,7 @@ People from the Yandex Cloud department performed some review of ClickHouse features from a security standpoint
## Metrica B2B Tests {#metrica-b2b-ce-shi}
Every ClickHouse release is tested with the Yandex Metrica and AppMetrica engines. Testing and stable versions of ClickHouse are deployed on VMs and run with a small copy of the Metrica engine that processes a fixed sample of input data. The results of the two Metrica instances are then compared.
Every ClickHouse release is tested with the Yandex Metrica and AppMetrica engines. Testing and stable versions of ClickHouse are deployed on VMs and run with a small copy of the Metrica engine that processes a fixed sample of input data. The results of the two Metrica instances are then compared.
These tests are automated by a separate team. Due to the high number of moving parts, the tests fail most of the time for completely unrelated reasons that are very hard to figure out. Most likely these tests have negative value for us. Nevertheless, they have proven useful in roughly one or two cases out of hundreds.
@ -218,12 +226,12 @@ People from the Yandex Cloud department performed some review of ClickHouse features from a security standpoint
## Test Automation {#zi-dong-hua-ce-shi}
We run tests with the Yandex internal CI and job automation system named «Sandbox». We also continue to use Jenkins (available inside Yandex).
We run tests with the Yandex internal CI and job automation system named «Sandbox». We also continue to use Jenkins (available inside Yandex).
Build jobs and tests run in Sandbox on a per-commit basis. Result packages and test results are published on GitHub and can be downloaded via direct links; results are stored permanently. When you submit a pull request on GitHub, we tag it as «can be tested» and our CI system builds ClickHouse packages (release, debug, with address sanitizer, etc.) for you.
We don't use Travis CI due to the limits on time and computational power.
In Jenkins we run dictionary tests and Metrica B2B tests. We use Jenkins to prepare and publish releases. Jenkins is a legacy technology and all jobs will be moved to Sandbox.
In Jenkins we run dictionary tests and Metrica B2B tests. We use Jenkins to prepare and publish releases. Jenkins is a legacy technology and all jobs will be moved to Sandbox.
[Original article](https://clickhouse.tech/docs/zh/development/tests/) <!--hide-->

View File

@ -216,7 +216,7 @@ private:
ConnectionParameters connection_parameters;
QueryFuzzer fuzzer;
int query_fuzzer_runs;
int query_fuzzer_runs = 0;
void initialize(Poco::Util::Application & self) override
{
@ -1041,10 +1041,12 @@ private:
begin - text.data());
ASTPtr fuzz_base = orig_ast;
for (int fuzz_step = 0; fuzz_step < query_fuzzer_runs; fuzz_step++)
// Don't repeat inserts, the tables grow too big.
const int this_query_runs = as_insert ? 1 : query_fuzzer_runs;
for (int fuzz_step = 0; fuzz_step < this_query_runs; fuzz_step++)
{
fprintf(stderr, "fuzzing step %d for query at pos %zd\n",
fuzz_step, this_query_begin - text.data());
fprintf(stderr, "fuzzing step %d out of %d for query at pos %zd\n",
fuzz_step, this_query_runs, this_query_begin - text.data());
ASTPtr ast_to_process;
try
@ -1058,7 +1060,11 @@ private:
std::stringstream dump_of_cloned_ast;
ast_to_process->dumpTree(dump_of_cloned_ast);
fuzzer.fuzzMain(ast_to_process);
// Run the original query as well.
if (fuzz_step > 0)
{
fuzzer.fuzzMain(ast_to_process);
}
auto base_after_fuzz = fuzz_base->formatForErrorMessage();

View File

@ -202,6 +202,8 @@
M(PerfDataTLBMisses, "Data TLB misses") \
M(PerfInstructionTLBReferences, "Instruction TLB references") \
M(PerfInstructionTLBMisses, "Instruction TLB misses") \
M(PerfLocalMemoryReferences, "Local NUMA node memory reads") \
M(PerfLocalMemoryMisses, "Local NUMA node memory read misses") \
\
M(CreatedHTTPConnections, "Total amount of created HTTP connections (closed or opened).") \
\

View File

@ -8,16 +8,11 @@
namespace DB
{
class ProfilingScopedWriteUnlocker;
class ProfilingScopedWriteRWLock
{
public:
friend class ProfilingScopedWriteUnlocker;
ProfilingScopedWriteRWLock(std::shared_mutex & rwl_, ProfileEvents::Event event_) :
watch(),
event(event_),
ProfilingScopedWriteRWLock(std::shared_mutex & rwl_, ProfileEvents::Event event) :
scoped_write_lock(rwl_)
{
ProfileEvents::increment(event, watch.elapsed());
@ -25,38 +20,14 @@ public:
private:
Stopwatch watch;
ProfileEvents::Event event;
std::unique_lock<std::shared_mutex> scoped_write_lock;
};
/// Inversed RAII
/// Used to unlock current writelock for various purposes.
class ProfilingScopedWriteUnlocker
{
public:
ProfilingScopedWriteUnlocker() = delete;
ProfilingScopedWriteUnlocker(ProfilingScopedWriteRWLock & parent_lock_) : parent_lock(parent_lock_)
{
parent_lock.scoped_write_lock.unlock();
}
~ProfilingScopedWriteUnlocker()
{
Stopwatch watch;
parent_lock.scoped_write_lock.lock();
ProfileEvents::increment(parent_lock.event, watch.elapsed());
}
private:
ProfilingScopedWriteRWLock & parent_lock;
};
class ProfilingScopedReadRWLock
{
public:
ProfilingScopedReadRWLock(std::shared_mutex & rwl, ProfileEvents::Event event) :
watch(),
scoped_read_lock(rwl)
{
ProfileEvents::increment(event, watch.elapsed());

View File

@ -193,12 +193,16 @@ static const PerfEventInfo raw_events_info[] = {
// https://stackoverflow.com/questions/49933319/how-to-interpret-perf-itlb-loads-itlb-load-misses
CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBReferences, ACCESS),
CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBMisses, MISS),
CACHE_EVENT(PERF_COUNT_HW_CACHE_NODE, PerfLocalMemoryReferences, ACCESS),
CACHE_EVENT(PERF_COUNT_HW_CACHE_NODE, PerfLocalMemoryMisses, MISS),
};
static_assert(sizeof(raw_events_info) / sizeof(raw_events_info[0]) == NUMBER_OF_RAW_EVENTS);
#undef HARDWARE_EVENT
#undef SOFTWARE_EVENT
#undef CACHE_EVENT
// A map of event name -> event index, to parse event list in settings.
static std::unordered_map<std::string, size_t> populateEventMap()
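For context on what the two new `PERF_COUNT_HW_CACHE_NODE` entries map to at the kernel level, here is a standalone hedged sketch of opening such a counter via `perf_event_open`; the helper name is invented for illustration, and the real setup goes through the `CACHE_EVENT` table and descriptor management above rather than a hand-rolled syscall:

```cpp
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>

// Hypothetical helper: opens a counter of local NUMA node memory read accesses
// for the calling thread, mirroring CACHE_EVENT(PERF_COUNT_HW_CACHE_NODE, ..., ACCESS).
static int openLocalMemoryReferencesCounter()
{
    perf_event_attr attr {};
    attr.size = sizeof(attr);
    attr.type = PERF_TYPE_HW_CACHE;
    attr.config = PERF_COUNT_HW_CACHE_NODE
        | (PERF_COUNT_HW_CACHE_OP_READ << 8)
        | (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16);
    attr.disabled = 1;
    attr.exclude_kernel = 1;
    // pid = 0 (this thread), cpu = -1 (any CPU), no group fd, no flags.
    return static_cast<int>(syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0));
}
```

The miss counterpart differs only in using `PERF_COUNT_HW_CACHE_RESULT_MISS` in the third byte of `config`.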

View File

@ -59,6 +59,8 @@ namespace ProfileEvents
extern const Event PerfDataTLBMisses;
extern const Event PerfInstructionTLBReferences;
extern const Event PerfInstructionTLBMisses;
extern const Event PerfLocalMemoryReferences;
extern const Event PerfLocalMemoryMisses;
#endif
}
@ -162,7 +164,7 @@ struct PerfEventValue
UInt64 time_running = 0;
};
static constexpr size_t NUMBER_OF_RAW_EVENTS = 20;
static constexpr size_t NUMBER_OF_RAW_EVENTS = 22;
struct PerfDescriptorsHolder : boost::noncopyable
{

View File

@ -372,6 +372,7 @@ struct Settings : public SettingsCollection<Settings>
M(SettingBool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
M(SettingBool, optimize_redundant_functions_in_order_by, true, "Remove functions from ORDER BY if its argument is also in ORDER BY", 0) \
M(SettingBool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
M(SettingBool, optimize_if_transform_strings_to_enum, false, "Replace string-type arguments in If and Transform with enums. Disabled by default because it could make an inconsistent change in a distributed query, leading to its failure.", 0) \
M(SettingBool, optimize_monotonous_functions_in_order_by, true, "Replace monotonous function with its argument in ORDER BY", 0) \
M(SettingBool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \
M(SettingBool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \

View File

@ -62,8 +62,8 @@ Block CheckSortedBlockInputStream::readImpl()
else if (res > 0)
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Sort order of blocks violated for column {}, left: {}, right: {}.",
backQuoteIfNeed(elem.column_name),
"Sort order of blocks violated for column number {}, left: {}, right: {}.",
column_number,
applyVisitor(FieldVisitorDump(), (*left_col)[left_index]),
applyVisitor(FieldVisitorDump(), (*right_col)[right_index]));
}

View File

@ -90,7 +90,7 @@ CacheDictionary::CacheDictionary(
, update_queue(max_update_queue_size_)
, update_pool(max_threads_for_updates)
{
if (!this->source_ptr->supportsSelectiveLoad())
if (!source_ptr->supportsSelectiveLoad())
throw Exception{full_name + ": source cannot be used with CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD};
createAttributes();
@ -856,40 +856,24 @@ void CacheDictionary::update(BunchUpdateUnit & bunch_update_unit) const
const auto now = std::chrono::system_clock::now();
/// Non const because it will be unlocked.
ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
if (now > backoff_end_time.load())
{
try
{
if (error_count)
{
/// Recover after error: we have to clone the source here because
/// it could keep connections which should be reset after error.
source_ptr = source_ptr->clone();
}
auto current_source_ptr = getSourceAndUpdateIfNeeded();
Stopwatch watch;
/// To perform parallel loading.
BlockInputStreamPtr stream = nullptr;
{
ProfilingScopedWriteUnlocker unlocker(write_lock);
stream = source_ptr->loadIds(bunch_update_unit.getRequestedIds());
}
BlockInputStreamPtr stream = current_source_ptr->loadIds(bunch_update_unit.getRequestedIds());
stream->readPrefix();
while (true)
{
Block block;
{
ProfilingScopedWriteUnlocker unlocker(write_lock);
block = stream->read();
if (!block)
break;
}
Block block = stream->read();
if (!block)
break;
const auto * id_column = typeid_cast<const ColumnUInt64 *>(block.safeGetByPosition(0).column.get());
if (!id_column)
@ -904,6 +888,8 @@ void CacheDictionary::update(BunchUpdateUnit & bunch_update_unit) const
for (const auto i : ext::range(0, ids.size()))
{
/// Modifying cache with write lock
ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
const auto id = ids[i];
const auto find_result = findCellIdx(id, now);
@ -940,6 +926,9 @@ void CacheDictionary::update(BunchUpdateUnit & bunch_update_unit) const
stream->readSuffix();
/// Lock just for last_exception safety
ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
error_count = 0;
last_exception = std::exception_ptr{};
backoff_end_time = std::chrono::system_clock::time_point{};
@ -948,6 +937,8 @@ void CacheDictionary::update(BunchUpdateUnit & bunch_update_unit) const
}
catch (...)
{
/// Lock just for last_exception safety
ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
++error_count;
last_exception = std::current_exception();
backoff_end_time = now + std::chrono::seconds(calculateDurationWithBackoff(rnd_engine, error_count));
@ -958,6 +949,8 @@ void CacheDictionary::update(BunchUpdateUnit & bunch_update_unit) const
}
}
/// Modifying cache state again with write lock
ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
size_t not_found_num = 0;
size_t found_num = 0;

View File

@ -86,7 +86,7 @@ public:
return std::make_shared<CacheDictionary>(
getDictionaryID(),
dict_struct,
source_ptr->clone(),
getSourceAndUpdateIfNeeded()->clone(),
dict_lifetime,
strict_max_lifetime_seconds,
size,
@ -283,6 +283,26 @@ private:
Attribute & getAttribute(const std::string & attribute_name) const;
using SharedDictionarySourcePtr = std::shared_ptr<IDictionarySource>;
/// Update dictionary source pointer if required and return it. Thread safe.
/// MultiVersion is not used here because it works with constant pointers.
/// For some reason almost all methods in IDictionarySource interface are
/// not constant.
SharedDictionarySourcePtr getSourceAndUpdateIfNeeded() const
{
std::lock_guard lock(source_mutex);
if (error_count)
{
/// Recover after error: we have to clone the source here because
/// it could keep connections which should be reset after error.
auto new_source_ptr = source_ptr->clone();
source_ptr = std::move(new_source_ptr);
}
return source_ptr;
}
struct FindResult
{
const size_t cell_idx;
@ -296,7 +316,11 @@ private:
void isInImpl(const PaddedPODArray<Key> & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
const DictionaryStructure dict_struct;
mutable DictionarySourcePtr source_ptr;
/// Dictionary source should be used with mutex
mutable std::mutex source_mutex;
mutable SharedDictionarySourcePtr source_ptr;
const DictionaryLifetime dict_lifetime;
const size_t strict_max_lifetime_seconds;
const bool allow_read_expired_keys;
@ -307,6 +331,9 @@ private:
Poco::Logger * log;
/// This lock is taken for write by the inner cache state update function; all
/// other functions that touch the cache state are just readers. Surprisingly,
/// this lock is also used for the last_exception pointer.
mutable std::shared_mutex rw_lock;
/// Actual size will be increased to match power of 2

View File

@ -89,7 +89,7 @@ endif()
target_link_libraries(clickhouse_functions PRIVATE hyperscan)
if(USE_SIMDJSON)
target_link_libraries(clickhouse_functions PRIVATE ${SIMDJSON_LIBRARY})
target_link_libraries(clickhouse_functions PRIVATE simdjson)
endif()
if(USE_RAPIDJSON)

View File

@ -1,6 +1,5 @@
#pragma once
#include <common/StringRef.h>
#include <Common/Exception.h>
#include <Core/Types.h>
@ -15,43 +14,87 @@ namespace ErrorCodes
/// It can't do anything useful and just throws an exception.
struct DummyJSONParser
{
static constexpr bool need_preallocate = false;
void preallocate(size_t) {}
class Array;
class Object;
bool parse(const StringRef &) { throw Exception{"Functions JSON* are not supported without AVX2", ErrorCodes::NOT_IMPLEMENTED}; }
/// References an element in a JSON document, representing a JSON null, boolean, string, number,
/// array or object.
class Element
{
public:
Element() {}
bool isInt64() const { return false; }
bool isUInt64() const { return false; }
bool isDouble() const { return false; }
bool isString() const { return false; }
bool isArray() const { return false; }
bool isObject() const { return false; }
bool isBool() const { return false; }
bool isNull() const { return false; }
using Iterator = std::nullptr_t;
Iterator getRoot() const { return nullptr; }
Int64 getInt64() const { return 0; }
UInt64 getUInt64() const { return 0; }
double getDouble() const { return 0; }
bool getBool() const { return false; }
std::string_view getString() const { return {}; }
Array getArray() const;
Object getObject() const;
};
static bool isInt64(const Iterator &) { return false; }
static bool isUInt64(const Iterator &) { return false; }
static bool isDouble(const Iterator &) { return false; }
static bool isString(const Iterator &) { return false; }
static bool isArray(const Iterator &) { return false; }
static bool isObject(const Iterator &) { return false; }
static bool isBool(const Iterator &) { return false; }
static bool isNull(const Iterator &) { return true; }
/// References an array in a JSON document.
class Array
{
public:
class Iterator
{
public:
Element operator*() const { return {}; }
Iterator & operator++() { return *this; }
Iterator operator++(int) { return *this; }
friend bool operator==(const Iterator &, const Iterator &) { return true; }
friend bool operator!=(const Iterator &, const Iterator &) { return false; }
};
static Int64 getInt64(const Iterator &) { return 0; }
static UInt64 getUInt64(const Iterator &) { return 0; }
static double getDouble(const Iterator &) { return 0; }
static bool getBool(const Iterator &) { return false; }
static StringRef getString(const Iterator &) { return {}; }
Iterator begin() const { return {}; }
Iterator end() const { return {}; }
size_t size() const { return 0; }
Element operator[](size_t) const { return {}; }
};
static size_t sizeOfArray(const Iterator &) { return 0; }
static bool firstArrayElement(Iterator &) { return false; }
static bool arrayElementByIndex(Iterator &, size_t) { return false; }
static bool nextArrayElement(Iterator &) { return false; }
using KeyValuePair = std::pair<std::string_view, Element>;
static size_t sizeOfObject(const Iterator &) { return 0; }
static bool firstObjectMember(Iterator &) { return false; }
static bool firstObjectMember(Iterator &, StringRef &) { return false; }
static bool objectMemberByIndex(Iterator &, size_t) { return false; }
static bool objectMemberByName(Iterator &, const StringRef &) { return false; }
static bool nextObjectMember(Iterator &) { return false; }
static bool nextObjectMember(Iterator &, StringRef &) { return false; }
static bool isObjectMember(const Iterator &) { return false; }
static StringRef getKey(const Iterator &) { return {}; }
/// References an object in a JSON document.
class Object
{
public:
class Iterator
{
public:
KeyValuePair operator*() const { return {}; }
Iterator & operator++() { return *this; }
Iterator operator++(int) { return *this; }
friend bool operator==(const Iterator &, const Iterator &) { return true; }
friend bool operator!=(const Iterator &, const Iterator &) { return false; }
};
Iterator begin() const { return {}; }
Iterator end() const { return {}; }
size_t size() const { return 0; }
bool find(const std::string_view &, Element &) const { return false; }
#if 0
/// Optional: Provides access to an object's element by index.
KeyValuePair operator[](size_t) const { return {}; }
#endif
};
/// Parses a JSON document, returns the reference to its root element if succeeded.
bool parse(const std::string_view &, Element &) { throw Exception{"Functions JSON* are not supported", ErrorCodes::NOT_IMPLEMENTED}; }
#if 0
/// Optional: Allocates memory to parse JSON documents faster.
void reserve(size_t max_size);
#endif
};
}

View File

@ -4,6 +4,57 @@
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
std::vector<FunctionJSONHelpers::Move> FunctionJSONHelpers::prepareMoves(const char * function_name, Block & block, const ColumnNumbers & arguments, size_t first_index_argument, size_t num_index_arguments)
{
std::vector<Move> moves;
moves.reserve(num_index_arguments);
for (const auto i : ext::range(first_index_argument, first_index_argument + num_index_arguments))
{
const auto & column = block.getByPosition(arguments[i]);
if (!isString(column.type) && !isInteger(column.type))
throw Exception{"The argument " + std::to_string(i + 1) + " of function " + String(function_name)
+ " should be a string specifying key or an integer specifying index, illegal type: " + column.type->getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
if (column.column && isColumnConst(*column.column))
{
const auto & column_const = assert_cast<const ColumnConst &>(*column.column);
if (isString(column.type))
moves.emplace_back(MoveType::ConstKey, column_const.getValue<String>());
else
moves.emplace_back(MoveType::ConstIndex, column_const.getInt(0));
}
else
{
if (isString(column.type))
moves.emplace_back(MoveType::Key, "");
else
moves.emplace_back(MoveType::Index, 0);
}
}
return moves;
}
size_t FunctionJSONHelpers::calculateMaxSize(const ColumnString::Offsets & offsets)
{
size_t max_size = 0;
for (const auto i : ext::range(0, offsets.size()))
{
size_t size = offsets[i] - offsets[i - 1];
if (max_size < size)
max_size = size;
}
if (max_size)
--max_size;
return max_size;
}
void registerFunctionsJSON(FunctionFactory & factory)
{

File diff suppressed because it is too large

View File

@ -222,7 +222,7 @@ struct ValueGetterBuilderImpl<>
static TernaryValueGetter build(const IColumn * x)
{
throw Exception(
std::string("Unknown numeric column of type: ") + demangle(typeid(x).name()),
std::string("Unknown numeric column of type: ") + demangle(typeid(*x).name()),
ErrorCodes::LOGICAL_ERROR);
}
};

View File

@ -6,9 +6,7 @@
#if USE_RAPIDJSON
# include <Core/Types.h>
# include <Common/Exception.h>
# include <common/StringRef.h>
# include <common/defines.h>
# include <rapidjson/document.h>
@ -19,197 +17,142 @@ namespace DB
/// It provides ability to parse JSONs using rapidjson library.
struct RapidJSONParser
{
static constexpr bool need_preallocate = false;
void preallocate(size_t) {}
class Array;
class Object;
bool parse(const StringRef & json)
{
rapidjson::MemoryStream ms(json.data, json.size);
rapidjson::EncodedInputStream<rapidjson::UTF8<>, rapidjson::MemoryStream> is(ms);
document.ParseStream(is);
return !document.HasParseError() && (ms.Tell() == json.size);
}
struct Iterator
/// References an element in a JSON document, representing a JSON null, boolean, string, number,
/// array or object.
class Element
{
public:
Iterator() {}
Iterator(const rapidjson::Document & document_) : value(&document_) {}
Iterator(const Iterator & src)
: value(src.value)
, is_object_member(src.is_object_member)
, current_in_array(src.current_in_array)
, end_of_array(src.end_of_array) {}
ALWAYS_INLINE Element() {}
ALWAYS_INLINE Element(const rapidjson::Value & value_) : ptr(&value_) {}
Iterator & operator =(const Iterator & src)
ALWAYS_INLINE bool isInt64() const { return ptr->IsInt64(); }
ALWAYS_INLINE bool isUInt64() const { return ptr->IsUint64(); }
ALWAYS_INLINE bool isDouble() const { return ptr->IsDouble(); }
ALWAYS_INLINE bool isString() const { return ptr->IsString(); }
ALWAYS_INLINE bool isArray() const { return ptr->IsArray(); }
ALWAYS_INLINE bool isObject() const { return ptr->IsObject(); }
ALWAYS_INLINE bool isBool() const { return ptr->IsBool(); }
ALWAYS_INLINE bool isNull() const { return ptr->IsNull(); }
ALWAYS_INLINE Int64 getInt64() const { return ptr->GetInt64(); }
ALWAYS_INLINE UInt64 getUInt64() const { return ptr->GetUint64(); }
ALWAYS_INLINE double getDouble() const { return ptr->GetDouble(); }
ALWAYS_INLINE bool getBool() const { return ptr->GetBool(); }
ALWAYS_INLINE std::string_view getString() const { return {ptr->GetString(), ptr->GetStringLength()}; }
Array getArray() const;
Object getObject() const;
private:
const rapidjson::Value * ptr = nullptr;
};
/// References an array in a JSON document.
class Array
{
public:
class Iterator
{
value = src.value;
is_object_member = src.is_object_member;
current_in_array = src.current_in_array;
end_of_array = src.end_of_array;
return *this;
}
public:
ALWAYS_INLINE Iterator(const rapidjson::Value::ConstValueIterator & it_) : it(it_) {}
ALWAYS_INLINE Element operator*() const { return *it; }
ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; }
ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; }
ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return left.it == right.it; }
ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return !(left == right); }
private:
rapidjson::Value::ConstValueIterator it;
};
bool isInt64() const { return value->IsInt64(); }
bool isUInt64() const { return value->IsUint64(); }
bool isDouble() const { return value->IsDouble(); }
bool isBool() const { return value->IsBool(); }
bool isString() const { return value->IsString(); }
bool isArray() const { return value->IsArray(); }
bool isObject() const { return value->IsObject(); }
bool isNull() const { return value->IsNull(); }
ALWAYS_INLINE Array(const rapidjson::Value & value_) : ptr(&value_) {}
ALWAYS_INLINE Iterator begin() const { return ptr->Begin(); }
ALWAYS_INLINE Iterator end() const { return ptr->End(); }
ALWAYS_INLINE size_t size() const { return ptr->Size(); }
ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return *(ptr->Begin() + index); }
Int64 getInt64() const { return value->GetInt64(); }
UInt64 getUInt64() const { return value->GetUint64(); }
double getDouble() const { return value->GetDouble(); }
bool getBool() const { return value->GetBool(); }
StringRef getString() const { return {value->GetString(), value->GetStringLength()}; }
private:
const rapidjson::Value * ptr = nullptr;
};
size_t sizeOfArray() const { return value->Size(); }
using KeyValuePair = std::pair<std::string_view, Element>;
bool arrayElementByIndex(size_t index)
/// References an object in a JSON document.
class Object
{
public:
class Iterator
{
if (index >= value->Size())
public:
ALWAYS_INLINE Iterator(const rapidjson::Value::ConstMemberIterator & it_) : it(it_) {}
ALWAYS_INLINE KeyValuePair operator *() const { std::string_view key{it->name.GetString(), it->name.GetStringLength()}; return {key, it->value}; }
ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; }
ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; }
ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return left.it == right.it; }
ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return !(left == right); }
private:
rapidjson::Value::ConstMemberIterator it;
};
ALWAYS_INLINE Object(const rapidjson::Value & value_) : ptr(&value_) {}
ALWAYS_INLINE Iterator begin() const { return ptr->MemberBegin(); }
ALWAYS_INLINE Iterator end() const { return ptr->MemberEnd(); }
ALWAYS_INLINE size_t size() const { return ptr->MemberCount(); }
bool find(const std::string_view & key, Element & result) const
{
auto it = ptr->FindMember(rapidjson::StringRef(key.data(), key.length()));
if (it == ptr->MemberEnd())
return false;
setRange(value->Begin() + index, value->End());
value = current_in_array++;
result = it->value;
return true;
}
bool nextArrayElement()
/// Optional: Provides access to an object's element by index.
ALWAYS_INLINE KeyValuePair operator[](size_t index) const
{
if (current_in_array == end_of_array)
return false;
value = current_in_array++;
return true;
}
size_t sizeOfObject() const { return value->MemberCount(); }
bool objectMemberByIndex(size_t index)
{
if (index >= value->MemberCount())
return false;
setRange(value->MemberBegin() + index, value->MemberEnd());
value = &(current_in_object++)->value;
return true;
}
bool objectMemberByIndex(size_t index, StringRef & key)
{
if (index >= value->MemberCount())
return false;
setRange(value->MemberBegin() + index, value->MemberEnd());
key = getKeyImpl(current_in_object);
value = &(current_in_object++)->value;
return true;
}
bool objectMemberByName(const StringRef & name)
{
auto it = value->FindMember(name.data);
if (it == value->MemberEnd())
return false;
setRange(it, value->MemberEnd());
value = &(current_in_object++)->value;
return true;
}
bool nextObjectMember()
{
if (current_in_object == end_of_object)
return false;
value = &(current_in_object++)->value;
return true;
}
bool nextObjectMember(StringRef & key)
{
if (current_in_object == end_of_object)
return false;
key = getKeyImpl(current_in_object);
value = &(current_in_object++)->value;
return true;
}
bool isObjectMember() const { return is_object_member; }
StringRef getKey() const
{
return getKeyImpl(current_in_object - 1);
assert (index < size());
auto it = ptr->MemberBegin() + index;
std::string_view key{it->name.GetString(), it->name.GetStringLength()};
return {key, it->value};
}
private:
void setRange(rapidjson::Value::ConstValueIterator current, rapidjson::Value::ConstValueIterator end)
{
current_in_array = &*current;
end_of_array = &*end;
is_object_member = false;
}
void setRange(rapidjson::Value::ConstMemberIterator current, rapidjson::Value::ConstMemberIterator end)
{
current_in_object = &*current;
end_of_object = &*end;
is_object_member = true;
}
static StringRef getKeyImpl(const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * member)
{
const auto & name = member->name;
return {name.GetString(), name.GetStringLength()};
}
const rapidjson::Value * value = nullptr;
bool is_object_member = false;
union
{
const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * current_in_object;
const rapidjson::Value * current_in_array;
};
union
{
const rapidjson::GenericMember<rapidjson::UTF8<>, rapidjson::MemoryPoolAllocator<>> * end_of_object;
const rapidjson::Value * end_of_array;
};
const rapidjson::Value * ptr = nullptr;
};
Iterator getRoot() { return Iterator{document}; }
/// Parses a JSON document, returns the reference to its root element if succeeded.
bool parse(const std::string_view & json, Element & result)
{
rapidjson::MemoryStream ms(json.data(), json.size());
rapidjson::EncodedInputStream<rapidjson::UTF8<>, rapidjson::MemoryStream> is(ms);
document.ParseStream(is);
if (document.HasParseError() || (ms.Tell() != json.size()))
return false;
result = document;
return true;
}
static bool isInt64(const Iterator & it) { return it.isInt64(); }
static bool isUInt64(const Iterator & it) { return it.isUInt64(); }
static bool isDouble(const Iterator & it) { return it.isDouble(); }
static bool isBool(const Iterator & it) { return it.isBool(); }
static bool isString(const Iterator & it) { return it.isString(); }
static bool isArray(const Iterator & it) { return it.isArray(); }
static bool isObject(const Iterator & it) { return it.isObject(); }
static bool isNull(const Iterator & it) { return it.isNull(); }
static Int64 getInt64(const Iterator & it) { return it.getInt64(); }
static UInt64 getUInt64(const Iterator & it) { return it.getUInt64(); }
static double getDouble(const Iterator & it) { return it.getDouble(); }
static bool getBool(const Iterator & it) { return it.getBool(); }
static StringRef getString(const Iterator & it) { return it.getString(); }
static size_t sizeOfArray(const Iterator & it) { return it.sizeOfArray(); }
static bool firstArrayElement(Iterator & it) { return it.arrayElementByIndex(0); }
static bool arrayElementByIndex(Iterator & it, size_t index) { return it.arrayElementByIndex(index); }
static bool nextArrayElement(Iterator & it) { return it.nextArrayElement(); }
static size_t sizeOfObject(const Iterator & it) { return it.sizeOfObject(); }
static bool firstObjectMember(Iterator & it) { return it.objectMemberByIndex(0); }
static bool firstObjectMember(Iterator & it, StringRef & first_key) { return it.objectMemberByIndex(0, first_key); }
static bool objectMemberByIndex(Iterator & it, size_t index) { return it.objectMemberByIndex(index); }
static bool objectMemberByName(Iterator & it, const StringRef & name) { return it.objectMemberByName(name); }
static bool nextObjectMember(Iterator & it) { return it.nextObjectMember(); }
static bool nextObjectMember(Iterator & it, StringRef & next_key) { return it.nextObjectMember(next_key); }
static bool isObjectMember(const Iterator & it) { return it.isObjectMember(); }
static StringRef getKey(const Iterator & it) { return it.getKey(); }
#if 0
/// Optional: Allocates memory to parse JSON documents faster.
void reserve(size_t max_size);
#endif
private:
rapidjson::Document document;
};
inline ALWAYS_INLINE RapidJSONParser::Array RapidJSONParser::Element::getArray() const
{
return *ptr;
}
inline ALWAYS_INLINE RapidJSONParser::Object RapidJSONParser::Element::getObject() const
{
return *ptr;
}
}
#endif

View File

@ -7,9 +7,8 @@
#if USE_SIMDJSON
# include <Core/Types.h>
# include <Common/Exception.h>
# include <common/StringRef.h>
# include <simdjson/jsonparser.h>
# include <common/defines.h>
# include <simdjson.h>
namespace DB
@ -23,121 +22,147 @@ namespace ErrorCodes
/// It provides ability to parse JSONs using simdjson library.
struct SimdJSONParser
{
static constexpr bool need_preallocate = true;
class Array;
class Object;
void preallocate(size_t max_size)
/// References an element in a JSON document, representing a JSON null, boolean, string, number,
/// array or object.
class Element
{
if (!pj.allocate_capacity(max_size))
throw Exception{"Can not allocate memory for " + std::to_string(max_size) + " units when parsing JSON",
ErrorCodes::CANNOT_ALLOCATE_MEMORY};
}
public:
ALWAYS_INLINE Element() {}
ALWAYS_INLINE Element(const simdjson::dom::element & element_) : element(element_) {}
bool parse(const StringRef & json) { return !json_parse(json.data, json.size, pj); }
ALWAYS_INLINE bool isInt64() const { return element.type() == simdjson::dom::element_type::INT64; }
ALWAYS_INLINE bool isUInt64() const { return element.type() == simdjson::dom::element_type::UINT64; }
ALWAYS_INLINE bool isDouble() const { return element.type() == simdjson::dom::element_type::DOUBLE; }
ALWAYS_INLINE bool isString() const { return element.type() == simdjson::dom::element_type::STRING; }
ALWAYS_INLINE bool isArray() const { return element.type() == simdjson::dom::element_type::ARRAY; }
ALWAYS_INLINE bool isObject() const { return element.type() == simdjson::dom::element_type::OBJECT; }
ALWAYS_INLINE bool isBool() const { return element.type() == simdjson::dom::element_type::BOOL; }
ALWAYS_INLINE bool isNull() const { return element.type() == simdjson::dom::element_type::NULL_VALUE; }
using Iterator = simdjson::ParsedJson::Iterator;
Iterator getRoot() { return Iterator{pj}; }
ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().first; }
ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().first; }
ALWAYS_INLINE double getDouble() const { return element.get_double().first; }
ALWAYS_INLINE bool getBool() const { return element.get_bool().first; }
ALWAYS_INLINE std::string_view getString() const { return element.get_string().first; }
ALWAYS_INLINE Array getArray() const;
ALWAYS_INLINE Object getObject() const;
static bool isInt64(const Iterator & it) { return it.is_integer(); }
static bool isUInt64(const Iterator &) { return false; /* See https://github.com/lemire/simdjson/issues/68 */ }
static bool isDouble(const Iterator & it) { return it.is_double(); }
static bool isString(const Iterator & it) { return it.is_string(); }
static bool isArray(const Iterator & it) { return it.is_array(); }
static bool isObject(const Iterator & it) { return it.is_object(); }
static bool isBool(const Iterator & it) { return it.get_type() == 't' || it.get_type() == 'f'; }
static bool isNull(const Iterator & it) { return it.is_null(); }
private:
simdjson::dom::element element;
};
static Int64 getInt64(const Iterator & it) { return it.get_integer(); }
static UInt64 getUInt64(const Iterator &) { return 0; /* isUInt64() never returns true */ }
static double getDouble(const Iterator & it) { return it.get_double(); }
static bool getBool(const Iterator & it) { return it.get_type() == 't'; }
static StringRef getString(const Iterator & it) { return StringRef{it.get_string(), it.get_string_length()}; }
static size_t sizeOfArray(const Iterator & it)
/// References an array in a JSON document.
class Array
{
size_t size = 0;
Iterator it2 = it;
if (it2.down())
public:
class Iterator
{
do
++size;
while (it2.next());
}
return size;
}
public:
ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator & it_) : it(it_) {}
ALWAYS_INLINE Element operator*() const { return *it; }
ALWAYS_INLINE Iterator & operator++() { ++it; return *this; }
ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; }
ALWAYS_INLINE friend bool operator!=(const Iterator & left, const Iterator & right) { return left.it != right.it; }
ALWAYS_INLINE friend bool operator==(const Iterator & left, const Iterator & right) { return !(left != right); }
private:
simdjson::dom::array::iterator it;
};
static bool firstArrayElement(Iterator & it) { return it.down(); }
ALWAYS_INLINE Array(const simdjson::dom::array & array_) : array(array_) {}
ALWAYS_INLINE Iterator begin() const { return array.begin(); }
ALWAYS_INLINE Iterator end() const { return array.end(); }
ALWAYS_INLINE size_t size() const { return array.size(); }
ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); return array.at(index).first; }
static bool arrayElementByIndex(Iterator & it, size_t index)
private:
simdjson::dom::array array;
};
using KeyValuePair = std::pair<std::string_view, Element>;
/// References an object in a JSON document.
class Object
{
if (!it.down())
return false;
while (index--)
if (!it.next())
public:
class Iterator
{
public:
ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator & it_) : it(it_) {}
ALWAYS_INLINE KeyValuePair operator*() const { const auto & res = *it; return {res.key, res.value}; }
ALWAYS_INLINE Iterator & operator++() { ++it; return *this; }
ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; }
ALWAYS_INLINE friend bool operator!=(const Iterator & left, const Iterator & right) { return left.it != right.it; }
ALWAYS_INLINE friend bool operator==(const Iterator & left, const Iterator & right) { return !(left != right); }
private:
simdjson::dom::object::iterator it;
};
ALWAYS_INLINE Object(const simdjson::dom::object & object_) : object(object_) {}
ALWAYS_INLINE Iterator begin() const { return object.begin(); }
ALWAYS_INLINE Iterator end() const { return object.end(); }
ALWAYS_INLINE size_t size() const { return object.size(); }
bool find(const std::string_view & key, Element & result) const
{
auto x = object.at_key(key);
if (x.error())
return false;
result = x.first;
return true;
}
/// Optional: Provides access to an object's element by index.
KeyValuePair operator[](size_t index) const
{
assert(index < size());
auto it = object.begin();
while (index--)
++it;
const auto & res = *it;
return {res.key, res.value};
}
private:
simdjson::dom::object object;
};
/// Parses a JSON document, returns the reference to its root element if succeeded.
bool parse(const std::string_view & json, Element & result)
{
auto document = parser.parse(json.data(), json.size());
if (document.error())
return false;
result = document.first;
return true;
}
static bool nextArrayElement(Iterator & it) { return it.next(); }
static size_t sizeOfObject(const Iterator & it)
/// Optional: Allocates memory to parse JSON documents faster.
void reserve(size_t max_size)
{
size_t size = 0;
Iterator it2 = it;
if (it2.down())
{
do
++size;
while (it2.next() && it2.next()); //-V501
}
return size;
}
static bool firstObjectMember(Iterator & it) { return it.down() && it.next(); }
static bool firstObjectMember(Iterator & it, StringRef & first_key)
{
if (!it.down())
return false;
first_key.data = it.get_string();
first_key.size = it.get_string_length();
return it.next();
}
static bool objectMemberByIndex(Iterator & it, size_t index)
{
if (!it.down())
return false;
while (index--)
if (!it.next() || !it.next()) //-V501
return false;
return it.next();
}
static bool objectMemberByName(Iterator & it, const StringRef & name) { return it.move_to_key(name.data); }
static bool nextObjectMember(Iterator & it) { return it.next() && it.next(); } //-V501
static bool nextObjectMember(Iterator & it, StringRef & next_key)
{
if (!it.next())
return false;
next_key.data = it.get_string();
next_key.size = it.get_string_length();
return it.next();
}
static bool isObjectMember(const Iterator & it) { return it.get_scope_type() == '{'; }
static StringRef getKey(const Iterator & it)
{
Iterator it2 = it;
it2.prev();
return StringRef{it2.get_string(), it2.get_string_length()};
if (parser.allocate(max_size) != simdjson::error_code::SUCCESS)
throw Exception{"Couldn't allocate " + std::to_string(max_size) + " bytes when parsing JSON",
ErrorCodes::CANNOT_ALLOCATE_MEMORY};
}
private:
simdjson::ParsedJson pj;
simdjson::dom::parser parser;
};
inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const
{
return element.get_array().first;
}
inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const
{
return element.get_object().first;
}
}
#endif
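Since DummyJSONParser, RapidJSONParser and SimdJSONParser now expose the same Element/Array/Object surface, consumers can be written once against a template parameter. A hedged sketch of such generic traversal; `countStringLeaves` is illustrative only and not part of the actual FunctionsJSON code:

```cpp
#include <cstddef>
#include <string_view>

// Illustrative only: counts the string leaves of a JSON document using the
// interface shared by SimdJSONParser, RapidJSONParser and DummyJSONParser.
template <typename JSONParser>
size_t countStringLeaves(JSONParser & parser, std::string_view json)
{
    typename JSONParser::Element root;
    if (!parser.parse(json, root))
        return 0;

    // Recursive walker over Element / Array / Object.
    struct Walker
    {
        size_t operator()(const typename JSONParser::Element & elem) const
        {
            size_t count = 0;
            if (elem.isString())
            {
                ++count;
            }
            else if (elem.isArray())
            {
                for (auto item : elem.getArray())
                    count += (*this)(item);
            }
            else if (elem.isObject())
            {
                for (const auto & key_value : elem.getObject())
                    count += (*this)(key_value.second);
            }
            return count;
        }
    };
    return Walker{}(root);
}
```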

View File

@ -233,24 +233,30 @@ namespace S3
/// Case when bucket name represented in domain name of S3 URL.
/// E.g. (https://bucket-name.s3.Region.amazonaws.com/key)
/// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#virtual-hosted-style-access
static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3[.\-][a-z0-9\-.:]+))");
static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3|cos)([.\-][a-z0-9\-.:]+))");
/// Case when bucket name and key represented in path of S3 URL.
/// E.g. (https://s3.Region.amazonaws.com/bucket-name/key)
/// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#path-style-access
static const RE2 path_style_pattern("^/([^/]*)/(.*)");
static constexpr auto S3 = "S3";
static constexpr auto COSN = "COSN";
static constexpr auto COS = "COS";
uri = uri_;
storage_name = S3;
if (uri.getHost().empty())
throw Exception("Host is empty in S3 URI: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
String name;
String endpoint_authority_from_uri;
if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &endpoint_authority_from_uri))
if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &name, &endpoint_authority_from_uri))
{
is_virtual_hosted_style = true;
endpoint = uri.getScheme() + "://" + endpoint_authority_from_uri;
endpoint = uri.getScheme() + "://" + name + endpoint_authority_from_uri;
/// S3 specification requires at least 3 and at most 63 characters in bucket name.
/// https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-s3-bucket-naming-requirements.html
@ -262,6 +268,19 @@ namespace S3
key = uri.getPath().substr(1);
if (key.empty() || key == "/")
throw Exception("Key name is empty in virtual hosted style S3 URI: " + key + " (" + uri.toString() + ")", ErrorCodes::BAD_ARGUMENTS);
boost::to_upper(name);
if (name != S3 && name != COS)
{
throw Exception("Object storage system name is unrecognized in virtual hosted style S3 URI: " + name + " (" + uri.toString() + ")", ErrorCodes::BAD_ARGUMENTS);
}
if (name == S3)
{
storage_name = name;
}
else
{
storage_name = COSN;
}
}
else if (re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key))
{
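A standalone hedged sketch of what the widened `virtual_hosted_style_pattern` captures for a Tencent COS authority; it mirrors the new unit test below and is only meant to show the three capture groups the updated code consumes:

```cpp
#include <re2/re2.h>
#include <cassert>
#include <string>

int main()
{
    // Same pattern as above: bucket, object storage name (s3 or cos), endpoint tail.
    static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3|cos)([.\-][a-z0-9\-.:]+))");

    std::string bucket, name, endpoint_authority;
    bool matched = RE2::FullMatch("bucketname.cos.ap-beijing.myqcloud.com",
                                  virtual_hosted_style_pattern,
                                  &bucket, &name, &endpoint_authority);

    assert(matched);
    assert(bucket == "bucketname");
    assert(name == "cos");                              // upper-cased and mapped to COSN later
    assert(endpoint_authority == ".ap-beijing.myqcloud.com");
    return 0;
}
```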

View File

@ -69,6 +69,7 @@ struct URI
String endpoint;
String bucket;
String key;
String storage_name;
bool is_virtual_hosted_style;

View File

@ -89,18 +89,22 @@ inline void writeStringBinary(const std::string & s, WriteBuffer & buf)
buf.write(s.data(), s.size());
}
inline void writeStringBinary(const char * s, WriteBuffer & buf)
{
writeVarUInt(strlen(s), buf);
buf.write(s, strlen(s));
}
inline void writeStringBinary(const StringRef & s, WriteBuffer & buf)
{
writeVarUInt(s.size, buf);
buf.write(s.data, s.size);
}
inline void writeStringBinary(const char * s, WriteBuffer & buf)
{
writeStringBinary(StringRef{s}, buf);
}
inline void writeStringBinary(const std::string_view & s, WriteBuffer & buf)
{
writeStringBinary(StringRef{s}, buf);
}
template <typename T>
void writeVectorBinary(const std::vector<T> & v, WriteBuffer & buf)
@ -413,15 +417,19 @@ void writeAnyEscapedString(const char * begin, const char * end, WriteBuffer & b
}
inline void writeJSONString(const String & s, WriteBuffer & buf, const FormatSettings & settings)
inline void writeJSONString(const StringRef & s, WriteBuffer & buf, const FormatSettings & settings)
{
writeJSONString(s.data(), s.data() + s.size(), buf, settings);
writeJSONString(s.data, s.data + s.size, buf, settings);
}
inline void writeJSONString(const StringRef & ref, WriteBuffer & buf, const FormatSettings & settings)
inline void writeJSONString(const std::string_view & s, WriteBuffer & buf, const FormatSettings & settings)
{
writeJSONString(ref.data, ref.data + ref.size, buf, settings);
writeJSONString(StringRef{s}, buf, settings);
}
inline void writeJSONString(const String & s, WriteBuffer & buf, const FormatSettings & settings)
{
writeJSONString(StringRef{s}, buf, settings);
}
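A small usage sketch of the reordered overloads; it assumes the usual `WriteBufferFromOwnString` helper from `IO/WriteBufferFromString.h` and only illustrates that the `const char *`, `std::string_view` and `String` variants all funnel into the `StringRef`-based implementations:

```cpp
#include <string_view>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>

// Illustrative sketch, not part of the patch.
std::string serializeExample()
{
    DB::WriteBufferFromOwnString buf;

    writeStringBinary("abc", buf);                    /// const char * overload
    writeStringBinary(std::string_view{"def"}, buf);  /// std::string_view overload

    DB::FormatSettings settings;
    writeJSONString(std::string_view{"g\"h"}, buf, settings);  /// escapes the quote

    return buf.str();
}
```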

View File

@ -29,6 +29,13 @@ TEST(S3UriTest, validPatterns)
ASSERT_EQ("data", uri.key);
ASSERT_EQ(false, uri.is_virtual_hosted_style);
}
{
S3::URI uri(Poco::URI("https://bucketname.cos.ap-beijing.myqcloud.com/data"));
ASSERT_EQ("https://cos.ap-beijing.myqcloud.com", uri.endpoint);
ASSERT_EQ("bucketname", uri.bucket);
ASSERT_EQ("data", uri.key);
ASSERT_EQ(true, uri.is_virtual_hosted_style);
}
{
S3::URI uri(Poco::URI("https://bucketname.s3.us-east-2.amazonaws.com/data"));
ASSERT_EQ("https://s3.us-east-2.amazonaws.com", uri.endpoint);

View File

@ -71,10 +71,13 @@ void AnyInputMatcher::visit(ASTPtr & current_ast, Data data)
&& function_argument && function_argument->as<ASTFunction>())
{
auto name = function_node->name;
auto alias = function_node->alias;
/// cut any or anyLast
if (!function_argument->as<ASTFunction>()->arguments->children.empty())
{
current_ast = function_argument->clone();
current_ast->setAlias(alias);
for (size_t i = 0; i < current_ast->as<ASTFunction>()->arguments->children.size(); ++i)
changeAllIdentifiers(current_ast, i, name);
}

View File

@ -62,6 +62,11 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
if (func.name == "equals")
{
if (func.arguments->children.size() != 2)
{
throwSyntaxException("Function 'equals' takes two arguments, got '"
+ func.formatForErrorMessage() + "' instead.");
}
ASTPtr left = func.arguments->children.at(0);
ASTPtr right = func.arguments->children.at(1);
auto table_numbers = getTableNumbers(ast, left, right, data);

View File

@ -0,0 +1,216 @@
#pragma once
#include <Functions/FunctionFactory.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTWithAlias.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/IAST.h>
#include <Common/typeid_cast.h>
namespace DB
{
namespace
{
/// @note We place strings in ascending order here under the assumption it could speed up String to Enum conversion.
String makeStringsEnum(const std::set<String> & values)
{
String enum_string = "Enum8(";
if (values.size() >= 255)
enum_string = "Enum16(";
size_t number = 1;
for (const auto & item : values)
{
enum_string += "\'" + item + "\' = " + std::to_string(number++);
if (number <= values.size())
enum_string += ", ";
}
enum_string += ")";
return enum_string;
}
void changeIfArguments(ASTPtr & first, ASTPtr & second)
{
String first_value = first->as<ASTLiteral>()->value.get<NearestFieldType<String>>();
String second_value = second->as<ASTLiteral>()->value.get<NearestFieldType<String>>();
std::set<String> values;
values.insert(first_value);
values.insert(second_value);
String enum_string = makeStringsEnum(values);
auto enum_literal = std::make_shared<ASTLiteral>(enum_string);
auto first_cast = makeASTFunction("CAST");
first_cast->arguments->children.push_back(first);
first_cast->arguments->children.push_back(enum_literal);
auto second_cast = makeASTFunction("CAST");
second_cast->arguments->children.push_back(second);
second_cast->arguments->children.push_back(enum_literal);
first = first_cast;
second = second_cast;
}
void changeTransformArguments(ASTPtr & array_to, ASTPtr & other)
{
std::set<String> values;
for (const auto & item : array_to->as<ASTLiteral>()->value.get<NearestFieldType<Array>>())
values.insert(item.get<NearestFieldType<String>>());
values.insert(other->as<ASTLiteral>()->value.get<NearestFieldType<String>>());
String enum_string = makeStringsEnum(values);
auto array_cast = makeASTFunction("CAST");
array_cast->arguments->children.push_back(array_to);
array_cast->arguments->children.push_back(std::make_shared<ASTLiteral>("Array(" + enum_string + ")"));
array_to = array_cast;
auto other_cast = makeASTFunction("CAST");
other_cast->arguments->children.push_back(other);
other_cast->arguments->children.push_back(std::make_shared<ASTLiteral>(enum_string));
other = other_cast;
}
bool checkSameType(const Array & array, const String & type)
{
for (const auto & item : array)
if (item.getTypeName() != type)
return false;
return true;
}
}
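A quick worked example of what `makeStringsEnum` generates for the two branches of `if(cond, 'hello', 'world')`; this is a self-contained mirror of the helper above, written only to show the resulting type name that `changeIfArguments` puts into the CAST:

```cpp
#include <iostream>
#include <set>
#include <string>

int main()
{
    std::set<std::string> values{"hello", "world"};

    // Mirrors makeStringsEnum: Enum8 unless there are 255 or more distinct strings.
    std::string enum_string = values.size() >= 255 ? "Enum16(" : "Enum8(";
    size_t number = 1;
    for (const auto & item : values)
    {
        enum_string += "'" + item + "' = " + std::to_string(number++);
        if (number <= values.size())
            enum_string += ", ";
    }
    enum_string += ")";

    // Prints: Enum8('hello' = 1, 'world' = 2)
    std::cout << enum_string << '\n';
    return 0;
}
```

With `optimize_if_transform_strings_to_enum` enabled, both literal branches are then wrapped in a CAST to this generated enum type.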
struct FindUsedFunctionsMatcher
{
using Visitor = ConstInDepthNodeVisitor<FindUsedFunctionsMatcher, true>;
struct Data
{
const std::unordered_set<String> & names;
std::unordered_set<String> & used_functions;
std::vector<String> call_stack = {};
};
static bool needChildVisit(const ASTPtr & node, const ASTPtr &)
{
return !(node->as<ASTFunction>());
}
static void visit(const ASTPtr & ast, Data & data)
{
if (auto * func = ast->as<ASTFunction>())
visit(*func, data);
}
static void visit(const ASTFunction & func, Data & data)
{
if (data.names.count(func.name) && !data.call_stack.empty())
{
String alias = func.tryGetAlias();
if (!alias.empty())
{
data.used_functions.insert(alias);
}
}
data.call_stack.push_back(func.name);
/// Visit children with known call stack
Visitor(data).visit(func.arguments);
data.call_stack.pop_back();
}
};
using FindUsedFunctionsVisitor = FindUsedFunctionsMatcher::Visitor;
struct ConvertStringsToEnumMatcher
{
struct Data
{
std::unordered_set<String> & used_functions;
};
static bool needChildVisit(const ASTPtr & node, const ASTPtr &)
{
return !(node->as<ASTFunction>());
}
static void visit(ASTPtr & ast, Data & data)
{
if (auto * func = ast->as<ASTFunction>())
visit(*func, data);
}
static void visit(ASTFunction & function_node, Data & data)
{
if (!function_node.arguments)
return;
/// We are not sure we can change the type of the function result
/// because it is present in another function as an argument
if (data.used_functions.count(function_node.tryGetAlias()))
return;
if (function_node.name == "if")
{
if (function_node.arguments->children.size() != 3)
return;
auto literal1 = function_node.arguments->children[1]->as<ASTLiteral>();
auto literal2 = function_node.arguments->children[2]->as<ASTLiteral>();
if (!literal1 || !literal2)
return;
if (String(literal1->value.getTypeName()) != "String" ||
String(literal2->value.getTypeName()) != "String")
return;
changeIfArguments(function_node.arguments->children[1],
function_node.arguments->children[2]);
}
else if (function_node.name == "transform")
{
if (function_node.arguments->children.size() != 4)
return;
auto literal_to = function_node.arguments->children[2]->as<ASTLiteral>();
auto literal_other = function_node.arguments->children[3]->as<ASTLiteral>();
if (!literal_to || !literal_other)
return;
if (String(literal_to->value.getTypeName()) != "Array" ||
String(literal_other->value.getTypeName()) != "String")
return;
Array array_to = literal_to->value.get<NearestFieldType<Array>>();
if (array_to.size() == 0)
return;
bool to_strings = checkSameType(array_to, "String");
if (!to_strings)
return;
changeTransformArguments(function_node.arguments->children[2],
function_node.arguments->children[3]);
}
}
};
using ConvertStringsToEnumVisitor = InDepthNodeVisitor<ConvertStringsToEnumMatcher, true>;
}

View File

@ -47,6 +47,10 @@ void MarkTableIdentifiersMatcher::visit(const ASTFunction & func, ASTPtr &, Data
// First argument of dictGet can be a dictionary name, perhaps with a database.
if (functionIsJoinGet(func.name) || functionIsDictGet(func.name))
{
if (func.arguments->children.empty())
{
return;
}
auto & ast = func.arguments->children.at(0);
auto opt_name = tryGetIdentifierName(ast);
if (opt_name && !data.aliases.count(*opt_name))

View File

@ -465,7 +465,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
throw Exception("Unknown mutation command type: " + DB::toString<int>(command.type), ErrorCodes::UNKNOWN_MUTATION_COMMAND);
}
/// We cares about affected indices because we also need to rewrite them
/// We care about affected indices because we also need to rewrite them
/// when one of index columns updated or filtered with delete.
/// The same applies to columns that are needed for calculation of TTL expressions.
if (!dependencies.empty())

View File

@ -72,6 +72,7 @@ struct StorageID
void assertNotEmpty() const
{
// Can be triggered by user input, e.g. SELECT joinGetOrNull('', 'num', 500)
if (empty())
throw Exception("Both table name and UUID are empty", ErrorCodes::UNKNOWN_TABLE);
if (table_name.empty() && !database_name.empty())

View File

@ -31,6 +31,7 @@
#include <Interpreters/RemoveInjectiveFunctionsVisitor.h>
#include <Interpreters/RedundantFunctionsInOrderByVisitor.h>
#include <Interpreters/MonotonicityCheckVisitor.h>
#include <Interpreters/ConvertStringsToEnumVisitor.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
@ -692,6 +693,18 @@ void optimizeInjectiveFunctionsInsideUniq(ASTPtr & query, const Context & contex
RemoveInjectiveFunctionsVisitor(data).visit(query);
}
void transformIfStringsIntoEnum(ASTPtr & query)
{
std::unordered_set<String> function_names = {"if", "transform"};
std::unordered_set<String> used_as_argument;
FindUsedFunctionsVisitor::Data used_data{function_names, used_as_argument};
FindUsedFunctionsVisitor(used_data).visit(query);
ConvertStringsToEnumVisitor::Data convert_data{used_as_argument};
ConvertStringsToEnumVisitor(convert_data).visit(query);
}
void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const ASTSelectQuery * select_query,
const NamesAndTypesList & source_columns, const NameSet & source_columns_set)
{
@ -1112,6 +1125,10 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect(
if (settings.optimize_monotonous_functions_in_order_by)
optimizeMonotonousFunctionsInOrderBy(select_query, context, tables_with_columns);
/// If function "if" has String-type arguments, transform them into enum
if (settings.optimize_if_transform_strings_to_enum)
transformIfStringsIntoEnum(query);
/// Remove duplicated elements from LIMIT BY clause.
optimizeLimitBy(select_query);

View File

@ -14,12 +14,15 @@
#include <Parsers/ParserWatchQuery.h>
#include <Parsers/ParserSetQuery.h>
#include <Parsers/ASTExplainQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ParserShowAccessEntitiesQuery.h>
#include <Parsers/ParserShowAccessQuery.h>
#include <Parsers/ParserShowCreateAccessEntityQuery.h>
#include <Parsers/ParserShowGrantsQuery.h>
#include <Parsers/ParserShowPrivilegesQuery.h>
#include <Parsers/ParserExplainQuery.h>
#include <Parsers/QueryWithOutputSettingsPushDownVisitor.h>
namespace DB
@ -107,15 +110,13 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
if (!parser_settings.parse(pos, query_with_output.settings_ast, expected))
return false;
query_with_output.children.push_back(query_with_output.settings_ast);
}
if (auto * ast = query->as<ASTExplainQuery>())
{
/// Set default format TSV, because output is a single string column.
if (!ast->format)
// SETTINGS after FORMAT is not parsed by the SELECT parser (ParserSelectQuery)
// Pass them manually, to apply in InterpreterSelectQuery::initSettings()
if (query->as<ASTSelectWithUnionQuery>())
{
ast->format = std::make_shared<ASTIdentifier>("TSV");
ast->children.push_back(ast->format);
QueryWithOutputSettingsPushDownVisitor::Data data{query_with_output.settings_ast};
QueryWithOutputSettingsPushDownVisitor(data).visit(query);
}
}

View File

@ -0,0 +1,56 @@
#include <Common/SettingsChanges.h>
#include <Parsers/QueryWithOutputSettingsPushDownVisitor.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTSubquery.h>
#include <iterator>
#include <algorithm>
namespace DB
{
bool QueryWithOutputSettingsPushDownMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child)
{
if (node->as<ASTSelectWithUnionQuery>())
return true;
if (node->as<ASTSubquery>())
return true;
if (child->as<ASTSelectQuery>())
return true;
return false;
}
void QueryWithOutputSettingsPushDownMatcher::visit(ASTPtr & ast, Data & data)
{
if (auto * select_query = ast->as<ASTSelectQuery>())
visit(*select_query, ast, data);
}
void QueryWithOutputSettingsPushDownMatcher::visit(ASTSelectQuery & select_query, ASTPtr &, Data & data)
{
ASTPtr select_settings_ast = select_query.settings();
if (!select_settings_ast)
{
select_query.setExpression(ASTSelectQuery::Expression::SETTINGS, data.settings_ast->clone());
return;
}
SettingsChanges & select_settings = select_settings_ast->as<ASTSetQuery &>().changes;
SettingsChanges & settings = data.settings_ast->as<ASTSetQuery &>().changes;
for (auto & setting : settings)
{
auto it = std::find_if(select_settings.begin(), select_settings.end(), [&](auto & select_setting)
{
return select_setting.name == setting.name;
});
if (it == select_settings.end())
select_settings.push_back(setting);
else
it->value = setting.value;
}
}
}

View File

@ -0,0 +1,39 @@
#pragma once
#include <Parsers/IAST.h>
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
class ASTSelectQuery;
struct SettingChange;
using SettingsChanges = std::vector<SettingChange>;
/// Push down the SETTINGS clause that goes after FORMAT into the SELECT query:
/// (settings after FORMAT are parsed separately, not in ParserSelectQuery but in ParserQueryWithOutput)
///
/// SELECT 1 FORMAT Null SETTINGS max_block_size = 1 ->
/// SELECT 1 SETTINGS max_block_size = 1 FORMAT Null SETTINGS max_block_size = 1
///
/// Otherwise settings after FORMAT will not be applied.
class QueryWithOutputSettingsPushDownMatcher
{
public:
using Visitor = InDepthNodeVisitor<QueryWithOutputSettingsPushDownMatcher, true>;
struct Data
{
const ASTPtr & settings_ast;
};
static bool needChildVisit(ASTPtr & node, const ASTPtr & child);
static void visit(ASTPtr & ast, Data & data);
private:
static void visit(ASTSelectQuery &, ASTPtr &, Data &);
};
using QueryWithOutputSettingsPushDownVisitor = QueryWithOutputSettingsPushDownMatcher::Visitor;
}

View File

@ -116,6 +116,7 @@ SRCS(
parseUserName.cpp
queryToString.cpp
TokenIterator.cpp
QueryWithOutputSettingsPushDownVisitor.cpp
)
END()

View File

@ -231,9 +231,8 @@ void MergeSortingTransform::generate()
ProfileEvents::increment(ProfileEvents::ExternalSortMerge);
LOG_INFO(log, "There are {} temporary sorted parts to merge.", temporary_files.size());
if (!chunks.empty())
processors.emplace_back(std::make_shared<MergeSorterSource>(
header_without_constants, std::move(chunks), description, max_merged_block_size, limit));
processors.emplace_back(std::make_shared<MergeSorterSource>(
header_without_constants, std::move(chunks), description, max_merged_block_size, limit));
}
generated_prefix = true;

View File

@ -313,6 +313,18 @@ void TCPHandler::runImpl()
state.io.onException();
exception.emplace(Exception::CreateFromPocoTag{}, e);
}
// The server should die on std::logic_error in debug builds, just as it does
// with assert() or ErrorCodes::LOGICAL_ERROR. This helps catch such errors
// in tests.
#ifndef NDEBUG
catch (const std::logic_error & e)
{
state.io.onException();
exception.emplace(Exception::CreateFromSTDTag{}, e);
sendException(*exception, send_exception_with_stack_trace);
std::abort();
}
#endif
catch (const std::exception & e)
{
state.io.onException();
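The idea behind the new catch block: in a debug build a std::logic_error is still reported to the client, but the server then aborts so a broken invariant makes tests fail loudly. A self-contained sketch of the same pattern, with a plain handler standing in for the real TCPHandler:

#include <cstdlib>
#include <iostream>
#include <stdexcept>

// Hypothetical single-request handler demonstrating the debug-only behaviour:
// report the error to the peer first, then abort so tests catch the bug.
void handleOneQuery()
{
    try
    {
        throw std::logic_error("broken invariant"); // stand-in for a LOGICAL_ERROR
    }
#ifndef NDEBUG
    catch (const std::logic_error & e)
    {
        std::cerr << "sending exception to client: " << e.what() << '\n';
        std::abort(); // die hard in debug builds
    }
#endif
    catch (const std::exception & e)
    {
        // In release builds logic errors fall through to the generic handler.
        std::cerr << "error: " << e.what() << '\n';
    }
}

int main() { handleOneQuery(); }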

View File

@ -8,6 +8,7 @@
#include <IO/WriteHelpers.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/localBackup.h>
#include <Storages/MergeTree/checkDataPart.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/escapeForFileName.h>
#include <common/JSON.h>
@ -521,7 +522,18 @@ void IMergeTreeDataPart::loadChecksums(bool require)
if (require)
throw Exception("No checksums.txt in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);
bytes_on_disk = calculateTotalSizeOnDisk(volume->getDisk(), getFullRelativePath());
/// If the checksums file is not present, calculate the checksums and write them to disk.
/// Check the data while we are at it.
LOG_WARNING(storage.log, "Checksums for part {} not found. Will calculate them from data on disk.", name);
checksums = checkDataPart(shared_from_this(), false);
{
auto out = volume->getDisk()->writeFile(getFullRelativePath() + "checksums.txt.tmp", 4096);
checksums.write(*out);
}
volume->getDisk()->moveFile(getFullRelativePath() + "checksums.txt.tmp", getFullRelativePath() + "checksums.txt");
bytes_on_disk = checksums.getTotalSizeOnDisk();
}
}
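The .tmp-then-moveFile sequence above is the usual near-atomic file replacement trick: checksums.txt only appears once it has been written in full, so a crash mid-write cannot leave a truncated file behind. A minimal sketch with std::filesystem standing in for IDisk (the path is hypothetical):

#include <filesystem>
#include <fstream>
#include <string>

namespace fs = std::filesystem;

// Write `contents` to `path` by first writing a sibling .tmp file and then
// renaming it over the target, so readers never observe a half-written file.
void writeFileAtomically(const fs::path & path, const std::string & contents)
{
    fs::path tmp = path;
    tmp += ".tmp";

    {
        std::ofstream out(tmp, std::ios::binary | std::ios::trunc);
        out << contents;
        out.flush(); // for real durability an fsync would also be needed
    }

    fs::rename(tmp, path); // rename within one filesystem is atomic on POSIX
}

int main()
{
    writeFileAtomically("checksums.txt", "example checksum payload\n");
}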

View File

@ -2453,19 +2453,6 @@ static void loadPartAndFixMetadataImpl(MergeTreeData::MutableDataPartPtr part)
part->loadColumnsChecksumsIndexes(false, true);
part->modification_time = disk->getLastModified(full_part_path).epochTime();
/// If the checksums file is not present, calculate the checksums and write them to disk.
/// Check the data while we are at it.
if (part->checksums.empty())
{
part->checksums = checkDataPart(part, false);
{
auto out = disk->writeFile(full_part_path + "checksums.txt.tmp", 4096);
part->checksums.write(*out);
}
disk->moveFile(full_part_path + "checksums.txt.tmp", full_part_path + "checksums.txt");
}
}
MergeTreeData::MutableDataPartPtr MergeTreeData::loadPartAndFixMetadata(const VolumePtr & volume, const String & relative_path) const
@ -3642,7 +3629,7 @@ bool MergeTreeData::canUsePolymorphicParts(const MergeTreeSettings & settings, S
{
std::ostringstream message;
message << "Table can't create parts with adaptive granularity, but settings"
<< "min_rows_for_wide_part = " << settings.min_rows_for_wide_part
<< " min_rows_for_wide_part = " << settings.min_rows_for_wide_part
<< ", min_bytes_for_wide_part = " << settings.min_bytes_for_wide_part
<< ", min_rows_for_compact_part = " << settings.min_rows_for_compact_part
<< ", min_bytes_for_compact_part = " << settings.min_bytes_for_compact_part

View File

@ -46,7 +46,7 @@ MergedBlockOutputStream::MergedBlockOutputStream(
{
MergeTreeWriterSettings writer_settings(
storage.global_context.getSettings(),
storage.canUseAdaptiveGranularity(),
data_part->index_granularity_info.is_adaptive,
aio_threshold,
blocks_are_granules_size);

View File

@ -618,6 +618,26 @@ void StorageDistributed::shutdown()
std::lock_guard lock(cluster_nodes_mutex);
cluster_nodes_data.clear();
}
void StorageDistributed::drop()
{
// shutdown() should have been called already,
// and for the same reason we cannot use truncate() here, since
// cluster_nodes_data has already been cleaned up
if (!cluster_nodes_data.empty())
throw Exception("drop called before shutdown", ErrorCodes::LOGICAL_ERROR);
// A Distributed table without a sharding_key does not allow INSERTs
if (relative_data_path.empty())
return;
LOG_DEBUG(log, "Removing pending blocks for async INSERT from filesystem on DROP TABLE");
auto disks = volume->getDisks();
for (const auto & disk : disks)
disk->removeRecursive(relative_data_path);
LOG_DEBUG(log, "Removed");
}
Strings StorageDistributed::getDataPaths() const
{
@ -636,11 +656,15 @@ void StorageDistributed::truncate(const ASTPtr &, const StorageMetadataPtr &, co
{
std::lock_guard lock(cluster_nodes_mutex);
LOG_DEBUG(log, "Removing pending blocks for async INSERT from filesystem on TRUNCATE TABLE");
for (auto it = cluster_nodes_data.begin(); it != cluster_nodes_data.end();)
{
it->second.shutdownAndDropAllData();
it = cluster_nodes_data.erase(it);
}
LOG_DEBUG(log, "Removed");
}
StoragePolicyPtr StorageDistributed::getStoragePolicy() const
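What drop() removes here is the per-disk directory of pending async INSERT blocks. A minimal illustration of that cleanup, with plain directories standing in for the IDisk abstraction (paths are hypothetical):

#include <filesystem>
#include <iostream>
#include <string>
#include <vector>

namespace fs = std::filesystem;

// Remove the table's relative data path from every disk root it may live on,
// mirroring the loop over volume->getDisks() above. Plain directories stand in
// for ClickHouse's IDisk abstraction; the paths are hypothetical.
void dropPendingInsertBlocks(const std::vector<fs::path> & disk_roots, const std::string & relative_data_path)
{
    if (relative_data_path.empty())
        return; // nothing was ever written: no sharding_key means INSERT was disabled

    for (const auto & root : disk_roots)
    {
        std::error_code ec;
        fs::remove_all(root / relative_data_path, ec); // missing directories are not an error
        if (ec)
            std::cerr << "failed to remove " << (root / relative_data_path) << ": " << ec.message() << '\n';
    }
}

int main()
{
    dropPendingInsertBlocks({"/var/lib/clickhouse/disk1", "/var/lib/clickhouse/disk2"}, "data/default/dist_table/");
}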

View File

@ -95,6 +95,7 @@ public:
void startup() override;
void shutdown() override;
void drop() override;
Strings getDataPaths() const override;

View File

@ -204,6 +204,7 @@ StorageS3::StorageS3(
, format_name(format_name_)
, min_upload_part_size(min_upload_part_size_)
, compression_method(compression_method_)
, name(uri_.storage_name)
{
context_global.getRemoteHostFilter().checkURL(uri_.uri);
StorageInMemoryMetadata storage_metadata;
@ -329,9 +330,9 @@ BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const StorageMet
client, uri.bucket, uri.key);
}
void registerStorageS3(StorageFactory & factory)
void registerStorageS3Impl(const String & name, StorageFactory & factory)
{
factory.registerStorage("S3", [](const StorageFactory::Arguments & args)
factory.registerStorage(name, [](const StorageFactory::Arguments & args)
{
ASTs & engine_args = args.engine_args;
@ -371,6 +372,16 @@ void registerStorageS3(StorageFactory & factory)
});
}
void registerStorageS3(StorageFactory & factory)
{
return registerStorageS3Impl("S3", factory);
}
void registerStorageCOS(StorageFactory & factory)
{
return registerStorageS3Impl("COSN", factory);
}
NamesAndTypesList StorageS3::getVirtuals() const
{
return NamesAndTypesList{

View File

@ -22,7 +22,7 @@ namespace DB
* It sends HTTP GET to server when select is called and
* HTTP PUT when insert is called.
*/
class StorageS3 final : public ext::shared_ptr_helper<StorageS3>, public IStorage
class StorageS3 : public ext::shared_ptr_helper<StorageS3>, public IStorage
{
public:
StorageS3(const S3::URI & uri,
@ -38,7 +38,7 @@ public:
String getName() const override
{
return "S3";
return name;
}
Pipes read(
@ -62,6 +62,7 @@ private:
UInt64 min_upload_part_size;
String compression_method;
std::shared_ptr<Aws::S3::S3Client> client;
String name;
};
}
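The COSN engine is the same implementation registered under a second name, and the storage now remembers which engine name it was created with, so getName() reports "COSN" rather than a hard-coded "S3". A toy factory sketch of that registration pattern (the names below are illustrative, not the real StorageFactory API):

#include <functional>
#include <iostream>
#include <map>
#include <string>

// Toy storage factory: one creator registered under several engine names,
// mirroring how registerStorageS3Impl() is reused for both "S3" and "COSN".
class ToyStorageFactory
{
public:
    using Creator = std::function<void(const std::string & engine_name)>;

    void registerStorage(const std::string & name, Creator creator) { creators[name] = std::move(creator); }
    void create(const std::string & name) const { creators.at(name)(name); }

private:
    std::map<std::string, Creator> creators;
};

void registerToyS3Impl(const std::string & name, ToyStorageFactory & factory)
{
    factory.registerStorage(name, [](const std::string & engine_name)
    {
        // The created storage keeps the engine name it was registered under,
        // just as StorageS3 now stores `name` and returns it from getName().
        std::cout << "created storage with engine name " << engine_name << '\n';
    });
}

int main()
{
    ToyStorageFactory factory;
    registerToyS3Impl("S3", factory);
    registerToyS3Impl("COSN", factory);
    factory.create("COSN"); // prints: created storage with engine name COSN
}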

View File

@ -30,6 +30,7 @@ void registerStorages()
#if USE_AWS_S3
registerStorageS3(factory);
registerStorageCOS(factory);
#endif
#if USE_HDFS

View File

@ -30,6 +30,7 @@ void registerStorageGenerateRandom(StorageFactory & factory);
#if USE_AWS_S3
void registerStorageS3(StorageFactory & factory);
void registerStorageCOS(StorageFactory & factory);
#endif
#if USE_HDFS

View File

@ -85,7 +85,6 @@ StoragePtr TableFunctionS3::getStorage(
{
Poco::URI uri (source);
S3::URI s3_uri (uri);
UInt64 min_upload_part_size = global_context.getSettingsRef().s3_min_upload_part_size;
return StorageS3::create(
s3_uri,
@ -105,6 +104,11 @@ void registerTableFunctionS3(TableFunctionFactory & factory)
factory.registerFunction<TableFunctionS3>();
}
void registerTableFunctionCOS(TableFunctionFactory & factory)
{
factory.registerFunction<TableFunctionCOS>();
}
}
#endif

View File

@ -23,7 +23,7 @@ public:
return name;
}
private:
protected:
StoragePtr executeImpl(
const ASTPtr & ast_function,
const Context & context,
@ -42,6 +42,18 @@ private:
const char * getStorageTypeName() const override { return "S3"; }
};
class TableFunctionCOS : public TableFunctionS3
{
public:
static constexpr auto name = "cosn";
std::string getName() const override
{
return name;
}
private:
const char * getStorageTypeName() const override { return "COSN"; }
};
}
#endif

View File

@ -20,6 +20,7 @@ void registerTableFunctions()
#if USE_AWS_S3
registerTableFunctionS3(factory);
registerTableFunctionCOS(factory);
#endif
#if USE_HDFS

View File

@ -20,6 +20,7 @@ void registerTableFunctionGenerate(TableFunctionFactory & factory);
#if USE_AWS_S3
void registerTableFunctionS3(TableFunctionFactory & factory);
void registerTableFunctionCOS(TableFunctionFactory & factory);
#endif
#if USE_HDFS

View File

@ -6,6 +6,9 @@ cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_zookeeper=True)
node2 = cluster.add_instance('node2', with_zookeeper=True)
# no adaptive granularity by default
node3 = cluster.add_instance('node3', image='yandex/clickhouse-server:19.9.5.36', with_installed_binary=True, stay_alive=True)
@pytest.fixture(scope="module")
def start_cluster():
try:
@ -47,3 +50,29 @@ def test_attach_detach(start_cluster):
assert node1.query("SELECT COUNT() FROM test") == "4\n"
assert node2.query("SELECT COUNT() FROM test") == "4\n"
def test_mutate_with_mixed_granularity(start_cluster):
node3.query("""
CREATE TABLE test (date Date, key UInt64, value1 String, value2 String)
ENGINE = MergeTree
ORDER BY key PARTITION BY date""")
node3.query("INSERT INTO test SELECT toDate('2019-10-01') + number % 5, number, toString(number), toString(number * number) FROM numbers(500)")
assert node3.query("SELECT COUNT() FROM test") == "500\n"
node3.restart_with_latest_version()
assert node3.query("SELECT COUNT() FROM test") == "500\n"
node3.query("ALTER TABLE test MODIFY SETTING enable_mixed_granularity_parts = 1")
node3.query("INSERT INTO test SELECT toDate('2019-10-01') + number % 5, number, toString(number), toString(number * number) FROM numbers(500, 500)")
assert node3.query("SELECT COUNT() FROM test") == "1000\n"
assert node3.query("SELECT COUNT() FROM test WHERE key % 100 == 0") == "10\n"
node3.query("ALTER TABLE test DELETE WHERE key % 100 == 0", settings={"mutations_sync": "2"})
assert node3.query("SELECT COUNT() FROM test WHERE key % 100 == 0") == "0\n"

View File

@ -0,0 +1,39 @@
import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1')
@pytest.fixture(scope="module")
def start_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def test_attach_without_checksums(start_cluster):
node1.query("CREATE TABLE test (date Date, key Int32, value String) Engine=MergeTree ORDER BY key PARTITION by date")
node1.query("INSERT INTO test SELECT toDate('2019-10-01'), number, toString(number) FROM numbers(100)")
assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n"
node1.query("ALTER TABLE test DETACH PARTITION '2019-10-01'")
assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "0\n"
assert node1.query("SELECT COUNT() FROM test") == "0\n"
# make sure the output is not empty
node1.exec_in_container(['bash', '-c', 'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" | grep -e ".*" '], privileged=True, user='root')
node1.exec_in_container(['bash', '-c', 'find /var/lib/clickhouse/data/default/test/detached -name "checksums.txt" -delete'], privileged=True, user='root')
node1.query("ALTER TABLE test ATTACH PARTITION '2019-10-01'")
assert node1.query("SELECT COUNT() FROM test WHERE key % 10 == 0") == "10\n"
assert node1.query("SELECT COUNT() FROM test") == "100\n"

Some files were not shown because too many files have changed in this diff