Merge branch 'master' into pp-system-unfreeze

This commit is contained in:
Antonio Andelic 2022-06-13 14:46:30 +00:00
commit b9cf6fe367
247 changed files with 5671 additions and 1305 deletions

View File

@ -13,9 +13,7 @@ max-statements=200
ignore-long-lines = (# )?<?https?://\S+>?$
[MESSAGES CONTROL]
disable = bad-continuation,
missing-docstring,
bad-whitespace,
disable = missing-docstring,
too-few-public-methods,
invalid-name,
too-many-arguments,

View File

@ -9,7 +9,7 @@ std::string errnoToString(int code, int the_errno)
char buf[buf_size];
#ifndef _GNU_SOURCE
int rc = strerror_r(the_errno, buf, buf_size);
#ifdef __APPLE__
#ifdef OS_DARWIN
if (rc != 0 && rc != EINVAL)
#else
if (rc != 0)
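For context on the two `strerror_r` variants the `#ifdef` above distinguishes, here is a minimal self-contained sketch of the same pattern (the buffer size and fallback message are illustrative, not taken from the file):

``` cpp
#include <cerrno>
#include <cstring>
#include <string>

/// XSI strerror_r returns an int error code and fills buf;
/// the GNU variant returns a char * that may or may not point into buf.
std::string describeErrno(int the_errno)
{
    char buf[128];
#if defined(_GNU_SOURCE)
    return strerror_r(the_errno, buf, sizeof(buf));
#else
    if (strerror_r(the_errno, buf, sizeof(buf)) != 0)
        return "Unknown error " + std::to_string(the_errno);
    return buf;
#endif
}
```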

View File

@ -16,7 +16,7 @@ uint64_t getAvailableMemoryAmountOrZero()
{
#if defined(_SC_PHYS_PAGES) // linux
return getPageSize() * sysconf(_SC_PHYS_PAGES);
#elif defined(__FreeBSD__)
#elif defined(OS_FREEBSD)
struct vmtotal vmt;
size_t vmt_size = sizeof(vmt);
if (sysctlbyname("vm.vmtotal", &vmt, &vmt_size, NULL, 0) == 0)

View File

@ -6,7 +6,7 @@
#include <base/defines.h>
#if defined(__linux__) && !defined(THREAD_SANITIZER) && !defined(USE_MUSL)
#if defined(OS_LINUX) && !defined(THREAD_SANITIZER) && !defined(USE_MUSL)
#define USE_PHDR_CACHE 1
#endif

View File

@ -705,3 +705,109 @@ target_compile_options(_crypto PRIVATE -Wno-gnu-anonymous-struct)
add_library(OpenSSL::Crypto ALIAS _crypto)
add_library(OpenSSL::SSL ALIAS _ssl)
# Helper function used in the populate_openssl_vars function below
function(from_hex HEX DEC)
string(TOUPPER "${HEX}" HEX)
set(_res 0)
string(LENGTH "${HEX}" _strlen)
while (_strlen GREATER 0)
math(EXPR _res "${_res} * 16")
string(SUBSTRING "${HEX}" 0 1 NIBBLE)
string(SUBSTRING "${HEX}" 1 -1 HEX)
if (NIBBLE STREQUAL "A")
math(EXPR _res "${_res} + 10")
elseif (NIBBLE STREQUAL "B")
math(EXPR _res "${_res} + 11")
elseif (NIBBLE STREQUAL "C")
math(EXPR _res "${_res} + 12")
elseif (NIBBLE STREQUAL "D")
math(EXPR _res "${_res} + 13")
elseif (NIBBLE STREQUAL "E")
math(EXPR _res "${_res} + 14")
elseif (NIBBLE STREQUAL "F")
math(EXPR _res "${_res} + 15")
else ()
math(EXPR _res "${_res} + ${NIBBLE}")
endif ()
string(LENGTH "${HEX}" _strlen)
endwhile ()
set(${DEC} ${_res} PARENT_SCOPE)
endfunction()
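# Illustrative usage of the helper above (not part of the original file):
# from_hex("2A" result) sets ${result} to 42 in the caller's scope via PARENT_SCOPE.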
# ClickHouse uses BoringSSL which is a fork of OpenSSL.
# This populates CMAKE var OPENSSL_VERSION from the OPENSSL_VERSION_NUMBER defined
# in contrib/boringssl/include/openssl/base.h. It also sets the CMAKE var OPENSSL_IS_BORING_SSL
# if it's defined in the file. Both OPENSSL_VERSION and OPENSSL_IS_BORING_SSL variables will be
# used to populate flags in the `system.build_options` table for more context on ssl version used.
# This cmake script is adapted from the FindOpenSSL cmake module and slightly modified for this use-case.
if (EXISTS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h")
file(STRINGS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h" openssl_version_str
REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])+.*")
file(STRINGS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h" openssl_is_boringssl
REGEX "^#[\t ]*define[\t ]+OPENSSL_IS_BORINGSSL.*")
# Set to true if OPENSSL_IS_BORING_SSL is defined
if (openssl_is_boringssl)
set(OPENSSL_IS_BORING_SSL 1)
endif ()
# If openssl_version_str is defined extrapolate and set OPENSSL_VERSION
if (openssl_version_str)
# The version number is encoded as 0xMNNFFPPS: major minor fix patch status
# The status gives if this is a developer or prerelease and is ignored here.
# Major, minor, and fix directly translate into the version numbers shown in
# the string. The patch field translates to the single character suffix that
# indicates the bug fix state, which 00 -> nothing, 01 -> a, 02 -> b and so
# on.
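# For example, 0x1010107f decodes as major 1, minor 01, fix 01, patch 07 -> 'g',
# i.e. the version string "1.1.1g" (worked example added for illustration).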
string(REGEX REPLACE "^.*OPENSSL_VERSION_NUMBER[\t ]+0x([0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F][0-9a-fA-F])([0-9a-fA-F]).*$"
"\\1;\\2;\\3;\\4;\\5" OPENSSL_VERSION_LIST "${openssl_version_str}")
list(GET OPENSSL_VERSION_LIST 0 OPENSSL_VERSION_MAJOR)
list(GET OPENSSL_VERSION_LIST 1 OPENSSL_VERSION_MINOR)
from_hex("${OPENSSL_VERSION_MINOR}" OPENSSL_VERSION_MINOR)
list(GET OPENSSL_VERSION_LIST 2 OPENSSL_VERSION_FIX)
from_hex("${OPENSSL_VERSION_FIX}" OPENSSL_VERSION_FIX)
list(GET OPENSSL_VERSION_LIST 3 OPENSSL_VERSION_PATCH)
if (NOT OPENSSL_VERSION_PATCH STREQUAL "00")
from_hex("${OPENSSL_VERSION_PATCH}" _tmp)
# 96 is the ASCII code of 'a' minus 1
math(EXPR OPENSSL_VERSION_PATCH_ASCII "${_tmp} + 96")
unset(_tmp)
# Once anyone knows how OpenSSL would call the patch versions beyond 'z'
# this should be updated to handle that, too. This has not happened yet
# so it is simply ignored here for now.
string(ASCII "${OPENSSL_VERSION_PATCH_ASCII}" OPENSSL_VERSION_PATCH_STRING)
endif ()
set(OPENSSL_VERSION "${OPENSSL_VERSION_MAJOR}.${OPENSSL_VERSION_MINOR}.${OPENSSL_VERSION_FIX}${OPENSSL_VERSION_PATCH_STRING}")
else ()
# Since OpenSSL 3.0.0, the new version format is MAJOR.MINOR.PATCH and
# a new OPENSSL_VERSION_STR macro contains exactly that
file(STRINGS "${BORINGSSL_SOURCE_DIR}/include/openssl/base.h" OPENSSL_VERSION_STR
REGEX "^#[\t ]*define[\t ]+OPENSSL_VERSION_STR[\t ]+\"([0-9])+\\.([0-9])+\\.([0-9])+\".*")
string(REGEX REPLACE "^.*OPENSSL_VERSION_STR[\t ]+\"([0-9]+\\.[0-9]+\\.[0-9]+)\".*$"
"\\1" OPENSSL_VERSION_STR "${OPENSSL_VERSION_STR}")
set(OPENSSL_VERSION "${OPENSSL_VERSION_STR}")
# Setting OPENSSL_VERSION_MAJOR OPENSSL_VERSION_MINOR and OPENSSL_VERSION_FIX
string(REGEX MATCHALL "([0-9])+" OPENSSL_VERSION_NUMBER "${OPENSSL_VERSION}")
list(POP_FRONT OPENSSL_VERSION_NUMBER
OPENSSL_VERSION_MAJOR
OPENSSL_VERSION_MINOR
OPENSSL_VERSION_FIX)
unset(OPENSSL_VERSION_NUMBER)
unset(OPENSSL_VERSION_STR)
endif ()
endif ()
# Set CMAKE variables so that they can be referenced properly from everywhere
set(OPENSSL_VERSION "${OPENSSL_VERSION}" CACHE INTERNAL "")
set(OPENSSL_IS_BORING_SSL "${OPENSSL_IS_BORING_SSL}" CACHE INTERNAL 0)

View File

@ -1,15 +1,16 @@
# rebuild in #36968
# docker build -t clickhouse/docs-builder .
# nodejs 17 prefers ipv6 and is broken in our environment
FROM node:16.14.2-alpine3.15
FROM node:16-alpine
RUN apk add --no-cache git openssh bash
# TODO: clean before merge!
ARG DOCS_BRANCH=main
# At this point we want to really update /opt/clickhouse-docs
# despite the cached images
ARG CACHE_INVALIDATOR=0
RUN git clone https://github.com/ClickHouse/clickhouse-docs.git \
--depth=1 --branch=${DOCS_BRANCH} /opt/clickhouse-docs
--depth=1 --branch=main /opt/clickhouse-docs
WORKDIR /opt/clickhouse-docs

View File

@ -8,8 +8,6 @@ if [ "$GIT_DOCS_BRANCH" ] && ! [ "$GIT_DOCS_BRANCH" == "$GIT_BRANCH" ]; then
git fetch origin --depth=1 -- "$GIT_DOCS_BRANCH:$GIT_DOCS_BRANCH"
git checkout "$GIT_DOCS_BRANCH"
else
# Untracked yarn.lock could cause pull to fail
git clean -fdx
# Update docs repo
git pull
fi

View File

@ -42,6 +42,7 @@ DATA_DIR="${CLICKHOUSE_DATA_DIR:-/var/lib/clickhouse}"
LOG_DIR="${LOG_DIR:-/var/log/clickhouse-keeper}"
LOG_PATH="${LOG_DIR}/clickhouse-keeper.log"
ERROR_LOG_PATH="${LOG_DIR}/clickhouse-keeper.err.log"
COORDINATION_DIR="${DATA_DIR}/coordination"
COORDINATION_LOG_DIR="${DATA_DIR}/coordination/log"
COORDINATION_SNAPSHOT_DIR="${DATA_DIR}/coordination/snapshots"
CLICKHOUSE_WATCHDOG_ENABLE=${CLICKHOUSE_WATCHDOG_ENABLE:-0}
@ -49,6 +50,7 @@ CLICKHOUSE_WATCHDOG_ENABLE=${CLICKHOUSE_WATCHDOG_ENABLE:-0}
for dir in "$DATA_DIR" \
"$LOG_DIR" \
"$TMP_DIR" \
"$COORDINATION_DIR" \
"$COORDINATION_LOG_DIR" \
"$COORDINATION_SNAPSHOT_DIR"
do

View File

@ -8,16 +8,16 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
aspell \
curl \
git \
libxml2-utils \
moreutils \
pylint \
python3-fuzzywuzzy \
python3-pip \
shellcheck \
yamllint \
&& pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff
&& pip3 install black boto3 codespell dohq-artifactory PyGithub unidiff pylint==2.6.2
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH

View File

@ -18,6 +18,7 @@ def process_result(result_folder):
("typos", "typos_output.txt"),
("whitespaces", "whitespaces_output.txt"),
("workflows", "workflows_output.txt"),
("doc typos", "doc_spell_output.txt"),
)
for name, out_file in checks:

View File

@ -11,6 +11,8 @@ echo "Check python formatting with black" | ts
./check-black -n |& tee /test_output/black_output.txt
echo "Check typos" | ts
./check-typos |& tee /test_output/typos_output.txt
echo "Check docs spelling" | ts
./check-doc-aspell |& tee /test_output/doc_spell_output.txt
echo "Check whitespaces" | ts
./check-whitespaces -n |& tee /test_output/whitespaces_output.txt
echo "Check workflows" | ts

View File

@ -138,7 +138,7 @@ It's important to name tests correctly, so one could turn some tests subset off
| Tester flag| What should be in test name | When flag should be added |
|---|---|---|
| `--[no-]zookeeper`| "zookeeper" or "replica" | Test uses tables from ReplicatedMergeTree family |
| `--[no-]zookeeper`| "zookeeper" or "replica" | Test uses tables from `ReplicatedMergeTree` family |
| `--[no-]shard` | "shard" or "distributed" or "global"| Test using connections to 127.0.0.2 or similar |
| `--[no-]long` | "long" or "deadlock" or "race" | Test runs longer than 60 seconds |

View File

@ -5,7 +5,7 @@ sidebar_position: 62
# Overview of ClickHouse Architecture
ClickHouse is a true column-oriented DBMS. Data is stored by columns, and during the execution of arrays (vectors or chunks of columns).
Whenever possible, operations are dispatched on arrays, rather than on individual values. It is called “vectorized query execution” and it helps lower the cost of actual data processing.
> This idea is nothing new. It dates back to the `APL` (A programming language, 1957) and its descendants: `A +` (APL dialect), `J` (1990), `K` (1993), and `Q` (programming language from Kx Systems, 2003). Array programming is used in scientific data processing. Neither is this idea something new in relational databases: for example, it is used in the `VectorWise` system (also known as Actian Vector Analytic Database by Actian Corporation).
@ -149,13 +149,13 @@ The server implements several different interfaces:
- A TCP interface for the native ClickHouse client and for cross-server communication during distributed query execution.
- An interface for transferring data for replication.
Internally, it is just a primitive multithreaded server without coroutines or fibers. Since the server is not designed to process a high rate of simple queries but to process a relatively low rate of complex queries, each of them can process a vast amount of data for analytics.
Internally, it is just a primitive multithread server without coroutines or fibers. Since the server is not designed to process a high rate of simple queries but to process a relatively low rate of complex queries, each of them can process a vast amount of data for analytics.
The server initializes the `Context` class with the necessary environment for query execution: the list of available databases, users and access rights, settings, clusters, the process list, the query log, and so on. Interpreters use this environment.
We maintain full backward and forward compatibility for the server TCP protocol: old clients can talk to new servers, and new clients can talk to old servers. But we do not want to maintain it eternally, and we are removing support for old versions after about one year.
:::note
For most external applications, we recommend using the HTTP interface because it is simple and easy to use. The TCP protocol is more tightly linked to internal data structures: it uses an internal format for passing blocks of data, and it uses custom framing for compressed data. We haven't released a C library for that protocol because it requires linking most of the ClickHouse codebase, which is not practical.
:::
@ -178,7 +178,7 @@ To execute queries and do side activities ClickHouse allocates threads from one
Server pool is a `Poco::ThreadPool` class instance defined in `Server::main()` method. It can have at most `max_connection` threads. Every thread is dedicated to a single active connection.
Global thread pool is `GlobalThreadPool` singleton class. To allocate thread from it `ThreadFromGlobalPool` is used. It has an interface similar to `std::thread`, but pulls thread from the global pool and does all necessary initializations. It is configured with the following settings:
Global thread pool is `GlobalThreadPool` singleton class. To allocate thread from it `ThreadFromGlobalPool` is used. It has an interface similar to `std::thread`, but pulls thread from the global pool and does all necessary initialization. It is configured with the following settings:
* `max_thread_pool_size` - limit on thread count in pool.
* `max_thread_pool_free_size` - limit on idle thread count waiting for new jobs.
* `thread_pool_queue_size` - limit on scheduled job count.
@ -189,7 +189,7 @@ IO thread pool is implemented as a plain `ThreadPool` accessible via `IOThreadPo
For periodic task execution there is `BackgroundSchedulePool` class. You can register tasks using `BackgroundSchedulePool::TaskHolder` objects and the pool ensures that no task runs two jobs at the same time. It also allows you to postpone task execution to a specific instant in the future or temporarily deactivate task. Global `Context` provides a few instances of this class for different purposes. For general purpose tasks `Context::getSchedulePool()` is used.
There are also specialized thread pools for preemptable tasks. Such `IExecutableTask` task can be split into ordered sequence of jobs, called steps. To schedule these tasks in a manner allowing short tasks to be prioritied over long ones `MergeTreeBackgroundExecutor` is used. As name suggests it is used for background MergeTree related operations such as merges, mutations, fetches and moves. Pool instances are available using `Context::getCommonExecutor()` and other similar methods.
There are also specialized thread pools for preemptable tasks. Such `IExecutableTask` task can be split into ordered sequence of jobs, called steps. To schedule these tasks in a manner allowing short tasks to be prioritized over long ones `MergeTreeBackgroundExecutor` is used. As name suggests it is used for background MergeTree related operations such as merges, mutations, fetches and moves. Pool instances are available using `Context::getCommonExecutor()` and other similar methods.
No matter what pool is used for a job, at start `ThreadStatus` instance is created for this job. It encapsulates all per-thread information: thread id, query id, performance counters, resource consumption and many other useful data. Job can access it via thread local pointer by `CurrentThread::get()` call, so we do not need to pass it to every function.
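As a rough sketch of the `std::thread`-like usage described above (the header path and exact semantics are assumptions made for illustration, not verified against the source):

``` cpp
#include <Common/ThreadPool.h>  // assumed location of ThreadFromGlobalPool

/// Illustrative only: run a job on a thread borrowed from the global pool
/// instead of spawning a raw std::thread.
void runJobInBackground()
{
    ThreadFromGlobalPool worker([]
    {
        // ... do some work on the pooled thread ...
    });
    worker.join();  // joinable, like std::thread
}
```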
@ -201,7 +201,7 @@ Servers in a cluster setup are mostly independent. You can create a `Distributed
Things become more complicated when you have subqueries in IN or JOIN clauses, and each of them uses a `Distributed` table. We have different strategies for the execution of these queries.
There is no global query plan for distributed query execution. Each node has its local query plan for its part of the job. We only have simple one-pass distributed query execution: we send queries for remote nodes and then merge the results. But this is not feasible for complicated queries with high cardinality GROUP BYs or with a large amount of temporary data for JOIN. In such cases, we need to “reshuffle” data between servers, which requires additional coordination. ClickHouse does not support that kind of query execution, and we need to work on it.
There is no global query plan for distributed query execution. Each node has its local query plan for its part of the job. We only have simple one-pass distributed query execution: we send queries for remote nodes and then merge the results. But this is not feasible for complicated queries with high cardinality `GROUP BY`s or with a large amount of temporary data for JOIN. In such cases, we need to “reshuffle” data between servers, which requires additional coordination. ClickHouse does not support that kind of query execution, and we need to work on it.
## Merge Tree {#merge-tree}
@ -231,7 +231,7 @@ Replication is physical: only compressed parts are transferred between nodes, no
Besides, each replica stores its state in ZooKeeper as the set of parts and its checksums. When the state on the local filesystem diverges from the reference state in ZooKeeper, the replica restores its consistency by downloading missing and broken parts from other replicas. When there is some unexpected or broken data in the local filesystem, ClickHouse does not remove it, but moves it to a separate directory and forgets it.
:::note
The ClickHouse cluster consists of independent shards, and each shard consists of replicas. The cluster is **not elastic**, so after adding a new shard, data is not rebalanced between shards automatically. Instead, the cluster load is supposed to be adjusted to be uneven. This implementation gives you more control, and it is ok for relatively small clusters, such as tens of nodes. But for clusters with hundreds of nodes that we are using in production, this approach becomes a significant drawback. We should implement a table engine that spans across the cluster with dynamically replicated regions that could be split and balanced between clusters automatically.
:::

View File

@ -4,7 +4,7 @@ sidebar_label: Build on Mac OS X
description: How to build ClickHouse on Mac OS X
---
# How to Build ClickHouse on Mac OS X
:::info You don't have to build ClickHouse yourself!
You can install pre-built ClickHouse as described in [Quick Start](https://clickhouse.com/#quick-start). Follow **macOS (Intel)** or **macOS (Apple silicon)** installation instructions.
@ -20,9 +20,9 @@ It is also possible to compile with Apple's XCode `apple-clang` or Homebrew's `g
First install [Homebrew](https://brew.sh/)
## For Apple's Clang (discouraged): Install Xcode and Command Line Tools {#install-xcode-and-command-line-tools}
## For Apple's Clang (discouraged): Install XCode and Command Line Tools {#install-xcode-and-command-line-tools}
Install the latest [Xcode](https://apps.apple.com/am/app/xcode/id497799835?mt=12) from App Store.
Install the latest [XCode](https://apps.apple.com/am/app/xcode/id497799835?mt=12) from App Store.
Open it at least once to accept the end-user license agreement and automatically install the required components.
@ -62,7 +62,7 @@ cmake --build build
# The resulting binary will be created at: build/programs/clickhouse
```
To build using Xcode's native AppleClang compiler in Xcode IDE (this option is only for development builds and workflows, and is **not recommended** unless you know what you are doing):
To build using XCode native AppleClang compiler in XCode IDE (this option is only for development builds and workflows, and is **not recommended** unless you know what you are doing):
``` bash
cd ClickHouse
@ -71,7 +71,7 @@ mkdir build
cd build
XCODE_IDE=1 ALLOW_APPLECLANG=1 cmake -G Xcode -DCMAKE_BUILD_TYPE=Debug -DENABLE_JEMALLOC=OFF ..
cmake --open .
# ...then, in Xcode IDE select ALL_BUILD scheme and start the building process.
# ...then, in XCode IDE select ALL_BUILD scheme and start the building process.
# The resulting binary will be created at: ./programs/Debug/clickhouse
```
@ -91,9 +91,9 @@ cmake --build build
## Caveats {#caveats}
If you intend to run `clickhouse-server`, make sure to increase the system's maxfiles variable.
If you intend to run `clickhouse-server`, make sure to increase the system's `maxfiles` variable.
:::note
You'll need to use sudo.
:::

View File

@ -19,7 +19,7 @@ The following tutorial is based on the Ubuntu Linux system. With appropriate cha
### Install Git, CMake, Python and Ninja {#install-git-cmake-python-and-ninja}
``` bash
sudo apt-get install git cmake python ninja-build
sudo apt-get install git cmake ccache python3 ninja-build
```
Or cmake3 instead of cmake on older systems.
@ -130,7 +130,7 @@ Here is an example of how to install the new `cmake` from the official website:
```
wget https://github.com/Kitware/CMake/releases/download/v3.22.2/cmake-3.22.2-linux-x86_64.sh
chmod +x cmake-3.22.2-linux-x86_64.sh
./cmake-3.22.2-linux-x86_64.sh
export PATH=/home/milovidov/work/cmake-3.22.2-linux-x86_64/bin/:${PATH}
hash cmake
```
@ -163,7 +163,7 @@ ClickHouse is available in pre-built binaries and packages. Binaries are portabl
They are built for stable, prestable and testing releases as long as for every commit to master and for every pull request.
To find the freshest build from `master`, go to [commits page](https://github.com/ClickHouse/ClickHouse/commits/master), click on the first green checkmark or red cross near commit, and click to the “Details” link right after “ClickHouse Build Check”.
To find the freshest build from `master`, go to [commits page](https://github.com/ClickHouse/ClickHouse/commits/master), click on the first green check mark or red cross near commit, and click to the “Details” link right after “ClickHouse Build Check”.
## Faster builds for development: Split build configuration {#split-build}

View File

@ -19,7 +19,7 @@ cmake .. \
## CMake files types
1. ClickHouse's source CMake files (located in the root directory and in /src).
1. ClickHouse source CMake files (located in the root directory and in /src).
2. Arch-dependent CMake files (located in /cmake/*os_name*).
3. Libraries finders (search for contrib libraries, located in /contrib/*/CMakeLists.txt).
4. Contrib build CMake files (used instead of libraries' own CMake files, located in /cmake/modules)
@ -456,7 +456,7 @@ option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests"
#### If the option's state could produce unwanted (or unusual) result, explicitly warn the user.
Suppose you have an option that may strip debug symbols from the ClickHouse's part.
Suppose you have an option that may strip debug symbols from the ClickHouse part.
This can speed up the linking process, but produces a binary that cannot be debugged.
In that case, prefer explicitly raising a warning telling the developer that he may be doing something wrong.
Also, such options should be disabled where applicable.

View File

@ -31,7 +31,7 @@ If you are not sure what to do, ask a maintainer for help.
## Merge With Master
Verifies that the PR can be merged to master. If not, it will fail with the
message 'Cannot fetch mergecommit'. To fix this check, resolve the conflict as
message `Cannot fetch mergecommit`. To fix this check, resolve the conflict as
described in the [GitHub
documentation](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/resolving-a-merge-conflict-on-github),
or merge the `master` branch to your pull request branch using git.
@ -57,7 +57,7 @@ You have to specify a changelog category for your change (e.g., Bug Fix), and
write a user-readable message describing the change for [CHANGELOG.md](../whats-new/changelog/)
## Push To Dockerhub
## Push To DockerHub
Builds docker images used for build and tests, then pushes them to DockerHub.
@ -118,7 +118,7 @@ Builds ClickHouse in various configurations for use in further steps. You have t
- **Compiler**: `gcc-9` or `clang-10` (or `clang-10-xx` for other architectures e.g. `clang-10-freebsd`).
- **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Splitted** `splitted` is a [split build](../development/build.md#split-build)
- **Split** `splitted` is a [split build](../development/build.md#split-build)
- **Status**: `success` or `fail`
- **Build log**: link to the building and files copying log, useful when build failed.
- **Build time**.

View File

@ -96,9 +96,9 @@ SELECT library_name, license_type, license_path FROM system.licenses ORDER BY li
## Adding new third-party libraries and maintaining patches in third-party libraries {#adding-third-party-libraries}
1. Each third-party libary must reside in a dedicated directory under the `contrib/` directory of the ClickHouse repository. Avoid dumps/copies of external code, instead use Git's submodule feature to pull third-party code from an external upstream repository.
2. Submodules are listed in `.gitmodule`. If the external library can be used as-is, you may reference the upstream repository directly. Otherwise, i.e. the external libary requires patching/customization, create a fork of the official repository in the [Clickhouse organization in GitHub](https://github.com/ClickHouse).
1. Each third-party library must reside in a dedicated directory under the `contrib/` directory of the ClickHouse repository. Avoid dumps/copies of external code, instead use Git submodule feature to pull third-party code from an external upstream repository.
2. Submodules are listed in `.gitmodule`. If the external library can be used as-is, you may reference the upstream repository directly. Otherwise, i.e. the external library requires patching/customization, create a fork of the official repository in the [Clickhouse organization in GitHub](https://github.com/ClickHouse).
3. In the latter case, create a branch with `clickhouse/` prefix from the branch you want to integrate, e.g. `clickhouse/master` (for `master`) or `clickhouse/release/vX.Y.Z` (for a `release/vX.Y.Z` tag). The purpose of this branch is to isolate customization of the library from upstream work. For example, pulls from the upstream repository into the fork will leave all `clickhouse/` branches unaffected. Submodules in `contrib/` must only track `clickhouse/` branches of forked third-party repositories.
4. To patch a fork of a third-party library, create a dedicated branch with `clickhouse/` prefix in the fork, e.g. `clickhouse/fix-some-desaster`. Finally, merge the patch branch into the custom tracking branch (e.g. `clickhouse/master` or `clickhouse/release/vX.Y.Z`) using a PR.
5. Always create patches of third-party libraries with the official repository in mind. Once a PR of a patch branch to the `clickhouse/` branch in the fork repository is done and the submodule version in ClickHouse's official repository is bumped, consider opening another PR from the patch branch to the upstream library repository. This ensures, that 1) the contribution has more than a single use case and importance, 2) others will also benefit from it, 3) the change will not remain a maintenance burden solely on ClickHouse developers.
5. Always create patches of third-party libraries with the official repository in mind. Once a PR of a patch branch to the `clickhouse/` branch in the fork repository is done and the submodule version in ClickHouse official repository is bumped, consider opening another PR from the patch branch to the upstream library repository. This ensures, that 1) the contribution has more than a single use case and importance, 2) others will also benefit from it, 3) the change will not remain a maintenance burden solely on ClickHouse developers.
9. To update a submodule with changes in the upstream repository, first merge upstream `master` (or a new `versionX.Y.Z` tag) into the `clickhouse`-tracking branch in the fork repository. Conflicts with patches/customization will need to be resolved in this merge (see Step 4.). Once the merge is done, bump the submodule in ClickHouse to point to the new hash in the fork.

View File

@ -70,7 +70,7 @@ You can also clone the repository via https protocol:
This, however, will not let you send your changes to the server. You can still use it temporarily and add the SSH keys later replacing the remote address of the repository with `git remote` command.
You can also add original ClickHouse repos address to your local repository to pull updates from there:
You can also add original ClickHouse repo address to your local repository to pull updates from there:
git remote add upstream git@github.com:ClickHouse/ClickHouse.git
@ -177,7 +177,7 @@ If you require to build all the binaries (utilities and tests), you should run n
Full build requires about 30GB of free disk space or 15GB to build the main binaries.
When a large amount of RAM is available on build machine you should limit the number of build tasks run in parallel with `-j` param:
When a large amount of RAM is available on build machine you should limit the number of build tasks run in parallel with `-j` parameter:
ninja -j 1 clickhouse-server clickhouse-client
@ -269,7 +269,7 @@ Developing ClickHouse often requires loading realistic datasets. It is particula
Navigate to your fork repository in GitHub's UI. If you have been developing in a branch, you need to select that branch. There will be a “Pull request” button located on the screen. In essence, this means “create a request for accepting my changes into the main repository”.
A pull request can be created even if the work is not completed yet. In this case please put the word “WIP” (work in progress) at the beginning of the title, it can be changed later. This is useful for cooperative reviewing and discussion of changes as well as for running all of the available tests. It is important that you provide a brief description of your changes, it will later be used for generating release changelogs.
A pull request can be created even if the work is not completed yet. In this case please put the word “WIP” (work in progress) at the beginning of the title, it can be changed later. This is useful for cooperative reviewing and discussion of changes as well as for running all of the available tests. It is important that you provide a brief description of your changes, it will later be used for generating release changelog.
Testing will commence as soon as ClickHouse employees label your PR with a tag “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour.

View File

@ -2,7 +2,7 @@
Rust library integration will be described based on BLAKE3 hash-function integration.
The first step is forking a library and making neccessary changes for Rust and C/C++ compatibility.
The first step is forking a library and making necessary changes for Rust and C/C++ compatibility.
After forking the library repository you need to change target settings in the Cargo.toml file. Firstly, you need to switch the build to a static library. Secondly, you need to add the cbindgen crate to the crate list. We will use it later to generate a C-header automatically.
@ -51,9 +51,9 @@ pub unsafe extern "C" fn blake3_apply_shim(
}
```
This method gets C-compatible string, its size and output string pointer as input. Then, it converts C-compatible inputs into types that are used by actual library methods and calls them. After that, it should convert library methods' outputs back into C-compatible type. In that particular case library supported direct writing into pointer by method fill(), so the convertion was not needed. The main advice here is to create less methods, so you will need to do less convertions on each method call and won't create much overhead.
This method gets C-compatible string, its size and output string pointer as input. Then, it converts C-compatible inputs into types that are used by actual library methods and calls them. After that, it should convert library methods' outputs back into C-compatible type. In that particular case library supported direct writing into pointer by method fill(), so the conversion was not needed. The main advice here is to create less methods, so you will need to do less conversions on each method call and won't create much overhead.
Also, you should use attribute #[no_mangle] and extern "C" for every C-compatible attribute. Without it library can compile incorrectly and cbindgen won't launch header autogeneration.
Also, you should use attribute #[no_mangle] and `extern "C"` for every C-compatible attribute. Without it library can compile incorrectly and cbindgen won't launch header autogeneration.
After all these steps you can test your library in a small project to find all problems with compatibility or header generation. If any problems occur during header generation, you can try to configure it with cbindgen.toml file (you can find an example of it in BLAKE3 directory or a template here: [https://github.com/eqrion/cbindgen/blob/master/template.toml](https://github.com/eqrion/cbindgen/blob/master/template.toml)). If everything works correctly, you can finally integrate its methods into ClickHouse.

View File

@ -4,7 +4,7 @@ sidebar_label: C++ Guide
description: A list of recommendations regarding coding style, naming convention, formatting and more
---
# How to Write C++ Code
## General Recommendations {#general-recommendations}
@ -196,7 +196,7 @@ std::cerr << static_cast<int>(c) << std::endl;
The same is true for small methods in any classes or structs.
For templated classes and structs, do not separate the method declarations from the implementation (because otherwise they must be defined in the same translation unit).
For template classes and structs, do not separate the method declarations from the implementation (because otherwise they must be defined in the same translation unit).
**31.** You can wrap lines at 140 characters, instead of 80.
@ -285,7 +285,7 @@ Note: You can use Doxygen to generate documentation from these comments. But Dox
/// WHAT THE FAIL???
```
**14.** Do not use comments to make delimeters.
**14.** Do not use comments to make delimiters.
``` cpp
///******************************************************
@ -491,7 +491,7 @@ if (0 != close(fd))
throwFromErrno("Cannot close file " + file_name, ErrorCodes::CANNOT_CLOSE_FILE);
```
You can use assert to check invariants in code.
You can use assert to check invariant in code.
**4.** Exception types.
@ -552,9 +552,9 @@ Do not try to implement lock-free data structures unless it is your primary area
In most cases, prefer references.
**10.** const.
**10.** `const`.
Use constant references, pointers to constants, `const_iterator`, and const methods.
Use constant references, pointers to constants, `const_iterator`, and `const` methods.
Consider `const` to be default and use non-`const` only when necessary.
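A small illustration of the const-by-default guidance above (example code, not from the guide):

``` cpp
#include <cstddef>
#include <vector>

/// Constant reference parameter and const iteration variable;
/// only the accumulator is non-const because it has to change.
size_t countPositive(const std::vector<int> & values)
{
    size_t result = 0;
    for (const int value : values)
        if (value > 0)
            ++result;
    return result;
}
```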
@ -596,7 +596,7 @@ public:
AggregateFunctionPtr get(const String & name, const DataTypes & argument_types) const;
```
**15.** namespace.
**15.** `namespace`.
There is no need to use a separate `namespace` for application code.
@ -606,7 +606,7 @@ For medium to large libraries, put everything in a `namespace`.
In the library's `.h` file, you can use `namespace detail` to hide implementation details not needed for the application code.
In a `.cpp` file, you can use a `static` or anonymous namespace to hide symbols.
In a `.cpp` file, you can use a `static` or anonymous `namespace` to hide symbols.
Also, a `namespace` can be used for an `enum` to prevent the corresponding names from falling into an external `namespace` (but it's better to use an `enum class`).
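A brief sketch of that last point (illustrative names):

``` cpp
/// Preferred: a scoped enumeration keeps its names out of the enclosing scope.
enum class LogLevel { Debug, Info, Warning, Error };

/// The alternative mentioned above: wrap a plain enum in a namespace.
namespace log_level
{
    enum Value { Debug, Info, Warning, Error };
}
```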

View File

@ -4,7 +4,7 @@ sidebar_label: Testing
description: Most of ClickHouse features can be tested with functional tests and they are mandatory to use for every change in ClickHouse code that can be tested that way.
---
# ClickHouse Testing
## Functional Tests
@ -85,7 +85,7 @@ Performance tests allow to measure and compare performance of some isolated part
Each test runs one or multiple queries (possibly with combinations of parameters) in a loop.
If you want to improve performance of ClickHouse in some scenario, and if improvements can be observed on simple queries, it is highly recommended to write a performance test. It always makes sense to use `perf top` or other perf tools during your tests.
If you want to improve performance of ClickHouse in some scenario, and if improvements can be observed on simple queries, it is highly recommended to write a performance test. It always makes sense to use `perf top` or other `perf` tools during your tests.
## Test Tools and Scripts {#test-tools-and-scripts}
@ -228,7 +228,7 @@ Our Security Team did some basic overview of ClickHouse capabilities from the se
We run `clang-tidy` on per-commit basis. `clang-static-analyzer` checks are also enabled. `clang-tidy` is also used for some style checks.
We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`, `CodeQL`. You will find instructions for usage in `tests/instructions/` directory.
If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of the box.
@ -244,7 +244,7 @@ In debug build we also involve a customization of libc that ensures that no "har
Debug assertions are used extensively.
In debug build, if exception with "logical error" code (implies a bug) is being thrown, the program is terminated prematurally. It allows to use exceptions in release build but make it an assertion in debug build.
In debug build, if exception with "logical error" code (implies a bug) is being thrown, the program is terminated prematurely. It allows to use exceptions in release build but make it an assertion in debug build.
Debug version of jemalloc is used for debug builds.
Debug version of libc++ is used for debug builds.
@ -253,7 +253,7 @@ Debug version of libc++ is used for debug builds.
Data stored on disk is checksummed. Data in MergeTree tables is checksummed in three ways simultaneously* (compressed data blocks, uncompressed data blocks, the total checksum across blocks). Data transferred over network between client and server or between servers is also checksummed. Replication ensures bit-identical data on replicas.
It is required to protect from faulty hardware (bit rot on storage media, bit flips in RAM on server, bit flips in RAM of network controller, bit flips in RAM of network switch, bit flips in RAM of client, bit flips on the wire). Note that bit flips are common and likely to occur even for ECC RAM and in presense of TCP checksums (if you manage to run thousands of servers processing petabytes of data each day). [See the video (russian)](https://www.youtube.com/watch?v=ooBAQIe0KlQ).
It is required to protect from faulty hardware (bit rot on storage media, bit flips in RAM on server, bit flips in RAM of network controller, bit flips in RAM of network switch, bit flips in RAM of client, bit flips on the wire). Note that bit flips are common and likely to occur even for ECC RAM and in presence of TCP checksums (if you manage to run thousands of servers processing petabytes of data each day). [See the video (russian)](https://www.youtube.com/watch?v=ooBAQIe0KlQ).
ClickHouse provides diagnostics that will help ops engineers to find faulty hardware.

View File

@ -12,7 +12,7 @@ The table engine (type of table) determines:
- Which queries are supported, and how.
- Concurrent data access.
- Use of indexes, if present.
- Whether multithreaded request execution is possible.
- Whether multithread request execution is possible.
- Data replication parameters.
## Engine Families {#engine-families}

View File

@ -40,7 +40,7 @@ Uniqueness of rows is determined by the `ORDER BY` table section, not `PRIMARY K
When merging, `ReplacingMergeTree` from all the rows with the same sorting key leaves only one:
- The last in the selection, if `ver` not set. A selection is a set of rows in a set of parts participating in the merge. The most recently created part (the last insert) will be the last one in the selection. Thus, after deduplication, the very last row from the most recent insert will remain for each unique sorting key.
- With the maximum version, if `ver` specified.
- With the maximum version, if `ver` is specified. If `ver` is the same for several rows, then the "if `ver` is not specified" rule is applied to them, i.e. the most recently inserted row will remain.
**Query clauses**

View File

@ -1,78 +1,139 @@
---
sidebar_label: Web Analytics Data
description: Dataset consists of two tables containing anonymized web analytics data with hits and visits
description: Dataset consisting of two tables containing anonymized web analytics data with hits and visits
---
# Anonymized Web Analytics Data
Dataset consists of two tables containing anonymized web analytics data with hits (`hits_v1`) and visits (`visits_v1`).
This dataset consists of two tables containing anonymized web analytics data with hits (`hits_v1`) and visits (`visits_v1`).
The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://datasets.clickhouse.com/hits/tsv/hits_100m_obfuscated_v1.tsv.xz and as prepared partitions at https://datasets.clickhouse.com/hits/partitions/hits_100m_obfuscated_v1.tar.xz.
The tables can be downloaded as compressed `tsv.xz` files. In addition to the sample worked with in this document, an extended (7.5GB) version of the `hits` table containing 100 million rows is available as TSV at [https://datasets.clickhouse.com/hits/tsv/hits_100m_obfuscated_v1.tsv.xz](https://datasets.clickhouse.com/hits/tsv/hits_100m_obfuscated_v1.tsv.xz).
## Obtaining Tables from Prepared Partitions {#obtaining-tables-from-prepared-partitions}
## Download and ingest the data
Download and import hits table:
``` bash
curl -O https://datasets.clickhouse.com/hits/partitions/hits_v1.tar
tar xvf hits_v1.tar -C /var/lib/clickhouse # path to ClickHouse data directory
# check permissions on unpacked data, fix if required
sudo service clickhouse-server restart
clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
```
Download and import visits:
``` bash
curl -O https://datasets.clickhouse.com/visits/partitions/visits_v1.tar
tar xvf visits_v1.tar -C /var/lib/clickhouse # path to ClickHouse data directory
# check permissions on unpacked data, fix if required
sudo service clickhouse-server restart
clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
```
## Obtaining Tables from Compressed TSV File {#obtaining-tables-from-compressed-tsv-file}
Download and import hits from compressed TSV file:
### Download the hits compressed TSV file:
``` bash
curl https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz | unxz --threads=`nproc` > hits_v1.tsv
# Validate the checksum
md5sum hits_v1.tsv
# Checksum should be equal to: f3631b6295bf06989c1437491f7592cb
# now create table
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets"
# for hits_v1
clickhouse-client --query "CREATE TABLE datasets.hits_v1 ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
# for hits_100m_obfuscated
clickhouse-client --query="CREATE TABLE default.hits_100m_obfuscated (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
```
# import data
### Create the database and table
```bash
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets"
```
For hits_v1
```bash
clickhouse-client --query "CREATE TABLE datasets.hits_v1 ( WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
```
Or for hits_100m_obfuscated
```bash
clickhouse-client --query="CREATE TABLE default.hits_100m_obfuscated (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, Refresh UInt8, RefererCategoryID UInt16, RefererRegionID UInt32, URLCategoryID UInt16, URLRegionID UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, OriginalURL String, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), LocalEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, RemoteIP UInt32, WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming UInt32, DNSTiming UInt32, ConnectTiming UInt32, ResponseStartTiming UInt32, ResponseEndTiming UInt32, FetchTiming UInt32, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
```
### Import the hits data:
```bash
cat hits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.hits_v1 FORMAT TSV" --max_insert_block_size=100000
# optionally you can optimize table
clickhouse-client --query "OPTIMIZE TABLE datasets.hits_v1 FINAL"
```
Verify the count of rows
```bash
clickhouse-client --query "SELECT COUNT(*) FROM datasets.hits_v1"
```
Download and import visits from compressed tsv-file:
```response
8873898
```
### Download the visits compressed TSV file:
``` bash
curl https://datasets.clickhouse.com/visits/tsv/visits_v1.tsv.xz | unxz --threads=`nproc` > visits_v1.tsv
# Validate the checksum
md5sum visits_v1.tsv
# Checksum should be equal to: 6dafe1a0f24e59e3fc2d0fed85601de6
# now create table
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS datasets"
```
### Create the visits table
```bash
clickhouse-client --query "CREATE TABLE datasets.visits_v1 ( CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32), WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8, PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16), CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, 
ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192"
# import data
```
### Import the visits data
```bash
cat visits_v1.tsv | clickhouse-client --query "INSERT INTO datasets.visits_v1 FORMAT TSV" --max_insert_block_size=100000
# optionally you can optimize table
clickhouse-client --query "OPTIMIZE TABLE datasets.visits_v1 FINAL"
```
Verify the count
```bash
clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1"
```
```response
1680609
```
## Example Queries {#example-queries}
[The ClickHouse tutorial](../../tutorial.md) is based on this web analytics dataset, and the recommended way to get started with this dataset is to go through the tutorial.
## An example JOIN
The hits and visits datasets are used in the ClickHouse test routines; the query below is one of the queries from the test suite. The rest of the tests are referenced in the *Next Steps* section at the end of this page. Additional examples of queries to these tables can be found among the [stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) of ClickHouse (they are named `test.hits` and `test.visits` there).
```bash
clickhouse-client --query "SELECT
EventDate,
hits,
visits
FROM
(
SELECT
EventDate,
count() AS hits
FROM datasets.hits_v1
GROUP BY EventDate
) ANY LEFT JOIN
(
SELECT
StartDate AS EventDate,
sum(Sign) AS visits
FROM datasets.visits_v1
GROUP BY EventDate
) USING EventDate
ORDER BY hits DESC
LIMIT 10
SETTINGS joined_subquery_requires_alias = 0
FORMAT PrettyCompact"
```
```response
┌──EventDate─┬────hits─┬─visits─┐
│ 2014-03-17 │ 1406958 │ 265108 │
│ 2014-03-19 │ 1405797 │ 261624 │
│ 2014-03-18 │ 1383658 │ 258723 │
│ 2014-03-20 │ 1353623 │ 255328 │
│ 2014-03-21 │ 1245779 │ 236232 │
│ 2014-03-23 │ 1046491 │ 202212 │
│ 2014-03-22 │ 1031592 │ 197354 │
└────────────┴─────────┴────────┘
```
## Next Steps
[A Practical Introduction to Sparse Primary Indexes in ClickHouse](../../guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-intro.md) uses the hits dataset to discuss the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices.
Additional examples of queries to these tables can be found among the ClickHouse [stateful tests](https://github.com/ClickHouse/ClickHouse/blob/d7129855757f38ceec3e4ecc6dafacdabe9b178f/tests/queries/1_stateful/00172_parallel_join.sql).
:::note
The test suite uses a database named `test`, and the tables are named `hits` and `visits`. You can rename your database and tables, or edit the SQL from the test file.
:::
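If you prefer to match the names used by the test suite, a minimal sketch follows (it assumes the data was imported into `datasets.hits_v1` and `datasets.visits_v1` as shown above; the renames themselves are a suggestion, not part of the original instructions):
```bash
# Hypothetical renames so queries copied from the stateful tests run unmodified.
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"
clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits, datasets.visits_v1 TO test.visits"
```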

View File

@ -190,8 +190,7 @@ sudo ./clickhouse install
### From Precompiled Binaries for Non-Standard Environments {#from-binaries-non-linux}
For non-Linux operating systems and for AArch64 CPU arhitecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay).
For non-Linux operating systems and for AArch64 CPU architecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay).
- [MacOS x86_64](https://builds.clickhouse.com/master/macos/clickhouse)
```bash

View File

@ -119,7 +119,7 @@ Dates with times are written in the format `YYYY-MM-DD hh:mm:ss` and parsed in t
This all occurs in the system time zone at the time the client or server starts (depending on which of them formats data). For dates with times, daylight saving time is not specified. So if a dump has times during daylight saving time, the dump does not unequivocally match the data, and parsing will select one of the two times.
During a read operation, incorrect dates and dates with times can be parsed with natural overflow or as null dates and times, without an error message.
As an exception, parsing dates with times is also supported in Unix timestamp format, if it consists of exactly 10 decimal digits. The result is not time zone-dependent. The formats YYYY-MM-DD hh:mm:ss and NNNNNNNNNN are differentiated automatically.
As an exception, parsing dates with times is also supported in Unix timestamp format, if it consists of exactly 10 decimal digits. The result is not time zone-dependent. The formats `YYYY-MM-DD hh:mm:ss` and `NNNNNNNNNN` are differentiated automatically.
Strings are output with backslash-escaped special characters. The following escape sequences are used for output: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\'`, `\\`. Parsing also supports the sequences `\a`, `\v`, and `\xHH` (hex escape sequences) and any `\c` sequences, where `c` is any character (these sequences are converted to `c`). Thus, reading data supports formats where a line feed can be written as `\n` or `\`, or as a line feed. For example, the string `Hello world` with a line feed between the words instead of space can be parsed in any of the following variations:
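As a rough illustration of the escaping rules above (the table `t` with a single `String` column is an assumption introduced for this sketch, not part of the original example), a backslash-escaped line feed survives a TSV round trip:
```bash
clickhouse-client --query "CREATE TABLE IF NOT EXISTS t (s String) ENGINE = Memory"
# The data line contains the two characters '\' and 'n'; TSV parsing turns them into a real line feed.
printf 'Hello\\nworld\n' | clickhouse-client --query "INSERT INTO t FORMAT TabSeparated"
clickhouse-client --query "SELECT s FROM t FORMAT TSVRaw"
```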
@ -333,8 +333,9 @@ Total rows: 2
```
``` sql
INSERT INTO UserActivity FORMAT Template SETTINGS
INSERT INTO UserActivity SETTINGS
format_template_resultset = '/some/path/resultset.format', format_template_row = '/some/path/row.format'
FORMAT Template
```
`/some/path/resultset.format`:
@ -359,8 +360,9 @@ Similar to `Template`, but skips whitespace characters between delimiters and va
Its possible to read `JSON` using this format, if values of columns have the same order in all rows. For example, the following request can be used for inserting data from output example of format [JSON](#json):
``` sql
INSERT INTO table_name FORMAT TemplateIgnoreSpaces SETTINGS
INSERT INTO table_name SETTINGS
format_template_resultset = '/some/path/resultset.format', format_template_row = '/some/path/row.format', format_template_rows_between_delimiter = ','
FORMAT TemplateIgnoreSpaces
```
`/some/path/resultset.format`:
@ -816,7 +818,7 @@ Columns that are not present in the block will be filled with default values (yo
## JSONEachRow {#jsoneachrow}
In this format, CliskHouse outputs each row as a separated, newline-delimited JSON Object.
In this format, ClickHouse outputs each row as a separated, newline-delimited JSON Object.
Example:
@ -1337,7 +1339,7 @@ Arrays can be nested and can have a value of the `Nullable` type as an argument.
You can insert CapnProto data from a file into ClickHouse table by the following command:
``` bash
$ cat capnproto_messages.bin | clickhouse-client --query "INSERT INTO test.hits FORMAT CapnProto SETTINGS format_schema = 'schema:Message'"
$ cat capnproto_messages.bin | clickhouse-client --query "INSERT INTO test.hits SETTINGS format_schema = 'schema:Message' FORMAT CapnProto"
```
Where `schema.capnp` looks like this:
@ -1363,9 +1365,9 @@ Columns `name` ([String](../sql-reference/data-types/string.md)) and `value` (nu
Rows may optionally contain `help` ([String](../sql-reference/data-types/string.md)) and `timestamp` (number).
Column `type` ([String](../sql-reference/data-types/string.md)) is either `counter`, `gauge`, `histogram`, `summary`, `untyped` or empty.
Each metric value may also have some `labels` ([Map(String, String)](../sql-reference/data-types/map.md)).
Several consequent rows may refer to the one metric with different lables. The table should be sorted by metric name (e.g., with `ORDER BY name`).
Several consequent rows may refer to the one metric with different labels. The table should be sorted by metric name (e.g., with `ORDER BY name`).
There's special requirements for labels for `histogram` and `summary`, see [Prometheus doc](https://prometheus.io/docs/instrumenting/exposition_formats/#histograms-and-summaries) for the details. Special rules applied to row with labels `{'count':''}` and `{'sum':''}`, they'll be convered to `<metric_name>_count` and `<metric_name>_sum` respectively.
There's special requirements for labels for `histogram` and `summary`, see [Prometheus doc](https://prometheus.io/docs/instrumenting/exposition_formats/#histograms-and-summaries) for the details. Special rules applied to row with labels `{'count':''}` and `{'sum':''}`, they'll be converted to `<metric_name>_count` and `<metric_name>_sum` respectively.
**Example:**
@ -1439,7 +1441,7 @@ SELECT * FROM test.table FORMAT Protobuf SETTINGS format_schema = 'schemafile:Me
```
``` bash
cat protobuf_messages.bin | clickhouse-client --query "INSERT INTO test.table FORMAT Protobuf SETTINGS format_schema='schemafile:MessageType'"
cat protobuf_messages.bin | clickhouse-client --query "INSERT INTO test.table SETTINGS format_schema='schemafile:MessageType' FORMAT Protobuf"
```
where the file `schemafile.proto` looks like this:
@ -1665,7 +1667,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-e
### Parquet format settings {#parquet-format-settings}
- [output_format_parquet_row_group_size](../operations/settings/settings.md#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`.
- [output_format_parquet_row_group_size](../operations/settings/settings.md#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`.
- [output_format_parquet_string_as_string](../operations/settings/settings.md#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`.
- [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) table in Parquet input format. Default value - `false`.
- [input_format_parquet_case_insensitive_column_matching](../operations/settings/settings.md#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`.
@ -1845,7 +1847,7 @@ When working with the `Regexp` format, you can use the following settings:
- Quoted (similarly to [Values](#data-format-values))
- Raw (extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](#tabseparatedraw))
- `format_regexp_skip_unmatched` — [UInt8](../sql-reference/data-types/int-uint.md). Defines the need to throw an exeption in case the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`.
- `format_regexp_skip_unmatched` — [UInt8](../sql-reference/data-types/int-uint.md). Defines the need to throw an exception in case the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`.
**Usage**
@ -1875,7 +1877,7 @@ CREATE TABLE imp_regex_table (id UInt32, array Array(UInt32), string String, dat
Import command:
```bash
$ cat data.tsv | clickhouse-client --query "INSERT INTO imp_regex_table FORMAT Regexp SETTINGS format_regexp='id: (.+?) array: (.+?) string: (.+?) date: (.+?)', format_regexp_escaping_rule='Escaped', format_regexp_skip_unmatched=0;"
$ cat data.tsv | clickhouse-client --query "INSERT INTO imp_regex_table SETTINGS format_regexp='id: (.+?) array: (.+?) string: (.+?) date: (.+?)', format_regexp_escaping_rule='Escaped', format_regexp_skip_unmatched=0 FORMAT Regexp;"
```
Query:

View File

@ -422,7 +422,7 @@ Now `rule` can configure `method`, `headers`, `url`, `handler`:
- `query` — use with `predefined_query_handler` type, executes query when the handler is called.
- `query_param_name` — use with `dynamic_query_handler` type, extracts and executes the value corresponding to the `query_param_name` value in HTTP request params.
- `query_param_name` — use with `dynamic_query_handler` type, extracts and executes the value corresponding to the `query_param_name` value in HTTP request parameters.
- `status` — use with `static` type, response status code.
@ -477,9 +477,9 @@ In one `predefined_query_handler` only supports one `query` of an insert type.
### dynamic_query_handler {#dynamic_query_handler}
In `dynamic_query_handler`, the query is written in the form of param of the HTTP request. The difference is that in `predefined_query_handler`, the query is written in the configuration file. You can configure `query_param_name` in `dynamic_query_handler`.
In `dynamic_query_handler`, the query is written in the form of parameter of the HTTP request. The difference is that in `predefined_query_handler`, the query is written in the configuration file. You can configure `query_param_name` in `dynamic_query_handler`.
ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query` . It is an optional configuration. If there is no definition in the configuration file, the param is not passed in.
ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query` . It is an optional configuration. If there is no definition in the configuration file, the parameter is not passed in.
To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_final_threads` and queries whether the settings were set successfully.
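A hedged sketch of calling such a handler; the URL `/run` and the parameter name `my_query` are assumptions that must match the `<url>` and `<query_param_name>` of your own rule:
```bash
# Assumes a dynamic_query_handler rule matching /run with <query_param_name>my_query</query_param_name>
curl "http://localhost:8123/run?my_query=SELECT%201"
```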

View File

@ -5,7 +5,7 @@ sidebar_label: PostgreSQL Interface
# PostgreSQL Interface
ClickHouse supports the PostgreSQL wire protocol, which allows you to use Postgres clients to connect to ClickHouse. In a sense, ClickHouse can pretend to be a PostgreSQL instance - allowing you to connect a PostgreSQL client application to ClickHouse that is not already directy supported by ClickHouse (for example, Amazon Redshift).
ClickHouse supports the PostgreSQL wire protocol, which allows you to use Postgres clients to connect to ClickHouse. In a sense, ClickHouse can pretend to be a PostgreSQL instance - allowing you to connect a PostgreSQL client application to ClickHouse that is not already directly supported by ClickHouse (for example, Amazon Redshift).
To enable the PostgreSQL wire protocol, add the [postgresql_port](../operations/server-configuration-parameters/settings#server_configuration_parameters-postgresql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
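A minimal sketch of such a file (the file name and the port `9005` are illustrative choices, not requirements):
```bash
cat > /etc/clickhouse-server/config.d/postgresql.xml <<'EOF'
<clickhouse>
    <postgresql_port>9005</postgresql_port>
</clickhouse>
EOF
# Restart with whatever mechanism your installation uses; systemd is assumed here.
sudo systemctl restart clickhouse-server
```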
@ -59,7 +59,7 @@ The PostgreSQL protocol currently only supports plain-text passwords.
## Using SSL
If you have SSL/TLS configured on your ClickHouse instance, then `postgresql_port` will use the same settings (the port is shared for both secure and unsecure clients).
If you have SSL/TLS configured on your ClickHouse instance, then `postgresql_port` will use the same settings (the port is shared for both secure and insecure clients).
Each client has their own method of how to connect using SSL. The following command demonstrates how to pass in the certificates and key to securely connect `psql` to ClickHouse:

View File

@ -47,6 +47,8 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasnt don
- [ClickHouse (Ruby)](https://github.com/shlima/click_house)
- [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord)
- Rust
- [clickhouse.rs](https://github.com/loyd/clickhouse.rs)
- [clickhouse-rs](https://github.com/suharev7/clickhouse-rs)
- [Klickhouse](https://github.com/Protryon/klickhouse)
- R
- [clickhouse-r](https://github.com/hannesmuehleisen/clickhouse-r)

View File

@ -53,7 +53,7 @@ Internal coordination settings are located in the `<keeper_server>.<coordination
- `auto_forwarding` — Allow to forward write requests from followers to the leader (default: true).
- `shutdown_timeout` — Wait to finish internal connections and shutdown (ms) (default: 5000).
- `startup_timeout` — If the server doesn't connect to other quorum participants in the specified timeout it will terminate (ms) (default: 30000).
- `four_letter_word_white_list` — White list of 4lw commands (default: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro").
- `four_letter_word_white_list` — White list of 4lw commands (default: `conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro`).
Quorum configuration is located in the `<keeper_server>.<raft_configuration>` section and contain servers description.
@ -122,7 +122,7 @@ clickhouse keeper --config /etc/your_path_to_config/config.xml
ClickHouse Keeper also provides 4lw commands which are almost the same with Zookeeper. Each command is composed of four letters such as `mntr`, `stat` etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on server and connections respectively.
The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro".
The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro`.
You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port.
@ -132,7 +132,7 @@ echo mntr | nc localhost 9181
Below are the detailed 4lw commands:
- `ruok`: Tests if server is running in a non-error state. The server will respond with imok if it is running. Otherwise it will not respond at all. A response of "imok" does not necessarily indicate that the server has joined the quorum, just that the server process is active and bound to the specified client port. Use "stat" for details on state wrt quorum and client connection information.
- `ruok`: Tests if server is running in a non-error state. The server will respond with `imok` if it is running. Otherwise it will not respond at all. A response of `imok` does not necessarily indicate that the server has joined the quorum, just that the server process is active and bound to the specified client port. Use "stat" for details on state wrt quorum and client connection information.
```
imok
@ -330,9 +330,9 @@ E.g. for a 3-node cluster, it will continue working correctly if only 1 node cra
Cluster configuration can be dynamically configured but there are some limitations. Reconfiguration relies on Raft also
so to add/remove a node from the cluster you need to have a quorum. If you lose too many nodes in your cluster at the same time without any chance
of starting them again, Raft will stop working and not allow you to reconfigure your cluster using the convenvtional way.
of starting them again, Raft will stop working and not allow you to reconfigure your cluster using the conventional way.
Nevertheless, Clickhouse Keeper has a recovery mode which allows you to forcfully reconfigure your cluster with only 1 node.
Nevertheless, Clickhouse Keeper has a recovery mode which allows you to forcefully reconfigure your cluster with only 1 node.
This should be done only as your last resort if you cannot start your nodes again, or start a new instance on the same endpoint.
Important things to note before continuing:

View File

@ -57,7 +57,7 @@ Substitutions can also be performed from ZooKeeper. To do this, specify the attr
The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`.
Users configuration can be splitted into separate files similar to `config.xml` and `config.d/`.
Users configuration can be split into separate files similar to `config.xml` and `config.d/`.
Directory name is defined as `users_config` setting without `.xml` postfix concatenated with `.d`.
Directory `users.d` is used by default, as `users_config` defaults to `users.xml`.
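A hedged sketch of splitting one user out into `users.d` (the file name and the `alice` user are purely illustrative):
```bash
cat > /etc/clickhouse-server/users.d/alice.xml <<'EOF'
<clickhouse>
    <users>
        <alice>
            <password></password>
            <networks><ip>::1</ip></networks>
            <profile>default</profile>
            <quota>default</quota>
        </alice>
    </users>
</clickhouse>
EOF
```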

View File

@ -70,7 +70,7 @@ Regardless of RAID use, always use replication for data security.
Enable NCQ with a long queue. For HDD, choose the CFQ scheduler, and for SSD, choose noop. Dont reduce the readahead setting.
For HDD, enable the write cache.
Make sure that [fstrim](https://en.wikipedia.org/wiki/Trim_(computing)) is enabled for NVME and SSD disks in your OS (usually it's implemented using a cronjob or systemd service).
Make sure that [`fstrim`](https://en.wikipedia.org/wiki/Trim_(computing)) is enabled for NVME and SSD disks in your OS (usually it's implemented using a cronjob or systemd service).
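On systemd-based distributions, one common way to do this is the `fstrim.timer` unit shipped with util-linux (a sketch; your distribution may schedule TRIM differently):
```bash
sudo systemctl enable --now fstrim.timer
systemctl status fstrim.timer   # should show the timer as active
```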
## File System {#file-system}
@ -94,7 +94,7 @@ Use at least a 10 GB network, if possible. 1 Gb will also work, but it will be m
## Huge Pages {#huge-pages}
If you are using old Linux kernel, disable transparent huge pages. It interferes with memory allocators, which leads to significant performance degradation.
If you are using old Linux kernel, disable transparent huge pages. It interferes with memory allocator, which leads to significant performance degradation.
On newer Linux kernels transparent huge pages are alright.
``` bash
@ -107,7 +107,7 @@ If you are using OpenStack, set
```
cpu_mode=host-passthrough
```
in nova.conf.
in `nova.conf`.
If you are using libvirt, set
```
@ -136,7 +136,7 @@ Do not change `minSessionTimeout` setting, large values may affect ClickHouse re
With the default settings, ZooKeeper is a time bomb:
> The ZooKeeper server wont delete files from old snapshots and logs when using the default configuration (see autopurge), and this is the responsibility of the operator.
> The ZooKeeper server wont delete files from old snapshots and logs when using the default configuration (see `autopurge`), and this is the responsibility of the operator.
This bomb must be defused.
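A sketch of the corresponding `zoo.cfg` settings (the retention values are illustrative, not a recommendation from this guide):
```bash
cat >> /etc/zookeeper/conf/zoo.cfg <<'EOF'
autopurge.snapRetainCount=3
autopurge.purgeInterval=1
EOF
```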
@ -241,7 +241,7 @@ JAVA_OPTS="-Xms{{ '{{' }} cluster.get('xms','128M') {{ '}}' }} \
-XX:MaxGCPauseMillis=50"
```
Salt init:
Salt initialization:
``` text
description "zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }} centralized coordination service"

View File

@ -3,7 +3,7 @@ sidebar_position: 46
sidebar_label: Troubleshooting
---
# Troubleshooting
# Troubleshooting
- [Installation](#troubleshooting-installation-errors)
- [Connecting to the server](#troubleshooting-accepts-no-connections)
@ -26,7 +26,7 @@ Possible issues:
### Server Is Not Running {#server-is-not-running}
**Check if server is runnnig**
**Check if server is running**
Command:

View File

@ -4,7 +4,7 @@ sidebar_label: H3 Indexes
# Functions for Working with H3 Indexes
[H3](https://eng.uber.com/h3/) is a geographical indexing system where Earths surface divided into a grid of even hexagonal cells. This system is hierarchical, i. e. each hexagon on the top level ("parent") can be splitted into seven even but smaller ones ("children"), and so on.
[H3](https://eng.uber.com/h3/) is a geographical indexing system where Earths surface divided into a grid of even hexagonal cells. This system is hierarchical, i. e. each hexagon on the top level ("parent") can be split into seven even but smaller ones ("children"), and so on.
The level of the hierarchy is called `resolution` and can receive a value from `0` till `15`, where `0` is the `base` level with the largest and coarsest cells.
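For instance, a point can be indexed at a given resolution with `geoToH3` (a sketch; the coordinates are arbitrary):
```bash
# geoToH3 takes (lon, lat, resolution) and returns the UInt64 H3 index.
clickhouse-client --query "SELECT geoToH3(37.79506683, 55.71290588, 10)"
```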
@ -1398,4 +1398,4 @@ Result:
│ [(37.42012867767779,-122.03773496427027),(37.33755608435299,-122.090428929044)] │
└─────────────────────────────────────────────────────────────────────────────────┘
```
[Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/h3) <!--hide-->
[Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/h3) <!--hide-->

View File

@ -174,22 +174,24 @@ Result:
Creating `test_function_sum_json` with named arguments and format [JSONEachRow](../../interfaces/formats.md#jsoneachrow) using XML configuration.
File test_function.xml.
```xml
<function>
<type>executable</type>
<name>test_function_sum_json</name>
<return_type>UInt64</return_type>
<return_name>result_name</return_name>
<argument>
<type>UInt64</type>
<name>argument_1</name>
</argument>
<argument>
<type>UInt64</type>
<name>argument_2</name>
</argument>
<format>JSONEachRow</format>
<command>test_function_sum_json.py</command>
</function>
<functions>
<function>
<type>executable</type>
<name>test_function_sum_json</name>
<return_type>UInt64</return_type>
<return_name>result_name</return_name>
<argument>
<type>UInt64</type>
<name>argument_1</name>
</argument>
<argument>
<type>UInt64</type>
<name>argument_2</name>
</argument>
<format>JSONEachRow</format>
<command>test_function_sum_json.py</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_sum_json.py`.
@ -224,6 +226,50 @@ Result:
└──────────────────────────────┘
```
Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type).
File test_function_parameter_python.xml.
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_parameter_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
</argument>
<format>TabSeparated</format>
<command>test_function_parameter_python.py {test_parameter:UInt64}</command>
</function>
</functions>
```
Script file inside `user_scripts` folder `test_function_parameter_python.py`.
```python
#!/usr/bin/python3
import sys
if __name__ == "__main__":
for line in sys.stdin:
print("Parameter " + str(sys.argv[1]) + " value " + str(line), end="")
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_parameter_python(1)(2);
```
Result:
``` text
┌─test_function_parameter_python(1)(2)─┐
│ Parameter 1 value 2 │
└──────────────────────────────────────┘
```
## Error Handling
Some functions might throw an exception if the data is invalid. In this case, the query is canceled and an error text is returned to the client. For distributed processing, when an exception occurs on one of the servers, the other servers also attempt to abort the query.

View File

@ -32,7 +32,7 @@ Integer value in the `Int8`, `Int16`, `Int32`, `Int64`, `Int128` or `Int256` dat
Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.
The behavior of functions for the [NaN and Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf) arguments is undefined. Remember about [numeric convertions issues](#numeric-conversion-issues), when using the functions.
The behavior of functions for the [NaN and Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions.
**Example**
@ -131,7 +131,7 @@ Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data typ
Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.
The behavior of functions for negative agruments and for the [NaN and Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric convertions issues](#numeric-conversion-issues), when using the functions.
The behavior of functions for negative arguments and for the [NaN and Inf](../../sql-reference/data-types/float.md#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions.
**Example**
@ -689,7 +689,7 @@ x::t
- Converted value.
:::note
:::note
If the input value does not fit the bounds of the target type, the result overflows. For example, `CAST(-1, 'UInt8')` returns `255`.
:::
@ -1433,7 +1433,7 @@ Result:
Converts a `DateTime64` to an `Int64` value with fixed sub-second precision. The input value is scaled up or down appropriately depending on its precision.
:::note
:::note
The output value is a timestamp in UTC, not in the timezone of `DateTime64`.
:::
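For example, with one of the functions in this family (assuming `toUnixTimestamp64Milli`; the timestamp literal is arbitrary):
```bash
# The returned Int64 counts milliseconds since the Unix epoch in UTC,
# regardless of the time zone attached to the DateTime64 value.
clickhouse-client --query "SELECT toUnixTimestamp64Milli(toDateTime64('2019-09-16 19:20:12.345', 3, 'Europe/Moscow'))"
```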

View File

@ -38,7 +38,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
При слиянии `ReplacingMergeTree` оставляет только строку для каждого уникального ключа сортировки:
- Последнюю в выборке, если `ver` не задан. Под выборкой здесь понимается набор строк в наборе кусков данных, участвующих в слиянии. Последний по времени создания кусок (последняя вставка) будет последним в выборке. Таким образом, после дедупликации для каждого значения ключа сортировки останется самая последняя строка из самой последней вставки.
- С максимальной версией, если `ver` задан.
- С максимальной версией, если `ver` задан. Если `ver` одинаковый у нескольких строк, то для них используется правило -- если `ver` не задан, т.е. в результате слияния останется самая последняя строка из самой последней вставки.
**Секции запроса**

View File

@ -41,6 +41,8 @@ sidebar_label: "Клиентские библиотеки от сторонни
- [ClickHouse (Ruby)](https://github.com/shlima/click_house)
- [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord)
- Rust
- [clickhouse.rs](https://github.com/loyd/clickhouse.rs)
- [clickhouse-rs](https://github.com/suharev7/clickhouse-rs)
- [Klickhouse](https://github.com/Protryon/klickhouse)
- R
- [clickhouse-r](https://github.com/hannesmuehleisen/clickhouse-r)

View File

@ -3,23 +3,21 @@ sidebar_position: 66
sidebar_label: ClickHouse Keeper
---
# [пре-продакшн] ClickHouse Keeper {#clickHouse-keeper}
# ClickHouse Keeper {#clickHouse-keeper}
Сервер ClickHouse использует сервис координации [ZooKeeper](https://zookeeper.apache.org/) для [репликации](../engines/table-engines/mergetree-family/replication.md) данных и выполнения [распределенных DDL запросов](../sql-reference/distributed-ddl.md). ClickHouse Keeper — это альтернативный сервис координации, совместимый с ZooKeeper.
:::danger "Предупреждение"
ClickHouse Keeper находится в стадии пре-продакшн и тестируется в CI ClickHouse и на нескольких внутренних инсталляциях.
## Детали реализации {#implementation-details}
ZooKeeper — один из первых широко известных сервисов координации с открытым исходным кодом. Он реализован на языке программирования Java, имеет достаточно простую и мощную модель данных. Алгоритм координации Zookeeper называется ZAB (ZooKeeper Atomic Broadcast). Он не гарантирует линеаризуемость операций чтения, поскольку каждый узел ZooKeeper обслуживает чтения локально. В отличие от ZooKeeper, ClickHouse Keeper реализован на C++ и использует алгоритм [RAFT](https://raft.github.io/), [реализация](https://github.com/eBay/NuRaft). Этот алгоритм позволяет достичь линеаризуемости чтения и записи, имеет несколько реализаций с открытым исходным кодом на разных языках.
По умолчанию ClickHouse Keeper предоставляет те же гарантии, что и ZooKeeper (линеаризуемость записей, последовательная согласованность чтений). У него есть совместимый клиент-серверный протокол, поэтому любой стандартный клиент ZooKeeper может использоваться для взаимодействия с ClickHouse Keeper. Снэпшоты и журналы имеют несовместимый с ZooKeeper формат, однако можно конвертировать данные Zookeeper в снэпшот ClickHouse Keeper с помощью `clickhouse-keeper-converter`. Межсерверный протокол ClickHouse Keeper также несовместим с ZooKeeper, поэтому создание смешанного кластера ZooKeeper / ClickHouse Keeper невозможно.
По умолчанию ClickHouse Keeper предоставляет те же гарантии, что и ZooKeeper (линеаризуемость записей, нелинеаризуемость чтений). ClickHouse Keeper предоставляет совместимый клиент-серверный протокол, поэтому любой стандартный клиент ZooKeeper может использоваться для взаимодействия с ClickHouse Keeper. Снэпшоты и журналы имеют несовместимый с ZooKeeper формат, однако можно конвертировать данные Zookeeper в снэпшот ClickHouse Keeper с помощью `clickhouse-keeper-converter`. Межсерверный протокол ClickHouse Keeper также несовместим с ZooKeeper, поэтому создание смешанного кластера ZooKeeper / ClickHouse Keeper невозможно.
Система управления доступом (ACL) ClickHouse Keeper реализована так же, как в [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl). ClickHouse Keeper поддерживает тот же набор разрешений и идентичные схемы: `world`, `auth`, `digest`, `host` и `ip`. Digest для аутентификации использует пару значений `username:password`. Пароль кодируется в Base64.
Система управления доступом (ACL) ClickHouse Keeper реализована так же, как в [ZooKeeper](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_ZooKeeperAccessControl). ClickHouse Keeper поддерживает тот же набор разрешений и идентичные схемы: `world`, `auth`, `digest`. Digest для аутентификации использует пару значений `username:password`. Пароль кодируется в Base64.
:::info "Примечание"
:::note
Внешние интеграции не поддерживаются.
:::
## Конфигурация {#configuration}
@ -27,34 +25,36 @@ ClickHouse Keeper может использоваться как равноце
- `tcp_port` — порт для подключения клиента (по умолчанию для ZooKeeper: `2181`).
- `tcp_port_secure` — зашифрованный порт для SSL-соединения между клиентом и сервером сервиса.
- `server_id` — уникальный идентификатор сервера, каждый участник кластера должен иметь уникальный номер&nbsp;(1,&nbsp;2,&nbsp;3&nbsp;и&nbsp;т.&nbsp;д.).
- `log_storage_path` — путь к журналам координации, лучше хранить их на незанятом устройстве (актуально и для ZooKeeper).
- `server_id` — уникальный идентификатор сервера, каждый участник кластера должен иметь уникальный номер (1, 2, 3 и т.д.).
- `log_storage_path` — путь к журналам координации, лучше хранить их на не нагруженном устройстве (актуально и для ZooKeeper).
- `snapshot_storage_path` — путь к снэпшотам координации.
Другие общие параметры наследуются из конфигурации сервера ClickHouse (`listen_host`, `logger`, и т. д.).
Настройки внутренней координации находятся в `<keeper_server>.<coordination_settings>`:
- `operation_timeout_ms` — максимальное время ожидания для одной клиентской операции в миллисекундах (по умолчанию: 10000).
- `session_timeout_ms` — максимальное время ожидания для клиентской сессии в миллисекундах (по умолчанию: 30000).
- `dead_session_check_period_ms` — частота, с которой ClickHouse Keeper проверяет мертвые сессии и удаляет их, в миллисекундах (по умолчанию: 500).
- `heart_beat_interval_ms` — частота, с которой узел-лидер ClickHouse Keeper отправляет хартбиты узлам-последователям, в миллисекундах (по умолчанию: 500).
- `election_timeout_lower_bound_ms` — время, после которого последователь может инициировать выборы лидера, если не получил от него сердцебиения (по умолчанию: 1000).
- `election_timeout_upper_bound_ms` — время, после которого последователь должен инициировать выборы лидера, если не получил от него сердцебиения (по умолчанию: 2000).
- `rotate_log_storage_interval` — количество записей в журнале координации для хранения в одном файле (по умолчанию: 100000).
- `reserved_log_items` — минимальное количество записей в журнале координации которые нужно сохранять после снятия снепшота (по умолчанию: 100000).
- `snapshot_distance` — частота, с которой ClickHouse Keeper делает новые снэпшоты (по количеству записей в журналах), в миллисекундах (по умолчанию: 100000).
- `snapshots_to_keep` — количество снэпшотов для сохранения (по умолчанию: 3).
- `stale_log_gap` — время, после которого лидер считает последователя устаревшим и отправляет ему снэпшот вместо журналов (по умолчанию: 10000).
- `fresh_log_gap` — максимальное отставание от лидера в количестве записей журнала после которого последователь считает себя не отстающим (по умолчанию: 200).
- `max_requests_batch_size` — количество запросов на запись, которые будут сгруппированы в один перед отправкой через RAFT (по умолчанию: 100).
- `force_sync` — вызывать `fsync` при каждой записи в журнал координации (по умолчанию: true).
- `quorum_reads` — выполнять запросы чтения аналогично запросам записи через весь консенсус RAFT с негативным эффектом на производительность и размер журналов (по умолчанию: false).
- `raft_logs_level` — уровень логгирования сообщений в текстовый лог (trace, debug и т. д.) (по умолчанию: information).
- `auto_forwarding` — разрешить пересылку запросов на запись от последователей лидеру (по умолчанию: true).
- `shutdown_timeout` — время ожидания завершения внутренних подключений и выключения, в миллисекундах (по умолчанию: 5000).
- `dead_session_check_period_ms` — частота, с которой ClickHouse Keeper проверяет мертвые сессии и удаляет их, в миллисекундах (по умолчанию: 500).
- `election_timeout_lower_bound_ms` — время, после которого последователь может инициировать перевыбор лидера, если не получил от него контрольный сигнал (по умолчанию: 1000).
- `election_timeout_upper_bound_ms` — время, после которого последователь должен инициировать перевыбор лидера, если не получил от него контрольный сигнал (по умолчанию: 2000).
- `force_sync` — вызывать `fsync` при каждой записи в журнал координации (по умолчанию: true).
- `four_letter_word_white_list` — список разрешенных 4-х буквенных команд (по умолчанию: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro").
- `fresh_log_gap` — минимальное отставание от лидера в количестве записей журнала после которого последователь считает себя актуальным (по умолчанию: 200).
- `heart_beat_interval_ms` — частота, с которой узел-лидер ClickHouse Keeper отправляет контрольные сигналы узлам-последователям, в миллисекундах (по умолчанию: 500).
- `max_requests_batch_size` — количество запросов на запись, которые будут сгруппированы в один перед отправкой через RAFT (по умолчанию: 100).
- `min_session_timeout_ms` — Min timeout for client session (ms) (default: 10000).
- `operation_timeout_ms` — максимальное время ожидания для одной клиентской операции в миллисекундах (по умолчанию: 10000).
- `quorum_reads` — выполнять запросы чтения аналогично запросам записи через консенсус RAFT (по умолчанию: false).
- `raft_logs_level` — уровень логгирования сообщений в текстовый лог (trace, debug и т. д.) (по умолчанию: default).
- `reserved_log_items` — минимальное количество записей в журнале координации которые нужно сохранять после снятия снепшота (по умолчанию: 100000).
- `rotate_log_storage_interval` — количество записей в журнале координации для хранения в одном файле (по умолчанию: 100000).
- `session_timeout_ms` — максимальное время ожидания для клиентской сессии в миллисекундах (по умолчанию: 30000).
- `shutdown_timeout` — время ожидания завершения внутренних подключений при выключении, в миллисекундах (по умолчанию: 5000).
- `snapshot_distance` — частота, с которой ClickHouse Keeper делает новые снэпшоты (по количеству записей в журналах) (по умолчанию: 100000).
- `snapshots_to_keep` — количество снэпшотов для хранения (по умолчанию: 3).
- `stale_log_gap` — время, после которого лидер считает последователя отставшим и отправляет ему снэпшот вместо журналов (по умолчанию: 10000).
- `startup_timeout` — время отключения сервера, если он не подключается к другим участникам кворума, в миллисекундах (по умолчанию: 30000).
- `four_letter_word_allow_list` — список разрешенных 4-х буквенных команд (по умолчанию: "conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro").
Конфигурация кворума находится в `<keeper_server>.<raft_configuration>` и содержит описание серверов.
@ -67,6 +67,10 @@ ClickHouse Keeper может использоваться как равноце
- `port` — порт, на котором серверу доступны соединения для внутренней коммуникации.
:::note
В случае изменения топологии кластера ClickHouse Keeper (например, замены сервера), удостоверьтесь, что вы сохраняете отношение `server_id` - `hostname`, не переиспользуете существующие `server_id` для новых серверов и не перемешиваете идентификаторы. Подобные ошибки могут случаться, если вы используете автоматизацию при разворачивании кластера без логики сохранения идентификаторов.
:::
Примеры конфигурации кворума с тремя узлами можно найти в [интеграционных тестах](https://github.com/ClickHouse/ClickHouse/tree/master/tests/integration) с префиксом `test_keeper_`. Пример конфигурации для сервера №1:
```xml
@ -314,4 +318,31 @@ clickhouse-keeper-converter --zookeeper-logs-dir /var/lib/zookeeper/version-2 --
4. Скопируйте снэпшот на узлы сервера ClickHouse с настроенным `keeper` или запустите ClickHouse Keeper вместо ZooKeeper. Снэпшот должен сохраняться на всех узлах: в противном случае пустые узлы могут захватить лидерство и сконвертированные данные могут быть отброшены на старте.
## Восстановление после потери кворума
Так как ClickHouse Keeper основан на протоколе Raft, он может оставаться работоспособным при отказе определенного количества нод в зависимости от размера кластера.
Например, для кластера из 3 нод, алгоритм кворума продолжает работать при отказе не более чем одной ноды.
Конфигурация кластера может быть изменена динамически с некоторыми ограничениями.
Переконфигурация также использует Raft, поэтому для добавления новой ноды кластера или исключения старой ноды из него требуется достижение кворума в рамках текущей конфигурации кластера.
Если в вашем кластере произошел отказ большего числа нод, чем допускает Raft для вашей текущей конфигурации и у вас нет возможности восстановить их работоспособность, Raft перестанет работать и не позволит изменить конфигурацию стандартным механизмом.
Тем не менее ClickHouse Keeper имеет возможность запуститься в режиме восстановления, который позволяет переконфигурировать кластер, используя только одну ноду кластера.
Этот механизм может использоваться только как крайняя мера, когда вы не можете восстановить существующие ноды кластера или запустить новый сервер с тем же идентификатором.
Важно:
- Удостоверьтесь, что отказавшие ноды не смогут подключиться к кластеру в будущем.
- Не запускайте новые ноды, пока не завершите процедуру ниже.
После того, как вы выполнили описанные выше действия, выполните следующие шаги.
1. Выберите одну ноду Keeper, которая станет новым лидером. Учтите, что данные этой ноды будут использованы всем кластером, поэтому рекомендуется выбрать ноду с наиболее актуальным состоянием.
2. Перед дальнейшими действиями сделайте резервную копию данных из директорий `log_storage_path` и `snapshot_storage_path`.
3. Измените настройки на всех нодах кластера, которые вы собираетесь использовать.
4. Отправьте команду `rcvr` на ноду, которую вы выбрали или остановите ее и запустите заново с аргументом `--force-recovery`. Это переведет ноду в режим восстановления.
5. Запускайте остальные ноды кластера по одной и проверяйте, что команда `mntr` возвращает `follower` в выводе состояния `zk_server_state` перед тем, как запустить следующую ноду.
6. Пока нода работает в режиме восстановления, лидер будет возвращать ошибку на запрос `mntr`, пока кворум не будет достигнут с помощью новых нод. Любые запросы от клиентов и последователей будут возвращать ошибку.
7. После достижения кворума лидер перейдет в нормальный режим работы и станет обрабатывать все запросы через Raft. Удостоверьтесь, что запрос `mntr` возвращает `leader` в выводе состояния `zk_server_state`.
[Original article](https://clickhouse.com/docs/en/operations/clickhouse-keeper/) <!--hide-->

View File

@ -174,22 +174,24 @@ SELECT test_function_sum(2, 2);
Создание `test_function_sum_json` с именноваными аргументами и форматом [JSONEachRow](../../interfaces/formats.md#jsoneachrow) с использованием конфигурации XML.
Файл test_function.xml.
```xml
<function>
<type>executable</type>
<name>test_function_sum_json</name>
<return_type>UInt64</return_type>
<return_name>result_name</return_name>
<argument>
<type>UInt64</type>
<name>argument_1</name>
</argument>
<argument>
<type>UInt64</type>
<name>argument_2</name>
</argument>
<format>JSONEachRow</format>
<command>test_function_sum_json.py</command>
</function>
<functions>
<function>
<type>executable</type>
<name>test_function_sum_json</name>
<return_type>UInt64</return_type>
<return_name>result_name</return_name>
<argument>
<type>UInt64</type>
<name>argument_1</name>
</argument>
<argument>
<type>UInt64</type>
<name>argument_2</name>
</argument>
<format>JSONEachRow</format>
<command>test_function_sum_json.py</command>
</function>
</functions>
```
Файл скрипта внутри папки `user_scripts` `test_function_sum_json.py`.
@ -224,6 +226,50 @@ SELECT test_function_sum_json(2, 2);
└──────────────────────────────┘
```
Исполняемые пользовательские функции могут принимать константные параметры, их конфигурация является частью настройки `command` (работает только для пользовательских функций с типом `executable`).
Файл test_function_parameter_python.xml.
```xml
<functions>
<function>
<type>executable</type>
<name>test_function_parameter_python</name>
<return_type>String</return_type>
<argument>
<type>UInt64</type>
</argument>
<format>TabSeparated</format>
<command>test_function_parameter_python.py {test_parameter:UInt64}</command>
</function>
</functions>
```
Файл скрипта внутри папки `user_scripts` `test_function_parameter_python.py`.
```python
#!/usr/bin/python3
import sys
if __name__ == "__main__":
for line in sys.stdin:
print("Parameter " + str(sys.argv[1]) + " value " + str(line), end="")
sys.stdout.flush()
```
Query:
``` sql
SELECT test_function_parameter_python(1)(2);
```
Result:
``` text
┌─test_function_parameter_python(1)(2)─┐
│ Parameter 1 value 2 │
└──────────────────────────────────────┘
```
## Обработка ошибок {#obrabotka-oshibok}
Некоторые функции могут кидать исключения в случае ошибочных данных. В этом случае, выполнение запроса прерывается, и текст ошибки выводится клиенту. При распределённой обработке запроса, при возникновении исключения на одном из серверов, на другие серверы пытается отправиться просьба тоже прервать выполнение запроса.

View File

@ -45,7 +45,7 @@ CHECK TABLE test_table;
└───────────┴───────────┴─────────┘
```
Если `check_query_single_value_result` = 0, запрос `CHECK TABLE` возвращает статус таблицы в целом.
Если `check_query_single_value_result` = 1, запрос `CHECK TABLE` возвращает статус таблицы в целом.
```sql
SET check_query_single_value_result = 1;

View File

@ -41,6 +41,10 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试
- Ruby
- [ClickHouse (Ruby)](https://github.com/shlima/click_house)
- [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord)
- Rust
- [clickhouse.rs](https://github.com/loyd/clickhouse.rs)
- [clickhouse-rs](https://github.com/suharev7/clickhouse-rs)
- [Klickhouse](https://github.com/Protryon/klickhouse)
- R
- [clickhouse-r](https://github.com/hannesmuehleisen/clickhouse-r)
- [RClickHouse](https://github.com/IMSMWU/RClickHouse)

View File

@ -5,7 +5,7 @@
#include <sys/stat.h>
#include <pwd.h>
#if defined(__linux__)
#if defined(OS_LINUX)
#include <syscall.h>
#include <linux/capability.h>
#endif
@ -789,7 +789,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
* then attempt to run this file will end up with a cryptic "Operation not permitted" message.
*/
#if defined(__linux__)
#if defined(OS_LINUX)
fmt::print("Setting capabilities for clickhouse binary. This is optional.\n");
std::string command = fmt::format("command -v setcap >/dev/null"
" && command -v capsh >/dev/null"

View File

@ -2,7 +2,7 @@
#include <csetjmp>
#include <unistd.h>
#ifdef __linux__
#ifdef OS_LINUX
#include <sys/mman.h>
#endif
@ -339,7 +339,7 @@ struct Checker
checkRequiredInstructions();
}
} checker
#ifndef __APPLE__
#ifndef OS_DARWIN
__attribute__((init_priority(101))) /// Run before other static initializers.
#endif
;

View File

@ -11,7 +11,7 @@
#include <pcg_random.hpp>
#include <Common/thread_local_rng.h>
#if !defined(__APPLE__) && !defined(__FreeBSD__)
#if !defined(OS_DARWIN) && !defined(OS_FREEBSD)
#include <malloc.h>
#endif

View File

@ -1,4 +1,4 @@
#if defined(__ELF__) && !defined(__FreeBSD__)
#if defined(__ELF__) && !defined(OS_FREEBSD)
/*
* Copyright 2012-present Facebook, Inc.

View File

@ -1,6 +1,6 @@
#pragma once
#if defined(__ELF__) && !defined(__FreeBSD__)
#if defined(__ELF__) && !defined(OS_FREEBSD)
/*
* Copyright 2012-present Facebook, Inc.

View File

@ -1,4 +1,4 @@
#if defined(__ELF__) && !defined(__FreeBSD__)
#if defined(__ELF__) && !defined(OS_FREEBSD)
#include <Common/Elf.h>
#include <Common/Exception.h>

View File

@ -1,6 +1,6 @@
#pragma once
#if defined(__ELF__) && !defined(__FreeBSD__)
#if defined(__ELF__) && !defined(OS_FREEBSD)
#include <IO/MMapReadBufferFromFile.h>

View File

@ -630,6 +630,7 @@
M(659, UNKNOWN_STATUS_OF_TRANSACTION) \
M(660, HDFS_ERROR) \
M(661, CANNOT_SEND_SIGNAL) \
M(662, FS_METADATA_ERROR) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -218,7 +218,7 @@ static void getNoSpaceLeftInfoMessage(std::filesystem::path path, String & msg)
formatReadableQuantity(fs.f_favail),
mount_point);
#if defined(__linux__)
#if defined(OS_LINUX)
msg += "\nFilesystem: " + getFilesystemName(mount_point);
#endif
}
@ -230,7 +230,7 @@ static void getNoSpaceLeftInfoMessage(std::filesystem::path path, String & msg)
*/
static void getNotEnoughMemoryMessage(std::string & msg)
{
#if defined(__linux__)
#if defined(OS_LINUX)
try
{
static constexpr size_t buf_size = 1024;
@ -261,7 +261,7 @@ static void getNotEnoughMemoryMessage(std::string & msg)
}
}
if (num_maps > max_map_count * 0.99)
if (num_maps > max_map_count * 0.90)
{
msg += fmt::format(
"\nIt looks like that the process is near the limit on number of virtual memory mappings."

View File

@ -30,6 +30,11 @@ namespace
}
}
static bool isQueryInitialized()
{
return CurrentThread::isInitialized() && CurrentThread::get().getQueryContext() && CurrentThread::getQueryId().size != 0;
}
IFileCache::IFileCache(
const String & cache_base_path_,
const FileCacheSettings & cache_settings_)
@ -37,6 +42,7 @@ IFileCache::IFileCache(
, max_size(cache_settings_.max_size)
, max_element_size(cache_settings_.max_elements)
, max_file_segment_size(cache_settings_.max_file_segment_size)
, enable_filesystem_query_cache_limit(cache_settings_.enable_filesystem_query_cache_limit)
{
}
@ -59,9 +65,7 @@ String IFileCache::getPathInLocalCache(const Key & key)
bool IFileCache::isReadOnly()
{
return !CurrentThread::isInitialized()
|| !CurrentThread::get().getQueryContext()
|| CurrentThread::getQueryId().size == 0;
return (!isQueryInitialized());
}
void IFileCache::assertInitialized() const
@ -70,6 +74,73 @@ void IFileCache::assertInitialized() const
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cache not initialized");
}
IFileCache::QueryContextPtr IFileCache::getCurrentQueryContext(std::lock_guard<std::mutex> & cache_lock)
{
if (!isQueryInitialized())
return nullptr;
return getQueryContext(CurrentThread::getQueryId().toString(), cache_lock);
}
IFileCache::QueryContextPtr IFileCache::getQueryContext(const String & query_id, std::lock_guard<std::mutex> &)
{
auto query_iter = query_map.find(query_id);
return (query_iter == query_map.end()) ? nullptr : query_iter->second;
}
void IFileCache::removeQueryContext(const String & query_id)
{
std::lock_guard cache_lock(mutex);
auto query_iter = query_map.find(query_id);
if (query_iter == query_map.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to release query context that does not exist");
query_map.erase(query_iter);
}
IFileCache::QueryContextPtr IFileCache::getOrSetQueryContext(const String & query_id, const ReadSettings & settings, std::lock_guard<std::mutex> & cache_lock)
{
if (query_id.empty())
return nullptr;
auto context = getQueryContext(query_id, cache_lock);
if (!context)
{
auto query_iter = query_map.insert({query_id, std::make_shared<QueryContext>(settings.max_query_cache_size, settings.skip_download_if_exceeds_query_cache)}).first;
context = query_iter->second;
}
return context;
}
IFileCache::QueryContextHolder IFileCache::getQueryContextHolder(const String & query_id, const ReadSettings & settings)
{
std::lock_guard cache_lock(mutex);
/// If enable_filesystem_query_cache_limit is true and max_query_cache_size is greater than zero,
/// we create a query context for the current query.
if (enable_filesystem_query_cache_limit && settings.max_query_cache_size)
{
auto context = getOrSetQueryContext(query_id, settings, cache_lock);
return QueryContextHolder(query_id, this, context);
}
else
return QueryContextHolder();
}
IFileCache::QueryContextHolder::QueryContextHolder(const String & query_id_, IFileCache * cache_, IFileCache::QueryContextPtr context_)
: query_id(query_id_), cache(cache_), context(context_)
{
}
IFileCache::QueryContextHolder::~QueryContextHolder()
{
/// If only the query_map and the current holder hold the context_query,
/// the query has been completed and the query_context is released.
if (context && context.use_count() == 2)
cache->removeQueryContext(query_id);
}
LRUFileCache::LRUFileCache(const String & cache_base_path_, const FileCacheSettings & cache_settings_)
: IFileCache(cache_base_path_, cache_settings_)
, max_stash_element_size(cache_settings_.max_elements)
@ -480,8 +551,170 @@ FileSegmentsHolder LRUFileCache::setDownloading(const Key & key, size_t offset,
return FileSegmentsHolder(std::move(file_segments));
}
bool LRUFileCache::tryReserve(
const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock)
bool LRUFileCache::tryReserve(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock)
{
auto query_context = enable_filesystem_query_cache_limit ? getCurrentQueryContext(cache_lock) : nullptr;
/// If the context can be found, subsequent cache replacements are made through the Query context.
if (query_context)
{
auto res = tryReserveForQuery(key, offset, size, query_context, cache_lock);
switch (res)
{
case ReserveResult::FITS_IN_QUERY_LIMIT_AND_RESERVATION_COMPLETED :
{
/// When the maximum cache size of the query is reached, the cache will be
/// evicted from the history cache accessed by the current query.
return true;
}
case ReserveResult::EXCEEDS_QUERY_LIMIT :
{
/// The query currently does not have enough space to reserve.
/// It returns false and reads data directly from the remote fs.
return false;
}
case ReserveResult::FITS_IN_QUERY_LIMIT_NEED_RESERVE_FROM_MAIN_LIST :
{
/// When the maximum cache capacity of the request is not reached, the cache
/// block is evicted from the main LRU queue.
return tryReserveForMainList(key, offset, size, query_context, cache_lock);
}
}
__builtin_unreachable();
}
else
{
return tryReserveForMainList(key, offset, size, query_context, cache_lock);
}
}
LRUFileCache::ReserveResult LRUFileCache::tryReserveForQuery(const Key & key, size_t offset, size_t size, QueryContextPtr query_context, std::lock_guard<std::mutex> & cache_lock)
{
/// The maximum cache capacity of the request is not reached, thus the
/// cache block is evicted from the main LRU queue by tryReserveForMainList().
if (query_context->getCacheSize() + size <= query_context->getMaxCacheSize())
{
return ReserveResult::FITS_IN_QUERY_LIMIT_NEED_RESERVE_FROM_MAIN_LIST;
}
/// When skip_download_if_exceeds_query_cache is true, there is no need
/// to evict old data, skip the cache and read directly from remote fs.
else if (query_context->isSkipDownloadIfExceed())
{
return ReserveResult::EXCEEDS_QUERY_LIMIT;
}
/// The maximum cache size of the query is reached, the cache will be
/// evicted from the history cache accessed by the current query.
else
{
size_t removed_size = 0;
size_t queue_size = queue.getElementsNum(cache_lock);
auto * cell_for_reserve = getCell(key, offset, cache_lock);
std::vector<IFileCache::LRUQueue::Iterator> ghost;
std::vector<FileSegmentCell *> trash;
std::vector<FileSegmentCell *> to_evict;
auto is_overflow = [&]
{
return (max_size != 0 && queue.getTotalWeight(cache_lock) + size - removed_size > max_size)
|| (max_element_size != 0 && queue_size > max_element_size)
|| (query_context->getCacheSize() + size - removed_size > query_context->getMaxCacheSize());
};
/// Select the cache from the LRU queue held by query for expulsion.
for (auto iter = query_context->queue().begin(); iter != query_context->queue().end(); iter++)
{
if (!is_overflow())
break;
auto * cell = getCell(iter->key, iter->offset, cache_lock);
if (!cell)
{
/// The cache corresponding to this record may be swapped out by
/// other queries, so it has become invalid.
ghost.push_back(iter);
removed_size += iter->size;
}
else
{
size_t cell_size = cell->size();
assert(iter->size == cell_size);
if (cell->releasable())
{
auto & file_segment = cell->file_segment;
std::lock_guard segment_lock(file_segment->mutex);
switch (file_segment->download_state)
{
case FileSegment::State::DOWNLOADED:
{
to_evict.push_back(cell);
break;
}
default:
{
trash.push_back(cell);
break;
}
}
removed_size += cell_size;
--queue_size;
}
}
}
assert(trash.empty());
for (auto & cell : trash)
{
auto file_segment = cell->file_segment;
if (file_segment)
{
query_context->remove(file_segment->key(), file_segment->offset(), cell->size(), cache_lock);
std::lock_guard segment_lock(file_segment->mutex);
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
}
}
for (auto & iter : ghost)
query_context->remove(iter->key, iter->offset, iter->size, cache_lock);
if (is_overflow())
{
return ReserveResult::EXCEEDS_QUERY_LIMIT;
}
if (cell_for_reserve)
{
auto queue_iterator = cell_for_reserve->queue_iterator;
if (queue_iterator)
queue.incrementSize(*queue_iterator, size, cache_lock);
else
cell_for_reserve->queue_iterator = queue.add(key, offset, size, cache_lock);
}
for (auto & cell : to_evict)
{
auto file_segment = cell->file_segment;
if (file_segment)
{
query_context->remove(file_segment->key(), file_segment->offset(), cell->size(), cache_lock);
std::lock_guard<std::mutex> segment_lock(file_segment->mutex);
remove(file_segment->key(), file_segment->offset(), cache_lock, segment_lock);
}
}
query_context->reserve(key, offset, size, cache_lock);
return ReserveResult::FITS_IN_QUERY_LIMIT_NEED_RESERVE_FROM_MAIN_LIST;
}
}
bool LRUFileCache::tryReserveForMainList(
const Key & key, size_t offset, size_t size, QueryContextPtr query_context, std::lock_guard<std::mutex> & cache_lock)
{
auto removed_size = 0;
size_t queue_size = queue.getElementsNum(cache_lock);
@ -595,6 +828,9 @@ bool LRUFileCache::tryReserve(
if (queue.getTotalWeight(cache_lock) > (1ull << 63))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache became inconsistent. There must be a bug");
if (query_context)
query_context->reserve(key, offset, size, cache_lock);
return true;
}
@ -616,13 +852,18 @@ void LRUFileCache::remove(const Key & key)
for (auto & [offset, cell] : offsets)
to_remove.push_back(&cell);
bool some_cells_were_skipped = false;
for (auto & cell : to_remove)
{
/// In the ordinary case we remove data from the cache when it's not used by anyone.
/// But if we have multiple replicated zero-copy tables on the same server,
/// it becomes possible to start removing something from the cache while it is used
/// by other "zero-copy" tables. That is why it's not an error.
if (!cell->releasable())
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot remove file from cache because someone reads from it. File segment info: {}",
cell->file_segment->getInfoForLog());
{
some_cells_were_skipped = true;
continue;
}
auto file_segment = cell->file_segment;
if (file_segment)
@ -634,10 +875,13 @@ void LRUFileCache::remove(const Key & key)
auto key_path = getPathInLocalCache(key);
files.erase(key);
if (!some_cells_were_skipped)
{
files.erase(key);
if (fs::exists(key_path))
fs::remove(key_path);
if (fs::exists(key_path))
fs::remove(key_path);
}
}
void LRUFileCache::remove()
@ -844,7 +1088,6 @@ FileSegments LRUFileCache::getSnapshot() const
for (const auto & [offset, cell] : cells_by_offset)
file_segments.push_back(FileSegment::getSnapshot(cell.file_segment, cache_lock));
}
return file_segments;
}
@ -930,7 +1173,7 @@ LRUFileCache::FileSegmentCell::FileSegmentCell(
}
}
LRUFileCache::LRUQueue::Iterator LRUFileCache::LRUQueue::add(
IFileCache::LRUQueue::Iterator IFileCache::LRUQueue::add(
const IFileCache::Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & /* cache_lock */)
{
#ifndef NDEBUG
@ -948,30 +1191,30 @@ LRUFileCache::LRUQueue::Iterator LRUFileCache::LRUQueue::add(
return queue.insert(queue.end(), FileKeyAndOffset(key, offset, size));
}
void LRUFileCache::LRUQueue::remove(Iterator queue_it, std::lock_guard<std::mutex> & /* cache_lock */)
void IFileCache::LRUQueue::remove(Iterator queue_it, std::lock_guard<std::mutex> & /* cache_lock */)
{
cache_size -= queue_it->size;
queue.erase(queue_it);
}
void LRUFileCache::LRUQueue::removeAll(std::lock_guard<std::mutex> & /* cache_lock */)
void IFileCache::LRUQueue::removeAll(std::lock_guard<std::mutex> & /* cache_lock */)
{
queue.clear();
cache_size = 0;
}
void LRUFileCache::LRUQueue::moveToEnd(Iterator queue_it, std::lock_guard<std::mutex> & /* cache_lock */)
void IFileCache::LRUQueue::moveToEnd(Iterator queue_it, std::lock_guard<std::mutex> & /* cache_lock */)
{
queue.splice(queue.end(), queue, queue_it);
}
void LRUFileCache::LRUQueue::incrementSize(Iterator queue_it, size_t size_increment, std::lock_guard<std::mutex> & /* cache_lock */)
void IFileCache::LRUQueue::incrementSize(Iterator queue_it, size_t size_increment, std::lock_guard<std::mutex> & /* cache_lock */)
{
cache_size += size_increment;
queue_it->size += size_increment;
}
bool LRUFileCache::LRUQueue::contains(
bool IFileCache::LRUQueue::contains(
const IFileCache::Key & key, size_t offset, std::lock_guard<std::mutex> & /* cache_lock */) const
{
/// This method is used for assertions in debug mode.
@ -984,31 +1227,7 @@ bool LRUFileCache::LRUQueue::contains(
return false;
}
void LRUFileCache::LRUQueue::assertCorrectness(LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock)
{
[[maybe_unused]] size_t total_size = 0;
for (auto it = queue.begin(); it != queue.end();)
{
auto & [key, offset, size, _] = *it++;
auto * cell = cache->getCell(key, offset, cache_lock);
if (!cell)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cache is in inconsistent state: LRU queue contains entries with no cache cell (assertCorrectness())");
}
assert(cell->size() == size);
total_size += size;
}
assert(total_size == cache_size);
assert(cache_size <= cache->max_size);
assert(queue.size() <= cache->max_element_size);
}
String LRUFileCache::LRUQueue::toString(std::lock_guard<std::mutex> & /* cache_lock */) const
String IFileCache::LRUQueue::toString(std::lock_guard<std::mutex> & /* cache_lock */) const
{
String result;
for (const auto & [key, offset, size, _] : queue)
@ -1057,14 +1276,38 @@ void LRUFileCache::assertCacheCellsCorrectness(
void LRUFileCache::assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock)
{
assertCacheCellsCorrectness(files[key], cache_lock);
queue.assertCorrectness(this, cache_lock);
assertQueueCorrectness(cache_lock);
}
void LRUFileCache::assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock)
{
for (const auto & [key, cells_by_offset] : files)
assertCacheCellsCorrectness(files[key], cache_lock);
queue.assertCorrectness(this, cache_lock);
assertQueueCorrectness(cache_lock);
}
void LRUFileCache::assertQueueCorrectness(std::lock_guard<std::mutex> & cache_lock)
{
[[maybe_unused]] size_t total_size = 0;
for (auto it = queue.begin(); it != queue.end();)
{
auto & [key, offset, size, _] = *it++;
auto * cell = getCell(key, offset, cache_lock);
if (!cell)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cache is in inconsistent state: LRU queue contains entries with no cache cell (assertCorrectness())");
}
assert(cell->size() == size);
total_size += size;
}
assert(total_size == queue.getTotalWeight(cache_lock));
assert(queue.getTotalWeight(cache_lock) <= max_size);
assert(queue.getElementsNum(cache_lock) <= max_element_size);
}
}

View File

@ -12,6 +12,7 @@
#include <map>
#include "FileCache_fwd.h"
#include <IO/ReadSettings.h>
#include <Common/logger_useful.h>
#include <Common/FileSegment.h>
#include <Core/Types.h>
@ -20,6 +21,14 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
class IFileCache;
using FileCachePtr = std::shared_ptr<IFileCache>;
/**
* Local cache for remote filesystem files, represented as a set of non-overlapping non-empty file segments.
*/
@ -106,58 +115,6 @@ protected:
mutable std::mutex mutex;
virtual bool tryReserve(
const Key & key, size_t offset, size_t size,
std::lock_guard<std::mutex> & cache_lock) = 0;
virtual void remove(
Key key, size_t offset,
std::lock_guard<std::mutex> & cache_lock,
std::lock_guard<std::mutex> & segment_lock) = 0;
virtual bool isLastFileSegmentHolder(
const Key & key, size_t offset,
std::lock_guard<std::mutex> & cache_lock,
std::lock_guard<std::mutex> & segment_lock) = 0;
/// If file segment was partially downloaded and then space reservation fails (because of no
/// space left), then update corresponding cache cell metadata (file segment size).
virtual void reduceSizeToDownloaded(
const Key & key, size_t offset,
std::lock_guard<std::mutex> & cache_lock,
std::lock_guard<std::mutex> & segment_lock) = 0;
void assertInitialized() const;
};
using FileCachePtr = std::shared_ptr<IFileCache>;
class LRUFileCache final : public IFileCache
{
public:
LRUFileCache(
const String & cache_base_path_,
const FileCacheSettings & cache_settings_);
FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) override;
FileSegmentsHolder get(const Key & key, size_t offset, size_t size) override;
FileSegments getSnapshot() const override;
void initialize() override;
void remove(const Key & key) override;
void remove() override;
std::vector<String> tryGetCachePaths(const Key & key) override;
size_t getUsedCacheSize() const override;
size_t getFileSegmentsNum() const override;
private:
class LRUQueue
{
public:
@ -186,8 +143,6 @@ private:
/// Space reservation for a file segment is incremental, so we need to be able to increment the size of the queue entry.
void incrementSize(Iterator queue_it, size_t size_increment, std::lock_guard<std::mutex> & cache_lock);
void assertCorrectness(LRUFileCache * cache, std::lock_guard<std::mutex> & cache_lock);
String toString(std::lock_guard<std::mutex> & cache_lock) const;
bool contains(const Key & key, size_t offset, std::lock_guard<std::mutex> & cache_lock) const;
@ -203,6 +158,171 @@ private:
size_t cache_size = 0;
};
using AccessKeyAndOffset = std::pair<Key, size_t>;
struct KeyAndOffsetHash
{
std::size_t operator()(const AccessKeyAndOffset & key) const
{
return std::hash<UInt128>()(key.first) ^ std::hash<UInt64>()(key.second);
}
};
using AccessRecord = std::unordered_map<AccessKeyAndOffset, LRUQueue::Iterator, KeyAndOffsetHash>;
/// Used to track and control the cache accesses of each query.
/// Through it, the cache layer can apply different policies to different queries.
struct QueryContext
{
LRUQueue lru_queue;
AccessRecord records;
size_t cache_size = 0;
size_t max_cache_size;
bool skip_download_if_exceeds_query_cache;
QueryContext(size_t max_cache_size_, bool skip_download_if_exceeds_query_cache_)
: max_cache_size(max_cache_size_)
, skip_download_if_exceeds_query_cache(skip_download_if_exceeds_query_cache_) {}
void remove(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock)
{
if (cache_size < size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Deleted cache size exceeds existing cache size");
if (!skip_download_if_exceeds_query_cache)
{
auto record = records.find({key, offset});
if (record != records.end())
{
lru_queue.remove(record->second, cache_lock);
records.erase({key, offset});
}
}
cache_size -= size;
}
void reserve(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock)
{
if (cache_size + size > max_cache_size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Reserved cache size exceeds the remaining cache size");
if (!skip_download_if_exceeds_query_cache)
{
auto record = records.find({key, offset});
if (record == records.end())
{
auto queue_iter = lru_queue.add(key, offset, 0, cache_lock);
record = records.insert({{key, offset}, queue_iter}).first;
}
record->second->size += size;
}
cache_size += size;
}
void use(const Key & key, size_t offset, std::lock_guard<std::mutex> & cache_lock)
{
if (!skip_download_if_exceeds_query_cache)
{
auto record = records.find({key, offset});
if (record != records.end())
lru_queue.moveToEnd(record->second, cache_lock);
}
}
size_t getMaxCacheSize() { return max_cache_size; }
size_t getCacheSize() { return cache_size; }
LRUQueue & queue() { return lru_queue; }
bool isSkipDownloadIfExceed() { return skip_download_if_exceeds_query_cache; }
};
using QueryContextPtr = std::shared_ptr<QueryContext>;
using QueryContextMap = std::unordered_map<String, QueryContextPtr>;
QueryContextMap query_map;
bool enable_filesystem_query_cache_limit;
QueryContextPtr getCurrentQueryContext(std::lock_guard<std::mutex> & cache_lock);
QueryContextPtr getQueryContext(const String & query_id, std::lock_guard<std::mutex> & cache_lock);
void removeQueryContext(const String & query_id);
QueryContextPtr getOrSetQueryContext(const String & query_id, const ReadSettings & settings, std::lock_guard<std::mutex> &);
virtual bool tryReserve(
const Key & key, size_t offset, size_t size,
std::lock_guard<std::mutex> & cache_lock) = 0;
virtual void remove(
Key key, size_t offset,
std::lock_guard<std::mutex> & cache_lock,
std::lock_guard<std::mutex> & segment_lock) = 0;
virtual bool isLastFileSegmentHolder(
const Key & key, size_t offset,
std::lock_guard<std::mutex> & cache_lock,
std::lock_guard<std::mutex> & segment_lock) = 0;
/// If file segment was partially downloaded and then space reservation fails (because of no
/// space left), then update corresponding cache cell metadata (file segment size).
virtual void reduceSizeToDownloaded(
const Key & key, size_t offset,
std::lock_guard<std::mutex> & cache_lock,
std::lock_guard<std::mutex> & segment_lock) = 0;
void assertInitialized() const;
public:
/// Saves query context information and lets the cache layer adopt
/// different cache policies for different queries.
struct QueryContextHolder : private boost::noncopyable
{
explicit QueryContextHolder(const String & query_id_, IFileCache * cache_, QueryContextPtr context_);
QueryContextHolder() = default;
~QueryContextHolder();
String query_id {};
IFileCache * cache = nullptr;
QueryContextPtr context = nullptr;
};
QueryContextHolder getQueryContextHolder(const String & query_id, const ReadSettings & settings);
};
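For orientation, a minimal standalone sketch (not ClickHouse code; all names below are simplified stand-ins) of the RAII pattern these declarations suggest: the holder registers a per-query context when the query starts and drops the bookkeeping when the holder is destroyed.

#include <memory>
#include <string>
#include <unordered_map>

struct QueryContext { size_t max_cache_size = 0; };
using QueryContextPtr = std::shared_ptr<QueryContext>;

class Cache
{
public:
    struct QueryContextHolder
    {
        QueryContextHolder(std::string query_id_, Cache * cache_, QueryContextPtr context_)
            : query_id(std::move(query_id_)), cache(cache_), context(std::move(context_)) {}

        ~QueryContextHolder()
        {
            /// Drop the per-query bookkeeping once the query is finished.
            if (cache && context)
                cache->query_map.erase(query_id);
        }

        std::string query_id;
        Cache * cache = nullptr;
        QueryContextPtr context;
    };

    QueryContextHolder getQueryContextHolder(const std::string & query_id, size_t max_cache_size)
    {
        auto context = std::make_shared<QueryContext>(QueryContext{max_cache_size});
        query_map.emplace(query_id, context);
        return {query_id, this, context};
    }

private:
    std::unordered_map<std::string, QueryContextPtr> query_map;
};

int main()
{
    Cache cache;
    auto holder = cache.getQueryContextHolder("query_id", 128 * 1024 * 1024);
    /// Reservations made while the holder is alive would be charged to this query.
}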
class LRUFileCache final : public IFileCache
{
public:
LRUFileCache(
const String & cache_base_path_,
const FileCacheSettings & cache_settings_);
FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) override;
FileSegmentsHolder get(const Key & key, size_t offset, size_t size) override;
FileSegments getSnapshot() const override;
void initialize() override;
void remove(const Key & key) override;
void remove() override;
std::vector<String> tryGetCachePaths(const Key & key) override;
size_t getUsedCacheSize() const override;
size_t getFileSegmentsNum() const override;
private:
struct FileSegmentCell : private boost::noncopyable
{
FileSegmentPtr file_segment;
@ -227,26 +347,22 @@ private:
using FileSegmentsByOffset = std::map<size_t, FileSegmentCell>;
using CachedFiles = std::unordered_map<Key, FileSegmentsByOffset>;
using AccessKeyAndOffset = std::pair<Key, size_t>;
struct KeyAndOffsetHash
{
std::size_t operator()(const AccessKeyAndOffset & key) const
{
return std::hash<UInt128>()(key.first) ^ std::hash<UInt64>()(key.second);
}
};
using AccessRecord = std::unordered_map<AccessKeyAndOffset, LRUQueue::Iterator, KeyAndOffsetHash>;
CachedFiles files;
LRUQueue queue;
LRUQueue stash_queue;
AccessRecord records;
size_t max_stash_element_size;
size_t enable_cache_hits_threshold;
enum class ReserveResult
{
FITS_IN_QUERY_LIMIT_AND_RESERVATION_COMPLETED,
EXCEEDS_QUERY_LIMIT,
FITS_IN_QUERY_LIMIT_NEED_RESERVE_FROM_MAIN_LIST,
};
Poco::Logger * log;
FileSegments getImpl(
@ -266,6 +382,17 @@ private:
const Key & key, size_t offset, size_t size,
std::lock_guard<std::mutex> & cache_lock) override;
bool tryReserveForMainList(
const Key & key, size_t offset, size_t size,
QueryContextPtr query_context,
std::lock_guard<std::mutex> & cache_lock);
/// Limits the maximum cache size for the current query.
LRUFileCache::ReserveResult tryReserveForQuery(
const Key & key, size_t offset, size_t size,
QueryContextPtr query_context,
std::lock_guard<std::mutex> & cache_lock);
void remove(
Key key, size_t offset,
std::lock_guard<std::mutex> & cache_lock,
@ -309,6 +436,8 @@ public:
void assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock);
void assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock);
void assertQueueCorrectness(std::lock_guard<std::mutex> & cache_lock);
};
}

View File

@ -11,6 +11,7 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration &
max_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS);
max_file_segment_size = config.getUInt64(config_prefix + ".max_file_segment_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE);
cache_on_write_operations = config.getUInt64(config_prefix + ".cache_on_write_operations", false);
enable_filesystem_query_cache_limit = config.getUInt64(config_prefix + ".enable_filesystem_query_cache_limit", false);
enable_cache_hits_threshold = config.getUInt64(config_prefix + ".enable_cache_hits_threshold", REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD);
}

View File

@ -13,6 +13,7 @@ struct FileCacheSettings
size_t max_elements = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS;
size_t max_file_segment_size = REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE;
bool cache_on_write_operations = false;
bool enable_filesystem_query_cache_limit = false;
size_t enable_cache_hits_threshold = REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD;

View File

@ -200,6 +200,7 @@ private:
const Range segment_range;
State download_state;
String downloader_id;
RemoteFileReaderPtr remote_file_reader;

View File

@ -16,7 +16,7 @@ namespace ErrorCodes
extern const int CANNOT_UNBLOCK_SIGNAL;
}
#ifdef __APPLE__
#ifdef OS_DARWIN
// We only need to support timeout = {0, 0} at this moment
static int sigtimedwait(const sigset_t *set, siginfo_t *info, const struct timespec * /*timeout*/)
{

View File

@ -9,13 +9,13 @@ namespace ErrorCodes
extern const int SYNTAX_ERROR;
}
Int32 IntervalKind::toAvgSeconds() const
Float64 IntervalKind::toAvgSeconds() const
{
switch (kind)
{
case IntervalKind::Nanosecond:
case IntervalKind::Microsecond:
case IntervalKind::Millisecond: return 0; /// fractional parts of seconds have 0 seconds
case IntervalKind::Nanosecond: return 0.000000001;
case IntervalKind::Microsecond: return 0.000001;
case IntervalKind::Millisecond: return 0.001;
case IntervalKind::Second: return 1;
case IntervalKind::Minute: return 60;
case IntervalKind::Hour: return 3600;
@ -28,6 +28,25 @@ Int32 IntervalKind::toAvgSeconds() const
__builtin_unreachable();
}
bool IntervalKind::isFixedLength() const
{
switch (kind)
{
case IntervalKind::Nanosecond:
case IntervalKind::Microsecond:
case IntervalKind::Millisecond:
case IntervalKind::Second:
case IntervalKind::Minute:
case IntervalKind::Hour:
case IntervalKind::Day:
case IntervalKind::Week: return true;
case IntervalKind::Month:
case IntervalKind::Quarter:
case IntervalKind::Year: return false;
}
__builtin_unreachable();
}
IntervalKind IntervalKind::fromAvgSeconds(Int64 num_seconds)
{
if (num_seconds)

View File

@ -31,12 +31,15 @@ struct IntervalKind
/// Returns number of seconds in one interval.
/// For `Month`, `Quarter` and `Year` the function returns an average number of seconds.
Int32 toAvgSeconds() const;
Float64 toAvgSeconds() const;
/// Chooses an interval kind based on number of seconds.
/// For example, `IntervalKind::fromAvgSeconds(3600)` returns `IntervalKind::Hour`.
static IntervalKind fromAvgSeconds(Int64 num_seconds);
/// Returns whether the IntervalKind has a fixed number of seconds (e.g. Day) or a non-fixed one (e.g. Month).
bool isFixedLength() const;
/// Returns an uppercased version of what `toString()` returns.
const char * toKeyword() const;
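For context, a self-contained sketch of why the return type moves from Int32 to Float64: with an integer return value the new sub-second kinds would truncate to zero. The enum below is a simplified stand-in, not the real IntervalKind; the values are the ones visible in the hunk above.

#include <cassert>

/// Stand-in for the real IntervalKind; only the kinds visible above are modelled.
enum class Kind { Millisecond, Second, Minute, Hour };

/// With the old Int32 return type the sub-second kinds truncated to 0;
/// Float64 keeps their fractional average length.
double toAvgSeconds(Kind kind)
{
    switch (kind)
    {
        case Kind::Millisecond: return 0.001;
        case Kind::Second: return 1;
        case Kind::Minute: return 60;
        case Kind::Hour: return 3600;
    }
    return 0;
}

int main()
{
    assert(toAvgSeconds(Kind::Millisecond) > 0); /// no longer truncated to zero
}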

View File

@ -27,7 +27,7 @@ void LazyPipeFDs::open()
if (fd >= 0)
throw Exception("Pipe is already opened", ErrorCodes::LOGICAL_ERROR);
#ifndef __APPLE__
#ifndef OS_DARWIN
if (0 != pipe2(fds_rw, O_CLOEXEC))
throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_PIPE);
#else

View File

@ -1,6 +1,6 @@
#include "ProcfsMetricsProvider.h"
#if defined(__linux__)
#if defined(OS_LINUX)
#include <Common/Exception.h>
#include <IO/ReadBufferFromMemory.h>

View File

@ -4,7 +4,7 @@
#include <boost/noncopyable.hpp>
#if defined(__linux__)
#if defined(OS_LINUX)
struct taskstats;
namespace DB
@ -19,7 +19,7 @@ public:
/// Updates only a part of taskstats struct's fields:
/// - cpu_run_virtual_total, cpu_delay_total (when /proc/thread-self/schedstat is available)
/// - blkio_delay_total (when /proc/thread-self/stat is available)
/// - rchar, wchar, read_bytes, write_bytes (when /prod/thread-self/io is available)
/// - rchar, wchar, read_bytes, write_bytes (when /proc/thread-self/io is available)
/// See: man procfs
void getTaskStats(::taskstats & out_stats) const;

View File

@ -2,7 +2,7 @@
#include <string.h>
#if !defined(__APPLE__) && !defined(__FreeBSD__)
#if !defined(OS_DARWIN) && !defined(OS_FREEBSD)
#include <malloc.h>
#endif
#include <algorithm>

View File

@ -33,7 +33,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, [[maybe_unused
else
error << "Address: " << info.si_addr;
#if defined(__x86_64__) && !defined(__FreeBSD__) && !defined(__APPLE__) && !defined(__arm__) && !defined(__powerpc__)
#if defined(__x86_64__) && !defined(OS_FREEBSD) && !defined(OS_DARWIN) && !defined(__arm__) && !defined(__powerpc__)
auto err_mask = context.uc_mcontext.gregs[REG_ERR];
if ((err_mask & 0x02))
error << " Access: write.";
@ -173,18 +173,18 @@ static void * getCallerAddress(const ucontext_t & context)
{
#if defined(__x86_64__)
/// Get the address at the time the signal was raised from the RIP (x86-64)
# if defined(__FreeBSD__)
# if defined(OS_FREEBSD)
return reinterpret_cast<void *>(context.uc_mcontext.mc_rip);
# elif defined(__APPLE__)
# elif defined(OS_DARWIN)
return reinterpret_cast<void *>(context.uc_mcontext->__ss.__rip);
# else
return reinterpret_cast<void *>(context.uc_mcontext.gregs[REG_RIP]);
# endif
#elif defined(__APPLE__) && defined(__aarch64__)
#elif defined(OS_DARWIN) && defined(__aarch64__)
return reinterpret_cast<void *>(context.uc_mcontext->__ss.__pc);
#elif defined(__FreeBSD__) && defined(__aarch64__)
#elif defined(OS_FREEBSD) && defined(__aarch64__)
return reinterpret_cast<void *>(context.uc_mcontext.mc_gpregs.gp_elr);
#elif defined(__aarch64__)
return reinterpret_cast<void *>(context.uc_mcontext.pc);
@ -201,7 +201,7 @@ void StackTrace::symbolize(
const StackTrace::FramePointers & frame_pointers, [[maybe_unused]] size_t offset,
size_t size, StackTrace::Frames & frames)
{
#if defined(__ELF__) && !defined(__FreeBSD__)
#if defined(__ELF__) && !defined(OS_FREEBSD)
auto symbol_index_ptr = DB::SymbolIndex::instance();
const DB::SymbolIndex & symbol_index = *symbol_index_ptr;
@ -332,7 +332,7 @@ static void toStringEveryLineImpl(
if (size == 0)
return callback("<Empty trace>");
#if defined(__ELF__) && !defined(__FreeBSD__)
#if defined(__ELF__) && !defined(OS_FREEBSD)
auto symbol_index_ptr = DB::SymbolIndex::instance();
const DB::SymbolIndex & symbol_index = *symbol_index_ptr;
std::unordered_map<std::string, DB::Dwarf> dwarfs;

View File

@ -9,7 +9,7 @@
#include <functional>
#include <signal.h>
#ifdef __APPLE__
#ifdef OS_DARWIN
// ucontext is not available without _XOPEN_SOURCE
# ifdef __clang__
# pragma clang diagnostic ignored "-Wreserved-id-macro"

View File

@ -1,4 +1,4 @@
#if defined(__ELF__) && !defined(__FreeBSD__)
#if defined(__ELF__) && !defined(OS_FREEBSD)
#include <Common/SymbolIndex.h>
#include <Common/hex.h>

View File

@ -1,6 +1,6 @@
#pragma once
#if defined(__ELF__) && !defined(__FreeBSD__)
#if defined(__ELF__) && !defined(OS_FREEBSD)
#include <vector>
#include <string>

View File

@ -1,6 +1,6 @@
#include "ThreadProfileEvents.h"
#if defined(__linux__)
#if defined(OS_LINUX)
#include "TaskStatsInfoGetter.h"
#include "ProcfsMetricsProvider.h"
@ -177,7 +177,7 @@ void TasksStatsCounters::incrementProfileEvents(const ::taskstats & prev, const
#endif
#if defined(__linux__)
#if defined(OS_LINUX)
namespace DB
{

View File

@ -8,7 +8,7 @@
#include <Common/logger_useful.h>
#if defined(__linux__)
#if defined(OS_LINUX)
#include <linux/taskstats.h>
#else
struct taskstats {};
@ -66,7 +66,7 @@ struct RUsageCounters
static RUsageCounters current()
{
::rusage rusage {};
#if !defined(__APPLE__)
#if !defined(OS_DARWIN)
#if defined(OS_SUNOS)
::getrusage(RUSAGE_LWP, &rusage);
#else
@ -102,7 +102,7 @@ private:
}
};
#if defined(__linux__)
#if defined(OS_LINUX)
struct PerfEventInfo
{
@ -171,7 +171,7 @@ extern PerfEventsCounters current_thread_counters;
#endif
#if defined(__linux__)
#if defined(OS_LINUX)
class TasksStatsCounters
{

View File

@ -841,6 +841,21 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &
return false;
}
void ZooKeeper::waitForEphemeralToDisappearIfAny(const std::string & path)
{
zkutil::EventPtr eph_node_disappeared = std::make_shared<Poco::Event>();
String content;
if (!tryGet(path, content, nullptr, eph_node_disappeared))
return;
int32_t timeout_ms = 2 * session_timeout_ms;
if (!eph_node_disappeared->tryWait(timeout_ms))
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR,
"Ephemeral node {} still exists after {}s, probably it's owned by someone else. "
"Either session_timeout_ms in client's config is different from server's config or it's a bug. "
"Node data: '{}'", path, timeout_ms / 1000, content);
}
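As a rough illustration of the waiting pattern used by waitForEphemeralToDisappearIfAny (arm a watch, wait up to twice the session timeout, treat a surviving node as a logical error), here is a self-contained sketch built on standard-library primitives rather than the ZooKeeper client; all names and timings are stand-ins.

#include <chrono>
#include <condition_variable>
#include <mutex>
#include <stdexcept>
#include <thread>

int main()
{
    std::mutex mutex;
    std::condition_variable node_disappeared;
    bool disappeared = false;

    const auto session_timeout = std::chrono::milliseconds(100);

    /// Stand-in for the ZooKeeper watch firing when the ephemeral node goes away.
    std::thread watcher([&]
    {
        std::this_thread::sleep_for(session_timeout);
        std::lock_guard<std::mutex> lock(mutex);
        disappeared = true;
        node_disappeared.notify_one();
    });

    bool gone = false;
    {
        std::unique_lock<std::mutex> lock(mutex);
        /// Wait up to twice the session timeout, like the method above does.
        gone = node_disappeared.wait_for(lock, 2 * session_timeout, [&] { return disappeared; });
    }

    watcher.join();

    if (!gone)
        throw std::runtime_error("Ephemeral node still exists, probably owned by someone else");
}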
ZooKeeperPtr ZooKeeper::startNewSession() const
{
return std::make_shared<ZooKeeper>(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log, get_priority_load_balancing);

View File

@ -240,6 +240,10 @@ public:
/// The function returns true if waited and false if waiting was interrupted by condition.
bool waitForDisappear(const std::string & path, const WaitCondition & condition = {});
/// Wait for the ephemeral node created in the previous session to disappear.
/// Throws LOGICAL_ERROR if node still exists after 2x session_timeout.
void waitForEphemeralToDisappearIfAny(const std::string & path);
/// Async interface (a small subset of operations is implemented).
///
/// Usage:

View File

@ -9,7 +9,7 @@ void write(size_t x, WriteBuffer & out)
writeBinary(x, out);
}
#ifdef __APPLE__
#ifdef OS_DARWIN
void write(uint64_t x, WriteBuffer & out)
{
x = __builtin_bswap64(x);
@ -71,7 +71,7 @@ void write(const Error & x, WriteBuffer & out)
write(static_cast<int32_t>(x), out);
}
#ifdef __APPLE__
#ifdef OS_DARWIN
void read(uint64_t & x, ReadBuffer & in)
{
readBinary(x, in);

View File

@ -16,7 +16,7 @@ using namespace DB;
void write(size_t x, WriteBuffer & out);
/// uint64_t != size_t on darwin
#ifdef __APPLE__
#ifdef OS_DARWIN
void write(uint64_t x, WriteBuffer & out);
#endif
@ -45,7 +45,7 @@ void write(const std::vector<T> & arr, WriteBuffer & out)
}
void read(size_t & x, ReadBuffer & in);
#ifdef __APPLE__
#ifdef OS_DARWIN
void read(uint64_t & x, ReadBuffer & in);
#endif
void read(int64_t & x, ReadBuffer & in);

View File

@ -21,7 +21,7 @@ namespace ErrorCodes
}
#if defined(__linux__)
#if defined(OS_LINUX)
#include <unistd.h>
#include <fcntl.h>
@ -101,7 +101,7 @@ bool supportsAtomicRename()
}
#elif defined(__APPLE__)
#elif defined(OS_DARWIN)
// Includes
#include <dlfcn.h> // For dlsym

View File

@ -5,7 +5,7 @@
#include <pthread.h>
#include <cstdint>
#if defined(__FreeBSD__)
#if defined(OS_FREEBSD)
# include <pthread_np.h>
#endif
@ -48,7 +48,7 @@ size_t getStackSize(void ** out_address)
address = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(pthread_get_stackaddr_np(thread)) - size);
#else
pthread_attr_t attr;
# if defined(__FreeBSD__) || defined(OS_SUNOS)
# if defined(OS_FREEBSD) || defined(OS_SUNOS)
pthread_attr_init(&attr);
if (0 != pthread_attr_get_np(pthread_self(), &attr))
throwFromErrno("Cannot pthread_attr_get_np", ErrorCodes::CANNOT_PTHREAD_ATTR);

View File

@ -16,7 +16,7 @@
static void setAffinity()
{
#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__sun)
#if !defined(OS_DARWIN) && !defined(OS_FREEBSD) && !defined(__sun)
cpu_set_t mask;
CPU_ZERO(&mask);
CPU_SET(0, &mask);
@ -283,7 +283,7 @@ int main(int argc, char ** argv)
if (!method || method == 1) test<identity> (n, data.data(), "0: identity");
if (!method || method == 2) test<intHash32> (n, data.data(), "1: intHash32");
#if !defined(__APPLE__) /// The difference in size_t: unsigned long on Linux, unsigned long long on Mac OS.
#if !defined(OS_DARWIN) /// The difference in size_t: unsigned long on Linux, unsigned long long on Mac OS.
if (!method || method == 3) test<intHash64> (n, data.data(), "2: intHash64");
#endif
if (!method || method == 4) test<hash3> (n, data.data(), "3: two rounds");

View File

@ -1,4 +1,4 @@
#if defined(__linux__)
#if defined(OS_LINUX)
#include <Common/ProcfsMetricsProvider.h>
#include <iostream>
@ -6,7 +6,7 @@
#endif
#if defined(__linux__)
#if defined(OS_LINUX)
int main(int argc, char ** argv)
{
using namespace DB;

View File

@ -16,7 +16,7 @@ static NO_INLINE const void * getAddress()
int main(int argc, char ** argv)
{
#if defined(__ELF__) && !defined(__FreeBSD__)
#if defined(__ELF__) && !defined(OS_FREEBSD)
using namespace DB;
if (argc < 2)

View File

@ -1,6 +1,6 @@
#include "filesystemHelpers.h"
#if defined(__linux__)
#if defined(OS_LINUX)
# include <cstdio>
# include <mntent.h>
# include <sys/sysmacros.h>
@ -62,12 +62,12 @@ std::unique_ptr<TemporaryFile> createTemporaryFile(const std::string & path)
return std::make_unique<TemporaryFile>(path);
}
#if !defined(__linux__)
#if !defined(OS_LINUX)
[[noreturn]]
#endif
String getBlockDeviceId([[maybe_unused]] const String & path)
{
#if defined(__linux__)
#if defined(OS_LINUX)
struct stat sb;
if (lstat(path.c_str(), &sb))
throwFromErrnoWithPath("Cannot lstat " + path, path, ErrorCodes::CANNOT_STAT);
@ -79,12 +79,12 @@ String getBlockDeviceId([[maybe_unused]] const String & path)
#endif
}
#if !defined(__linux__)
#if !defined(OS_LINUX)
[[noreturn]]
#endif
BlockDeviceType getBlockDeviceType([[maybe_unused]] const String & device_id)
{
#if defined(__linux__)
#if defined(OS_LINUX)
try
{
ReadBufferFromFile in("/sys/dev/block/" + device_id + "/queue/rotational");
@ -101,12 +101,12 @@ BlockDeviceType getBlockDeviceType([[maybe_unused]] const String & device_id)
#endif
}
#if !defined(__linux__)
#if !defined(OS_LINUX)
[[noreturn]]
#endif
UInt64 getBlockDeviceReadAheadBytes([[maybe_unused]] const String & device_id)
{
#if defined(__linux__)
#if defined(OS_LINUX)
try
{
ReadBufferFromFile in("/sys/dev/block/" + device_id + "/queue/read_ahead_kb");
@ -155,12 +155,12 @@ std::filesystem::path getMountPoint(std::filesystem::path absolute_path)
}
/// Returns name of filesystem mounted to mount_point
#if !defined(__linux__)
#if !defined(OS_LINUX)
[[noreturn]]
#endif
String getFilesystemName([[maybe_unused]] const String & mount_point)
{
#if defined(__linux__)
#if defined(OS_LINUX)
FILE * mounted_filesystems = setmntent("/etc/mtab", "r");
if (!mounted_filesystems)
throw DB::Exception("Cannot open /etc/mtab to get name of filesystem", ErrorCodes::SYSTEM_ERROR);

View File

@ -19,7 +19,7 @@ bool enoughSpaceInDirectory(const std::string & path, size_t data_size);
std::unique_ptr<TemporaryFile> createTemporaryFile(const std::string & path);
// Determine what block device is responsible for specified path
#if !defined(__linux__)
#if !defined(OS_LINUX)
[[noreturn]]
#endif
String getBlockDeviceId([[maybe_unused]] const String & path);
@ -32,13 +32,13 @@ enum class BlockDeviceType
};
// Try to determine block device type
#if !defined(__linux__)
#if !defined(OS_LINUX)
[[noreturn]]
#endif
BlockDeviceType getBlockDeviceType([[maybe_unused]] const String & device_id);
// Get size of read-ahead in bytes for specified block device
#if !defined(__linux__)
#if !defined(OS_LINUX)
[[noreturn]]
#endif
UInt64 getBlockDeviceReadAheadBytes([[maybe_unused]] const String & device_id);
@ -47,7 +47,7 @@ UInt64 getBlockDeviceReadAheadBytes([[maybe_unused]] const String & device_id);
std::filesystem::path getMountPoint(std::filesystem::path absolute_path);
/// Returns name of filesystem mounted to mount_point
#if !defined(__linux__)
#if !defined(OS_LINUX)
[[noreturn]]
#endif
String getFilesystemName([[maybe_unused]] const String & mount_point);

View File

@ -11,7 +11,7 @@ int getCurrentProcessFDCount()
{
namespace fs = std::filesystem;
int result = -1;
#if defined(__linux__) || defined(__APPLE__)
#if defined(OS_LINUX) || defined(OS_DARWIN)
using namespace DB;
Int32 pid = getpid();

View File

@ -1,6 +1,6 @@
#include <Common/getHashOfLoadedBinary.h>
#if defined(__linux__)
#if defined(OS_LINUX)
#include <link.h>
#include <array>

View File

@ -1,7 +1,7 @@
#include "getMappedArea.h"
#include <Common/Exception.h>
#if defined(__linux__)
#if defined(OS_LINUX)
#include <Common/StringUtils/StringUtils.h>
#include <Common/hex.h>

View File

@ -9,7 +9,7 @@ int getMaxFileDescriptorCount()
{
namespace fs = std::filesystem;
int result = -1;
#if defined(__linux__) || defined(__APPLE__)
#if defined(OS_LINUX) || defined(OS_DARWIN)
using namespace DB;
if (fs::exists("/proc/sys/fs/file-max"))

View File

@ -1,4 +1,4 @@
#if defined(__linux__)
#if defined(OS_LINUX)
#include "hasLinuxCapability.h"

View File

@ -1,5 +1,5 @@
#pragma once
#if defined(__linux__)
#if defined(OS_LINUX)
#include <linux/capability.h>

View File

@ -7,6 +7,9 @@
#include <base/getThreadId.h>
#include <base/types.h>
#if defined(__linux__)
#include <sys/utsname.h>
#endif
namespace DB
{
@ -29,6 +32,15 @@ DB::UInt64 randomSeed()
hash.update(times.tv_nsec);
hash.update(times.tv_sec);
hash.update(getThreadId());
hash.update(&times);
/// It makes sense to add something like the hostname to avoid seed collisions when multiple servers start simultaneously.
/// But randomSeed() must be signal-safe, and gethostname and similar functions are not.
/// Let's try to get utsname.nodename using the uname syscall (it's signal-safe).
#if defined(__linux__)
struct utsname sysinfo;
if (uname(&sysinfo) == 0)
hash.update(sysinfo);
#endif
return hash.get64();
}
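A standalone sketch of the seeding idea described in the comment above: mix the signal-safe uname() nodename into the seed so that servers started at the same instant diverge. The real code hashes the whole struct with SipHash; FNV-1a and the clock stand-in are used here only to keep the sketch self-contained.

#include <sys/utsname.h>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <ctime>
#include <iostream>

/// Simple FNV-1a over raw bytes; a stand-in for the SipHash used in the real code.
static uint64_t fnv1a(const void * data, size_t size, uint64_t hash = 1469598103934665603ULL)
{
    const auto * bytes = static_cast<const unsigned char *>(data);
    for (size_t i = 0; i < size; ++i)
    {
        hash ^= bytes[i];
        hash *= 1099511628211ULL;
    }
    return hash;
}

int main()
{
    /// Stand-in for the clock/pid/thread-id entropy gathered above.
    timespec times{};
    clock_gettime(CLOCK_MONOTONIC, &times);
    uint64_t seed = fnv1a(&times, sizeof(times));

#if defined(__linux__)
    /// uname() is signal-safe, unlike gethostname(), so it can be mixed in here.
    struct utsname sysinfo;
    if (uname(&sysinfo) == 0)
        seed = fnv1a(sysinfo.nodename, strlen(sysinfo.nodename), seed);
#endif

    std::cout << seed << '\n';
}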

View File

@ -1,6 +1,6 @@
#include "remapExecutable.h"
#if defined(__linux__) && defined(__amd64__) && defined(__SSE2__) && !defined(SANITIZER) && defined(NDEBUG) && !defined(SPLIT_SHARED_LIBRARIES)
#if defined(OS_LINUX) && defined(__amd64__) && defined(__SSE2__) && !defined(SANITIZER) && defined(NDEBUG) && !defined(SPLIT_SHARED_LIBRARIES)
#include <sys/mman.h>
#include <unistd.h>

View File

@ -1,7 +1,7 @@
#include <pthread.h>
#if defined(__APPLE__) || defined(OS_SUNOS)
#elif defined(__FreeBSD__)
#if defined(OS_DARWIN) || defined(OS_SUNOS)
#elif defined(OS_FREEBSD)
#include <pthread_np.h>
#else
#include <sys/prctl.h>
@ -55,10 +55,10 @@ const char * getThreadName()
if (thread_name[0])
return thread_name;
#if defined(__APPLE__) || defined(OS_SUNOS)
#if defined(OS_DARWIN) || defined(OS_SUNOS)
if (pthread_getname_np(pthread_self(), thread_name, THREAD_NAME_SIZE))
throw DB::Exception("Cannot get thread name with pthread_getname_np()", DB::ErrorCodes::PTHREAD_ERROR);
#elif defined(__FreeBSD__)
#elif defined(OS_FREEBSD)
// TODO: make test. freebsd will have this function soon https://freshbsd.org/commit/freebsd/r337983
// if (pthread_get_name_np(pthread_self(), thread_name, THREAD_NAME_SIZE))
// throw DB::Exception("Cannot get thread name with pthread_get_name_np()", DB::ErrorCodes::PTHREAD_ERROR);

View File

@ -98,9 +98,10 @@ TEST(LRUFileCache, get)
DB::ThreadStatus thread_status;
/// To work with the cache we need a query_id and a query context.
std::string query_id = "query_id";
auto query_context = DB::Context::createCopy(getContext().context);
query_context->makeQueryContext();
query_context->setCurrentQueryId("query_id");
query_context->setCurrentQueryId(query_id);
DB::CurrentThread::QueryScope query_scope_holder(query_context);
DB::FileCacheSettings settings;
@ -513,4 +514,5 @@ TEST(LRUFileCache, get)
assertRange(49, segments1[1], DB::FileSegment::Range(10, 19), DB::FileSegment::State::EMPTY);
assertRange(50, segments1[2], DB::FileSegment::Range(20, 24), DB::FileSegment::State::EMPTY);
}
}

View File

@ -405,7 +405,7 @@ void Changelog::readChangelogAndInitWriter(uint64_t last_commited_log_index, uin
if (last_log_read_result->last_read_index == 0 || last_log_read_result->error) /// If it's broken log then remove it
{
LOG_INFO(log, "Removing log {} because it's empty or read finished with error", description.path);
LOG_INFO(log, "Removing chagelog {} because it's empty or read finished with error", description.path);
std::filesystem::remove(description.path);
existing_changelogs.erase(last_log_read_result->log_start_index);
std::erase_if(logs, [last_log_read_result] (const auto & item) { return item.first >= last_log_read_result->log_start_index; });

View File

@ -236,7 +236,7 @@ String MonitorCommand::run()
print(ret, "key_arena_size", state_machine.getKeyArenaSize());
print(ret, "latest_snapshot_size", state_machine.getLatestSnapshotBufSize());
#if defined(__linux__) || defined(__APPLE__)
#if defined(OS_LINUX) || defined(OS_DARWIN)
print(ret, "open_file_descriptor_count", getCurrentProcessFDCount());
print(ret, "max_file_descriptor_count", getMaxFileDescriptorCount());
#endif

View File

@ -12,6 +12,7 @@
#include <Coordination/pathUtils.h>
#include <filesystem>
#include <memory>
#include <Common/logger_useful.h>
namespace DB
{
@ -20,6 +21,7 @@ namespace ErrorCodes
{
extern const int UNKNOWN_FORMAT_VERSION;
extern const int UNKNOWN_SNAPSHOT;
extern const int LOGICAL_ERROR;
}
namespace
@ -296,6 +298,25 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial
}
}
for (const auto & itr : storage.container)
{
if (itr.key != "/")
{
if (itr.value.stat.numChildren != static_cast<int32_t>(itr.value.getChildren().size()))
{
#ifdef NDEBUG
/// TODO (alesapin) remove this, it should always be CORRUPTED_DATA.
LOG_ERROR(&Poco::Logger::get("KeeperSnapshotManager"), "Children counter in stat.numChildren {}"
" is different from actual children size {} for node {}", itr.value.stat.numChildren, itr.value.getChildren().size(), itr.key);
#else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Children counter in stat.numChildren {}"
" is different from actual children size {} for node {}", itr.value.stat.numChildren, itr.value.getChildren().size(), itr.key);
#endif
}
}
}
size_t active_sessions_size;
readBinary(active_sessions_size, in);

View File

@ -13,7 +13,7 @@
#include <iomanip>
#include <mutex>
#include <functional>
#include <Common/logger_useful.h>
#include <base/defines.h>
namespace DB
{
@ -349,7 +349,9 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr
container.updateValue(parent_path, [child_path, zxid, &prev_parent_zxid,
parent_cversion, &prev_parent_cversion] (KeeperStorage::Node & parent)
{
++parent.stat.numChildren;
parent.addChild(child_path);
prev_parent_cversion = parent.stat.cversion;
prev_parent_zxid = parent.stat.pzxid;
@ -363,7 +365,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr
if (zxid > parent.stat.pzxid)
parent.stat.pzxid = zxid;
++parent.stat.numChildren;
chassert(parent.stat.numChildren == static_cast<int32_t>(parent.getChildren().size()));
});
response.path_created = path_created;
@ -385,6 +387,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr
undo_parent.stat.cversion = prev_parent_cversion;
undo_parent.stat.pzxid = prev_parent_zxid;
undo_parent.removeChild(child_path);
chassert(undo_parent.stat.numChildren == static_cast<int32_t>(undo_parent.getChildren().size()));
});
storage.container.erase(path_created);
@ -494,7 +497,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr
{
response.error = Coordination::Error::ZBADVERSION;
}
else if (it->value.stat.numChildren)
else if (!it->value.getChildren().empty())
{
response.error = Coordination::Error::ZNOTEMPTY;
}
@ -519,6 +522,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr
--parent.stat.numChildren;
++parent.stat.cversion;
parent.removeChild(child_basename);
chassert(parent.stat.numChildren == static_cast<int32_t>(parent.getChildren().size()));
});
response.error = Coordination::Error::ZOK;
@ -540,6 +544,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr
++parent.stat.numChildren;
--parent.stat.cversion;
parent.addChild(child_name);
chassert(parent.stat.numChildren == static_cast<int32_t>(parent.getChildren().size()));
});
};
}
@ -1110,6 +1115,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina
++parent.stat.cversion;
auto base_name = getBaseName(ephemeral_path);
parent.removeChild(base_name);
chassert(parent.stat.numChildren == static_cast<int32_t>(parent.getChildren().size()));
});
container.erase(ephemeral_path);

View File

@ -574,6 +574,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, enable_filesystem_cache_on_write_operations, false, "Write into cache on write operations. To actually work this setting requires be added to disk config too", 0) \
M(Bool, enable_filesystem_cache_log, false, "Allows to record the filesystem caching log for each query", 0) \
M(Bool, read_from_filesystem_cache_if_exists_otherwise_bypass_cache, false, "", 0) \
M(Bool, skip_download_if_exceeds_query_cache, true, "Skip download from remote filesystem if it exceeds the query cache size", 0) \
M(UInt64, max_query_cache_size, (128UL * 1024 * 1024 * 1024), "Max remote filesystem cache size that can be used by a single query", 0) \
\
M(Bool, use_structure_from_insertion_table_in_table_functions, false, "Use structure from insertion table instead of schema inference from data", 0) \
\

View File

@ -6,11 +6,24 @@
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Core/callOnTypeIndex.h>
#include <Core/SortDescription.h>
#include <Core/Block.h>
#include <Core/ColumnNumbers.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeUUID.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
#include "config_core.h"
@ -250,6 +263,36 @@ struct SimpleSortCursor : SortCursorHelper<SimpleSortCursor>
}
};
template <typename ColumnType>
struct SpecializedSingleColumnSortCursor : SortCursorHelper<SpecializedSingleColumnSortCursor<ColumnType>>
{
using SortCursorHelper<SpecializedSingleColumnSortCursor>::SortCursorHelper;
bool ALWAYS_INLINE greaterAt(const SortCursorHelper<SpecializedSingleColumnSortCursor> & rhs, size_t lhs_pos, size_t rhs_pos) const
{
auto & this_impl = this->impl;
auto & lhs_columns = this_impl->sort_columns;
auto & rhs_columns = rhs.impl->sort_columns;
assert(lhs_columns.size() == 1);
assert(rhs_columns.size() == 1);
const auto & lhs_column = assert_cast<const ColumnType &>(*lhs_columns[0]);
const auto & rhs_column = assert_cast<const ColumnType &>(*rhs_columns[0]);
const auto & desc = this->impl->desc[0];
int res = desc.direction * lhs_column.compareAt(lhs_pos, rhs_pos, rhs_column, desc.nulls_direction);
if (res > 0)
return true;
if (res < 0)
return false;
return this_impl->order > rhs.impl->order;
}
};
/// Separate comparator for locale-sensitive string comparisons
struct SortCursorWithCollation : SortCursorHelper<SortCursorWithCollation>
@ -411,6 +454,124 @@ private:
}
};
/** SortQueueVariants allows specializing the sorting queue for concrete types and a sort description.
* To access the queue, the callOnVariant method must be used.
*/
class SortQueueVariants
{
public:
SortQueueVariants() = default;
SortQueueVariants(const DataTypes & sort_description_types, const SortDescription & sort_description)
{
bool has_collation = false;
for (const auto & column_description : sort_description)
{
if (column_description.collator)
{
has_collation = true;
break;
}
}
if (has_collation)
{
queue_variants = SortingHeap<SortCursorWithCollation>();
return;
}
else if (sort_description.size() == 1)
{
TypeIndex column_type_index = sort_description_types[0]->getTypeId();
bool result = callOnIndexAndDataType<void>(
column_type_index,
[&](const auto & types)
{
using Types = std::decay_t<decltype(types)>;
using ColumnDataType = typename Types::LeftType;
using ColumnType = typename ColumnDataType::ColumnType;
queue_variants = SortingHeap<SpecializedSingleColumnSortCursor<ColumnType>>();
return true;
});
if (!result)
queue_variants = SortingHeap<SimpleSortCursor>();
}
else
{
queue_variants = SortingHeap<SortCursor>();
}
}
SortQueueVariants(const Block & header, const SortDescription & sort_description)
: SortQueueVariants(extractSortDescriptionTypesFromHeader(header, sort_description), sort_description)
{
}
template <typename Func>
decltype(auto) callOnVariant(Func && func)
{
return std::visit(func, queue_variants);
}
bool variantSupportJITCompilation() const
{
return std::holds_alternative<SortingHeap<SimpleSortCursor>>(queue_variants)
|| std::holds_alternative<SortingHeap<SortCursor>>(queue_variants)
|| std::holds_alternative<SortingHeap<SortCursorWithCollation>>(queue_variants);
}
private:
static DataTypes extractSortDescriptionTypesFromHeader(const Block & header, const SortDescription & sort_description)
{
size_t sort_description_size = sort_description.size();
DataTypes data_types(sort_description_size);
for (size_t i = 0; i < sort_description_size; ++i)
{
const auto & column_sort_description = sort_description[i];
data_types[i] = header.getByName(column_sort_description.column_name).type;
}
return data_types;
}
std::variant<
SortingHeap<SpecializedSingleColumnSortCursor<ColumnVector<UInt8>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnVector<UInt16>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnVector<UInt32>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnVector<UInt64>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnVector<UInt128>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnVector<UInt256>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnVector<Int8>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnVector<Int16>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnVector<Int32>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnVector<Int64>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnVector<Int128>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnVector<Int256>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnVector<Float32>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnVector<Float64>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnDecimal<Decimal32>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnDecimal<Decimal64>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnDecimal<Decimal128>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnDecimal<Decimal256>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnDecimal<DateTime64>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnVector<UUID>>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnString>>,
SortingHeap<SpecializedSingleColumnSortCursor<ColumnFixedString>>,
SortingHeap<SimpleSortCursor>,
SortingHeap<SortCursor>,
SortingHeap<SortCursorWithCollation>>
queue_variants;
};
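To illustrate the dispatch technique behind SortQueueVariants and callOnVariant, here is a minimal self-contained sketch (toy queue types, not the real SortingHeap): the concrete queue type is chosen once, stored in a std::variant, and every caller dispatches through std::visit.

#include <iostream>
#include <utility>
#include <variant>

struct IntQueue   { void push(int v)   { std::cout << "int "   << v << '\n'; } };
struct FloatQueue { void push(float v) { std::cout << "float " << v << '\n'; } };

using QueueVariant = std::variant<IntQueue, FloatQueue>;

template <typename Func>
decltype(auto) callOnVariant(QueueVariant & queue, Func && func)
{
    /// The visitor must compile for every alternative, exactly like the real callOnVariant.
    return std::visit(std::forward<Func>(func), queue);
}

int main()
{
    /// The concrete queue type is chosen once, e.g. from the sort description.
    QueueVariant queue = IntQueue{};
    callOnVariant(queue, [](auto & q) { q.push(42); });
}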
template <typename TLeftColumns, typename TRightColumns>
bool less(const TLeftColumns & lhs, const TRightColumns & rhs, size_t i, size_t j, const SortDescriptionWithPositions & descr)
{

View File

@ -10,7 +10,7 @@
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/resource.h>
#if defined(__linux__)
#if defined(OS_LINUX)
#include <sys/prctl.h>
#endif
#include <cerrno>
@ -858,7 +858,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
signal_listener = std::make_unique<SignalListener>(*this);
signal_listener_thread.start(*signal_listener);
#if defined(__ELF__) && !defined(__FreeBSD__)
#if defined(__ELF__) && !defined(OS_FREEBSD)
String build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex();
if (build_id_hex.empty())
build_id_info = "no build id";
@ -868,7 +868,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
build_id_info = "no build id";
#endif
#if defined(__linux__)
#if defined(OS_LINUX)
std::string executable_path = getExecutablePath();
if (!executable_path.empty())
@ -986,7 +986,7 @@ void BaseDaemon::setupWatchdog()
if (0 == pid)
{
logger().information("Forked a child process to watch");
#if defined(__linux__)
#if defined(OS_LINUX)
if (0 != prctl(PR_SET_PDEATHSIG, SIGKILL))
logger().warning("Cannot do prctl to ask termination with parent.");
#endif

View File

@ -149,7 +149,7 @@ void SentryWriter::onFault(int sig, const std::string & error_message, const Sta
sentry_set_tag("signal", strsignal(sig));
sentry_set_extra("signal_number", sentry_value_new_int32(sig));
#if defined(__ELF__) && !defined(__FreeBSD__)
#if defined(__ELF__) && !defined(OS_FREEBSD)
const String & build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex();
sentry_set_tag("build_id", build_id_hex.c_str());
#endif

View File

@ -84,6 +84,19 @@ std::string ExternalQueryBuilder::composeLoadAllQuery() const
}
else
{
/** In case UPDATE_FIELD is specified in {condition} for a dictionary that must load all data,
* replace {condition} with true_condition for the initial dictionary load.
* For subsequent dictionary loads {condition} will be updated with UPDATE_FIELD.
*/
static constexpr auto true_condition = "(1 = 1)";
auto condition_position = query.find(CONDITION_PLACEHOLDER_TO_REPLACE_VALUE);
if (condition_position != std::string::npos)
{
auto query_copy = query;
query_copy.replace(condition_position, CONDITION_PLACEHOLDER_TO_REPLACE_VALUE.size(), true_condition);
return query_copy;
}
return query;
}
}
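A self-contained sketch of the substitution added above, assuming the placeholder constant is the literal "{condition}" (the real constant lives in ExternalQueryBuilder; the helper name here is illustrative): the placeholder is replaced with a trivially true condition for the initial full load.

#include <cassert>
#include <string>

/// Name assumed for illustration; the real constant lives in ExternalQueryBuilder.
static const std::string CONDITION_PLACEHOLDER = "{condition}";

std::string composeInitialLoadQuery(std::string query)
{
    static constexpr auto true_condition = "(1 = 1)";
    auto condition_position = query.find(CONDITION_PLACEHOLDER);
    if (condition_position != std::string::npos)
        query.replace(condition_position, CONDITION_PLACEHOLDER.size(), true_condition);
    return query;
}

int main()
{
    assert(composeInitialLoadQuery("SELECT id, value FROM t WHERE {condition}")
           == "SELECT id, value FROM t WHERE (1 = 1)");
}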

Some files were not shown because too many files have changed in this diff.