Merge branch 'master' into interpolate-feature

This commit is contained in:
Yakov Olkhovskiy 2022-04-05 14:39:07 -04:00 committed by GitHub
commit 90c4cd3de7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
633 changed files with 13087 additions and 4485 deletions

View File

@ -16,7 +16,6 @@ Checks: '-*,
modernize-make-unique,
modernize-raw-string-literal,
modernize-redundant-void-arg,
modernize-replace-auto-ptr,
modernize-replace-random-shuffle,
modernize-use-bool-literals,
modernize-use-nullptr,
@ -145,6 +144,7 @@ Checks: '-*,
clang-analyzer-cplusplus.SelfAssignment,
clang-analyzer-deadcode.DeadStores,
clang-analyzer-cplusplus.Move,
clang-analyzer-optin.cplusplus.UninitializedObject,
clang-analyzer-optin.cplusplus.VirtualCall,
clang-analyzer-security.insecureAPI.UncheckedReturn,
clang-analyzer-security.insecureAPI.bcmp,
@ -164,6 +164,8 @@ Checks: '-*,
clang-analyzer-unix.cstring.NullArg,
boost-use-to-string,
alpha.security.cert.env.InvalidPtr,
'
WarningsAsErrors: '*'
@ -210,3 +212,6 @@ CheckOptions:
value: false
- key: performance-move-const-arg.CheckTriviallyCopyableMove
value: false
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
- key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp
value: expr-type

View File

@ -1,4 +1,4 @@
Changelog category (leave one):
### Changelog category (leave one):
- New Feature
- Improvement
- Bug Fix (user-visible misbehaviour in official stable or prestable release)
@ -9,7 +9,7 @@ Changelog category (leave one):
- Not for changelog (changelog entry is not required)
Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md):
### Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md):
...

View File

@ -149,7 +149,6 @@ jobs:
sudo rm -fr "$TEMP_PATH"
SplitBuildSmokeTest:
needs: [BuilderDebSplitted]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
@ -316,7 +315,6 @@ jobs:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinRelease:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -360,6 +358,51 @@ jobs:
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
# Build job for the `binary_gcc` configuration (see BUILD_NAME below).
# Starts after DockerHubPush, downloads the `changed_images` artifact, checks out the
# repository with submodules and full history, then runs tests/ci/build_check.py from a
# copy of the workspace placed under TEMP_PATH.
# The `<BUILD_NAME>.json` file produced under TEMP_PATH is uploaded as an artifact whether the
# build succeeded or failed; the Cleanup step always runs and removes TEMP_PATH and CACHES_PATH.
BuilderBinGCC:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
CHECK_NAME=ClickHouse build check (actions)
BUILD_NAME=binary_gcc
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'true'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebAsan:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
@ -590,7 +633,6 @@ jobs:
##########################################################################################
BuilderDebSplitted:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -636,7 +678,6 @@ jobs:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinTidy:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -682,7 +723,6 @@ jobs:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinDarwin:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -728,7 +768,6 @@ jobs:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinAarch64:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -774,7 +813,6 @@ jobs:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinFreeBSD:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -820,7 +858,6 @@ jobs:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinDarwinAarch64:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -866,7 +903,6 @@ jobs:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinPPC64:
needs: [DockerHubPush]
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
runs-on: [self-hosted, builder]
steps:
- name: Set envs
@ -911,6 +947,34 @@ jobs:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
# Runs tests/ci/docker_server.py twice with `--release-type head`: first with default image
# settings, then with `--no-ubuntu` for the clickhouse/clickhouse-keeper image built from
# docker/keeper. Starts only after BuilderDebRelease and BuilderDebAarch64.
# NOTE(review): no `--no-push` flag is passed in this workflow — presumably the images are
# pushed here; confirm against docker_server.py.
# NOTE(review): the Cleanup step removes "$TEMP_PATH", but no step visible in this job sets
# TEMP_PATH — confirm whether it is defined elsewhere or the removal is effectively a no-op.
DockerServerImages:
needs:
- BuilderDebRelease
- BuilderDebAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no version info
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head
python3 docker_server.py --release-type head --no-ubuntu \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:
@ -918,6 +982,7 @@ jobs:
- BuilderDebRelease
- BuilderDebAarch64
- BuilderBinRelease
- BuilderBinGCC
- BuilderDebAsan
- BuilderDebTsan
- BuilderDebUBsan
@ -2608,6 +2673,40 @@ jobs:
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
# Unit-test job for the GCC release build: starts after BuilderBinGCC, downloads artifacts
# into REPORTS_PATH, copies the checked-out workspace under TEMP_PATH and runs
# tests/ci/unit_tests_check.py with CHECK_NAME "Unit tests (release-gcc, actions)".
# NOTE(review): TEMP_PATH and REPO_COPY use the `unit_tests_asan` directory name even though
# this is the release-gcc job — looks like a carried-over name; confirm it is intentional.
UnitTestsReleaseGCC:
needs: [BuilderBinGCC]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (release-gcc, actions)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Unit test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 unit_tests_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
UnitTestsTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, fuzzer-unit-tester]

View File

@ -4,7 +4,7 @@ env:
# Force the stdout and stderr streams to be unbuffered
PYTHONUNBUFFERED: 1
on: # yamllint disable-line rule:truthy
on: # yamllint disable-line rule:truthy
pull_request:
types:
- synchronize
@ -370,6 +370,48 @@ jobs:
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
# Build job for the `binary_gcc` configuration (see BUILD_NAME below) in this workflow.
# Starts after both DockerHubPush and FastTest, downloads the `changed_images` artifact,
# checks out the repository with submodules and full history, then runs
# tests/ci/build_check.py from a copy of the workspace placed under TEMP_PATH.
# The `<BUILD_NAME>.json` file is uploaded as an artifact whether the build succeeded or
# failed; the Cleanup step always runs and removes TEMP_PATH and CACHES_PATH.
# Note: the artifact paths below spell out `runner.temp` directly; they point at the same
# locations as IMAGES_PATH and TEMP_PATH defined in "Set envs".
BuilderBinGCC:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
CHECK_NAME=ClickHouse build check (actions)
BUILD_NAME=binary_gcc
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ runner.temp }}/images_path
- name: Check out repository code
uses: actions/checkout@v2
with:
submodules: 'true'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_NAME }}
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderDebAarch64:
needs: [DockerHubPush, FastTest]
runs-on: [self-hosted, builder]
@ -956,6 +998,34 @@ jobs:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
# Runs tests/ci/docker_server.py twice with `--release-type head --no-push`: first with
# default image settings, then with `--no-ubuntu` for the clickhouse/clickhouse-keeper image
# built from docker/keeper. Starts only after BuilderDebRelease and BuilderDebAarch64.
# NOTE(review): the Cleanup step removes "$TEMP_PATH", but no step visible in this job sets
# TEMP_PATH — confirm whether it is defined elsewhere or the removal is effectively a no-op.
DockerServerImages:
needs:
- BuilderDebRelease
- BuilderDebAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no version info
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type head --no-push
python3 docker_server.py --release-type head --no-push --no-ubuntu \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
BuilderReport:
@ -963,6 +1033,7 @@ jobs:
- BuilderDebRelease
- BuilderDebAarch64
- BuilderBinRelease
- BuilderBinGCC
- BuilderDebAsan
- BuilderDebTsan
- BuilderDebUBsan
@ -2808,6 +2879,40 @@ jobs:
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
# Unit-test job for the GCC release build: starts after BuilderBinGCC, downloads artifacts
# into REPORTS_PATH, copies the checked-out workspace under TEMP_PATH and runs
# tests/ci/unit_tests_check.py with CHECK_NAME "Unit tests (release-gcc, actions)".
# NOTE(review): TEMP_PATH and REPO_COPY use the `unit_tests_asan` directory name even though
# this is the release-gcc job — looks like a carried-over name; confirm it is intentional.
UnitTestsReleaseGCC:
needs: [BuilderBinGCC]
runs-on: [self-hosted, fuzzer-unit-tester]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/unit_tests_asan
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=Unit tests (release-gcc, actions)
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Unit test
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci"
python3 unit_tests_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"
UnitTestsTsan:
needs: [BuilderDebTsan]
runs-on: [self-hosted, fuzzer-unit-tester]
@ -3061,6 +3166,7 @@ jobs:
needs:
- StyleCheck
- DockerHubPush
- DockerServerImages
- CheckLabels
- BuilderReport
- FastTest

View File

@ -36,3 +36,28 @@ jobs:
overwrite: true
tag: ${{ github.ref }}
file_glob: true
############################################################################################
##################################### Docker images #######################################
############################################################################################
# Runs tests/ci/docker_server.py twice with `--release-type auto`: first with default image
# settings, then with `--no-ubuntu` for the clickhouse/clickhouse-keeper image built from
# docker/keeper. Unlike the other workflows' variants of this job, no `needs:` is declared here.
# NOTE(review): the Cleanup step removes "$TEMP_PATH", but no step visible in this job sets
# TEMP_PATH — confirm whether it is defined elsewhere or the removal is effectively a no-op.
DockerServerImages:
runs-on: [self-hosted, style-checker]
steps:
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no version info
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type auto
python3 docker_server.py --release-type auto --no-ubuntu \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH"

View File

@ -11,6 +11,7 @@
* Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)).
* Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)).
* Make functions `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave the same as the `toIPv4`, `toIPv6` functions. Changed the behavior for an incorrect IP address passed into the functions `toIPv4`, `toIPv6`: now, if an invalid IP address is passed into these functions, an exception will be raised; before, these functions returned the default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringOrNull`, `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. Functions `IPv4StringToNumOrDefault`, `toIPv4OrDefault`, `toIPv6OrDefault` should be used if the previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning the default value for an invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`; if this setting is enabled, the IP address conversion functions will behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)).
#### New Feature
@ -366,7 +367,7 @@
#### Improvement
* Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch.
* Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch.
* Always display resource usage (total CPU usage, total RAM usage and max RAM usage per host) in client. [#33271](https://github.com/ClickHouse/ClickHouse/pull/33271) ([alexey-milovidov](https://github.com/alexey-milovidov)).
* Improve `Bool` type serialization and deserialization, check the range of values. [#32984](https://github.com/ClickHouse/ClickHouse/pull/32984) ([Kruglov Pavel](https://github.com/Avogar)).
* If an invalid setting is defined using the `SET` query or using the query parameters in the HTTP request, error message will contain suggestions that are similar to the invalid setting string (if any exists). [#32946](https://github.com/ClickHouse/ClickHouse/pull/32946) ([Antonio Andelic](https://github.com/antonio2368)).

View File

@ -261,12 +261,12 @@ endif ()
# Add a section with the hash of the compiled machine code for integrity checks.
# Only for official builds, because adding a section can be time consuming (rewrite of several GB).
# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary)
if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
set (USE_BINARY_HASH 1)
if (OBJCOPY_PATH AND CLICKHOUSE_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE OR CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64.cmake$"))
set (USE_BINARY_HASH 1 CACHE STRING "Calculate binary hash and store it in the separate section")
endif ()
# Allows to build stripped binary in a separate directory
if (OBJCOPY_PATH AND READELF_PATH)
if (OBJCOPY_PATH AND STRIP_PATH)
option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF)
if (INSTALL_STRIPPED_BINARIES)
set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")

View File

@ -2,6 +2,7 @@ set (SRCS
argsToConfig.cpp
coverage.cpp
demangle.cpp
getAvailableMemoryAmount.cpp
getFQDNOrHostName.cpp
getMemoryAmount.cpp
getPageSize.cpp

View File

@ -0,0 +1,44 @@
#include <stdexcept>
#include <fstream>
#include <base/getAvailableMemoryAmount.h>
#include <base/getPageSize.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/param.h>
#if defined(BSD)
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#endif
/// Returns the amount of currently available physical memory in bytes,
/// or 0 if it cannot be determined.
///
/// The value comes from a platform-specific source:
///  - where _SC_AVPHYS_PAGES is defined: page size * sysconf(_SC_AVPHYS_PAGES);
///  - FreeBSD: page size * vmtotal::t_avm read from the "vm.vmtotal" sysctl;
///  - otherwise (treated as Darwin): the CTL_HW / HW_USERMEM sysctl value.
uint64_t getAvailableMemoryAmountOrZero()
{
#if defined(_SC_AVPHYS_PAGES) // linux
    /// sysconf() signals failure with -1. Multiplying that by the page size would
    /// yield a huge bogus amount instead of the documented 0, so check it first.
    const long available_pages = sysconf(_SC_AVPHYS_PAGES);
    if (available_pages <= 0)
        return 0;
    return static_cast<uint64_t>(getPageSize()) * static_cast<uint64_t>(available_pages);
#elif defined(__FreeBSD__)
    struct vmtotal vmt;
    size_t vmt_size = sizeof(vmt);
    /// NOTE(review): t_avm is used as the "available" page count here; confirm against
    /// <sys/vmmeter.h> that this is the intended field (t_free may be a closer match).
    if (sysctlbyname("vm.vmtotal", &vmt, &vmt_size, nullptr, 0) == 0)
        return getPageSize() * vmt.t_avm;
    else
        return 0;
#else // darwin
    unsigned int usermem;
    size_t len = sizeof(usermem);
    static int mib[2] = { CTL_HW, HW_USERMEM };
    /// The length check rejects a result whose size differs from the buffer we passed in.
    if (sysctl(mib, 2, &usermem, &len, nullptr, 0) == 0 && len == sizeof(usermem))
        return usermem;
    else
        return 0;
#endif
}
/// Throwing counterpart of getAvailableMemoryAmountOrZero(): a zero result is
/// reported as std::runtime_error instead of being returned to the caller.
uint64_t getAvailableMemoryAmount()
{
    const uint64_t available_bytes = getAvailableMemoryAmountOrZero();
    if (available_bytes == 0)
        throw std::runtime_error("Cannot determine available memory amount");

    return available_bytes;
}

View File

@ -0,0 +1,12 @@
#pragma once
#include <cstdint>
/** Returns the amount of physical memory (RAM) that is currently available, in bytes.
  * Returns 0 on an unsupported platform or if the amount cannot be determined.
  */
uint64_t getAvailableMemoryAmountOrZero();
/** Same value as getAvailableMemoryAmountOrZero(), but throws std::runtime_error
  * instead of returning 0 when the amount cannot be determined.
  */
uint64_t getAvailableMemoryAmount();

View File

@ -51,6 +51,6 @@ if (GLIBC_COMPATIBILITY)
message (STATUS "Some symbols from glibc will be replaced for compatibility")
elseif (YANDEX_OFFICIAL_BUILD)
elseif (CLICKHOUSE_OFFICIAL_BUILD)
message (WARNING "Option GLIBC_COMPATIBILITY must be turned on for production builds.")
endif ()

View File

@ -1,28 +0,0 @@
#!/usr/bin/env bash
# Produces a stripped copy of a binary together with a separate debug-info file.
#
# Usage: strip.sh BINARY_PATH DESTINATION_STRIPPED_DIR [OBJCOPY_PATH] [READELF_PATH]
#
# Result layout under DESTINATION_STRIPPED_DIR:
#   bin/<binary name>                          - stripped binary with a .gnu_debuglink section
#   lib/debug/.build-id/<xx>/<rest>.debug      - compressed debug info, named after the Build ID

BINARY_PATH=$1
BINARY_NAME=$(basename "$BINARY_PATH")
DESTINATION_STRIPPED_DIR=$2
# `${N:-default}` is the default-value expansion. The previous `${3:objcopy}` / `${4:readelf}`
# were substring expansions, so the tools were empty whenever the arguments were omitted.
OBJCOPY_PATH=${3:-objcopy}
READELF_PATH=${4:-readelf}

# The Build ID note of the binary determines where the debug file is stored:
# the first two hex digits form the directory name, the remainder the file name.
BUILD_ID=$("$READELF_PATH" -n "$BINARY_PATH" | sed -n '/Build ID/ { s/.*: //p; q; }')
BUILD_ID_PREFIX=${BUILD_ID:0:2}
BUILD_ID_SUFFIX=${BUILD_ID:2}

DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id"
DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin"

mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX"
mkdir -p "$DESTINATION_STRIP_BINARY_DIR"

# Work on a copy so the original build output is left untouched.
cp "$BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"

# Extract the debug info into its own file before stripping the copy.
"$OBJCOPY_PATH" --only-keep-debug --compress-debug-sections "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
chmod 0644 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
chown 0:0 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"

strip --remove-section=.comment --remove-section=.note "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"

# Record the debug file in the stripped binary so debuggers can locate it.
"$OBJCOPY_PATH" --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"

View File

@ -11,16 +11,43 @@ macro(clickhouse_strip_binary)
message(FATAL_ERROR "A binary path name must be provided for stripping binary")
endif()
if (NOT DEFINED STRIP_DESTINATION_DIR)
message(FATAL_ERROR "Destination directory for stripped binary must be provided")
endif()
add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD
COMMAND bash ${ClickHouse_SOURCE_DIR}/cmake/strip.sh ${STRIP_BINARY_PATH} ${STRIP_DESTINATION_DIR} ${OBJCOPY_PATH} ${READELF_PATH}
COMMENT "Stripping clickhouse binary" VERBATIM
COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/bin"
COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin"
COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMENT "Stripping clickhouse binary" VERBATIM
)
install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse)
install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}/${STRIP_TARGET}.debug COMPONENT clickhouse)
endmacro()
# Creates an empty `<TARGET>.debug` file after TARGET is built and installs it.
#
# Arguments (both required, one value each):
#   TARGET          - target the POST_BUILD command is attached to; also the debug file's base name
#   DESTINATION_DIR - directory under which `lib/debug/<TARGET>.debug` is created
#
# The file is installed into `${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}`
# as part of the `clickhouse` component.
macro(clickhouse_make_empty_debug_info_for_nfpm)
    set(oneValueArgs TARGET DESTINATION_DIR)
    cmake_parse_arguments(EMPTY_DEBUG "" "${oneValueArgs}" "" ${ARGN})

    if (NOT DEFINED EMPTY_DEBUG_TARGET)
        message(FATAL_ERROR "A target name must be provided for making empty debug info")
    endif()

    if (NOT DEFINED EMPTY_DEBUG_DESTINATION_DIR)
        message(FATAL_ERROR "Destination directory for empty debug must be provided")
    endif()

    add_custom_command(TARGET ${EMPTY_DEBUG_TARGET} POST_BUILD
        COMMAND mkdir -p "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug"
        COMMAND touch "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug"
        COMMENT "Adding empty debug info for NFPM" VERBATIM
    )

    install(FILES "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" DESTINATION "${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}" COMPONENT clickhouse)
endmacro()

View File

@ -170,32 +170,32 @@ else ()
message (FATAL_ERROR "Cannot find objcopy.")
endif ()
# Readelf (FIXME copypaste)
# Strip (FIXME copypaste)
if (COMPILER_GCC)
find_program (READELF_PATH NAMES "llvm-readelf" "llvm-readelf-13" "llvm-readelf-12" "llvm-readelf-11" "readelf")
find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-13" "llvm-strip-12" "llvm-strip-11" "strip")
else ()
find_program (READELF_PATH NAMES "llvm-readelf-${COMPILER_VERSION_MAJOR}" "llvm-readelf" "readelf")
find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip")
endif ()
if (NOT READELF_PATH AND OS_DARWIN)
if (NOT STRIP_PATH AND OS_DARWIN)
find_program (BREW_PATH NAMES "brew")
if (BREW_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX)
if (LLVM_PREFIX)
find_program (READELF_PATH NAMES "llvm-readelf" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
find_program (STRIP_PATH NAMES "llvm-strip" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
endif ()
if (NOT READELF_PATH)
if (NOT STRIP_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX)
if (BINUTILS_PREFIX)
find_program (READELF_PATH NAMES "readelf" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
find_program (STRIP_PATH NAMES "strip" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
endif ()
endif ()
endif ()
endif ()
if (READELF_PATH)
message (STATUS "Using readelf: ${READELF_PATH}")
if (STRIP_PATH)
message (STATUS "Using strip: ${STRIP_PATH}")
else ()
message (FATAL_ERROR "Cannot find readelf.")
message (FATAL_ERROR "Cannot find strip.")
endif ()

View File

@ -18,6 +18,6 @@ set (VERSION_STRING_SHORT "${VERSION_MAJOR}.${VERSION_MINOR}")
math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000")
if(YANDEX_OFFICIAL_BUILD)
if(CLICKHOUSE_OFFICIAL_BUILD)
set(VERSION_OFFICIAL " (official build)")
endif()

View File

@ -69,9 +69,10 @@ endif ()
target_compile_options(_avrocpp PRIVATE ${SUPPRESS_WARNINGS})
# create a symlink to include headers with <avro/...>
set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
ADD_CUSTOM_TARGET(avro_symlink_headers ALL
COMMAND ${CMAKE_COMMAND} -E make_directory "${AVROCPP_ROOT_DIR}/include"
COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVROCPP_ROOT_DIR}/include/avro"
COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}"
COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro"
)
add_dependencies(_avrocpp avro_symlink_headers)
target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVROCPP_ROOT_DIR}/include")
target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}")

View File

@ -27,7 +27,11 @@ target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRAR
# asio
target_compile_definitions (_boost_headers_only INTERFACE BOOST_ASIO_STANDALONE=1)
target_compile_definitions (_boost_headers_only INTERFACE
BOOST_ASIO_STANDALONE=1
# Avoid using std::result_of, which is deprecated in C++17 and removed in C++20
BOOST_ASIO_HAS_STD_INVOKE_RESULT=1
)
# iostreams

2
contrib/hyperscan vendored

@ -1 +1 @@
Subproject commit e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa
Subproject commit 5edc68c5ac68d2d4f876159e9ee84def6d3dc87c

View File

@ -1,4 +1,4 @@
set (ENABLE_KRB5_DEFAULT 1)
set (ENABLE_KRB5_DEFAULT ${ENABLE_LIBRARIES})
if (NOT CMAKE_SYSTEM_NAME MATCHES "Linux" AND NOT (CMAKE_SYSTEM_NAME MATCHES "Darwin" AND NOT CMAKE_CROSSCOMPILING))
message (WARNING "krb5 disabled in non-Linux and non-native-Darwin environments")
set (ENABLE_KRB5_DEFAULT 0)
@ -16,6 +16,7 @@ if(NOT AWK_PROGRAM)
endif()
set(KRB5_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/krb5/src")
set(KRB5_ET_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}/include_private")
set(ALL_SRCS
"${KRB5_SOURCE_DIR}/util/et/et_name.c"
@ -90,7 +91,6 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5/get_tkt_flags.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5/set_allowable_enctypes.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5/k5sealiov.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5/gssapi_err_krb5.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5/canon_name.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5/inq_cred.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5/export_sec_context.c"
@ -143,11 +143,12 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/util_buffer_set.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/util_set.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/util_token.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/gssapi_err_generic.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/disp_major_status.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/util_seqstate.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/util_errmap.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/rel_buffer.c"
"${KRB5_ET_BIN_DIR}/lib/gssapi/krb5/gssapi_err_krb5.c"
"${KRB5_ET_BIN_DIR}/lib/gssapi/generic/gssapi_err_generic.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/spnego/spnego_mech.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/spnego/negoex_util.c"
@ -256,8 +257,8 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/util/profile/prof_parse.c"
"${KRB5_SOURCE_DIR}/util/profile/prof_get.c"
"${KRB5_SOURCE_DIR}/util/profile/prof_set.c"
"${KRB5_SOURCE_DIR}/util/profile/prof_err.c"
"${KRB5_SOURCE_DIR}/util/profile/prof_init.c"
"${KRB5_ET_BIN_DIR}/util/profile/prof_err.c"
"${KRB5_SOURCE_DIR}/lib/krb5/krb/fwd_tgt.c"
"${KRB5_SOURCE_DIR}/lib/krb5/krb/conv_creds.c"
"${KRB5_SOURCE_DIR}/lib/krb5/krb/fast.c"
@ -450,13 +451,12 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/k5e1_err.c"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/kdb5_err.c"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/asn1_err.c"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/krb5_err.c"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/krb524_err.c"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/kv5m_err.c"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/k5e1_err.c"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/kdb5_err.c"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/asn1_err.c"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/krb5_err.c"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/krb524_err.c"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/kv5m_err.c"
"${KRB5_SOURCE_DIR}/lib/krb5/rcache/rc_base.c"
@ -473,7 +473,7 @@ set(ALL_SRCS
)
add_custom_command(
OUTPUT "${KRB5_SOURCE_DIR}/util/et/compile_et"
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/compile_et"
COMMAND /bin/sh
./config_script
./compile_et.sh
@ -481,50 +481,17 @@ add_custom_command(
${AWK_PROGRAM}
sed
>
compile_et
${CMAKE_CURRENT_BINARY_DIR}/compile_et
DEPENDS "${KRB5_SOURCE_DIR}/util/et/compile_et.sh" "${KRB5_SOURCE_DIR}/util/et/config_script"
WORKING_DIRECTORY "${KRB5_SOURCE_DIR}/util/et"
)
file(GLOB_RECURSE ET_FILES
"${KRB5_SOURCE_DIR}/*.et"
)
# Registers one custom command per error-table (`.et`) file passed after `out_var`.
# For each input the command runs compile_et (through perl) in the input's own directory and
# declares the sibling `.c` and `.h` files (same path with `.et` replaced) as its outputs.
# The list of `.c` paths is returned to the caller through the variable named by `out_var`.
function(preprocess_et out_var)
set(result)
foreach(in_f ${ARGN})
# Derive the output paths by textual replacement of `.et` in the input path.
string(REPLACE
.et
.c
F_C
${in_f}
)
string(REPLACE
.et
.h
F_H
${in_f}
)
get_filename_component(ET_PATH ${in_f} DIRECTORY)
# Outputs are written next to the input, i.e. inside the source tree.
add_custom_command(OUTPUT ${F_C} ${F_H}
COMMAND perl "${KRB5_SOURCE_DIR}/util/et/compile_et" -d "${KRB5_SOURCE_DIR}/util/et" ${in_f}
DEPENDS ${in_f} "${KRB5_SOURCE_DIR}/util/et/compile_et"
WORKING_DIRECTORY ${ET_PATH}
VERBATIM
)
list(APPEND result ${F_C})
endforeach()
# PARENT_SCOPE makes the result visible in the caller's scope.
set(${out_var} "${result}" PARENT_SCOPE)
endfunction()
add_custom_command(
OUTPUT "${KRB5_SOURCE_DIR}/lib/gssapi/krb5/error_map.h"
OUTPUT "${KRB5_ET_BIN_DIR}/error_map.h"
COMMAND perl
-I../../../util
../../../util/gen-map.pl
-oerror_map.h
-o${KRB5_ET_BIN_DIR}/error_map.h
NAME=gsserrmap
KEY=OM_uint32
VALUE=char*
@ -536,22 +503,21 @@ add_custom_command(
add_custom_target(
ERROR_MAP_H
DEPENDS "${KRB5_SOURCE_DIR}/lib/gssapi/krb5/error_map.h"
DEPENDS "${KRB5_ET_BIN_DIR}/error_map.h"
VERBATIM
)
add_custom_command(
OUTPUT "${KRB5_SOURCE_DIR}/lib/gssapi/generic/errmap.h"
COMMAND perl -w -I../../../util ../../../util/gen.pl bimap errmap.h NAME=mecherrmap LEFT=OM_uint32 RIGHT=struct\ mecherror LEFTPRINT=print_OM_uint32 RIGHTPRINT=mecherror_print LEFTCMP=cmp_OM_uint32 RIGHTCMP=mecherror_cmp
OUTPUT "${KRB5_ET_BIN_DIR}/errmap.h"
COMMAND perl -w -I../../../util ../../../util/gen.pl bimap ${KRB5_ET_BIN_DIR}/errmap.h NAME=mecherrmap LEFT=OM_uint32 RIGHT=struct\ mecherror LEFTPRINT=print_OM_uint32 RIGHTPRINT=mecherror_print LEFTCMP=cmp_OM_uint32 RIGHTCMP=mecherror_cmp
WORKING_DIRECTORY "${KRB5_SOURCE_DIR}/lib/gssapi/generic"
)
add_custom_target(
ERRMAP_H
DEPENDS "${KRB5_SOURCE_DIR}/lib/gssapi/generic/errmap.h"
DEPENDS "${KRB5_ET_BIN_DIR}/errmap.h"
VERBATIM
)
add_custom_target(
KRB_5_H
DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/include/krb5/krb5.h"
@ -567,7 +533,40 @@ add_dependencies(
KRB_5_H
)
preprocess_et(processed_et_files ${ET_FILES})
#
# Generate error tables
#
# Register the compile_et step for a single error table.
#
#   et_path - path of the .et file to process.
#
# Declares three outputs: the generated <name>.c and <name>.h next to et_path,
# and a symlink ${KRB5_ET_BIN_DIR}/<name>.h pointing at the generated header.
function(preprocess_et et_path)
    # Anchor the substitution to the extension. A plain string(REPLACE .et ...)
    # would also rewrite any other ".et" substring that happens to occur in the
    # path (for example inside a directory name).
    string(REGEX REPLACE "\\.et$" ".c" F_C "${et_path}")
    string(REGEX REPLACE "\\.et$" ".h" F_H "${et_path}")
    get_filename_component(et_dir ${et_path} DIRECTORY)
    get_filename_component(et_name ${et_path} NAME_WLE)
    add_custom_command(OUTPUT ${F_C} ${F_H} ${KRB5_ET_BIN_DIR}/${et_name}.h
        COMMAND perl "${CMAKE_CURRENT_BINARY_DIR}/compile_et" -d "${KRB5_SOURCE_DIR}/util/et" ${et_path}
        # for #include w/o path (via -iquote)
        COMMAND ${CMAKE_COMMAND} -E create_symlink ${F_H} ${KRB5_ET_BIN_DIR}/${et_name}.h
        DEPENDS ${et_path} "${CMAKE_CURRENT_BINARY_DIR}/compile_et"
        # compile_et is run from the directory that holds the .et file.
        WORKING_DIRECTORY ${et_dir}
        VERBATIM
    )
endfunction()
# Find every .et file under KRB5_SOURCE_DIR, mirror it to the same relative
# location under KRB5_ET_BIN_DIR, and register compile_et (via preprocess_et)
# for the mirrored copy.
function(generate_error_tables)
    file(GLOB_RECURSE ET_FILES "${KRB5_SOURCE_DIR}/*.et")
    foreach(et_path ${ET_FILES})
        string(REPLACE ${KRB5_SOURCE_DIR} ${KRB5_ET_BIN_DIR} et_bin_path ${et_path})
        get_filename_component(et_bin_dir ${et_bin_path} DIRECTORY)
        add_custom_command(OUTPUT ${et_bin_path}
            COMMAND ${CMAKE_COMMAND} -E make_directory ${et_bin_dir}
            # Plain copy (not copy_if_different): together with DEPENDS the
            # build tool already decides when to run, and the copy must refresh
            # the output timestamp or the rule would stay out of date.
            COMMAND ${CMAKE_COMMAND} -E copy ${et_path} ${et_bin_path}
            # Without this dependency the copy only ran when the output was
            # missing, so edits to the source .et were never picked up.
            DEPENDS ${et_path}
            VERBATIM
        )
        preprocess_et(${et_bin_path})
    endforeach()
endfunction()
generate_error_tables()
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
add_custom_command(
@ -634,12 +633,12 @@ file(MAKE_DIRECTORY
SET(KRBHDEP
"${KRB5_SOURCE_DIR}/include/krb5/krb5.hin"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/krb5_err.h"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/k5e1_err.h"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/kdb5_err.h"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/kv5m_err.h"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/krb524_err.h"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/asn1_err.h"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/krb5_err.h"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/k5e1_err.h"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/kdb5_err.h"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/kv5m_err.h"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/krb524_err.h"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/asn1_err.h"
)
# cmake < 3.18 does not have 'cat' command
@ -656,6 +655,11 @@ target_include_directories(_krb5 SYSTEM BEFORE PUBLIC
"${CMAKE_CURRENT_BINARY_DIR}/include"
)
target_compile_options(_krb5 PRIVATE
# For '#include "file.h"'
-iquote "${CMAKE_CURRENT_BINARY_DIR}/include_private"
)
target_include_directories(_krb5 PRIVATE
"${CMAKE_CURRENT_BINARY_DIR}/include_private" # For autoconf.h and other generated headers.
${KRB5_SOURCE_DIR}

2
contrib/libcxx vendored

@ -1 +1 @@
Subproject commit 61e60294b1de01483caa9f5d00f437c99b674de6
Subproject commit 172b2ae074f6755145b91c53a95c8540c1468239

View File

@ -18,12 +18,14 @@ set(SRCS
"${LIBCXX_SOURCE_DIR}/src/filesystem/directory_iterator.cpp"
"${LIBCXX_SOURCE_DIR}/src/filesystem/int128_builtins.cpp"
"${LIBCXX_SOURCE_DIR}/src/filesystem/operations.cpp"
"${LIBCXX_SOURCE_DIR}/src/format.cpp"
"${LIBCXX_SOURCE_DIR}/src/functional.cpp"
"${LIBCXX_SOURCE_DIR}/src/future.cpp"
"${LIBCXX_SOURCE_DIR}/src/hash.cpp"
"${LIBCXX_SOURCE_DIR}/src/ios.cpp"
"${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp"
"${LIBCXX_SOURCE_DIR}/src/iostream.cpp"
"${LIBCXX_SOURCE_DIR}/src/legacy_pointer_safety.cpp"
"${LIBCXX_SOURCE_DIR}/src/locale.cpp"
"${LIBCXX_SOURCE_DIR}/src/memory.cpp"
"${LIBCXX_SOURCE_DIR}/src/mutex.cpp"
@ -33,6 +35,9 @@ set(SRCS
"${LIBCXX_SOURCE_DIR}/src/random.cpp"
"${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp"
"${LIBCXX_SOURCE_DIR}/src/regex.cpp"
"${LIBCXX_SOURCE_DIR}/src/ryu/d2fixed.cpp"
"${LIBCXX_SOURCE_DIR}/src/ryu/d2s.cpp"
"${LIBCXX_SOURCE_DIR}/src/ryu/f2s.cpp"
"${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp"
"${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp"
"${LIBCXX_SOURCE_DIR}/src/string.cpp"
@ -49,7 +54,9 @@ set(SRCS
add_library(cxx ${SRCS})
set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake")
target_include_directories(cxx SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>)
# Each path must sit entirely inside its $<BUILD_INTERFACE:...> expression.
# Text placed after the closing '>' is appended unconditionally, so outside the
# build interface the entry would collapse to a bare "/src".
target_include_directories(cxx SYSTEM BEFORE PUBLIC
    $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>
    $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/src>)
target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)
# Enable capturing stack traces for all exceptions.

2
contrib/libcxxabi vendored

@ -1 +1 @@
Subproject commit df8f1e727dbc9e2bedf2282096fa189dc3fe0076
Subproject commit 6eb7cc7a7bdd779e6734d1b9fb451df2274462d7

View File

@ -1,24 +1,24 @@
set(LIBCXXABI_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libcxxabi")
set(SRCS
"${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_vector.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/stdlib_new_delete.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp"
)
add_library(cxxabi ${SRCS})
@ -30,6 +30,7 @@ target_compile_options(cxxabi PRIVATE -w)
target_include_directories(cxxabi SYSTEM BEFORE
PUBLIC $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/include>
PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/include>
PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/src>
)
target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY)
target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast.

View File

@ -1,12 +1,9 @@
# During cross-compilation in our CI we have to use llvm-tblgen and other building tools
# tools to be build for host architecture and everything else for target architecture (e.g. AArch64)
# Possible workaround is to use llvm-tblgen from some package...
# But lets just enable LLVM for native builds
if (CMAKE_CROSSCOMPILING OR SANITIZE STREQUAL "undefined")
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined")
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
else()
set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
endif()
option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})
if (NOT ENABLE_EMBEDDED_COMPILER)

2
contrib/poco vendored

@ -1 +1 @@
Subproject commit 520a90e02e3e5cb90afeae1846d161dbc508a6f1
Subproject commit 008b16469471d55b176db181756c94e3f14dd2dc

2
contrib/replxx vendored

@ -1 +1 @@
Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d
Subproject commit 6f0b6f151ae2a044625ae93acd19ca365fcea64d

2
contrib/unixodbc vendored

@ -1 +1 @@
Subproject commit b0ad30f7f6289c12b76f04bfb9d466374bb32168
Subproject commit a2cd5395e8c7f7390025ec93af5bfebef3fb5fcd

View File

@ -20,7 +20,7 @@ ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml
EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
[Install]
# ClickHouse should not start from the rescue shell (rescue.target).

View File

@ -1,4 +1,3 @@
# rebuild in #33610
# docker build -t clickhouse/docs-check .
ARG FROM_TAG=latest
FROM clickhouse/docs-builder:$FROM_TAG

74
docker/keeper/Dockerfile Normal file
View File

@ -0,0 +1,74 @@
# Donor stage: exposes the architecture-specific library directory
# (<rarch>-linux-gnu) under the fixed name /lib/linux-gnu, so files can be
# copied out of this stage through one architecture-independent path.
FROM ubuntu:20.04 AS glibc-donor
ARG TARGETARCH
# TARGETARCH falls back to amd64 when it is not provided.
# An unknown architecture fails here with a clear message instead of creating
# a dangling "-linux-gnu" symlink that only breaks later.
RUN arch=${TARGETARCH:-amd64} \
    && case $arch in \
        amd64) rarch=x86_64 ;; \
        arm64) rarch=aarch64 ;; \
        *) echo "Unsupported TARGETARCH: $arch" >&2 ; exit 1 ;; \
    esac \
    && ln -s "${rarch}-linux-gnu" /lib/linux-gnu
# Final stage: Alpine base that receives libc and related libraries from the glibc-donor stage.
FROM alpine
# NOTE(review): CLICKHOUSE_CONFIG points at the clickhouse-server config path
# although this image installs clickhouse-keeper - confirm this is intentional.
ENV LANG=en_US.UTF-8 \
LANGUAGE=en_US:en \
LC_ALL=en_US.UTF-8 \
TZ=UTC \
CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml
# Shared libraries, NSS modules and the ld-2.31.so loader, read through the
# donor stage's /lib/linux-gnu path and placed into /lib/.
COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/
COPY --from=glibc-donor /etc/nsswitch.conf /etc/
COPY entrypoint.sh /entrypoint.sh
# Link the copied loader to the per-architecture loader file name
# (/lib64/ld-linux-x86-64.so.2 on amd64, /lib/ld-linux-aarch64.so.1 on arm64).
# TARGETARCH falls back to amd64 when it is not provided.
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \
arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \
esac
ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
ARG VERSION="22.4.1.917"
ARG PACKAGES="clickhouse-keeper"
# The user/group are pre-created explicitly with a fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case the entrypoint
# can't do chown, and owners of mounted volumes should be configured externally.
# We do that in advance at the beginning of the Dockerfile, before any packages are
# installed, to prevent those uid / gid from being picked by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
# Download and unpack every package listed in PACKAGES, then create the
# clickhouse user/group (uid/gid 101) and the data, log and config directories.
# For each package the architecture-suffixed tarball is tried first
# (--strip-components=1); if that fails, the un-suffixed tarball is used instead
# (--strip-components=2). The build stops if both attempts fail.
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& for package in ${PACKAGES}; do \
{ \
{ echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \
} || \
{ echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \
} ; \
} || exit 1 \
; done \
&& rm /tmp/*.tgz /install -r \
&& addgroup -S -g 101 clickhouse \
&& adduser -S -h /var/lib/clickhouse -s /bin/bash -G clickhouse -g "ClickHouse keeper" -u 101 clickhouse \
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper \
&& chown clickhouse:clickhouse /var/lib/clickhouse \
&& chown root:clickhouse /var/log/clickhouse-keeper \
&& chmod +x /entrypoint.sh \
&& apk add --no-cache su-exec bash tzdata \
&& cp /usr/share/zoneinfo/UTC /etc/localtime \
&& echo "UTC" > /etc/timezone \
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper
# Image metadata: published ports, persistent directories and the startup script.
EXPOSE 2181 10181 44444
VOLUME /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper
ENTRYPOINT ["/entrypoint.sh"]

View File

@ -0,0 +1 @@
Dockerfile

View File

@ -0,0 +1,93 @@
#!/bin/bash

set +x
set -eo pipefail
shopt -s nullglob

# Ownership fix-up of the working directories is enabled unless the caller
# passes CLICKHOUSE_DO_NOT_CHOWN=1.
case "${CLICKHOUSE_DO_NOT_CHOWN:-0}" in
    1) DO_CHOWN=0 ;;
    *) DO_CHOWN=1 ;;
esac

# Target uid/gid: taken from the environment, otherwise from the clickhouse account.
CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}"
CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}"

# support --user
if [ "$(id -u)" != "0" ]; then
    # Not running as root: keep the current identity, no privilege-drop helper
    # is used and ownership cannot be changed.
    USER="$(id -u)"
    GROUP="$(id -g)"
    gosu=""
    DO_CHOWN=0
else
    # Running as root: commands are prefixed with the first available helper
    # (gosu is preferred over su-exec) to run them as the target uid/gid.
    USER=$CLICKHOUSE_UID
    GROUP=$CLICKHOUSE_GID
    gosu=""
    for helper in gosu su-exec; do
        if command -v "$helper" &> /dev/null; then
            gosu="$helper $USER:$GROUP"
            break
        fi
    done
    if [ -z "$gosu" ]; then
        echo "No gosu/su-exec detected!"
        exit 1
    fi
fi
KEEPER_CONFIG="${KEEPER_CONFIG:-/etc/clickhouse-keeper/config.yaml}"

# A config file that exists must also be readable by the user the process will run as.
if [ -f "$KEEPER_CONFIG" ]; then
    if ! $gosu test -f "$KEEPER_CONFIG" -a -r "$KEEPER_CONFIG"; then
        echo "Configuration file '$KEEPER_CONFIG' isn't readable by user with id '$USER'"
        exit 1
    fi
fi

DATA_DIR="${CLICKHOUSE_DATA_DIR:-/var/lib/clickhouse}"
LOG_DIR="${LOG_DIR:-/var/log/clickhouse-keeper}"
LOG_PATH="${LOG_DIR}/clickhouse-keeper.log"
ERROR_LOG_PATH="${LOG_DIR}/clickhouse-keeper.err.log"
COORDINATION_LOG_DIR="${DATA_DIR}/coordination/log"
COORDINATION_SNAPSHOT_DIR="${DATA_DIR}/coordination/snapshots"
CLICKHOUSE_WATCHDOG_ENABLE=${CLICKHOUSE_WATCHDOG_ENABLE:-0}

for dir in "$DATA_DIR" "$LOG_DIR" "$TMP_DIR" "$COORDINATION_LOG_DIR" "$COORDINATION_SNAPSHOT_DIR"; do
    # Entries that expand to an empty string are skipped.
    if [ -z "$dir" ]; then
        continue
    fi

    # Every directory must exist before the service starts.
    mkdir -p "$dir" || {
        echo "Couldn't create necessary directory: $dir"
        exit 1
    }

    if [ "$DO_CHOWN" != "1" ]; then
        # Ownership is left alone, so the directory must already be usable by the target user.
        if ! $gosu test -d "$dir" -a -w "$dir" -a -r "$dir"; then
            echo "Necessary directory '$dir' isn't accessible by user with id '$USER'"
            exit 1
        fi
        continue
    fi

    # Recursive chown may be slow, so it only runs when the directory itself
    # does not already carry the expected owner and group.
    if ! { [ "$(stat -c %u "$dir")" = "$USER" ] && [ "$(stat -c %g "$dir")" = "$GROUP" ]; }; then
        chown -R "$USER:$GROUP" "$dir"
    fi
done
# If no args are passed to `docker run`, or the first argument starts with `--`,
# the user is passing clickhouse-keeper arguments.
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
    # Watchdog is launched by default, but does not send SIGINT to the main process,
    # so the container can't be finished by ctrl+c
    export CLICKHOUSE_WATCHDOG_ENABLE

    # Change into the configured data directory. DATA_DIR honours
    # CLICKHOUSE_DATA_DIR and defaults to /var/lib/clickhouse, so the default
    # behaviour is unchanged while an overridden location is now respected.
    cd "$DATA_DIR"

    # There is a config file. It is already tested with gosu (if it is readable by keeper user)
    if [ -f "$KEEPER_CONFIG" ]; then
        exec $gosu /usr/bin/clickhouse-keeper --config-file="$KEEPER_CONFIG" --log-file="$LOG_PATH" --errorlog-file="$ERROR_LOG_PATH" "$@"
    fi

    # There is no config file. Will use embedded one
    exec $gosu /usr/bin/clickhouse-keeper --log-file="$LOG_PATH" --errorlog-file="$ERROR_LOG_PATH" "$@"
fi

# Otherwise, we assume the user wants to run their own process, for example a `bash` shell to explore this image
exec "$@"

View File

@ -163,6 +163,7 @@ def parse_env_variables(
cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr")
cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc")
cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var")
cmake_flags.append("-DBUILD_STANDALONE_KEEPER=ON")
if is_release_build(build_type, package_type, sanitizer, split_binary):
cmake_flags.append("-DINSTALL_STRIPPED_BINARIES=ON")
@ -244,7 +245,7 @@ def parse_env_variables(
result.append(f"AUTHOR='{author}'")
if official:
cmake_flags.append("-DYANDEX_OFFICIAL_BUILD=1")
cmake_flags.append("-DCLICKHOUSE_OFFICIAL_BUILD=1")
result.append('CMAKE_FLAGS="' + " ".join(cmake_flags) + '"')

View File

@ -1,2 +0,0 @@
alpine-root/*
tgz-packages/*

View File

@ -1,122 +0,0 @@
FROM ubuntu:20.04
# ARG for quick switch to a given ubuntu mirror
ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ARG repository="deb https://packages.clickhouse.com/deb stable main"
ARG version=22.1.1.*
# set non-empty deb_location_url url to create a docker image
# from debs created by CI build, for example:
# docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://clickhouse-builds.s3.yandex.net/21852/069cfbff388b3d478d1a16dc7060b48073f5d522/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_deb/" -t filimonovq/clickhouse-server:pr21852
ARG deb_location_url=""
# set non-empty single_binary_location_url to create docker image
# from a single binary url (useful for non-standard builds - with sanitizers, for arm64).
# for example (run on aarch64 server):
# docker build . --network host --build-arg single_binary_location_url="https://builds.clickhouse.com/master/aarch64/clickhouse" -t altinity/clickhouse-server:master-testing-arm
# note: clickhouse-odbc-bridge is not supported there.
ARG single_binary_location_url=""
# see https://github.com/moby/moby/issues/4032#issuecomment-192327844
ARG DEBIAN_FRONTEND=noninteractive
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
# can't do chown and owners of mounted volumes should be configured externally.
# We do that in advance at the beginning of Dockerfile before any packages will be
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
# To drop privileges, we need 'su' command, that simply changes uid and gid.
# In fact, the 'su' command from Linux is not so simple, due to inherent vulnerability in Linux:
# https://ruderich.org/simon/notes/su-sudo-from-root-tty-hijacking
# It has to mitigate this drawback of Linux, and to do this, 'su' command is creating its own pseudo-terminal
# and forwarding commands. Due to some ridiculous circumstances, it does not work in Docker (or it does)
# and for these reasons people are using alternatives to the 'su' command in Docker,
# that don't mess with the terminal, don't care about closing the opened files, etc...
# but can only be safe to drop privileges inside Docker.
# The question - what implementation of 'su' command to use.
# It should be a simple script doing about just two syscalls.
# Some people tend to use 'gosu' tool that is written in Go.
# It is not used for several reasons:
# 1. Dependency on some foreign code in yet another programming language - does not sound alright.
# 2. Anselmo D. Adams suggested not to use it due to false positive alarms in some undisclosed security scanners.
COPY su-exec.c /su-exec.c
# Creates the clickhouse user/group (uid/gid 101), installs base packages and
# the repository key, then installs ClickHouse from exactly one source:
#   1. deb_location_url            - .deb files downloaded from a custom URL,
#   2. single_binary_location_url  - a single binary that installs itself,
#   3. otherwise                   - the apt repository in $repository.
# Afterwards su-exec is compiled with tcc (the compiler is purged again) and the
# data/log/config directories are created with permissive access bits.
#
# Fix: the "installing from custom url" echo was missing its trailing "&&", so
# "rm -rf /tmp/clickhouse_debs" was printed as echo arguments instead of being run.
RUN groupadd -r clickhouse --gid=101 \
    && useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
    && apt-get update \
    && apt-get install --yes --no-install-recommends \
        apt-transport-https \
        ca-certificates \
        dirmngr \
        gnupg \
        locales \
        wget \
        tzdata \
    && mkdir -p /etc/apt/sources.list.d \
    && apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \
    && echo $repository > /etc/apt/sources.list.d/clickhouse.list \
    && if [ -n "$deb_location_url" ]; then \
        echo "installing from custom url with deb packages: $deb_location_url" \
        && rm -rf /tmp/clickhouse_debs \
        && mkdir -p /tmp/clickhouse_debs \
        && wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-common-static_${version}_amd64.deb" -P /tmp/clickhouse_debs \
        && wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-client_${version}_all.deb" -P /tmp/clickhouse_debs \
        && wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-server_${version}_all.deb" -P /tmp/clickhouse_debs \
        && dpkg -i /tmp/clickhouse_debs/*.deb ; \
    elif [ -n "$single_binary_location_url" ]; then \
        echo "installing from single binary url: $single_binary_location_url" \
        && rm -rf /tmp/clickhouse_binary \
        && mkdir -p /tmp/clickhouse_binary \
        && wget --progress=bar:force:noscroll "$single_binary_location_url" -O /tmp/clickhouse_binary/clickhouse \
        && chmod +x /tmp/clickhouse_binary/clickhouse \
        && /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \
    else \
        echo "installing from repository: $repository" \
        && apt-get update \
        && apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \
        && apt-get install --allow-unauthenticated --yes --no-install-recommends \
            clickhouse-common-static=$version \
            clickhouse-client=$version \
            clickhouse-server=$version ; \
    fi \
    && apt-get install -y --no-install-recommends tcc libc-dev && \
        tcc /su-exec.c -o /bin/su-exec && \
        chown root:root /bin/su-exec && \
        chmod 0755 /bin/su-exec && \
        rm /su-exec.c && \
        apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev \
    && clickhouse-local -q 'SELECT * FROM system.build_options' \
    && rm -rf \
        /var/lib/apt/lists/* \
        /var/cache/debconf \
        /tmp/* \
    && apt-get clean \
    && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
    && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client
# we need to allow "others" access to clickhouse folder, because docker container
# can be started with arbitrary uid (openshift usecase)
RUN locale-gen en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8
ENV TZ UTC
RUN mkdir /docker-entrypoint-initdb.d
COPY docker_related_config.xml /etc/clickhouse-server/config.d/
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
EXPOSE 9000 8123 9009
VOLUME /var/lib/clickhouse
ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml
ENTRYPOINT ["/entrypoint.sh"]

1
docker/server/Dockerfile Symbolic link
View File

@ -0,0 +1 @@
Dockerfile.ubuntu

View File

@ -1,3 +1,14 @@
FROM ubuntu:20.04 AS glibc-donor
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
esac \
&& ln -s "${rarch}-linux-gnu" /lib/linux-gnu
FROM alpine
ENV LANG=en_US.UTF-8 \
@ -6,7 +17,24 @@ ENV LANG=en_US.UTF-8 \
TZ=UTC \
CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml
COPY alpine-root/ /
COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/
COPY --from=glibc-donor /etc/nsswitch.conf /etc/
COPY docker_related_config.xml /etc/clickhouse-server/config.d/
COPY entrypoint.sh /entrypoint.sh
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \
arm64) ln -sf /lib/ld-2.31.so /lib/ld-linux-aarch64.so.1 ;; \
esac
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="20.9.3.45"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
@ -15,9 +43,23 @@ COPY alpine-root/ /
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
RUN addgroup -S -g 101 clickhouse \
RUN arch=${TARGETARCH:-amd64} \
&& for package in ${PACKAGES}; do \
{ \
{ echo "Get ${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}-${arch}.tgz" -O "/tmp/${package}-${VERSION}-${arch}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}-${arch}.tgz" --strip-components=1 -C / ; \
} || \
{ echo "Fallback to ${REPOSITORY}/${package}-${VERSION}.tgz" \
&& wget -c -q "${REPOSITORY}/${package}-${VERSION}.tgz" -O "/tmp/${package}-${VERSION}.tgz" \
&& tar xvzf "/tmp/${package}-${VERSION}.tgz" --strip-components=2 -C / ; \
} ; \
} || exit 1 \
; done \
&& rm /tmp/*.tgz /install -r \
&& addgroup -S -g 101 clickhouse \
&& adduser -S -h /var/lib/clickhouse -s /bin/bash -G clickhouse -g "ClickHouse server" -u 101 clickhouse \
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
&& mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server/config.d /etc/clickhouse-server/users.d /etc/clickhouse-client /docker-entrypoint-initdb.d \
&& chown clickhouse:clickhouse /var/lib/clickhouse \
&& chown root:clickhouse /var/log/clickhouse-server \
&& chmod +x /entrypoint.sh \

View File

@ -0,0 +1,129 @@
FROM ubuntu:20.04

# Keeps apt non-interactive during the build only;
# see https://github.com/moby/moby/issues/4032#issuecomment-192327844
ARG DEBIAN_FRONTEND=noninteractive

COPY su-exec.c /su-exec.c

# Build argument for switching quickly to another ubuntu mirror.
ARG apt_archive="http://archive.ubuntu.com"

# One layer: point apt at the chosen mirror, create the clickhouse user/group
# (uid/gid 101), install the base packages, then build su-exec with tcc and
# purge the compiler again.
RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list \
    && groupadd -r clickhouse --gid=101 \
    && useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
    && apt-get update \
    && apt-get install --yes --no-install-recommends \
        apt-transport-https \
        ca-certificates \
        dirmngr \
        gnupg \
        locales \
        wget \
        tzdata \
    && apt-get install -y --no-install-recommends tcc libc-dev \
    && tcc /su-exec.c -o /bin/su-exec \
    && chown root:root /bin/su-exec \
    && chmod 0755 /bin/su-exec \
    && rm /su-exec.c \
    && apt-get purge -y --auto-remove tcc libc-dev libc-dev-bin libc6-dev linux-libc-dev \
    && apt-get clean
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION=22.1.1.*
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image
# from debs created by CI build, for example:
# docker build . --network host --build-arg VERSION="21.4.1.6282" --build-arg deb_location_url="https://clickhouse-builds.s3.yandex.net/21852/069cfbff388b3d478d1a16dc7060b48073f5d522/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_deb/" -t filimonovq/clickhouse-server:pr21852
ARG deb_location_url=""
# set non-empty single_binary_location_url to create docker image
# from a single binary url (useful for non-standard builds - with sanitizers, for arm64).
# for example (run on aarch64 server):
# docker build . --network host --build-arg single_binary_location_url="https://builds.clickhouse.com/master/aarch64/clickhouse" -t altinity/clickhouse-server:master-testing-arm
# note: clickhouse-odbc-bridge is not supported there.
ARG single_binary_location_url=""
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
# can't do chown and owners of mounted volumes should be configured externally.
# We do that in advance at the beginning of Dockerfile before any packages will be
# installed to prevent picking those uid / gid by some unrelated software.
# The same uid / gid (101) is used both for alpine and ubuntu.
# To drop privileges, we need 'su' command, that simply changes uid and gid.
# In fact, the 'su' command from Linux is not so simple, due to inherent vulnerability in Linux:
# https://ruderich.org/simon/notes/su-sudo-from-root-tty-hijacking
# It has to mitigate this drawback of Linux, and to do this, 'su' command is creating its own pseudo-terminal
# and forwarding commands. Due to some ridiculous circumstances, it does not work in Docker (or it does)
# and for these reasons people are using alternatives to the 'su' command in Docker,
# that don't mess with the terminal, don't care about closing the opened files, etc...
# but can only be safe to drop privileges inside Docker.
# The question - what implementation of 'su' command to use.
# It should be a simple script doing about just two syscalls.
# Some people tend to use 'gosu' tool that is written in Go.
# It is not used for several reasons:
# 1. Dependency on some foreign code in yet another programming language - does not sound alright.
# 2. Anselmo D. Adams suggested not to use it due to false positive alarms in some undisclosed security scanners.
# Installs ClickHouse from exactly one source, chosen by build arguments:
#   1. deb_location_url            - for every name in PACKAGES the
#                                    <arch>.deb is tried first, then the
#                                    _all.deb; the build stops if both fail,
#   2. single_binary_location_url  - a single binary that installs itself,
#   3. otherwise                   - the apt repository in REPOSITORY, with
#                                    every package pinned to VERSION.
# Afterwards the build options are printed, apt/tmp leftovers are removed and
# the data/log/config directories are created with permissive access bits.
# TARGETARCH falls back to amd64 when it is not provided.
#
# Fix: the "installing from custom url" echo was missing its trailing "&&", so
# "rm -rf /tmp/clickhouse_debs" was printed as echo arguments instead of being run.
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
    && if [ -n "${deb_location_url}" ]; then \
        echo "installing from custom url with deb packages: ${deb_location_url}" \
        && rm -rf /tmp/clickhouse_debs \
        && mkdir -p /tmp/clickhouse_debs \
        && for package in ${PACKAGES}; do \
            { wget --progress=bar:force:noscroll "${deb_location_url}/${package}_${VERSION}_${arch}.deb" -P /tmp/clickhouse_debs || \
                wget --progress=bar:force:noscroll "${deb_location_url}/${package}_${VERSION}_all.deb" -P /tmp/clickhouse_debs ; } \
            || exit 1 \
        ; done \
        && dpkg -i /tmp/clickhouse_debs/*.deb ; \
    elif [ -n "${single_binary_location_url}" ]; then \
        echo "installing from single binary url: ${single_binary_location_url}" \
        && rm -rf /tmp/clickhouse_binary \
        && mkdir -p /tmp/clickhouse_binary \
        && wget --progress=bar:force:noscroll "${single_binary_location_url}" -O /tmp/clickhouse_binary/clickhouse \
        && chmod +x /tmp/clickhouse_binary/clickhouse \
        && /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \
    else \
        mkdir -p /etc/apt/sources.list.d \
        && apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \
        && echo ${REPOSITORY} > /etc/apt/sources.list.d/clickhouse.list \
        && echo "installing from repository: ${REPOSITORY}" \
        && apt-get update \
        && apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \
        && for package in ${PACKAGES}; do \
            packages="${packages} ${package}=${VERSION}" \
        ; done \
        && apt-get install --allow-unauthenticated --yes --no-install-recommends ${packages} || exit 1 \
    ; fi \
    && clickhouse-local -q 'SELECT * FROM system.build_options' \
    && rm -rf \
        /var/lib/apt/lists/* \
        /var/cache/debconf \
        /tmp/* \
    && mkdir -p /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client \
    && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-server /etc/clickhouse-server /etc/clickhouse-client
# we need to allow "others" access to clickhouse folder, because docker container
# can be started with arbitrary uid (openshift usecase)
# Generate the locale the environment below refers to.
RUN locale-gen en_US.UTF-8
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US:en
ENV LC_ALL=en_US.UTF-8
ENV TZ=UTC

# Directory for init scripts, plus the docker-specific server config and the startup script.
RUN mkdir /docker-entrypoint-initdb.d
COPY docker_related_config.xml /etc/clickhouse-server/config.d/
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

EXPOSE 9000 8123 9009
VOLUME /var/lib/clickhouse

ENV CLICKHOUSE_CONFIG=/etc/clickhouse-server/config.xml

ENTRYPOINT ["/entrypoint.sh"]

View File

@ -1,63 +0,0 @@
#!/bin/bash
# Builds the "${DOCKER_IMAGE}:${VERSION}-alpine" image:
#   1. downloads the ClickHouse tgz packages from REPO_URL,
#   2. unpacks them into a temporary root folder next to this script,
#   3. copies glibc libraries and nsswitch.conf out of ubuntu:20.04 into that root,
#   4. runs `docker build` with Dockerfile.alpine and removes the temporary root.
# REPO_CHANNEL, REPO_URL, VERSION and DOCKER_IMAGE can be overridden from the environment.
# Trace every command. Only `set -x` is enabled here (no `set -e`), so a failing
# step such as a failed download does not abort the script.
set -x
REPO_CHANNEL="${REPO_CHANNEL:-stable}" # lts / testing / prestable / etc
REPO_URL="${REPO_URL:-"https://repo.yandex.ru/clickhouse/tgz/${REPO_CHANNEL}"}"
VERSION="${VERSION:-20.9.3.45}"
DOCKER_IMAGE="${DOCKER_IMAGE:-clickhouse/clickhouse-server}"
# where original files live
DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}"
# we will create root for our image here
CONTAINER_ROOT_FOLDER="${DOCKER_BUILD_FOLDER}/alpine-root"
# clean up the root from old runs, it's reconstructed each time
rm -rf "$CONTAINER_ROOT_FOLDER"
mkdir -p "$CONTAINER_ROOT_FOLDER"
# where to put downloaded tgz
# (unlike the root folder, this folder is not removed at the end of the script)
TGZ_PACKAGES_FOLDER="${DOCKER_BUILD_FOLDER}/tgz-packages"
mkdir -p "$TGZ_PACKAGES_FOLDER"
PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" )
# download tars from the repo
for package in "${PACKAGES[@]}"
do
wget -c -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz"
done
# unpack tars
# (--strip-components=2 drops the two leading path components of every archive member)
for package in "${PACKAGES[@]}"
do
tar xvzf "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz" --strip-components=2 -C "$CONTAINER_ROOT_FOLDER"
done
# prepare few more folders
mkdir -p "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/users.d" \
"${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d" \
"${CONTAINER_ROOT_FOLDER}/var/log/clickhouse-server" \
"${CONTAINER_ROOT_FOLDER}/var/lib/clickhouse" \
"${CONTAINER_ROOT_FOLDER}/docker-entrypoint-initdb.d" \
"${CONTAINER_ROOT_FOLDER}/lib64"
# copy the docker-specific server config and the entrypoint into the image root
cp "${DOCKER_BUILD_FOLDER}/docker_related_config.xml" "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d/"
cp "${DOCKER_BUILD_FOLDER}/entrypoint.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh"
## get glibc components from ubuntu 20.04 and put them to expected place
docker pull ubuntu:20.04
# The container is only created (never started); it serves as a source for `docker cp`.
# NOTE(review): there is no explicit `docker rm` for it below — confirm that `--rm`
# actually cleans up a container that is never run, otherwise it is left behind.
ubuntu20image=$(docker create --rm ubuntu:20.04)
# -L makes `docker cp` follow symlinks, so the real library files are copied
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libc.so.6 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libdl.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libm.so.6 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/librt.so.1 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_files.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64"
docker cp -L "${ubuntu20image}":/etc/nsswitch.conf "${CONTAINER_ROOT_FOLDER}/etc"
# Build the final image; the build context is the folder of this script.
# NOTE(review): `-f Dockerfile.alpine` is a bare file name while the context is
# $DOCKER_BUILD_FOLDER — confirm it resolves when the script is started from another directory.
docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull
# the temporary image root is no longer needed once the image is built
rm -rf "$CONTAINER_ROOT_FOLDER"

View File

@ -1,47 +0,0 @@
# Since right now we can't set volumes to the docker during build, we split building container in stages:
# 1. build base container
# 2. run base container with mounted volumes
# 3. commit container as image
# 4. build final container atop that image
# Middle steps are performed by the bash script.
# Stage 1: base image with locales and gosu; the packages are installed when it is run (see CMD below).
FROM ubuntu:18.04 as clickhouse-server-base
ARG gosu_ver=1.14
# the .deb packages are expected to be mounted here when the base container is run
VOLUME /packages/
# update to allow installing dependencies of clickhouse automatically
RUN apt update; \
DEBIAN_FRONTEND=noninteractive \
apt install -y locales;
# gosu binary is downloaded at build time; it is made executable in the final stage
ADD https://github.com/tianon/gosu/releases/download/${gosu_ver}/gosu-amd64 /bin/gosu
RUN locale-gen en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US:en
ENV LC_ALL en_US.UTF-8
# installing via apt to simulate a real-world scenario, where the user installs the deb package and all its dependencies automatically.
# This is a CMD (not RUN): the installation happens when the base container is started with /packages mounted.
CMD DEBIAN_FRONTEND=noninteractive \
apt install -y \
/packages/clickhouse-common-static_*.deb \
/packages/clickhouse-server_*.deb ;
# Stage 2: final image, built on top of the committed "postinstall" image (step 3 above).
FROM clickhouse-server-base:postinstall as clickhouse-server
RUN mkdir /docker-entrypoint-initdb.d
COPY docker_related_config.xml /etc/clickhouse-server/config.d/
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x \
/entrypoint.sh \
/bin/gosu
EXPOSE 9000 8123 9009
VOLUME /var/lib/clickhouse
ENV CLICKHOUSE_CONFIG /etc/clickhouse-server/config.xml
ENTRYPOINT ["/entrypoint.sh"]

View File

@ -267,6 +267,7 @@ function run_tests
local test_opts=(
--hung-check
--fast-tests-only
--no-random-settings
--no-long
--testname
--shard

View File

@ -13,7 +13,7 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir"
repo_dir=ch
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-13_debug_none_bundled_unsplitted_disable_False_binary"}
BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
function clone
{

View File

@ -1,8 +1,10 @@
# docker build -t clickhouse/mysql-js-client .
# MySQL JavaScript client docker container
FROM node:8
FROM node:16.14.2
WORKDIR /usr/app
RUN npm install mysql
COPY ./test.js test.js
COPY ./test.js ./test.js

View File

@ -2,7 +2,7 @@
set -euo pipefail
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-13_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-13_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}
@ -10,7 +10,7 @@ if [ -z "$CLICKHOUSE_REPO_PATH" ]; then
CLICKHOUSE_REPO_PATH=ch
rm -rf ch ||:
mkdir ch ||:
wget -nv -nd -c "https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/repo/clickhouse_no_subs.tar.gz"
wget -nv -nd -c "https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/repo/clickhouse_no_subs.tar.gz"
tar -C ch --strip-components=1 -xf clickhouse_no_subs.tar.gz
ls -lath ||:
fi

View File

@ -1294,15 +1294,15 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv')
select '' test_name,
'$(sed -n 's/.*<!--message: \(.*\)-->/\1/p' report.html)' test_status,
0 test_duration_ms,
'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#fail1' report_url
'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#fail1' report_url
union all
select test || ' #' || toString(query_index), 'slower' test_status, 0 test_duration_ms,
'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#changes-in-performance.'
'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#changes-in-performance.'
|| test || '.' || toString(query_index) report_url
from queries where changed_fail != 0 and diff > 0
union all
select test || ' #' || toString(query_index), 'unstable' test_status, 0 test_duration_ms,
'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#unstable-queries.'
'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#unstable-queries.'
|| test || '.' || toString(query_index) report_url
from queries where unstable_fail != 0
)
@ -1378,7 +1378,7 @@ $REF_SHA $SHA_TO_TEST $(numactl --hardware | sed -n 's/^available:[[:space:]]\+/
EOF
# Also insert some data about the check into the CI checks table.
"${client[@]}" --query "INSERT INTO "'"'"gh-data"'"'".checks FORMAT TSVWithNamesAndTypes" \
"${client[@]}" --query "INSERT INTO "'"'"default"'"'".checks FORMAT TSVWithNamesAndTypes" \
< ci-checks.tsv
set -x

View File

@ -16,26 +16,17 @@ right_sha=$4
datasets=${CHPC_DATASETS-"hits1 hits10 hits100 values"}
declare -A dataset_paths
if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
dataset_paths["hits10"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_10m_single/partitions/hits_10m_single.tar"
dataset_paths["hits100"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_100m_single/partitions/hits_100m_single.tar"
dataset_paths["hits1"]="https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits_v1.tar"
dataset_paths["values"]="https://clickhouse-datasets.s3.amazonaws.com/values_with_expressions/partitions/test_values.tar"
else
dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar"
dataset_paths["hits100"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar"
dataset_paths["hits1"]="https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar"
dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_expressions/partitions/test_values.tar"
fi
dataset_paths["hits10"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_10m_single/partitions/hits_10m_single.tar"
dataset_paths["hits100"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_100m_single/partitions/hits_100m_single.tar"
dataset_paths["hits1"]="https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits_v1.tar"
dataset_paths["values"]="https://clickhouse-datasets.s3.amazonaws.com/values_with_expressions/partitions/test_values.tar"
function download
{
# Historically there were various paths for the performance test package.
# Test all of them.
declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz"
"https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/clickhouse_build_check/performance/performance.tgz"
)
declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz")
for path in "${urls_to_try[@]}"
do

View File

@ -4,7 +4,7 @@ set -ex
CHPC_CHECK_START_TIMESTAMP="$(date +%s)"
export CHPC_CHECK_START_TIMESTAMP
S3_URL=${S3_URL:="https://clickhouse-builds.s3.yandex.net"}
S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"}
COMMON_BUILD_PREFIX="/clickhouse_build_check"
if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
@ -64,9 +64,7 @@ function find_reference_sha
# Historically there were various path for the performance test package,
# test all of them.
unset found
declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz"
"https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/clickhouse_build_check/performance/performance.tgz"
)
declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz")
for path in "${urls_to_try[@]}"
do
if curl_with_retry "$path"

View File

@ -11,7 +11,7 @@ RUN apt-get update -y \
COPY s3downloader /s3downloader
ENV S3_URL="https://clickhouse-datasets.s3.yandex.net"
ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
ENV DATASETS="hits visits"
ENV EXPORT_S3_STORAGE_POLICIES=1

View File

@ -115,7 +115,7 @@ function run_tests()
fi
set +e
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \
clickhouse-test -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \
--skip 00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" \
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt

View File

@ -10,7 +10,7 @@ import requests
import tempfile
DEFAULT_URL = 'https://clickhouse-datasets.s3.yandex.net'
DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com'
AVAILABLE_DATASETS = {
'hits': 'hits_v1.tar',

View File

@ -131,8 +131,23 @@ clickhouse-client -q "system flush logs" ||:
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:
pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz &
clickhouse-client -q "select * from system.query_log format TSVWithNamesAndTypes" | pigz > /test_output/query-log.tsv.gz &
clickhouse-client -q "select * from system.query_thread_log format TSVWithNamesAndTypes" | pigz > /test_output/query-thread-log.tsv.gz &
# Compress tables.
#
# NOTE:
# - that due to tests with s3 storage we cannot use /var/lib/clickhouse/data
# directly
# - even though CI auto-compresses some files (but not *.tsv), it does this only
# for files >64MB; we want these files to be compressed explicitly
for table in query_log zookeeper_log trace_log
do
clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz &
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.1.tsv.gz &
clickhouse-client --port 29000 -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.2.tsv.gz &
fi
done
wait ||:
# Also export trace log in flamegraph-friendly format.
for trace_type in CPU Memory Real
@ -161,14 +176,6 @@ fi
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
# Replace the engine with Ordinary to avoid extra symlinks stuff in artifacts.
# (so that clickhouse-local --path can read it w/o extra care).
sed -i -e "s/ATTACH DATABASE _ UUID '[^']*'/ATTACH DATABASE system/" -e "s/Atomic/Ordinary/" /var/lib/clickhouse/metadata/system.sql
for table in text_log query_log zookeeper_log trace_log; do
sed -i "s/ATTACH TABLE _ UUID '[^']*'/ATTACH TABLE $table/" /var/lib/clickhouse/metadata/system/${table}.sql
tar -chf /test_output/${table}_dump.tar /var/lib/clickhouse/metadata/system.sql /var/lib/clickhouse/metadata/system/${table}.sql /var/lib/clickhouse/data/system/${table} ||:
done
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server1.log ||:
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server2.log ||:
@ -179,8 +186,6 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
rm /var/log/clickhouse-server/clickhouse-server2.log
mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:
mv /var/log/clickhouse-server/stderr2.log /test_output/ ||:
tar -chf /test_output/zookeeper_log_dump1.tar /var/lib/clickhouse1/data/system/zookeeper_log ||:
tar -chf /test_output/zookeeper_log_dump2.tar /var/lib/clickhouse2/data/system/zookeeper_log ||:
tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||:
tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||:
fi

View File

@ -41,6 +41,7 @@ sleep 5
./mc admin user add clickminio test testtest
./mc admin policy set clickminio readwrite user=test
./mc mb clickminio/test
./mc policy set public clickminio/test
# Upload data to Minio. By default after unpacking all tests will in

View File

@ -29,7 +29,7 @@ COPY ./download_previous_release /download_previous_release
COPY run.sh /
ENV DATASETS="hits visits"
ENV S3_URL="https://clickhouse-datasets.s3.yandex.net"
ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
ENV EXPORT_S3_STORAGE_POLICIES=1
CMD ["/bin/bash", "/run.sh"]

View File

@ -1,86 +0,0 @@
#!/bin/sh
# Builds the stateless test-runner image (and optionally a server image) from local
# packages, then creates and runs the docker-compose setup for the test runner.
# Positional arguments:
#   $1 - path to the ClickHouse docker directory
#   $2 - path to the directory with packages, or the literal flag --no-rebuild
#   $3 - (optional) server image to use; when empty, "clickhouse/server:local" is used
set -e -x
# Not sure why shellcheck complains that rc is not assigned before it is referenced.
# shellcheck disable=SC2154
trap 'rc=$?; echo EXITED WITH: $rc; exit $rc' EXIT
# CLI option to prevent rebuilding images, just re-run tests with images leftover from previous time
readonly NO_REBUILD_FLAG="--no-rebuild"
readonly CLICKHOUSE_DOCKER_DIR="$(realpath "${1}")"
readonly CLICKHOUSE_PACKAGES_ARG="${2}"
CLICKHOUSE_SERVER_IMAGE="${3}"
# $2 is resolved to a real path only when it is not the --no-rebuild flag
if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then
readonly CLICKHOUSE_PACKAGES_DIR="$(realpath "${2}")" # or --no-rebuild
fi
# In order to allow packages directory to be anywhere, and to reduce amount of context sent to the docker daemon,
# all images are built in multiple stages:
# 1. build base image, install dependencies
# 2. run image with volume mounted, install what needed from those volumes
# 3. tag container as image
# 4. [optional] build another image atop of tagged.
# TODO: optionally mount most recent clickhouse-test and queries directory from local machine
# Test-runner image: build the base stage, run it with the packages mounted, commit the result.
if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then
docker build --network=host \
-f "${CLICKHOUSE_DOCKER_DIR}/test/stateless/clickhouse-statelest-test-runner.Dockerfile" \
--target clickhouse-test-runner-base \
-t clickhouse-test-runner-base:preinstall \
"${CLICKHOUSE_DOCKER_DIR}/test/stateless"
docker rm -f clickhouse-test-runner-installing-packages || true
docker run --network=host \
-v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \
--name clickhouse-test-runner-installing-packages \
clickhouse-test-runner-base:preinstall
docker commit clickhouse-test-runner-installing-packages clickhouse-statelest-test-runner:local
docker rm -f clickhouse-test-runner-installing-packages || true
fi
# # Create a bind-volume to the clickhouse-test script file
# docker volume create --driver local --opt type=none --opt device=/home/enmk/proj/ClickHouse_master/tests/clickhouse-test --opt o=bind clickhouse-test-script-volume
# docker volume create --driver local --opt type=none --opt device=/home/enmk/proj/ClickHouse_master/tests/queries --opt o=bind clickhouse-test-queries-dir-volume
# Build server image (optional) from local packages
# (only when no server image was passed as $3; with --no-rebuild the default name is used without building)
if [ -z "${CLICKHOUSE_SERVER_IMAGE}" ]; then
CLICKHOUSE_SERVER_IMAGE="clickhouse/server:local"
if [ "${CLICKHOUSE_PACKAGES_ARG}" != "${NO_REBUILD_FLAG}" ]; then
docker build --network=host \
-f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \
--target clickhouse-server-base \
-t clickhouse-server-base:preinstall \
"${CLICKHOUSE_DOCKER_DIR}/server"
docker rm -f clickhouse_server_base_installing_server || true
docker run --network=host -v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \
--name clickhouse_server_base_installing_server \
clickhouse-server-base:preinstall
docker commit clickhouse_server_base_installing_server clickhouse-server-base:postinstall
docker build --network=host \
-f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \
--target clickhouse-server \
-t "${CLICKHOUSE_SERVER_IMAGE}" \
"${CLICKHOUSE_DOCKER_DIR}/server"
fi
fi
# remove a test-runner container left over from a previous run, if any
docker rm -f test-runner || true
# NOTE(review): no -f is passed here, unlike the create/run calls below — confirm
# which compose file this resolves to (and that one exists, since `set -e` is active).
docker-compose down
# create the services, then run the test-runner service; the server image is passed via the environment
CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \
docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \
create \
--build --force-recreate
CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \
docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \
run \
--name test-runner \
test-runner

View File

@ -1,34 +0,0 @@
# Compose setup for the stateless tests: one clickhouse-server service and one
# test-runner service that depends on it. The server image comes from the
# CLICKHOUSE_SERVER_IMAGE environment variable; extra clickhouse-test arguments
# come from CLICKHOUSE_TEST_ARGS.
version: "2"
services:
clickhouse-server:
image: ${CLICKHOUSE_SERVER_IMAGE}
expose:
- "8123" # HTTP
- "9000" # TCP
- "9009" # HTTP-interserver
restart: "no"
test-runner:
image: clickhouse-statelest-test-runner:local
restart: "no"
depends_on:
- clickhouse-server
environment:
# these are used by clickhouse-test to point clickhouse-client to the right server
- CLICKHOUSE_HOST=clickhouse-server
# NOTE(review): 9009 is labelled "HTTP-interserver" in the server's expose list above,
# while 9000 is labelled "TCP" — confirm this is the intended port for clickhouse-client.
- CLICKHOUSE_PORT=9009
- CLICKHOUSE_TEST_HOST_EXPOSED_PORT=51234
expose:
# port for any test to serve data to clickhouse-server on rare occasion (like URL-engine tables in 00646),
# should match value of CLICKHOUSE_TEST_HOST_EXPOSED_PORT above
- "51234"
# NOTE: Dev-mode: mount newest versions of the queries and clickhouse-test script into container.
# volumes:
# - /home/enmk/proj/ClickHouse_master/tests/queries:/usr/share/clickhouse-test/queries:ro
# - /home/enmk/proj/ClickHouse_master/tests/clickhouse-test:/usr/bin/clickhouse-test:ro
# String-form instead of list-form to allow multiple arguments in "${CLICKHOUSE_TEST_ARGS}"
entrypoint: "clickhouse-test ${CLICKHOUSE_TEST_ARGS}"

View File

@ -137,7 +137,7 @@ CREATE TABLE test.test_orc
`f_array_array_float` Array(Array(Float32)),
`day` String
)
ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
PARTITION BY day
```

View File

@ -688,7 +688,7 @@ Tags:
- `volume_name_N` — Volume name. Volume names must be unique.
- `disk` — a disk within a volume.
- `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume's disks. If the size of a merged part is estimated to be bigger than `max_data_part_size_bytes`, then this part will be written to the next volume. Basically, this feature allows keeping new/small parts on a hot (SSD) volume and moving them to a cold (HDD) volume when they reach a large size. Do not use this setting if your policy has only one volume.
- `move_factor` — when the amount of available space gets lower than this factor, data automatically start to move on the next volume if any (by default, 0.1).
- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved.
- `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks.
Configuration examples:

View File

@ -36,6 +36,7 @@ Example of configuration:
<access_key_id>AKIAIOSFODNN7EXAMPLE</access_key_id>
<secret_access_key> wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY</secret_access_key>
<format>CSV</format>
<url>https://s3.us-east-1.amazonaws.com/yourbucket/mydata/</url>
</s3_mydata>
</named_collections>
</clickhouse>
@ -44,12 +45,12 @@ Example of configuration:
### Example of using named connections with the s3 function
```sql
INSERT INTO FUNCTION s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz',
INSERT INTO FUNCTION s3(s3_mydata, filename = 'test_file.tsv.gz',
format = 'TSV', structure = 'number UInt64', compression_method = 'gzip')
SELECT * FROM numbers(10000);
SELECT count()
FROM s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz')
FROM s3(s3_mydata, filename = 'test_file.tsv.gz')
┌─count()─┐
│ 10000 │

View File

@ -1616,3 +1616,14 @@ Possible values:
Default value: `10000`.
## global_memory_usage_overcommit_max_wait_microseconds {#global_memory_usage_overcommit_max_wait_microseconds}
Sets maximum waiting time for global overcommit tracker.
Possible values:
- Positive integer.
Default value: `0`.

View File

@ -0,0 +1,31 @@
# Memory overcommit
Memory overcommit is an experimental technique intended to allow setting more flexible memory limits for queries.
The idea of this technique is to introduce settings which can represent guaranteed amount of memory a query can use.
When memory overcommit is enabled and the memory limit is reached ClickHouse will select the most overcommitted query and try to free memory by killing this query.
When the memory limit is reached, any query will wait for some time while attempting to allocate new memory.
If memory is freed before the timeout expires, the query continues execution. Otherwise an exception is thrown and the query is killed.
Selection of query to stop or kill is performed by either global or user overcommit trackers depending on what memory limit is reached.
## User overcommit tracker
User overcommit tracker finds a query with the biggest overcommit ratio in the user's query list.
Overcommit ratio is computed as number of allocated bytes divided by value of `max_guaranteed_memory_usage` setting.
Waiting timeout is set by `memory_usage_overcommit_max_wait_microseconds` setting.
**Example**
```sql
SELECT number FROM numbers(1000) GROUP BY number SETTINGS max_guaranteed_memory_usage=4000, memory_usage_overcommit_max_wait_microseconds=500
```
## Global overcommit tracker
Global overcommit tracker finds a query with the biggest overcommit ratio in the list of all queries.
In this case overcommit ratio is computed as number of allocated bytes divided by value of `max_guaranteed_memory_usage_for_user` setting.
Waiting timeout is set by `global_memory_usage_overcommit_max_wait_microseconds` parameter in the configuration file.

View File

@ -4220,10 +4220,36 @@ Possible values:
- 0 — Disabled.
- 1 — Enabled. The wait time equal shutdown_wait_unfinished config.
Default value: 0.
Default value: `0`.
## shutdown_wait_unfinished
The waiting time in seconds for currently handled connections when the server shuts down.
Default Value: 5.
Default Value: `5`.
## max_guaranteed_memory_usage
Maximum guaranteed memory usage for processing of single query.
It represents a soft limit that is applied when the hard limit is reached on the user level.
Zero means unlimited.
Read more about [memory overcommit](memory-overcommit.md).
Default value: `0`.
## memory_usage_overcommit_max_wait_microseconds
Maximum time a thread will wait for memory to be freed in the case of memory overcommit on a user level.
If the timeout is reached and memory is not freed, an exception is thrown.
Read more about [memory overcommit](memory-overcommit.md).
Default value: `0`.
## max_guaranteed_memory_usage_for_user
Maximum guaranteed memory usage for processing all concurrently running queries for the user.
It represents a soft limit that is applied when the hard limit is reached on the global level.
Zero means unlimited.
Read more about [memory overcommit](memory-overcommit.md).
Default value: `0`.

View File

@ -0,0 +1,48 @@
---
toc_priority: 108
---
# groupArraySorted {#groupArraySorted}
Returns an array with the first N items in ascending order.
``` sql
groupArraySorted(N)(column)
```
**Parameters**
- `N` — The number of elements to return.
If the parameter is omitted, default value 10 is used.
**Arguments**
- `column` — The value.
- `expr` — Optional. The field or expression to sort by. If not set, values are sorted by themselves.
**Example**
Gets the first 10 numbers:
``` sql
SELECT groupArraySorted(10)(number) FROM numbers(100)
```
``` text
┌─groupArraySorted(10)(number)─┐
│ [0,1,2,3,4,5,6,7,8,9] │
└──────────────────────────────┘
```
Or the last 10:
``` sql
SELECT groupArraySorted(10)(number, -number) FROM numbers(100)
```
``` text
┌─groupArraySorted(10)(number, negate(number))─┐
│ [99,98,97,96,95,94,93,92,91,90] │
└──────────────────────────────────────────────┘
```

View File

@ -35,6 +35,7 @@ ClickHouse-specific aggregate functions:
- [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md)
- [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
- [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
- [groupArraySorted](../../../sql-reference/aggregate-functions/reference/grouparraysorted.md)
- [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md)
- [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md)
- [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md)

View File

@ -393,6 +393,13 @@ This is a generalization of other functions named `toStartOf*`. For example,
`toStartOfInterval(t, INTERVAL 1 day)` returns the same as `toStartOfDay(t)`,
`toStartOfInterval(t, INTERVAL 15 minute)` returns the same as `toStartOfFifteenMinutes(t)` etc.
## toLastDayOfMonth {#toLastDayOfMonth}
Rounds up a date or date with time to the last day of the month.
Returns the date.
Alias: `LAST_DAY`.
## toTime {#totime}
Converts a date with time to a certain fixed date, while preserving the time.

View File

@ -77,7 +77,7 @@ A function configuration contains the following settings:
- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number.
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
- `return_type` - the type of a returned value.
- `return_name` - name of retuned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
- `return_name` - name of returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created.
- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.

View File

@ -2499,3 +2499,41 @@ Result:
│ 286 │
└──────────────────────────┘
```
## getTypeSerializationStreams {#getTypeSerializationStreams}
Returns the serialization streams of a data type.
**Syntax**
``` sql
getTypeSerializationStreams(type_name)
getTypeSerializationStreams(column)
```
**Arguments**
- `type_name` - Name of data type to get its serialization paths. [String](../../sql-reference/data-types/string.md#string).
- `column` - any column which has a data type
**Returned value**
- List of serialization streams;
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
**Example**
Query:
``` sql
SELECT getTypeSerializationStreams('Array(Array(Int8))')
```
Result:
``` text
┌───────────────────────getTypeSerializationStreams('Array(Array(Int8))')─────────────────────────────┐
│ ['{ArraySizes}','{ArrayElements, ArraySizes}','{ArrayElements, ArrayElements, Regular}'] │
└─────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

View File

@ -114,9 +114,9 @@ In addition, this column is not substituted when using an asterisk in a SELECT q
### EPHEMERAL {#ephemeral}
`EPHEMERAL expr`
`EPHEMERAL [expr]`
Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement.
Ephemeral column. Such a column isn't stored in the table and cannot be SELECTed, but can be referenced in the defaults of CREATE statement. If `expr` is omitted, the column type must be specified explicitly.
INSERT without list of columns will skip such column, so SELECT/INSERT invariant is preserved - the dump obtained using `SELECT *` can be inserted back into the table using INSERT without specifying the list of columns.
### ALIAS {#alias}

View File

@ -273,7 +273,7 @@ GitHubのUIでforkリポジトリに移動します。 ブランチで開発し
プル要求は、作業がまだ完了していない場合でも作成できます。 この場合、単語を入れてください “WIP” (進行中の作業)タイトルの先頭に、それは後で変更することができます。 これは、変更の協調的なレビューと議論、および利用可能なすべてのテストの実行に役立ちます。 変更の簡単な説明を提供することが重要です。
Yandexの従業員がタグであなたのPRにラベルを付けるとすぐにテストが開始されます “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour.
ClickHouseの従業員がタグであなたのPRにラベルを付けるとすぐにテストが開始されます “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour.
システムは、プル要求用にClickHouseバイナリビルドを個別に準備します。 これらのビルドを取得するには “Details” 次のリンク “ClickHouse build check” 小切手のリストのエントリ。 そこには、ビルドへの直接リンクがあります。ClickHouseのdebパッケージは、本番サーバーにも展開できます恐れがない場合

View File

@ -72,11 +72,11 @@ ClickHouse не работает и не собирается на 32-битны
Этот вариант не подходит для отправки изменений на сервер. Вы можете временно его использовать, а затем добавить ssh ключи и заменить адрес репозитория с помощью команды `git remote`.
Вы можете также добавить для своего локального репозитория адрес оригинального репозитория Яндекса, чтобы притягивать оттуда обновления:
Вы можете также добавить для своего локального репозитория адрес оригинального репозитория, чтобы притягивать оттуда обновления:
git remote add upstream git@github.com:ClickHouse/ClickHouse.git
После этого, вы сможете добавлять в свой репозиторий обновления из репозитория Яндекса с помощью команды `git pull upstream master`.
После этого, вы сможете добавлять в свой репозиторий обновления из репозитория ClickHouse с помощью команды `git pull upstream master`.
### Работа с сабмодулями Git {#rabota-s-sabmoduliami-git}
@ -288,7 +288,7 @@ sudo ./llvm.sh 12
Pull request можно создать, даже если работа над задачей ещё не завершена. В этом случае, добавьте в его название слово «WIP» (work in progress). Название можно будет изменить позже. Это полезно для совместного просмотра и обсуждения изменений, а также для запуска всех имеющихся тестов. Введите краткое описание изменений - впоследствии, оно будет использовано для релизных changelog.
Тесты будут запущены, как только сотрудники Яндекса поставят для pull request тег «Can be tested». Результаты первых проверок (стиль кода) появятся уже через несколько минут. Результаты сборки появятся примерно через пол часа. Результаты основного набора тестов будут доступны в пределах часа.
Тесты будут запущены, как только сотрудники ClickHouse поставят для pull request тег «Can be tested». Результаты первых проверок (стиль кода) появятся уже через несколько минут. Результаты сборки появятся примерно через пол часа. Результаты основного набора тестов будут доступны в пределах часа.
Система подготовит сборки ClickHouse специально для вашего pull request. Для их получения, нажмите на ссылку «Details» у проверки «Clickhouse build check». Там вы сможете найти прямые ссылки на собранные .deb пакеты ClickHouse, которые, при желании, вы даже сможете установить на свои продакшен серверы (если не страшно).

View File

@ -678,7 +678,7 @@ TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y);
- `volume_name_N` — название тома. Названия томов должны быть уникальны.
- `disk` — диск, находящийся внутри тома.
- `max_data_part_size_bytes` — максимальный размер куска данных, который может находится на любом из дисков этого тома. Если в результате слияния размер куска ожидается больше, чем max_data_part_size_bytes, то этот кусок будет записан в следующий том. В основном эта функция позволяет хранить новые / мелкие куски на горячем (SSD) томе и перемещать их на холодный (HDD) том, когда они достигают большого размера. Не используйте этот параметр, если политика имеет только один том.
- `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1).
- `move_factor` — доля доступного свободного места на томе, если места становится меньше, то данные начнут перемещение на следующий том, если он есть (по умолчанию 0.1). Для перемещения куски сортируются по размеру от большего к меньшему (по убыванию) и выбираются куски, совокупный размер которых достаточен для соблюдения условия `move_factor`; если совокупного размера всех кусков недостаточно, будут перемещены все куски.
- `prefer_not_to_merge` — Отключает слияние кусков данных, хранящихся на данном томе. Если данная настройка включена, то слияние данных, хранящихся на данном томе, не допускается. Это позволяет контролировать работу ClickHouse с медленными дисками.
Примеры конфигураций:

View File

@ -110,9 +110,9 @@ SELECT x, toTypeName(x) FROM t1;
### EPHEMERAL {#ephemeral}
`EPHEMERAL expr`
`EPHEMERAL [expr]`
Эфемерное выражение. Такой столбец не хранится в таблице и не может быть получен в запросе SELECT, но на него можно ссылаться в выражениях по умолчанию запроса CREATE.
Эфемерное выражение. Такой столбец не хранится в таблице и не может быть получен в запросе SELECT, но на него можно ссылаться в выражениях по умолчанию запроса CREATE. Если значение по умолчанию `expr` не указано, то тип колонки должен быть специфицирован.
INSERT без списка столбцов игнорирует этот столбец, таким образом сохраняется инвариант - т.е. дамп, полученный путём `SELECT *`, можно вставить обратно в таблицу INSERT-ом без указания списка столбцов.
### ALIAS {#alias}

View File

@ -126,7 +126,7 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32)
**См. также**
- [Движок таблиц PostgreSQL](../../sql-reference/table-functions/postgresql.md)
- [Движок таблиц PostgreSQL](../../engines/table-engines/integrations/postgresql.md)
- [Использование PostgreSQL как источника данных для внешнего словаря](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
[Оригинальная статья](https://clickhouse.com/docs/ru/sql-reference/table-functions/postgresql/) <!--hide-->

View File

@ -16,7 +16,7 @@ jsmin==3.0.0
livereload==2.6.3
Markdown==3.3.2
MarkupSafe==2.1.0
mkdocs==1.1.2
mkdocs==1.3.0
mkdocs-htmlproofer-plugin==0.0.3
mkdocs-macros-plugin==0.4.20
nltk==3.7

View File

@ -259,7 +259,7 @@ ClickHouse的架构描述可以在此处查看https://clickhouse.com/docs/en/
即使工作尚未完成也可以创建拉取请求。在这种情况下请在标题的开头加上«WIP»正在进行中以便后续更改。这对于协同审查和讨论更改以及运行所有可用测试用例很有用。提供有关变更的简短描述很重要这将在后续用于生成重新发布变更日志。
Yandex成员一旦在您的拉取请求上贴上«可以测试»标签,就会开始测试。一些初始检查项(例如,代码类型)的结果会在几分钟内反馈。构建的检查结果将在半小时内完成。而主要的测试用例集结果将在一小时内报告给您。
ClickHouse成员一旦在您的拉取请求上贴上«可以测试»标签,就会开始测试。一些初始检查项(例如,代码类型)的结果会在几分钟内反馈。构建的检查结果将在半小时内完成。而主要的测试用例集结果将在一小时内报告给您。
系统将分别为您的拉取请求准备ClickHouse二进制版本。若要检索这些构建信息请在检查列表中单击« ClickHouse构建检查»旁边的«详细信息»链接。在这里您会找到指向ClickHouse的.deb软件包的直接链接此外甚至可以将其部署在生产服务器上如果您不担心

View File

@ -140,7 +140,7 @@ CREATE TABLE test.test_orc
`f_array_array_float` Array(Array(Float32)),
`day` String
)
ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
PARTITION BY day
```

View File

@ -8,7 +8,7 @@ toc_title: "版本折叠MergeTree"
这个引擎:
- 允许快速写入不断变化的对象状态。
- 删除后台中的旧对象状态。 这显降低了存储体积。
- 删除后台中的旧对象状态。 这显著降低了存储体积。
请参阅部分 [崩溃](#table_engines_versionedcollapsingmergetree) 有关详细信息。

View File

@ -21,8 +21,12 @@ description: |
This package contains the debugging symbols for clickhouse-common.
contents:
- src: root/usr/lib/debug
dst: /usr/lib/debug
- src: root/usr/lib/debug/usr/bin/clickhouse.debug
dst: /usr/lib/debug/usr/bin/clickhouse.debug
- src: root/usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
dst: /usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
- src: root/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
dst: /usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS

View File

@ -0,0 +1,28 @@
# package sources should be placed in ${PWD}/root
# nfpm should run from the same directory with a config
name: "clickhouse-keeper-dbg"
arch: "${DEB_ARCH}" # amd64, arm64
platform: "linux"
version: "${CLICKHOUSE_VERSION_STRING}"
vendor: "ClickHouse Inc."
homepage: "https://clickhouse.com"
license: "Apache"
section: "database"
priority: "optional"
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
description: |
debugging symbols for clickhouse-keeper
This package contains the debugging symbols for clickhouse-keeper.
contents:
# the only payload: the debug symbols file for the clickhouse-keeper binary
- src: root/usr/lib/debug/usr/bin/clickhouse-keeper.debug
dst: /usr/lib/debug/usr/bin/clickhouse-keeper.debug
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-keeper-dbg/AUTHORS
- src: ../CHANGELOG.md
dst: /usr/share/doc/clickhouse-keeper-dbg/CHANGELOG.md
- src: ../LICENSE
dst: /usr/share/doc/clickhouse-keeper-dbg/LICENSE
- src: ../README.md
dst: /usr/share/doc/clickhouse-keeper-dbg/README.md

View File

@ -0,0 +1,40 @@
# package sources should be placed in ${PWD}/root
# nfpm should run from the same directory with a config
name: "clickhouse-keeper"
arch: "${DEB_ARCH}" # amd64, arm64
platform: "linux"
version: "${CLICKHOUSE_VERSION_STRING}"
vendor: "ClickHouse Inc."
homepage: "https://clickhouse.com"
license: "Apache"
section: "database"
priority: "optional"
# declared as conflicting with the clickhouse-server package
conflicts:
- clickhouse-server
depends:
- adduser
# debug symbols are shipped separately in the optional clickhouse-keeper-dbg package
suggests:
- clickhouse-keeper-dbg
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
description: |
Static clickhouse-keeper binary
A stand-alone clickhouse-keeper package
contents:
# configuration directory; `type: config` marks it as configuration for the package manager
- src: root/etc/clickhouse-keeper
dst: /etc/clickhouse-keeper
type: config
# the stand-alone keeper binary itself
- src: root/usr/bin/clickhouse-keeper
dst: /usr/bin/clickhouse-keeper
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-keeper/AUTHORS
- src: ../CHANGELOG.md
dst: /usr/share/doc/clickhouse-keeper/CHANGELOG.md
- src: ../LICENSE
dst: /usr/share/doc/clickhouse-keeper/LICENSE
- src: ../README.md
dst: /usr/share/doc/clickhouse-keeper/README.md

View File

@ -20,7 +20,7 @@ ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml
EnvironmentFile=-/etc/default/clickhouse
LimitCORE=infinity
LimitNOFILE=500000
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
[Install]
# ClickHouse should not start from the rescue shell (rescue.target).

View File

@ -473,18 +473,11 @@ else ()
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT})
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
endif()
if (NOT INSTALL_STRIPPED_BINARIES)
# Install dunny debug directory
# TODO: move logic to every place where clickhouse_strip_binary is used
add_custom_command(TARGET clickhouse POST_BUILD COMMAND echo > .empty )
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/.empty" DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/.empty)
endif()
if (ENABLE_TESTS)
set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms)
add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS})

View File

@ -163,10 +163,24 @@ void Client::initialize(Poco::Util::Application & self)
configReadClient(config(), home_path);
/** getenv is thread-safe in Linux glibc and in all sane libc implementations.
* But the standard does not guarantee that subsequent calls will not rewrite the value by returned pointer.
*
* man getenv:
*
* As typically implemented, getenv() returns a pointer to a string within the environment list.
* The caller must take care not to modify this string, since that would change the environment of
* the process.
*
* The implementation of getenv() is not required to be reentrant. The string pointed to by the return value of getenv()
* may be statically allocated, and can be modified by a subsequent call to getenv(), putenv(3), setenv(3), or unsetenv(3).
*/
const char * env_user = getenv("CLICKHOUSE_USER");
const char * env_password = getenv("CLICKHOUSE_PASSWORD");
if (env_user)
config().setString("user", env_user);
const char * env_password = getenv("CLICKHOUSE_PASSWORD");
if (env_password)
config().setString("password", env_password);
@ -810,7 +824,7 @@ void Client::addOptions(OptionsDescription & options_description)
("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
("max_client_network_bandwidth", po::value<int>(), "the maximum speed of data exchange over the network for the client in bytes per second.")
("compression", po::value<bool>(), "enable or disable compression")
("compression", po::value<bool>(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).")
("query-fuzzer-runs", po::value<int>()->default_value(0), "After executing every SELECT query, do random mutations in it and run again specified number of times. This is used for testing to discover unexpected corner cases.")
("interleave-queries-file", po::value<std::vector<std::string>>()->multitoken(),

View File

@ -71,17 +71,11 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDelta.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDoubleDelta.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecGorilla.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecT64.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecZSTD.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionFactory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/getCompressionCodecForFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/ICompressionCodec.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/LZ4_decompress_faster.cpp
@ -137,5 +131,10 @@ if (BUILD_STANDALONE_KEEPER)
add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-keeper)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
endif()

View File

@ -27,5 +27,6 @@ set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECT
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()

View File

@ -184,6 +184,11 @@ void LocalServer::tryInitPath()
if (path.back() != '/')
path += '/';
fs::create_directories(fs::path(path) / "user_defined/");
fs::create_directories(fs::path(path) / "data/");
fs::create_directories(fs::path(path) / "metadata/");
fs::create_directories(fs::path(path) / "metadata_dropped/");
global_context->setPath(path);
global_context->setTemporaryStorage(path + "tmp");
@ -565,7 +570,6 @@ void LocalServer::processConfig()
/// Lock path directory before read
status.emplace(fs::path(path) / "status", StatusFile::write_full_info);
fs::create_directories(fs::path(path) / "user_defined/");
LOG_DEBUG(log, "Loading user defined objects from {}", path);
Poco::File(path + "user_defined/").createDirectories();
UserDefinedSQLObjectsLoader::instance().loadObjects(global_context);
@ -573,9 +577,6 @@ void LocalServer::processConfig()
LOG_DEBUG(log, "Loaded user defined objects.");
LOG_DEBUG(log, "Loading metadata from {}", path);
fs::create_directories(fs::path(path) / "data/");
fs::create_directories(fs::path(path) / "metadata/");
loadMetadataSystem(global_context);
attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));

View File

@ -42,6 +42,7 @@ endif()
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()

View File

@ -20,6 +20,7 @@
#include <base/phdr_cache.h>
#include <base/ErrorHandlers.h>
#include <base/getMemoryAmount.h>
#include <base/getAvailableMemoryAmount.h>
#include <base/errnoToString.h>
#include <base/coverage.h>
#include <base/getFQDNOrHostName.h>
@ -45,6 +46,7 @@
#include <Core/ServerUUID.h>
#include <IO/HTTPCommon.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/IOThreadPool.h>
#include <IO/UseSSL.h>
#include <Interpreters/AsynchronousMetrics.h>
@ -80,6 +82,7 @@
#include <Common/SensitiveDataMasker.h>
#include <Common/ThreadFuzzer.h>
#include <Common/getHashOfLoadedBinary.h>
#include <Common/filesystemHelpers.h>
#include <Common/Elf.h>
#include <Server/MySQLHandlerFactory.h>
#include <Server/PostgreSQLHandlerFactory.h>
@ -505,6 +508,101 @@ void checkForUsersNotInMainConfig(
}
}
/// Unused in other builds
#if defined(OS_LINUX)
/// Reads the file at `path` up to EOF and returns everything read as a single string.
/// Errors raised by the underlying read buffer are not handled here and reach the caller.
static String readString(const String & path)
{
    ReadBufferFromFile file_buf(path);

    String text;
    readStringUntilEOF(text, file_buf);
    return text;
}
static int readNumber(const String & path)
{
ReadBufferFromFile in(path);
int result;
readText(result, in);
return result;
}
#endif
/// Best-effort startup diagnostics.
///
/// Inspects a handful of OS and filesystem settings and registers a warning message on the
/// server context for each one that looks unfavourable. The checks are purely advisory:
/// every probe is wrapped in its own try/catch so that a missing /proc or /sys entry, or
/// any other read failure, only skips that single check.
///
/// @param server  Server whose configuration is read (`path`, `logger.log`) and whose
///                context receives the warning messages.
static void sanityChecks(Server * server)
{
    std::string data_path = getCanonicalPath(server->config().getString("path", DBMS_DEFAULT_PATH));
    std::string logs_path = server->config().getString("logger.log", "");

#if defined(OS_LINUX)
    try
    {
        if (readString("/sys/devices/system/clocksource/clocksource0/current_clocksource").find("tsc") == std::string::npos)
            server->context()->addWarningMessage("Linux is not using fast TSC clock source. Performance can be degraded.");
    }
    catch (...)
    {
        /// Deliberately ignored: the check is advisory only.
    }

    try
    {
        if (readNumber("/proc/sys/vm/overcommit_memory") == 2)
            server->context()->addWarningMessage("Linux memory overcommit is disabled.");
    }
    catch (...)
    {
        /// Deliberately ignored: the check is advisory only.
    }

    try
    {
        /// The kernel marks the active mode with square brackets.
        if (readString("/sys/kernel/mm/transparent_hugepage/enabled").find("[always]") != std::string::npos)
            server->context()->addWarningMessage("Linux transparent hugepage are set to \"always\".");
    }
    catch (...)
    {
        /// Deliberately ignored: the check is advisory only.
    }

    try
    {
        if (readNumber("/proc/sys/kernel/pid_max") < 30000)
            server->context()->addWarningMessage("Linux max PID is too low.");
    }
    catch (...)
    {
        /// Deliberately ignored: the check is advisory only.
    }

    try
    {
        if (readNumber("/proc/sys/kernel/threads-max") < 30000)
            server->context()->addWarningMessage("Linux threads max count is too low.");
    }
    catch (...)
    {
        /// Deliberately ignored: the check is advisory only.
    }

    try
    {
        /// Guarded like every other probe: the block device helpers inspect the filesystem
        /// (presumably they can throw, e.g. on unusual filesystems or in containers - confirm),
        /// and a failed diagnostic must not prevent the server from starting.
        std::string dev_id = getBlockDeviceId(data_path);
        if (getBlockDeviceType(dev_id) == BlockDeviceType::ROT && getBlockDeviceReadAheadBytes(dev_id) == 0)
            server->context()->addWarningMessage("Rotational disk with disabled readahead is in use. Performance can be degraded.");
    }
    catch (...)
    {
        /// Deliberately ignored: the check is advisory only.
    }
#endif

    try
    {
        /// 2 GiB; the literal is `unsigned long long` so the shift does not depend on the width of `long`.
        if (getAvailableMemoryAmount() < (2ull << 30))
            server->context()->addWarningMessage("Available memory at server startup is too low (2GiB).");

        if (!enoughSpaceInDirectory(data_path, 1ull << 30))
            server->context()->addWarningMessage("Available disk space at server startup is too low (1GiB).");

        if (!logs_path.empty())
        {
            /// `logger.log` names a file; the free space of its parent directory is what matters.
            if (!enoughSpaceInDirectory(fs::path(logs_path).parent_path(), 1ull << 30))
                server->context()->addWarningMessage("Available disk space at server startup is too low (1GiB).");
        }
    }
    catch (...)
    {
        /// Deliberately ignored: the check is advisory only.
    }
}
int Server::main(const std::vector<std::string> & /*args*/)
{
Poco::Logger * log = &logger();
@ -538,13 +636,14 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->addWarningMessage("Server was built in debug mode. It will work slowly.");
#endif
if (ThreadFuzzer::instance().isEffective())
global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable.");
if (ThreadFuzzer::instance().isEffective())
global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable.");
#if defined(SANITIZER)
global_context->addWarningMessage("Server was built with sanitizer. It will work slowly.");
#endif
sanityChecks(this);
// Initialize global thread pool. Do it before we fetch configs from zookeeper
// nodes (`from_zk`), because ZooKeeper interface uses the pool. We will
@ -766,6 +865,38 @@ if (ThreadFuzzer::instance().isEffective())
}
}
/// Try to increase limit on number of threads.
{
rlimit rlim;
if (getrlimit(RLIMIT_NPROC, &rlim))
throw Poco::Exception("Cannot getrlimit");
if (rlim.rlim_cur == rlim.rlim_max)
{
LOG_DEBUG(log, "rlimit on number of threads is {}", rlim.rlim_cur);
}
else
{
rlim_t old = rlim.rlim_cur;
rlim.rlim_cur = rlim.rlim_max;
int rc = setrlimit(RLIMIT_NPROC, &rlim);
if (rc != 0)
{
LOG_WARNING(log, "Cannot set max number of threads to {}. error: {}", rlim.rlim_cur, strerror(errno));
rlim.rlim_cur = old;
}
else
{
LOG_DEBUG(log, "Set max number of threads to {} (was {}).", rlim.rlim_cur, old);
}
}
if (rlim.rlim_cur < 30000)
{
global_context->addWarningMessage("Maximum number of threads is lower than 30000. There could be problems with handling a lot of simultaneous queries.");
}
}
static ServerErrorHandler error_handler;
Poco::ErrorHandler::set(&error_handler);
@ -829,6 +960,36 @@ if (ThreadFuzzer::instance().isEffective())
fs::create_directories(path / "metadata_dropped/");
}
#if USE_ROCKSDB
/// Initialize merge tree metadata cache
if (config().has("merge_tree_metadata_cache"))
{
fs::create_directories(path / "rocksdb/");
size_t size = config().getUInt64("merge_tree_metadata_cache.lru_cache_size", 256 << 20);
bool continue_if_corrupted = config().getBool("merge_tree_metadata_cache.continue_if_corrupted", false);
try
{
LOG_DEBUG(
log, "Initiailizing merge tree metadata cache lru_cache_size:{} continue_if_corrupted:{}", size, continue_if_corrupted);
global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size);
}
catch (...)
{
if (continue_if_corrupted)
{
/// Rename rocksdb directory and reinitialize merge tree metadata cache
time_t now = time(nullptr);
fs::rename(path / "rocksdb", path / ("rocksdb.old." + std::to_string(now)));
global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size);
}
else
{
throw;
}
}
}
#endif
if (config().has("interserver_http_port") && config().has("interserver_https_port"))
throw Exception("Both http and https interserver ports are specified", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);

View File

@ -148,13 +148,13 @@
<!-- <interserver_https_port>9010</interserver_https_port> -->
<!-- Hostname that is used by other replicas to request this server.
If not specified, than it is determined analogous to 'hostname -f' command.
If not specified, then it is determined analogous to 'hostname -f' command.
This setting could be used to switch replication to another network interface
(the server may be connected to multiple networks via multiple addresses)
-->
<!--
<interserver_http_host>example.yandex.ru</interserver_http_host>
<interserver_http_host>example.clickhouse.com</interserver_http_host>
-->
<!-- You can specify credentials for authentication between replicas.
@ -765,14 +765,14 @@
-->
<!--<remote_url_allow_hosts>-->
<!-- Host should be specified exactly as in URL. The name is checked before DNS resolution.
Example: "yandex.ru", "yandex.ru." and "www.yandex.ru" are different hosts.
Example: "clickhouse.com", "clickhouse.com." and "www.clickhouse.com" are different hosts.
If port is explicitly specified in URL, the host:port is checked as a whole.
If host specified here without port, any port with this host allowed.
"yandex.ru" -> "yandex.ru:443", "yandex.ru:80" etc. is allowed, but "yandex.ru:80" -> only "yandex.ru:80" is allowed.
"clickhouse.com" -> "clickhouse.com:443", "clickhouse.com:80" etc. is allowed, but "clickhouse.com:80" -> only "clickhouse.com:80" is allowed.
If the host is specified as IP address, it is checked as specified in URL. Example: "[2a02:6b8:a::a]".
If there are redirects and support for redirects is enabled, every redirect (the Location field) is checked.
Host should be specified using the host xml tag:
<host>yandex.ru</host>
<host>clickhouse.com</host>
-->
<!-- Regular expression can be specified. RE2 engine is used for regexps.
@ -1030,25 +1030,17 @@
<flush_interval_milliseconds>1000</flush_interval_milliseconds>
</crash_log>
<!-- Session log. Stores user log in (successful or not) and log out events. -->
<session_log>
<!-- Session log. Stores user log in (successful or not) and log out events.
Note: session log has known security issues and should not be used in production.
-->
<!-- <session_log>
<database>system</database>
<table>session_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</session_log>
<!-- Parameters for embedded dictionaries, used in Yandex.Metrica.
See https://clickhouse.com/docs/en/dicts/internal_dicts/
-->
<!-- Path to file with region hierarchy. -->
<!-- <path_to_regions_hierarchy_file>/opt/geo/regions_hierarchy.txt</path_to_regions_hierarchy_file> -->
<!-- Path to directory with files containing names of regions -->
<!-- <path_to_regions_names_files>/opt/geo/</path_to_regions_names_files> -->
</session_log> -->
<!-- <top_level_domains_path>/var/lib/clickhouse/top_level_domains/</top_level_domains_path> -->
<!-- Custom TLD lists.
@ -1294,4 +1286,10 @@
</tables>
</rocksdb>
-->
<!-- Merge tree metadata cache (only used by builds with RocksDB support). Remove or comment out this section to disable it. -->
<merge_tree_metadata_cache>
<lru_cache_size>268435456</lru_cache_size>
<continue_if_corrupted>true</continue_if_corrupted>
</merge_tree_metadata_cache>
</clickhouse>

View File

@ -103,7 +103,7 @@ interserver_http_port: 9009
# If not specified, then it is determined analogous to 'hostname -f' command.
# This setting could be used to switch replication to another network interface
# (the server may be connected to multiple networks via multiple addresses)
# interserver_http_host: example.yandex.ru
# interserver_http_host: example.clickhouse.com
# You can specify credentials for authentication between replicas.
# This is required when interserver_https_port is accessible from untrusted networks,
@ -592,10 +592,10 @@ remote_servers:
# remote_url_allow_hosts:
# Host should be specified exactly as in URL. The name is checked before DNS resolution.
# Example: "yandex.ru", "yandex.ru." and "www.yandex.ru" are different hosts.
# Example: "clickhouse.com", "clickhouse.com." and "www.clickhouse.com" are different hosts.
# If port is explicitly specified in URL, the host:port is checked as a whole.
# If host specified here without port, any port with this host allowed.
# "yandex.ru" -> "yandex.ru:443", "yandex.ru:80" etc. is allowed, but "yandex.ru:80" -> only "yandex.ru:80" is allowed.
# "clickhouse.com" -> "clickhouse.com:443", "clickhouse.com:80" etc. is allowed, but "clickhouse.com:80" -> only "clickhouse.com:80" is allowed.
# If the host is specified as IP address, it is checked as specified in URL. Example: "[2a02:6b8:a::a]".
# If there are redirects and support for redirects is enabled, every redirect (the Location field) is checked.
@ -803,16 +803,6 @@ crash_log:
partition_by: ''
flush_interval_milliseconds: 1000
# Parameters for embedded dictionaries, used in Yandex.Metrica.
# See https://clickhouse.com/docs/en/dicts/internal_dicts/
# Path to file with region hierarchy.
# path_to_regions_hierarchy_file: /opt/geo/regions_hierarchy.txt
# Path to directory with files containing names of regions
# path_to_regions_names_files: /opt/geo/
# top_level_domains_path: /var/lib/clickhouse/top_level_domains/
# Custom TLD lists.
# Format: name: /path/to/file

View File

@ -266,12 +266,25 @@
color: var(--null-color);
}
@keyframes hourglass-animation {
0% {
transform: rotate(-180deg);
}
50% {
transform: rotate(-180deg);
}
100% {
transform: none;
}
}
#hourglass
{
display: none;
padding-left: 1rem;
margin-left: 1rem;
font-size: 110%;
color: #888;
animation: hourglass-animation 1s linear infinite;
}
#check-mark
@ -457,7 +470,7 @@
}
document.getElementById('check-mark').style.display = 'none';
document.getElementById('hourglass').style.display = 'inline';
document.getElementById('hourglass').style.display = 'inline-block';
xhr.send(query);
}

View File

@ -79,9 +79,9 @@
Each element of list has one of the following forms:
<ip> IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0
2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::.
<host> Hostname. Example: server01.yandex.ru.
<host> Hostname. Example: server01.clickhouse.com.
To check access, DNS query is performed, and all received addresses compared to peer address.
<host_regexp> Regular expression for host names. Example, ^server\d\d-\d\d-\d\.yandex\.ru$
<host_regexp> Regular expression for host names. Example, ^server\d\d-\d\d-\d\.clickhouse\.com$
To check access, DNS PTR query is performed for peer address and then regexp is applied.
Then, for result of PTR query, another DNS query is performed and all received addresses compared to peer address.
It is strongly recommended that the regexp ends with $

View File

@ -70,9 +70,9 @@ users:
# Each element of list has one of the following forms:
# ip: IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0
# 2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::.
# host: Hostname. Example: server01.yandex.ru.
# host: Hostname. Example: server01.clickhouse.com.
# To check access, DNS query is performed, and all received addresses compared to peer address.
# host_regexp: Regular expression for host names. Example, ^server\d\d-\d\d-\d\.yandex\.ru$
# host_regexp: Regular expression for host names. Example, ^server\d\d-\d\d-\d\.clickhouse\.com$
# To check access, DNS PTR query is performed for peer address and then regexp is applied.
# Then, for result of PTR query, another DNS query is performed and all received addresses compared to peer address.
# It is strongly recommended that the regexp ends with $ and that the whole expression is enclosed in ''

View File

@ -182,6 +182,7 @@ enum class AccessType
M(JDBC, "", GLOBAL, SOURCES) \
M(HDFS, "", GLOBAL, SOURCES) \
M(S3, "", GLOBAL, SOURCES) \
M(HIVE, "", GLOBAL, SOURCES) \
M(SOURCES, "", GROUP, ALL) \
\
M(ALL, "ALL PRIVILEGES", GROUP, NONE) /* full access */ \

View File

@ -107,6 +107,11 @@ const QuotaTypeInfo & QuotaTypeInfo::get(QuotaType type)
static const auto info = make_info("EXECUTION_TIME", 1000000000 /* execution_time is stored in nanoseconds */);
return info;
}
case QuotaType::WRITTEN_BYTES:
{
static const auto info = make_info("WRITTEN_BYTES", 1);
return info;
}
case QuotaType::MAX: break;
}
throw Exception("Unexpected quota type: " + std::to_string(static_cast<int>(type)), ErrorCodes::LOGICAL_ERROR);

View File

@ -13,13 +13,14 @@ enum class QuotaType
{
QUERIES, /// Number of queries.
QUERY_SELECTS, /// Number of select queries.
QUERY_INSERTS, /// Number of inserts queries.
QUERY_INSERTS, /// Number of insert queries.
ERRORS, /// Number of queries with exceptions.
RESULT_ROWS, /// Number of rows returned as result.
RESULT_BYTES, /// Number of bytes returned as result.
READ_ROWS, /// Number of rows read from tables.
READ_BYTES, /// Number of bytes read from tables.
EXECUTION_TIME, /// Total amount of query execution time in nanoseconds.
WRITTEN_BYTES, /// Number of bytes written to tables.
MAX
};

View File

@ -13,7 +13,7 @@ namespace DB
{
namespace ErrorCodes
{
extern const int QUOTA_EXPIRED;
extern const int QUOTA_EXCEEDED;
}
@ -33,7 +33,7 @@ struct EnabledQuota::Impl
"Quota for user " + backQuote(user_name) + " for " + to_string(duration) + " has been exceeded: "
+ type_info.valueToStringWithName(used) + "/" + type_info.valueToString(max) + ". "
+ "Interval will end at " + to_string(end_of_interval) + ". " + "Name of quota template: " + backQuote(quota_name),
ErrorCodes::QUOTA_EXPIRED);
ErrorCodes::QUOTA_EXCEEDED);
}

View File

@ -0,0 +1,147 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySorted.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
#include <Common/FieldVisitorConvertToNumber.h>
/// Largest value accepted for the size parameter of groupArraySorted; larger values are rejected when the function is created.
static inline constexpr UInt64 GROUP_SORTED_ARRAY_MAX_SIZE = 0xFFFFFF;
/// Number of elements kept when the size parameter is omitted.
static inline constexpr UInt64 GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD = 10;
namespace DB
{
struct Settings;
/// Error codes used by the factory function in this file; declared `extern` because they are defined elsewhere.
namespace ErrorCodes
{
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
/// Adapter with the four-parameter shape expected by createAggregateFunctionGroupArraySortedTypedFinal:
/// it forwards to the generic implementation with is_plain_a fixed to false and the value type T used as is.
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySortedNumeric : public AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>
{
    using Parent = AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>;

    /// Inherit the constructors of the generic implementation unchanged.
    using Parent::Parent;
};
/// Adapter for the case where T is a data type class (it must provide T::FieldType):
/// values are stored as T::FieldType, while the result is reported as Array(T).
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySortedFieldType
    : public AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>
{
    using Parent = AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>;

    /// Inherit the constructors of the generic implementation unchanged.
    using Parent::Parent;

    /// The element type of the result is a default-constructed T rather than the type of the first argument.
    DataTypePtr getReturnType() const override
    {
        DataTypePtr element_type = std::make_shared<T>();
        return std::make_shared<DataTypeArray>(element_type);
    }
};
/// Instantiates AggregateFunctionTemplate<TColumnA, expr_sorted, TColumnB, is_plain_b>,
/// forwarding all arguments to its constructor, and returns it as a generic aggregate function pointer.
/// The object is created with std::make_shared so that no raw owning pointer exists at any point.
template <template <typename, bool, typename, bool> class AggregateFunctionTemplate, typename TColumnA, bool expr_sorted, typename TColumnB, bool is_plain_b, typename... TArgs>
AggregateFunctionPtr
createAggregateFunctionGroupArraySortedTypedFinal(TArgs && ... args)
{
    using Function = AggregateFunctionTemplate<TColumnA, expr_sorted, TColumnB, is_plain_b>;
    return std::make_shared<Function>(std::forward<TArgs>(args)...);
}
/// Chooses the implementation according to the type of the first argument (the values placed into the result).
///
/// Template parameters describe the optional second argument and are passed through unchanged:
///  expr_sorted - whether a second argument is used;
///  TColumnB    - in-memory type of the second argument;
///  is_plain_b  - for a StringRef second argument: whether its raw bytes are used directly.
///
/// Types listed by FOR_NUMERIC_TYPES, Enum8/Enum16, Date and DateTime get a typed implementation;
/// every other type is handled through StringRef.
template <bool expr_sorted = false, typename TColumnB = UInt64, bool is_plain_b = false>
AggregateFunctionPtr
createAggregateFunctionGroupArraySortedTyped(const DataTypes & argument_types, const Array & params, UInt64 threshold)
{
#define DISPATCH(A, C, B) \
    if (which.idx == TypeIndex::A) \
        return createAggregateFunctionGroupArraySortedTypedFinal<C, B, expr_sorted, TColumnB, is_plain_b>(threshold, argument_types, params);
#define DISPATCH_NUMERIC(A) DISPATCH(A, AggregateFunctionGroupArraySortedNumeric, A)
    WhichDataType which(argument_types[0]);
    FOR_NUMERIC_TYPES(DISPATCH_NUMERIC)
    DISPATCH(Enum8, AggregateFunctionGroupArraySortedNumeric, Int8)
    DISPATCH(Enum16, AggregateFunctionGroupArraySortedNumeric, Int16)
    DISPATCH(Date, AggregateFunctionGroupArraySortedFieldType, DataTypeDate)
    DISPATCH(DateTime, AggregateFunctionGroupArraySortedFieldType, DataTypeDateTime)
#undef DISPATCH
#undef DISPATCH_NUMERIC

    /// Generic fallback: values are kept as StringRef. If the value occupies one contiguous memory region
    /// its bytes are used as is (is_plain_a = true), otherwise the column's arena serialization is used.
    if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
        return std::make_shared<AggregateFunctionGroupArraySorted<StringRef, true, expr_sorted, TColumnB, is_plain_b>>(
            threshold, argument_types, params);

    return std::make_shared<AggregateFunctionGroupArraySorted<StringRef, false, expr_sorted, TColumnB, is_plain_b>>(
        threshold, argument_types, params);
}
/// Factory entry point for groupArraySorted.
///
/// Parameters (`params`): optional single value, the maximum number of elements to keep.
/// It must be in the range [1, GROUP_SORTED_ARRAY_MAX_SIZE]; GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD is used when omitted.
///
/// Arguments (`argument_types`): one or two columns. With two columns the second one is passed to the
/// implementation as the additional (expr_sorted) column and its type selects TColumnB.
///
/// Throws ARGUMENT_OUT_OF_BOUND for an illegal size parameter, NUMBER_OF_ARGUMENTS_DOESNT_MATCH for a wrong
/// number of parameters or arguments, ILLEGAL_TYPE_OF_ARGUMENT for an unsupported numeric second argument.
AggregateFunctionPtr createAggregateFunctionGroupArraySorted(
    const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
    UInt64 threshold = GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD;

    /// Parameters are validated first, then the arguments.
    if (params.size() == 1)
    {
        UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);

        if (k > GROUP_SORTED_ARRAY_MAX_SIZE)
            throw Exception(
                "Too large parameter(s) for aggregate function " + name + ". Maximum: " + toString(GROUP_SORTED_ARRAY_MAX_SIZE),
                ErrorCodes::ARGUMENT_OUT_OF_BOUND);

        if (k == 0)
            throw Exception("Parameter 0 is illegal for aggregate function " + name, ErrorCodes::ARGUMENT_OUT_OF_BOUND);

        threshold = k;
    }
    else if (!params.empty())
    {
        throw Exception("Aggregate function " + name + " only supports 1 parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
    }

    if (argument_types.size() == 1)
        return createAggregateFunctionGroupArraySortedTyped<>(argument_types, params, threshold);

    /// This check is about the arguments (columns), not about the parameters, and the message says so.
    if (argument_types.size() != 2)
        throw Exception(
            "Aggregate function " + name + " requires one or two arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    if (isNumber(argument_types[1]))
    {
#define DISPATCH2(A, B) \
        if (which.idx == TypeIndex::A) \
            return createAggregateFunctionGroupArraySortedTyped<true, B>(argument_types, params, threshold);
#define DISPATCH(A) DISPATCH2(A, A)
        WhichDataType which(argument_types[1]);
        FOR_NUMERIC_TYPES(DISPATCH)
        DISPATCH2(Enum8, Int8)
        DISPATCH2(Enum16, Int16)
#undef DISPATCH
#undef DISPATCH2
        /// A number type that none of the dispatch lines above handles.
        throw Exception(
            "Illegal type " + argument_types[1]->getName() + " of the second argument of aggregate function " + name,
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    /// Non-numeric second argument: handled as StringRef, plain bytes when the value is contiguous in memory.
    if (argument_types[1]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
        return createAggregateFunctionGroupArraySortedTyped<true, StringRef, true>(argument_types, params, threshold);

    return createAggregateFunctionGroupArraySortedTyped<true, StringRef, false>(argument_types, params, threshold);
}
}
/// Registers the function under the name "groupArraySorted".
/// It is marked as order dependent and as not returning a default value when it only receives NULLs.
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
{
    const AggregateFunctionProperties group_array_sorted_properties{
        .returns_default_when_only_null = false,
        .is_order_dependent = true,
    };

    factory.registerFunction("groupArraySorted", {createAggregateFunctionGroupArraySorted, group_array_sorted_properties});
}
}

View File

@ -0,0 +1,310 @@
#pragma once
#include <Columns/ColumnArray.h>
#include <DataTypes/DataTypeArray.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySortedData.h>
#include <AggregateFunctions/IAggregateFunction.h>
namespace DB
{
template <typename TColumn, bool is_plain>
inline TColumn readItem(const IColumn * column, Arena * arena, size_t row)
{
if constexpr (std::is_same_v<TColumn, StringRef>)
{
if constexpr (is_plain)
{
StringRef str = column->getDataAt(row);
auto ptr = arena->alloc(str.size);
std::copy(str.data, str.data + str.size, ptr);
return StringRef(ptr, str.size);
}
else
{
const char * begin = nullptr;
return column->serializeValueIntoArena(row, *arena, begin);
}
}
else
{
if constexpr (std::is_same_v<TColumn, UInt64>)
return column->getUInt(row);
else
return column->getInt(row);
}
}
/// Finds the rows holding the smallest values of `data` by insertion into a small sorted array of row indexes.
///
///  data         - values to compare, `num_elements` of them;
///  threshold    - maximum number of rows to select; `results` must have room for `threshold` entries;
///  results      - output: indexes into `data`, ordered by ascending value;
///  filter       - optional: rows with filter[row] == 0 are skipped (only consulted when TFilter is not void).
///
/// Returns the number of entries of `results` that were filled.
template <typename TColumn, typename TFilter = void>
size_t
getFirstNElements_low_threshold(const TColumn * data, int num_elements, int threshold, size_t * results, const TFilter * filter = nullptr)
{
    for (int slot = 0; slot < threshold; ++slot)
        results[slot] = 0;

    /// There is no point in keeping more slots than there are rows.
    const int capacity = std::min(num_elements, threshold);
    int filled = 0;

    for (int row = 0; row < num_elements; ++row)
    {
        if constexpr (!std::is_same_v<TFilter, void>)
        {
            if (filter[row] == 0)
                continue;
        }

        /// Walk down from the largest selected value until one strictly smaller than the current value is found.
        int pos = filled;
        while (pos > 0 && !(data[row] > data[results[pos - 1]]))
            --pos;

        /// The value is not smaller than anything selected and all slots are taken.
        if (pos >= capacity)
            continue;

        /// Make room: shift the larger values one slot to the right (the last one falls off when full).
        for (int shift = std::min(capacity - 1, filled); shift > pos; --shift)
            results[shift] = results[shift - 1];

        if (filled < capacity)
            ++filled;

        results[pos] = row;
    }

    return filled;
}
/// A value paired with an index; ordering looks at the value only.
template <typename T>
struct SortableItem
{
    /// Value used for comparison.
    T a;
    /// Index carried along with the value (not part of the ordering).
    size_t b;

    bool operator<(const SortableItem & rhs) const
    {
        return a < rhs.a;
    }
};
template <typename TColumn, typename TFilter = void>
size_t getFirstNElements_high_threshold(
const TColumn * data, size_t num_elements, size_t threshold, size_t * results, const TFilter * filter = nullptr)
{
std::vector<SortableItem<TColumn>> dataIndexed(num_elements);
size_t num_elements_filtered = 0;
for (size_t i = 0; i < num_elements; i++)
{
if constexpr (!std::is_same_v<TFilter, void>)
{
if (filter[i] == 0)
continue;
}
dataIndexed.data()[num_elements_filtered].a = data[i];
dataIndexed.data()[num_elements_filtered].b = i;
num_elements_filtered++;
}
threshold = std::min(num_elements_filtered, threshold);
std::nth_element(dataIndexed.data(), dataIndexed.data() + threshold, dataIndexed.data() + num_elements_filtered);
std::sort(dataIndexed.data(), dataIndexed.data() + threshold);
for (size_t i = 0; i < threshold; i++)
{
results[i] = dataIndexed[i].b;
}
return threshold;
}
/// Thresholds below this value use the insertion-based selection; thresholds at or above it use nth_element + sort.
static const size_t THRESHOLD_MAX_CUSTOM_FUNCTION = 1000;
/// Selects the rows holding the smallest values of `data` and writes their indexes to `results`.
/// Picks the algorithm by `threshold` (see THRESHOLD_MAX_CUSTOM_FUNCTION) and applies `filter` when it is not null.
/// Returns the number of selected rows.
template <typename TColumn>
size_t getFirstNElements(const TColumn * data, size_t num_elements, size_t threshold, size_t * results, const UInt8 * filter = nullptr)
{
    const bool has_filter = (filter != nullptr);

    if (threshold < THRESHOLD_MAX_CUSTOM_FUNCTION)
    {
        return has_filter
            ? getFirstNElements_low_threshold(data, num_elements, threshold, results, filter)
            : getFirstNElements_low_threshold(data, num_elements, threshold, results);
    }

    return has_filter
        ? getFirstNElements_high_threshold(data, num_elements, threshold, results, filter)
        : getFirstNElements_high_threshold(data, num_elements, threshold, results);
}
/** Implementation of the groupArraySorted aggregate function.
  *
  * The state keeps at most `threshold` items (see AggregateFunctionGroupArraySortedData) and the result
  * is an array built from the first-argument values stored in the state.
  *
  * Template parameters:
  *  TColumnA     - in-memory type of the first argument's values (StringRef for the generic case);
  *  is_plain_a   - for StringRef values: raw bytes are stored (true) or the column's arena serialization (false);
  *  use_column_b - whether the value of a second argument is passed to the state together with each value;
  *  TColumnB, is_plain_b - same as TColumnA / is_plain_a, for the second argument.
  */
template <typename TColumnA, bool is_plain_a, bool use_column_b, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySorted : public IAggregateFunctionDataHelper<
    AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
    AggregateFunctionGroupArraySorted<TColumnA, is_plain_a, use_column_b, TColumnB, is_plain_b>>
{
protected:
    using State = AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>;
    using Base = IAggregateFunctionDataHelper<
        AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
        AggregateFunctionGroupArraySorted>;

    /// Maximum number of items kept in a state.
    UInt64 threshold;
    /// Refers to the first element of the argument types stored in the base class.
    DataTypePtr & input_data_type;

public:
    AggregateFunctionGroupArraySorted(UInt64 threshold_, const DataTypes & argument_types_, const Array & params)
        : Base(argument_types_, params)
        , threshold(threshold_)
        , input_data_type(this->argument_types[0])
    {
    }

    /// Creates the state and tells it how many items it may keep.
    void create(AggregateDataPtr place) const override
    {
        Base::create(place);
        this->data(place).threshold = threshold;
    }

    String getName() const override { return "groupArraySorted"; }

    DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(input_data_type); }

    /// StringRef values are copied or serialized into the arena by readItem().
    bool allocatesMemoryInArena() const override
    {
        if constexpr (std::is_same_v<TColumnA, StringRef>)
            return true;
        else
            return false;
    }

    /// Adds a single row to the state.
    void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
    {
        State & data = this->data(place);
        if constexpr (use_column_b)
        {
            data.add(
                readItem<TColumnA, is_plain_a>(columns[0], arena, row_num), readItem<TColumnB, is_plain_b>(columns[1], arena, row_num));
        }
        else
        {
            data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row_num));
        }
    }

    /// Selects up to `threshold` rows of the batch with getFirstNElements(), comparing the values of
    /// columns[data_column], and calls func(row, values) for each selected row, where `values` is the
    /// array of the compared values indexed by row.
    /// If if_argument_pos >= 0, the raw data of that column is used as a UInt8 filter.
    template <typename TColumn, bool is_plain, typename TFunc>
    void
    forFirstRows(size_t batch_size, const IColumn ** columns, size_t data_column, Arena * arena, ssize_t if_argument_pos, TFunc func) const
    {
        const TColumn * values = nullptr;
        /// Owns the values when they have to be materialized (StringRef case); stays empty otherwise.
        std::vector<TColumn> materialized_values;
        std::vector<size_t> best_rows(threshold);

        if constexpr (std::is_same_v<TColumn, StringRef>)
        {
            materialized_values.resize(batch_size);
            for (size_t i = 0; i < batch_size; i++)
                materialized_values[i] = readItem<TColumn, is_plain>(columns[data_column], arena, i);
            values = materialized_values.data();
        }
        else
        {
            /// Numeric values are compared in place, without copying.
            const auto & column = assert_cast<const ColumnVector<TColumn> &>(*columns[data_column]);
            values = column.getData().data();
        }

        const UInt8 * filter = nullptr;
        StringRef filter_data;

        if (if_argument_pos >= 0)
        {
            filter_data = columns[if_argument_pos]->getRawData();
            filter = reinterpret_cast<const UInt8 *>(filter_data.data);
        }

        size_t num_elements = getFirstNElements(values, batch_size, threshold, best_rows.data(), filter);
        for (size_t i = 0; i < num_elements; i++)
        {
            func(best_rows[i], values);
        }
    }

    /// Batch version of add() for a single state: only the rows preselected by forFirstRows() are added.
    void addBatchSinglePlace(
        size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos) const override
    {
        State & data = this->data(place);

        if constexpr (use_column_b)
        {
            /// Rows are selected by the second column; the first column is read only for the selected rows.
            forFirstRows<TColumnB, is_plain_b>(
                batch_size, columns, 1, arena, if_argument_pos, [columns, &arena, &data](size_t row, const TColumnB * values)
                {
                    data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row), values[row]);
                });
        }
        else
        {
            forFirstRows<TColumnA, is_plain_a>(
                batch_size, columns, 0, arena, if_argument_pos, [&data](size_t row, const TColumnA * values)
                {
                    data.add(values[row]);
                });
        }
    }

    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
    {
        this->data(place).merge(this->data(rhs));
    }

    void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
    {
        this->data(place).serialize(buf);
    }

    void
    deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
    {
        this->data(place).deserialize(buf, arena);
    }

    /// Appends one array to `to`, made of the first-argument values stored in the state, in the state's order.
    void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * /*arena*/) const override
    {
        ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
        ColumnArray::Offsets & offsets_to = arr_to.getOffsets();

        auto & values = this->data(place).values;
        offsets_to.push_back(offsets_to.back() + values.size());

        IColumn & data_to = arr_to.getData();
        /// Iterated by value on purpose: State::itemValue() takes a non-const reference.
        for (auto value : values)
        {
            if constexpr (std::is_same_v<TColumnA, StringRef>)
            {
                auto str = State::itemValue(value);
                if constexpr (is_plain_a)
                {
                    data_to.insertData(str.data, str.size);
                }
                else
                {
                    /// The value was stored with serializeValueIntoArena(), so it is read back the same way.
                    data_to.deserializeAndInsertFromArena(str.data);
                }
            }
            else
            {
                data_to.insert(State::itemValue(value));
            }
        }
    }
};
}

View File

@ -0,0 +1,162 @@
#pragma once
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/VarInt.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
/// Threshold used by a state that is constructed without an explicit threshold.
static inline constexpr UInt64 GROUP_SORTED_DEFAULT_THRESHOLD = 0xFFFFFF;
namespace DB
{
/// Writes a numeric item as a variable-length integer: writeVarInt for types that numeric_limits reports
/// as signed, writeVarUInt for the rest.
/// NOTE(review): std::numeric_limits<T>::is_signed is also true for floating-point types, which would then
/// be passed to writeVarInt - confirm whether that is intended.
template <typename T>
static void writeOneItem(WriteBuffer & buf, T item)
{
    if constexpr (!std::numeric_limits<T>::is_signed)
        writeVarUInt(item, buf);
    else
        writeVarInt(item, buf);
}

/// Overload for strings: delegates to writeBinary.
static void writeOneItem(WriteBuffer & buf, const StringRef & item)
{
    writeBinary(item, buf);
}
/// Reads a numeric item written by writeOneItem: a variable-length Int64 for types that numeric_limits
/// reports as signed, a variable-length UInt64 for the rest; the result is converted to T on assignment.
/// The arena is not used by this overload.
template <typename T>
static void readOneItem(ReadBuffer & buf, Arena * /*arena*/, T & item)
{
    if constexpr (!std::numeric_limits<T>::is_signed)
    {
        DB::UInt64 raw;
        readVarT(raw, buf);
        item = raw;
    }
    else
    {
        DB::Int64 raw;
        readVarT(raw, buf);
        item = raw;
    }
}

/// Overload for strings: the bytes are read into `arena`, which therefore must not be null.
static void readOneItem(ReadBuffer & buf, Arena * arena, StringRef & item)
{
    item = readStringBinaryInto(*arena, buf);
}
/// Common part of the groupArraySorted state: an ordered container (`Storage`) that is cut down to at most
/// `threshold` items by removing items from its end. How a single item is (de)serialized is left to the
/// derived class.
template <typename Storage>
struct AggregateFunctionGroupArraySortedDataBase
{
    using ValueType = typename Storage::value_type;

    AggregateFunctionGroupArraySortedDataBase(UInt64 threshold_ = GROUP_SORTED_DEFAULT_THRESHOLD) : threshold(threshold_) { }

    virtual ~AggregateFunctionGroupArraySortedDataBase() = default;

    /// Removes items from the end of the container until at most `threshold` remain.
    inline void narrowDown()
    {
        while (values.size() > threshold)
            values.erase(--values.end());
    }

    /// Adds a copy of the other state's items and trims the result.
    void merge(const AggregateFunctionGroupArraySortedDataBase & other)
    {
        values.merge(Storage(other.values));
        narrowDown();
    }

    /// Format: number of items, followed by each item as written by serializeItem().
    void serialize(WriteBuffer & buf) const
    {
        writeOneItem(buf, UInt64(values.size()));
        /// Iterated by value on purpose: serializeItem() takes a non-const reference.
        for (auto value : values)
        {
            serializeItem(buf, value);
        }
    }

    virtual void serializeItem(WriteBuffer & buf, ValueType & val) const = 0;
    virtual ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const = 0;

    /// Replaces the contents with the items read from `buf`.
    void deserialize(ReadBuffer & buf, Arena * arena)
    {
        values.clear();

        UInt64 length = 0;
        readOneItem(buf, nullptr, length);

        while (length--)
        {
            values.insert(deserializeItem(buf, arena));
            /// Trim after every insertion, like add() does, so the container never holds more than
            /// `threshold` + 1 items no matter how large the length read from the buffer is.
            /// The final contents are the same as when trimming once at the end.
            narrowDown();
        }
    }

    UInt64 threshold;
    Storage values;
};
/// State of groupArraySorted. The primary template is intentionally empty:
/// the real implementations are the partial specializations on `expr_sorted`.
template <typename T, bool expr_sorted, typename TIndex>
struct AggregateFunctionGroupArraySortedData {};
/// State for the two-column case: items are kept in a multimap ordered by the second column's value
/// (`weight`), with the first column's value as the mapped value.
template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, true, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>
{
    using Base = AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>;
    using Base::Base;

    /// Stores `item` under the key `weight`, then trims the container to the threshold.
    void add(T item, TIndex weight)
    {
        this->values.emplace(weight, item);
        this->narrowDown();
    }

    /// An entry is written as its key followed by its value.
    void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override
    {
        auto & [key, item] = value;
        writeOneItem(buf, key);
        writeOneItem(buf, item);
    }

    /// Reads an entry in the order serializeItem() wrote it: key first, value second.
    typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
    {
        TIndex key;
        readOneItem(buf, arena, key);

        T item;
        readOneItem(buf, arena, item);

        return {key, item};
    }

    /// The part of an entry that goes into the result: the mapped value.
    static T itemValue(typename Base::ValueType & value)
    {
        return value.second;
    }
};
/// State for the single-column case: the values themselves are kept in a multiset.
template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, false, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>
{
    using Base = AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>;
    using Base::Base;

    /// Stores `item`, then trims the container to the threshold.
    void add(T item)
    {
        this->values.insert(item);
        this->narrowDown();
    }

    void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override
    {
        writeOneItem(buf, value);
    }

    typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
    {
        T item;
        readOneItem(buf, arena, item);
        return item;
    }

    /// The stored element is itself the value that goes into the result.
    static T itemValue(typename Base::ValueType & value)
    {
        return value;
    }
};
}

Some files were not shown because too many files have changed in this diff Show More