Merge branch 'master' of github.com:ClickHouse/ClickHouse
This commit is contained in:
commit 3482960f90
280  .github/workflows/pull_request.yml  vendored
@@ -984,6 +984,75 @@ jobs:
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################
  InstallPackagesTestRelease:
    needs: [BuilderDebRelease]
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/test_install
          REPORTS_PATH=${{runner.temp}}/reports_dir
          CHECK_NAME=Install packages (amd64)
          REPO_COPY=${{runner.temp}}/test_install/ClickHouse
          EOF
      - name: Download json reports
        uses: actions/download-artifact@v3
        with:
          path: ${{ env.REPORTS_PATH }}
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true
      - name: Test packages installation
        run: |
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci"
          python3 install_check.py "$CHECK_NAME"
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
  InstallPackagesTestAarch64:
    needs: [BuilderDebRelease]
    runs-on: [self-hosted, style-checker-aarch64]
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/test_install
          REPORTS_PATH=${{runner.temp}}/reports_dir
          CHECK_NAME=Install packages (arm64)
          REPO_COPY=${{runner.temp}}/test_install/ClickHouse
          EOF
      - name: Download json reports
        uses: actions/download-artifact@v3
        with:
          path: ${{ env.REPORTS_PATH }}
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
        with:
          clear-repository: true
      - name: Test packages installation
        run: |
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci"
          python3 install_check.py "$CHECK_NAME"
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
##############################################################################################
########################### FUNCTIONAl STATELESS TESTS #######################################
##############################################################################################
@@ -2813,6 +2882,217 @@ jobs:
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
  # Parallel replicas
  FunctionalStatefulTestDebugParallelReplicas:
    needs: [BuilderDebDebug]
    runs-on: [self-hosted, func-tester]
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateful_debug
          REPORTS_PATH=${{runner.temp}}/reports_dir
          CHECK_NAME=Stateful tests (debug, ParallelReplicas)
          REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
          KILL_TIMEOUT=3600
          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Functional test
        run: |
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci"
          python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
  FunctionalStatefulTestUBsanParallelReplicas:
    needs: [BuilderDebUBsan]
    runs-on: [self-hosted, func-tester]
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateful_ubsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
          CHECK_NAME=Stateful tests (ubsan, ParallelReplicas)
          REPO_COPY=${{runner.temp}}/stateful_ubsan/ClickHouse
          KILL_TIMEOUT=3600
          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Functional test
        run: |
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci"
          python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
  FunctionalStatefulTestMsanParallelReplicas:
    needs: [BuilderDebMsan]
    runs-on: [self-hosted, func-tester]
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateful_msan
          REPORTS_PATH=${{runner.temp}}/reports_dir
          CHECK_NAME=Stateful tests (msan, ParallelReplicas)
          REPO_COPY=${{runner.temp}}/stateful_msan/ClickHouse
          KILL_TIMEOUT=3600
          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Functional test
        run: |
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci"
          python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
  FunctionalStatefulTestTsanParallelReplicas:
    needs: [BuilderDebTsan]
    runs-on: [self-hosted, func-tester]
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateful_tsan
          REPORTS_PATH=${{runner.temp}}/reports_dir
          CHECK_NAME=Stateful tests (tsan, ParallelReplicas)
          REPO_COPY=${{runner.temp}}/stateful_tsan/ClickHouse
          KILL_TIMEOUT=3600
          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Functional test
        run: |
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci"
          python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
  FunctionalStatefulTestAsanParallelReplicas:
    needs: [BuilderDebAsan]
    runs-on: [self-hosted, func-tester]
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateful_debug
          REPORTS_PATH=${{runner.temp}}/reports_dir
          CHECK_NAME=Stateful tests (asan, ParallelReplicas)
          REPO_COPY=${{runner.temp}}/stateful_debug/ClickHouse
          KILL_TIMEOUT=3600
          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Functional test
        run: |
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci"
          python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
  FunctionalStatefulTestReleaseParallelReplicas:
    needs: [BuilderDebRelease]
    runs-on: [self-hosted, func-tester]
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/stateful_release
          REPORTS_PATH=${{runner.temp}}/reports_dir
          CHECK_NAME=Stateful tests (release, ParallelReplicas)
          REPO_COPY=${{runner.temp}}/stateful_release/ClickHouse
          KILL_TIMEOUT=3600
          EOF
      - name: Download json reports
        uses: actions/download-artifact@v2
        with:
          path: ${{ env.REPORTS_PATH }}
      - name: Clear repository
        run: |
          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Functional test
        run: |
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci"
          python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT"
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH"
##############################################################################################
######################################### STRESS TESTS #######################################
##############################################################################################
3  .github/workflows/release.yml  vendored
@@ -15,7 +15,8 @@ jobs:
      - name: Deploy packages and assets
        run: |
          GITHUB_TAG="${GITHUB_REF#refs/tags/}"
          curl '${{ secrets.PACKAGES_RELEASE_URL }}/release/'"${GITHUB_TAG}"'?binary=binary_darwin&binary=binary_darwin_aarch64&sync=true' -d ''
          curl --silent --data '' \
            '${{ secrets.PACKAGES_RELEASE_URL }}/release/'"${GITHUB_TAG}"'?binary=binary_darwin&binary=binary_darwin_aarch64&sync=true'
############################################################################################
##################################### Docker images #######################################
############################################################################################
6  .gitmodules  vendored
@@ -257,6 +257,9 @@
[submodule "contrib/qpl"]
    path = contrib/qpl
    url = https://github.com/intel/qpl
[submodule "contrib/idxd-config"]
    path = contrib/idxd-config
    url = https://github.com/intel/idxd-config
[submodule "contrib/wyhash"]
    path = contrib/wyhash
    url = https://github.com/wangyi-fudan/wyhash
@@ -330,3 +333,6 @@
[submodule "contrib/crc32-vpmsum"]
    path = contrib/crc32-vpmsum
    url = https://github.com/antonblanchard/crc32-vpmsum.git
[submodule "contrib/liburing"]
    path = contrib/liburing
    url = https://github.com/axboe/liburing
2014  CHANGELOG.md
File diff suppressed because it is too large
@@ -9,13 +9,12 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
* [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
* [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-rxm3rdrk-lIUmhLC3V8WTaL0TGxsOmg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Slack](https://clickhousedb.slack.com/) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events.
* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation.
* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev.
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.

## Upcoming events
* **Recording available**: [**v22.12 Release Webinar**](https://www.youtube.com/watch?v=sREupr6uc2k) 22.12 is the ClickHouse Christmas release. There are plenty of gifts (a new JOIN algorithm among them) and we adopted something from MongoDB. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
* [**ClickHouse Meetup at the CHEQ office in Tel Aviv**](https://www.meetup.com/clickhouse-tel-aviv-user-group/events/289599423/) - Jan 16 - We are very excited to be holding our next in-person ClickHouse meetup at the CHEQ office in Tel Aviv! Hear from CHEQ, ServiceNow and Contentsquare, as well as a deep dive presentation from ClickHouse CTO Alexey Milovidov. Join us for a fun evening of talks, food and discussion!
* [**ClickHouse Meetup at Microsoft Office in Seattle**](https://www.meetup.com/clickhouse-seattle-user-group/events/290310025/) - Jan 18 - Keep an eye on this space as we will be announcing speakers soon!
* **Recording available**: [**v23.1 Release Webinar**](https://www.youtube.com/watch?v=zYSZXBnTMSE) 23.1 is the ClickHouse New Year release. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. Inverted indices, query cache, and so -- very -- much more.
* **Recording available**: [**ClickHouse Meetup at the CHEQ office in Tel Aviv**](https://www.meetup.com/clickhouse-tel-aviv-user-group/events/289599423/) - We are very excited to be holding our next in-person ClickHouse meetup at the CHEQ office in Tel Aviv! Hear from CHEQ, ServiceNow and Contentsquare, as well as a deep dive presentation from ClickHouse CTO Alexey Milovidov. Join us for a fun evening of talks, food and discussion!
1  contrib/CMakeLists.txt  vendored
@@ -140,6 +140,7 @@ add_contrib (simdjson-cmake simdjson)
add_contrib (rapidjson-cmake rapidjson)
add_contrib (fastops-cmake fastops)
add_contrib (libuv-cmake libuv)
add_contrib (liburing-cmake liburing)
add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv
add_contrib (cassandra-cmake cassandra) # requires: libuv
2  contrib/NuRaft  vendored
@@ -1 +1 @@
Subproject commit 545b8c810a956b2efdc116e86be219af7e83d68a
Subproject commit b56784be1aec568fb72aff47f281097c017623cb
2  contrib/aws  vendored
@@ -1 +1 @@
Subproject commit 4a12641211d4dbc8e2fdb2dd0f1eea0927db9252
Subproject commit 06a6610e6fb3385e22ad85014a67aa307825ffb1
2  contrib/azure  vendored
@@ -1 +1 @@
Subproject commit ea8c3044f43f5afa7016d2d580ed201f495d7e94
Subproject commit e4fcdfc81e337e589ce231a452dcc280fcbb3f99
@@ -460,8 +460,15 @@ set(ICUI18N_SOURCES

file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" CONTENT " ")
enable_language(ASM)

if (ARCH_S390X)
    set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt70b_dat.S" )
else()
    set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt70l_dat.S" )
endif()

set(ICUDATA_SOURCES
    "${ICUDATA_SOURCE_DIR}/icudt70l_dat.S"
    "${ICUDATA_SOURCE_FILE}"
    "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" # Without this cmake can incorrectly detects library type (OBJECT) instead of SHARED/STATIC
)
2  contrib/icudata  vendored
@@ -1 +1 @@
Subproject commit 72d9a4a7febc904e2b0a534ccb25ae40fac5f1e5
Subproject commit c8e717892a557b4d2852317c7d628aacc0a0e5ab
1  contrib/idxd-config  vendored  Submodule
@@ -0,0 +1 @@
Subproject commit f6605c41a735e3fdfef2d2d18655a33af6490b99
2  contrib/krb5  vendored
@@ -1 +1 @@
Subproject commit b89e20367b074bd02dd118a6534099b21e88b3c3
Subproject commit f8262a1b548eb29d97e059260042036255d07f8d
@@ -15,6 +15,10 @@ if(NOT AWK_PROGRAM)
    message(FATAL_ERROR "You need the awk program to build ClickHouse with krb5 enabled.")
endif()

if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC))
    add_compile_definitions(USE_BORINGSSL=1)
endif ()

set(KRB5_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/krb5/src")
set(KRB5_ET_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}/include_private")

@@ -578,12 +582,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
    list(APPEND ALL_SRCS "${CMAKE_CURRENT_BINARY_DIR}/include_private/kcmrpc.c")
endif()

if (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)
    list(REMOVE_ITEM ALL_SRCS "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/aes.c")
    list(APPEND ALL_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/aes.c")
endif ()


target_sources(_krb5 PRIVATE
    ${ALL_SRCS}
)
@@ -1,302 +0,0 @@
/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/* lib/crypto/openssl/enc_provider/aes.c */
/*
 * Copyright (C) 2003, 2007, 2008, 2009 by the Massachusetts Institute of Technology.
 * All rights reserved.
 *
 * Export of this software from the United States of America may
 * require a specific license from the United States Government.
 * It is the responsibility of any person or organization contemplating
 * export to obtain such a license before exporting.
 *
 * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
 * distribute this software and its documentation for any purpose and
 * without fee is hereby granted, provided that the above copyright
 * notice appear in all copies and that both that copyright notice and
 * this permission notice appear in supporting documentation, and that
 * the name of M.I.T. not be used in advertising or publicity pertaining
 * to distribution of the software without specific, written prior
 * permission. Furthermore if you modify this software you must label
 * your software as modified software and not distribute it in such a
 * fashion that it might be confused with the original M.I.T. software.
 * M.I.T. makes no representations about the suitability of
 * this software for any purpose. It is provided "as is" without express
 * or implied warranty.
 */

#include "crypto_int.h"
#include <openssl/evp.h>
#include <openssl/aes.h>

/* proto's */
static krb5_error_code
cbc_enc(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
        size_t num_data);
static krb5_error_code
cbc_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
         size_t num_data);
static krb5_error_code
cts_encr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
         size_t num_data, size_t dlen);
static krb5_error_code
cts_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
         size_t num_data, size_t dlen);

#define BLOCK_SIZE 16
#define NUM_BITS 8
#define IV_CTS_BUF_SIZE 16 /* 16 - hardcoded in CRYPTO_cts128_en/decrypt */

static const EVP_CIPHER *
map_mode(unsigned int len)
{
    if (len==16)
        return EVP_aes_128_cbc();
    if (len==32)
        return EVP_aes_256_cbc();
    else
        return NULL;
}

/* Encrypt one block using CBC. */
static krb5_error_code
cbc_enc(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
        size_t num_data)
{
    int ret, olen = BLOCK_SIZE;
    unsigned char iblock[BLOCK_SIZE], oblock[BLOCK_SIZE];
    EVP_CIPHER_CTX *ctx;
    struct iov_cursor cursor;

    ctx = EVP_CIPHER_CTX_new();
    if (ctx == NULL)
        return ENOMEM;

    ret = EVP_EncryptInit_ex(ctx, map_mode(key->keyblock.length),
                             NULL, key->keyblock.contents, (ivec) ? (unsigned char*)ivec->data : NULL);
    if (ret == 0) {
        EVP_CIPHER_CTX_free(ctx);
        return KRB5_CRYPTO_INTERNAL;
    }

    k5_iov_cursor_init(&cursor, data, num_data, BLOCK_SIZE, FALSE);
    k5_iov_cursor_get(&cursor, iblock);
    EVP_CIPHER_CTX_set_padding(ctx,0);
    ret = EVP_EncryptUpdate(ctx, oblock, &olen, iblock, BLOCK_SIZE);
    if (ret == 1)
        k5_iov_cursor_put(&cursor, oblock);
    EVP_CIPHER_CTX_free(ctx);

    zap(iblock, BLOCK_SIZE);
    zap(oblock, BLOCK_SIZE);
    return (ret == 1) ? 0 : KRB5_CRYPTO_INTERNAL;
}

/* Decrypt one block using CBC. */
static krb5_error_code
cbc_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
         size_t num_data)
{
    int ret = 0, olen = BLOCK_SIZE;
    unsigned char iblock[BLOCK_SIZE], oblock[BLOCK_SIZE];
    EVP_CIPHER_CTX *ctx;
    struct iov_cursor cursor;

    ctx = EVP_CIPHER_CTX_new();
    if (ctx == NULL)
        return ENOMEM;

    ret = EVP_DecryptInit_ex(ctx, map_mode(key->keyblock.length),
                             NULL, key->keyblock.contents, (ivec) ? (unsigned char*)ivec->data : NULL);
    if (ret == 0) {
        EVP_CIPHER_CTX_free(ctx);
        return KRB5_CRYPTO_INTERNAL;
    }

    k5_iov_cursor_init(&cursor, data, num_data, BLOCK_SIZE, FALSE);
    k5_iov_cursor_get(&cursor, iblock);
    EVP_CIPHER_CTX_set_padding(ctx,0);
    ret = EVP_DecryptUpdate(ctx, oblock, &olen, iblock, BLOCK_SIZE);
    if (ret == 1)
        k5_iov_cursor_put(&cursor, oblock);
    EVP_CIPHER_CTX_free(ctx);

    zap(iblock, BLOCK_SIZE);
    zap(oblock, BLOCK_SIZE);
    return (ret == 1) ? 0 : KRB5_CRYPTO_INTERNAL;
}

static krb5_error_code
cts_encr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
         size_t num_data, size_t dlen)
{
    int ret = 0;
    size_t size = 0;
    unsigned char *oblock = NULL, *dbuf = NULL;
    unsigned char iv_cts[IV_CTS_BUF_SIZE];
    struct iov_cursor cursor;
    AES_KEY enck;

    memset(iv_cts,0,sizeof(iv_cts));
    if (ivec && ivec->data){
        if (ivec->length != sizeof(iv_cts))
            return KRB5_CRYPTO_INTERNAL;
        memcpy(iv_cts, ivec->data,ivec->length);
    }

    oblock = OPENSSL_malloc(dlen);
    if (!oblock){
        return ENOMEM;
    }
    dbuf = OPENSSL_malloc(dlen);
    if (!dbuf){
        OPENSSL_free(oblock);
        return ENOMEM;
    }

    k5_iov_cursor_init(&cursor, data, num_data, dlen, FALSE);
    k5_iov_cursor_get(&cursor, dbuf);

    AES_set_encrypt_key(key->keyblock.contents,
                        NUM_BITS * key->keyblock.length, &enck);

    size = CRYPTO_cts128_encrypt((unsigned char *)dbuf, oblock, dlen, &enck,
                                 iv_cts, AES_cbc_encrypt);
    if (size <= 0)
        ret = KRB5_CRYPTO_INTERNAL;
    else
        k5_iov_cursor_put(&cursor, oblock);

    if (!ret && ivec && ivec->data)
        memcpy(ivec->data, iv_cts, sizeof(iv_cts));

    zap(oblock, dlen);
    zap(dbuf, dlen);
    OPENSSL_free(oblock);
    OPENSSL_free(dbuf);

    return ret;
}

static krb5_error_code
cts_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data,
         size_t num_data, size_t dlen)
{
    int ret = 0;
    size_t size = 0;
    unsigned char *oblock = NULL;
    unsigned char *dbuf = NULL;
    unsigned char iv_cts[IV_CTS_BUF_SIZE];
    struct iov_cursor cursor;
    AES_KEY deck;

    memset(iv_cts,0,sizeof(iv_cts));
    if (ivec && ivec->data){
        if (ivec->length != sizeof(iv_cts))
            return KRB5_CRYPTO_INTERNAL;
        memcpy(iv_cts, ivec->data,ivec->length);
    }

    oblock = OPENSSL_malloc(dlen);
    if (!oblock)
        return ENOMEM;
    dbuf = OPENSSL_malloc(dlen);
    if (!dbuf){
        OPENSSL_free(oblock);
        return ENOMEM;
    }

    AES_set_decrypt_key(key->keyblock.contents,
                        NUM_BITS * key->keyblock.length, &deck);

    k5_iov_cursor_init(&cursor, data, num_data, dlen, FALSE);
    k5_iov_cursor_get(&cursor, dbuf);

    size = CRYPTO_cts128_decrypt((unsigned char *)dbuf, oblock,
                                 dlen, &deck,
                                 iv_cts, AES_cbc_encrypt);
    if (size <= 0)
        ret = KRB5_CRYPTO_INTERNAL;
    else
        k5_iov_cursor_put(&cursor, oblock);

    if (!ret && ivec && ivec->data)
        memcpy(ivec->data, iv_cts, sizeof(iv_cts));

    zap(oblock, dlen);
    zap(dbuf, dlen);
    OPENSSL_free(oblock);
    OPENSSL_free(dbuf);

    return ret;
}

krb5_error_code
krb5int_aes_encrypt(krb5_key key, const krb5_data *ivec,
                    krb5_crypto_iov *data, size_t num_data)
{
    int ret = 0;
    size_t input_length, nblocks;

    input_length = iov_total_length(data, num_data, FALSE);
    nblocks = (input_length + BLOCK_SIZE - 1) / BLOCK_SIZE;
    if (nblocks == 1) {
        if (input_length != BLOCK_SIZE)
            return KRB5_BAD_MSIZE;
        ret = cbc_enc(key, ivec, data, num_data);
    } else if (nblocks > 1) {
        ret = cts_encr(key, ivec, data, num_data, input_length);
    }

    return ret;
}

krb5_error_code
krb5int_aes_decrypt(krb5_key key, const krb5_data *ivec,
                    krb5_crypto_iov *data, size_t num_data)
{
    int ret = 0;
    size_t input_length, nblocks;

    input_length = iov_total_length(data, num_data, FALSE);
    nblocks = (input_length + BLOCK_SIZE - 1) / BLOCK_SIZE;
    if (nblocks == 1) {
        if (input_length != BLOCK_SIZE)
            return KRB5_BAD_MSIZE;
        ret = cbc_decr(key, ivec, data, num_data);
    } else if (nblocks > 1) {
        ret = cts_decr(key, ivec, data, num_data, input_length);
    }

    return ret;
}

static krb5_error_code
krb5int_aes_init_state (const krb5_keyblock *key, krb5_keyusage usage,
                        krb5_data *state)
{
    state->length = 16;
    state->data = (void *) malloc(16);
    if (state->data == NULL)
        return ENOMEM;
    memset(state->data, 0, state->length);
    return 0;
}
const struct krb5_enc_provider krb5int_enc_aes128 = {
    16,
    16, 16,
    krb5int_aes_encrypt,
    krb5int_aes_decrypt,
    NULL,
    krb5int_aes_init_state,
    krb5int_default_free_state
};

const struct krb5_enc_provider krb5int_enc_aes256 = {
    16,
    32, 32,
    krb5int_aes_encrypt,
    krb5int_aes_decrypt,
    NULL,
    krb5int_aes_init_state,
    krb5int_default_free_state
};
1  contrib/liburing  vendored  Submodule
@@ -0,0 +1 @@
Subproject commit f5a48392c4ea33f222cbebeb2e2fc31620162949
53  contrib/liburing-cmake/CMakeLists.txt  Normal file
@@ -0,0 +1,53 @@
set (ENABLE_LIBURING_DEFAULT ${ENABLE_LIBRARIES})

if (NOT OS_LINUX)
    set (ENABLE_LIBURING_DEFAULT OFF)
endif ()

option (ENABLE_LIBURING "Enable liburing" ${ENABLE_LIBURING_DEFAULT})

if (NOT ENABLE_LIBURING)
    message (STATUS "Not using liburing")
    return ()
endif ()

set (LIBURING_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/liburing/src/include")
set (LIBURING_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/liburing/src")

set (SRCS
    "${LIBURING_SOURCE_DIR}/queue.c"
    "${LIBURING_SOURCE_DIR}/register.c"
    "${LIBURING_SOURCE_DIR}/setup.c"
    "${LIBURING_SOURCE_DIR}/syscall.c"
    "${LIBURING_SOURCE_DIR}/version.c"
)

add_compile_definitions (_GNU_SOURCE)
add_compile_definitions (LIBURING_INTERNAL)

set (LIBURING_COMPAT_INCLUDE_DIR "${ClickHouse_BINARY_DIR}/contrib/liburing/src/include-compat")
set (LIBURING_COMPAT_HEADER "${LIBURING_COMPAT_INCLUDE_DIR}/liburing/compat.h")

set (LIBURING_CONFIG_HAS_KERNEL_RWF_T FALSE)
set (LIBURING_CONFIG_HAS_KERNEL_TIMESPEC FALSE)
set (LIBURING_CONFIG_HAS_OPEN_HOW FALSE)
set (LIBURING_CONFIG_HAS_STATX FALSE)
set (LIBURING_CONFIG_HAS_GLIBC_STATX FALSE)

configure_file (compat.h.in ${LIBURING_COMPAT_HEADER})

set (LIBURING_GENERATED_INCLUDE_DIR "${ClickHouse_BINARY_DIR}/contrib/liburing/src/include")
set (LIBURING_VERSION_HEADER "${LIBURING_GENERATED_INCLUDE_DIR}/liburing/io_uring_version.h")

file (READ "${LIBURING_SOURCE_DIR}/../liburing.spec" LIBURING_SPEC)

string (REGEX MATCH "Version: ([0-9]+)\.([0-9]+)" _ ${LIBURING_SPEC})
set (LIBURING_VERSION_MAJOR ${CMAKE_MATCH_1})
set (LIBURING_VERSION_MINOR ${CMAKE_MATCH_2})

configure_file (io_uring_version.h.in ${LIBURING_VERSION_HEADER})

add_library (_liburing ${SRCS})
add_library (ch_contrib::liburing ALIAS _liburing)

target_include_directories (_liburing SYSTEM PUBLIC ${LIBURING_COMPAT_INCLUDE_DIR} ${LIBURING_GENERATED_INCLUDE_DIR} "${LIBURING_SOURCE_DIR}/include")
50  contrib/liburing-cmake/compat.h.in  Normal file
@@ -0,0 +1,50 @@
/* SPDX-License-Identifier: MIT */
#ifndef LIBURING_COMPAT_H
#define LIBURING_COMPAT_H

# cmakedefine LIBURING_CONFIG_HAS_KERNEL_RWF_T
# cmakedefine LIBURING_CONFIG_HAS_KERNEL_TIMESPEC
# cmakedefine LIBURING_CONFIG_HAS_OPEN_HOW
# cmakedefine LIBURING_CONFIG_HAS_GLIBC_STATX
# cmakedefine LIBURING_CONFIG_HAS_STATX

#if !defined(LIBURING_CONFIG_HAS_KERNEL_RWF_T)
typedef int __kernel_rwf_t;
#endif

#if !defined(LIBURING_CONFIG_HAS_KERNEL_TIMESPEC)
#include <stdint.h>

struct __kernel_timespec {
    int64_t tv_sec;
    long long tv_nsec;
};

/* <linux/time_types.h> is not available, so it can't be included */
#define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H 1

#else
#include <linux/time_types.h>

/* <linux/time_types.h> is included above and not needed again */
#define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H 1

#endif

#if !defined(LIBURING_CONFIG_HAS_OPEN_HOW)
#include <inttypes.h>

struct open_how {
    uint64_t flags;
    uint64_t mode;
    uint64_t resolve;
};
#else
#include <linux/openat2.h>
#endif

#if !defined(LIBURING_CONFIG_HAS_GLIBC_STATX) && defined(LIBURING_CONFIG_HAS_STATX)
#include <sys/stat.h>
#endif

#endif
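The # cmakedefine lines above are resolved by the configure_file() call in contrib/liburing-cmake/CMakeLists.txt; since that file sets every LIBURING_CONFIG_HAS_* variable to FALSE, CMake should comment all of them out and the fallback declarations take effect. A minimal sketch of what the generated liburing/compat.h would then contain, under that assumption only (illustrative, not part of the commit):

/* Sketch of the configure_file() output, assuming all LIBURING_CONFIG_HAS_*
 * variables are FALSE, as set in contrib/liburing-cmake/CMakeLists.txt. */
/* #undef LIBURING_CONFIG_HAS_KERNEL_RWF_T */
/* #undef LIBURING_CONFIG_HAS_KERNEL_TIMESPEC */
/* #undef LIBURING_CONFIG_HAS_OPEN_HOW */
/* #undef LIBURING_CONFIG_HAS_GLIBC_STATX */
/* #undef LIBURING_CONFIG_HAS_STATX */
#include <stdint.h>
#include <inttypes.h>
typedef int __kernel_rwf_t;                                       /* kernel type not detected */
struct __kernel_timespec { int64_t tv_sec; long long tv_nsec; };  /* fallback definition */
struct open_how { uint64_t flags; uint64_t mode; uint64_t resolve; };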
8  contrib/liburing-cmake/io_uring_version.h.in  Normal file
@@ -0,0 +1,8 @@
/* SPDX-License-Identifier: MIT */
#ifndef LIBURING_VERSION_H
#define LIBURING_VERSION_H

#define IO_URING_VERSION_MAJOR ${LIBURING_VERSION_MAJOR}
#define IO_URING_VERSION_MINOR ${LIBURING_VERSION_MINOR}

#endif
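The two ${...} placeholders above are substituted by the configure_file(io_uring_version.h.in ...) step in contrib/liburing-cmake/CMakeLists.txt, which extracts the Version: field from liburing.spec with a regex. Assuming, purely as an example, that the spec declared Version: 2.3, the generated liburing/io_uring_version.h would come out roughly as:

/* Illustrative output only; the 2/3 values are hypothetical and the real
 * numbers come from contrib/liburing/liburing.spec at configure time. */
/* SPDX-License-Identifier: MIT */
#ifndef LIBURING_VERSION_H
#define LIBURING_VERSION_H

#define IO_URING_VERSION_MAJOR 2
#define IO_URING_VERSION_MINOR 3

#endif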
@@ -1,3 +1,9 @@
# Note: ClickHouse uses BoringSSL. The presence of OpenSSL is only due to IBM's port of ClickHouse to s390x. BoringSSL does not support
# s390x, also FIPS validation provided by the OS vendor (Red Hat, Ubuntu) requires (preferrably dynamic) linking with OS packages which
# ClickHouse generally avoids.
#
# Furthermore, the in-source OpenSSL dump in this directory is due to development purposes and non FIPS-compliant.

if(ENABLE_OPENSSL_DYNAMIC OR ENABLE_OPENSSL)
    set(ENABLE_SSL 1 CACHE INTERNAL "")
    set(OPENSSL_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/openssl)
2  contrib/qpl  vendored
@@ -1 +1 @@
Subproject commit becb7a1b15bdb4845ec3721a550707ffa51d029d
Subproject commit d75a29d95d8a548297fce3549d21020005364dc8
@@ -10,11 +10,30 @@ if (NOT ENABLE_QPL)
    return()
endif()

## QPL has build dependency on libaccel-config. Here is to build libaccel-config which is required by QPL.
## libaccel-config is the utility library for controlling and configuring Intel® In-Memory Analytics Accelerator (Intel® IAA).
set (LIBACCEL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/idxd-config")
set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake")
set (LIBACCEL_HEADER_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake/idxd-header")
set (SRCS
    "${LIBACCEL_SOURCE_DIR}/accfg/lib/libaccfg.c"
    "${LIBACCEL_SOURCE_DIR}/util/log.c"
    "${LIBACCEL_SOURCE_DIR}/util/sysfs.c"
)

add_library(accel-config ${SRCS})

target_compile_options(accel-config PRIVATE "-D_GNU_SOURCE")

target_include_directories(accel-config BEFORE
    PRIVATE ${UUID_DIR}
    PRIVATE ${LIBACCEL_HEADER_DIR}
    PRIVATE ${LIBACCEL_SOURCE_DIR})

## QPL build start here.
set (QPL_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl")
set (QPL_SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl/sources")
set (QPL_BINARY_DIR "${ClickHouse_BINARY_DIR}/build/contrib/qpl")
set (UUID_DIR "${ClickHouse_SOURCE_DIR}/contrib/qpl-cmake")

set (EFFICIENT_WAIT OFF)
set (BLOCK_ON_FAULT ON)
set (LOG_HW_INIT OFF)
@@ -315,13 +334,8 @@ target_compile_definitions(_qpl
    PRIVATE -DQPL_BADARG_CHECK
    PUBLIC -DENABLE_QPL_COMPRESSION)

find_library(LIBACCEL accel-config)
if(NOT LIBACCEL)
    message(FATAL_ERROR "Please install QPL dependency library:libaccel-config from https://github.com/intel/idxd-config")
endif()

target_link_libraries(_qpl
    PRIVATE ${LIBACCEL}
    PRIVATE accel-config
    PRIVATE ${CMAKE_DL_LIBS})

add_library (ch_contrib::qpl ALIAS _qpl)
159  contrib/qpl-cmake/idxd-header/config.h  Normal file
@@ -0,0 +1,159 @@
/* config.h. Generated from config.h.in by configure. */
/* config.h.in. Generated from configure.ac by autoheader. */

/* Define if building universal (internal helper macro) */
/* #undef AC_APPLE_UNIVERSAL_BUILD */

/* Debug messages. */
/* #undef ENABLE_DEBUG */

/* Documentation / man pages. */
/* #define ENABLE_DOCS */

/* System logging. */
#define ENABLE_LOGGING 1

/* accfg test support */
/* #undef ENABLE_TEST */

/* Define to 1 if big-endian-arch */
/* #undef HAVE_BIG_ENDIAN */

/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H 1

/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1

/* Define to 1 if you have the <linux/version.h> header file. */
#define HAVE_LINUX_VERSION_H 1

/* Define to 1 if little-endian-arch */
#define HAVE_LITTLE_ENDIAN 1

/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1

/* Define to 1 if you have the `secure_getenv' function. */
#define HAVE_SECURE_GETENV 1

/* Define to 1 if you have statement expressions. */
#define HAVE_STATEMENT_EXPR 1

/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1

/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1

/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1

/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1

/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1

/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1

/* Define to 1 if typeof works with your compiler. */
#define HAVE_TYPEOF 1

/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1

/* Define to 1 if using libuuid */
#define HAVE_UUID 1

/* Define to 1 if you have the `__secure_getenv' function. */
/* #undef HAVE___SECURE_GETENV */

/* Define to the sub-directory where libtool stores uninstalled libraries. */
#define LT_OBJDIR ".libs/"

/* Name of package */
#define PACKAGE "accel-config"

/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT "linux-dsa@lists.01.org"

/* Define to the full name of this package. */
#define PACKAGE_NAME "accel-config"

/* Define to the full name and version of this package. */
#define PACKAGE_STRING "accel-config 3.5.2.gitf6605c41"

/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME "accel-config"

/* Define to the home page for this package. */
#define PACKAGE_URL "https://github.com/xxx/accel-config"

/* Define to the version of this package. */
#define PACKAGE_VERSION "3.5.2.gitf6605c41"

/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1

/* Enable extensions on AIX 3, Interix. */
#ifndef _ALL_SOURCE
# define _ALL_SOURCE 1
#endif
/* Enable GNU extensions on systems that have them. */
#ifndef _GNU_SOURCE
# define _GNU_SOURCE 1
#endif
/* Enable threading extensions on Solaris. */
#ifndef _POSIX_PTHREAD_SEMANTICS
# define _POSIX_PTHREAD_SEMANTICS 1
#endif
/* Enable extensions on HP NonStop. */
#ifndef _TANDEM_SOURCE
# define _TANDEM_SOURCE 1
#endif
/* Enable general extensions on Solaris. */
#ifndef __EXTENSIONS__
# define __EXTENSIONS__ 1
#endif


/* Version number of package */
#define VERSION "3.5.2.gitf6605c41"

/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
   significant byte first (like Motorola and SPARC, unlike Intel). */
#if defined AC_APPLE_UNIVERSAL_BUILD
# if defined __BIG_ENDIAN__
#  define WORDS_BIGENDIAN 1
# endif
#else
# ifndef WORDS_BIGENDIAN
/* #  undef WORDS_BIGENDIAN */
# endif
#endif

/* Enable large inode numbers on Mac OS X 10.5. */
#ifndef _DARWIN_USE_64_BIT_INODE
# define _DARWIN_USE_64_BIT_INODE 1
#endif

/* Number of bits in a file offset, on hosts where this is settable. */
/* #undef _FILE_OFFSET_BITS */

/* Define for large files, on AIX-style hosts. */
/* #undef _LARGE_FILES */

/* Define to 1 if on MINIX. */
/* #undef _MINIX */

/* Define to 2 if the system does not provide POSIX.1 features except with
   this defined. */
/* #undef _POSIX_1_SOURCE */

/* Define to 1 if you need to in order for `stat' and other things to work. */
/* #undef _POSIX_SOURCE */

/* Define to __typeof__ if your compiler spells it that way. */
/* #undef typeof */
@@ -1,6 +1,10 @@
set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/snappy")

set (SNAPPY_IS_BIG_ENDIAN 0)
if (ARCH_S390X)
    set (SNAPPY_IS_BIG_ENDIAN 1)
else ()
    set (SNAPPY_IS_BIG_ENDIAN 0)
endif()

set (HAVE_BYTESWAP_H 1)
set (HAVE_SYS_MMAN_H 1)
@@ -134,6 +134,14 @@
        "name": "clickhouse/keeper-jepsen-test",
        "dependent": []
    },
    "docker/test/install/deb": {
        "name": "clickhouse/install-deb-test",
        "dependent": []
    },
    "docker/test/install/rpm": {
        "name": "clickhouse/install-rpm-test",
        "dependent": []
    },
    "docker/docs/builder": {
        "name": "clickhouse/docs-builder",
        "dependent": [
@@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.1.1.3077"
ARG VERSION="23.1.3.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

# user/group precreated explicitly with fixed uid/gid on purpose.
@@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list

ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.1.1.3077"
ARG VERSION="23.1.3.5"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

# set non-empty deb_location_url url to create a docker image
@@ -138,6 +138,8 @@ function clone_submodules
        contrib/c-ares
        contrib/morton-nd
        contrib/xxHash
        contrib/simdjson
        contrib/liburing
    )

    git submodule sync
@@ -158,7 +160,9 @@ function run_cmake
        "-DENABLE_THINLTO=0"
        "-DUSE_UNWIND=1"
        "-DENABLE_NURAFT=1"
        "-DENABLE_SIMDJSON=1"
        "-DENABLE_JEMALLOC=1"
        "-DENABLE_LIBURING=1"
    )

    export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache"
@@ -227,6 +231,7 @@ function run_tests
        --hung-check
        --fast-tests-only
        --no-random-settings
        --no-random-merge-tree-settings
        --no-long
        --testname
        --shard
@@ -234,6 +239,7 @@ function run_tests
        --check-zookeeper-session
        --order random
        --print-time
        --report-logs-stats
        --jobs "${NPROC}"
    )
    time clickhouse-test "${test_opts[@]}" -- "$FASTTEST_FOCUS" 2>&1 \
64  docker/test/install/deb/Dockerfile  Normal file
@@ -0,0 +1,64 @@
FROM ubuntu:22.04

# The Dockerfile is nicely borrowed from
# https://github.com/lionelnicolas/docker-ubuntu-systemd/blob/83aa3249146f5df264fe45353f79fc76eb1e42d7/Dockerfile

ENV \
    DEBIAN_FRONTEND=noninteractive \
    LANG=C.UTF-8 \
    container=docker \
    init=/lib/systemd/systemd

# install systemd packages
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        systemd \
    && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists

# configure systemd
# remove systemd 'wants' triggers
# remove everything except tmpfiles setup in sysinit target
# remove UTMP updater service
# disable /tmp mount
# fix missing BPF firewall support warning
# just for cosmetics, fix "not-found" entries while using "systemctl --all"
RUN \
    find \
        /etc/systemd/system/*.wants/* \
        /lib/systemd/system/multi-user.target.wants/* \
        /lib/systemd/system/sockets.target.wants/*initctl* \
        ! -type d \
        -delete && \
    find \
        /lib/systemd/system/sysinit.target.wants \
        ! -type d \
        ! -name '*systemd-tmpfiles-setup*' \
        -delete && \
    find \
        /lib/systemd \
        -name systemd-update-utmp-runlevel.service \
        -delete && \
    rm -vf /usr/share/systemd/tmp.mount && \
    sed -ri '/^IPAddressDeny/d' /lib/systemd/system/systemd-journald.service && \
    for MATCH in \
        plymouth-start.service \
        plymouth-quit-wait.service \
        syslog.socket \
        syslog.service \
        display-manager.service \
        systemd-sysusers.service \
        tmp.mount \
        systemd-udevd.service \
    ; do \
        grep -rn --binary-files=without-match ${MATCH} /lib/systemd/ | cut -d: -f1 | xargs sed -ri 's/(.*=.*)'${MATCH}'(.*)/\1\2/'; \
    done && \
    systemctl disable ondemand.service && \
    systemctl set-default multi-user.target

VOLUME ["/run", "/run/lock"]

STOPSIGNAL SIGRTMIN+3

ENTRYPOINT ["/lib/systemd/systemd"]
55  docker/test/install/rpm/Dockerfile  Normal file
@@ -0,0 +1,55 @@
FROM centos:8

# The Dockerfile is nicely borrowed from
# https://github.com/lionelnicolas/docker-ubuntu-systemd/blob/83aa3249146f5df264fe45353f79fc76eb1e42d7/Dockerfile

ENV \
    LANG=C.UTF-8 \
    container=docker \
    init=/lib/systemd/systemd

# configure systemd
# remove systemd 'wants' triggers
# remove everything except tmpfiles setup in sysinit target
# remove UTMP updater service
# disable /tmp mount
# fix missing BPF firewall support warning
# just for cosmetics, fix "not-found" entries while using "systemctl --all"
RUN \
    find \
        /etc/systemd/system/*.wants/ \
        /lib/systemd/system/multi-user.target.wants/ \
        /lib/systemd/system/local-fs.target.wants/ \
        /lib/systemd/system/sockets.target.wants/*initctl* \
        ! -type d \
        -delete && \
    find \
        /lib/systemd/system/sysinit.target.wants \
        ! -type d \
        ! -name '*systemd-tmpfiles-setup*' \
        -delete && \
    find \
        /lib/systemd \
        -name systemd-update-utmp-runlevel.service \
        -delete && \
    rm -vf /usr/share/systemd/tmp.mount && \
    sed -ri '/^IPAddressDeny/d' /lib/systemd/system/systemd-journald.service && \
    for MATCH in \
        plymouth-start.service \
        plymouth-quit-wait.service \
        syslog.socket \
        syslog.service \
        display-manager.service \
        systemd-sysusers.service \
        tmp.mount \
        systemd-udevd.service \
    ; do \
        grep -rn --binary-files=without-match ${MATCH} /lib/systemd/ | cut -d: -f1 | xargs sed -ri 's/(.*=.*)'${MATCH}'(.*)/\1\2/'; \
    done && \
    systemctl set-default multi-user.target

VOLUME ["/run", "/run/lock"]

STOPSIGNAL SIGRTMIN+3

ENTRYPOINT ["/lib/systemd/systemd"]
@@ -126,13 +126,16 @@ function run_tests()
    fi

    set +e
    clickhouse-test -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \
        --skip 00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" \

    if [[ -n "$USE_PARALLEL_REPLICAS" ]] && [[ "$USE_PARALLEL_REPLICAS" -eq 1 ]]; then
        clickhouse-test --client="clickhouse-client --use_hedged_requests=0 --allow_experimental_parallel_reading_from_replicas=1 \
            --max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'" \
            -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --no-parallel-replicas --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
            "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt

        clickhouse-test --timeout 1200 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \
            00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt

    else
        clickhouse-test -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
            "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
    fi
    set -e
}

@@ -134,9 +134,9 @@ function run_tests()

    set +e
    clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
        --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
        | ts '%Y-%m-%d %H:%M:%S' \
        | tee -a test_output/test_result.txt
        --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
        | ts '%Y-%m-%d %H:%M:%S' \
        | tee -a test_output/test_result.txt
    set -e
}

@@ -11,6 +11,31 @@ set -x
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'

OK="\tOK\t\\N\t"
FAIL="\tFAIL\t\\N\t"

FAILURE_CONTEXT_LINES=50
FAILURE_CONTEXT_MAX_LINE_WIDTH=400

function escaped()
{
    # That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language.
    # Also limit lines width just in case (too long lines are not really useful usually)
    clickhouse local -S 's String' --input-format=LineAsString -q "select substr(s, 1, $FAILURE_CONTEXT_MAX_LINE_WIDTH)
        from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'"
}
function head_escaped()
{
    head -n $FAILURE_CONTEXT_LINES $1 | escaped
}
function unts()
{
    grep -Po "[0-9][0-9]:[0-9][0-9] \K.*"
}
function trim_server_logs()
{
    head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped
}

function install_packages()
{
@@ -33,7 +58,9 @@ function configure()
    ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag

    # avoid too slow startup
    sudo cat /etc/clickhouse-server/config.d/keeper_port.xml | sed "s|<snapshot_distance>100000</snapshot_distance>|<snapshot_distance>10000</snapshot_distance>|" > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
    sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
        | sed "s|<snapshot_distance>100000</snapshot_distance>|<snapshot_distance>10000</snapshot_distance>|" \
        > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
    sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
    sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
    sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml
@@ -136,6 +163,7 @@ function stop()
    clickhouse stop --max-tries "$max_tries" --do-not-kill && return

    # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces.
    echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv
    kill -TERM "$(pidof gdb)" ||:
    sleep 5
    echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log
@@ -151,10 +179,11 @@ function start()
    if [ "$counter" -gt ${1:-120} ]
    then
        echo "Cannot start clickhouse-server"
        echo -e "Cannot start clickhouse-server\tFAIL" >> /test_output/test_results.tsv
        rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||:
        echo -e "Cannot start clickhouse-server$FAIL$(trim_server_logs application_errors.txt)" >> /test_output/test_results.tsv
        cat /var/log/clickhouse-server/stdout.log
        tail -n1000 /var/log/clickhouse-server/stderr.log
        tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e '<Warning> RaftInstance:' -e '<Information> RaftInstance' | tail -n1000
        tail -n100 /var/log/clickhouse-server/stderr.log
        tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e '<Warning> RaftInstance:' -e '<Information> RaftInstance' | tail -n100
        break
    fi
    # use root to match with current uid
@@ -252,9 +281,92 @@ start
clickhouse-client --query "SHOW TABLES FROM datasets"
clickhouse-client --query "SHOW TABLES FROM test"

clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32), WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8, PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16), CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, ParsedParams 
Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16,
|
||||
EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32,
|
||||
UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String,
|
||||
Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32),
|
||||
RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8,
|
||||
FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2),
|
||||
CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String,
|
||||
IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8,
|
||||
WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8,
|
||||
SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32,
|
||||
IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8,
|
||||
IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8,
|
||||
Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32,
|
||||
RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2),
|
||||
BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32,
|
||||
DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32,
|
||||
RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32,
|
||||
LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32,
|
||||
RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String,
|
||||
ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String,
|
||||
OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String,
|
||||
UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64,
|
||||
URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String,
|
||||
ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64),
|
||||
IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate)
|
||||
ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
|
||||
clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16,
|
||||
EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32,
|
||||
UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String,
|
||||
RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16),
|
||||
URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8,
|
||||
FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16,
|
||||
UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8,
|
||||
MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16,
|
||||
SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16,
|
||||
ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32,
|
||||
SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8,
|
||||
FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8,
|
||||
HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8,
|
||||
GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32,
|
||||
HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String,
|
||||
HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32,
|
||||
FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32,
|
||||
LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32,
|
||||
RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String,
|
||||
ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String,
|
||||
OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String,
|
||||
UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64,
|
||||
URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String,
|
||||
ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64),
|
||||
IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate)
|
||||
ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
|
||||
clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8,
|
||||
VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32,
|
||||
Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String,
|
||||
EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String,
|
||||
AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32),
|
||||
RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32,
|
||||
SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32,
|
||||
ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32,
|
||||
SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16,
|
||||
UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16,
|
||||
FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8,
|
||||
FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8,
|
||||
Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8,
|
||||
BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16),
|
||||
Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32),
|
||||
WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64,
|
||||
ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32,
|
||||
ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32,
|
||||
ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32,
|
||||
ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16,
|
||||
ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32,
|
||||
OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String,
|
||||
UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime,
|
||||
PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8,
|
||||
PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16),
|
||||
CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64,
|
||||
StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64,
|
||||
OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64,
|
||||
UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32,
|
||||
ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64),
|
||||
Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32,
|
||||
DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16))
|
||||
ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
|
||||
SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
|
||||
|
||||
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
|
||||
clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0"
|
||||
@ -275,7 +387,9 @@ export ZOOKEEPER_FAULT_INJECTION=1
|
||||
configure
|
||||
|
||||
# But we still need default disk because some tables loaded only into it
|
||||
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml | sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
|
||||
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
|
||||
| sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
|
||||
> /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
|
||||
mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
|
||||
@ -283,8 +397,12 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
|
||||
start
|
||||
|
||||
./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
|
||||
&& echo -e 'Test script exit code\tOK' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Test script failed\tFAIL' >> /test_output/test_results.tsv
|
||||
&& echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv
|
||||
|
||||
# NOTE Hung check is implemented in docker/tests/stress/stress
|
||||
rg -Fa "No queries hung" /test_output/test_results.tsv | grep -Fa "OK" \
|
||||
|| echo -e "Hung check failed, possible deadlock found (see hung_check.log)$FAIL$(head_escaped /test_output/hung_check.log | unts)"
|
||||
|
||||
stop
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log
|
||||
@ -295,9 +413,10 @@ unset "${!THREAD_@}"
|
||||
|
||||
start
|
||||
|
||||
clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \
|
||||
|| (echo -e 'Server failed to start (see application_errors.txt and clickhouse-server.clean.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
&& rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt)
|
||||
clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|
||||
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \
|
||||
&& echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(trim_server_logs application_errors.txt)" \
|
||||
>> /test_output/test_results.tsv)
|
||||
|
||||
stop
|
||||
|
||||
@ -310,49 +429,54 @@ stop
|
||||
rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp
|
||||
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
|
||||
&& echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'No sanitizer asserts\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv
|
||||
rm -f /test_output/tmp
|
||||
|
||||
# OOM
|
||||
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
|
||||
&& echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Logical errors
|
||||
rg -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \
|
||||
&& echo -e 'Logical error thrown (see clickhouse-server.log or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv
|
||||
rg -Fa "Code: 49. DB::Exception: " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \
|
||||
&& echo -e "Logical error thrown (see clickhouse-server.log or logical_errors.txt)$FAIL$(head_escaped /test_output/logical_errors.txt)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No logical errors$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file logical_errors.txt if it's empty
|
||||
[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
|
||||
|
||||
# No such key errors
|
||||
rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \
|
||||
&& echo -e 'S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'No lost s3 keys\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(trim_server_logs no_such_key_errors.txt)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file no_such_key_errors.txt if it's empty
|
||||
[ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt
|
||||
|
||||
# Crash
|
||||
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \
|
||||
&& echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Not crashed$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# It also checks for crash without stacktrace (printed by watchdog)
|
||||
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/fatal_messages.txt \
|
||||
&& echo -e 'Fatal message in clickhouse-server.log (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Fatal message in clickhouse-server.log (see fatal_messages.txt)$FAIL$(trim_server_logs fatal_messages.txt)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file fatal_messages.txt if it's empty
|
||||
[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt
|
||||
|
||||
rg -Fa "########################################" /test_output/* > /dev/null \
|
||||
&& echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv
|
||||
&& echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv
|
||||
|
||||
function get_gdb_log_context()
|
||||
{
|
||||
rg -A50 -Fa " received signal " /test_output/gdb.log | head_escaped
|
||||
}
|
||||
|
||||
rg -Fa " received signal " /test_output/gdb.log > /dev/null \
|
||||
&& echo -e 'Found signal in gdb.log\tFAIL' >> /test_output/test_results.tsv
|
||||
&& echo -e "Found signal in gdb.log$FAIL$(get_gdb_log_context)" >> /test_output/test_results.tsv
|
||||
|
||||
if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
echo -e "Backward compatibility check\n"
|
||||
@ -367,8 +491,8 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
echo "Download clickhouse-server from the previous release"
|
||||
mkdir previous_release_package_folder
|
||||
|
||||
echo $previous_release_tag | download_release_packages && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv
|
||||
echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv
|
||||
|
||||
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log
|
||||
for table in query_log trace_log
|
||||
@ -381,13 +505,13 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
# Check if we cloned previous release repository successfully
|
||||
if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
|
||||
then
|
||||
echo -e "Backward compatibility check: Failed to clone previous release tests\tFAIL" >> /test_output/test_results.tsv
|
||||
echo -e "Backward compatibility check: Failed to clone previous release tests$FAIL" >> /test_output/test_results.tsv
|
||||
elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
|
||||
then
|
||||
echo -e "Backward compatibility check: Failed to download previous release packages\tFAIL" >> /test_output/test_results.tsv
|
||||
echo -e "Backward compatibility check: Failed to download previous release packages$FAIL" >> /test_output/test_results.tsv
|
||||
else
|
||||
echo -e "Successfully cloned previous release tests\tOK" >> /test_output/test_results.tsv
|
||||
echo -e "Successfully downloaded previous release packages\tOK" >> /test_output/test_results.tsv
|
||||
echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv
|
||||
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Uninstall current packages
|
||||
dpkg --remove clickhouse-client
|
||||
@ -446,9 +570,10 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
|
||||
mkdir tmp_stress_output
|
||||
|
||||
./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
|
||||
&& echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv
|
||||
./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" \
|
||||
--backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
|
||||
&& echo -e "Backward compatibility check: Test script exit code$OK" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: Test script failed$FAIL" >> /test_output/test_results.tsv
|
||||
rm -rf tmp_stress_output
|
||||
|
||||
# We experienced deadlocks in this command in very rare cases. Let's debug it:
|
||||
@ -470,9 +595,9 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
export ZOOKEEPER_FAULT_INJECTION=0
|
||||
configure
|
||||
start 500
|
||||
clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \
|
||||
|| (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \
|
||||
&& rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt)
|
||||
clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \
|
||||
|| (rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt \
|
||||
&& echo -e "Backward compatibility check: Server failed to start$FAIL$(trim_server_logs bc_check_application_errors.txt)" >> /test_output/test_results.tsv)
|
||||
|
||||
clickhouse-client --query="SELECT 'Server version: ', version()"
|
||||
|
||||
@ -488,8 +613,6 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
# FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restarting server.
|
||||
# Let's just ignore all errors from queries ("} <Error> TCPHandler: Code:", "} <Error> executeQuery: Code:")
|
||||
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'")
|
||||
# NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected
|
||||
# ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part")
|
||||
# FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility
|
||||
echo "Check for Error messages in server log:"
|
||||
rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
|
||||
@ -519,7 +642,6 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
-e "} <Error> TCPHandler: Code:" \
|
||||
-e "} <Error> executeQuery: Code:" \
|
||||
-e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \
|
||||
-e "This engine is deprecated and is not supported in transactions" \
|
||||
-e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \
|
||||
-e "The set of parts restored in place of" \
|
||||
-e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
|
||||
@ -528,9 +650,11 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
-e "MutateFromLogEntryTask" \
|
||||
-e "No connection to ZooKeeper, cannot get shared table ID" \
|
||||
-e "Session expired" \
|
||||
-e "TOO_MANY_PARTS" \
|
||||
/var/log/clickhouse-server/clickhouse-server.backward.dirty.log | rg -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
|
||||
&& echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)$FAIL$(trim_server_logs bc_check_error_messages.txt)" \
|
||||
>> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No Error messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file bc_check_error_messages.txt if it's empty
|
||||
[ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt
|
||||
@ -539,34 +663,36 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
rg -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
|
||||
&& echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Backward compatibility check: Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No sanitizer asserts$OK" >> /test_output/test_results.tsv
|
||||
rm -f /test_output/tmp
|
||||
|
||||
# OOM
|
||||
rg -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
|
||||
&& echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Backward compatibility check: Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Logical errors
|
||||
echo "Check for Logical errors in server log:"
|
||||
rg -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \
|
||||
&& echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv
|
||||
rg -Fa -A20 "Code: 49. DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \
|
||||
&& echo -e "Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)$FAIL$(trim_server_logs bc_check_logical_errors.txt)" \
|
||||
>> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No logical errors$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file bc_check_logical_errors.txt if it's empty
|
||||
[ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt
|
||||
|
||||
# Crash
|
||||
rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
|
||||
&& echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Backward compatibility check: Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: Not crashed$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# It also checks for crash without stacktrace (printed by watchdog)
|
||||
echo "Check for Fatal message in server log:"
|
||||
rg -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \
|
||||
&& echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)$FAIL$(trim_server_logs bc_check_fatal_messages.txt)" \
|
||||
>> /test_output/test_results.tsv \
|
||||
|| echo -e "Backward compatibility check: No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file bc_check_fatal_messages.txt if it's empty
|
||||
[ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
|
||||
@ -574,7 +700,8 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
|
||||
tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||:
|
||||
for table in query_log trace_log
|
||||
do
|
||||
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.backward.tsv.zst ||:
|
||||
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" \
|
||||
| zstd --threads=0 > /test_output/$table.backward.tsv.zst ||:
|
||||
done
|
||||
fi
|
||||
fi
|
||||
@ -583,13 +710,28 @@ dmesg -T > /test_output/dmesg.log
|
||||
|
||||
# OOM in dmesg -- those are real
|
||||
grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \
|
||||
&& echo -e 'OOM in dmesg\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'No OOM in dmesg\tOK' >> /test_output/test_results.tsv
|
||||
&& echo -e "OOM in dmesg$FAIL$(head_escaped /test_output/dmesg.log)" >> /test_output/test_results.tsv \
|
||||
|| echo -e "No OOM in dmesg$OK" >> /test_output/test_results.tsv
|
||||
|
||||
mv /var/log/clickhouse-server/stderr.log /test_output/
|
||||
|
||||
# Write check result into check_status.tsv
|
||||
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
|
||||
# Try to choose most specific error for the whole check status
|
||||
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by
|
||||
(test like 'Backward compatibility check%'), -- BC check goes last
|
||||
(test like '%Sanitizer%') DESC,
|
||||
(test like '%Killed by signal%') DESC,
|
||||
(test like '%gdb.log%') DESC,
|
||||
(test ilike '%possible deadlock%') DESC,
|
||||
(test like '%start%') DESC,
|
||||
(test like '%dmesg%') DESC,
|
||||
(test like '%OOM%') DESC,
|
||||
(test like '%Signal 9%') DESC,
|
||||
(test like '%Fatal message%') DESC,
|
||||
(test like '%Error message%') DESC,
|
||||
(test like '%previous release%') DESC,
|
||||
rowNumberInAllBlocks()
|
||||
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
|
||||
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
|
||||
|
||||
# Core dumps
|
||||
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
from multiprocessing import cpu_count
|
||||
from subprocess import Popen, call, check_output, STDOUT
|
||||
from subprocess import Popen, call, check_output, STDOUT, PIPE
|
||||
import os
|
||||
import argparse
|
||||
import logging
|
||||
@ -299,14 +299,19 @@ if __name__ == "__main__":
|
||||
"00001_select_1",
|
||||
]
|
||||
)
|
||||
res = call(cmd, shell=True, stderr=STDOUT)
|
||||
hung_check_status = "No queries hung\tOK\n"
|
||||
hung_check_log = os.path.join(args.output_folder, "hung_check.log")
|
||||
tee = Popen(['/usr/bin/tee', hung_check_log], stdin=PIPE)
|
||||
res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT)
|
||||
tee.stdin.close()
|
||||
if res != 0 and have_long_running_queries:
|
||||
logging.info("Hung check failed with exit code {}".format(res))
|
||||
hung_check_status = "Hung check failed\tFAIL\n"
|
||||
with open(
|
||||
os.path.join(args.output_folder, "test_results.tsv"), "w+"
|
||||
) as results:
|
||||
results.write(hung_check_status)
|
||||
else:
|
||||
hung_check_status = "No queries hung\tOK\t\\N\t\n"
|
||||
with open(
|
||||
os.path.join(args.output_folder, "test_results.tsv"), "w+"
|
||||
) as results:
|
||||
results.write(hung_check_status)
|
||||
os.remove(hung_check_log)
|
||||
|
||||
|
||||
logging.info("Stress test finished")
|
||||
|
@ -48,6 +48,7 @@ RUN apt-get update \
|
||||
gdb \
|
||||
git \
|
||||
gperf \
|
||||
libclang-rt-${LLVM_VERSION}-dev \
|
||||
lld-${LLVM_VERSION} \
|
||||
llvm-${LLVM_VERSION} \
|
||||
llvm-${LLVM_VERSION}-dev \
|
||||
|
@ -85,8 +85,16 @@ def process_test_log(log_path):
|
||||
if DATABASE_SIGN in line:
|
||||
test_end = True
|
||||
|
||||
# Python does not support TSV, so we have to escape '\t' and '\n' manually
|
||||
# and hope that complex escape sequences will not break anything
|
||||
test_results = [
|
||||
(test[0], test[1], test[2], "".join(test[3])[:4096]) for test in test_results
|
||||
(
|
||||
test[0],
|
||||
test[1],
|
||||
test[2],
|
||||
"".join(test[3])[:4096].replace("\t", "\\t").replace("\n", "\\n"),
|
||||
)
|
||||
for test in test_results
|
||||
]
|
||||
|
||||
return (
|
||||
|
22
docs/changelogs/v22.11.5.15-stable.md
Normal file
@ -0,0 +1,22 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v22.11.5.15-stable (d763e5a9239) FIXME as compared to v22.11.4.3-stable (7f4cf554f69)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#44999](https://github.com/ClickHouse/ClickHouse/issues/44999): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#45552](https://github.com/ClickHouse/ClickHouse/issues/45552): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
24
docs/changelogs/v22.8.13.20-lts.md
Normal file
@ -0,0 +1,24 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v22.8.13.20-lts (e4817946d18) FIXME as compared to v22.8.12.45-lts (86b0ecd5d51)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#45565](https://github.com/ClickHouse/ClickHouse/issues/45565): Fix positional arguments exception Positional argument out of bounds. Closes [#40634](https://github.com/ClickHouse/ClickHouse/issues/40634). [#41189](https://github.com/ClickHouse/ClickHouse/pull/41189) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Backported in [#44997](https://github.com/ClickHouse/ClickHouse/issues/44997): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
|
||||
* Backported in [#45550](https://github.com/ClickHouse/ClickHouse/issues/45550): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Merge pull request [#38262](https://github.com/ClickHouse/ClickHouse/issues/38262) from PolyProgrammist/fix-ordinary-system-un… [#45650](https://github.com/ClickHouse/ClickHouse/pull/45650) ([alesapin](https://github.com/alesapin)).
|
||||
|
23
docs/changelogs/v23.1.2.9-stable.md
Normal file
@ -0,0 +1,23 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.1.2.9-stable (8dfb1700858) FIXME as compared to v23.1.1.3077-stable (dcaac477025)
|
||||
|
||||
#### Performance Improvement
|
||||
* Backported in [#45705](https://github.com/ClickHouse/ClickHouse/issues/45705): Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)).
|
||||
|
||||
#### Bug Fix
|
||||
* Backported in [#45673](https://github.com/ClickHouse/ClickHouse/issues/45673): Fix wiping sensitive info in logs. [#45603](https://github.com/ClickHouse/ClickHouse/pull/45603) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#45730](https://github.com/ClickHouse/ClickHouse/issues/45730): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
|
17
docs/changelogs/v23.1.3.5-stable.md
Normal file
@ -0,0 +1,17 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2023
|
||||
---
|
||||
|
||||
# 2023 Changelog
|
||||
|
||||
### ClickHouse release v23.1.3.5-stable (548b494bcce) FIXME as compared to v23.1.2.9-stable (8dfb1700858)
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
|
||||
|
||||
* Backported in [#45896](https://github.com/ClickHouse/ClickHouse/issues/45896): Bugfix IPv6 parser for mixed ip4 address with missed first octet (like `::.1.2.3`). [#45871](https://github.com/ClickHouse/ClickHouse/pull/45871) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Get rid of progress timestamps in release publishing [#45818](https://github.com/ClickHouse/ClickHouse/pull/45818) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
@ -16,6 +16,11 @@ Tests are located in `queries` directory. There are two subdirectories: `statele
Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
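
As a rough illustration (the file name and queries below are invented, not taken from the repository), a minimal `.sql` test is just a list of statements that gets piped to `clickhouse-client --multiquery`:

```sql
-- hypothetical tests/queries/0_stateless/99999_select_literals.sql
-- shown only to illustrate the format of a stateless .sql test
SELECT 1;
SELECT 'Hello, ClickHouse!' AS greeting;
```

The expected output is stored in a `.reference` file with the same base name next to the test.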
:::note
A common mistake when testing data types `DateTime` and `DateTime64` is assuming that the server uses a specific time zone (e.g. "UTC"). This is not the case: time zones in CI test runs
are deliberately randomized. The easiest workaround is to specify the time zone for test values explicitly, e.g. `toDateTime64(val, 3, 'Europe/Amsterdam')`.
:::
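
For instance, a minimal sketch of such a pinned-time-zone expression (the literal value here is illustrative):

```sql
-- pin the time zone explicitly so the result does not depend on the
-- randomized server time zone used in CI
SELECT toDateTime64('2023-01-01 00:00:00', 3, 'Europe/Amsterdam') AS pinned_ts;
```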

### Running a Test Locally {#functional-test-locally}

Start the ClickHouse server locally, listening on the default port (9000). To

@ -77,9 +77,12 @@ Optional parameters:
- `rabbitmq_password` - RabbitMQ password.
- `rabbitmq_commit_on_select` - Commit messages when select query is made. Default: `false`.
- `rabbitmq_max_rows_per_message` — The maximum number of rows written in one RabbitMQ message for row-based formats. Default: `1`.
- `rabbitmq_empty_queue_backoff_start` — A start backoff point to reschedule read if the rabbitmq queue is empty.
- `rabbitmq_empty_queue_backoff_end` — An end backoff point to reschedule read if the rabbitmq queue is empty.

SSL connection:
Use either `rabbitmq_secure = 1` or `amqps` in connection address: `rabbitmq_address = 'amqps://guest:guest@localhost/vhost'`.
The default behaviour of the used library is not to check if the created TLS connection is sufficiently secure. Whether the certificate is expired, self-signed, missing or invalid: the connection is simply permitted. More strict checking of certificates can possibly be implemented in the future.
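
Purely as an illustration (the column list, exchange name and format below are invented; only `rabbitmq_address` with the `amqps` scheme comes from the text above), a table consuming over TLS might be declared roughly like this:

```sql
CREATE TABLE rabbitmq_secure_queue
(
    key UInt64,
    value String
)
ENGINE = RabbitMQ
SETTINGS
    -- 'amqps' in the address enables the TLS connection described above
    rabbitmq_address = 'amqps://guest:guest@localhost/vhost',
    rabbitmq_exchange_name = 'exchange1',
    rabbitmq_format = 'JSONEachRow';
```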
@ -2,10 +2,10 @@
|
||||
slug: /en/engines/table-engines/mergetree-family/invertedindexes
sidebar_label: Inverted Indexes
description: Quickly find search terms in text.
keywords: [full-text search, text search]
keywords: [full-text search, text search, inverted, index, indices]
---

# Inverted indexes [experimental]
# Full-text Search using Inverted Indexes [experimental]

Inverted indexes are an experimental type of [secondary indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#available-types-of-indices) which provide fast text search
capabilities for [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md)
@ -13,7 +13,7 @@ columns. The main idea of an inverted index is to store a mapping from "terms" t
tokenized cells of the string column. For example, the string cell "I will be a little late" is by default tokenized into six terms "I", "will",
"be", "a", "little" and "late". Another kind of tokenizer is n-grams. For example, the result of 3-gram tokenization will be 21 terms "I w",
" wi", "wil", "ill", "ll ", "l b", " be" etc. The more fine-granular the input strings are tokenized, the bigger but also the more
useful the resulting inverted index will be.

:::warning
|
||||
Inverted indexes are experimental and should not be used in production environments yet. They may change in the future in backward-incompatible
|
||||
@ -49,20 +49,20 @@ where `N` specifies the tokenizer:
|
||||
Being a type of skipping index, inverted indexes can be dropped or added to a column after table creation:
|
||||
|
||||
``` sql
|
||||
ALTER TABLE tbl DROP INDEX inv_idx;
|
||||
ALTER TABLE tbl ADD INDEX inv_idx(s) TYPE inverted(2) GRANULARITY 1;
|
||||
ALTER TABLE tab DROP INDEX inv_idx;
|
||||
ALTER TABLE tab ADD INDEX inv_idx(s) TYPE inverted(2);
|
||||
```
|
||||
|
||||
To use the index, no special functions or syntax are required. Typical string search predicates automatically leverage the index. As
|
||||
examples, consider:
|
||||
|
||||
```sql
|
||||
SELECT * from tab WHERE s == 'Hello World;
|
||||
SELECT * from tab WHERE s IN (‘Hello’, ‘World’);
|
||||
SELECT * from tab WHERE s LIKE ‘%Hello%’;
|
||||
SELECT * from tab WHERE multiSearchAny(s, ‘Hello’, ‘World’);
|
||||
SELECT * from tab WHERE hasToken(s, ‘Hello’);
|
||||
SELECT * from tab WHERE multiSearchAll(s, [‘Hello’, ‘World’]);
|
||||
INSERT INTO tab(key, str) values (1, 'Hello World');
|
||||
SELECT * from tab WHERE str == 'Hello World';
|
||||
SELECT * from tab WHERE str IN ('Hello', 'World');
|
||||
SELECT * from tab WHERE str LIKE '%Hello%';
|
||||
SELECT * from tab WHERE multiSearchAny(str, ['Hello', 'World']);
|
||||
SELECT * from tab WHERE hasToken(str, 'Hello');
|
||||
```
|
||||
|
||||
The inverted index also works on columns of type `Array(String)`, `Array(FixedString)`, `Map(String)` and `Map(FixedString)`.
|
||||
@ -74,7 +74,120 @@ controls the amount of data read consumed from the underlying column before a ne
|
||||
intermediate memory consumption for index construction but also improves lookup performance since fewer segments need to be checked on
|
||||
average to evaluate a query.
|
||||
|
||||
## Full-text search of the Hacker News dataset
|
||||
|
||||
Let's look at the performance improvements of inverted indexes on a large dataset with lots of text. We will use 28.7M rows of comments on the popular Hacker News website. Here is the table without an inverted index:
|
||||
|
||||
```sql
|
||||
CREATE TABLE hackernews (
|
||||
id UInt64,
|
||||
deleted UInt8,
|
||||
type String,
|
||||
author String,
|
||||
timestamp DateTime,
|
||||
comment String,
|
||||
dead UInt8,
|
||||
parent UInt64,
|
||||
poll UInt64,
|
||||
children Array(UInt32),
|
||||
url String,
|
||||
score UInt32,
|
||||
title String,
|
||||
parts Array(UInt32),
|
||||
descendants UInt32
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (type, author);
|
||||
```
|
||||
|
||||
The 28.7M rows are in a Parquet file in S3 - let's insert them into the `hackernews` table:
|
||||
|
||||
```sql
|
||||
INSERT INTO hackernews
|
||||
SELECT * FROM s3Cluster(
|
||||
'default',
|
||||
'https://datasets-documentation.s3.eu-west-3.amazonaws.com/hackernews/hacknernews.parquet',
|
||||
'Parquet',
|
||||
'
|
||||
id UInt64,
|
||||
deleted UInt8,
|
||||
type String,
|
||||
by String,
|
||||
time DateTime,
|
||||
text String,
|
||||
dead UInt8,
|
||||
parent UInt64,
|
||||
poll UInt64,
|
||||
kids Array(UInt32),
|
||||
url String,
|
||||
score UInt32,
|
||||
title String,
|
||||
parts Array(UInt32),
|
||||
descendants UInt32');
|
||||
```
|
||||
|
||||
Consider the following simple search for the term `ClickHouse` (and its varied upper and lower cases) in the `comment` column:
|
||||
|
||||
```sql
|
||||
SELECT count()
|
||||
FROM hackernews
|
||||
WHERE hasToken(lower(comment), 'clickhouse');
|
||||
```
|
||||
|
||||
Notice it takes 3 seconds to execute the query:
|
||||
|
||||
```response
|
||||
┌─count()─┐
|
||||
│ 1145 │
|
||||
└─────────┘
|
||||
|
||||
1 row in set. Elapsed: 3.001 sec. Processed 28.74 million rows, 9.75 GB (9.58 million rows/s., 3.25 GB/s.)
|
||||
```
|
||||
|
||||
We will use `ALTER TABLE` to add an inverted index on the lowercase of the `comment` column and then materialize it (this can take a while):
|
||||
|
||||
```sql
|
||||
ALTER TABLE hackernews
|
||||
ADD INDEX comment_lowercase(lower(comment)) TYPE inverted;
|
||||
|
||||
ALTER TABLE hackernews MATERIALIZE INDEX comment_lowercase;
|
||||
```
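Materialization is executed asynchronously, so a hedged way to see whether it has finished is to look for unfinished mutations on the table (a sketch, assuming the `hackernews` table above):

```sql
-- An empty result means the index has been fully materialized
SELECT command, is_done
FROM system.mutations
WHERE table = 'hackernews' AND NOT is_done;
```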
|
||||
|
||||
We run the same query...
|
||||
|
||||
```sql
|
||||
SELECT count()
|
||||
FROM hackernews
|
||||
WHERE hasToken(lower(comment), 'clickhouse')
|
||||
```
|
||||
|
||||
...and notice the query executes 4x faster:
|
||||
|
||||
```response
|
||||
┌─count()─┐
|
||||
│ 1145 │
|
||||
└─────────┘
|
||||
|
||||
1 row in set. Elapsed: 0.747 sec. Processed 4.49 million rows, 1.77 GB (6.01 million rows/s., 2.37 GB/s.)
|
||||
```
|
||||
|
||||
We can also search for one or all of multiple terms, i.e., disjunctions or conjunctions:
|
||||
|
||||
```sql
|
||||
-- multiple OR'ed terms
|
||||
SELECT count(*)
|
||||
FROM hackernews
|
||||
WHERE multiSearchAny(lower(comment), ['oltp', 'olap']);
|
||||
|
||||
-- multiple AND'ed terms
|
||||
SELECT count(*)
|
||||
FROM hackernews
|
||||
WHERE hasToken(lower(comment), 'avx') AND hasToken(lower(comment), 'sve');
|
||||
```
|
||||
|
||||
:::note
|
||||
Unlike other secondary indices, inverted indexes (for now) map to row numbers (row ids) instead of granule ids. The reason for this design
|
||||
is performance. In practice, users often search for multiple terms at once. For example, filter predicate `WHERE s LIKE '%little%' OR s LIKE
|
||||
'%big%'` can be evaluated directly using an inverted index by forming the union of the row id lists for terms "little" and "big". This also
|
||||
means that the parameter `GRANULARITY` supplied to index creation has no meaning (it may be removed from the syntax in the future).
|
||||
:::
|
||||
|
@ -923,15 +923,25 @@ Configuration markup:
|
||||
<single_read_retries>4</single_read_retries>
|
||||
<min_bytes_for_seek>1000</min_bytes_for_seek>
|
||||
<metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
|
||||
<cache_enabled>true</cache_enabled>
|
||||
<cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
|
||||
<skip_access_check>false</skip_access_check>
|
||||
</s3>
|
||||
<s3_cache>
|
||||
<type>cache</type>
|
||||
<disk>s3</disk>
|
||||
<path>/var/lib/clickhouse/disks/s3_cache/</path>
|
||||
<max_size>10Gi</max_size>
|
||||
</s3_cache>
|
||||
</disks>
|
||||
...
|
||||
</storage_configuration>
|
||||
```
|
||||
|
||||
:::note cache configuration
|
||||
ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) if you are using one of those versions.
|
||||
:::
|
||||
|
||||
### Configuring the S3 disk
|
||||
|
||||
Required parameters:
|
||||
|
||||
- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
|
||||
@ -951,8 +961,6 @@ Optional parameters:
|
||||
- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
|
||||
- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
|
||||
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
- `cache_enabled` — Allows to cache mark and index files on local FS. Default value is `true`.
|
||||
- `cache_path` — Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks/<disk_name>/cache/`.
|
||||
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
|
||||
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
@ -960,6 +968,30 @@ Optional parameters:
|
||||
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
|
||||
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
|
||||
|
||||
### Configuring the cache
|
||||
|
||||
This is the cache configuration from above:
|
||||
```xml
|
||||
<s3_cache>
|
||||
<type>cache</type>
|
||||
<disk>s3</disk>
|
||||
<path>/var/lib/clickhouse/disks/s3_cache/</path>
|
||||
<max_size>10Gi</max_size>
|
||||
</s3_cache>
|
||||
```
|
||||
|
||||
These parameters define the cache layer:
|
||||
- `type` — If a disk is of type `cache` it caches mark and index files in memory.
|
||||
- `disk` — The name of the disk that will be cached.
|
||||
|
||||
Cache parameters:
|
||||
- `path` — The path where metadata for the cache is stored.
|
||||
- `max_size` — The size (amount of memory) that the cache can grow to.
|
||||
|
||||
:::tip
|
||||
There are several other cache parameters that you can use to tune your storage, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for the details.
|
||||
:::
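As a quick sanity check after restarting the server with this configuration, the configured disks can be listed. A minimal sketch; `system.disks` exposes more columns than shown here:

```sql
-- The s3 and s3_cache disks configured above should appear in this list
SELECT name, path
FROM system.disks;
```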
|
||||
|
||||
S3 disk can be configured as `main` or `cold` storage:
|
||||
``` xml
|
||||
<storage_configuration>
|
||||
|
@ -19,7 +19,7 @@ ENGINE = GenerateRandom([random_seed] [,max_string_length] [,max_array_length])
|
||||
```
|
||||
|
||||
The `max_array_length` and `max_string_length` parameters specify maximum length of all
|
||||
array columns and strings correspondingly in generated data.
|
||||
array or map columns and strings correspondingly in generated data.
|
||||
|
||||
Generate table engine supports only `SELECT` queries.
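For illustration, a minimal sketch with hypothetical column names, using the parameter order from the syntax above (`random_seed`, `max_string_length`, `max_array_length`):

```sql
-- Seed 1, strings up to 5 characters, arrays or maps with up to 3 elements
CREATE TABLE generate_example (name String, value Array(UInt32))
ENGINE = GenerateRandom(1, 5, 3);

SELECT * FROM generate_example LIMIT 3;
```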
|
||||
|
||||
|
@ -1210,6 +1210,7 @@ SELECT * FROM json_each_row_nested
|
||||
- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`.
|
||||
- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`.
|
||||
- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`.
|
||||
- [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - Ignore unknown keys in json object for named tuples. Default value - `false`.
|
||||
- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`.
|
||||
- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`.
|
||||
- [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`.
|
||||
|
@ -56,6 +56,19 @@ sudo apt-get clean
|
||||
sudo apt-get autoclean
|
||||
```
|
||||
|
||||
### You Can't Get Packages With Yum Because Of Wrong Signature
|
||||
|
||||
Possible issue: the local yum cache is outdated, e.g. it may have been broken by the GPG key update in 2022-09.
|
||||
|
||||
The solution is to clean out the cache and lib directory for yum:
|
||||
|
||||
```
|
||||
sudo find /var/lib/yum/repos/ /var/cache/yum/ -name 'clickhouse-*' -type d -exec rm -rf {} +
|
||||
sudo rm -f /etc/yum.repos.d/clickhouse.repo
|
||||
```
|
||||
|
||||
After that follow the [install guide](../getting-started/install.md#from-rpm-packages)
|
||||
|
||||
## Connecting to the Server {#troubleshooting-accepts-no-connections}
|
||||
|
||||
Possible issues:
|
||||
|
@ -79,7 +79,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
|
||||
- ASYNC: backup or restore asynchronously
|
||||
- PARTITIONS: a list of partitions to restore
|
||||
- SETTINGS:
|
||||
- [`compression_method`](en/sql-reference/statements/create/table/#column-compression-codecs) and compression_level
|
||||
- [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level
|
||||
- `password` for the file on disk
|
||||
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
|
||||
|
||||
|
@ -22,6 +22,6 @@ Additional cache types:
|
||||
- [Dictionaries](../sql-reference/dictionaries/index.md) data cache.
|
||||
- Schema inference cache.
|
||||
- [Filesystem cache](storing-data.md) over S3, Azure, Local and other disks.
|
||||
- [(Experimental) Query result cache](query-result-cache.md).
|
||||
- [(Experimental) Query cache](query-cache.md).
|
||||
|
||||
To drop one of the caches, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md#drop-mark-cache) statements.
|
||||
|
@ -239,7 +239,7 @@ Example of configuration:
|
||||
<clickhouse>
|
||||
<named_collections>
|
||||
<remote1>
|
||||
<host>localhost</host>
|
||||
<host>remote_host</host>
|
||||
<port>9000</port>
|
||||
<database>system</database>
|
||||
<user>foo</user>
|
||||
|
112
docs/en/operations/query-cache.md
Normal file
112
docs/en/operations/query-cache.md
Normal file
@ -0,0 +1,112 @@
|
||||
---
|
||||
slug: /en/operations/query-cache
|
||||
sidebar_position: 65
|
||||
sidebar_label: Query Cache [experimental]
|
||||
---
|
||||
|
||||
# Query Cache [experimental]
|
||||
|
||||
The query cache allows to compute `SELECT` queries just once and to serve further executions of the same query directly from the cache.
|
||||
Depending on the type of the queries, this can dramatically reduce latency and resource consumption of the ClickHouse server.
|
||||
|
||||
## Background, Design and Limitations
|
||||
|
||||
Query caches can generally be viewed as transactionally consistent or inconsistent.
|
||||
|
||||
- In transactionally consistent caches, the database invalidates (discards) cached query results if the result of the `SELECT` query changes
|
||||
or potentially changes. In ClickHouse, operations which change the data include inserts/updates/deletes in/of/from tables or collapsing
|
||||
merges. Transactionally consistent caching is especially suitable for OLTP databases, for example
|
||||
  [MySQL](https://dev.mysql.com/doc/refman/5.6/en/query-cache.html) (which removed the query cache in v8.0) and
|
||||
[Oracle](https://docs.oracle.com/database/121/TGDBA/tune_result_cache.htm).
|
||||
- In transactionally inconsistent caches, slight inaccuracies in query results are accepted under the assumption that all cache entries are
|
||||
assigned a validity period after which they expire (e.g. 1 minute) and that the underlying data changes only little during this period.
|
||||
This approach is overall more suitable for OLAP databases. As an example where transactionally inconsistent caching is sufficient,
|
||||
consider an hourly sales report in a reporting tool which is simultaneously accessed by multiple users. Sales data changes typically
|
||||
slowly enough that the database only needs to compute the report once (represented by the first `SELECT` query). Further queries can be
|
||||
served directly from the query cache. In this example, a reasonable validity period could be 30 min.
|
||||
|
||||
Transactionally inconsistent caching is traditionally provided by client tools or proxy packages interacting with the database. As a result,
|
||||
the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side.
|
||||
This reduces maintenance effort and avoids redundancy.
|
||||
|
||||
:::warning
|
||||
The query cache is an experimental feature that should not be used in production. There are known cases (e.g. in distributed query
|
||||
processing) where wrong results are returned.
|
||||
:::
|
||||
|
||||
## Configuration Settings and Usage
|
||||
|
||||
As long as the query cache is experimental, it must be activated using the following configuration setting:
|
||||
|
||||
```sql
|
||||
SET allow_experimental_query_cache = true;
|
||||
```
|
||||
|
||||
Afterwards, setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries
|
||||
of the current session should utilize the query cache. For example, the first execution of query
|
||||
|
||||
```sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
FROM table
|
||||
SETTINGS use_query_cache = true;
|
||||
```
|
||||
|
||||
will store the query result in the query cache. Subsequent executions of the same query (also with parameter `use_query_cache = true`) will
|
||||
read the computed result from the cache and return it immediately.
|
||||
|
||||
The way the cache is utilized can be configured in more detail using settings [enable_writes_to_query_cache](settings/settings.md#enable-writes-to-query-cache)
|
||||
and [enable_reads_from_query_cache](settings/settings.md#enable-reads-from-query-cache) (both `true` by default). The former setting
|
||||
controls whether query results are stored in the cache, whereas the latter setting determines if the database should try to retrieve query
|
||||
results from the cache. For example, the following query will use the cache only passively, i.e. attempt to read from it but not store its
|
||||
result in it:
|
||||
|
||||
```sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
FROM table
|
||||
SETTINGS use_query_cache = true, enable_writes_to_query_cache = false;
|
||||
```
|
||||
|
||||
For maximum control, it is generally recommended to provide settings "use_query_cache", "enable_writes_to_query_cache" and
|
||||
"enable_reads_from_query_cache" only with specific queries. It is also possible to enable caching at user or profile level (e.g. via `SET
|
||||
use_query_cache = true`) but one should keep in mind that all `SELECT` queries including monitoring or debugging queries to system tables
|
||||
may return cached results then.
|
||||
|
||||
The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table
|
||||
`system.query_cache`. The number of query cache hits and misses are shown as events "QueryCacheHits" and "QueryCacheMisses" in system table
|
||||
`system.events`. Both counters are only updated for `SELECT` queries which run with setting "use_query_cache = true". Other queries do not
|
||||
affect the cache miss counter.
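For example, a minimal sketch of how the cache contents and counters mentioned above can be inspected and reset:

```sql
-- Current cache entries
SELECT * FROM system.query_cache;

-- Hit and miss counters
SELECT event, value
FROM system.events
WHERE event IN ('QueryCacheHits', 'QueryCacheMisses');

-- Clear the cache
SYSTEM DROP QUERY CACHE;
```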
|
||||
|
||||
The query cache exists once per ClickHouse server process. However, cache results are by default not shared between users. This can be
|
||||
changed (see below) but doing so is not recommended for security reasons.
|
||||
|
||||
Query results are referenced in the query cache by the [Abstract Syntax Tree (AST)](https://en.wikipedia.org/wiki/Abstract_syntax_tree) of
|
||||
their query. This means that caching is agnostic to upper/lowercase, for example `SELECT 1` and `select 1` are treated as the same query. To
|
||||
make the matching more natural, all query-level settings related to the query cache are removed from the AST.
|
||||
|
||||
If the query was aborted due to an exception or user cancellation, no entry is written into the query cache.
|
||||
|
||||
The size of the query cache, the maximum number of cache entries and the maximum size of cache entries (in bytes and in records) can
|
||||
be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).
|
||||
|
||||
To define the minimum runtime a query must have for its result to be cached, you can use setting
|
||||
[query_cache_min_query_duration](settings/settings.md#query-cache-min-query-duration). For example, the result of query
|
||||
|
||||
``` sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
FROM table
|
||||
SETTINGS use_query_cache = true, query_cache_min_query_duration = 5000;
|
||||
```
|
||||
|
||||
is only cached if the query runs longer than 5 seconds. It is also possible to specify how often a query needs to run until its result is
|
||||
cached - for that use setting [query_cache_min_query_runs](settings/settings.md#query-cache-min-query-runs).
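For example, a sketch combining both thresholds (the values are arbitrary):

```sql
-- Cache the result only after the query ran 3 times and only if it took longer than 5 seconds
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_cache = true, query_cache_min_query_runs = 3, query_cache_min_query_duration = 5000;
```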
|
||||
|
||||
Entries in the query cache become stale after a certain time period (time-to-live). By default, this period is 60 seconds but a different
|
||||
value can be specified at session, profile or query level using setting [query_cache_ttl](settings/settings.md#query-cache-ttl).
|
||||
|
||||
Also, results of queries with non-deterministic functions such as `rand()` and `now()` are not cached. This can be overruled using
|
||||
setting [query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions).
|
||||
|
||||
Finally, entries in the query cache are not shared between users due to security reasons. For example, user A must not be able to bypass a
|
||||
row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can
|
||||
be marked accessible by other users (i.e. shared) by supplying setting
|
||||
[query_cache_share_between_users](settings/settings.md#query-cache-share-between-users).
|
@ -1,99 +0,0 @@
|
||||
---
|
||||
slug: /en/operations/query-result-cache
|
||||
sidebar_position: 65
|
||||
sidebar_label: Query Result Cache [experimental]
|
||||
---
|
||||
|
||||
# Query Result Cache [experimental]
|
||||
|
||||
The query result cache allows to compute SELECT queries just once and to serve further executions of the same query directly from the cache.
|
||||
Depending on the type of the queries, this can dramatically reduce latency and resource consumption of the ClickHouse server.
|
||||
|
||||
## Background, Design and Limitations
|
||||
|
||||
Query result caches can generally be viewed as transactionally consistent or inconsistent.
|
||||
|
||||
- In transactionally consistent caches, the database invalidates (discards) cached query results if the result of the SELECT query changes
|
||||
or potentially changes. In ClickHouse, operations which change the data include inserts/updates/deletes in/of/from tables or collapsing
|
||||
merges. Transactionally consistent caching is especially suitable for OLTP databases, for example
|
||||
[MySQL](https://dev.mysql.com/doc/refman/5.6/en/query-cache.html) (which removed query result cache after v8.0) and
|
||||
[Oracle](https://docs.oracle.com/database/121/TGDBA/tune_result_cache.htm).
|
||||
- In transactionally inconsistent caches, slight inaccuracies in query results are accepted under the assumption that all cache entries are
|
||||
assigned a validity period after which they expire (e.g. 1 minute) and that the underlying data changes only little during this period.
|
||||
This approach is overall more suitable for OLAP databases. As an example where transactionally inconsistent caching is sufficient,
|
||||
consider an hourly sales report in a reporting tool which is simultaneously accessed by multiple users. Sales data changes typically
|
||||
slowly enough that the database only needs to compute the report once (represented by the first SELECT query). Further queries can be
|
||||
served directly from the query result cache. In this example, a reasonable validity period could be 30 min.
|
||||
|
||||
Transactionally inconsistent caching is traditionally provided by client tools or proxy packages interacting with the database. As a result,
|
||||
the same caching logic and configuration is often duplicated. With ClickHouse's query result cache, the caching logic moves to the server
|
||||
side. This reduces maintenance effort and avoids redundancy.
|
||||
|
||||
:::warning
|
||||
The query result cache is an experimental feature that should not be used in production. There are known cases (e.g. in distributed query
|
||||
processing) where wrong results are returned.
|
||||
:::
|
||||
|
||||
## Configuration Settings and Usage
|
||||
|
||||
Parameter [enable_experimental_query_result_cache](settings/settings.md#enable-experimental-query-result-cache) controls whether query
|
||||
results are inserted into / retrieved from the cache for the current query or session. For example, the first execution of query
|
||||
|
||||
``` sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
FROM table
|
||||
SETTINGS enable_experimental_query_result_cache = true;
|
||||
```
|
||||
|
||||
stores the query result into the query result cache. Subsequent executions of the same query (also with parameter
|
||||
`enable_experimental_query_result_cache = true`) will read the computed result directly from the cache.
|
||||
|
||||
Sometimes, it is desirable to use the query result cache only passively, i.e. to allow reading from it but not writing into it (if the cache
|
||||
result is not stored yet). Parameter [enable_experimental_query_result_cache_passive_usage](settings/settings.md#enable-experimental-query-result-cache-passive-usage)
|
||||
instead of 'enable_experimental_query_result_cache' can be used for that.
|
||||
|
||||
For maximum control, it is generally recommended to provide settings "enable_experimental_query_result_cache" or
|
||||
"enable_experimental_query_result_cache_passive_usage" only with specific queries. It is also possible to enable caching at user or profile
|
||||
level but one should keep in mind that all SELECT queries may return a cached results, including monitoring or debugging queries to system
|
||||
tables.
|
||||
|
||||
The query result cache can be cleared using statement `SYSTEM DROP QUERY RESULT CACHE`. The content of the query result cache is displayed
|
||||
in system table `SYSTEM.QUERY_RESULT_CACHE`. The number of query result cache hits and misses are shown as events "QueryResultCacheHits" and
|
||||
"QueryResultCacheMisses" in system table `SYSTEM.EVENTS`. Both counters are only updated for SELECT queries which run with settings
|
||||
"enable_experimental_query_result_cache = true" or "enable_experimental_query_result_cache_passive_usage = true". Other queries do not
|
||||
affect the cache miss counter.
|
||||
|
||||
The query result cache exists once per ClickHouse server process. However, cache results are by default not shared between users. This can
|
||||
be changed (see below) but doing so is not recommended for security reasons.
|
||||
|
||||
Query results are referenced in the query result cache by the [Abstract Syntax Tree (AST)](https://en.wikipedia.org/wiki/Abstract_syntax_tree)
|
||||
of their query. This means that caching is agnostic to upper/lowercase, for example `SELECT 1` and `select 1` are treated as the same query.
|
||||
To make the matching more natural, all query-level settings related to the query result cache are removed from the AST.
|
||||
|
||||
If the query was aborted due to an exception or user cancellation, no entry is written into the query result cache.
|
||||
|
||||
The size of the query result cache, the maximum number of cache entries and the maximum size of cache entries (in bytes and in records) can
|
||||
be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-result-cache).
|
||||
|
||||
To define how long a query must run at least such that its result can be cached, you can use setting
|
||||
[query_result_cache_min_query_duration](settings/settings.md#query-result-cache-min-query-duration). For example, the result of query
|
||||
|
||||
``` sql
|
||||
SELECT some_expensive_calculation(column_1, column_2)
|
||||
FROM table
|
||||
SETTINGS enable_experimental_query_result_cache = true, query_result_cache_min_query_duration = 5000;
|
||||
```
|
||||
|
||||
is only cached if the query runs longer than 5 seconds. It is also possible to specify how often a query needs to run until its result is
|
||||
cached - for that use setting [query_result_cache_min_query_runs](settings/settings.md#query-result-cache-min-query-runs).
|
||||
|
||||
Entries in the query result cache become stale after a certain time period (time-to-live). By default, this period is 60 seconds but a
|
||||
different value can be specified at session, profile or query level using setting [query_result_cache_ttl](settings/settings.md#query-result-cache-ttl).
|
||||
|
||||
Also, results of queries with non-deterministic functions such as `rand()` and `now()` are not cached. This can be overruled using
|
||||
setting [query_result_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-result-cache-store-results-of-queries-with-nondeterministic-functions).
|
||||
|
||||
Finally, entries in the query cache are not shared between users due to security reasons. For example, user A must not be able to bypass a
|
||||
row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can
|
||||
be marked accessible by other users (i.e. shared) by supplying setting
|
||||
[query_result_cache_share_between_users]{settings/settings.md#query-result-cache-share-between-users}.
|
@ -1270,30 +1270,30 @@ If the table does not exist, ClickHouse will create it. If the structure of the
|
||||
</query_log>
|
||||
```
|
||||
|
||||
## query_result_cache {#server_configuration_parameters_query-result-cache}
|
||||
## query_cache {#server_configuration_parameters_query-cache}
|
||||
|
||||
[Query result cache](../query-result-cache.md) configuration.
|
||||
[Query cache](../query-cache.md) configuration.
|
||||
|
||||
The following settings are available:
|
||||
|
||||
- `size`: The maximum cache size in bytes. 0 means the query result cache is disabled. Default value: `1073741824` (1 GiB).
|
||||
- `max_entries`: The maximum number of SELECT query results stored in the cache. Default value: `1024`.
|
||||
- `max_entry_size`: The maximum size in bytes SELECT query results may have to be saved in the cache. Default value: `1048576` (1 MiB).
|
||||
- `max_entry_records`: The maximum number of records SELECT query results may have to be saved in the cache. Default value: `30000000` (30 mil).
|
||||
- `size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB).
|
||||
- `max_entries`: The maximum number of `SELECT` query results stored in the cache. Default value: `1024`.
|
||||
- `max_entry_size`: The maximum size in bytes `SELECT` query results may have to be saved in the cache. Default value: `1048576` (1 MiB).
|
||||
- `max_entry_records`: The maximum number of records `SELECT` query results may have to be saved in the cache. Default value: `30000000` (30 mil).
|
||||
|
||||
:::warning
|
||||
Data for the query result cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query result cache altogether.
|
||||
Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query cache altogether.
|
||||
:::
|
||||
|
||||
**Example**
|
||||
|
||||
```xml
|
||||
<query_result_cache>
|
||||
<query_cache>
|
||||
<size>1073741824</size>
|
||||
<max_entries>1024</max_entries>
|
||||
<max_entry_size>1048576</max_entry_size>
|
||||
<max_entry_records>30000000</max_entry_records>
|
||||
</query_result_cache>
|
||||
</query_cache>
|
||||
```
|
||||
|
||||
## query_thread_log {#server_configuration_parameters-query_thread_log}
|
||||
|
@ -233,7 +233,7 @@ Possible values:
|
||||
|
||||
Default value: 100.
|
||||
|
||||
Normally, the `use_async_block_ids_cache` updates as soon as there are updates in the watching keeper path. However, the cache updates might be too frequent and become a heavy burden. This minimum interval prevents the cache from updating too fast. Note that if we set this value too long, the block with duplicated inserts will have a longer retry time.
|
||||
|
||||
## max_replicated_logs_to_keep
|
||||
|
||||
|
@ -500,6 +500,12 @@ Parse named tuple columns as JSON objects.
|
||||
|
||||
Enabled by default.
|
||||
|
||||
## input_format_json_ignore_unknown_keys_in_named_tuple {#input_format_json_ignore_unknown_keys_in_named_tuple}
|
||||
|
||||
Ignore unknown keys in json object for named tuples.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
## input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple}
|
||||
|
||||
Insert default values for missing elements in JSON object while parsing named tuple.
|
||||
|
@ -1301,9 +1301,43 @@ Possible values:
|
||||
|
||||
Default value: `3`.
|
||||
|
||||
## enable_experimental_query_result_cache {#enable-experimental-query-result-cache}
|
||||
## use_query_cache {#use-query-cache}
|
||||
|
||||
If turned on, results of SELECT queries are stored in and (if available) retrieved from the [query result cache](../query-result-cache.md).
|
||||
If turned on, `SELECT` queries may utilize the [query cache](../query-cache.md). Parameters [enable_reads_from_query_cache](#enable-reads-from-query-cache)
|
||||
and [enable_writes_to_query_cache](#enable-writes-to-query-cache) control in more detail how the cache is used.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 - Disabled
- 1 - Enabled
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## enable_reads_from_query_cache {#enable-reads-from-query-cache}
|
||||
|
||||
If turned on, results of `SELECT` queries are retrieved from the [query cache](../query-cache.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 - Disabled
|
||||
- 1 - Enabled
|
||||
|
||||
Default value: `1`.
|
||||
|
||||
## enable_writes_to_query_cache {#enable-writes-to-query-cache}
|
||||
|
||||
If turned on, results of `SELECT` queries are stored in the [query cache](../query-cache.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 - Disabled
|
||||
- 1 - Enabled
|
||||
|
||||
Default value: `1`.
|
||||
|
||||
## query_cache_store_results_of_queries_with_nondeterministic_functions {#query-cache-store-results-of-queries-with-nondeterministic-functions}
|
||||
|
||||
If turned on, then results of `SELECT` queries with non-deterministic functions (e.g. `rand()`, `now()`) can be cached in the [query cache](../query-cache.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1312,31 +1346,9 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## enable_experimental_query_result_cache_passive_usage {#enable-experimental-query-result-cache-passive-usage}
|
||||
## query_cache_min_query_runs {#query-cache-min-query-runs}
|
||||
|
||||
If turned on, results of SELECT queries are (if available) retrieved from the [query result cache](../query-result-cache.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 - Disabled
|
||||
- 1 - Enabled
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## query_result_cache_store_results_of_queries_with_nondeterministic_functions {#query-result-cache-store-results-of-queries-with-nondeterministic-functions}
|
||||
|
||||
If turned on, then results of SELECT queries with non-deterministic functions (e.g. `rand()`, `now()`) can be cached in the [query result cache](../query-result-cache.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 - Disabled
|
||||
- 1 - Enabled
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## query_result_cache_min_query_runs {#query-result-cache-min-query-runs}
|
||||
|
||||
Minimum number of times a SELECT query must run before its result is stored in the [query result cache](../query-result-cache.md).
|
||||
Minimum number of times a `SELECT` query must run before its result is stored in the [query cache](../query-cache.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1344,9 +1356,9 @@ Possible values:
|
||||
|
||||
Default value: `0`
|
||||
|
||||
## query_result_cache_min_query_duration {#query-result-cache-min-query-duration}
|
||||
## query_cache_min_query_duration {#query-cache-min-query-duration}
|
||||
|
||||
Minimum duration in milliseconds a query needs to run for its result to be stored in the [query result cache](../query-result-cache.md).
|
||||
Minimum duration in milliseconds a query needs to run for its result to be stored in the [query cache](../query-cache.md).
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1354,9 +1366,9 @@ Possible values:
|
||||
|
||||
Default value: `0`
|
||||
|
||||
## query_result_cache_ttl {#query-result-cache-ttl}
|
||||
## query_cache_ttl {#query-cache-ttl}
|
||||
|
||||
After this time in seconds entries in the [query result cache](../query-result-cache.md) become stale.
|
||||
After this time in seconds entries in the [query cache](../query-cache.md) become stale.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -1364,9 +1376,9 @@ Possible values:
|
||||
|
||||
Default value: `60`
|
||||
|
||||
## query_result_cache_share_between_users {#query-result-cache-share-between-users}
|
||||
## query_cache_share_between_users {#query-cache-share-between-users}
|
||||
|
||||
If turned on, the result of SELECT queries cached in the [query result cache](../query-result-cache.md) can be read by other users.
|
||||
If turned on, the result of `SELECT` queries cached in the [query cache](../query-cache.md) can be read by other users.
|
||||
It is not recommended to enable this setting due to security reasons.
|
||||
|
||||
Possible values:
|
||||
@ -1632,6 +1644,49 @@ SELECT * FROM test_table
|
||||
└───┘
|
||||
```
|
||||
|
||||
## insert_keeper_max_retries
|
||||
|
||||
The setting sets the maximum number of retries for ClickHouse Keeper (or ZooKeeper) requests during insert into replicated MergeTree. Only Keeper requests which failed due to network error, Keeper session timeout, or request timeout are considered for retries.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — Retries are disabled
|
||||
|
||||
Default value: 0
|
||||
|
||||
Keeper request retries are done after some timeout. The timeout is controlled by the following settings: `insert_keeper_retry_initial_backoff_ms`, `insert_keeper_retry_max_backoff_ms`.
|
||||
The first retry is done after `insert_keeper_retry_initial_backoff_ms` timeout. The consequent timeouts will be calculated as follows:
|
||||
```
|
||||
timeout = min(insert_keeper_retry_max_backoff_ms, latest_timeout * 2)
|
||||
```
|
||||
|
||||
For example, if `insert_keeper_retry_initial_backoff_ms=100`, `insert_keeper_retry_max_backoff_ms=10000` and `insert_keeper_max_retries=8` then timeouts will be `100, 200, 400, 800, 1600, 3200, 6400, 10000`.
|
||||
|
||||
Apart from fault tolerance, the retries aim to provide a better user experience - they help to avoid returning an error during INSERT execution if Keeper is restarted, for example, due to an upgrade.
|
||||
|
||||
## insert_keeper_retry_initial_backoff_ms {#insert_keeper_retry_initial_backoff_ms}
|
||||
|
||||
Initial timeout (in milliseconds) to retry a failed Keeper request during INSERT query execution.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — No timeout
|
||||
|
||||
Default value: 100
|
||||
|
||||
## insert_keeper_retry_max_backoff_ms {#insert_keeper_retry_max_backoff_ms}
|
||||
|
||||
Maximum timeout (in milliseconds) to retry a failed Keeper request during INSERT query execution
|
||||
|
||||
Possible values:
|
||||
|
||||
- Positive integer.
|
||||
- 0 — Maximum timeout is not limited
|
||||
|
||||
Default value: 10000
|
||||
|
||||
## max_network_bytes {#settings-max-network-bytes}
|
||||
|
||||
Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query.
|
||||
@ -3634,6 +3689,30 @@ Default value: `0`.
|
||||
|
||||
- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting
|
||||
|
||||
## optimize_using_constraints
|
||||
|
||||
Use [constraints](../../sql-reference/statements/create/table#constraints) for query optimization. The default is `false`.
|
||||
|
||||
Possible values:
|
||||
|
||||
- true, false
|
||||
|
||||
## optimize_append_index
|
||||
|
||||
Use [constraints](../../sql-reference/statements/create/table#constraints) in order to append index condition. The default is `false`.
|
||||
|
||||
Possible values:
|
||||
|
||||
- true, false
|
||||
|
||||
## optimize_substitute_columns
|
||||
|
||||
Use [constraints](../../sql-reference/statements/create/table#constraints) for column substitution. The default is `false`.
|
||||
|
||||
Possible values:
|
||||
|
||||
- true, false
|
||||
|
||||
## describe_include_subcolumns {#describe_include_subcolumns}
|
||||
|
||||
Enables describing subcolumns for a [DESCRIBE](../../sql-reference/statements/describe-table.md) query. For example, members of a [Tuple](../../sql-reference/data-types/tuple.md) or subcolumns of a [Map](../../sql-reference/data-types/map.md/#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md/#finding-null) or an [Array](../../sql-reference/data-types/array.md/#array-size) data type.
|
||||
|
@ -19,11 +19,11 @@ Example:
|
||||
|
||||
``` sql
|
||||
SELECT maxMap(a, b)
|
||||
FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1]))
|
||||
FROM values('a Array(Char), b Array(Int64)', (['x', 'y'], [2, 2]), (['y', 'z'], [3, 1]))
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─maxMap(a, b)──────┐
|
||||
│ ([1,2,3],[2,2,1]) │
|
||||
└───────────────────┘
|
||||
┌─maxMap(a, b)───────────┐
|
||||
│ [['x','y','z'],[2,3,1]]│
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
@ -6,3 +6,7 @@ sidebar_position: 4
|
||||
# sum
|
||||
|
||||
Calculates the sum. Only works for numbers.
|
||||
|
||||
```
|
||||
SELECT sum(salary) FROM employees;
|
||||
```
|
||||
|
@ -28,15 +28,16 @@ Returns an array of the values with maximum approximate sum of weights.
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT topKWeighted(10)(number, number) FROM numbers(1000)
|
||||
SELECT topKWeighted(2)(k, w) FROM
|
||||
VALUES('k Char, w UInt64', ('y', 1), ('y', 1), ('x', 5), ('y', 1), ('z', 10))
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─topKWeighted(10)(number, number)──────────┐
|
||||
│ [999,998,997,996,995,994,993,992,991,990] │
|
||||
└───────────────────────────────────────────┘
|
||||
┌─topKWeighted(2)(k, w)──┐
|
||||
│ ['z','x'] │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
@ -5,7 +5,7 @@ sidebar_label: Storing Dictionaries in Memory
|
||||
---
|
||||
import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
|
||||
|
||||
# Storing Dictionaries in Memory
|
||||
|
||||
There are a variety of ways to store dictionaries in memory.
|
||||
|
||||
@ -25,7 +25,7 @@ ClickHouse generates an exception for errors with dictionaries. Examples of erro
|
||||
|
||||
You can view the list of dictionaries and their statuses in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table.
|
||||
|
||||
<CloudDetails />
|
||||
|
||||
The configuration looks like this:
|
||||
|
||||
@ -299,11 +299,11 @@ Example: The table contains discounts for each advertiser in the format:
|
||||
|
||||
To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). These elements must contain elements `name` and `type` (if `type` is not specified, the default type will be used - Date). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others).
|
||||
|
||||
:::warning
|
||||
Values of `range_min` and `range_max` should fit in `Int64` type.
|
||||
:::
|
||||
|
||||
Example:
|
||||
|
||||
``` xml
|
||||
<layout>
|
||||
@ -459,7 +459,7 @@ select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res;
|
||||
│ 0.1 │ -- the only one range is matching: 2015-01-01 - Null
|
||||
└─────┘
|
||||
|
||||
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
|
||||
┌─res─┐
|
||||
│ 0.2 │ -- two ranges are matching, range_min 2015-01-15 (0.2) is bigger than 2015-01-01 (0.1)
|
||||
└─────┘
|
||||
@ -496,7 +496,7 @@ select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res;
|
||||
│ 0.1 │ -- the only one range is matching: 2015-01-01 - Null
|
||||
└─────┘
|
||||
|
||||
select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res;
|
||||
┌─res─┐
|
||||
│ 0.1 │ -- two ranges are matching, range_min 2015-01-01 (0.1) is less than 2015-01-15 (0.2)
|
||||
└─────┘
|
||||
@ -588,7 +588,7 @@ Set a large enough cache size. You need to experiment to select the number of ce
|
||||
3. Assess memory consumption using the `system.dictionaries` table.
|
||||
4. Increase or decrease the number of cells until the required memory consumption is reached.
|
||||
|
||||
:::warning
|
||||
Do not use ClickHouse as a source, because it is slow to process queries with random reads.
|
||||
:::
|
||||
|
||||
@ -660,25 +660,30 @@ This type of storage is for use with composite [keys](../../../sql-reference/dic
|
||||
|
||||
This type of storage is for mapping network prefixes (IP addresses) to metadata such as ASN.
|
||||
|
||||
Example: The table contains network prefixes and their corresponding AS number and country code:
|
||||
**Example**
|
||||
|
||||
``` text
|
||||
+-----------|-----|------+
|
||||
| prefix | asn | cca2 |
|
||||
+=================+=======+========+
|
||||
| 202.79.32.0/20 | 17501 | NP |
|
||||
+-----------|-----|------+
|
||||
| 2620:0:870::/48 | 3856 | US |
|
||||
+-----------|-----|------+
|
||||
| 2a02:6b8:1::/48 | 13238 | RU |
|
||||
+-----------|-----|------+
|
||||
| 2001:db8::/32 | 65536 | ZZ |
|
||||
+-----------|-----|------+
|
||||
Suppose we have a table in ClickHouse that contains our IP prefixes and mappings:
|
||||
|
||||
```sql
|
||||
CREATE TABLE my_ip_addresses (
|
||||
prefix String,
|
||||
asn UInt32,
|
||||
cca2 String
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
PRIMARY KEY prefix;
|
||||
```
|
||||
|
||||
When using this type of layout, the structure must have a composite key.
|
||||
```sql
|
||||
INSERT INTO my_ip_addresses VALUES
|
||||
('202.79.32.0/20', 17501, 'NP'),
|
||||
('2620:0:870::/48', 3856, 'US'),
|
||||
('2a02:6b8:1::/48', 13238, 'RU'),
|
||||
('2001:db8::/32', 65536, 'ZZ')
|
||||
;
|
||||
```
|
||||
|
||||
Example:
|
||||
Let's define an `ip_trie` dictionary for this table. The `ip_trie` layout requires a composite key:
|
||||
|
||||
``` xml
|
||||
<structure>
|
||||
@ -712,26 +717,29 @@ Example:
|
||||
or
|
||||
|
||||
``` sql
|
||||
CREATE DICTIONARY somedict (
|
||||
CREATE DICTIONARY my_ip_trie_dictionary (
|
||||
prefix String,
|
||||
asn UInt32,
|
||||
cca2 String DEFAULT '??'
|
||||
)
|
||||
PRIMARY KEY prefix
|
||||
SOURCE(CLICKHOUSE(TABLE 'my_ip_addresses'))
|
||||
LAYOUT(IP_TRIE)
|
||||
LIFETIME(3600);
|
||||
```
|
||||
|
||||
The key must have only one String type attribute that contains an allowed IP prefix. Other types are not supported yet.
|
||||
The key must have only one `String` type attribute that contains an allowed IP prefix. Other types are not supported yet.
|
||||
|
||||
For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys:
|
||||
For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys. The syntax is:
|
||||
|
||||
``` sql
|
||||
dictGetT('dict_name', 'attr_name', tuple(ip))
|
||||
```
|
||||
|
||||
The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6:
|
||||
The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6. For example:
|
||||
|
||||
``` sql
|
||||
dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1')))
|
||||
select dictGet('my_ip_trie_dictionary', 'asn', tuple(IPv6StringToNum('2001:db8::1')))
|
||||
```
|
||||
|
||||
Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned.
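A hedged IPv4 lookup against the dictionary defined above might look like this; `IPv4StringToNum` produces the required `UInt32`, and the address is arbitrary:

```sql
-- Expected to match the 202.79.32.0/20 prefix inserted above and return 'NP'
SELECT dictGet('my_ip_trie_dictionary', 'cca2', tuple(IPv4StringToNum('202.79.32.10'))) AS country;
```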
|
||||
|
@ -21,14 +21,14 @@ For example, you can’t compare a date with a string. You have to use a functio
|
||||
|
||||
Strings are compared by bytes. A shorter string is smaller than all strings that start with it and that contain at least one more character.
|
||||
|
||||
## equals, a = b and a == b operator
|
||||
### equals, a `=` b and a `==` b operator
|
||||
|
||||
## notEquals, a != b and a \<\> b operator
|
||||
### notEquals, a `!=` b and a `<>` b operator
|
||||
|
||||
## less, \< operator
|
||||
### less, `<` operator
|
||||
|
||||
## greater, \> operator
|
||||
### greater, `>` operator
|
||||
|
||||
## lessOrEquals, \<= operator
|
||||
### lessOrEquals, `<=` operator
|
||||
|
||||
## greaterOrEquals, \>= operator
|
||||
### greaterOrEquals, `>=` operator
|
||||
|
@ -215,10 +215,10 @@ The two-argument form of `toDayOfWeek()` enables you to specify whether the week
|
||||
|
||||
| Mode | First day of week | Range |
|
||||
|------|-------------------|------------------------------------------------|
|
||||
| 0 | Monday | 1-7, Monday = 1, Tuesday = 2, ..., Sunday = 7 |
|
||||
| 1 | Monday | 0-6, Monday = 0, Tuesday = 1, ..., Sunday = 6 |
|
||||
| 2 | Sunday | 0-6, Sunday = 0, Monday = 1, ..., Saturday = 6 |
|
||||
| 3 | Sunday | 1-7, Sunday = 1, Monday = 2, ..., Saturday = 7 |
|
||||
| 0 | Monday | 1-7: Monday = 1, Tuesday = 2, ..., Sunday = 7 |
|
||||
| 1 | Monday | 0-6: Monday = 0, Tuesday = 1, ..., Sunday = 6 |
|
||||
| 2 | Sunday | 0-6: Sunday = 0, Monday = 1, ..., Saturday = 6 |
|
||||
| 3 | Sunday | 1-7: Sunday = 1, Monday = 2, ..., Saturday = 7 |
|
||||
|
||||
Alias: `DAYOFWEEK`.
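A small sketch of the two-argument form; 2023-01-01 is a Sunday, so mode `0` should return `7` and mode `1` should return `6`:

```sql
SELECT
    toDayOfWeek(toDate('2023-01-01'))    AS mode_0, -- week starts on Monday, Sunday = 7
    toDayOfWeek(toDate('2023-01-01'), 1) AS mode_1; -- week starts on Monday, Sunday = 6
```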
|
||||
|
||||
@ -1293,31 +1293,31 @@ Similar to formatDateTime, except that it formats datetime in Joda style instead
|
||||
Using replacement fields, you can define a pattern for the resulting string.
|
||||
|
||||
|
||||
| Placeholder | Description | Presentation | Examples |
|
||||
| ----------- | ----------- | ------------- | -------- |
|
||||
| G | era | text | AD |
|
||||
| C | century of era (>=0) | number | 20 |
|
||||
| Y | year of era (>=0) | year | 1996 |
|
||||
| x | weekyear(not supported yet) | year | 1996 |
|
||||
| w | week of weekyear(not supported yet) | number | 27 |
|
||||
| e | day of week | number | 2 |
|
||||
| E | day of week | text | Tuesday; Tue |
|
||||
| y | year | year | 1996 |
|
||||
| D | day of year | number | 189 |
|
||||
| M | month of year | month | July; Jul; 07 |
|
||||
| d | day of month | number | 10 |
|
||||
| a | halfday of day | text | PM |
|
||||
| K | hour of halfday (0~11) | number | 0 |
|
||||
| h | clockhour of halfday (1~12) | number | 12 |
|
||||
| H | hour of day (0~23) | number | 0 |
|
||||
| k | clockhour of day (1~24) | number | 24 |
|
||||
| m | minute of hour | number | 30 |
|
||||
| s | second of minute | number | 55 |
|
||||
| S | fraction of second(not supported yet) | number | 978 |
|
||||
| z | time zone(short name not supported yet) | text | Pacific Standard Time; PST |
|
||||
| Z | time zone offset/id(not supported yet) | zone | -0800; -08:00; America/Los_Angeles |
|
||||
| ' | escape for text | delimiter| |
|
||||
| '' | single quote | literal | ' |
|
||||
| Placeholder | Description | Presentation | Examples |
|
||||
| ----------- | ---------------------------------------- | ------------- | ---------------------------------- |
|
||||
| G | era | text | AD |
|
||||
| C | century of era (>=0) | number | 20 |
|
||||
| Y | year of era (>=0) | year | 1996 |
|
||||
| x | weekyear (not supported yet) | year | 1996 |
|
||||
| w | week of weekyear (not supported yet) | number | 27 |
|
||||
| e | day of week | number | 2 |
|
||||
| E | day of week | text | Tuesday; Tue |
|
||||
| y | year | year | 1996 |
|
||||
| D | day of year | number | 189 |
|
||||
| M | month of year | month | July; Jul; 07 |
|
||||
| d | day of month | number | 10 |
|
||||
| a | halfday of day | text | PM |
|
||||
| K | hour of halfday (0~11) | number | 0 |
|
||||
| h | clockhour of halfday (1~12) | number | 12 |
|
||||
| H | hour of day (0~23) | number | 0 |
|
||||
| k | clockhour of day (1~24) | number | 24 |
|
||||
| m | minute of hour | number | 30 |
|
||||
| s | second of minute | number | 55 |
|
||||
| S | fraction of second (not supported yet) | number | 978 |
|
||||
| z | time zone (short name not supported yet) | text | Pacific Standard Time; PST |
|
||||
| Z | time zone offset/id (not supported yet) | zone | -0800; -08:00; America/Los_Angeles |
|
||||
| ' | escape for text | delimiter | |
|
||||
| '' | single quote | literal | ' |
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -304,7 +304,7 @@ Result:
|
||||
└──────────────┘
|
||||
```
|
||||
|
||||
## s2RectUinion
|
||||
## s2RectUnion
|
||||
|
||||
Returns the smallest rectangle containing the union of this rectangle and the given rectangle. In the S2 system, a rectangle is represented by a type of S2Region called a `S2LatLngRect` that represents a rectangle in latitude-longitude space.
|
||||
|
||||
|
@ -45,37 +45,38 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')
|
||||
|
||||
Calculates the MD4 from a string and returns the resulting set of bytes as FixedString(16).
|
||||
|
||||
## MD5
|
||||
## MD5 {#hash_functions-md5}
|
||||
|
||||
Calculates the MD5 from a string and returns the resulting set of bytes as FixedString(16).
|
||||
If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead.
|
||||
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).
|
||||
|
||||
## sipHash64
|
||||
## sipHash64 {#hash_functions-siphash64}
|
||||
|
||||
Produces a 64-bit [SipHash](https://131002.net/siphash/) hash value.
|
||||
Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
|
||||
|
||||
```sql
|
||||
sipHash64(par1,...)
|
||||
```
|
||||
|
||||
This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) function.
|
||||
This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) hash function.
|
||||
|
||||
Function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm:
|
||||
The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:
|
||||
|
||||
1. After hashing all the input parameters, the function gets the array of hashes.
|
||||
2. Function takes the first and the second elements and calculates a hash for the array of them.
|
||||
3. Then the function takes the hash value, calculated at the previous step, and the third element of the initial hash array, and calculates a hash for the array of them.
|
||||
4. The previous step is repeated for all the remaining elements of the initial hash array.
|
||||
1. The first and the second hash value are concatenated to an array which is hashed.
|
||||
2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
|
||||
3. This calculation is repeated for all remaining hash values of the original input.
|
||||
|
||||
**Arguments**
|
||||
|
||||
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
|
||||
The function takes a variable number of input parameters of any of the [supported data types](/docs/en/sql-reference/data-types/index.md).
|
||||
|
||||
**Returned Value**
|
||||
|
||||
A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value.
|
||||
|
||||
Note that the calculated hash values may be equal for the same input values of different argument types. This affects for example integer types of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data.
|
||||
|
||||
**Example**
|
||||
|
||||
```sql
|
||||
@ -84,13 +85,45 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00
|
||||
|
||||
```response
|
||||
┌──────────────SipHash─┬─type───┐
|
||||
│ 13726873534472839665 │ UInt64 │
|
||||
│ 11400366955626497465 │ UInt64 │
|
||||
└──────────────────────┴────────┘
|
||||
```
|
||||
|
||||
## sipHash64Keyed
|
||||
|
||||
Same as [sipHash64](#hash_functions-siphash64) but additionally takes an explicit key argument instead of using a fixed key.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
sipHash64Keyed((k0, k1), par1,...)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
Same as [sipHash64](#hash_functions-siphash64), but the first argument is a tuple of two UInt64 values representing the key.
|
||||
|
||||
**Returned value**
|
||||
|
||||
A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT sipHash64Keyed((506097522914230528, 1084818905618843912), array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS SipHash, toTypeName(SipHash) AS type;
|
||||
```
|
||||
|
||||
```response
|
||||
┌─────────────SipHash─┬─type───┐
|
||||
│ 8017656310194184311 │ UInt64 │
|
||||
└─────────────────────┴────────┘
|
||||
```
|
||||
|
||||
## sipHash128
|
||||
|
||||
Produces a 128-bit [SipHash](https://131002.net/siphash/) hash value. Differs from [sipHash64](#hash_functions-siphash64) in that the final xor-folding state is done up to 128 bits.
|
||||
Like [sipHash64](#hash_functions-siphash64) but produces a 128-bit hash value, i.e. the final xor-folding state is done up to 128 bits.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -100,13 +133,11 @@ sipHash128(par1,...)
|
||||
|
||||
**Arguments**
|
||||
|
||||
The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
|
||||
Same as for [sipHash64](#hash_functions-siphash64).
|
||||
|
||||
**Returned value**
|
||||
|
||||
A 128-bit `SipHash` hash value.
|
||||
|
||||
Type: [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md).
|
||||
A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -124,6 +155,40 @@ Result:
|
||||
└──────────────────────────────────┘
|
||||
```
|
||||
|
||||
## sipHash128Keyed
|
||||
|
||||
Same as [sipHash128](#hash_functions-siphash128) but additionally takes an explicit key argument instead of using a fixed key.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
sipHash128Keyed((k0, k1), par1,...)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
Same as [sipHash128](#hash_functions-siphash128), but the first argument is a tuple of two UInt64 values representing the key.
|
||||
|
||||
**Returned value**
|
||||
|
||||
A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT hex(sipHash128Keyed((506097522914230528, 1084818905618843912),'foo', '\x01', 3));
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─hex(sipHash128Keyed((506097522914230528, 1084818905618843912), 'foo', '', 3))─┐
|
||||
│ B8467F65C8B4CFD9A5F8BD733917D9BF │
|
||||
└───────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## cityHash64
|
||||
|
||||
Produces a 64-bit [CityHash](https://github.com/google/cityhash) hash value.
|
||||
|
@ -95,6 +95,32 @@ Result:
|
||||
└───────────────────────────────┘
|
||||
```
|
||||
|
||||
If the argument `needle` is empty, the following rules apply:
- if no `start_pos` was specified: return `1`
- if `start_pos = 0`: return `1`
- if `start_pos >= 1` and `start_pos <= length(haystack) + 1`: return `start_pos`
- otherwise: return `0`

The same rules also apply to the functions `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8`.

``` sql
|
||||
SELECT
|
||||
position('abc', ''),
|
||||
position('abc', '', 0),
|
||||
position('abc', '', 1),
|
||||
position('abc', '', 2),
|
||||
position('abc', '', 3),
|
||||
position('abc', '', 4),
|
||||
position('abc', '', 5)
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─position('abc', '')─┬─position('abc', '', 0)─┬─position('abc', '', 1)─┬─position('abc', '', 2)─┬─position('abc', '', 3)─┬─position('abc', '', 4)─┬─position('abc', '', 5)─┐
|
||||
│ 1 │ 1 │ 1 │ 2 │ 3 │ 4 │ 0 │
|
||||
└─────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┘
|
||||
```
|
||||
|
||||
|
||||
**Examples for POSITION(needle IN haystack) syntax**
|
||||
|
||||
Query:
|
||||
|
@ -12,7 +12,7 @@ The following operations are available:
|
||||
|
||||
- `ALTER TABLE [db].table_name [ON CLUSTER cluster] ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - Adds index description to tables metadata.

- `ALTER TABLE [db].table_name [ON CLUSTER cluster] DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk.
- `ALTER TABLE [db].table_name [ON CLUSTER cluster] DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations).

- `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data.

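A minimal sketch combining the operations listed above; the table, column, and index names are hypothetical:

```sql
-- Add a data-skipping index to the table metadata (hypothetical names).
ALTER TABLE db.table_name ADD INDEX idx_value value TYPE minmax GRANULARITY 4;

-- Build the index for data that was inserted before the index existed.
ALTER TABLE db.table_name MATERIALIZE INDEX idx_value;

-- Remove the index description and its files from disk.
ALTER TABLE db.table_name DROP INDEX idx_value;
```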
@ -6,6 +6,10 @@ sidebar_label: TTL
|
||||
|
||||
# Manipulations with Table TTL
|
||||
|
||||
:::note
|
||||
If you are looking for details on using TTL for managing old data, check out the [Manage Data with TTL](/docs/en/guides/developer/ttl.md) user guide. The docs below demonstrate how to alter or remove an existing TTL rule.
|
||||
:::
|
||||
|
||||
## MODIFY TTL
|
||||
|
||||
You can change [table TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) with a request of the following form:
|
||||
|
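The concrete syntax block sits outside this diff hunk; as a hedged illustration only, a typical request has this shape (hypothetical table and column names):

```sql
-- Keep rows for three months after event_date, then let TTL remove them.
ALTER TABLE db.table_name MODIFY TTL event_date + INTERVAL 3 MONTH;
```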
@ -110,7 +110,7 @@ LIFETIME(MIN 0 MAX 1000)
|
||||
### Create a dictionary from a file available by HTTP(S)
|
||||
|
||||
```sql
|
||||
statement: CREATE DICTIONARY default.taxi_zone_dictionary
|
||||
CREATE DICTIONARY default.taxi_zone_dictionary
|
||||
(
|
||||
`LocationID` UInt16 DEFAULT 0,
|
||||
`Borough` String,
|
||||
|
@ -3,6 +3,7 @@ slug: /en/sql-reference/statements/create/table
|
||||
sidebar_position: 36
|
||||
sidebar_label: TABLE
|
||||
title: "CREATE TABLE"
|
||||
keywords: [compression, codec, schema, DDL]
|
||||
---
|
||||
|
||||
Creates a new table. This query can have various syntax forms depending on a use case.
|
||||
@ -293,7 +294,7 @@ These codecs are designed to make compression more effective by using specific f
|
||||
|
||||
#### Gorilla
|
||||
|
||||
`Gorilla` — Calculates XOR between current and previous value and writes it in compact binary form. Efficient when storing a series of floating point values that change slowly, because the best compression rate is achieved when neighboring values are binary equal. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. For additional information, see Compressing Values in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
|
||||
`Gorilla` — Calculates XOR between current and previous floating point value and writes it in compact binary form. The smaller the difference between consecutive values is, i.e. the slower the values of the series changes, the better the compression rate. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. For additional information, see section 4.1 in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://doi.org/10.14778/2824032.2824078).
|
||||
|
||||
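A hedged illustration of applying the codec described above; the table and column names are hypothetical:

```sql
-- Slowly changing gauge values compress well with the Gorilla codec.
CREATE TABLE db.metrics
(
    `time` DateTime,
    `value` Float64 CODEC(Gorilla)
)
ENGINE = MergeTree
ORDER BY time;
```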
#### FPC
|
||||
|
||||
|
@ -2,15 +2,16 @@
|
||||
slug: /en/sql-reference/statements/delete
|
||||
sidebar_position: 36
|
||||
sidebar_label: DELETE
|
||||
description: Lightweight deletes simplify the process of deleting data from the database.
|
||||
keywords: [delete]
|
||||
title: DELETE Statement
|
||||
---
|
||||
|
||||
# DELETE Statement
|
||||
|
||||
``` sql
|
||||
DELETE FROM [db.]table [WHERE expr]
|
||||
DELETE FROM [db.]table [ON CLUSTER cluster] [WHERE expr]
|
||||
```
|
||||
|
||||
`DELETE FROM` removes rows from table `[db.]table` that match expression `expr`. The deleted rows are marked as deleted immediately and will be automatically filtered out of all subsequent queries. Cleanup of data happens asynchronously in background. This feature is only available for MergeTree table engine family.
|
||||
`DELETE FROM` removes rows from the table `[db.]table` that match the expression `expr`. The deleted rows are marked as deleted immediately and will be automatically filtered out of all subsequent queries. Cleanup of data happens asynchronously in the background. This feature is only available for the MergeTree table engine family.
|
||||
|
||||
For example, the following query deletes all rows from the `hits` table where the `Title` column contains the text `hello`:
|
||||
|
||||
@ -32,7 +33,7 @@ SET allow_experimental_lightweight_delete = true;
|
||||
An [alternative way to delete rows](./alter/delete.md) in ClickHouse is `ALTER TABLE ... DELETE`, which might be more efficient if you do bulk deletes only occasionally and don't need the operation to be applied instantly. In most use cases the new lightweight `DELETE FROM` behavior will be considerably faster.
|
||||
|
||||
:::warning
|
||||
Even though deletes are becoming more lightweight in ClickHouse, they should still not be used as aggressively as on an OLTP system. Ligthweight deletes are currently efficient for wide parts, but for compact parts they can be a heavyweight operation, and it may be better to use `ALTER TABLE` for some scenarios.
|
||||
Even though deletes are becoming more lightweight in ClickHouse, they should still not be used as aggressively as on an OLTP system. Lightweight deletes are currently efficient for wide parts, but for compact parts, they can be a heavyweight operation, and it may be better to use `ALTER TABLE` for some scenarios.
|
||||
:::
|
||||
|
||||
:::note
|
||||
@ -41,3 +42,34 @@ Even though deletes are becoming more lightweight in ClickHouse, they should sti
|
||||
grant ALTER DELETE ON db.table to username;
|
||||
```
|
||||
:::
|
||||
|
||||
## Lightweight Delete Internals

The idea behind Lightweight Delete is that when a `DELETE FROM table ...` query is executed, ClickHouse only saves a mask where each row is marked as either “existing” or “deleted”. Those “deleted” rows become invisible to subsequent queries, but physically the rows are removed only later, by subsequent merges. Writing this mask is usually much more lightweight than what an `ALTER TABLE ... DELETE ...` query does.

### How it is implemented
The mask is implemented as a hidden `_row_exists` system column that stores `True` for all visible rows and `False` for deleted ones. This column is only present in a part if some rows in the part were deleted. In other words, the column is not persisted when all of its values are equal to `True`.

## SELECT query
When the column is present, a `SELECT ... FROM table WHERE condition` query is internally extended with an additional predicate on `_row_exists` and becomes similar to
```sql
SELECT ... FROM table PREWHERE _row_exists WHERE condition
```
At execution time the column `_row_exists` is read to determine which rows are not visible, and if there are many deleted rows ClickHouse can also determine which granules can be skipped entirely when reading the rest of the columns.

## DELETE query
`DELETE FROM table WHERE condition` is translated into the `ALTER table UPDATE _row_exists = 0 WHERE condition` mutation; a sketch of this equivalence follows the list below. Internally this mutation is executed in 2 steps:
1. `SELECT count() FROM table WHERE condition` is run for each individual part to determine whether the part is affected.
2. The affected parts are mutated, and hardlinks are created for the unaffected parts. For Wide parts, mutating a part in fact only writes the `_row_exists` column and just hardlinks all other columns’ files. For Compact parts, all columns are rewritten because they are all stored together in one file.
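
A hedged sketch of the equivalence described above; the table name and condition are illustrative only:

```sql
-- Lightweight delete issued by the user:
DELETE FROM db.table_name WHERE status = 'obsolete';

-- Roughly what it becomes internally (illustrative, not meant to be run by hand):
ALTER TABLE db.table_name UPDATE _row_exists = 0 WHERE status = 'obsolete';
```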

So if we compare Lightweight Delete to `ALTER TABLE ... DELETE`: in the first step they both do the same work to determine which parts are affected, but in the second step `ALTER TABLE ... DELETE` does much more, because it reads and rewrites all columns’ files for the affected parts.

Given this implementation, the following can negatively affect `DELETE FROM` execution time:
- A heavy `WHERE` condition in the `DELETE` query
- A mutations queue filled with other mutations, because all mutations on a table are executed sequentially
- A very large number of data parts in the table
- A lot of data in Compact parts, since in a Compact part all columns are stored in one file

:::note
|
||||
This implementation might change in the future.
|
||||
:::
|
||||
|
@ -510,3 +510,15 @@ Result:
|
||||
**See Also**
|
||||
|
||||
- [system.settings](../../operations/system-tables/settings.md) table
|
||||
|
||||
## SHOW ENGINES
|
||||
|
||||
``` sql
|
||||
SHOW ENGINES [INTO OUTFILE filename] [FORMAT format]
|
||||
```
|
||||
|
||||
Outputs the content of the [system.table_engines](../../operations/system-tables/table_engines.md) table, which contains descriptions of the table engines supported by the server and their feature support information.
|
||||
|
||||
**See Also**
|
||||
|
||||
- [system.table_engines](../../operations/system-tables/table_engines.md) table
|
@ -103,9 +103,9 @@ Its size can be configured using the server-level setting [uncompressed_cache_si
|
||||
Reset the compiled expression cache.
|
||||
The compiled expression cache is enabled/disabled with the query/user/profile-level setting [compile_expressions](../../operations/settings/settings.md#compile-expressions).
|
||||
|
||||
## DROP QUERY RESULT CACHE
|
||||
## DROP QUERY CACHE
|
||||
|
||||
Resets the [query result cache](../../operations/query-result-cache.md).
|
||||
Resets the [query cache](../../operations/query-cache.md).
|
||||
|
||||
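A minimal usage sketch, assuming the statement follows the same `SYSTEM <action>` form as the other cache-related commands on this page:

```sql
SYSTEM DROP QUERY CACHE;
```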
## FLUSH LOGS
|
||||
|
||||
@ -283,7 +283,7 @@ SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
|
||||
Waits until a `ReplicatedMergeTree` table is synced with the other replicas in a cluster. Runs until `receive_timeout` if fetches are currently disabled for the table.
|
||||
|
||||
``` sql
|
||||
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name
|
||||
SYSTEM SYNC REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name
|
||||
```
|
||||
|
||||
After running this statement, `[db.]replicated_merge_tree_family_table_name` fetches commands from the common replicated log into its own replication queue, and then the query waits until the replica processes all of the fetched commands.
|
||||
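A hedged usage sketch of the extended syntax shown above; the cluster and table names are hypothetical:

```sql
SYSTEM SYNC REPLICA ON CLUSTER my_cluster db.replicated_table;
```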
@ -362,3 +362,15 @@ Allows to drop filesystem cache.
|
||||
```sql
|
||||
SYSTEM DROP FILESYSTEM CACHE
|
||||
```
|
||||
|
||||
### SYNC FILE CACHE
|
||||
|
||||
:::note
|
||||
It's too heavy and has potential for misuse.
|
||||
:::
|
||||
|
||||
Performs the `sync` syscall.
|
||||
|
||||
```sql
|
||||
SYSTEM SYNC FILE CACHE
|
||||
```
|
||||
|
@ -8,7 +8,7 @@ sidebar_label: generateRandom
|
||||
|
||||
Generates random data with the given schema.
Allows populating test tables with data.
Supports all data types that can be stored in table except `LowCardinality` and `AggregateFunction`.
Not all types are supported.

``` sql
|
||||
generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]])
|
||||
@ -18,7 +18,7 @@ generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_stri
|
||||
|
||||
- `name` — Name of corresponding column.
|
||||
- `TypeName` — Type of corresponding column.
|
||||
- `max_array_length` — Maximum array length for all generated arrays. Defaults to `10`.
|
||||
- `max_array_length` — Maximum elements for all generated arrays or maps. Defaults to `10`.
|
||||
- `max_string_length` — Maximum string length for all generated strings. Defaults to `10`.
|
||||
- `random_seed` — Specify random seed manually to produce stable results. If NULL — seed is randomly generated.
|
||||
|
||||
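A minimal usage sketch based on the signature above; the column layout, seed, and length limits are arbitrary:

```sql
-- Generate three rows of random data matching the given structure.
SELECT *
FROM generateRandom('id UInt32, value Float64, name String', 1, 10, 5)
LIMIT 3;
```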
|
@ -2,11 +2,12 @@
|
||||
slug: /en/sql-reference/table-functions/s3
|
||||
sidebar_position: 45
|
||||
sidebar_label: s3
|
||||
keywords: [s3, gcs, bucket]
|
||||
---
|
||||
|
||||
# s3 Table Function
|
||||
|
||||
Provides table-like interface to select/insert files in [Amazon S3](https://aws.amazon.com/s3/). This table function is similar to [hdfs](../../sql-reference/table-functions/hdfs.md), but provides S3-specific features.
|
||||
Provides a table-like interface to select/insert files in [Amazon S3](https://aws.amazon.com/s3/) and [Google Cloud Storage](https://cloud.google.com/storage/). This table function is similar to the [hdfs function](../../sql-reference/table-functions/hdfs.md), but provides S3-specific features.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -14,9 +15,24 @@ Provides table-like interface to select/insert files in [Amazon S3](https://aws.
|
||||
s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
|
||||
```
|
||||
|
||||
:::tip GCS
|
||||
The S3 Table Function integrates with Google Cloud Storage by using the GCS XML API and HMAC keys. See the [Google interoperability docs]( https://cloud.google.com/storage/docs/interoperability) for more details about the endpoint and HMAC.
|
||||
|
||||
For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_id` and `aws_secret_access_key`.
|
||||
:::
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `path` — Bucket URL with a path to the file. Supports the following wildcards in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path).
|
||||
|
||||
:::note GCS
|
||||
The GCS path is in this format because the endpoint for the Google XML API is different from the JSON API:
|
||||
```
|
||||
https://storage.googleapis.com/<bucket>/<folder>/<filename(s)>
|
||||
```
|
||||
and not ~~https://storage.cloud.google.com~~.
|
||||
:::
|
||||
|
||||
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
|
||||
- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension.
|
||||
|
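A hedged illustration of the syntax above; the bucket URL, file layout, and column structure are hypothetical:

```sql
-- Read CSV files matching a wildcard directly from a bucket.
SELECT *
FROM s3(
    'https://my-bucket.s3.amazonaws.com/data/*.csv',
    'CSVWithNames',
    'id UInt32, name String'
)
LIMIT 10;
```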
@ -27,7 +27,7 @@ $ cat /etc/clickhouse-server/config.d/named_collections.xml
|
||||
|
||||
## Именованные соединения для доступа к S3
|
||||
|
||||
Описание параметров смотри [Табличная Функция S3](../sql-reference/table-functions/s3.md).
|
||||
Описание параметров смотрите [Табличная Функция S3](../sql-reference/table-functions/s3.md).
|
||||
|
||||
Пример конфигурации:
|
||||
```xml
|
||||
@ -75,7 +75,7 @@ SELECT * FROM s3_engine_table LIMIT 3;
|
||||
|
||||
## Пример использования именованных соединений с базой данных MySQL
|
||||
|
||||
Описание параметров смотри [mysql](../sql-reference/table-functions/mysql.md).
|
||||
Описание параметров смотрите [mysql](../sql-reference/table-functions/mysql.md).
|
||||
|
||||
Пример конфигурации:
|
||||
```xml
|
||||
@ -147,7 +147,7 @@ SELECT dictGet('dict', 'B', 2);
|
||||
|
||||
## Пример использования именованных соединений с базой данных PostgreSQL
|
||||
|
||||
Описание параметров смотри [postgresql](../sql-reference/table-functions/postgresql.md).
|
||||
Описание параметров смотрите [postgresql](../sql-reference/table-functions/postgresql.md).
|
||||
|
||||
Пример конфигурации:
|
||||
```xml
|
||||
@ -227,3 +227,58 @@ SELECT dictGet('dict', 'b', 2);
|
||||
│ two │
|
||||
└─────────────────────────┘
|
||||
```
|
||||
|
||||
## Пример использования именованных соединений с удалённой базой данных Сlickhouse
|
||||
|
||||
Описание параметров смотрите [remote](../sql-reference/table-functions/remote.md).
|
||||
|
||||
Пример конфигурации:
|
||||
```xml
|
||||
<clickhouse>
|
||||
<named_collections>
|
||||
<remote1>
|
||||
<host>remote_host</host>
|
||||
<port>9000</port>
|
||||
<database>system</database>
|
||||
<user>foo</user>
|
||||
<password>secret</password>
|
||||
</remote1>
|
||||
</named_collections>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
### Пример использования именованных соединений с табличной функцией remote/remoteSecure
|
||||
|
||||
```sql
|
||||
SELECT * FROM remote(remote1, table = one);
|
||||
┌─dummy─┐
|
||||
│ 0 │
|
||||
└───────┘
|
||||
|
||||
SELECT * FROM remote(remote1, database = merge(system, '^one'));
|
||||
┌─dummy─┐
|
||||
│ 0 │
|
||||
└───────┘
|
||||
|
||||
INSERT INTO FUNCTION remote(remote1, database = default, table = test) VALUES (1,'a');
|
||||
|
||||
SELECT * FROM remote(remote1, database = default, table = test);
|
||||
┌─a─┬─b─┐
|
||||
│ 1 │ a │
|
||||
└───┴───┘
|
||||
```
|
||||
|
||||
### Пример использования именованных соединений с внешним словарем с источником удалённым сервером Clickhouse
|
||||
|
||||
```sql
|
||||
CREATE DICTIONARY dict(a Int64, b String)
|
||||
PRIMARY KEY a
|
||||
SOURCE(CLICKHOUSE(NAME remote1 TABLE test DB default))
|
||||
LIFETIME(MIN 1 MAX 2)
|
||||
LAYOUT(HASHED());
|
||||
|
||||
SELECT dictGet('dict', 'b', 1);
|
||||
┌─dictGet('dict', 'b', 1)─┐
|
||||
│ a │
|
||||
└─────────────────────────┘
|
||||
```
|
||||
|
1
docs/tools/.gitignore
vendored
1
docs/tools/.gitignore
vendored
@ -1,3 +1,2 @@
|
||||
build
|
||||
__pycache__
|
||||
*.pyc
|
||||
|
@ -102,7 +102,8 @@ done
|
||||
EOF
|
||||
chmod +x "$PKG_PATH/install/doinst.sh"
|
||||
if [ -f "$PKG_PATH/DEBIAN/postinst" ]; then
|
||||
tail +2 "$PKG_PATH/DEBIAN/postinst" >> "$PKG_PATH/install/doinst.sh"
|
||||
# we don't need debconf source in doinst in any case
|
||||
tail +2 "$PKG_PATH/DEBIAN/postinst" | grep -v debconf/confmodule >> "$PKG_PATH/install/doinst.sh"
|
||||
fi
|
||||
rm -rf "$PKG_PATH/DEBIAN"
|
||||
if [ -f "/usr/bin/pigz" ]; then
|
||||
|
46
packages/clickhouse-keeper.postinstall
Normal file
46
packages/clickhouse-keeper.postinstall
Normal file
@ -0,0 +1,46 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
# set -x
|
||||
|
||||
PROGRAM=clickhouse-keeper
|
||||
KEEPER_USER=${KEEPER_USER:=clickhouse}
|
||||
KEEPER_GROUP=${KEEPER_GROUP:=clickhouse}
|
||||
# Please note that we don't support paths with whitespaces. This is rather ignorant.
|
||||
KEEPER_CONFDIR=${KEEPER_CONFDIR:=/etc/$PROGRAM}
|
||||
KEEPER_DATADIR=${KEEPER_DATADIR:=/var/lib/clickhouse}
|
||||
KEEPER_LOGDIR=${KEEPER_LOGDIR:=/var/log/$PROGRAM}
|
||||
|
||||
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule
|
||||
[ -f /etc/default/clickhouse-keeper ] && . /etc/default/clickhouse-keeper
|
||||
|
||||
if [ ! -f "/etc/debian_version" ]; then
|
||||
not_deb_os=1
|
||||
fi
|
||||
|
||||
if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
|
||||
if ! getent group "${KEEPER_GROUP}" > /dev/null 2>&1 ; then
|
||||
groupadd --system "${KEEPER_GROUP}"
|
||||
fi
|
||||
GID=$(getent group "${KEEPER_GROUP}" | cut -d: -f 3)
|
||||
if ! id "${KEEPER_USER}" > /dev/null 2>&1 ; then
|
||||
adduser --system --home /dev/null --no-create-home \
|
||||
--gid "${GID}" --shell /bin/false \
|
||||
"${KEEPER_USER}"
|
||||
fi
|
||||
|
||||
chown -R "${KEEPER_USER}:${KEEPER_GROUP}" "${KEEPER_CONFDIR}"
|
||||
chmod 0755 "${KEEPER_CONFDIR}"
|
||||
|
||||
if ! [ -d "${KEEPER_DATADIR}" ]; then
|
||||
mkdir -p "${KEEPER_DATADIR}"
|
||||
chown -R "${KEEPER_USER}:${KEEPER_GROUP}" "${KEEPER_DATADIR}"
|
||||
chmod 0700 "${KEEPER_DATADIR}"
|
||||
fi
|
||||
|
||||
if ! [ -d "${KEEPER_LOGDIR}" ]; then
|
||||
mkdir -p "${KEEPER_LOGDIR}"
|
||||
chown -R "${KEEPER_USER}:${KEEPER_GROUP}" "${KEEPER_LOGDIR}"
|
||||
chmod 0770 "${KEEPER_LOGDIR}"
|
||||
fi
|
||||
fi
|
||||
# vim: ts=4: sw=4: sts=4: expandtab
|
27
packages/clickhouse-keeper.service
Normal file
27
packages/clickhouse-keeper.service
Normal file
@ -0,0 +1,27 @@
|
||||
[Unit]
|
||||
Description=ClickHouse Keeper - zookeeper compatible distributed coordination server
|
||||
Requires=network-online.target
|
||||
# NOTE: that After/Wants=time-sync.target is not enough, you need to ensure
|
||||
# that the time was adjusted already, if you use systemd-timesyncd you are
|
||||
# safe, but if you use ntp or some other daemon, you should configure it
|
||||
# additionaly.
|
||||
After=time-sync.target network-online.target
|
||||
Wants=time-sync.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=clickhouse
|
||||
Group=clickhouse
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
RuntimeDirectory=%p # %p is resolved to the systemd unit name
|
||||
ExecStart=/usr/bin/clickhouse-keeper --config=/etc/clickhouse-keeper/keeper_config.xml --pid-file=%t/%p/%p.pid
|
||||
# Minus means that this file is optional.
|
||||
EnvironmentFile=-/etc/default/%p
|
||||
LimitCORE=infinity
|
||||
LimitNOFILE=500000
|
||||
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
|
||||
|
||||
[Install]
|
||||
# ClickHouse should not start from the rescue shell (rescue.target).
|
||||
WantedBy=multi-user.target
|
@ -30,6 +30,8 @@ contents:
|
||||
type: config|noreplace
|
||||
- src: root/usr/bin/clickhouse-keeper
|
||||
dst: /usr/bin/clickhouse-keeper
|
||||
- src: clickhouse-keeper.service
|
||||
dst: /lib/systemd/system/clickhouse-keeper.service
|
||||
# docs
|
||||
- src: ../AUTHORS
|
||||
dst: /usr/share/doc/clickhouse-keeper/AUTHORS
|
||||
@ -39,3 +41,6 @@ contents:
|
||||
dst: /usr/share/doc/clickhouse-keeper/LICENSE
|
||||
- src: ../README.md
|
||||
dst: /usr/share/doc/clickhouse-keeper/README.md
|
||||
|
||||
scripts:
|
||||
postinstall: ./clickhouse-keeper.postinstall
|
||||
|
@ -11,8 +11,6 @@ CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR:=/var/lib/clickhouse}
|
||||
CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR:=/var/log/clickhouse-server}
|
||||
CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:=/usr/bin}
|
||||
CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse}
|
||||
EXTRACT_FROM_CONFIG=${CLICKHOUSE_GENERIC_PROGRAM}-extract-from-config
|
||||
CLICKHOUSE_CONFIG=$CLICKHOUSE_CONFDIR/config.xml
|
||||
CLICKHOUSE_PIDDIR=/var/run/$PROGRAM
|
||||
|
||||
[ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule
|
||||
|
@ -17,10 +17,10 @@ User=clickhouse
|
||||
Group=clickhouse
|
||||
Restart=always
|
||||
RestartSec=30
|
||||
RuntimeDirectory=clickhouse-server
|
||||
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=/run/clickhouse-server/clickhouse-server.pid
|
||||
RuntimeDirectory=%p # %p is resolved to the systemd unit name
|
||||
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=%t/%p/%p.pid
|
||||
# Minus means that this file is optional.
|
||||
EnvironmentFile=-/etc/default/clickhouse
|
||||
EnvironmentFile=-/etc/default/%p
|
||||
LimitCORE=infinity
|
||||
LimitNOFILE=500000
|
||||
CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE
|
||||
|
@ -277,7 +277,7 @@ private:
|
||||
}
|
||||
|
||||
if (queries.empty())
|
||||
throw Exception("Empty list of queries.", ErrorCodes::EMPTY_DATA_PASSED);
|
||||
throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Empty list of queries.");
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -474,7 +474,7 @@ private:
|
||||
executor.sendQuery(ClientInfo::QueryKind::INITIAL_QUERY);
|
||||
|
||||
ProfileInfo info;
|
||||
while (Block block = executor.read())
|
||||
while (Block block = executor.readBlock())
|
||||
info.update(block);
|
||||
|
||||
executor.finish();
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <boost/algorithm/string/join.hpp>
|
||||
#include <cstdlib>
|
||||
#include <fcntl.h>
|
||||
#include <map>
|
||||
@ -127,6 +128,69 @@ void Client::showWarnings()
|
||||
}
|
||||
}
|
||||
|
||||
void Client::parseConnectionsCredentials()
|
||||
{
|
||||
/// It is not possible to correctly handle multiple --host --port options.
|
||||
if (hosts_and_ports.size() >= 2)
|
||||
return;
|
||||
|
||||
String host;
|
||||
std::optional<UInt16> port;
|
||||
if (hosts_and_ports.empty())
|
||||
{
|
||||
host = config().getString("host", "localhost");
|
||||
if (config().has("port"))
|
||||
port = config().getInt("port");
|
||||
}
|
||||
else
|
||||
{
|
||||
host = hosts_and_ports.front().host;
|
||||
port = hosts_and_ports.front().port;
|
||||
}
|
||||
|
||||
Strings keys;
|
||||
config().keys("connections_credentials", keys);
|
||||
for (const auto & connection : keys)
|
||||
{
|
||||
const String & prefix = "connections_credentials." + connection;
|
||||
|
||||
const String & connection_name = config().getString(prefix + ".name", "");
|
||||
if (connection_name != host)
|
||||
continue;
|
||||
|
||||
String connection_hostname;
|
||||
if (config().has(prefix + ".hostname"))
|
||||
connection_hostname = config().getString(prefix + ".hostname");
|
||||
else
|
||||
connection_hostname = connection_name;
|
||||
|
||||
/// Set "host" unconditionally (since it is used as a "name"), while
|
||||
/// other options only if they are not set yet (config.xml/cli
|
||||
/// options).
|
||||
config().setString("host", connection_hostname);
|
||||
if (!hosts_and_ports.empty())
|
||||
hosts_and_ports.front().host = connection_hostname;
|
||||
|
||||
if (config().has(prefix + ".port") && !port.has_value())
|
||||
config().setInt("port", config().getInt(prefix + ".port"));
|
||||
if (config().has(prefix + ".secure") && !config().has("secure"))
|
||||
config().setBool("secure", config().getBool(prefix + ".secure"));
|
||||
if (config().has(prefix + ".user") && !config().has("user"))
|
||||
config().setString("user", config().getString(prefix + ".user"));
|
||||
if (config().has(prefix + ".password") && !config().has("password"))
|
||||
config().setString("password", config().getString(prefix + ".password"));
|
||||
if (config().has(prefix + ".database") && !config().has("database"))
|
||||
config().setString("database", config().getString(prefix + ".database"));
|
||||
if (config().has(prefix + ".history_file") && !config().has("history_file"))
|
||||
{
|
||||
String history_file = config().getString(prefix + ".history_file");
|
||||
if (history_file.starts_with("~") && !home_path.empty())
|
||||
history_file = home_path + "/" + history_file.substr(1);
|
||||
config().setString("history_file", history_file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Make query to get all server warnings
|
||||
std::vector<String> Client::loadWarningMessages()
|
||||
{
|
||||
@ -216,6 +280,8 @@ void Client::initialize(Poco::Util::Application & self)
|
||||
if (env_password)
|
||||
config().setString("password", env_password);
|
||||
|
||||
parseConnectionsCredentials();
|
||||
|
||||
// global_context->setApplicationType(Context::ApplicationType::CLIENT);
|
||||
global_context->setQueryParameters(query_parameters);
|
||||
|
||||
@ -473,24 +539,28 @@ void Client::connect()
|
||||
// Prints changed settings to stderr. Useful for debugging fuzzing failures.
|
||||
void Client::printChangedSettings() const
|
||||
{
|
||||
const auto & changes = global_context->getSettingsRef().changes();
|
||||
if (!changes.empty())
|
||||
auto print_changes = [](const auto & changes, std::string_view settings_name)
|
||||
{
|
||||
fmt::print(stderr, "Changed settings: ");
|
||||
for (size_t i = 0; i < changes.size(); ++i)
|
||||
if (!changes.empty())
|
||||
{
|
||||
if (i)
|
||||
fmt::print(stderr, "Changed {}: ", settings_name);
|
||||
for (size_t i = 0; i < changes.size(); ++i)
|
||||
{
|
||||
fmt::print(stderr, ", ");
|
||||
if (i)
|
||||
fmt::print(stderr, ", ");
|
||||
fmt::print(stderr, "{} = '{}'", changes[i].name, toString(changes[i].value));
|
||||
}
|
||||
fmt::print(stderr, "{} = '{}'", changes[i].name, toString(changes[i].value));
|
||||
|
||||
fmt::print(stderr, "\n");
|
||||
}
|
||||
fmt::print(stderr, "\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
fmt::print(stderr, "No changed settings.\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
fmt::print(stderr, "No changed {}.\n", settings_name);
|
||||
}
|
||||
};
|
||||
|
||||
print_changes(global_context->getSettingsRef().changes(), "settings");
|
||||
print_changes(cmd_merge_tree_settings.changes(), "MergeTree settings");
|
||||
}
|
||||
|
||||
|
||||
@ -719,7 +789,7 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
// uniformity.
|
||||
// Surprisingly, this is a client exception, because we get the
|
||||
// server exception w/o throwing (see onReceiveException()).
|
||||
client_exception = std::make_unique<Exception>(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode());
|
||||
client_exception = std::make_unique<Exception>(getCurrentExceptionMessageAndPattern(print_stack_trace), getCurrentExceptionCode());
|
||||
have_error = true;
|
||||
}
|
||||
|
||||
@ -854,7 +924,7 @@ bool Client::processWithFuzzing(const String & full_query)
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
client_exception = std::make_unique<Exception>(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode());
|
||||
client_exception = std::make_unique<Exception>(getCurrentExceptionMessageAndPattern(print_stack_trace), getCurrentExceptionCode());
|
||||
have_error = true;
|
||||
}
|
||||
|
||||
@ -1287,6 +1357,8 @@ void Client::readArguments(
|
||||
}
|
||||
else if (arg == "--allow_repeated_settings")
|
||||
allow_repeated_settings = true;
|
||||
else if (arg == "--allow_merge_tree_settings")
|
||||
allow_merge_tree_settings = true;
|
||||
else
|
||||
common_arguments.emplace_back(arg);
|
||||
}
|
||||
|
@ -47,6 +47,7 @@ protected:
|
||||
private:
|
||||
void printChangedSettings() const;
|
||||
void showWarnings();
|
||||
void parseConnectionsCredentials();
|
||||
std::vector<String> loadWarningMessages();
|
||||
};
|
||||
}
|
||||
|
@ -57,4 +57,28 @@
|
||||
|
||||
The same can be done on user-level configuration, just create & adjust: ~/.clickhouse-client/config.xml
|
||||
-->
|
||||
|
||||
|
||||
<!-- Analog of .netrc -->
|
||||
<![CDATA[
|
||||
<connections_credentials>
|
||||
<connection>
|
||||
<!-- Name of the connection, host option for the client.
|
||||
"host" is not the same as "hostname" since you may want to have different settings for one host,
|
||||
and in this case you can add "prod" and "prod_readonly".
|
||||
|
||||
Default: "hostname" will be used. -->
|
||||
<name>default</name>
|
||||
<!-- Host that will be used for connection. -->
|
||||
<hostname>127.0.0.1</hostname>
|
||||
<port>9000</port>
|
||||
<secure>1</secure>
|
||||
<user>default</user>
|
||||
<password></password>
|
||||
<database></database>
|
||||
<!-- '~' is expanded to HOME, like in any shell -->
|
||||
<history_file></history_file>
|
||||
</connection>
|
||||
</connections_credentials>
|
||||
]]>
|
||||
</config>
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <Common/ZooKeeper/ZooKeeper.h>
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
#include <Common/setThreadName.h>
|
||||
#include <IO/ConnectionTimeoutsContext.h>
|
||||
#include <Interpreters/InterpreterInsertQuery.h>
|
||||
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
@ -2041,7 +2040,7 @@ UInt64 ClusterCopier::executeQueryOnCluster(
|
||||
|
||||
while (true)
|
||||
{
|
||||
auto block = remote_query_executor->read();
|
||||
auto block = remote_query_executor->readBlock();
|
||||
if (!block)
|
||||
break;
|
||||
}
|
||||
|
@ -165,9 +165,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
|
||||
/// should throw exception early and make exception message more readable.
|
||||
if (const auto * insert_query = res->as<ASTInsertQuery>(); insert_query && insert_query->data)
|
||||
{
|
||||
throw Exception(
|
||||
"Can't format ASTInsertQuery with data, since data will be lost",
|
||||
DB::ErrorCodes::INVALID_FORMAT_INSERT_QUERY_WITH_DATA);
|
||||
throw Exception(DB::ErrorCodes::INVALID_FORMAT_INSERT_QUERY_WITH_DATA,
|
||||
"Can't format ASTInsertQuery with data, since data will be lost");
|
||||
}
|
||||
if (!quiet)
|
||||
{
|
||||
|
@ -196,7 +196,7 @@ void Keeper::createServer(const std::string & listen_host, const char * port_nam
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception{message, ErrorCodes::NETWORK_ERROR};
|
||||
throw Exception::createDeprecated(message, ErrorCodes::NETWORK_ERROR);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -362,6 +362,7 @@ try
|
||||
else
|
||||
path = std::filesystem::path{KEEPER_DEFAULT_PATH};
|
||||
|
||||
std::filesystem::create_directories(path);
|
||||
|
||||
/// Check that the process user id matches the owner of the data.
|
||||
const auto effective_user_id = geteuid();
|
||||
@ -375,7 +376,7 @@ try
|
||||
if (effective_user_id == 0)
|
||||
{
|
||||
message += " Run under 'sudo -u " + data_owner + "'.";
|
||||
throw Exception(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA);
|
||||
throw Exception::createDeprecated(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -243,7 +243,6 @@ ColumnFloat64::MutablePtr CatBoostLibraryHandler::evalImpl(
|
||||
const ColumnRawPtrs & columns,
|
||||
bool cat_features_are_strings) const
|
||||
{
|
||||
std::string error_msg = "Error occurred while applying CatBoost model: ";
|
||||
size_t column_size = columns.front()->size();
|
||||
|
||||
auto result = ColumnFloat64::create(column_size * tree_count);
|
||||
@ -265,7 +264,8 @@ ColumnFloat64::MutablePtr CatBoostLibraryHandler::evalImpl(
|
||||
result_buf, column_size * tree_count))
|
||||
{
|
||||
|
||||
throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
|
||||
throw Exception(ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL,
|
||||
"Error occurred while applying CatBoost model: {}", api.GetErrorString());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -288,7 +288,8 @@ ColumnFloat64::MutablePtr CatBoostLibraryHandler::evalImpl(
|
||||
cat_features_buf, cat_features_count,
|
||||
result_buf, column_size * tree_count))
|
||||
{
|
||||
throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
|
||||
throw Exception(ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL,
|
||||
"Error occurred while applying CatBoost model: {}", api.GetErrorString());
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -304,7 +305,8 @@ ColumnFloat64::MutablePtr CatBoostLibraryHandler::evalImpl(
|
||||
cat_features_buf, cat_features_count,
|
||||
result_buf, column_size * tree_count))
|
||||
{
|
||||
throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL);
|
||||
throw Exception(ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL,
|
||||
"Error occurred while applying CatBoost model: {}", api.GetErrorString());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -19,6 +19,9 @@ target_link_libraries(clickhouse-local-lib PRIVATE clickhouse-server-lib)
|
||||
if (TARGET ch_rust::skim)
|
||||
target_link_libraries(clickhouse-local-lib PRIVATE ch_rust::skim)
|
||||
endif()
|
||||
if (TARGET ch_contrib::azure_sdk)
|
||||
target_link_libraries(clickhouse-local-lib PRIVATE ch_contrib::azure_sdk)
|
||||
endif()
|
||||
|
||||
# Always use internal readpassphrase
|
||||
target_link_libraries(clickhouse-local-lib PRIVATE readpassphrase)
|
||||
|
@ -51,6 +51,10 @@
|
||||
#include <Functions/getFuzzerData.h>
|
||||
#endif
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
# include <azure/storage/common/internal/xml_wrapper.hpp>
|
||||
#endif
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
|
||||
@ -115,6 +119,14 @@ void LocalServer::initialize(Poco::Util::Application & self)
|
||||
config().getUInt("thread_pool_queue_size", 10000)
|
||||
);
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
/// See the explanation near the same line in Server.cpp
|
||||
GlobalThreadPool::instance().addOnDestroyCallback([]
|
||||
{
|
||||
Azure::Storage::_internal::XmlGlobalDeinitialize();
|
||||
});
|
||||
#endif
|
||||
|
||||
IOThreadPool::initialize(
|
||||
config().getUInt("max_io_thread_pool_size", 100),
|
||||
config().getUInt("max_io_thread_pool_free_size", 0),
|
||||
|
@ -61,13 +61,18 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ
|
||||
return;
|
||||
}
|
||||
|
||||
bool use_connection_pooling = params.getParsed<bool>("use_connection_pooling", true);
|
||||
|
||||
try
|
||||
{
|
||||
std::string connection_string = params.get("connection_string");
|
||||
|
||||
auto connection = ODBCPooledConnectionFactory::instance().get(
|
||||
validateODBCConnectionString(connection_string),
|
||||
getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
|
||||
nanodbc::ConnectionHolderPtr connection;
|
||||
if (use_connection_pooling)
|
||||
connection = ODBCPooledConnectionFactory::instance().get(
|
||||
validateODBCConnectionString(connection_string), getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
|
||||
else
|
||||
connection = std::make_shared<nanodbc::ConnectionHolder>(validateODBCConnectionString(connection_string));
|
||||
|
||||
auto identifier = getIdentifierQuote(std::move(connection));
|
||||
|
||||
|
@ -102,7 +102,9 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
|
||||
|
||||
std::string format = params.get("format", "RowBinary");
|
||||
std::string connection_string = params.get("connection_string");
|
||||
bool use_connection_pooling = params.getParsed<bool>("use_connection_pooling", true);
|
||||
LOG_TRACE(log, "Connection string: '{}'", connection_string);
|
||||
LOG_TRACE(log, "Use pooling: {}", use_connection_pooling);
|
||||
|
||||
UInt64 max_block_size = DEFAULT_BLOCK_SIZE;
|
||||
if (params.has("max_block_size"))
|
||||
@ -134,7 +136,7 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
|
||||
try
|
||||
{
|
||||
nanodbc::ConnectionHolderPtr connection_handler;
|
||||
if (getContext()->getSettingsRef().odbc_bridge_use_connection_pooling)
|
||||
if (use_connection_pooling)
|
||||
connection_handler = ODBCPooledConnectionFactory::instance().get(
|
||||
validateODBCConnectionString(connection_string), getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
|
||||
else
|
||||
|
@ -70,13 +70,19 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer
|
||||
return;
|
||||
}
|
||||
|
||||
bool use_connection_pooling = params.getParsed<bool>("use_connection_pooling", true);
|
||||
|
||||
try
|
||||
{
|
||||
std::string connection_string = params.get("connection_string");
|
||||
|
||||
auto connection = ODBCPooledConnectionFactory::instance().get(
|
||||
validateODBCConnectionString(connection_string),
|
||||
getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
|
||||
nanodbc::ConnectionHolderPtr connection;
|
||||
|
||||
if (use_connection_pooling)
|
||||
connection = ODBCPooledConnectionFactory::instance().get(
|
||||
validateODBCConnectionString(connection_string), getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
|
||||
else
|
||||
connection = std::make_shared<nanodbc::ConnectionHolder>(validateODBCConnectionString(connection_string));
|
||||
|
||||
bool result = isSchemaAllowed(std::move(connection));
|
||||
|
||||
|
@ -27,6 +27,9 @@ set (CLICKHOUSE_SERVER_LINK
|
||||
if (TARGET ch_contrib::jemalloc)
|
||||
list(APPEND CLICKHOUSE_SERVER_LINK PRIVATE ch_contrib::jemalloc)
|
||||
endif()
|
||||
if (TARGET ch_contrib::azure_sdk)
|
||||
list(APPEND CLICKHOUSE_SERVER_LINK PRIVATE ch_contrib::azure_sdk)
|
||||
endif()
|
||||
|
||||
clickhouse_program_add(server)
|
||||
|
||||
|
@ -128,6 +128,10 @@
|
||||
# include <jemalloc/jemalloc.h>
|
||||
#endif
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
# include <azure/storage/common/internal/xml_wrapper.hpp>
|
||||
#endif
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
extern const Metric Revision;
|
||||
@ -416,7 +420,7 @@ void Server::createServer(
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception{message, ErrorCodes::NETWORK_ERROR};
|
||||
throw Exception::createDeprecated(message, ErrorCodes::NETWORK_ERROR);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -750,6 +754,19 @@ try
|
||||
config().getUInt("max_thread_pool_free_size", 1000),
|
||||
config().getUInt("thread_pool_queue_size", 10000));
|
||||
|
||||
#if USE_AZURE_BLOB_STORAGE
|
||||
/// It makes sense to deinitialize libxml after joining of all threads
|
||||
/// in global pool because libxml uses thread-local memory allocations via
|
||||
/// 'pthread_key_create' and 'pthread_setspecific' which should be deallocated
|
||||
/// at 'pthread_exit'. Deinitialization of libxml leads to call of 'pthread_key_delete'
|
||||
/// and if it is done before joining of threads, allocated memory will not be freed
|
||||
/// and there may be memory leaks in threads that used libxml.
|
||||
GlobalThreadPool::instance().addOnDestroyCallback([]
|
||||
{
|
||||
Azure::Storage::_internal::XmlGlobalDeinitialize();
|
||||
});
|
||||
#endif
|
||||
|
||||
IOThreadPool::initialize(
|
||||
config().getUInt("max_io_thread_pool_size", 100),
|
||||
config().getUInt("max_io_thread_pool_free_size", 0),
|
||||
@ -946,7 +963,7 @@ try
|
||||
if (effective_user_id == 0)
|
||||
{
|
||||
message += " Run under 'sudo -u " + data_owner + "'.";
|
||||
throw Exception(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA);
|
||||
throw Exception::createDeprecated(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1517,13 +1534,13 @@ try
|
||||
global_context->setMMappedFileCache(mmap_cache_size);
|
||||
|
||||
/// A cache for query results.
|
||||
size_t query_result_cache_size = config().getUInt64("query_result_cache.size", 1_GiB);
|
||||
if (query_result_cache_size)
|
||||
global_context->setQueryResultCache(
|
||||
query_result_cache_size,
|
||||
config().getUInt64("query_result_cache.max_entries", 1024),
|
||||
config().getUInt64("query_result_cache.max_entry_size", 1_MiB),
|
||||
config().getUInt64("query_result_cache.max_entry_records", 30'000'000));
|
||||
size_t query_cache_size = config().getUInt64("query_cache.size", 1_GiB);
|
||||
if (query_cache_size)
|
||||
global_context->setQueryCache(
|
||||
query_cache_size,
|
||||
config().getUInt64("query_cache.max_entries", 1024),
|
||||
config().getUInt64("query_cache.max_entry_size", 1_MiB),
|
||||
config().getUInt64("query_cache.max_entry_records", 30'000'000));
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
/// 128 MB
|
||||
|
@ -854,6 +854,51 @@
|
||||
</replica>
|
||||
</shard-->
|
||||
</test_cluster_one_shard_three_replicas_localhost>
|
||||
<parallel_replicas>
|
||||
<shard>
|
||||
<internal_replication>false</internal_replication>
|
||||
<replica>
|
||||
<host>127.0.0.1</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>127.0.0.2</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>127.0.0.3</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>127.0.0.4</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>127.0.0.5</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>127.0.0.6</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>127.0.0.7</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>127.0.0.8</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>127.0.0.9</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>127.0.0.10</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
</parallel_replicas>
|
||||
<test_cluster_two_shards_localhost>
|
||||
<shard>
|
||||
<replica>
|
||||
@ -1466,13 +1511,13 @@
|
||||
</rocksdb>
|
||||
-->
|
||||
|
||||
<!-- Configuration for the query result cache -->
|
||||
<!-- <query_result_cache> -->
|
||||
<!-- Configuration for the query cache -->
|
||||
<!-- <query_cache> -->
|
||||
<!-- <size>1073741824</size> -->
|
||||
<!-- <max_entries>1024</max_entries> -->
|
||||
<!-- <max_entry_size>1048576</max_entry_size> -->
|
||||
<!-- <max_entry_records>30000000</max_entry_records> -->
|
||||
<!-- </query_result_cache> -->
|
||||
<!-- </query_cache> -->
|
||||
|
||||
<!-- Uncomment if enable merge tree metadata cache -->
|
||||
<!--merge_tree_metadata_cache>
|
||||
|
@ -76,7 +76,7 @@
|
||||
#charts
|
||||
{
|
||||
height: 100%;
|
||||
display: flex;
|
||||
display: none;
|
||||
flex-flow: row wrap;
|
||||
gap: 1rem;
|
||||
}
|
||||
@ -170,6 +170,14 @@
|
||||
background: var(--button-background-color);
|
||||
}
|
||||
|
||||
#auth-error {
|
||||
color: var(--error-color);
|
||||
|
||||
display: flex;
|
||||
flex-flow: row nowrap;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
form {
|
||||
display: inline;
|
||||
}
|
||||
@ -293,6 +301,7 @@
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
<div id="auth-error"></div>
|
||||
<div id="charts"></div>
|
||||
<script>
|
||||
|
||||
@ -322,6 +331,11 @@ if (location.protocol != 'file:') {
|
||||
user = 'default';
|
||||
}
|
||||
|
||||
const errorCodeRegex = /Code: (\d+)/
|
||||
const errorCodeMessageMap = {
|
||||
516: 'Error authenticating with database. Please check your connection params and try again.'
|
||||
}
|
||||
|
||||
/// This is just a demo configuration of the dashboard.
|
||||
|
||||
let queries = [
|
||||
@ -597,6 +611,11 @@ function insertChart(i) {
|
||||
query_editor_confirm.value = 'Ok';
|
||||
query_editor_confirm.className = 'edit-confirm';
|
||||
|
||||
function getCurrentIndex() {
|
||||
/// Indices may change after deletion of other element, hence captured "i" may become incorrect.
|
||||
return [...charts.querySelectorAll('.chart')].findIndex(child => chart == child);
|
||||
}
|
||||
|
||||
function editConfirm() {
|
||||
query_editor.style.display = 'none';
|
||||
query_error.style.display = 'none';
|
||||
@ -605,7 +624,8 @@ function insertChart(i) {
|
||||
title_text.data = '';
|
||||
findParamsInQuery(q.query, params);
|
||||
buildParams();
|
||||
draw(i, chart, getParamsForURL(), q.query);
|
||||
const idx = getCurrentIndex();
|
||||
draw(idx, chart, getParamsForURL(), q.query);
|
||||
saveState();
|
||||
}
|
||||
|
||||
@ -649,8 +669,7 @@ function insertChart(i) {
|
||||
let trash_text = document.createTextNode('✕');
|
||||
trash.appendChild(trash_text);
|
||||
trash.addEventListener('click', e => {
|
||||
/// Indices may change after deletion of other element, hence captured "i" may become incorrect.
|
||||
let idx = [...charts.querySelectorAll('.chart')].findIndex(child => chart == child);
|
||||
const idx = getCurrentIndex();
|
||||
if (plots[idx]) {
|
||||
plots[idx].destroy();
|
||||
plots[idx] = null;
|
||||
@ -796,6 +815,18 @@ async function draw(idx, chart, url_params, query) {
|
||||
error = e.toString();
|
||||
}
|
||||
|
||||
if (error) {
|
||||
const errorMatch = error.match(errorCodeRegex)
|
||||
if (errorMatch && errorMatch[1]) {
|
||||
const code = errorMatch[1]
|
||||
if (errorCodeMessageMap[code]) {
|
||||
const authError = new Error(errorCodeMessageMap[code])
|
||||
authError.code = code
|
||||
throw authError
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!error) {
|
||||
if (!Array.isArray(data)) {
|
||||
error = "Query should return an array.";
|
||||
@ -853,16 +884,50 @@ async function draw(idx, chart, url_params, query) {
|
||||
sync.sub(plots[idx]);
|
||||
|
||||
/// Set title
|
||||
const title = queries[idx].title ? queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ) : '';
|
||||
const title = queries[idx] && queries[idx].title ? queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ) : '';
|
||||
chart.querySelector('.title').firstChild.data = title;
|
||||
}
|
||||
|
||||
function showAuthError(message) {
|
||||
const charts = document.querySelector('#charts');
|
||||
charts.style.display = 'none';
|
||||
const add = document.querySelector('#add');
|
||||
add.style.display = 'none';
|
||||
|
||||
const authError = document.querySelector('#auth-error');
|
||||
authError.textContent = message;
|
||||
authError.style.display = 'flex';
|
||||
}
|
||||
|
||||
function hideAuthError() {
|
||||
const charts = document.querySelector('#charts');
|
||||
charts.style.display = 'flex';
|
||||
const add = document.querySelector('#add');
|
||||
add.style.display = 'block';
|
||||
|
||||
const authError = document.querySelector('#auth-error');
|
||||
authError.textContent = '';
|
||||
authError.style.display = 'none';
|
||||
}
|
||||
|
||||
let firstLoad = true;
|
||||
|
||||
async function drawAll() {
|
||||
let params = getParamsForURL();
|
||||
const charts = document.getElementsByClassName('chart');
|
||||
for (let i = 0; i < queries.length; ++i) {
|
||||
draw(i, charts[i], params, queries[i].query);
|
||||
|
||||
if (!firstLoad) {
|
||||
hideAuthError();
|
||||
}
|
||||
await Promise.all([...Array(queries.length)].map(async (_, i) => {
|
||||
return draw(i, charts[i], params, queries[i].query).catch((e) => {
|
||||
if (!firstLoad) {
|
||||
showAuthError(e.message);
|
||||
}
|
||||
});
|
||||
})).then(() => {
|
||||
firstLoad = false;
|
||||
})
|
||||
}
|
||||
|
||||
function resize() {
|
||||
|
@ -11,11 +11,26 @@ mod ffi {
|
||||
|
||||
struct Item {
|
||||
text: String,
|
||||
orig_text: String,
|
||||
}
|
||||
impl Item {
|
||||
fn new(text: String) -> Self {
|
||||
return Self{
|
||||
// Text that will be shown should not contains new lines since in this case skim may
|
||||
// live some symbols on the screen, and this looks odd.
|
||||
text: text.replace("\n", " "),
|
||||
orig_text: text,
|
||||
};
|
||||
}
|
||||
}
|
||||
impl SkimItem for Item {
|
||||
fn text(&self) -> Cow<str> {
|
||||
return Cow::Borrowed(&self.text);
|
||||
}
|
||||
|
||||
fn output(&self) -> Cow<str> {
|
||||
return Cow::Borrowed(&self.orig_text);
|
||||
}
|
||||
}
|
||||
|
||||
fn skim(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, String> {
|
||||
@ -34,7 +49,7 @@ fn skim(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, Stri
|
||||
|
||||
let (tx, rx): (SkimItemSender, SkimItemReceiver) = unbounded();
|
||||
for word in words {
|
||||
tx.send(Arc::new(Item{ text: word.to_string() })).unwrap();
|
||||
tx.send(Arc::new(Item::new(word.to_string()))).unwrap();
|
||||
}
|
||||
// so that skim could know when to stop waiting for more items.
|
||||
drop(tx);
|
||||
|
Some files were not shown because too many files have changed in this diff.