Merge branch 'master' into remove-trash-3

This commit is contained in:
Alexey Milovidov 2022-09-10 05:08:27 +02:00
commit 5e14b4575b
187 changed files with 3927 additions and 1087 deletions

View File

@ -349,6 +349,100 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinDarwin:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinDarwinAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
@ -425,6 +519,46 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
BuilderSpecialReport:
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/report_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=ClickHouse special build check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Report Builder
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cat > "$NEEDS_DATA_PATH" << 'EOF'
${{ toJSON(needs) }}
EOF
cd "$GITHUB_WORKSPACE/tests/ci"
python3 build_report_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
##############################################################################################
########################### FUNCTIONAL STATELESS TESTS #######################################
##############################################################################################
@ -592,6 +726,7 @@ jobs:
- DockerHubPush
- DockerServerImages
- BuilderReport
- BuilderSpecialReport
- FunctionalStatelessTestAsan
- FunctionalStatefulTestDebug
- StressTestTsan

View File

@ -923,6 +923,53 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinAmd64SSE2:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_amd64sse2
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
@ -1011,6 +1058,7 @@ jobs:
- BuilderBinFreeBSD
# - BuilderBinGCC
- BuilderBinPPC64
- BuilderBinAmd64SSE2
- BuilderBinClangTidy
- BuilderDebShared
runs-on: [self-hosted, style-checker]

View File

@ -935,6 +935,51 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinAmd64SSE2:
needs: [DockerHubPush, FastTest, StyleCheck]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_amd64sse2
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
@ -1023,6 +1068,7 @@ jobs:
- BuilderBinFreeBSD
# - BuilderBinGCC
- BuilderBinPPC64
- BuilderBinAmd64SSE2
- BuilderBinClangTidy
- BuilderDebShared
runs-on: [self-hosted, style-checker]

View File

@ -29,8 +29,12 @@ jobs:
rm -rf "$TEMP_PATH" && mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY"
# Download and push packages to artifactory
python3 ./tests/ci/push_to_artifactory.py --release "${{ github.ref }}" \
--commit '${{ github.sha }}' --artifactory-url "${{ secrets.JFROG_ARTIFACTORY_URL }}" --all
# Download macos binaries to ${{runner.temp}}/download_binary
python3 ./tests/ci/download_binary.py binary_darwin binary_darwin_aarch64
mv '${{runner.temp}}/download_binary/'clickhouse-* '${{runner.temp}}/push_to_artifactory'
- name: Upload packages to release assets
uses: svenstaro/upload-release-action@v2
with:

View File

@ -426,6 +426,100 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinDarwin:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
BuilderBinDarwinAarch64:
needs: [DockerHubPush]
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
BUILD_NAME=binary_darwin_aarch64
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
with:
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
git -C "$GITHUB_WORKSPACE" submodule sync --recursive
git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
- name: Upload build URLs to artifacts
if: ${{ success() || failure() }}
uses: actions/upload-artifact@v2
with:
name: ${{ env.BUILD_URLS }}
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
@ -505,6 +599,46 @@ jobs:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
BuilderSpecialReport:
needs:
- BuilderBinDarwin
- BuilderBinDarwinAarch64
runs-on: [self-hosted, style-checker]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/report_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=ClickHouse special build check
NEEDS_DATA_PATH=${{runner.temp}}/needs.json
EOF
- name: Download json reports
uses: actions/download-artifact@v2
with:
path: ${{ env.REPORTS_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
uses: actions/checkout@v2
- name: Report Builder
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cat > "$NEEDS_DATA_PATH" << 'EOF'
${{ toJSON(needs) }}
EOF
cd "$GITHUB_WORKSPACE/tests/ci"
python3 build_report_check.py "$CHECK_NAME"
- name: Cleanup
if: always()
run: |
# shellcheck disable=SC2046
docker kill $(docker ps -q) ||:
# shellcheck disable=SC2046
docker rm -f $(docker ps -a -q) ||:
sudo rm -fr "$TEMP_PATH"
##############################################################################################
########################### FUNCTIONAL STATELESS TESTS #######################################
##############################################################################################
@ -1847,6 +1981,7 @@ jobs:
- DockerHubPush
- DockerServerImages
- BuilderReport
- BuilderSpecialReport
- FunctionalStatelessTestDebug0
- FunctionalStatelessTestDebug1
- FunctionalStatelessTestDebug2

View File

@ -143,6 +143,8 @@ include (cmake/add_warning.cmake)
if (COMPILER_CLANG)
# generate ranges for fast "addr2line" search
if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
# NOTE: clang has a bug: it does not emit .debug_aranges with ThinLTO,
# so a custom ld.lld wrapper is shipped in docker images.
set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges")
endif ()

View File

@ -15,4 +15,5 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any.
## Upcoming events
* [**v22.8 Release Webinar**](https://clickhouse.com/company/events/v22-8-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.
* [**v22.9 Release Webinar**](https://clickhouse.com/company/events/v22-9-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap.
* [**ClickHouse for Analytics @ Barracuda Networks**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/288140358/) Join us for this in-person meetup hosted by our friends at Barracuda in the Bay Area.

View File

@ -24,6 +24,23 @@ option (ENABLE_BMI "Use BMI instructions on x86_64" 0)
option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0)
option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0)
# X86: Allow compilation for a SSE2-only target machine. Done by a special build in CI for embedded or very old hardware.
option (NO_SSE3_OR_HIGHER "Disable SSE3 or higher on x86_64" 0)
if (NO_SSE3_OR_HIGHER)
SET(ENABLE_SSSE3 0)
SET(ENABLE_SSE41 0)
SET(ENABLE_SSE42 0)
SET(ENABLE_PCLMULQDQ 0)
SET(ENABLE_POPCNT 0)
SET(ENABLE_AVX 0)
SET(ENABLE_AVX2 0)
SET(ENABLE_AVX512 0)
SET(ENABLE_AVX512_VBMI 0)
SET(ENABLE_BMI 0)
SET(ENABLE_AVX2_FOR_SPEC_OP 0)
SET(ENABLE_AVX512_FOR_SPEC_OP 0)
endif()
option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries non-portable but more performant code may be generated. This option overrides ENABLE_* options for specific instruction set. Highly not recommended to use." 0)
if (ARCH_NATIVE)

17
cmake/ld.lld.in Executable file
View File

@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Thin wrapper around the real linker ("@LLD_PATH@").
#
# Works around a bug in llvm/clang: .debug_aranges is not produced with LTO.
#
# NOTE: this is a temporary solution and should be removed once [1] is
# resolved.
#
# [1]: https://discourse.llvm.org/t/clang-does-not-produce-full-debug-aranges-section-with-thinlto/64898/8

# NOTE: only -flto=thin is supported.
# NOTE: there is no way to tell whether -gdwarf-aranges was requested initially.
case "$*" in
    *-plugin-opt=thinlto*)
        # ThinLTO link: additionally request generation of the aranges section.
        exec "@LLD_PATH@" -mllvm -generate-arange-section "$@"
        ;;
    *)
        # Any other link: forward the arguments unchanged.
        exec "@LLD_PATH@" "$@"
        ;;
esac

View File

@ -20,7 +20,7 @@ macro(clickhouse_split_debug_symbols)
COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin"
COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
# Splits debug symbols into separate file, leaves the binary untouched:
COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND "${OBJCOPY_PATH}" --only-keep-debug "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
# Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check:
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"

View File

@ -94,8 +94,13 @@ if (LINKER_NAME)
if (NOT LLD_PATH)
message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.")
endif ()
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_PATH}")
# This is a temporary quirk to emit .debug_aranges with ThinLTO
set (LLD_WRAPPER "${CMAKE_CURRENT_BINARY_DIR}/ld.lld")
configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
else ()
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")

View File

@ -1,6 +1,6 @@
# We use vectorscan, a portable and API/ABI-compatible drop-in replacement for hyperscan.
if (ARCH_AMD64)
if (ARCH_AMD64 AND NOT NO_SSE3_OR_HIGHER)
option (ENABLE_VECTORSCAN "Enable vectorscan library" ${ENABLE_LIBRARIES})
endif()

View File

@ -130,6 +130,7 @@ def parse_env_variables(
ARM_SUFFIX = "-aarch64"
FREEBSD_SUFFIX = "-freebsd"
PPC_SUFFIX = "-ppc64le"
AMD64_SSE2_SUFFIX = "-amd64sse2"
result = []
result.append("OUTPUT_DIR=/output")
@ -141,6 +142,7 @@ def parse_env_variables(
is_cross_arm = compiler.endswith(ARM_SUFFIX)
is_cross_ppc = compiler.endswith(PPC_SUFFIX)
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
is_amd64_sse2 = compiler.endswith(AMD64_SSE2_SUFFIX)
if is_cross_darwin:
cc = compiler[: -len(DARWIN_SUFFIX)]
@ -186,6 +188,10 @@ def parse_env_variables(
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake"
)
elif is_amd64_sse2:
cc = compiler[: -len(AMD64_SSE2_SUFFIX)]
result.append("DEB_ARCH=amd64")
cmake_flags.append("-DNO_SSE3_OR_HIGHER=1")
else:
cc = compiler
result.append("DEB_ARCH=amd64")
@ -339,6 +345,7 @@ if __name__ == "__main__":
"clang-14-darwin-aarch64",
"clang-14-aarch64",
"clang-14-ppc64le",
"clang-14-amd64sse2",
"clang-14-freebsd",
"gcc-11",
),

View File

@ -1,8 +1,15 @@
#!/bin/bash
# shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031
set -eux
set -x
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
set -e
set -u
set -o pipefail
trap "exit" INT TERM
# The watchdog is in the separate process group, so we have to kill it separately
# if the script terminates earlier.
@ -87,6 +94,19 @@ function configure
# TODO figure out which ones are needed
cp -av --dereference "$repo_dir"/tests/config/config.d/listen.xml db/config.d
cp -av --dereference "$script_dir"/query-fuzzer-tweaks-users.xml db/users.d
cat > db/config.d/core.xml <<EOL
<clickhouse>
<core_dump>
<!-- 100GiB -->
<size_limit>107374182400</size_limit>
</core_dump>
<!-- NOTE: no need to configure core_path,
since clickhouse is not started as daemon (via clickhouse start)
-->
<core_path>$PWD</core_path>
</clickhouse>
EOL
}
function watchdog
@ -180,7 +200,6 @@ handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
info signals
continue
gcore
backtrace full
thread apply all backtrace full
info registers

View File

@ -3,8 +3,14 @@
# shellcheck disable=SC2086
# shellcheck disable=SC2024
# Avoid overlaps with previous runs
dmesg --clear
set -x
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
# Thread Fuzzer allows to check more permutations of possible thread scheduling
# and find more potential issues.
@ -101,6 +107,19 @@ EOL
</default>
</profiles>
</clickhouse>
EOL
cat > /etc/clickhouse-server/config.d/core.xml <<EOL
<clickhouse>
<core_dump>
<!-- 100GiB -->
<size_limit>107374182400</size_limit>
</core_dump>
<!-- NOTE: no need to configure core_path,
since clickhouse is not started as daemon (via clickhouse start)
-->
<core_path>$PWD</core_path>
</clickhouse>
EOL
}
@ -157,7 +176,6 @@ handle SIGUSR2 nostop noprint pass
handle SIG$RTMIN nostop noprint pass
info signals
continue
gcore
backtrace full
thread apply all backtrace full
info registers
@ -501,8 +519,7 @@ done
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# Core dumps (see gcore)
# Default filename is 'core.PROCESS_ID'
# Core dumps
for core in core.*; do
pigz $core
mv $core.gz /test_output/

View File

@ -140,6 +140,6 @@ hash cmake
ClickHouse is available in pre-built binaries and packages. Binaries are portable and can be run on any Linux flavour.
They are built for stable, prestable and testing releases as long as for every commit to master and for every pull request.
Binaries are built for stable and LTS releases, and also for every commit to `master` and for each pull request.
To find the freshest build from `master`, go to the [commits page](https://github.com/ClickHouse/ClickHouse/commits/master), click on the first green check mark or red cross near a commit, and click on the “Details” link right after “ClickHouse Build Check”.

View File

@ -0,0 +1,654 @@
---
slug: /en/getting-started/example-datasets/nypd_complaint_data
sidebar_label: NYPD Complaint Data
description: "Ingest and query Tab Separated Value data in 5 steps"
title: NYPD Complaint Data
---
Tab separated value, or TSV, files are common and may include field headings as the first line of the file. ClickHouse can ingest TSVs, and also can query TSVs without ingesting the files. This guide covers both of these cases. If you need to query or ingest CSV files, the same techniques work, simply substitute `TSV` with `CSV` in your format arguments.
While working through this guide you will:
- **Investigate**: Query the structure and content of the TSV file.
- **Determine the target ClickHouse schema**: Choose proper data types and map the existing data to those types.
- **Create a ClickHouse table**.
- **Preprocess and stream** the data to ClickHouse.
- **Run some queries** against ClickHouse.
The dataset used in this guide comes from the NYC Open Data team, and contains data about "all valid felony, misdemeanor, and violation crimes reported to the New York City Police Department (NYPD)". At the time of writing, the data file is 166MB, but it is updated regularly.
**Source**: [data.cityofnewyork.us](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243)
**Terms of use**: https://www1.nyc.gov/home/terms-of-use.page
## Prerequisites
- Download the dataset by visiting the [NYPD Complaint Data Current (Year To Date)](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243) page, clicking the Export button, and choosing **TSV for Excel**.
- Install [ClickHouse server and client](../../getting-started/install.md).
- [Launch](../../getting-started/install.md#launch) ClickHouse server, and connect with `clickhouse-client`
### A note about the commands described in this guide
There are two types of commands in this guide:
- Some of the commands query the TSV files; these are run at the command prompt.
- The rest of the commands are querying ClickHouse, and these are run in the `clickhouse-client` or Play UI.
:::note
The examples in this guide assume that you have saved the TSV file to `${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv`; please adjust the commands if needed.
:::
## Familiarize yourself with the TSV file
Before starting to work with the ClickHouse database, familiarize yourself with the data.
### Look at the fields in the source TSV file
This is an example of a command to query a TSV file, but don't run it yet.
```sh
clickhouse-local --query \
"describe file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')"
```
Sample response
```response
CMPLNT_NUM Nullable(Float64)
ADDR_PCT_CD Nullable(Float64)
BORO_NM Nullable(String)
CMPLNT_FR_DT Nullable(String)
CMPLNT_FR_TM Nullable(String)
```
:::tip
Most of the time the above command will let you know which fields in the input data are numeric, and which are strings, and which are tuples. This is not always the case. Because ClickHouse is routinely used with datasets containing billions of records, there is a default number (100) of rows examined to [infer the schema](../../guides/developer/working-with-json/json-semi-structured.md/#relying-on-schema-inference) in order to avoid parsing billions of rows to infer the schema. The response below may not match what you see, as the dataset is updated several times each year. Looking at the Data Dictionary you can see that CMPLNT_NUM is specified as text, and not numeric. By overriding the default of 100 rows for inference with the setting `SETTINGS input_format_max_rows_to_read_for_schema_inference=2000`
you can get a better idea of the content.
Note: as of version 22.5 the default is now 25,000 rows for inferring the schema, so only change the setting if you are on an older version or if you need more than 25,000 rows to be sampled.
:::
Run this command at your command prompt. You will be using `clickhouse-local` to query the data in the TSV file you downloaded.
```sh
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"describe file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')"
```
Result:
```response
CMPLNT_NUM Nullable(String)
ADDR_PCT_CD Nullable(Float64)
BORO_NM Nullable(String)
CMPLNT_FR_DT Nullable(String)
CMPLNT_FR_TM Nullable(String)
CMPLNT_TO_DT Nullable(String)
CMPLNT_TO_TM Nullable(String)
CRM_ATPT_CPTD_CD Nullable(String)
HADEVELOPT Nullable(String)
HOUSING_PSA Nullable(Float64)
JURISDICTION_CODE Nullable(Float64)
JURIS_DESC Nullable(String)
KY_CD Nullable(Float64)
LAW_CAT_CD Nullable(String)
LOC_OF_OCCUR_DESC Nullable(String)
OFNS_DESC Nullable(String)
PARKS_NM Nullable(String)
PATROL_BORO Nullable(String)
PD_CD Nullable(Float64)
PD_DESC Nullable(String)
PREM_TYP_DESC Nullable(String)
RPT_DT Nullable(String)
STATION_NAME Nullable(String)
SUSP_AGE_GROUP Nullable(String)
SUSP_RACE Nullable(String)
SUSP_SEX Nullable(String)
TRANSIT_DISTRICT Nullable(Float64)
VIC_AGE_GROUP Nullable(String)
VIC_RACE Nullable(String)
VIC_SEX Nullable(String)
X_COORD_CD Nullable(Float64)
Y_COORD_CD Nullable(Float64)
Latitude Nullable(Float64)
Longitude Nullable(Float64)
Lat_Lon Tuple(Nullable(Float64), Nullable(Float64))
New Georeferenced Column Nullable(String)
```
At this point you should check that the columns in the TSV file match the names and types specified in the **Columns in this Dataset** section of the [dataset web page](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243). The data types are not very specific: all numeric fields are set to `Nullable(Float64)`, and all other fields are `Nullable(String)`. When you create a ClickHouse table to store the data you can specify more appropriate and performant types.
### Determine the proper schema
In order to figure out what types should be used for the fields it is necessary to know what the data looks like. For example, the field `JURISDICTION_CODE` is a numeric: should it be a `UInt8`, or an `Enum`, or is `Float64` appropriate?
```sh
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select JURISDICTION_CODE, count() FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
GROUP BY JURISDICTION_CODE
ORDER BY JURISDICTION_CODE
FORMAT PrettyCompact"
```
Result:
```response
┌─JURISDICTION_CODE─┬─count()─┐
│ 0 │ 188875 │
│ 1 │ 4799 │
│ 2 │ 13833 │
│ 3 │ 656 │
│ 4 │ 51 │
│ 6 │ 5 │
│ 7 │ 2 │
│ 9 │ 13 │
│ 11 │ 14 │
│ 12 │ 5 │
│ 13 │ 2 │
│ 14 │ 70 │
│ 15 │ 20 │
│ 72 │ 159 │
│ 87 │ 9 │
│ 88 │ 75 │
│ 97 │ 405 │
└───────────────────┴─────────┘
```
The query response shows that the `JURISDICTION_CODE` fits well in a `UInt8`.
Similarly, look at some of the `String` fields and see if they are well suited to being `DateTime` or [`LowCardinality(String)`](../../sql-reference/data-types/lowcardinality.md) fields.
For example, the field `PARKS_NM` is described as "Name of NYC park, playground or greenspace of occurrence, if applicable (state parks are not included)". The names of parks in New York City may be a good candidate for a `LowCardinality(String)`:
```sh
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select count(distinct PARKS_NM) FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
FORMAT PrettyCompact"
```
Result:
```response
┌─uniqExact(PARKS_NM)─┐
│ 319 │
└─────────────────────┘
```
Have a look at some of the park names:
```sh
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select distinct PARKS_NM FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
LIMIT 10
FORMAT PrettyCompact"
```
Result:
```response
┌─PARKS_NM───────────────────┐
│ (null) │
│ ASSER LEVY PARK │
│ JAMES J WALKER PARK │
│ BELT PARKWAY/SHORE PARKWAY │
│ PROSPECT PARK │
│ MONTEFIORE SQUARE │
│ SUTTON PLACE PARK │
│ JOYCE KILMER PARK │
│ ALLEY ATHLETIC PLAYGROUND │
│ ASTORIA PARK │
└────────────────────────────┘
```
The dataset in use at the time of writing has only a few hundred distinct parks and playgrounds in the `PARKS_NM` column. This is a small number based on the [LowCardinality](../../sql-reference/data-types/lowcardinality.md#lowcardinality-dscr) recommendation to stay below 10,000 distinct strings in a `LowCardinality(String)` field.
### DateTime fields
Based on the **Columns in this Dataset** section of the [dataset web page](https://data.cityofnewyork.us/Public-Safety/NYPD-Complaint-Data-Current-Year-To-Date-/5uac-w243) there are date and time fields for the start and end of the reported event. Looking at the min and max of the `CMPLNT_FR_DT` and `CMPLNT_TO_DT` gives an idea of whether or not the fields are always populated:
```sh title="CMPLNT_FR_DT"
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select min(CMPLNT_FR_DT), max(CMPLNT_FR_DT) FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
FORMAT PrettyCompact"
```
Result:
```response
┌─min(CMPLNT_FR_DT)─┬─max(CMPLNT_FR_DT)─┐
│ 01/01/1973 │ 12/31/2021 │
└───────────────────┴───────────────────┘
```
```sh title="CMPLNT_TO_DT"
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select min(CMPLNT_TO_DT), max(CMPLNT_TO_DT) FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
FORMAT PrettyCompact"
```
Result:
```response
┌─min(CMPLNT_TO_DT)─┬─max(CMPLNT_TO_DT)─┐
│ │ 12/31/2021 │
└───────────────────┴───────────────────┘
```
```sh title="CMPLNT_FR_TM"
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select min(CMPLNT_FR_TM), max(CMPLNT_FR_TM) FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
FORMAT PrettyCompact"
```
Result:
```response
┌─min(CMPLNT_FR_TM)─┬─max(CMPLNT_FR_TM)─┐
│ 00:00:00 │ 23:59:00 │
└───────────────────┴───────────────────┘
```
```sh title="CMPLNT_TO_TM"
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select min(CMPLNT_TO_TM), max(CMPLNT_TO_TM) FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
FORMAT PrettyCompact"
```
Result:
```response
┌─min(CMPLNT_TO_TM)─┬─max(CMPLNT_TO_TM)─┐
│ (null) │ 23:59:00 │
└───────────────────┴───────────────────┘
```
## Make a plan
Based on the above investigation:
- `JURISDICTION_CODE` should be cast as `UInt8`.
- `PARKS_NM` should be cast to `LowCardinality(String)`
- `CMPLNT_FR_DT` and `CMPLNT_FR_TM` are always populated (possibly with a default time of `00:00:00`)
- `CMPLNT_TO_DT` and `CMPLNT_TO_TM` may be empty
- Dates and times are stored in separate fields in the source
- Dates are `mm/dd/yyyy` format
- Times are `hh:mm:ss` format
- Dates and times can be concatenated into DateTime types
- There are some dates before January 1st 1970, which means we need a 64 bit DateTime
:::note
There are many more changes to be made to the types, they all can be determined by following the same investigation steps. Look at the number of distinct strings in a field, the min and max of the numerics, and make your decisions. The table schema that is given later in the guide has many low cardinality strings and unsigned integer fields and very few floating point numerics.
:::
## Concatenate the date and time fields
To concatenate the date and time fields `CMPLNT_FR_DT` and `CMPLNT_FR_TM` into a single `String` that can be cast to a `DateTime`, select the two fields joined by the concatenation operator: `CMPLNT_FR_DT || ' ' || CMPLNT_FR_TM`. The `CMPLNT_TO_DT` and `CMPLNT_TO_TM` fields are handled similarly.
```sh
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select CMPLNT_FR_DT || ' ' || CMPLNT_FR_TM AS complaint_begin FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
LIMIT 10
FORMAT PrettyCompact"
```
Result:
```response
┌─complaint_begin─────┐
│ 07/29/2010 00:01:00 │
│ 12/01/2011 12:00:00 │
│ 04/01/2017 15:00:00 │
│ 03/26/2018 17:20:00 │
│ 01/01/2019 00:00:00 │
│ 06/14/2019 00:00:00 │
│ 11/29/2021 20:00:00 │
│ 12/04/2021 00:35:00 │
│ 12/05/2021 12:50:00 │
│ 12/07/2021 20:30:00 │
└─────────────────────┘
```
## Convert the date and time String to a DateTime64 type
Earlier in the guide we discovered that there are dates in the TSV file before January 1st 1970, which means that we need a 64 bit DateTime type for the dates. The dates also need to be converted from `MM/DD/YYYY` to `YYYY/MM/DD` format. Both of these can be done with [`parseDateTime64BestEffort()`](../../sql-reference/functions/type-conversion-functions.md#parsedatetime64besteffort).
```sh
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"WITH (CMPLNT_FR_DT || ' ' || CMPLNT_FR_TM) AS CMPLNT_START,
(CMPLNT_TO_DT || ' ' || CMPLNT_TO_TM) AS CMPLNT_END
select parseDateTime64BestEffort(CMPLNT_START) AS complaint_begin,
parseDateTime64BestEffortOrNull(CMPLNT_END) AS complaint_end
FROM file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
ORDER BY complaint_begin ASC
LIMIT 25
FORMAT PrettyCompact"
```
Lines 2 and 3 above contain the concatenation from the previous step, and lines 4 and 5 above parse the strings into `DateTime64`. As the complaint end time is not guaranteed to exist `parseDateTime64BestEffortOrNull` is used.
Result:
```response
┌─────────complaint_begin─┬───────────complaint_end─┐
│ 1925-01-01 10:00:00.000 │ 2021-02-12 09:30:00.000 │
│ 1925-01-01 11:37:00.000 │ 2022-01-16 11:49:00.000 │
│ 1925-01-01 15:00:00.000 │ 2021-12-31 00:00:00.000 │
│ 1925-01-01 15:00:00.000 │ 2022-02-02 22:00:00.000 │
│ 1925-01-01 19:00:00.000 │ 2022-04-14 05:00:00.000 │
│ 1955-09-01 19:55:00.000 │ 2022-08-01 00:45:00.000 │
│ 1972-03-17 11:40:00.000 │ 2022-03-17 11:43:00.000 │
│ 1972-05-23 22:00:00.000 │ 2022-05-24 09:00:00.000 │
│ 1972-05-30 23:37:00.000 │ 2022-05-30 23:50:00.000 │
│ 1972-07-04 02:17:00.000 │ ᴺᵁᴸᴸ │
│ 1973-01-01 00:00:00.000 │ ᴺᵁᴸᴸ │
│ 1975-01-01 00:00:00.000 │ ᴺᵁᴸᴸ │
│ 1976-11-05 00:01:00.000 │ 1988-10-05 23:59:00.000 │
│ 1977-01-01 00:00:00.000 │ 1977-01-01 23:59:00.000 │
│ 1977-12-20 00:01:00.000 │ ᴺᵁᴸᴸ │
│ 1981-01-01 00:01:00.000 │ ᴺᵁᴸᴸ │
│ 1981-08-14 00:00:00.000 │ 1987-08-13 23:59:00.000 │
│ 1983-01-07 00:00:00.000 │ 1990-01-06 00:00:00.000 │
│ 1984-01-01 00:01:00.000 │ 1984-12-31 23:59:00.000 │
│ 1985-01-01 12:00:00.000 │ 1987-12-31 15:00:00.000 │
│ 1985-01-11 09:00:00.000 │ 1985-12-31 12:00:00.000 │
│ 1986-03-16 00:05:00.000 │ 2022-03-16 00:45:00.000 │
│ 1987-01-07 00:00:00.000 │ 1987-01-09 00:00:00.000 │
│ 1988-04-03 18:30:00.000 │ 2022-08-03 09:45:00.000 │
│ 1988-07-29 12:00:00.000 │ 1990-07-27 22:00:00.000 │
└─────────────────────────┴─────────────────────────┘
```
:::note
The dates shown as `1925` above are from errors in the data. There are several records in the original data with dates in the years `1019` - `1022` that should be `2019` - `2022`. They are being stored as Jan 1st 1925 as that is the earliest date with a 64 bit DateTime.
:::
## Create a table
The decisions made above on the data types used for the columns are reflected in the table schema
below. We also need to decide on the `ORDER BY` and `PRIMARY KEY` used for the table. At least one
of `ORDER BY` or `PRIMARY KEY` must be specified. Here are some guidelines on deciding on the
columns to include in `ORDER BY`, and more information is in the *Next Steps* section at the end
of this document.
### Order By and Primary Key clauses
- The `ORDER BY` tuple should include fields that are used in query filters
- To maximize compression on disk the `ORDER BY` tuple should be ordered by ascending cardinality
- If it exists, the `PRIMARY KEY` tuple must be a subset of the `ORDER BY` tuple
- If only `ORDER BY` is specified, then the same tuple will be used as `PRIMARY KEY`
- The primary key index is created using the `PRIMARY KEY` tuple if specified, otherwise the `ORDER BY` tuple
- The `PRIMARY KEY` index is kept in main memory
Looking at the dataset and the questions that might be answered by querying it we might
decide that we would look at the types of crimes reported over time in the five boroughs of
New York City. These fields might be then included in the `ORDER BY`:
| Column | Description (from the data dictionary) |
| ----------- | --------------------------------------------------- |
| OFNS_DESC | Description of offense corresponding with key code |
| RPT_DT | Date event was reported to police |
| BORO_NM | The name of the borough in which the incident occurred |
Querying the TSV file for the cardinality of the three candidate columns:
```bash
clickhouse-local --input_format_max_rows_to_read_for_schema_inference=2000 \
--query \
"select formatReadableQuantity(uniq(OFNS_DESC)) as cardinality_OFNS_DESC,
formatReadableQuantity(uniq(RPT_DT)) as cardinality_RPT_DT,
formatReadableQuantity(uniq(BORO_NM)) as cardinality_BORO_NM
FROM
file('${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv', 'TSVWithNames')
FORMAT PrettyCompact"
```
Result:
```response
┌─cardinality_OFNS_DESC─┬─cardinality_RPT_DT─┬─cardinality_BORO_NM─┐
│ 60.00 │ 306.00 │ 6.00 │
└───────────────────────┴────────────────────┴─────────────────────┘
```
Ordering by cardinality, the `ORDER BY` becomes:
```
ORDER BY ( BORO_NM, OFNS_DESC, RPT_DT )
```
:::note
The table below will use more easily read column names, the above names will be mapped to
```
ORDER BY ( borough, offense_description, date_reported )
```
:::
Putting together the changes to data types and the `ORDER BY` tuple gives this table structure:
```sql
CREATE TABLE NYPD_Complaint (
complaint_number String,
precinct UInt8,
borough LowCardinality(String),
complaint_begin DateTime64(0,'America/New_York'),
complaint_end DateTime64(0,'America/New_York'),
was_crime_completed String,
housing_authority String,
housing_level_code UInt32,
jurisdiction_code UInt8,
jurisdiction LowCardinality(String),
offense_code UInt8,
offense_level LowCardinality(String),
location_descriptor LowCardinality(String),
offense_description LowCardinality(String),
park_name LowCardinality(String),
patrol_borough LowCardinality(String),
PD_CD UInt16,
PD_DESC String,
location_type LowCardinality(String),
date_reported Date,
transit_station LowCardinality(String),
suspect_age_group LowCardinality(String),
suspect_race LowCardinality(String),
suspect_sex LowCardinality(String),
transit_district UInt8,
victim_age_group LowCardinality(String),
victim_race LowCardinality(String),
victim_sex LowCardinality(String),
NY_x_coordinate UInt32,
NY_y_coordinate UInt32,
Latitude Float64,
Longitude Float64
) ENGINE = MergeTree
ORDER BY ( borough, offense_description, date_reported )
```
### Finding the primary key of a table
The ClickHouse `system` database, specifically `system.tables`, has all of the information about the table you
just created. This query shows the `ORDER BY` (sorting key), and the `PRIMARY KEY`:
```sql
SELECT
partition_key,
sorting_key,
primary_key,
table
FROM system.tables
WHERE table = 'NYPD_Complaint'
FORMAT Vertical
```
Response
```response
Query id: 6a5b10bf-9333-4090-b36e-c7f08b1d9e01
Row 1:
──────
partition_key:
sorting_key: borough, offense_description, date_reported
primary_key: borough, offense_description, date_reported
table: NYPD_Complaint
1 row in set. Elapsed: 0.001 sec.
```
## Preprocess and Import Data {#preprocess-import-data}
We will use `clickhouse-local` tool for data preprocessing and `clickhouse-client` to upload it.
### `clickhouse-local` arguments used
:::tip
`table='input'` appears in the arguments to clickhouse-local below. clickhouse-local takes the provided input (`cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv`) and inserts the input into a table. By default the table is named `table`. In this guide the name of the table is set to `input` to make the data flow clearer. The final argument to clickhouse-local is a query that selects from the table (`FROM input`) which is then piped to `clickhouse-client` to populate the table `NYPD_Complaint`.
:::
```sh
cat ${HOME}/NYPD_Complaint_Data_Current__Year_To_Date_.tsv \
| clickhouse-local --table='input' --input-format='TSVWithNames' \
--input_format_max_rows_to_read_for_schema_inference=2000 \
--query "
WITH (CMPLNT_FR_DT || ' ' || CMPLNT_FR_TM) AS CMPLNT_START,
(CMPLNT_TO_DT || ' ' || CMPLNT_TO_TM) AS CMPLNT_END
SELECT
CMPLNT_NUM AS complaint_number,
ADDR_PCT_CD AS precinct,
BORO_NM AS borough,
parseDateTime64BestEffort(CMPLNT_START) AS complaint_begin,
parseDateTime64BestEffortOrNull(CMPLNT_END) AS complaint_end,
CRM_ATPT_CPTD_CD AS was_crime_completed,
HADEVELOPT AS housing_authority_development,
HOUSING_PSA AS housing_level_code,
JURISDICTION_CODE AS jurisdiction_code,
JURIS_DESC AS jurisdiction,
KY_CD AS offense_code,
LAW_CAT_CD AS offense_level,
LOC_OF_OCCUR_DESC AS location_descriptor,
OFNS_DESC AS offense_description,
PARKS_NM AS park_name,
PATROL_BORO AS patrol_borough,
PD_CD,
PD_DESC,
PREM_TYP_DESC AS location_type,
toDate(parseDateTimeBestEffort(RPT_DT)) AS date_reported,
STATION_NAME AS transit_station,
SUSP_AGE_GROUP AS suspect_age_group,
SUSP_RACE AS suspect_race,
SUSP_SEX AS suspect_sex,
TRANSIT_DISTRICT AS transit_district,
VIC_AGE_GROUP AS victim_age_group,
VIC_RACE AS victim_race,
VIC_SEX AS victim_sex,
X_COORD_CD AS NY_x_coordinate,
Y_COORD_CD AS NY_y_coordinate,
Latitude,
Longitude
FROM input" \
| clickhouse-client --query='INSERT INTO NYPD_Complaint FORMAT TSV'
```
## Validate the Data {#validate-data}
:::note
The dataset changes once or more per year, your counts may not match what is in this document.
:::
Query:
```sql
SELECT count()
FROM NYPD_Complaint
```
Result:
```text
┌─count()─┐
│ 208993 │
└─────────┘
1 row in set. Elapsed: 0.001 sec.
```
The size of the dataset in ClickHouse is just 12% of the original TSV file. Compare the size of the original TSV file with the size of the table:
Query:
```sql
SELECT formatReadableSize(total_bytes)
FROM system.tables
WHERE name = 'NYPD_Complaint'
```
Result:
```text
┌─formatReadableSize(total_bytes)─┐
│ 8.63 MiB │
└─────────────────────────────────┘
```
## Run Some Queries {#run-queries}
### Query 1. Compare the number of complaints by month
Query:
```sql
SELECT
dateName('month', date_reported) AS month,
count() AS complaints,
bar(complaints, 0, 50000, 80)
FROM NYPD_Complaint
GROUP BY month
ORDER BY complaints DESC
```
Result:
```response
Query id: 7fbd4244-b32a-4acf-b1f3-c3aa198e74d9
┌─month─────┬─complaints─┬─bar(count(), 0, 50000, 80)───────────────────────────────┐
│ March │ 34536 │ ███████████████████████████████████████████████████████▎ │
│ May │ 34250 │ ██████████████████████████████████████████████████████▋ │
│ April │ 32541 │ ████████████████████████████████████████████████████ │
│ January │ 30806 │ █████████████████████████████████████████████████▎ │
│ February │ 28118 │ ████████████████████████████████████████████▊ │
│ November │ 7474 │ ███████████▊ │
│ December │ 7223 │ ███████████▌ │
│ October │ 7070 │ ███████████▎ │
│ September │ 6910 │ ███████████ │
│ August │ 6801 │ ██████████▊ │
│ June │ 6779 │ ██████████▋ │
│ July │ 6485 │ ██████████▍ │
└───────────┴────────────┴──────────────────────────────────────────────────────────┘
12 rows in set. Elapsed: 0.006 sec. Processed 208.99 thousand rows, 417.99 KB (37.48 million rows/s., 74.96 MB/s.)
```
### Query 2. Compare total number of complaints by Borough
Query:
```sql
SELECT
borough,
count() AS complaints,
bar(complaints, 0, 125000, 60)
FROM NYPD_Complaint
GROUP BY borough
ORDER BY complaints DESC
```
Result:
```response
Query id: 8cdcdfd4-908f-4be0-99e3-265722a2ab8d
┌─borough───────┬─complaints─┬─bar(count(), 0, 125000, 60)──┐
│ BROOKLYN │ 57947 │ ███████████████████████████▋ │
│ MANHATTAN │ 53025 │ █████████████████████████▍ │
│ QUEENS │ 44875 │ █████████████████████▌ │
│ BRONX │ 44260 │ █████████████████████▏ │
│ STATEN ISLAND │ 8503 │ ████ │
│ (null) │ 383 │ ▏ │
└───────────────┴────────────┴──────────────────────────────┘
6 rows in set. Elapsed: 0.008 sec. Processed 208.99 thousand rows, 209.43 KB (27.14 million rows/s., 27.20 MB/s.)
```
## Next Steps
[A Practical Introduction to Sparse Primary Indexes in ClickHouse](../../guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-intro.md) discusses the differences in ClickHouse indexing compared to traditional relational databases, how ClickHouse builds and uses a sparse primary index, and indexing best practices.

View File

@ -4,10 +4,9 @@ sidebar_position: 1
keywords: [clickhouse, install, installation, docs]
description: ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture.
slug: /en/getting-started/install
title: Installation
---
# Installation
## System Requirements {#system-requirements}
ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture.
@ -59,7 +58,7 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
</details>
You can replace `stable` with `lts` or `testing` to use different [release trains](../faq/operations/production.md) based on your needs.
You can replace `stable` with `lts` to use different [release kinds](../faq/operations/production.md) based on your needs.
You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/stable).
@ -106,7 +105,7 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
</details>
If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). `prestable` is sometimes also available.
You can replace `stable` with `lts` to use different [release kinds](../faq/operations/production.md) based on your needs.
Then run these commands to install packages:
@ -221,7 +220,7 @@ For non-Linux operating systems and for AArch64 CPU architecture, ClickHouse bui
curl -O 'https://builds.clickhouse.com/master/aarch64/clickhouse' && chmod a+x ./clickhouse
```
Run `sudo ./clickhouse install` to install ClickHouse system-wide (also with needed configuration files, configuring users etc.). Then run `clickhouse start` commands to start the clickhouse-server and `clickhouse-client` to connect to it.
Run `sudo ./clickhouse install` to install ClickHouse system-wide (also with needed configuration files, configuring users etc.). Then run `sudo clickhouse start` to start the clickhouse-server and `clickhouse-client` to connect to it.
Use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data.

View File

@ -2,10 +2,9 @@
slug: /en/operations/backup
sidebar_position: 49
sidebar_label: Data backup and restore
title: Data backup and restore
---
# Data backup and restore
While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can't just drop tables with a MergeTree-like engine containing more than 50 Gb of data](server-configuration-parameters/settings.md#max-table-size-to-drop). However, these safeguards do not cover all possible cases and can be circumvented.
In order to effectively mitigate possible human errors, you should carefully prepare a strategy for backing up and restoring your data **in advance**.

View File

@ -20,6 +20,7 @@ Additional cache types:
- [Avro format](../interfaces/formats.md#data-format-avro) schemas cache.
- [Dictionaries](../sql-reference/dictionaries/index.md) data cache.
- Schema inference cache.
- [Filesystem cache](storing-data.md) over S3, Azure, Local and other disks.
Indirectly used:

View File

@ -1452,7 +1452,7 @@ Port for communicating with clients over MySQL protocol.
**Possible values**
Positive integer.
Positive integer to specify the port number to listen to or empty value to disable.
Example
@ -1466,7 +1466,7 @@ Port for communicating with clients over PostgreSQL protocol.
**Possible values**
Positive integer.
Positive integer to specify the port number to listen to or empty value to disable.
Example

View File

@ -1176,8 +1176,9 @@ Enables the quorum writes.
- If `insert_quorum < 2`, the quorum writes are disabled.
- If `insert_quorum >= 2`, the quorum writes are enabled.
- If `insert_quorum = 'auto'`, use majority number (`number_of_replicas / 2 + 1`) as quorum number.
Default value: 0.
Default value: 0 - disabled.
Quorum writes
@ -1259,7 +1260,7 @@ Possible values:
Default value: 1.
By default, blocks inserted into replicated tables by the `INSERT` statement are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)).
By default, blocks inserted into replicated tables by the `INSERT` statement are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)).
For replicated tables, by default only the 100 most recent blocks for each partition are deduplicated (see [replicated_deduplication_window](merge-tree-settings.md#replicated-deduplication-window), [replicated_deduplication_window_seconds](merge-tree-settings.md#replicated-deduplication-window-seconds)).
For non-replicated tables see [non_replicated_deduplication_window](merge-tree-settings.md#non-replicated-deduplication-window).

View File

@ -112,6 +112,119 @@ Example of disk configuration:
</clickhouse>
```
## Using local cache {#using-local-cache}
It is possible to configure local cache over disks in storage configuration starting from version 22.3. For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc. Cache uses `LRU` cache policy.
Example of configuration for versions later or equal to 22.8:
``` xml
<clickhouse>
<storage_configuration>
<disks>
<s3>
<type>s3</type>
<endpoint>...</endpoint>
... s3 configuration ...
</s3>
<cache>
<type>cache</type>
<disk>s3</disk>
<path>/s3_cache/</path>
<max_size>10000000</max_size>
</cache>
</disks>
</storage_configuration>
```
Example of configuration for versions earlier than 22.8:
``` xml
<clickhouse>
<storage_configuration>
<disks>
<s3>
<type>s3</type>
<endpoint>...</endpoint>
... s3 configuration ...
<data_cache_enabled>1</data_cache_enabled>
<data_cache_size>10000000</data_cache_size>
</s3>
</disks>
</storage_configuration>
```
Cache **configuration settings**:
- `path` - path to the directory with cache. Default: None, this setting is obligatory.
- `max_size` - maximum size of the cache in bytes. When the limit is reached, cache files are evicted according to the cache eviction policy. Default: None, this setting is obligatory.
- `cache_on_write_operations` - allow to turn on `write-through` cache (caching data on any write operations: `INSERT` queries, background merges). Default: `false`. The `write-through` cache can be disabled per query using setting `enable_filesystem_cache_on_write_operations` (data is cached only if both cache config settings and corresponding query setting are enabled).
- `enable_filesystem_query_cache_limit` - allow to limit the size of cache which is downloaded within each query (depends on user setting `max_query_cache_size`). Default: `false`.
- `enable_cache_hits_threshold` - a number, which defines how many times some data needs to be read before it will be cached. Default: `0`, i.e. the data is cached at the first attempt to read it.
- `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `true`.
- `max_file_segment_size` - a maximum size of a single cache file. Default: `104857600` (100 Mb).
- `max_elements` - a limit for a number of cache files. Default: `1048576`.
Cache **query settings**:
- `enable_filesystem_cache` - allows to disable cache per query even if storage policy was configured with `cache` disk type. Default: `true`.
- `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` - allows to use cache in query only if it already exists, otherwise query data will not be written to local cache storage. Default: `false`.
- `enable_filesystem_cache_on_write_operations` - turn on `write-through` cache. This setting works only if setting `cache_on_write_operations` in cache configuration is turned on.
- `enable_filesystem_cache_log` - turn on logging to `system.filesystem_cache_log` table. Gives a detailed view of cache usage per query. Default: `false`.
- `max_query_cache_size` - a limit for the cache size, which can be written to local cache storage. Requires enabled `enable_filesystem_query_cache_limit` in cache configuration. Default: `false`.
- `skip_download_if_exceeds_query_cache` - allows to change the behaviour of setting `max_query_cache_size`. Default: `true`. If this setting is turned on and cache download limit during query was reached, no more cache will be downloaded to cache storage. If this setting is turned off and cache download limit during query was reached, cache will still be written at the cost of evicting previously downloaded (within current query) data, i.e. the second behaviour allows to preserve `least recently used` behaviour while keeping query cache limit.
**Warning**
Cache configuration settings and cache query settings correspond to the latest ClickHouse version, for earlier versions something might not be supported.
Cache **system tables**:
- `system.filesystem_cache` - system table which shows the current state of the cache.
- `system.filesystem_cache_log` - system table which shows detailed cache usage per query. Requires `enable_filesystem_cache_log` setting to be `true`.
Cache **commands**:
- `SYSTEM DROP FILESYSTEM CACHE (<path>) (ON CLUSTER)`
- `SHOW CACHES` -- show list of caches which were configured on the server.
- `DESCRIBE CACHE '<cache_name>'` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW CACHES` command.
Cache current metrics:
- `FilesystemCacheSize`
- `FilesystemCacheElements`
Cache asynchronous metrics:
- `FilesystemCacheBytes`
- `FilesystemCacheFiles`
Cache profile events:
- `CachedReadBufferReadFromSourceBytes`, `CachedReadBufferReadFromCacheBytes`
- `CachedReadBufferReadFromSourceMicroseconds`, `CachedReadBufferReadFromCacheMicroseconds`
- `CachedReadBufferCacheWriteBytes`, `CachedReadBufferCacheWriteMicroseconds`
- `CachedWriteBufferCacheWriteBytes`, `CachedWriteBufferCacheWriteMicroseconds`
## Storing Data on Web Server {#storing-data-on-webserver}
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.

View File

@ -74,13 +74,16 @@ Make sure that [`fstrim`](https://en.wikipedia.org/wiki/Trim_(computing)) is ena
## File System {#file-system}
Ext4 is the most reliable option. Set the mount options `noatime`.
XFS should be avoided. It works mostly fine but there are some reports about lower performance.
Ext4 is the most reliable option. Set the mount options `noatime`. XFS works well too.
Most other file systems should also work fine.
FAT-32 and exFAT are not supported due to lack of hard links.
Do not use compressed filesystems, because ClickHouse does compression on its own and better.
It's not recommended to use encrypted filesystems, because you can use builtin encryption in ClickHouse, which is better.
While ClickHouse can work over NFS, it is not the best idea.
## Linux Kernel {#linux-kernel}
Don't use an outdated Linux kernel.

View File

@ -640,7 +640,8 @@ Result:
## date\_diff
Returns the difference between two dates or dates with time values.
Returns the difference between two dates or dates with time values.
The difference is calculated using relative units, e.g. the difference between `2022-01-01` and `2021-12-29` is 3 days for day unit (see [toRelativeDayNum](#torelativedaynum)), 1 month for month unit (see [toRelativeMonthNum](#torelativemonthnum)), 1 year for year unit (see [toRelativeYearNum](#torelativeyearnum)).
**Syntax**
@ -692,6 +693,25 @@ Result:
└────────────────────────────────────────────────────────────────────────────────────────┘
```
Query:
``` sql
SELECT
toDate('2022-01-01') AS e,
toDate('2021-12-29') AS s,
dateDiff('day', s, e) AS day_diff,
dateDiff('month', s, e) AS month__diff,
dateDiff('year', s, e) AS year_diff;
```
Result:
``` text
┌──────────e─┬──────────s─┬─day_diff─┬─month__diff─┬─year_diff─┐
│ 2022-01-01 │ 2021-12-29 │ 3 │ 1 │ 1 │
└────────────┴────────────┴──────────┴─────────────┴───────────┘
```
## date\_sub
Subtracts the time interval or date interval from the provided date or date with time.

View File

@ -12,8 +12,9 @@ ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr
Deletes data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
:::note
The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use.
:::note
The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use. `ALTER TABLE` is considered a heavyweight operation that requires the underlying data to be merged before it is deleted. For MergeTree tables, consider using the [`DELETE FROM` query](../delete.md), which performs a lightweight delete and can be considerably faster.
:::
The `filter_expr` must be of type `UInt8`. The query deletes rows in the table for which this expression takes a non-zero value.

View File

@ -0,0 +1,37 @@
---
slug: /en/sql-reference/statements/delete
sidebar_position: 36
sidebar_label: DELETE
---
# DELETE Statement
``` sql
DELETE FROM [db.]table [WHERE expr]
```
`DELETE FROM` removes rows from table `[db.]table` that match expression `expr`. The deleted rows are marked as deleted immediately and will be automatically filtered out of all subsequent queries. Cleanup of data happens asynchronously in background. This feature is only available for MergeTree table engine family.
For example, the following query deletes all rows from the `hits` table where the `Title` column contains the text `hello`:
```sql
DELETE FROM hits WHERE Title LIKE '%hello%';
```
Lightweight deletes are asynchronous by default. Set `mutations_sync` equal to 1 to wait for one replica to process the statement, and set `mutations_sync` to 2 to wait for all replicas.
:::note
This feature is experimental and requires you to set `allow_experimental_lightweight_delete` to true:
```sql
SET allow_experimental_lightweight_delete = true;
```
:::
An [alternative way to delete rows](./alter/delete.md) in ClickHouse is `ALTER TABLE ... DELETE`, which might be more efficient if you do bulk deletes only occasionally and don't need the operation to be applied instantly. In most use cases the new lightweight `DELETE FROM` behavior will be considerably faster.
:::warning
Even though deletes are becoming more lightweight in ClickHouse, they should still not be used as aggressively as on an OLTP system. Lightweight deletes are currently efficient for wide parts, but for compact parts they can be a heavyweight operation, and it may be better to use `ALTER TABLE` for some scenarios.
:::

View File

@ -10,7 +10,7 @@ Makes the server "forget" about the existence of a table, a materialized view, o
**Syntax**
``` sql
DETACH TABLE|VIEW|DICTIONARY [IF EXISTS] [db.]name [ON CLUSTER cluster] [PERMANENTLY]
DETACH TABLE|VIEW|DICTIONARY [IF EXISTS] [db.]name [ON CLUSTER cluster] [PERMANENTLY] [SYNC]
```
Detaching does not delete the data or metadata of a table, a materialized view or a dictionary. If an entity was not detached `PERMANENTLY`, on the next server launch the server will read the metadata and recall the table/view/dictionary again. If an entity was detached `PERMANENTLY`, there will be no automatic recall.
@ -24,6 +24,8 @@ Note that you can not detach permanently the table which is already detached (te
Also you can not [DROP](../../sql-reference/statements/drop#drop-table) the detached table, or [CREATE TABLE](../../sql-reference/statements/create/table.md) with the same name as detached permanently, or replace it with the other table with [RENAME TABLE](../../sql-reference/statements/rename.md) query.
The `SYNC` modifier executes the action without delay.
**Example**
Creating a table:

View File

@ -6,7 +6,7 @@ sidebar_label: DROP
# DROP Statements
Deletes existing entity. If the `IF EXISTS` clause is specified, these queries do not return an error if the entity does not exist.
Deletes existing entity. If the `IF EXISTS` clause is specified, these queries do not return an error if the entity does not exist. If the `SYNC` modifier is specified, the entity is dropped without delay.
## DROP DATABASE
@ -15,7 +15,7 @@ Deletes all tables inside the `db` database, then deletes the `db` database itse
Syntax:
``` sql
DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster]
DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC]
```
## DROP TABLE
@ -25,7 +25,7 @@ Deletes the table.
Syntax:
``` sql
DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster]
DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] [SYNC]
```
## DROP DICTIONARY
@ -35,7 +35,7 @@ Deletes the dictionary.
Syntax:
``` sql
DROP DICTIONARY [IF EXISTS] [db.]name
DROP DICTIONARY [IF EXISTS] [db.]name [SYNC]
```
## DROP USER
@ -95,7 +95,7 @@ Deletes a view. Views can be deleted by a `DROP TABLE` command as well but `DROP
Syntax:
``` sql
DROP VIEW [IF EXISTS] [db.]name [ON CLUSTER cluster]
DROP VIEW [IF EXISTS] [db.]name [ON CLUSTER cluster] [SYNC]
```
## DROP FUNCTION

View File

@ -1,5 +1,5 @@
---
slug: /en/development/tests
slug: /zh/development/tests
sidebar_position: 70
sidebar_label: Testing
title: ClickHouse Testing

View File

@ -0,0 +1,37 @@
#pragma once
#include <Backups/IBackupEntry.h>
namespace DB
{
/// Wraps another backup entry together with a value of an arbitrary type `T`.
///
/// Every IBackupEntry call is forwarded unchanged to the wrapped entry. The custom value is never
/// read by this class: it is only stored, so it lives exactly as long as the wrapper does.
template <typename T>
class BackupEntryWrappedWith : public IBackupEntry
{
public:
    /// `entry_` is a sink parameter (taken by value), so it is moved into the member
    /// instead of being copied a second time. `custom_value_` is copied.
    BackupEntryWrappedWith(BackupEntryPtr entry_, const T & custom_value_) : entry(std::move(entry_)), custom_value(custom_value_) { }

    /// Same as above, but takes over `custom_value_` by moving from it.
    BackupEntryWrappedWith(BackupEntryPtr entry_, T && custom_value_) : entry(std::move(entry_)), custom_value(std::move(custom_value_)) { }

    ~BackupEntryWrappedWith() override = default;

    /// The methods below simply delegate to the wrapped entry.
    UInt64 getSize() const override { return entry->getSize(); }
    std::optional<UInt128> getChecksum() const override { return entry->getChecksum(); }
    std::unique_ptr<SeekableReadBuffer> getReadBuffer() const override { return entry->getReadBuffer(); }
    String getFilePath() const override { return entry->getFilePath(); }
    DiskPtr tryGetDiskIfExists() const override { return entry->tryGetDiskIfExists(); }
    DataSourceDescription getDataSourceDescription() const override { return entry->getDataSourceDescription(); }

private:
    /// The entry all calls are forwarded to.
    BackupEntryPtr entry;
    /// Value kept alongside the entry; not accessed by this class.
    T custom_value;
};
/// Replaces, in place, every entry of `backup_entries` with a BackupEntryWrappedWith<T>
/// that holds the original entry and a copy of `custom_value`.
/// The first element of each pair is left untouched.
template <typename T>
void wrapBackupEntriesWith(std::vector<std::pair<String, BackupEntryPtr>> & backup_entries, const T & custom_value)
{
    for (auto & name_and_entry : backup_entries)
    {
        auto & entry_slot = name_and_entry.second;
        /// The old pointer is moved into the wrapper, and the wrapper takes its place.
        entry_slot = std::make_shared<BackupEntryWrappedWith<T>>(std::move(entry_slot), custom_value);
    }
}
}

View File

@ -42,6 +42,14 @@ endif ()
# See `src/Common/TargetSpecific.h`
option(ENABLE_MULTITARGET_CODE "Enable platform-dependent code" ON)
if (NO_SSE3_OR_HIGHER)
# Optimized x86 code in DECLARE_*_SPECIFIC_CODE blocks (see `src/Common/TargetSpecific.h`) is sometimes marked FORCE_INLINE. As a
# result, its instruction set requirements (e.g. SSE4.2) leak into generic code. This is normally not a problem for standard x86 builds
# because generic code is compiled with SSE 4.2 anyway. But it breaks SSE2-only builds. Therefore, disable the multitarget code
# machinery and always use generic code. (The cleaner alternative is removing FORCE_INLINE but that impacts performance too much.)
set(ENABLE_MULTITARGET_CODE OFF)
endif()
if (ENABLE_MULTITARGET_CODE)
add_definitions(-DENABLE_MULTITARGET_CODE=1)
else()

View File

@ -16,6 +16,7 @@ namespace ErrorCodes
extern const int ATTEMPT_TO_READ_AFTER_EOF;
extern const int NETWORK_ERROR;
extern const int SOCKET_TIMEOUT;
extern const int DNS_ERROR;
}
ConnectionEstablisher::ConnectionEstablisher(
@ -90,6 +91,7 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std::
catch (const Exception & e)
{
if (e.code() != ErrorCodes::NETWORK_ERROR && e.code() != ErrorCodes::SOCKET_TIMEOUT
&& e.code() != ErrorCodes::DNS_ERROR
&& e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF)
throw;

View File

@ -50,7 +50,7 @@ ColumnArray::ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr &&
if (!offsets_concrete)
throw Exception("offsets_column must be a ColumnUInt64", ErrorCodes::LOGICAL_ERROR);
if (!offsets_concrete->empty() && data)
if (!offsets_concrete->empty() && data && !data->empty())
{
Offset last_offset = offsets_concrete->getData().back();

View File

@ -13,6 +13,7 @@
#include <Poco/DOM/Text.h>
#include <Poco/DOM/Attr.h>
#include <Poco/DOM/Comment.h>
#include <Poco/XML/XMLWriter.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Common/ZooKeeper/ZooKeeperNodeCache.h>
#include <Common/ZooKeeper/KeeperException.h>
@ -729,7 +730,11 @@ void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config,
if (!preprocessed_path_parent.empty())
fs::create_directories(preprocessed_path_parent);
}
DOMWriter().writeNode(preprocessed_path, loaded_config.preprocessed_xml);
DOMWriter writer;
writer.setNewLine("\n");
writer.setIndent(" ");
writer.setOptions(Poco::XML::XMLWriter::PRETTY_PRINT);
writer.writeNode(preprocessed_path, loaded_config.preprocessed_xml);
LOG_DEBUG(log, "Saved preprocessed configuration to '{}'.", preprocessed_path);
}
catch (Poco::Exception & e)

View File

@ -26,114 +26,107 @@ namespace ErrorCodes
extern const int CANNOT_PARSE_YAML;
}
/// A prefix symbol in yaml key
/// We add attributes to nodes by using a prefix symbol in the key part.
/// Currently we use @ as a prefix symbol. Note, that @ is reserved
/// by YAML standard, so we need to write a key-value pair like this: "@attribute": attr_value
const char YAML_ATTRIBUTE_PREFIX = '@';
namespace
{
/// A prefix symbol in yaml key
/// We add attributes to nodes by using a prefix symbol in the key part.
/// Currently we use @ as a prefix symbol. Note, that @ is reserved
/// by YAML standard, so we need to write a key-value pair like this: "@attribute": attr_value
const char YAML_ATTRIBUTE_PREFIX = '@';
Poco::AutoPtr<Poco::XML::Element> createCloneNode(Poco::XML::Element & original_node)
{
Poco::AutoPtr<Poco::XML::Element> clone_node = original_node.ownerDocument()->createElement(original_node.nodeName());
original_node.parentNode()->appendChild(clone_node);
return clone_node;
}
void processNode(const YAML::Node & node, Poco::XML::Element & parent_xml_element)
{
auto * xml_document = parent_xml_element.ownerDocument();
switch (node.Type())
Poco::AutoPtr<Poco::XML::Element> cloneXMLNode(const Poco::XML::Element & original_node)
{
case YAML::NodeType::Scalar:
{
std::string value = node.as<std::string>();
Poco::AutoPtr<Poco::XML::Text> xml_value = xml_document->createTextNode(value);
parent_xml_element.appendChild(xml_value);
break;
}
Poco::AutoPtr<Poco::XML::Element> clone_node = original_node.ownerDocument()->createElement(original_node.nodeName());
original_node.parentNode()->appendChild(clone_node);
return clone_node;
}
/// We process YAML Sequences as a
/// list of <key>value</key> tags with same key and different values.
/// For example, we translate this sequence
/// seq:
/// - val1
/// - val2
///
/// into this:
/// <seq>val1</seq>
/// <seq>val2</seq>
case YAML::NodeType::Sequence:
void processNode(const YAML::Node & node, Poco::XML::Element & parent_xml_node)
{
auto * xml_document = parent_xml_node.ownerDocument();
switch (node.Type())
{
for (const auto & child_node : node)
/// For sequences it depends how we want to process them.
/// Sequences of key-value pairs such as:
/// seq:
/// - k1: val1
/// - k2: val2
/// into xml like this:
/// <seq>
/// <k1>val1</k1>
/// <k2>val2</k2>
/// </seq>
///
/// But, if the sequence is just a list, the root-node needs to be repeated, such as:
/// seq:
/// - val1
/// - val2
/// into xml like this:
/// <seq>val1</seq>
/// <seq>val2</seq>
///
/// Therefore check what type the child is, for further processing.
/// Mixing types (values list or map) will lead to strange results but should not happen.
if (parent_xml_element.hasChildNodes() && !child_node.IsMap())
{
/// Create a new parent node with same tag for each child node
processNode(child_node, *createCloneNode(parent_xml_element));
}
else
{
/// Map, so don't recreate the parent node but add directly
processNode(child_node, parent_xml_element);
}
break;
}
case YAML::NodeType::Map:
{
for (const auto & key_value_pair : node)
case YAML::NodeType::Scalar:
{
const auto & key_node = key_value_pair.first;
const auto & value_node = key_value_pair.second;
std::string key = key_node.as<std::string>();
bool is_attribute = (key.starts_with(YAML_ATTRIBUTE_PREFIX) && value_node.IsScalar());
if (is_attribute)
{
/// we use substr(1) here to remove YAML_ATTRIBUTE_PREFIX from key
auto attribute_name = key.substr(1);
std::string value = value_node.as<std::string>();
parent_xml_element.setAttribute(attribute_name, value);
}
else
{
Poco::AutoPtr<Poco::XML::Element> xml_key = xml_document->createElement(key);
parent_xml_element.appendChild(xml_key);
processNode(value_node, *xml_key);
}
std::string value = node.as<std::string>();
Poco::AutoPtr<Poco::XML::Text> xml_value = xml_document->createTextNode(value);
parent_xml_node.appendChild(xml_value);
break;
}
/// For sequences we repeat the parent xml node. For example,
/// seq:
/// - val1
/// - val2
/// is converted into the following xml:
/// <seq>val1</seq>
/// <seq>val2</seq>
///
/// A sequence of mappings is converted in the same way:
/// seq:
/// - k1: val1
/// k2: val2
/// - k3: val3
/// is converted into the following xml:
/// <seq><k1>val1</k1><k2>val2</k2></seq>
/// <seq><k3>val3</k3></seq>
case YAML::NodeType::Sequence:
{
size_t i = 0;
for (auto it = node.begin(); it != node.end(); ++it, ++i)
{
const auto & child_node = *it;
bool need_clone_parent_xml_node = (i > 0);
if (need_clone_parent_xml_node)
{
/// Create a new parent node with same tag for each child node
processNode(child_node, *cloneXMLNode(parent_xml_node));
}
else
{
/// First element of the sequence: reuse the existing parent node instead of cloning it
processNode(child_node, parent_xml_node);
}
}
break;
}
case YAML::NodeType::Map:
{
for (const auto & key_value_pair : node)
{
const auto & key_node = key_value_pair.first;
const auto & value_node = key_value_pair.second;
std::string key = key_node.as<std::string>();
bool is_attribute = (key.starts_with(YAML_ATTRIBUTE_PREFIX) && value_node.IsScalar());
if (is_attribute)
{
/// we use substr(1) here to remove YAML_ATTRIBUTE_PREFIX from key
auto attribute_name = key.substr(1);
std::string value = value_node.as<std::string>();
parent_xml_node.setAttribute(attribute_name, value);
}
else
{
Poco::AutoPtr<Poco::XML::Element> xml_key = xml_document->createElement(key);
parent_xml_node.appendChild(xml_key);
processNode(value_node, *xml_key);
}
}
break;
}
case YAML::NodeType::Null: break;
case YAML::NodeType::Undefined:
{
throw Exception(ErrorCodes::CANNOT_PARSE_YAML, "YAMLParser has encountered node with undefined type and cannot continue parsing of the file");
}
break;
}
case YAML::NodeType::Null: break;
case YAML::NodeType::Undefined:
{
throw Exception(ErrorCodes::CANNOT_PARSE_YAML, "YAMLParser has encountered node with undefined type and cannot continue parsing of the file");
}
}
}
}
Poco::AutoPtr<Poco::XML::Document> YAMLParser::parse(const String& path)
{

View File

@ -22,13 +22,13 @@ Elf::Elf(const std::string & path)
/// Check if it's an elf.
elf_size = in.buffer().size();
if (elf_size < sizeof(ElfEhdr))
throw Exception("The size of supposedly ELF file is too small", ErrorCodes::CANNOT_PARSE_ELF);
throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "The size of supposedly ELF file '{}' is too small", path);
mapped = in.buffer().begin();
header = reinterpret_cast<const ElfEhdr *>(mapped);
if (memcmp(header->e_ident, "\x7F""ELF", 4) != 0)
throw Exception("The file is not ELF according to magic", ErrorCodes::CANNOT_PARSE_ELF);
throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "The file '{}' is not ELF according to magic", path);
/// Get section header.
ElfOff section_header_offset = header->e_shoff;
@ -37,7 +37,7 @@ Elf::Elf(const std::string & path)
if (!section_header_offset
|| !section_header_num_entries
|| section_header_offset + section_header_num_entries * sizeof(ElfShdr) > elf_size)
throw Exception("The ELF is truncated (section header points after end of file)", ErrorCodes::CANNOT_PARSE_ELF);
throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "The ELF '{}' is truncated (section header points after end of file)", path);
section_headers = reinterpret_cast<const ElfShdr *>(mapped + section_header_offset);
@ -48,11 +48,11 @@ Elf::Elf(const std::string & path)
});
if (!section_names_strtab)
throw Exception("The ELF doesn't have string table with section names", ErrorCodes::CANNOT_PARSE_ELF);
throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "The ELF '{}' doesn't have string table with section names", path);
ElfOff section_names_offset = section_names_strtab->header.sh_offset;
if (section_names_offset >= elf_size)
throw Exception("The ELF is truncated (section names string table points after end of file)", ErrorCodes::CANNOT_PARSE_ELF);
throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "The ELF '{}' is truncated (section names string table points after end of file)", path);
section_names = reinterpret_cast<const char *>(mapped + section_names_offset);
@ -64,7 +64,7 @@ Elf::Elf(const std::string & path)
if (!program_header_offset
|| !program_header_num_entries
|| program_header_offset + program_header_num_entries * sizeof(ElfPhdr) > elf_size)
throw Exception("The ELF is truncated (program header points after end of file)", ErrorCodes::CANNOT_PARSE_ELF);
throw Exception(ErrorCodes::CANNOT_PARSE_ELF, "The ELF '{}' is truncated (program header points after end of file)", path);
program_headers = reinterpret_cast<const ElfPhdr *>(mapped + program_header_offset);
}

View File

@ -145,5 +145,11 @@ String FieldVisitorToString::operator() (const Object & x) const
}
/// Converts a field to a string.
/// A field of type String is returned as is; any other type is rendered by FieldVisitorToString.
String convertFieldToString(const Field & field)
{
    const bool holds_string = (field.getType() == Field::Types::Which::String);
    if (!holds_string)
        return applyVisitor(FieldVisitorToString(), field);

    /// Take the stored string directly, bypassing the visitor.
    return field.get<String>();
}
}

View File

@ -31,5 +31,8 @@ public:
String operator() (const bool & x) const;
};
}
/// Get value from field and convert it to string.
/// Also remove quotes from strings.
String convertFieldToString(const Field & field);
}

View File

@ -88,7 +88,13 @@ void Span::addAttribute(std::exception_ptr e) noexcept
SpanHolder::SpanHolder(std::string_view _operation_name)
{
if (current_thread_trace_context.isTraceEnabled())
if (!current_thread_trace_context.isTraceEnabled())
{
return;
}
/// Use try-catch to make sure the ctor is exception safe.
try
{
this->trace_id = current_thread_trace_context.trace_id;
this->parent_span_id = current_thread_trace_context.span_id;
@ -97,9 +103,19 @@ SpanHolder::SpanHolder(std::string_view _operation_name)
this->start_time_us
= std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
// set current span id to this
current_thread_trace_context.span_id = this->span_id;
/// Add new initialization here
}
catch (...)
{
tryLogCurrentException(__FUNCTION__);
/// Clear related fields to make sure the span won't be recorded.
this->trace_id = UUID();
return;
}
/// Set current span as parent of other spans created later on this thread.
current_thread_trace_context.span_id = this->span_id;
}
void SpanHolder::finish() noexcept
@ -216,7 +232,7 @@ const TracingContextOnThread & CurrentContext()
return current_thread_trace_context;
}
void TracingContextOnThread::reset()
void TracingContextOnThread::reset() noexcept
{
this->trace_id = UUID();
this->span_id = 0;
@ -231,63 +247,75 @@ TracingContextHolder::TracingContextHolder(
const Settings * settings_ptr,
const std::weak_ptr<OpenTelemetrySpanLog> & _span_log)
{
if (current_thread_trace_context.isTraceEnabled())
/// Use try-catch to make sure the ctor is exception safe.
/// If any exception is raised during the construction, the tracing is not enabled on current thread.
try
{
///
/// This is not the normal case,
/// it means that construction of current object is not at the start of current thread.
/// Usually this is due to:
/// 1. bad design
/// 2. right design but code changes so that original point where this object is constructing is not the new start execution of current thread
///
/// In such case, we should use current context as parent of this new constructing object,
/// So this branch ensures this class can be instantiated multiple times on one same thread safely.
///
this->is_context_owner = false;
this->root_span.trace_id = current_thread_trace_context.trace_id;
this->root_span.parent_span_id = current_thread_trace_context.span_id;
if (current_thread_trace_context.isTraceEnabled())
{
///
/// This is not the normal case,
/// it means that construction of current object is not at the start of current thread.
/// Usually this is due to:
/// 1. bad design
/// 2. right design but code changes so that original point where this object is constructing is not the new start execution of current thread
///
/// In such case, we should use current context as parent of this new constructing object,
/// So this branch ensures this class can be instantiated multiple times on one same thread safely.
///
this->is_context_owner = false;
this->root_span.trace_id = current_thread_trace_context.trace_id;
this->root_span.parent_span_id = current_thread_trace_context.span_id;
this->root_span.span_id = thread_local_rng();
this->root_span.operation_name = _operation_name;
this->root_span.start_time_us
= std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
/// Set the root span as parent of other spans created on current thread
current_thread_trace_context.span_id = this->root_span.span_id;
return;
}
if (!_parent_trace_context.isTraceEnabled())
{
if (settings_ptr == nullptr)
/// Skip tracing context initialization on current thread
return;
// Start the trace with some configurable probability.
std::bernoulli_distribution should_start_trace{settings_ptr->opentelemetry_start_trace_probability};
if (!should_start_trace(thread_local_rng))
/// skip tracing context initialization on current thread
return;
while (_parent_trace_context.trace_id == UUID())
{
// Make sure the random generated trace_id is not 0 which is an invalid id.
_parent_trace_context.trace_id.toUnderType().items[0] = thread_local_rng(); //-V656
_parent_trace_context.trace_id.toUnderType().items[1] = thread_local_rng(); //-V656
}
_parent_trace_context.span_id = 0;
}
this->root_span.trace_id = _parent_trace_context.trace_id;
this->root_span.parent_span_id = _parent_trace_context.span_id;
this->root_span.span_id = thread_local_rng();
this->root_span.operation_name = _operation_name;
this->root_span.start_time_us
= std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
current_thread_trace_context.span_id = this->root_span.span_id;
/// Add new initialization here
}
catch (...)
{
tryLogCurrentException(__FUNCTION__);
/// Clear related fields to make sure the tracing is not enabled.
this->root_span.trace_id = UUID();
return;
}
if (!_parent_trace_context.isTraceEnabled())
{
if (settings_ptr == nullptr)
/// skip tracing context initialization on current thread
return;
// start the trace ourselves, with some configurable probability.
std::bernoulli_distribution should_start_trace{settings_ptr->opentelemetry_start_trace_probability};
if (!should_start_trace(thread_local_rng))
/// skip tracing context initialization on current thread
return;
while (_parent_trace_context.trace_id == UUID())
{
// make sure the random generated trace_id is not 0 which is an invalid id
_parent_trace_context.trace_id.toUnderType().items[0] = thread_local_rng(); //-V656
_parent_trace_context.trace_id.toUnderType().items[1] = thread_local_rng(); //-V656
}
_parent_trace_context.span_id = 0;
}
this->root_span.trace_id = _parent_trace_context.trace_id;
this->root_span.parent_span_id = _parent_trace_context.span_id;
this->root_span.span_id = thread_local_rng();
this->root_span.operation_name = _operation_name;
this->root_span.start_time_us
= std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
/// This object is created to initialize tracing context on a new thread,
/// it's helpful to record the thread_id so that we know the thread switching from the span log
this->root_span.addAttribute("clickhouse.thread_id", getThreadId());
/// set up trace context on current thread
/// Set up trace context on current thread only when the root span is successfully initialized.
current_thread_trace_context = _parent_trace_context;
current_thread_trace_context.span_id = this->root_span.span_id;
current_thread_trace_context.trace_flags = TRACE_FLAG_SAMPLED;
@ -306,6 +334,18 @@ TracingContextHolder::~TracingContextHolder()
auto shared_span_log = current_thread_trace_context.span_log.lock();
if (shared_span_log)
{
try
{
/// This object is created to initialize tracing context on a new thread,
/// it's helpful to record the thread_id so that we know the thread switching from the span log
this->root_span.addAttribute("clickhouse.thread_id", getThreadId());
}
catch (...)
{
/// It's acceptable that the attribute is not recorded in case of any exception,
/// so the exception is ignored to try to log the span.
}
this->root_span.finish_time_us
= std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();

View File

@ -74,7 +74,7 @@ struct TracingContextOnThread : TracingContext
return *this;
}
void reset();
void reset() noexcept;
/// Use weak_ptr instead of shared_ptr to hold a reference to the underlying system.opentelemetry_span_log table
/// Since this object is kept on threads and passed across threads, a weak_ptr is more safe to prevent potential leak

View File

@ -286,6 +286,18 @@ The server successfully detected this situation and will download merged part fr
M(S3WriteRequestsThrottling, "Number of 429 and 503 errors in POST, DELETE, PUT and PATCH requests to S3 storage.") \
M(S3WriteRequestsRedirects, "Number of redirects in POST, DELETE, PUT and PATCH requests to S3 storage.") \
\
M(DiskS3ReadMicroseconds, "Time of GET and HEAD requests to DiskS3 storage.") \
M(DiskS3ReadRequestsCount, "Number of GET and HEAD requests to DiskS3 storage.") \
M(DiskS3ReadRequestsErrors, "Number of non-throttling errors in GET and HEAD requests to DiskS3 storage.") \
M(DiskS3ReadRequestsThrottling, "Number of 429 and 503 errors in GET and HEAD requests to DiskS3 storage.") \
M(DiskS3ReadRequestsRedirects, "Number of redirects in GET and HEAD requests to DiskS3 storage.") \
\
M(DiskS3WriteMicroseconds, "Time of POST, DELETE, PUT and PATCH requests to DiskS3 storage.") \
M(DiskS3WriteRequestsCount, "Number of POST, DELETE, PUT and PATCH requests to DiskS3 storage.") \
M(DiskS3WriteRequestsErrors, "Number of non-throttling errors in POST, DELETE, PUT and PATCH requests to DiskS3 storage.") \
M(DiskS3WriteRequestsThrottling, "Number of 429 and 503 errors in POST, DELETE, PUT and PATCH requests to DiskS3 storage.") \
M(DiskS3WriteRequestsRedirects, "Number of redirects in POST, DELETE, PUT and PATCH requests to DiskS3 storage.") \
\
M(ReadBufferFromS3Microseconds, "Time spend in reading from S3.") \
M(ReadBufferFromS3Bytes, "Bytes read from S3.") \
M(ReadBufferFromS3RequestsErrors, "Number of exceptions while reading from S3.") \

View File

@ -37,7 +37,7 @@ But because ClickHouse is linked with most of the symbols exported (-rdynamic fl
It allows to get source file names and line numbers from addresses. Only available if you use -g option for compiler.
It is also used by default for ClickHouse builds, but because of its weight (about two gigabytes)
it is split to separate binary and provided in clickhouse-common-static-dbg package.
This separate binary is placed in /usr/lib/debug/usr/bin/clickhouse and is loaded automatically by tools like gdb, addr2line.
This separate binary is placed in /usr/lib/debug/usr/bin/clickhouse.debug and is loaded automatically by tools like gdb, addr2line.
When you build ClickHouse by yourself, debug info is not split and present in a single huge binary.
What ClickHouse is using to provide good stack traces?
@ -391,10 +391,22 @@ void collectSymbolsFromELF(
std::filesystem::path local_debug_info_path = canonical_path.parent_path() / canonical_path.stem();
local_debug_info_path += ".debug";
std::filesystem::path debug_info_path = std::filesystem::path("/usr/lib/debug") / canonical_path.relative_path();
debug_info_path += ".debug";
if (std::filesystem::exists(local_debug_info_path))
/// NOTE: This is a workaround for the current packaging system.
///
/// nfpm cannot copy a file only if it exists, so cmake creates
/// an empty .debug file instead. Trying to load such an empty
/// ELF file would make Elf::Elf throw the CANNOT_PARSE_ELF
/// exception, so empty files are treated as missing.
auto exists_not_empty = [](const std::filesystem::path & path)
{
return std::filesystem::exists(path) && !std::filesystem::is_empty(path);
};
if (exists_not_empty(local_debug_info_path))
object_name = local_debug_info_path;
else if (std::filesystem::exists(debug_info_path))
else if (exists_not_empty(debug_info_path))
object_name = debug_info_path;
else if (build_id.size() >= 2)
{
@ -412,7 +424,7 @@ void collectSymbolsFromELF(
std::filesystem::path build_id_debug_info_path(
fmt::format("/usr/lib/debug/.build-id/{}/{}.debug", build_id_hex.substr(0, 2), build_id_hex.substr(2)));
if (std::filesystem::exists(build_id_debug_info_path))
if (exists_not_empty(build_id_debug_info_path))
object_name = build_id_debug_info_path;
else
object_name = canonical_path;

View File

@ -898,4 +898,25 @@ ZooKeeperRequestFactory::ZooKeeperRequestFactory()
registerZooKeeperRequest<OpNum::FilteredList, ZooKeeperFilteredListRequest>(*this);
}
/// Compares `path` with `match_to`, ignoring one trailing '/' on either argument.
///
/// Returns:
///  - EXACT     if both paths are equal;
///  - IS_CHILD  if `path` starts with `match_to` and continues with a '/' right after it;
///  - NOT_MATCH otherwise (including the case when `match_to` is longer than `path`).
PathMatchResult matchPath(std::string_view path, std::string_view match_to)
{
    auto drop_trailing_slash = [](std::string_view & str)
    {
        if (str.ends_with('/'))
            str.remove_suffix(1);
    };

    drop_trailing_slash(path);
    drop_trailing_slash(match_to);

    /// `match_to` has to be fully contained at the beginning of `path`.
    if (!path.starts_with(match_to))
        return PathMatchResult::NOT_MATCH;

    if (path.size() == match_to.size())
        return PathMatchResult::EXACT;

    /// The remainder must begin at a path separator, otherwise only the names share a prefix.
    const char next_char = path[match_to.size()];
    if (next_char == '/')
        return PathMatchResult::IS_CHILD;

    return PathMatchResult::NOT_MATCH;
}
}

View File

@ -554,4 +554,13 @@ private:
ZooKeeperRequestFactory();
};
/// Result of comparing a path with a reference path, as returned by matchPath.
enum class PathMatchResult
{
/// The path is neither equal to the reference path nor located below it.
NOT_MATCH,
/// The path is equal to the reference path.
EXACT,
/// The path is located below the reference path.
IS_CHILD
};
/// Compares `path` with `match_to`; a single trailing '/' on either argument is ignored.
PathMatchResult matchPath(std::string_view path, std::string_view match_to);
}

View File

@ -0,0 +1,15 @@
#include <gtest/gtest.h>
#include <Common/ZooKeeper/ZooKeeperCommon.h>
/// Checks matchPath for child paths, exact matches (with and without trailing slashes) and mismatches.
TEST(ZooKeeperTest, TestMatchPath)
{
    using namespace Coordination;

    ASSERT_EQ(matchPath("/path/file", "/path"), PathMatchResult::IS_CHILD);
    ASSERT_EQ(matchPath("/path/file", "/path/"), PathMatchResult::IS_CHILD);
    ASSERT_EQ(matchPath("/path/file", "/"), PathMatchResult::IS_CHILD);
    ASSERT_EQ(matchPath("/", "/"), PathMatchResult::EXACT);
    ASSERT_EQ(matchPath("/path", "/path/"), PathMatchResult::EXACT);
    ASSERT_EQ(matchPath("/path/", "/path"), PathMatchResult::EXACT);

    /// Sharing a string prefix is not enough: the match has to end at a path separator.
    ASSERT_EQ(matchPath("/pathfile", "/path"), PathMatchResult::NOT_MATCH);
    /// A parent path does not match its own child.
    ASSERT_EQ(matchPath("/path", "/path/file"), PathMatchResult::NOT_MATCH);
    /// Completely unrelated paths.
    ASSERT_EQ(matchPath("/other/file", "/path"), PathMatchResult::NOT_MATCH);
}

View File

@ -43,11 +43,8 @@ clickhouse:
text_log:
database: system
table: text_log
partition_by:
"@remove": "1"
engine:
- "@replace" : "1"
- "ENGINE MergeTree"
partition_by: {"@remove": "1"}
engine: "ENGINE MergeTree"
flush_interval_milliseconds: 7500
level: debug
)YAML";
@ -112,11 +109,8 @@ clickhouse:
text_log :
database: system
table: text_log
partition_by:
"@remove": "1"
engine:
- "@replace" : "1"
- "ENGINE MergeTree"
partition_by: {"@remove": "1"}
engine: "ENGINE MergeTree"
flush_interval_milliseconds: 7500
level: debug
)YAML";

View File

@ -13,40 +13,12 @@
using namespace DB;
TEST(Common, YamlParserInvalidFile)
TEST(YamlParser, InvalidFile)
{
ASSERT_THROW(YAMLParser::parse("some-non-existing-file.yaml"), Exception);
}
TEST(Common, YamlParserProcessKeysList)
{
auto yaml_file = getFileWithContents("keys-list.yaml", R"YAML(
operator:
access_management: "1"
networks:
- ip: "10.1.6.168"
- ip: "::1"
- ip: "127.0.0.1"
)YAML");
SCOPE_EXIT({ yaml_file->remove(); });
Poco::AutoPtr<Poco::XML::Document> xml = YAMLParser::parse(yaml_file->path());
auto *p_node = xml->getNodeByPath("/clickhouse");
EXPECT_EQ(xmlNodeAsString(p_node), R"CONFIG(<clickhouse>
<operator>
<access_management>1</access_management>
<networks>
<ip>10.1.6.168</ip>
<ip>::1</ip>
<ip>127.0.0.1</ip>
</networks>
</operator>
</clickhouse>
)CONFIG");
}
TEST(Common, YamlParserProcessValuesList)
TEST(YamlParser, ProcessValuesList)
{
auto yaml_file = getFileWithContents("values-list.yaml", R"YAML(
rules:
@ -75,4 +47,141 @@ rules:
)CONFIG");
}
/// A key whose value is a list of scalars is expected to produce one XML element
/// with that key's name per list item (here: three <ip> elements inside <networks>).
TEST(YamlParser, ProcessKeysList)
{
auto yaml_file = getFileWithContents("keys-list.yaml", R"YAML(
operator:
access_management: 1
networks:
ip:
- 10.1.6.168
- ::1
- 127.0.0.1
)YAML");
/// Remove the temporary YAML file on scope exit.
SCOPE_EXIT({ yaml_file->remove(); });
Poco::AutoPtr<Poco::XML::Document> xml = YAMLParser::parse(yaml_file->path());
/// The comparison starts from the <clickhouse> element of the parsed document.
auto *p_node = xml->getNodeByPath("/clickhouse");
EXPECT_EQ(xmlNodeAsString(p_node), R"CONFIG(<clickhouse>
<operator>
<access_management>1</access_management>
<networks>
<ip>10.1.6.168</ip>
<ip>::1</ip>
<ip>127.0.0.1</ip>
</networks>
</operator>
</clickhouse>
)CONFIG");
}
/// A sequence of mappings is expected to produce one <seq> element per sequence item;
/// keys prefixed with '@' become attributes of the corresponding <seq> element,
/// the remaining keys become its child elements.
TEST(YamlParser, ProcessListAttributes)
{
auto yaml_file = getFileWithContents("list_attributes.yaml", R"YAML(
seq:
- "@attr1": x
- k1: val1
k2: val2
"@attr2": y
- k3: val3
"@attr3": z
)YAML");
/// Remove the temporary YAML file on scope exit.
SCOPE_EXIT({ yaml_file->remove(); });
Poco::AutoPtr<Poco::XML::Document> xml = YAMLParser::parse(yaml_file->path());
/// The comparison starts from the <clickhouse> element of the parsed document.
auto *p_node = xml->getNodeByPath("/clickhouse");
EXPECT_EQ(xmlNodeAsString(p_node), R"CONFIG(<clickhouse>
<seq attr1="x"></seq>
<seq attr2="y">
<k1>val1</k1>
<k2>val2</k2>
</seq>
<seq attr3="z">
<k3>val3</k3>
</seq>
</clickhouse>
)CONFIG");
}
/// Inside a single mapping, all keys prefixed with '@' are expected to become attributes
/// of the same <map> element, regardless of where they appear among the ordinary keys.
TEST(YamlParser, ProcessMapAttributes)
{
auto yaml_file = getFileWithContents("map_attributes.yaml", R"YAML(
map:
"@attr1": x
k1: val1
k2: val2
"@attr2": y
k3: val3
"@attr3": z
)YAML");
/// Remove the temporary YAML file on scope exit.
SCOPE_EXIT({ yaml_file->remove(); });
Poco::AutoPtr<Poco::XML::Document> xml = YAMLParser::parse(yaml_file->path());
/// The comparison starts from the <clickhouse> element of the parsed document.
auto *p_node = xml->getNodeByPath("/clickhouse");
EXPECT_EQ(xmlNodeAsString(p_node), R"CONFIG(<clickhouse>
<map attr1="x" attr2="y" attr3="z">
<k1>val1</k1>
<k2>val2</k2>
<k3>val3</k3>
</map>
</clickhouse>
)CONFIG");
}
/// Nested sequences of mappings (a cluster definition with shards and replicas):
/// each item of `shard` is expected to become its own <shard> element, and each item
/// of the nested `replica` list its own <replica> element inside that shard.
TEST(YamlParser, ClusterDef)
{
auto yaml_file = getFileWithContents("cluster_def.yaml", R"YAML(
test_cluster:
shard:
- internal_replication: false
replica:
- host: 127.0.0.1
port: 9000
- host: 127.0.0.2
port: 9000
- internal_replication: true
replica:
- host: 127.0.0.3
port: 9000
- host: 127.0.0.4
port: 9000
)YAML");
/// Remove the temporary YAML file on scope exit.
SCOPE_EXIT({ yaml_file->remove(); });
Poco::AutoPtr<Poco::XML::Document> xml = YAMLParser::parse(yaml_file->path());
/// The comparison starts from the <clickhouse> element of the parsed document.
auto *p_node = xml->getNodeByPath("/clickhouse");
EXPECT_EQ(xmlNodeAsString(p_node), R"CONFIG(<clickhouse>
<test_cluster>
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
</replica>
<replica>
<host>127.0.0.2</host>
<port>9000</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>127.0.0.3</host>
<port>9000</port>
</replica>
<replica>
<host>127.0.0.4</host>
<port>9000</port>
</replica>
</shard>
</test_cluster>
</clickhouse>
)CONFIG");
}
#endif

View File

@ -116,8 +116,8 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
}
};
ISerialization::SubstreamPath path;
column_type->getDefaultSerialization()->enumerateStreams(path, callback, column_type);
auto serialization = column_type->getDefaultSerialization();
serialization->enumerateStreams(callback, column_type);
if (!result_codec)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find any substream with data type for type {}. It's a bug", column_type->getName());

View File

@ -478,11 +478,7 @@ template <> void inline copyOverlap<32, true>(UInt8 * op, const UInt8 *& match,
/// See also https://stackoverflow.com/a/30669632
template <size_t copy_amount, bool use_shuffle>
bool NO_INLINE decompressImpl(
const char * const source,
char * const dest,
size_t source_size,
size_t dest_size)
bool NO_INLINE decompressImpl(const char * const source, char * const dest, size_t source_size, size_t dest_size)
{
const UInt8 * ip = reinterpret_cast<const UInt8 *>(source);
UInt8 * op = reinterpret_cast<UInt8 *>(dest);
@ -515,6 +511,18 @@ bool NO_INLINE decompressImpl(
const unsigned token = *ip++;
length = token >> 4;
UInt8 * copy_end;
size_t real_length;
/// It might be true fairly often for well-compressed columns.
/// At the same time, it may hurt performance in other cases because this condition is hard to predict (especially if the number of zeros is ~50%).
/// In such cases this `if` will significantly increase the number of mispredicted instructions. But it seems to result in a
/// noticeable slowdown only for implementations with `copy_amount` > 8, probably because they use heavier instructions.
if constexpr (copy_amount == 8)
if (length == 0)
goto decompress_match;
if (length == 0x0F)
{
if (unlikely(ip + 1 >= input_end))
@ -524,7 +532,7 @@ bool NO_INLINE decompressImpl(
/// Copy literals.
UInt8 * copy_end = op + length;
copy_end = op + length;
/// input: Hello, world
/// ^-ip
@ -541,7 +549,7 @@ bool NO_INLINE decompressImpl(
return false;
// Due to implementation specifics the copy length is always a multiple of copy_amount
size_t real_length = 0;
real_length = 0;
static_assert(copy_amount == 8 || copy_amount == 16 || copy_amount == 32);
if constexpr (copy_amount == 8)
@ -552,9 +560,9 @@ bool NO_INLINE decompressImpl(
real_length = (((length >> 5) + 1) * 32);
if (unlikely(ip + real_length >= input_end + ADDITIONAL_BYTES_AT_END_OF_BUFFER))
return false;
return false;
wildCopy<copy_amount>(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer.
wildCopy<copy_amount>(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer.
if (copy_end == output_end)
return true;
@ -562,6 +570,8 @@ bool NO_INLINE decompressImpl(
ip += length;
op = copy_end;
decompress_match:
if (unlikely(ip + 1 >= input_end))
return false;

View File

@ -13,8 +13,10 @@
#include <filesystem>
#include <memory>
#include <Common/logger_useful.h>
#include "Coordination/KeeperContext.h"
#include <Coordination/KeeperContext.h>
#include <Coordination/KeeperConstants.h>
#include <Common/ZooKeeper/ZooKeeperCommon.h>
namespace DB
{
@ -146,33 +148,6 @@ namespace
}
}
namespace
{

/// Result of comparing a path against a reference path, see matchPath().
enum class PathMatchResult
{
    NOT_MATCH,
    EXACT,
    IS_CHILD
};

/// Classifies `path` relative to `match_to`:
///  - EXACT     if both are equal;
///  - IS_CHILD  if `path` starts with `match_to` and the next character is '/';
///  - NOT_MATCH otherwise (including when `match_to` is only a partial prefix, e.g. "/keeper" vs "/keeper_map").
PathMatchResult matchPath(const std::string_view path, const std::string_view match_to)
{
    const size_t prefix_size = match_to.size();

    /// `match_to` has to be a complete prefix of `path`.
    if (path.size() < prefix_size || path.compare(0, prefix_size, match_to) != 0)
        return PathMatchResult::NOT_MATCH;

    if (path.size() == prefix_size)
        return PathMatchResult::EXACT;

    /// Something follows the prefix: it is a child only if it starts a new path component.
    if (path[prefix_size] == '/')
        return PathMatchResult::IS_CHILD;

    return PathMatchResult::NOT_MATCH;
}

}
void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, WriteBuffer & out, KeeperContextPtr keeper_context)
{
writeBinary(static_cast<uint8_t>(snapshot.version), out);
@ -217,7 +192,7 @@ void KeeperStorageSnapshot::serialize(const KeeperStorageSnapshot & snapshot, Wr
const auto & path = it->key;
// write only the root system path because of digest
if (matchPath(path.toView(), keeper_system_path) == PathMatchResult::IS_CHILD)
if (Coordination::matchPath(path.toView(), keeper_system_path) == Coordination::PathMatchResult::IS_CHILD)
{
++it;
continue;
@ -365,8 +340,8 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial
KeeperStorage::Node node{};
readNode(node, in, current_version, storage.acl_map);
using enum PathMatchResult;
auto match_result = matchPath(path, keeper_system_path);
using enum Coordination::PathMatchResult;
auto match_result = Coordination::matchPath(path, keeper_system_path);
const std::string error_msg = fmt::format("Cannot read node on path {} from a snapshot because it is used as a system node", path);
if (match_result == IS_CHILD)

View File

@ -879,7 +879,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr
path_created += seq_num_str.str();
}
if (path_created.starts_with(keeper_system_path))
if (Coordination::matchPath(path_created, keeper_system_path) != Coordination::PathMatchResult::NOT_MATCH)
{
auto error_msg = fmt::format("Trying to create a node inside the internal Keeper path ({}) which is not allowed. Path: {}", keeper_system_path, path_created);
@ -1049,7 +1049,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr
std::vector<KeeperStorage::Delta> new_deltas;
if (request.path.starts_with(keeper_system_path))
if (Coordination::matchPath(request.path, keeper_system_path) != Coordination::PathMatchResult::NOT_MATCH)
{
auto error_msg = fmt::format("Trying to delete an internal Keeper path ({}) which is not allowed", request.path);
@ -1203,7 +1203,7 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce
std::vector<KeeperStorage::Delta> new_deltas;
if (request.path.starts_with(keeper_system_path))
if (Coordination::matchPath(request.path, keeper_system_path) != Coordination::PathMatchResult::NOT_MATCH)
{
auto error_msg = fmt::format("Trying to update an internal Keeper path ({}) which is not allowed", request.path);
@ -1472,7 +1472,7 @@ struct KeeperStorageSetACLRequestProcessor final : public KeeperStorageRequestPr
{
Coordination::ZooKeeperSetACLRequest & request = dynamic_cast<Coordination::ZooKeeperSetACLRequest &>(*zk_request);
if (request.path.starts_with(keeper_system_path))
if (Coordination::matchPath(request.path, keeper_system_path) != Coordination::PathMatchResult::NOT_MATCH)
{
auto error_msg = fmt::format("Trying to update an internal Keeper path ({}) which is not allowed", request.path);

View File

@ -2141,6 +2141,38 @@ TEST_P(CoordinationTest, TestCurrentApiVersion)
EXPECT_EQ(keeper_version, static_cast<uint8_t>(current_keeper_api_version));
}
// Verifies that create requests for the internal "/keeper" path (the node itself and anything below it)
// are rejected with ZBADARGUMENTS, while paths that merely share the "/keeper" prefix are accepted.
TEST_P(CoordinationTest, TestSystemNodeModify)
{
    using namespace Coordination;

    // In the INIT phase a modification of a system path aborts, so run the storage in RUNNING phase.
    keeper_context->server_state = KeeperContext::Phase::RUNNING;
    KeeperStorage storage{500, "", keeper_context};

    int64_t next_zxid{0};

    // Sends one create request for `node_path` and checks the error code of the first response.
    // Every call uses its own zxid.
    const auto check_create = [&storage, &next_zxid](const std::string_view node_path, const auto expected_error)
    {
        auto create_request = std::make_shared<ZooKeeperCreateRequest>();
        create_request->path = node_path;

        storage.preprocessRequest(create_request, 0, 0, next_zxid);
        const auto create_responses = storage.processRequest(create_request, 0, next_zxid);
        ASSERT_FALSE(create_responses.empty());

        ASSERT_EQ(create_responses[0].response->error, expected_error) << "Unexpected error for path " << node_path;

        ++next_zxid;
    };

    // The system path itself and its descendants must be refused.
    check_create("/keeper", Error::ZBADARGUMENTS);
    check_create("/keeper/with_child", Error::ZBADARGUMENTS);
    check_create(DB::keeper_api_version_path, Error::ZBADARGUMENTS);

    // Look-alike paths that are not inside the system path must be allowed.
    check_create("/keeper_map", Error::ZOK);
    check_create("/keeper1", Error::ZOK);
    check_create("/keepe", Error::ZOK);
    check_create("/keeper1/test", Error::ZOK);
}
INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite,
CoordinationTest,
::testing::ValuesIn(std::initializer_list<CompressionParam>{

View File

@ -213,7 +213,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
\
M(Bool, insert_deduplicate, true, "For INSERT queries in the replicated table, specifies that deduplication of insertings blocks should be performed", 0) \
\
M(UInt64Auto, insert_quorum, 0, "For INSERT queries in the replicated table, wait writing for the specified number of replicas and linearize the addition of the data. 0 - disabled.", 0) \
M(UInt64Auto, insert_quorum, 0, "For INSERT queries in the replicated table, wait writing for the specified number of replicas and linearize the addition of the data. 0 - disabled, 'auto' - use majority", 0) \
M(Milliseconds, insert_quorum_timeout, 600000, "If the quorum of replicas did not meet in specified time (in milliseconds), exception will be thrown and insertion is aborted.", 0) \
M(Bool, insert_quorum_parallel, true, "For quorum INSERT queries - enable to make parallel inserts without linearizability", 0) \
M(UInt64, select_sequential_consistency, 0, "For SELECT queries from the replicated table, throw an exception if the replica does not have a chunk written with the quorum; do not read the parts that have not yet been written with the quorum.", 0) \

View File

@ -84,18 +84,20 @@ void IDataType::forEachSubcolumn(
{
for (size_t i = 0; i < subpath.size(); ++i)
{
if (!subpath[i].visited && ISerialization::hasSubcolumnForPath(subpath, i + 1))
size_t prefix_len = i + 1;
if (!subpath[i].visited && ISerialization::hasSubcolumnForPath(subpath, prefix_len))
{
auto name = ISerialization::getSubcolumnNameForStream(subpath, i + 1);
auto subdata = ISerialization::createFromPath(subpath, i);
auto name = ISerialization::getSubcolumnNameForStream(subpath, prefix_len);
auto subdata = ISerialization::createFromPath(subpath, prefix_len);
callback(subpath, name, subdata);
}
subpath[i].visited = true;
}
};
SubstreamPath path;
data.serialization->enumerateStreams(path, callback_with_data, data);
ISerialization::EnumerateStreamsSettings settings;
settings.position_independent_encoding = false;
data.serialization->enumerateStreams(settings, callback_with_data, data);
}
template <typename Ptr>
@ -118,33 +120,38 @@ Ptr IDataType::getForSubcolumn(
return res;
}
bool IDataType::hasSubcolumn(const String & subcolumn_name) const
{
return tryGetSubcolumnType(subcolumn_name) != nullptr;
}
/// Looks up the type of subcolumn `subcolumn_name`, starting from this type and its default serialization.
/// hasSubcolumn() relies on a null result meaning "no such subcolumn".
/// NOTE(review): the trailing `false` presumably tells getForSubcolumn not to treat a missing subcolumn as an error — confirm.
DataTypePtr IDataType::tryGetSubcolumnType(const String & subcolumn_name) const
{
    /// SubstreamData declares constructors, so it is built via constructor + with*() setters
    /// instead of a braced aggregate initializer.
    auto data = SubstreamData(getDefaultSerialization()).withType(getPtr());
    return getForSubcolumn<DataTypePtr>(subcolumn_name, data, &SubstreamData::type, false);
}
/// Looks up the type of subcolumn `subcolumn_name`, starting from this type and its default serialization.
/// NOTE(review): the trailing `true` presumably makes getForSubcolumn treat a missing subcolumn as an error — confirm.
DataTypePtr IDataType::getSubcolumnType(const String & subcolumn_name) const
{
    /// SubstreamData declares constructors, so it is built via constructor + with*() setters
    /// instead of a braced aggregate initializer.
    auto data = SubstreamData(getDefaultSerialization()).withType(getPtr());
    return getForSubcolumn<DataTypePtr>(subcolumn_name, data, &SubstreamData::type, true);
}
/// Extracts subcolumn `subcolumn_name` from `column`, using the default serialization of this type.
/// NOTE(review): the trailing `false` presumably tells getForSubcolumn not to treat a missing subcolumn as an error — confirm.
ColumnPtr IDataType::tryGetSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const
{
    /// SubstreamData declares constructors, so it is built via constructor + with*() setters
    /// instead of a braced aggregate initializer.
    auto data = SubstreamData(getDefaultSerialization()).withColumn(column);
    return getForSubcolumn<ColumnPtr>(subcolumn_name, data, &SubstreamData::column, false);
}
/// Extracts subcolumn `subcolumn_name` from `column`, using the default serialization of this type.
/// NOTE(review): the trailing `true` presumably makes getForSubcolumn treat a missing subcolumn as an error — confirm.
ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const ColumnPtr & column) const
{
    /// SubstreamData declares constructors, so it is built via constructor + with*() setters
    /// instead of a braced aggregate initializer.
    auto data = SubstreamData(getDefaultSerialization()).withColumn(column);
    return getForSubcolumn<ColumnPtr>(subcolumn_name, data, &SubstreamData::column, true);
}
/// Returns the serialization of subcolumn `subcolumn_name`, starting the lookup from the given `serialization`
/// (not from the default serialization of this type).
/// NOTE(review): the trailing `true` presumably makes getForSubcolumn treat a missing subcolumn as an error — confirm.
SerializationPtr IDataType::getSubcolumnSerialization(const String & subcolumn_name, const SerializationPtr & serialization) const
{
    /// SubstreamData declares constructors, so it is built via its constructor
    /// instead of a braced aggregate initializer.
    auto data = SubstreamData(serialization);
    return getForSubcolumn<SerializationPtr>(subcolumn_name, data, &SubstreamData::serialization, true);
}
@ -154,7 +161,7 @@ Names IDataType::getSubcolumnNames() const
forEachSubcolumn([&](const auto &, const auto & name, const auto &)
{
res.push_back(name);
}, { getDefaultSerialization(), nullptr, nullptr, nullptr });
}, SubstreamData(getDefaultSerialization()));
return res;
}

View File

@ -79,6 +79,8 @@ public:
/// Data type id. It's used for runtime type checks.
virtual TypeIndex getTypeId() const = 0;
bool hasSubcolumn(const String & subcolumn_name) const;
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const;
DataTypePtr getSubcolumnType(const String & subcolumn_name) const;

View File

@ -73,24 +73,24 @@ String ISerialization::SubstreamPath::toString() const
}
void ISerialization::enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const
{
path.push_back(Substream::Regular);
path.back().data = data;
callback(path);
path.pop_back();
settings.path.push_back(Substream::Regular);
settings.path.back().data = data;
callback(settings.path);
settings.path.pop_back();
}
void ISerialization::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
void ISerialization::enumerateStreams(
const StreamCallback & callback,
const DataTypePtr & type,
const ColumnPtr & column) const
{
enumerateStreams(path, callback, {getPtr(), nullptr, nullptr, nullptr});
}
void ISerialization::enumerateStreams(SubstreamPath & path, const StreamCallback & callback, const DataTypePtr & type) const
{
enumerateStreams(path, callback, {getPtr(), type, nullptr, nullptr});
EnumerateStreamsSettings settings;
auto data = SubstreamData(getPtr()).withType(type).withColumn(column);
enumerateStreams(settings, callback, data);
}
void ISerialization::serializeBinaryBulk(const IColumn & column, WriteBuffer &, size_t, size_t) const
@ -184,7 +184,7 @@ String ISerialization::getFileNameForStream(const NameAndTypePair & column, cons
return getFileNameForStream(column.getNameInStorage(), path);
}
static size_t isOffsetsOfNested(const ISerialization::SubstreamPath & path)
bool isOffsetsOfNested(const ISerialization::SubstreamPath & path)
{
if (path.empty())
return false;
@ -287,10 +287,13 @@ bool ISerialization::hasSubcolumnForPath(const SubstreamPath & path, size_t pref
ISerialization::SubstreamData ISerialization::createFromPath(const SubstreamPath & path, size_t prefix_len)
{
assert(prefix_len < path.size());
assert(prefix_len <= path.size());
if (prefix_len == 0)
return {};
SubstreamData res = path[prefix_len].data;
for (ssize_t i = static_cast<ssize_t>(prefix_len) - 1; i >= 0; --i)
ssize_t last_elem = prefix_len - 1;
auto res = path[last_elem].data;
for (ssize_t i = last_elem - 1; i >= 0; --i)
{
const auto & creator = path[i].creator;
if (creator)

View File

@ -101,6 +101,30 @@ public:
struct SubstreamData
{
SubstreamData() = default;
SubstreamData(SerializationPtr serialization_)
: serialization(std::move(serialization_))
{
}
SubstreamData & withType(DataTypePtr type_)
{
type = std::move(type_);
return *this;
}
SubstreamData & withColumn(ColumnPtr column_)
{
column = std::move(column_);
return *this;
}
SubstreamData & withSerializationInfo(SerializationInfoPtr serialization_info_)
{
serialization_info = std::move(serialization_info_);
return *this;
}
SerializationPtr serialization;
DataTypePtr type;
ColumnPtr column;
@ -164,16 +188,22 @@ public:
using StreamCallback = std::function<void(const SubstreamPath &)>;
/// State and options that are threaded through enumerateStreams() calls.
struct EnumerateStreamsSettings
{
    /// Path of the substream currently being enumerated; implementations push and pop elements while descending.
    SubstreamPath path;

    /// Consulted by SerializationArray::enumerateStreams: when false, the array offsets column is converted
    /// to per-row sizes for the ArraySizes substream; when true, the offsets column is passed as is.
    bool position_independent_encoding{true};
};
virtual void enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const;
void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const;
void enumerateStreams(const StreamCallback & callback, SubstreamPath && path) const { enumerateStreams(callback, path); }
void enumerateStreams(const StreamCallback & callback) const { enumerateStreams(callback, {}); }
void enumerateStreams(SubstreamPath & path, const StreamCallback & callback, const DataTypePtr & type) const;
/// Enumerate streams with default settings.
void enumerateStreams(
const StreamCallback & callback,
const DataTypePtr & type = nullptr,
const ColumnPtr & column = nullptr) const;
using OutputStreamGetter = std::function<WriteBuffer*(const SubstreamPath &)>;
using InputStreamGetter = std::function<ReadBuffer*(const SubstreamPath &)>;
@ -375,4 +405,6 @@ State * ISerialization::checkAndGetState(const StatePtr & state) const
return state_concrete;
}
bool isOffsetsOfNested(const ISerialization::SubstreamPath & path);
}

View File

@ -155,30 +155,30 @@ namespace
return column_offsets;
}
}
ColumnPtr arrayOffsetsToSizes(const IColumn & column)
{
const auto & column_offsets = assert_cast<const ColumnArray::ColumnOffsets &>(column);
MutableColumnPtr column_sizes = column_offsets.cloneEmpty();
if (column_offsets.empty())
return column_sizes;
const auto & offsets_data = column_offsets.getData();
auto & sizes_data = assert_cast<ColumnArray::ColumnOffsets &>(*column_sizes).getData();
sizes_data.resize(offsets_data.size());
IColumn::Offset prev_offset = 0;
for (size_t i = 0, size = offsets_data.size(); i < size; ++i)
ColumnPtr arrayOffsetsToSizes(const IColumn & column)
{
auto current_offset = offsets_data[i];
sizes_data[i] = current_offset - prev_offset;
prev_offset = current_offset;
}
const auto & column_offsets = assert_cast<const ColumnArray::ColumnOffsets &>(column);
MutableColumnPtr column_sizes = column_offsets.cloneEmpty();
return column_sizes;
if (column_offsets.empty())
return column_sizes;
const auto & offsets_data = column_offsets.getData();
auto & sizes_data = assert_cast<ColumnArray::ColumnOffsets &>(*column_sizes).getData();
sizes_data.resize(offsets_data.size());
IColumn::Offset prev_offset = 0;
for (size_t i = 0, size = offsets_data.size(); i < size; ++i)
{
auto current_offset = offsets_data[i];
sizes_data[i] = current_offset - prev_offset;
prev_offset = current_offset;
}
return column_sizes;
}
}
DataTypePtr SerializationArray::SubcolumnCreator::create(const DataTypePtr & prev) const
@ -197,41 +197,42 @@ ColumnPtr SerializationArray::SubcolumnCreator::create(const ColumnPtr & prev) c
}
void SerializationArray::enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const
{
const auto * type_array = data.type ? &assert_cast<const DataTypeArray &>(*data.type) : nullptr;
const auto * column_array = data.column ? &assert_cast<const ColumnArray &>(*data.column) : nullptr;
auto offsets_column = column_array ? column_array->getOffsetsPtr() : nullptr;
auto offsets = column_array ? column_array->getOffsetsPtr() : nullptr;
path.push_back(Substream::ArraySizes);
path.back().data =
{
auto offsets_serialization =
std::make_shared<SerializationNamed>(
std::make_shared<SerializationNumber<UInt64>>(),
"size" + std::to_string(getArrayLevel(path)), false),
data.type ? std::make_shared<DataTypeUInt64>() : nullptr,
offsets_column ? arrayOffsetsToSizes(*offsets_column) : nullptr,
data.serialization_info,
};
"size" + std::to_string(getArrayLevel(settings.path)), false);
callback(path);
auto offsets_column = offsets && !settings.position_independent_encoding
? arrayOffsetsToSizes(*offsets)
: offsets;
path.back() = Substream::ArrayElements;
path.back().data = data;
path.back().creator = std::make_shared<SubcolumnCreator>(offsets_column);
settings.path.push_back(Substream::ArraySizes);
settings.path.back().data = SubstreamData(offsets_serialization)
.withType(type_array ? std::make_shared<DataTypeUInt64>() : nullptr)
.withColumn(std::move(offsets_column))
.withSerializationInfo(data.serialization_info);
SubstreamData next_data =
{
nested,
type_array ? type_array->getNestedType() : nullptr,
column_array ? column_array->getDataPtr() : nullptr,
data.serialization_info,
};
callback(settings.path);
nested->enumerateStreams(path, callback, next_data);
path.pop_back();
settings.path.back() = Substream::ArrayElements;
settings.path.back().data = data;
settings.path.back().creator = std::make_shared<SubcolumnCreator>(offsets);
auto next_data = SubstreamData(nested)
.withType(type_array ? type_array->getNestedType() : nullptr)
.withColumn(column_array ? column_array->getDataPtr() : nullptr)
.withSerializationInfo(data.serialization_info);
nested->enumerateStreams(settings, callback, next_data);
settings.path.pop_back();
}
void SerializationArray::serializeBinaryBulkStatePrefix(

View File

@ -36,7 +36,7 @@ public:
*/
void enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const override;
@ -79,6 +79,4 @@ private:
};
};
ColumnPtr arrayOffsetsToSizes(const IColumn & column);
}

View File

@ -41,30 +41,26 @@ SerializationLowCardinality::SerializationLowCardinality(const DataTypePtr & dic
}
void SerializationLowCardinality::enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const
{
const auto * column_lc = data.column ? &getColumnLowCardinality(*data.column) : nullptr;
SubstreamData dict_data =
{
dict_inner_serialization,
data.type ? dictionary_type : nullptr,
column_lc ? column_lc->getDictionary().getNestedColumn() : nullptr,
data.serialization_info,
};
settings.path.push_back(Substream::DictionaryKeys);
auto dict_data = SubstreamData(dict_inner_serialization)
.withType(data.type ? dictionary_type : nullptr)
.withColumn(column_lc ? column_lc->getDictionary().getNestedColumn() : nullptr)
.withSerializationInfo(data.serialization_info);
path.push_back(Substream::DictionaryKeys);
path.back().data = dict_data;
settings.path.back().data = dict_data;
dict_inner_serialization->enumerateStreams(settings, callback, dict_data);
dict_inner_serialization->enumerateStreams(path, callback, dict_data);
settings.path.back() = Substream::DictionaryIndexes;
settings.path.back().data = data;
path.back() = Substream::DictionaryIndexes;
path.back().data = data;
callback(path);
path.pop_back();
callback(settings.path);
settings.path.pop_back();
}
struct KeysSerializationVersion

View File

@ -18,7 +18,7 @@ public:
explicit SerializationLowCardinality(const DataTypePtr & dictionary_type);
void enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const override;

View File

@ -257,19 +257,16 @@ void SerializationMap::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c
}
void SerializationMap::enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const
{
SubstreamData next_data =
{
nested,
data.type ? assert_cast<const DataTypeMap &>(*data.type).getNestedType() : nullptr,
data.column ? assert_cast<const ColumnMap &>(*data.column).getNestedColumnPtr() : nullptr,
data.serialization_info,
};
auto next_data = SubstreamData(nested)
.withType(data.type ? assert_cast<const DataTypeMap &>(*data.type).getNestedType() : nullptr)
.withColumn(data.column ? assert_cast<const ColumnMap &>(*data.column).getNestedColumnPtr() : nullptr)
.withSerializationInfo(data.serialization_info);
nested->enumerateStreams(path, callback, next_data);
nested->enumerateStreams(settings, callback, next_data);
}
void SerializationMap::serializeBinaryBulkStatePrefix(

View File

@ -32,7 +32,7 @@ public:
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const override;

View File

@ -4,16 +4,16 @@ namespace DB
{
void SerializationNamed::enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const
{
addToPath(path);
path.back().data = data;
path.back().creator = std::make_shared<SubcolumnCreator>(name, escape_delimiter);
addToPath(settings.path);
settings.path.back().data = data;
settings.path.back().creator = std::make_shared<SubcolumnCreator>(name, escape_delimiter);
nested_serialization->enumerateStreams(path, callback, data);
path.pop_back();
nested_serialization->enumerateStreams(settings, callback, data);
settings.path.pop_back();
}
void SerializationNamed::serializeBinaryBulkStatePrefix(

View File

@ -26,7 +26,7 @@ public:
const String & getElementName() const { return name; }
void enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const override;

View File

@ -38,38 +38,35 @@ ColumnPtr SerializationNullable::SubcolumnCreator::create(const ColumnPtr & prev
}
void SerializationNullable::enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const
{
const auto * type_nullable = data.type ? &assert_cast<const DataTypeNullable &>(*data.type) : nullptr;
const auto * column_nullable = data.column ? &assert_cast<const ColumnNullable &>(*data.column) : nullptr;
path.push_back(Substream::NullMap);
path.back().data =
{
std::make_shared<SerializationNamed>(std::make_shared<SerializationNumber<UInt8>>(), "null", false),
type_nullable ? std::make_shared<DataTypeUInt8>() : nullptr,
column_nullable ? column_nullable->getNullMapColumnPtr() : nullptr,
data.serialization_info,
};
auto null_map_serialization = std::make_shared<SerializationNamed>(std::make_shared<SerializationNumber<UInt8>>(), "null", false);
callback(path);
settings.path.push_back(Substream::NullMap);
auto null_map_data = SubstreamData(null_map_serialization)
.withType(type_nullable ? std::make_shared<DataTypeUInt8>() : nullptr)
.withColumn(column_nullable ? column_nullable->getNullMapColumnPtr() : nullptr)
.withSerializationInfo(data.serialization_info);
path.back() = Substream::NullableElements;
path.back().creator = std::make_shared<SubcolumnCreator>(path.back().data.column);
path.back().data = data;
settings.path.back().data = null_map_data;
callback(settings.path);
SubstreamData next_data =
{
nested,
type_nullable ? type_nullable->getNestedType() : nullptr,
column_nullable ? column_nullable->getNestedColumnPtr() : nullptr,
data.serialization_info,
};
settings.path.back() = Substream::NullableElements;
settings.path.back().creator = std::make_shared<SubcolumnCreator>(null_map_data.column);
settings.path.back().data = data;
nested->enumerateStreams(path, callback, next_data);
path.pop_back();
auto next_data = SubstreamData(nested)
.withType(type_nullable ? type_nullable->getNestedType() : nullptr)
.withColumn(column_nullable ? column_nullable->getNestedColumnPtr() : nullptr)
.withSerializationInfo(data.serialization_info);
nested->enumerateStreams(settings, callback, next_data);
settings.path.pop_back();
}
void SerializationNullable::serializeBinaryBulkStatePrefix(

View File

@ -14,7 +14,7 @@ public:
explicit SerializationNullable(const SerializationPtr & nested_) : nested(nested_) {}
void enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const override;

View File

@ -148,39 +148,33 @@ ColumnPtr SerializationSparse::SubcolumnCreator::create(const ColumnPtr & prev)
}
void SerializationSparse::enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const
{
const auto * column_sparse = data.column ? &assert_cast<const ColumnSparse &>(*data.column) : nullptr;
size_t column_size = column_sparse ? column_sparse->size() : 0;
path.push_back(Substream::SparseOffsets);
path.back().data =
{
std::make_shared<SerializationNumber<UInt64>>(),
data.type ? std::make_shared<DataTypeUInt64>() : nullptr,
column_sparse ? column_sparse->getOffsetsPtr() : nullptr,
data.serialization_info,
};
settings.path.push_back(Substream::SparseOffsets);
auto offsets_data = SubstreamData(std::make_shared<SerializationNumber<UInt64>>())
.withType(data.type ? std::make_shared<DataTypeUInt64>() : nullptr)
.withColumn(column_sparse ? column_sparse->getOffsetsPtr() : nullptr)
.withSerializationInfo(data.serialization_info);
callback(path);
settings.path.back().data = offsets_data;
callback(settings.path);
path.back() = Substream::SparseElements;
path.back().creator = std::make_shared<SubcolumnCreator>(path.back().data.column, column_size);
path.back().data = data;
settings.path.back() = Substream::SparseElements;
settings.path.back().creator = std::make_shared<SubcolumnCreator>(offsets_data.column, column_size);
settings.path.back().data = data;
SubstreamData next_data =
{
nested,
data.type,
column_sparse ? column_sparse->getValuesPtr() : nullptr,
data.serialization_info,
};
auto next_data = SubstreamData(nested)
.withType(data.type)
.withColumn(column_sparse ? column_sparse->getValuesPtr() : nullptr)
.withSerializationInfo(data.serialization_info);
nested->enumerateStreams(path, callback, next_data);
path.pop_back();
nested->enumerateStreams(settings, callback, next_data);
settings.path.pop_back();
}
void SerializationSparse::serializeBinaryBulkStatePrefix(

View File

@ -28,7 +28,7 @@ public:
Kind getKind() const override { return Kind::SPARSE; }
virtual void enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const override;

View File

@ -283,7 +283,7 @@ void SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
}
void SerializationTuple::enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const
{
@ -293,15 +293,12 @@ void SerializationTuple::enumerateStreams(
for (size_t i = 0; i < elems.size(); ++i)
{
SubstreamData next_data =
{
elems[i],
type_tuple ? type_tuple->getElement(i) : nullptr,
column_tuple ? column_tuple->getColumnPtr(i) : nullptr,
info_tuple ? info_tuple->getElementInfo(i) : nullptr,
};
auto next_data = SubstreamData(elems[i])
.withType(type_tuple ? type_tuple->getElement(i) : nullptr)
.withColumn(column_tuple ? column_tuple->getColumnPtr(i) : nullptr)
.withSerializationInfo(info_tuple ? info_tuple->getElementInfo(i) : nullptr);
elems[i]->enumerateStreams(path, callback, next_data);
elems[i]->enumerateStreams(settings, callback, next_data);
}
}

View File

@ -34,7 +34,7 @@ public:
/** Each sub-column in a tuple is serialized in separate stream.
*/
void enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const override;

View File

@ -5,11 +5,11 @@ namespace DB
{
void SerializationWrapper::enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const
{
nested_serialization->enumerateStreams(path, callback, data);
nested_serialization->enumerateStreams(settings, callback, data);
}
void SerializationWrapper::serializeBinaryBulkStatePrefix(

View File

@ -21,7 +21,7 @@ public:
Kind getKind() const override { return nested_serialization->getKind(); }
void enumerateStreams(
SubstreamPath & path,
EnumerateStreamsSettings & settings,
const StreamCallback & callback,
const SubstreamData & data) const override;

View File

@ -44,15 +44,6 @@ struct AttributeConfiguration
using AttributeNameToConfiguration = std::unordered_map<std::string, AttributeConfiguration>;
/// Converts a field to its textual form.
/// A String field is returned verbatim (i.e. without quotes); any other field is rendered by FieldVisitorToString.
String getFieldAsString(const Field & field)
{
    if (field.getType() != Field::Types::Which::String)
        return applyVisitor(FieldVisitorToString(), field);

    return field.get<String>();
}
String getAttributeExpression(const ASTDictionaryAttributeDeclaration * dict_attr)
{
if (!dict_attr->expression)
@ -61,7 +52,7 @@ String getAttributeExpression(const ASTDictionaryAttributeDeclaration * dict_att
/// EXPRESSION PROPERTY should be expression or string
String expression_str;
if (const auto * literal = dict_attr->expression->as<ASTLiteral>(); literal && literal->value.getType() == Field::Types::String)
expression_str = getFieldAsString(literal->value);
expression_str = convertFieldToString(literal->value);
else
expression_str = queryToString(dict_attr->expression);
@ -275,7 +266,7 @@ void buildSingleAttribute(
AutoPtr<Element> null_value_element(doc->createElement("null_value"));
String null_value_str;
if (dict_attr->default_value)
null_value_str = getFieldAsString(dict_attr->default_value->as<ASTLiteral>()->value);
null_value_str = convertFieldToString(dict_attr->default_value->as<ASTLiteral>()->value);
AutoPtr<Text> null_value(doc->createTextNode(null_value_str));
null_value_element->appendChild(null_value);
attribute_element->appendChild(null_value_element);
@ -452,7 +443,7 @@ void buildConfigurationFromFunctionWithKeyValueArguments(
}
else if (const auto * literal = pair->second->as<const ASTLiteral>())
{
AutoPtr<Text> value(doc->createTextNode(getFieldAsString(literal->value)));
AutoPtr<Text> value(doc->createTextNode(convertFieldToString(literal->value)));
current_xml_element->appendChild(value);
}
else if (const auto * list = pair->second->as<const ASTExpressionList>())
@ -473,7 +464,7 @@ void buildConfigurationFromFunctionWithKeyValueArguments(
Field value;
result->get(0, value);
AutoPtr<Text> text_value(doc->createTextNode(getFieldAsString(value)));
AutoPtr<Text> text_value(doc->createTextNode(convertFieldToString(value)));
current_xml_element->appendChild(text_value);
}
else
@ -519,7 +510,7 @@ void buildSourceConfiguration(
{
AutoPtr<Element> setting_change_element(doc->createElement(name));
settings_element->appendChild(setting_change_element);
AutoPtr<Text> setting_value(doc->createTextNode(getFieldAsString(value)));
AutoPtr<Text> setting_value(doc->createTextNode(convertFieldToString(value)));
setting_change_element->appendChild(setting_value);
}
}

View File

@ -239,7 +239,16 @@ public:
}
/// For one local path there might be multiple remote paths in case of Log family engines.
using LocalPathWithObjectStoragePaths = std::pair<String, StoredObjects>;
/// A local path bundled with the stored objects that correspond to it
/// and the prefix those objects have in common.
struct LocalPathWithObjectStoragePaths
{
    /// Path as seen on the local side.
    std::string local_path;

    /// Prefix shared by all entries of `objects`.
    std::string common_prefix_for_objects;

    /// Stored objects belonging to `local_path`.
    StoredObjects objects;

    /// The two strings are copied; `objects_` is moved from.
    LocalPathWithObjectStoragePaths(
        const std::string & local_path_,
        const std::string & common_prefix_for_objects_,
        StoredObjects && objects_)
        : local_path(local_path_)
        , common_prefix_for_objects(common_prefix_for_objects_)
        , objects(std::move(objects_))
    {
    }
};
virtual void getRemotePathsRecursive(const String &, std::vector<LocalPathWithObjectStoragePaths> &)
{

View File

@ -127,7 +127,7 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std::
{
try
{
paths_map.emplace_back(local_path, getStorageObjects(local_path));
paths_map.emplace_back(local_path, metadata_storage->getObjectStorageRootPath(), getStorageObjects(local_path));
}
catch (const Exception & e)
{
@ -282,7 +282,10 @@ String DiskObjectStorage::getUniqueId(const String & path) const
bool DiskObjectStorage::checkUniqueId(const String & id) const
{
if (!id.starts_with(object_storage_root_path))
{
LOG_DEBUG(log, "Blob with id {} doesn't start with blob storage prefix {}", id, object_storage_root_path);
return false;
}
auto object = StoredObject::create(*object_storage, id, {}, {}, true);
return object_storage->exists(object);

View File

@ -68,6 +68,14 @@ void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf)
}
}
/// Make this metadata describe a single object located at `relative_path` with size `bytes_size`.
///
/// The object is appended to `storage_objects` (existing entries are not cleared), while
/// `total_size`, `ref_count` and `read_only` are overwritten.
/// NOTE(review): presumably intended for freshly constructed metadata only - confirm with callers.
void DiskObjectStorageMetadata::createFromSingleObject(const std::string & relative_path, size_t bytes_size, size_t ref_count_, bool read_only_)
{
    /// Register the object first: if this throws, no other field has been touched yet.
    storage_objects.emplace_back(relative_path, bytes_size);

    read_only = read_only_;
    ref_count = ref_count_;

    /// With exactly one object the total size is just that object's size.
    total_size = bytes_size;
}
void DiskObjectStorageMetadata::deserializeFromString(const std::string & data)
{
ReadBufferFromString buf(data);

View File

@ -50,6 +50,7 @@ public:
void deserialize(ReadBuffer & buf);
void deserializeFromString(const std::string & data);
void createFromSingleObject(const std::string & relative_path, size_t bytes_size, size_t ref_count_, bool is_read_only_);
void serialize(WriteBuffer & buf, bool sync) const;
std::string serializeToString() const;

View File

@ -56,7 +56,7 @@ void throwIfError(const Aws::Utils::Outcome<Result, Error> & response)
if (!response.IsSuccess())
{
const auto & err = response.GetError();
throw Exception(ErrorCodes::S3_ERROR, "{} (Code: {})", err.GetMessage(), static_cast<size_t>(err.GetErrorType()));
throw S3Exception(fmt::format("{} (Code: {})", err.GetMessage(), static_cast<size_t>(err.GetErrorType())), err.GetErrorType());
}
}
@ -70,7 +70,7 @@ void throwIfUnexpectedError(const Aws::Utils::Outcome<Result, Error> & response,
if (!response.IsSuccess() && (!if_exists || !isNotFoundError(response.GetError().GetErrorType())))
{
const auto & err = response.GetError();
throw Exception(ErrorCodes::S3_ERROR, "{} (Code: {})", err.GetMessage(), static_cast<size_t>(err.GetErrorType()));
throw S3Exception(err.GetErrorType(), "{} (Code: {})", err.GetMessage(), static_cast<size_t>(err.GetErrorType()));
}
}

View File

@ -116,7 +116,8 @@ std::unique_ptr<Aws::S3::S3Client> getClient(const Poco::Util::AbstractConfigura
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
config.getString(config_prefix + ".region", ""),
context->getRemoteHostFilter(), context->getGlobalContext()->getSettingsRef().s3_max_redirects,
context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging);
context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging,
/* for_disk_s3 = */ true);
S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint")));
if (uri.key.back() != '/')

View File

@ -34,6 +34,7 @@ namespace ErrorCodes
extern const int CANNOT_SEEK_THROUGH_FILE;
extern const int SEEK_POSITION_OUT_OF_BOUND;
extern const int LOGICAL_ERROR;
extern const int CANNOT_ALLOCATE_MEMORY;
}
@ -136,6 +137,23 @@ bool ReadBufferFromS3::nextImpl()
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3Microseconds, watch.elapsedMicroseconds());
ProfileEvents::increment(ProfileEvents::ReadBufferFromS3RequestsErrors, 1);
if (const auto * s3_exception = dynamic_cast<const S3Exception *>(&e))
{
/// It doesn't make sense to retry Access Denied or No Such Key
if (!s3_exception->isRetryableError())
{
tryLogCurrentException(log, fmt::format("while reading key: {}, from bucket: {}", key, bucket));
throw;
}
}
/// It doesn't make sense to retry allocator errors
if (e.code() == ErrorCodes::CANNOT_ALLOCATE_MEMORY)
{
tryLogCurrentException(log);
throw;
}
LOG_DEBUG(
log,
"Caught exception while reading S3 object. Bucket: {}, Key: {}, Version: {}, Offset: {}, Attempt: {}, Message: {}",
@ -306,7 +324,10 @@ std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
return std::make_unique<ReadBufferFromIStream>(read_result.GetBody(), buffer_size);
}
else
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
{
const auto & error = outcome.GetError();
throw S3Exception(error.GetMessage(), error.GetErrorType());
}
}
SeekableReadBufferPtr ReadBufferS3Factory::getReader()

View File

@ -42,6 +42,18 @@ namespace ProfileEvents
extern const Event S3WriteRequestsErrors;
extern const Event S3WriteRequestsThrottling;
extern const Event S3WriteRequestsRedirects;
extern const Event DiskS3ReadMicroseconds;
extern const Event DiskS3ReadRequestsCount;
extern const Event DiskS3ReadRequestsErrors;
extern const Event DiskS3ReadRequestsThrottling;
extern const Event DiskS3ReadRequestsRedirects;
extern const Event DiskS3WriteMicroseconds;
extern const Event DiskS3WriteRequestsCount;
extern const Event DiskS3WriteRequestsErrors;
extern const Event DiskS3WriteRequestsThrottling;
extern const Event DiskS3WriteRequestsRedirects;
}
namespace CurrentMetrics
@ -62,11 +74,13 @@ PocoHTTPClientConfiguration::PocoHTTPClientConfiguration(
const String & force_region_,
const RemoteHostFilter & remote_host_filter_,
unsigned int s3_max_redirects_,
bool enable_s3_requests_logging_)
bool enable_s3_requests_logging_,
bool for_disk_s3_)
: force_region(force_region_)
, remote_host_filter(remote_host_filter_)
, s3_max_redirects(s3_max_redirects_)
, enable_s3_requests_logging(enable_s3_requests_logging_)
, for_disk_s3(for_disk_s3_)
{
}
@ -112,6 +126,7 @@ PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_config
, remote_host_filter(client_configuration.remote_host_filter)
, s3_max_redirects(client_configuration.s3_max_redirects)
, enable_s3_requests_logging(client_configuration.enable_s3_requests_logging)
, for_disk_s3(client_configuration.for_disk_s3)
, extra_headers(client_configuration.extra_headers)
{
}
@ -176,6 +191,46 @@ namespace
}
}
/// Decide whether a request is accounted as a read or as a write, looking only at its HTTP method.
/// GET and HEAD are reads; PUT, POST, DELETE and PATCH are writes.
/// Throws NOT_IMPLEMENTED if the method is none of the above.
PocoHTTPClient::S3MetricKind PocoHTTPClient::getMetricKind(const Aws::Http::HttpRequest & request)
{
    using Method = Aws::Http::HttpMethod;

    switch (request.GetMethod())
    {
        /// Methods that only fetch data or metadata.
        case Method::HTTP_HEAD:
        case Method::HTTP_GET:
            return S3MetricKind::Read;

        /// Methods that create, modify or remove data.
        case Method::HTTP_PUT:
        case Method::HTTP_POST:
        case Method::HTTP_PATCH:
        case Method::HTTP_DELETE:
            return S3MetricKind::Write;
    }

    /// Reached only when no case above matched.
    throw Exception("Unsupported request method", ErrorCodes::NOT_IMPLEMENTED);
}
/// Add `amount` to the ProfileEvents counter selected by the metric `type` and by the
/// read/write kind of `request` (see getMetricKind, which throws for unsupported methods).
/// The generic S3* event is always incremented; when the client was created with
/// `for_disk_s3` set, the matching DiskS3* event is incremented as well.
void PocoHTTPClient::addMetric(const Aws::Http::HttpRequest & request, S3MetricType type, ProfileEvents::Count amount) const
{
    constexpr size_t num_types = static_cast<size_t>(S3MetricType::EnumSize);
    constexpr size_t num_kinds = static_cast<size_t>(S3MetricKind::EnumSize);

    /// Rows follow the order of S3MetricType, columns follow the order of S3MetricKind (Read, Write).
    /// The outer dimension is deliberately deduced from the initializer: with an explicit size,
    /// a row forgotten after extending S3MetricType would be silently zero-filled instead of
    /// being rejected by the static_asserts below.
    const ProfileEvents::Event events_map[][num_kinds] = {
        {ProfileEvents::S3ReadMicroseconds, ProfileEvents::S3WriteMicroseconds},
        {ProfileEvents::S3ReadRequestsCount, ProfileEvents::S3WriteRequestsCount},
        {ProfileEvents::S3ReadRequestsErrors, ProfileEvents::S3WriteRequestsErrors},
        {ProfileEvents::S3ReadRequestsThrottling, ProfileEvents::S3WriteRequestsThrottling},
        {ProfileEvents::S3ReadRequestsRedirects, ProfileEvents::S3WriteRequestsRedirects},
    };

    const ProfileEvents::Event disk_s3_events_map[][num_kinds] = {
        {ProfileEvents::DiskS3ReadMicroseconds, ProfileEvents::DiskS3WriteMicroseconds},
        {ProfileEvents::DiskS3ReadRequestsCount, ProfileEvents::DiskS3WriteRequestsCount},
        {ProfileEvents::DiskS3ReadRequestsErrors, ProfileEvents::DiskS3WriteRequestsErrors},
        {ProfileEvents::DiskS3ReadRequestsThrottling, ProfileEvents::DiskS3WriteRequestsThrottling},
        {ProfileEvents::DiskS3ReadRequestsRedirects, ProfileEvents::DiskS3WriteRequestsRedirects},
    };

    /// Every metric type must have exactly one row in each table.
    static_assert(sizeof(events_map) / sizeof(events_map[0]) == num_types);
    static_assert(sizeof(disk_s3_events_map) / sizeof(disk_s3_events_map[0]) == num_types);

    const size_t type_index = static_cast<size_t>(type);
    const size_t kind_index = static_cast<size_t>(getMetricKind(request));

    ProfileEvents::increment(events_map[type_index][kind_index], amount);
    if (for_disk_s3)
        ProfileEvents::increment(disk_s3_events_map[type_index][kind_index], amount);
}
void PocoHTTPClient::makeRequestInternal(
Aws::Http::HttpRequest & request,
@ -189,45 +244,7 @@ void PocoHTTPClient::makeRequestInternal(
if (enable_s3_requests_logging)
LOG_TEST(log, "Make request to: {}", uri);
enum class S3MetricType
{
Microseconds,
Count,
Errors,
Throttling,
Redirects,
EnumSize,
};
auto select_metric = [&request](S3MetricType type)
{
const ProfileEvents::Event events_map[][2] = {
{ProfileEvents::S3ReadMicroseconds, ProfileEvents::S3WriteMicroseconds},
{ProfileEvents::S3ReadRequestsCount, ProfileEvents::S3WriteRequestsCount},
{ProfileEvents::S3ReadRequestsErrors, ProfileEvents::S3WriteRequestsErrors},
{ProfileEvents::S3ReadRequestsThrottling, ProfileEvents::S3WriteRequestsThrottling},
{ProfileEvents::S3ReadRequestsRedirects, ProfileEvents::S3WriteRequestsRedirects},
};
static_assert((sizeof(events_map) / sizeof(events_map[0])) == static_cast<unsigned int>(S3MetricType::EnumSize));
switch (request.GetMethod())
{
case Aws::Http::HttpMethod::HTTP_GET:
case Aws::Http::HttpMethod::HTTP_HEAD:
return events_map[static_cast<unsigned int>(type)][0]; // Read
case Aws::Http::HttpMethod::HTTP_POST:
case Aws::Http::HttpMethod::HTTP_DELETE:
case Aws::Http::HttpMethod::HTTP_PUT:
case Aws::Http::HttpMethod::HTTP_PATCH:
return events_map[static_cast<unsigned int>(type)][1]; // Write
}
throw Exception("Unsupported request method", ErrorCodes::NOT_IMPLEMENTED);
};
ProfileEvents::increment(select_metric(S3MetricType::Count));
addMetric(request, S3MetricType::Count);
CurrentMetrics::Increment metric_increment{CurrentMetrics::S3Requests};
try
@ -334,7 +351,7 @@ void PocoHTTPClient::makeRequestInternal(
auto & response_body_stream = session->receiveResponse(poco_response);
watch.stop();
ProfileEvents::increment(select_metric(S3MetricType::Microseconds), watch.elapsedMicroseconds());
addMetric(request, S3MetricType::Microseconds, watch.elapsedMicroseconds());
int status_code = static_cast<int>(poco_response.getStatus());
@ -349,7 +366,7 @@ void PocoHTTPClient::makeRequestInternal(
if (enable_s3_requests_logging)
LOG_TEST(log, "Redirecting request to new location: {}", location);
ProfileEvents::increment(select_metric(S3MetricType::Redirects));
addMetric(request, S3MetricType::Redirects);
continue;
}
@ -387,7 +404,7 @@ void PocoHTTPClient::makeRequestInternal(
LOG_WARNING(log, "Response for request contain <Error> tag in body, settings internal server error (500 code)");
response->SetResponseCode(Aws::Http::HttpResponseCode::INTERNAL_SERVER_ERROR);
ProfileEvents::increment(select_metric(S3MetricType::Errors));
addMetric(request, S3MetricType::Errors);
if (error_report)
error_report(request_configuration);
@ -401,11 +418,11 @@ void PocoHTTPClient::makeRequestInternal(
if (status_code == 429 || status_code == 503)
{ // API throttling
ProfileEvents::increment(select_metric(S3MetricType::Throttling));
addMetric(request, S3MetricType::Throttling);
}
else if (status_code >= 300)
{
ProfileEvents::increment(select_metric(S3MetricType::Errors));
addMetric(request, S3MetricType::Errors);
if (status_code >= 500 && error_report)
error_report(request_configuration);
}
@ -423,7 +440,7 @@ void PocoHTTPClient::makeRequestInternal(
response->SetClientErrorType(Aws::Client::CoreErrors::NETWORK_CONNECTION);
response->SetClientErrorMessage(getCurrentExceptionMessage(false));
ProfileEvents::increment(select_metric(S3MetricType::Errors));
addMetric(request, S3MetricType::Errors);
}
}

View File

@ -44,6 +44,7 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration
const RemoteHostFilter & remote_host_filter;
unsigned int s3_max_redirects;
bool enable_s3_requests_logging;
bool for_disk_s3;
HeaderCollection extra_headers;
void updateSchemeAndRegion();
@ -55,7 +56,8 @@ private:
const String & force_region_,
const RemoteHostFilter & remote_host_filter_,
unsigned int s3_max_redirects_,
bool enable_s3_requests_logging_
bool enable_s3_requests_logging_,
bool for_disk_s3_
);
/// Constructor of Aws::Client::ClientConfiguration must be called after AWS SDK initialization.
@ -113,18 +115,42 @@ public:
Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const override;
private:
void makeRequestInternal(
Aws::Http::HttpRequest & request,
std::shared_ptr<PocoHTTPResponse> & response,
Aws::Utils::RateLimits::RateLimiterInterface * readLimiter,
Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const;
/// Which quantity a metric update reports.
/// The numeric value of an enumerator is used as the row index of the event tables
/// in addMetric(), so the order here must match the row order there.
enum class S3MetricType
{
Microseconds,
Count,
Errors,
Throttling,
Redirects,
/// Not a metric: the number of enumerators above, used to size the event tables.
EnumSize,
};
/// Whether a request is accounted as a read or as a write (decided by getMetricKind()).
/// The numeric value of an enumerator is used as the column index of the event tables
/// in addMetric().
enum class S3MetricKind
{
Read,
Write,
/// Not a kind: the number of enumerators above, used to size the event tables.
EnumSize,
};
static S3MetricKind getMetricKind(const Aws::Http::HttpRequest & request);
void addMetric(const Aws::Http::HttpRequest & request, S3MetricType type, ProfileEvents::Count amount = 1) const;
std::function<ClientConfigurationPerRequest(const Aws::Http::HttpRequest &)> per_request_configuration;
std::function<void(const ClientConfigurationPerRequest &)> error_report;
ConnectionTimeouts timeouts;
const RemoteHostFilter & remote_host_filter;
unsigned int s3_max_redirects;
bool enable_s3_requests_logging;
bool for_disk_s3;
const HeaderCollection extra_headers;
};

View File

@ -87,7 +87,8 @@ TEST(IOTestAwsS3Client, AppendExtraSSECHeaders)
region,
remote_host_filter,
s3_max_redirects,
enable_s3_requests_logging
enable_s3_requests_logging,
/* for_disk_s3 = */ false
);
client_configuration.endpointOverride = uri.endpoint;

View File

@ -35,6 +35,26 @@
# include <fstream>
namespace DB
{
bool S3Exception::isRetryableError() const
{
/// Looks like these list is quite conservative, add more codes if you wish
static const std::unordered_set<Aws::S3::S3Errors> unretryable_errors = {
Aws::S3::S3Errors::NO_SUCH_KEY,
Aws::S3::S3Errors::ACCESS_DENIED,
Aws::S3::S3Errors::INVALID_ACCESS_KEY_ID,
Aws::S3::S3Errors::INVALID_SIGNATURE,
Aws::S3::S3Errors::NO_SUCH_UPLOAD,
Aws::S3::S3Errors::NO_SUCH_BUCKET,
};
return !unretryable_errors.contains(code);
}
}
namespace
{
@ -543,7 +563,7 @@ public:
/// AWS API tries credentials providers one by one. Some of providers (like ProfileConfigFileAWSCredentialsProvider) can be
/// quite verbose even if nobody configured them. So we use our provider first and only after it use default providers.
{
DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging);
DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging, configuration.for_disk_s3);
AddProvider(std::make_shared<AwsAuthSTSAssumeRoleWebIdentityCredentialsProvider>(aws_client_configuration));
}
@ -580,7 +600,7 @@ public:
}
else if (Aws::Utils::StringUtils::ToLower(ec2_metadata_disabled.c_str()) != "true")
{
DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging);
DB::S3::PocoHTTPClientConfiguration aws_client_configuration = DB::S3::ClientFactory::instance().createClientConfiguration(configuration.region, configuration.remote_host_filter, configuration.s3_max_redirects, configuration.enable_s3_requests_logging, configuration.for_disk_s3);
/// See MakeDefaultHttpResourceClientConfiguration().
/// This is part of EC2 metadata client, but unfortunately it can't be accessed from outside
@ -700,9 +720,10 @@ namespace S3
const String & force_region,
const RemoteHostFilter & remote_host_filter,
unsigned int s3_max_redirects,
bool enable_s3_requests_logging)
bool enable_s3_requests_logging,
bool for_disk_s3)
{
return PocoHTTPClientConfiguration(force_region, remote_host_filter, s3_max_redirects, enable_s3_requests_logging);
return PocoHTTPClientConfiguration(force_region, remote_host_filter, s3_max_redirects, enable_s3_requests_logging, for_disk_s3);
}
URI::URI(const Poco::URI & uri_)

View File

@ -7,23 +7,62 @@
#include <base/types.h>
#include <aws/core/Aws.h>
#include <aws/core/client/ClientConfiguration.h>
#include <aws/s3/S3Errors.h>
#include <IO/S3/PocoHTTPClient.h>
#include <Poco/URI.h>
#include <Common/Exception.h>
namespace Aws::S3
{
class S3Client;
}
namespace DB
{
class RemoteHostFilter;
struct HttpHeader;
using HeaderCollection = std::vector<HttpHeader>;
namespace ErrorCodes
{
extern const int S3_ERROR;
}
class RemoteHostFilter;
struct HttpHeader;
using HeaderCollection = std::vector<HttpHeader>;
/// Exception for a failed S3 operation.
/// It always carries ErrorCodes::S3_ERROR as the exception code and additionally
/// remembers the S3 error code, so that callers can inspect it (see isRetryableError()).
class S3Exception : public Exception
{
public:
    /// Construct from a ready-made message and the S3 error code.
    S3Exception(const std::string & msg, Aws::S3::S3Errors code_)
        : Exception(msg, ErrorCodes::S3_ERROR)
        , code(code_)
    {
    }

    /// Construct from the S3 error code and a message built with fmt::format,
    /// like the logging functions. Note that the code comes first in this overload.
    template <typename... Args>
    S3Exception(Aws::S3::S3Errors code_, fmt::format_string<Args...> fmt, Args &&... args)
        : S3Exception(fmt::format(fmt, std::forward<Args>(args)...), code_)
    {
    }

    /// The S3 error code this exception was created with.
    Aws::S3::S3Errors getS3ErrorCode() const { return code; }

    /// Whether retrying the failed operation may make sense; defined out of line.
    bool isRetryableError() const;

private:
    const Aws::S3::S3Errors code;
};
}
namespace DB::S3
{
class ClientFactory
{
public:
@ -45,7 +84,8 @@ public:
const String & force_region,
const RemoteHostFilter & remote_host_filter,
unsigned int s3_max_redirects,
bool enable_s3_requests_logging);
bool enable_s3_requests_logging,
bool for_disk_s3);
private:
ClientFactory();

View File

@ -8,6 +8,7 @@
#include <IO/WriteBufferFromS3.h>
#include <IO/WriteHelpers.h>
#include <IO/S3Common.h>
#include <Interpreters/Context.h>
#include <aws/s3/S3Client.h>
@ -173,7 +174,9 @@ void WriteBufferFromS3::finalizeImpl()
auto response = client_ptr->HeadObject(request);
if (!response.IsSuccess())
throw Exception(ErrorCodes::S3_ERROR, "Object {} from bucket {} disappeared immediately after upload, it's a bug in S3 or S3 API.", key, bucket);
throw S3Exception(fmt::format("Object {} from bucket {} disappeared immediately after upload, it's a bug in S3 or S3 API.", key, bucket), response.GetError().GetErrorType());
else
LOG_TRACE(log, "Object {} exists after upload", key);
}
}
@ -197,7 +200,7 @@ void WriteBufferFromS3::createMultipartUpload()
LOG_TRACE(log, "Multipart upload has created. Bucket: {}, Key: {}, Upload id: {}", bucket, key, multipart_upload_id);
}
else
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
}
void WriteBufferFromS3::writePart()
@ -309,7 +312,7 @@ void WriteBufferFromS3::processUploadRequest(UploadPartTask & task)
LOG_TRACE(log, "Writing part finished. Bucket: {}, Key: {}, Upload_id: {}, Etag: {}, Parts: {}", bucket, key, multipart_upload_id, task.tag, part_tags.size());
}
else
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
total_parts_uploaded++;
}
@ -343,9 +346,10 @@ void WriteBufferFromS3::completeMultipartUpload()
LOG_TRACE(log, "Multipart upload has completed. Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", bucket, key, multipart_upload_id, tags.size());
else
{
throw Exception(ErrorCodes::S3_ERROR, "{} Tags:{}",
outcome.GetError().GetMessage(),
fmt::join(tags.begin(), tags.end(), " "));
throw S3Exception(
outcome.GetError().GetErrorType(),
"Message: {}, Key: {}, Bucket: {}, Tags: {}",
outcome.GetError().GetMessage(), key, bucket, fmt::join(tags.begin(), tags.end(), " "));
}
}
@ -430,7 +434,10 @@ void WriteBufferFromS3::processPutRequest(const PutObjectTask & task)
if (outcome.IsSuccess())
LOG_TRACE(log, "Single part upload has completed. Bucket: {}, Key: {}, Object size: {}, WithPool: {}", bucket, key, task.req.GetContentLength(), with_pool);
else
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
throw S3Exception(
outcome.GetError().GetErrorType(),
"Message: {}, Key: {}, Bucket: {}, Object size: {}, WithPool: {}",
outcome.GetError().GetMessage(), key, bucket, task.req.GetContentLength(), with_pool);
}
void WriteBufferFromS3::waitForReadyBackGroundTasks()

View File

@ -31,7 +31,7 @@ void FileCacheSettings::loadFromConfig(const Poco::Util::AbstractConfiguration &
enable_filesystem_query_cache_limit = config.getUInt64(config_prefix + ".enable_filesystem_query_cache_limit", false);
enable_cache_hits_threshold = config.getUInt64(config_prefix + ".enable_cache_hits_threshold", REMOTE_FS_OBJECTS_CACHE_ENABLE_HITS_THRESHOLD);
do_not_evict_index_and_mark_files = config.getUInt64(config_prefix + ".do_not_evict_index_and_mark_files", true);
do_not_evict_index_and_mark_files = config.getUInt64(config_prefix + ".do_not_evict_index_and_mark_files", false);
}
}

View File

@ -21,7 +21,6 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int TABLE_IS_READ_ONLY;
extern const int SUPPORT_IS_DISABLED;
}
@ -34,11 +33,6 @@ InterpreterDeleteQuery::InterpreterDeleteQuery(const ASTPtr & query_ptr_, Contex
BlockIO InterpreterDeleteQuery::execute()
{
if (!getContext()->getSettingsRef().allow_experimental_lightweight_delete)
{
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Lightweight delete mutate is experimental. Set `allow_experimental_lightweight_delete` setting to enable it");
}
FunctionNameNormalizer().visit(query_ptr.get());
const ASTDeleteQuery & delete_query = query_ptr->as<ASTDeleteQuery &>();
auto table_id = getContext()->resolveStorageID(delete_query, Context::ResolveOrdinary);
@ -49,10 +43,6 @@ BlockIO InterpreterDeleteQuery::execute()
/// First check table storage for validations.
StoragePtr table = DatabaseCatalog::instance().getTable(table_id, getContext());
auto merge_tree = std::dynamic_pointer_cast<MergeTreeData>(table);
if (!merge_tree)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only MergeTree family tables are supported");
checkStorageSupportsTransactionsIfNeeded(table, getContext());
if (table->isStaticStorage())
throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only");
@ -69,6 +59,27 @@ BlockIO InterpreterDeleteQuery::execute()
auto table_lock = table->lockForShare(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout);
auto metadata_snapshot = table->getInMemoryMetadataPtr();
auto merge_tree = std::dynamic_pointer_cast<MergeTreeData>(table);
if (!merge_tree)
{
/// Convert to MutationCommand
MutationCommands mutation_commands;
MutationCommand mut_command;
mut_command.type = MutationCommand::Type::DELETE;
mut_command.predicate = delete_query.predicate;
mutation_commands.emplace_back(mut_command);
table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef());
MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate();
table->mutate(mutation_commands, getContext());
return {};
}
if (!getContext()->getSettingsRef().allow_experimental_lightweight_delete)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Lightweight delete mutate is experimental. Set `allow_experimental_lightweight_delete` setting to enable it");
/// Convert to MutationCommand
MutationCommands mutation_commands;
MutationCommand mut_command;

View File

@ -163,7 +163,7 @@ BlockIO InterpreterDescribeQuery::execute()
res_columns[6]->insertDefault();
res_columns[7]->insert(1u);
}, { type->getDefaultSerialization(), type, nullptr, nullptr });
}, ISerialization::SubstreamData(type->getDefaultSerialization()).withType(type));
}
}

View File

@ -226,7 +226,7 @@ bool isStorageTouchedByMutations(
ASTPtr select_query = prepareQueryAffectedAST(commands, storage, context_copy);
/// Interpreter must be alive, when we use result of execute() method.
/// For some reason it may copy context and and give it into ExpressionTransform
/// For some reason it may copy context and give it into ExpressionTransform
/// after that we will use context from destroyed stack frame in our stream.
InterpreterSelectQuery interpreter(
select_query, context_copy, storage, metadata_snapshot, SelectQueryOptions().ignoreLimits().ignoreProjections());
@ -288,13 +288,17 @@ MutationsInterpreter::MutationsInterpreter(
const StorageMetadataPtr & metadata_snapshot_,
MutationCommands commands_,
ContextPtr context_,
bool can_execute_)
bool can_execute_,
bool return_all_columns_,
bool return_deleted_rows_)
: storage(std::move(storage_))
, metadata_snapshot(metadata_snapshot_)
, commands(std::move(commands_))
, context(Context::createCopy(context_))
, can_execute(can_execute_)
, select_limits(SelectQueryOptions().analyze(!can_execute).ignoreLimits().ignoreProjections())
, return_all_columns(return_all_columns_)
, return_deleted_rows(return_deleted_rows_)
{
mutation_ast = prepare(!can_execute);
}
@ -472,14 +476,21 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
/// First, break a sequence of commands into stages.
for (auto & command : commands)
{
// we can return deleted rows only if it's the only present command
assert(command.type == MutationCommand::DELETE || !return_deleted_rows);
if (command.type == MutationCommand::DELETE)
{
mutation_kind.set(MutationKind::MUTATE_OTHER);
if (stages.empty() || !stages.back().column_to_updated.empty())
stages.emplace_back(context);
auto negated_predicate = makeASTFunction("isZeroOrNull", getPartitionAndPredicateExpressionForMutationCommand(command));
stages.back().filters.push_back(negated_predicate);
auto predicate = getPartitionAndPredicateExpressionForMutationCommand(command);
if (!return_deleted_rows)
predicate = makeASTFunction("isZeroOrNull", predicate);
stages.back().filters.push_back(predicate);
}
else if (command.type == MutationCommand::UPDATE)
{
@ -789,7 +800,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> &
/// Next, for each stage calculate columns changed by this and previous stages.
for (size_t i = 0; i < prepared_stages.size(); ++i)
{
if (!prepared_stages[i].filters.empty())
if (return_all_columns || !prepared_stages[i].filters.empty())
{
for (const auto & column : all_columns)
prepared_stages[i].output_columns.insert(column.name);

View File

@ -43,7 +43,9 @@ public:
const StorageMetadataPtr & metadata_snapshot_,
MutationCommands commands_,
ContextPtr context_,
bool can_execute_);
bool can_execute_,
bool return_all_columns_ = false,
bool return_deleted_rows_ = false);
void validate();
@ -156,6 +158,12 @@ private:
/// Columns, that we need to read for calculation of skip indices, projections or TTL expressions.
ColumnDependencies dependencies;
// whether all columns should be returned, not just updated
bool return_all_columns;
// whether we should return deleted or nondeleted rows on DELETE mutation
bool return_deleted_rows;
};
}

View File

@ -12,6 +12,7 @@
#include <Parsers/ASTFunction.h>
#include <utility>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/ObjectUtils.h>
#include <Interpreters/RequiredSourceColumnsVisitor.h>
#include <Common/checkStackSize.h>
#include <Storages/ColumnsDescription.h>
@ -187,29 +188,56 @@ ActionsDAGPtr evaluateMissingDefaults(
return createExpressions(header, expr_list, save_unneeded_columns, context);
}
static bool arrayHasNoElementsRead(const IColumn & column)
static std::unordered_map<String, ColumnPtr> collectOffsetsColumns(
const NamesAndTypesList & available_columns, const Columns & res_columns)
{
const auto * column_array = typeid_cast<const ColumnArray *>(&column);
std::unordered_map<String, ColumnPtr> offsets_columns;
if (!column_array)
return false;
auto available_column = available_columns.begin();
for (size_t i = 0; i < available_columns.size(); ++i, ++available_column)
{
if (res_columns[i] == nullptr || isColumnConst(*res_columns[i]))
continue;
size_t size = column_array->size();
if (!size)
return false;
auto serialization = IDataType::getSerialization(*available_column);
serialization->enumerateStreams([&](const auto & subpath)
{
if (subpath.empty() || subpath.back().type != ISerialization::Substream::ArraySizes)
return;
size_t data_size = column_array->getData().size();
if (data_size)
return false;
auto stream_name = ISerialization::getFileNameForStream(*available_column, subpath);
const auto & current_offsets_column = subpath.back().data.column;
size_t last_offset = column_array->getOffsets()[size - 1];
return last_offset != 0;
/// If for some reason multiple offsets columns are present
/// for the same nested data structure, choose the one that is not empty.
if (current_offsets_column && !current_offsets_column->empty())
{
auto & offsets_column = offsets_columns[stream_name];
if (!offsets_column)
offsets_column = current_offsets_column;
#ifndef NDEBUG
const auto & offsets_data = assert_cast<const ColumnUInt64 &>(*offsets_column).getData();
const auto & current_offsets_data = assert_cast<const ColumnUInt64 &>(*current_offsets_column).getData();
if (offsets_data != current_offsets_data)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Found non-equal columns with offsets (sizes: {} and {}) for stream {}",
offsets_data.size(), current_offsets_data.size(), stream_name);
#endif
}
}, available_column->type, res_columns[i]);
}
return offsets_columns;
}
void fillMissingColumns(
Columns & res_columns,
size_t num_rows,
const NamesAndTypesList & requested_columns,
const NamesAndTypesList & available_columns,
const NameSet & partially_read_columns,
StorageMetadataPtr metadata_snapshot)
{
size_t num_columns = requested_columns.size();
@ -218,65 +246,79 @@ void fillMissingColumns(
"Invalid number of columns passed to fillMissingColumns. Expected {}, got {}",
num_columns, res_columns.size());
/// For a missing column of a nested data structure we must create not a column of empty
/// arrays, but a column of arrays of correct length.
/// For a missing column of a nested data structure
/// we must create not a column of empty arrays,
/// but a column of arrays of correct length.
/// First, collect offset columns for all arrays in the block.
auto offsets_columns = collectOffsetsColumns(available_columns, res_columns);
std::unordered_map<String, ColumnPtr> offset_columns;
/// Insert default values only for columns without default expressions.
auto requested_column = requested_columns.begin();
for (size_t i = 0; i < num_columns; ++i, ++requested_column)
{
if (res_columns[i] == nullptr)
continue;
if (const auto * array = typeid_cast<const ColumnArray *>(res_columns[i].get()))
{
String offsets_name = Nested::extractTableName(requested_column->name);
auto & offsets_column = offset_columns[offsets_name];
/// If for some reason multiple offsets columns are present for the same nested data structure,
/// choose the one that is not empty.
if (!offsets_column || offsets_column->empty())
offsets_column = array->getOffsetsPtr();
}
}
/// insert default values only for columns without default expressions
requested_column = requested_columns.begin();
for (size_t i = 0; i < num_columns; ++i, ++requested_column)
{
const auto & [name, type] = *requested_column;
if (res_columns[i] && arrayHasNoElementsRead(*res_columns[i]))
if (res_columns[i] && partially_read_columns.contains(name))
res_columns[i] = nullptr;
if (res_columns[i] == nullptr)
if (res_columns[i])
continue;
if (metadata_snapshot && metadata_snapshot->getColumns().hasDefault(name))
continue;
std::vector<ColumnPtr> current_offsets;
size_t num_dimensions = 0;
const auto * array_type = typeid_cast<const DataTypeArray *>(type.get());
if (array_type && !offsets_columns.empty())
{
if (metadata_snapshot && metadata_snapshot->getColumns().hasDefault(name))
continue;
num_dimensions = getNumberOfDimensions(*array_type);
current_offsets.resize(num_dimensions);
String offsets_name = Nested::extractTableName(name);
auto offset_it = offset_columns.find(offsets_name);
const auto * array_type = typeid_cast<const DataTypeArray *>(type.get());
if (offset_it != offset_columns.end() && array_type)
auto serialization = IDataType::getSerialization(*requested_column);
serialization->enumerateStreams([&](const auto & subpath)
{
const auto & nested_type = array_type->getNestedType();
ColumnPtr offsets_column = offset_it->second;
size_t nested_rows = typeid_cast<const ColumnUInt64 &>(*offsets_column).getData().back();
if (subpath.empty() || subpath.back().type != ISerialization::Substream::ArraySizes)
return;
ColumnPtr nested_column =
nested_type->createColumnConstWithDefaultValue(nested_rows)->convertToFullColumnIfConst();
size_t level = ISerialization::getArrayLevel(subpath);
assert(level < num_dimensions);
res_columns[i] = ColumnArray::create(nested_column, offsets_column);
}
else
auto stream_name = ISerialization::getFileNameForStream(*requested_column, subpath);
auto it = offsets_columns.find(stream_name);
if (it != offsets_columns.end())
current_offsets[level] = it->second;
});
for (size_t j = 0; j < num_dimensions; ++j)
{
/// We must turn a constant column into a full column because the interpreter could infer
/// that it is constant everywhere but in some blocks (from other parts) it can be a full column.
res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst();
if (!current_offsets[j])
{
current_offsets.resize(j);
break;
}
}
}
if (!current_offsets.empty())
{
size_t num_empty_dimensions = num_dimensions - current_offsets.size();
auto scalar_type = createArrayOfType(getBaseTypeOfArray(type), num_empty_dimensions);
size_t data_size = assert_cast<const ColumnUInt64 &>(*current_offsets.back()).getData().back();
res_columns[i] = scalar_type->createColumnConstWithDefaultValue(data_size)->convertToFullColumnIfConst();
for (auto it = current_offsets.rbegin(); it != current_offsets.rend(); ++it)
res_columns[i] = ColumnArray::create(res_columns[i], *it);
}
else
{
/// We must turn a constant column into a full column because the interpreter could infer
/// that it is constant everywhere but in some blocks (from other parts) it can be a full column.
res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst();
}
}
}

View File

@ -1,5 +1,6 @@
#pragma once
#include <Core/Names.h>
#include <Interpreters/Context_fwd.h>
#include <Common/COW.h>
@ -43,6 +44,8 @@ void fillMissingColumns(
Columns & res_columns,
size_t num_rows,
const NamesAndTypesList & requested_columns,
const NamesAndTypesList & available_columns,
const NameSet & partially_read_columns,
StorageMetadataPtr metadata_snapshot);
}

View File

@ -224,8 +224,6 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
select_query->group_by_with_rollup = true;
else if (s_cube.ignore(pos, expected))
select_query->group_by_with_cube = true;
else if (s_grouping_sets.ignore(pos, expected))
select_query->group_by_with_grouping_sets = true;
else if (s_totals.ignore(pos, expected))
select_query->group_by_with_totals = true;
else

View File

@ -251,14 +251,17 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B
outputs.push_back(grouping_node);
const auto & missing_columns = grouping_sets_params[set_counter].missing_keys;
const auto & used_keys = grouping_sets_params[set_counter].used_keys;
auto to_nullable_function = FunctionFactory::instance().get("toNullable", nullptr);
for (size_t i = 0; i < output_header.columns(); ++i)
{
auto & col = output_header.getByPosition(i);
const auto it = std::find_if(
const auto missing_it = std::find_if(
missing_columns.begin(), missing_columns.end(), [&](const auto & missing_col) { return missing_col == col.name; });
if (it != missing_columns.end())
const auto used_it = std::find_if(
used_keys.begin(), used_keys.end(), [&](const auto & used_col) { return used_col == col.name; });
if (missing_it != missing_columns.end())
{
auto column_with_default = col.column->cloneEmpty();
col.type->insertDefaultInto(*column_with_default);
@ -270,7 +273,7 @@ void AggregatingStep::transformPipeline(QueryPipelineBuilder & pipeline, const B
else
{
const auto * column_node = dag->getOutputs()[header.getPositionByName(col.name)];
if (group_by_use_nulls && column_node->result_type->canBeInsideNullable())
if (used_it != used_keys.end() && group_by_use_nulls && column_node->result_type->canBeInsideNullable())
outputs.push_back(&dag->addFunction(to_nullable_function, { column_node }, col.name));
else
outputs.push_back(column_node);

View File

@ -179,7 +179,6 @@ Pipe ReadFromMergeTree::readFromPool(
sum_marks,
min_marks_for_concurrent_read,
std::move(parts_with_range),
data,
storage_snapshot,
prewhere_info,
required_columns,

View File

@ -780,7 +780,7 @@ void ColumnsDescription::addSubcolumns(const String & name_in_storage, const Dat
"Cannot add subcolumn {}: column with this name already exists", subcolumn.name);
subcolumns.get<0>().insert(std::move(subcolumn));
}, {type_in_storage->getDefaultSerialization(), type_in_storage, nullptr, nullptr});
}, ISerialization::SubstreamData(type_in_storage->getDefaultSerialization()).withType(type_in_storage));
}
void ColumnsDescription::removeSubcolumns(const String & name_in_storage)

View File

@ -0,0 +1,24 @@
#pragma once
#include <string>
#include <unordered_map>
namespace DB
{
/// Conversions that have to be applied on the fly to a data part.
/// They are built from the most recent mutation commands for that part.
/// At the moment only a rename map (coming from ALTER_RENAME commands) is
/// kept here: for every other kind of ALTER the conversion can be deduced
/// from the difference between part->getColumns() and storage->getColumns().
struct AlterConversions
{
    /// Maps the new name of a column to its old name.
    std::unordered_map<std::string, std::string> rename_map;

    /// Returns true if rename_map contains an entry keyed by new_name.
    bool isColumnRenamed(const std::string & new_name) const
    {
        return rename_map.find(new_name) != rename_map.end();
    }

    /// Returns the old name stored for new_name.
    /// Throws std::out_of_range when rename_map has no such key.
    std::string getColumnOldName(const std::string & new_name) const
    {
        return rename_map.at(new_name);
    }
};
}

Some files were not shown because too many files have changed in this diff Show More