Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-28 10:31:57 +00:00)

Commit 287d858fda: Merge branch 'master' into mvcc_prototype
.github/workflows/master.yml (vendored, 81 lines changed)

@@ -360,6 +360,52 @@ jobs:
           docker kill "$(docker ps -q)" ||:
           docker rm -f "$(docker ps -a -q)" ||:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+  BuilderBinGCC:
+    needs: [DockerHubPush]
+    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
+    runs-on: [self-hosted, builder]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          CHECK_NAME=ClickHouse build check (actions)
+          BUILD_NAME=binary_gcc
+          EOF
+      - name: Download changed images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images
+          path: ${{ env.IMAGES_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+        with:
+          submodules: 'true'
+          fetch-depth: 0 # otherwise we will have no info about contributors
+      - name: Build
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
+      - name: Upload build URLs to artifacts
+        if: ${{ success() || failure() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ env.BUILD_NAME }}
+          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderDebAsan:
     needs: [DockerHubPush]
     runs-on: [self-hosted, builder]

@@ -918,6 +964,7 @@ jobs:
       - BuilderDebRelease
       - BuilderDebAarch64
       - BuilderBinRelease
+      - BuilderBinGCC
      - BuilderDebAsan
      - BuilderDebTsan
      - BuilderDebUBsan

@@ -2608,6 +2655,40 @@ jobs:
           docker kill "$(docker ps -q)" ||:
           docker rm -f "$(docker ps -a -q)" ||:
           sudo rm -fr "$TEMP_PATH"
+  UnitTestsReleaseGCC:
+    needs: [BuilderBinGCC]
+    runs-on: [self-hosted, fuzzer-unit-tester]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/unit_tests_asan
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Unit tests (release-gcc, actions)
+          REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
+          EOF
+      - name: Download json reports
+        uses: actions/download-artifact@v2
+        with:
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Unit test
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+          python3 unit_tests_check.py "$CHECK_NAME"
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
   UnitTestsTsan:
     needs: [BuilderDebTsan]
     runs-on: [self-hosted, fuzzer-unit-tester]
.github/workflows/pull_request.yml (vendored, 77 lines changed)

@@ -370,6 +370,48 @@ jobs:
           docker kill "$(docker ps -q)" ||:
           docker rm -f "$(docker ps -a -q)" ||:
           sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
+  BuilderBinGCC:
+    needs: [DockerHubPush, FastTest]
+    runs-on: [self-hosted, builder]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/build_check
+          IMAGES_PATH=${{runner.temp}}/images_path
+          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
+          CACHES_PATH=${{runner.temp}}/../ccaches
+          CHECK_NAME=ClickHouse build check (actions)
+          BUILD_NAME=binary_gcc
+          EOF
+      - name: Download changed images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images
+          path: ${{ runner.temp }}/images_path
+      - name: Check out repository code
+        uses: actions/checkout@v2
+        with:
+          submodules: 'true'
+          fetch-depth: 0 # otherwise we will have no info about contributors
+      - name: Build
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
+      - name: Upload build URLs to artifacts
+        if: ${{ success() || failure() }}
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ env.BUILD_NAME }}
+          path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
   BuilderDebAarch64:
     needs: [DockerHubPush, FastTest]
     runs-on: [self-hosted, builder]

@@ -963,6 +1005,7 @@ jobs:
       - BuilderDebRelease
      - BuilderDebAarch64
      - BuilderBinRelease
+      - BuilderBinGCC
      - BuilderDebAsan
      - BuilderDebTsan
      - BuilderDebUBsan

@@ -2808,6 +2851,40 @@ jobs:
           docker kill "$(docker ps -q)" ||:
           docker rm -f "$(docker ps -a -q)" ||:
           sudo rm -fr "$TEMP_PATH"
+  UnitTestsReleaseGCC:
+    needs: [BuilderBinGCC]
+    runs-on: [self-hosted, fuzzer-unit-tester]
+    steps:
+      - name: Set envs
+        run: |
+          cat >> "$GITHUB_ENV" << 'EOF'
+          TEMP_PATH=${{runner.temp}}/unit_tests_asan
+          REPORTS_PATH=${{runner.temp}}/reports_dir
+          CHECK_NAME=Unit tests (release-gcc, actions)
+          REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
+          EOF
+      - name: Download json reports
+        uses: actions/download-artifact@v2
+        with:
+          path: ${{ env.REPORTS_PATH }}
+      - name: Clear repository
+        run: |
+          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Unit test
+        run: |
+          sudo rm -fr "$TEMP_PATH"
+          mkdir -p "$TEMP_PATH"
+          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
+          cd "$REPO_COPY/tests/ci"
+          python3 unit_tests_check.py "$CHECK_NAME"
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill "$(docker ps -q)" ||:
+          docker rm -f "$(docker ps -a -q)" ||:
+          sudo rm -fr "$TEMP_PATH"
   UnitTestsTsan:
     needs: [BuilderDebTsan]
     runs-on: [self-hosted, fuzzer-unit-tester]
@@ -261,7 +261,7 @@ endif ()
 # Add a section with the hash of the compiled machine code for integrity checks.
 # Only for official builds, because adding a section can be time consuming (rewrite of several GB).
 # And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary)
-if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
+if (OBJCOPY_PATH AND CLICKHOUSE_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
     set (USE_BINARY_HASH 1)
 endif ()

@@ -51,6 +51,6 @@ if (GLIBC_COMPATIBILITY)

     message (STATUS "Some symbols from glibc will be replaced for compatibility")

-elseif (YANDEX_OFFICIAL_BUILD)
+elseif (CLICKHOUSE_OFFICIAL_BUILD)
     message (WARNING "Option GLIBC_COMPATIBILITY must be turned on for production builds.")
 endif ()

@@ -18,6 +18,6 @@ set (VERSION_STRING_SHORT "${VERSION_MAJOR}.${VERSION_MINOR}")

 math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000")

-if(YANDEX_OFFICIAL_BUILD)
+if(CLICKHOUSE_OFFICIAL_BUILD)
     set(VERSION_OFFICIAL " (official build)")
 endif()
@@ -69,9 +69,10 @@ endif ()
 target_compile_options(_avrocpp PRIVATE ${SUPPRESS_WARNINGS})

 # create a symlink to include headers with <avro/...>
+set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
 ADD_CUSTOM_TARGET(avro_symlink_headers ALL
-    COMMAND ${CMAKE_COMMAND} -E make_directory "${AVROCPP_ROOT_DIR}/include"
+    COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}"
-    COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVROCPP_ROOT_DIR}/include/avro"
+    COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro"
 )
 add_dependencies(_avrocpp avro_symlink_headers)
-target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVROCPP_ROOT_DIR}/include")
+target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}")

@@ -27,7 +27,11 @@ target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRAR

 # asio

-target_compile_definitions (_boost_headers_only INTERFACE BOOST_ASIO_STANDALONE=1)
+target_compile_definitions (_boost_headers_only INTERFACE
+    BOOST_ASIO_STANDALONE=1
+    # Avoid using of deprecated in c++ > 17 std::result_of
+    BOOST_ASIO_HAS_STD_INVOKE_RESULT=1
+)

 # iostreams
@@ -1,12 +1,9 @@
-# During cross-compilation in our CI we have to use llvm-tblgen and other building tools
-# tools to be build for host architecture and everything else for target architecture (e.g. AArch64)
-# Possible workaround is to use llvm-tblgen from some package...
-# But lets just enable LLVM for native builds
-if (CMAKE_CROSSCOMPILING OR SANITIZE STREQUAL "undefined")
-    set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
+if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined")
+    set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
 else()
     set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
 endif()

 option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})

 if (NOT ENABLE_EMBEDDED_COMPILER)
@@ -163,6 +163,7 @@ def parse_env_variables(
         cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr")
         cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc")
         cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var")
+        cmake_flags.append("-DBUILD_STANDALONE_KEEPER=ON")
     if is_release_build(build_type, package_type, sanitizer, split_binary):
         cmake_flags.append("-DINSTALL_STRIPPED_BINARIES=ON")

@@ -244,7 +245,7 @@ def parse_env_variables(
     result.append(f"AUTHOR='{author}'")

     if official:
-        cmake_flags.append("-DYANDEX_OFFICIAL_BUILD=1")
+        cmake_flags.append("-DCLICKHOUSE_OFFICIAL_BUILD=1")

     result.append('CMAKE_FLAGS="' + " ".join(cmake_flags) + '"')
@@ -13,7 +13,7 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 echo "$script_dir"
 repo_dir=ch
 BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-13_debug_none_bundled_unsplitted_disable_False_binary"}
-BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
+BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}

 function clone
 {

@@ -2,7 +2,7 @@
 set -euo pipefail


-CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-13_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"}
+CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-13_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"}
 CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}


@@ -10,7 +10,7 @@ if [ -z "$CLICKHOUSE_REPO_PATH" ]; then
     CLICKHOUSE_REPO_PATH=ch
     rm -rf ch ||:
     mkdir ch ||:
-    wget -nv -nd -c "https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/repo/clickhouse_no_subs.tar.gz"
+    wget -nv -nd -c "https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/repo/clickhouse_no_subs.tar.gz"
     tar -C ch --strip-components=1 -xf clickhouse_no_subs.tar.gz
     ls -lath ||:
 fi
@@ -1294,15 +1294,15 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv')
     select '' test_name,
         '$(sed -n 's/.*<!--message: \(.*\)-->/\1/p' report.html)' test_status,
         0 test_duration_ms,
-        'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#fail1' report_url
+        'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#fail1' report_url
     union all
     select test || ' #' || toString(query_index), 'slower' test_status, 0 test_duration_ms,
-        'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#changes-in-performance.'
+        'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#changes-in-performance.'
         || test || '.' || toString(query_index) report_url
     from queries where changed_fail != 0 and diff > 0
     union all
     select test || ' #' || toString(query_index), 'unstable' test_status, 0 test_duration_ms,
-        'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#unstable-queries.'
+        'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#unstable-queries.'
         || test || '.' || toString(query_index) report_url
     from queries where unstable_fail != 0
     )
@@ -16,26 +16,17 @@ right_sha=$4
 datasets=${CHPC_DATASETS-"hits1 hits10 hits100 values"}

 declare -A dataset_paths
-if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
-    dataset_paths["hits10"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_10m_single/partitions/hits_10m_single.tar"
-    dataset_paths["hits100"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_100m_single/partitions/hits_100m_single.tar"
-    dataset_paths["hits1"]="https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits_v1.tar"
-    dataset_paths["values"]="https://clickhouse-datasets.s3.amazonaws.com/values_with_expressions/partitions/test_values.tar"
-else
-    dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar"
-    dataset_paths["hits100"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar"
-    dataset_paths["hits1"]="https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar"
-    dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_expressions/partitions/test_values.tar"
-fi
+dataset_paths["hits10"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_10m_single/partitions/hits_10m_single.tar"
+dataset_paths["hits100"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_100m_single/partitions/hits_100m_single.tar"
+dataset_paths["hits1"]="https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits_v1.tar"
+dataset_paths["values"]="https://clickhouse-datasets.s3.amazonaws.com/values_with_expressions/partitions/test_values.tar"


 function download
 {
     # Historically there were various paths for the performance test package.
     # Test all of them.
-    declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz"
-                            "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/clickhouse_build_check/performance/performance.tgz"
-                            )
+    declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz")

     for path in "${urls_to_try[@]}"
     do
@@ -4,7 +4,7 @@ set -ex
 CHPC_CHECK_START_TIMESTAMP="$(date +%s)"
 export CHPC_CHECK_START_TIMESTAMP

-S3_URL=${S3_URL:="https://clickhouse-builds.s3.yandex.net"}
+S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"}

 COMMON_BUILD_PREFIX="/clickhouse_build_check"
 if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then

@@ -64,9 +64,7 @@ function find_reference_sha
     # Historically there were various path for the performance test package,
     # test all of them.
     unset found
-    declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz"
-                            "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/clickhouse_build_check/performance/performance.tgz"
-                            )
+    declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz")
     for path in "${urls_to_try[@]}"
     do
         if curl_with_retry "$path"
@@ -11,7 +11,7 @@ RUN apt-get update -y \

 COPY s3downloader /s3downloader

-ENV S3_URL="https://clickhouse-datasets.s3.yandex.net"
+ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
 ENV DATASETS="hits visits"
 ENV EXPORT_S3_STORAGE_POLICIES=1

@@ -10,7 +10,7 @@ import requests
 import tempfile


-DEFAULT_URL = 'https://clickhouse-datasets.s3.yandex.net'
+DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com'

 AVAILABLE_DATASETS = {
     'hits': 'hits_v1.tar',
@@ -41,6 +41,7 @@ sleep 5
 ./mc admin user add clickminio test testtest
 ./mc admin policy set clickminio readwrite user=test
 ./mc mb clickminio/test
+./mc policy set public clickminio/test


 # Upload data to Minio. By default after unpacking all tests will in

@@ -29,7 +29,7 @@ COPY ./download_previous_release /download_previous_release
 COPY run.sh /

 ENV DATASETS="hits visits"
-ENV S3_URL="https://clickhouse-datasets.s3.yandex.net"
+ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
 ENV EXPORT_S3_STORAGE_POLICIES=1

 CMD ["/bin/bash", "/run.sh"]
packages/clickhouse-keeper-dbg.yaml (new file, 28 lines)

@@ -0,0 +1,28 @@
+# package sources should be placed in ${PWD}/root
+# nfpm should run from the same directory with a config
+name: "clickhouse-keeper-dbg"
+arch: "${DEB_ARCH}" # amd64, arm64
+platform: "linux"
+version: "${CLICKHOUSE_VERSION_STRING}"
+vendor: "ClickHouse Inc."
+homepage: "https://clickhouse.com"
+license: "Apache"
+section: "database"
+priority: "optional"
+maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
+description: |
+  debugging symbols for clickhouse-keeper
+  This package contains the debugging symbols for clickhouse-keeper.
+
+contents:
+- src: root/usr/lib/debug/usr/bin/clickhouse-keeper.debug
+  dst: /usr/lib/debug/usr/bin/clickhouse-keeper.debug
+# docs
+- src: ../AUTHORS
+  dst: /usr/share/doc/clickhouse-keeper-dbg/AUTHORS
+- src: ../CHANGELOG.md
+  dst: /usr/share/doc/clickhouse-keeper-dbg/CHANGELOG.md
+- src: ../LICENSE
+  dst: /usr/share/doc/clickhouse-keeper-dbg/LICENSE
+- src: ../README.md
+  dst: /usr/share/doc/clickhouse-keeper-dbg/README.md
packages/clickhouse-keeper.yaml (new file, 40 lines)

@@ -0,0 +1,40 @@
+# package sources should be placed in ${PWD}/root
+# nfpm should run from the same directory with a config
+name: "clickhouse-keeper"
+arch: "${DEB_ARCH}" # amd64, arm64
+platform: "linux"
+version: "${CLICKHOUSE_VERSION_STRING}"
+vendor: "ClickHouse Inc."
+homepage: "https://clickhouse.com"
+license: "Apache"
+section: "database"
+priority: "optional"
+
+conflicts:
+- clickhouse-server
+depends:
+- adduser
+suggests:
+- clickhouse-keeper-dbg
+
+maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
+description: |
+  Static clickhouse-keeper binary
+  A stand-alone clickhouse-keeper package
+
+
+contents:
+- src: root/etc/clickhouse-keeper
+  dst: /etc/clickhouse-keeper
+  type: config
+- src: root/usr/bin/clickhouse-keeper
+  dst: /usr/bin/clickhouse-keeper
+# docs
+- src: ../AUTHORS
+  dst: /usr/share/doc/clickhouse-keeper/AUTHORS
+- src: ../CHANGELOG.md
+  dst: /usr/share/doc/clickhouse-keeper/CHANGELOG.md
+- src: ../LICENSE
+  dst: /usr/share/doc/clickhouse-keeper/LICENSE
+- src: ../README.md
+  dst: /usr/share/doc/clickhouse-keeper/README.md
@@ -71,17 +71,11 @@ if (BUILD_STANDALONE_KEEPER)
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDelta.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDoubleDelta.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecGorilla.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecT64.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecZSTD.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionFactory.cpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/getCompressionCodecForFile.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/ICompressionCodec.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/LZ4_decompress_faster.cpp
@@ -829,6 +829,36 @@ if (ThreadFuzzer::instance().isEffective())
         fs::create_directories(path / "metadata_dropped/");
     }

+#if USE_ROCKSDB
+    /// Initialize merge tree metadata cache
+    if (config().has("merge_tree_metadata_cache"))
+    {
+        fs::create_directories(path / "rocksdb/");
+        size_t size = config().getUInt64("merge_tree_metadata_cache.lru_cache_size", 256 << 20);
+        bool continue_if_corrupted = config().getBool("merge_tree_metadata_cache.continue_if_corrupted", false);
+        try
+        {
+            LOG_DEBUG(
+                log, "Initiailizing merge tree metadata cache lru_cache_size:{} continue_if_corrupted:{}", size, continue_if_corrupted);
+            global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size);
+        }
+        catch (...)
+        {
+            if (continue_if_corrupted)
+            {
+                /// Rename rocksdb directory and reinitialize merge tree metadata cache
+                time_t now = time(nullptr);
+                fs::rename(path / "rocksdb", path / ("rocksdb.old." + std::to_string(now)));
+                global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size);
+            }
+            else
+            {
+                throw;
+            }
+        }
+    }
+#endif
+
     if (config().has("interserver_http_port") && config().has("interserver_https_port"))
         throw Exception("Both http and https interserver ports are specified", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
@@ -1294,4 +1294,10 @@
         </tables>
     </rocksdb>
     -->
+
+    <!-- Uncomment if enable merge tree metadata cache -->
+    <merge_tree_metadata_cache>
+        <lru_cache_size>268435456</lru_cache_size>
+        <continue_if_corrupted>true</continue_if_corrupted>
+    </merge_tree_metadata_cache>
 </clickhouse>
@@ -67,7 +67,7 @@ auto parseArguments(const std::string & name, const DataTypes & arguments)
         values_types.push_back(array_type->getNestedType());
     }

-    return std::tuple{std::move(keys_type), std::move(values_types), tuple_argument};
+    return std::tuple<DataTypePtr, DataTypes, bool>{std::move(keys_type), std::move(values_types), tuple_argument};
 }

 // This function instantiates a particular overload of the sumMap family of
@@ -494,6 +494,11 @@ endif()

 target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::fast_float)

+if (USE_ORC)
+    dbms_target_link_libraries(PUBLIC ${ORC_LIBRARIES})
+    dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR} "${CMAKE_BINARY_DIR}/contrib/orc/c++/include")
+endif ()
+
 if (TARGET ch_contrib::rocksdb)
     dbms_target_link_libraries(PUBLIC ch_contrib::rocksdb)
 endif()

@@ -573,10 +578,6 @@ if (ENABLE_TESTS)
     target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::simdjson)
 endif()

-if(TARGET ch_contrib::rapidjson)
-    target_include_directories(unit_tests_dbms PRIVATE ch_contrib::rapidjson)
-endif()
-
 if (TARGET ch_contrib::yaml_cpp)
     target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::yaml_cpp)
 endif()
@@ -521,7 +521,7 @@ ColumnObject::ColumnObject(bool is_nullable_)
 {
 }

-ColumnObject::ColumnObject(SubcolumnsTree && subcolumns_, bool is_nullable_)
+ColumnObject::ColumnObject(Subcolumns && subcolumns_, bool is_nullable_)
     : is_nullable(is_nullable_)
     , subcolumns(std::move(subcolumns_))
     , num_rows(subcolumns.empty() ? 0 : (*subcolumns.begin())->data.size())

@@ -696,7 +696,7 @@ const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & ke
 ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key)
 {
     if (const auto * node = subcolumns.findLeaf(key))
-        return const_cast<SubcolumnsTree::Node *>(node)->data;
+        return const_cast<Subcolumns::Node *>(node)->data;

     throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in ColumnObject", key.getPath());
 }

@@ -794,7 +794,7 @@ bool ColumnObject::isFinalized() const
 void ColumnObject::finalize()
 {
     size_t old_size = size();
-    SubcolumnsTree new_subcolumns;
+    Subcolumns new_subcolumns;
     for (auto && entry : subcolumns)
     {
         const auto & least_common_type = entry->data.getLeastCommonType();
@@ -138,20 +138,20 @@ public:
         size_t num_of_defaults_in_prefix = 0;
     };

-    using SubcolumnsTree = SubcolumnsTree<Subcolumn>;
+    using Subcolumns = SubcolumnsTree<Subcolumn>;

 private:
     /// If true then all subcolumns are nullable.
     const bool is_nullable;

-    SubcolumnsTree subcolumns;
+    Subcolumns subcolumns;
     size_t num_rows;

 public:
     static constexpr auto COLUMN_NAME_DUMMY = "_dummy";

     explicit ColumnObject(bool is_nullable_);
-    ColumnObject(SubcolumnsTree && subcolumns_, bool is_nullable_);
+    ColumnObject(Subcolumns && subcolumns_, bool is_nullable_);

     /// Checks that all subcolumns have consistent sizes.
     void checkConsistency() const;

@@ -173,8 +173,8 @@ public:
     /// It cares about consistency of sizes of Nested arrays.
     void addNestedSubcolumn(const PathInData & key, const FieldInfo & field_info, size_t new_size);

-    const SubcolumnsTree & getSubcolumns() const { return subcolumns; }
-    SubcolumnsTree & getSubcolumns() { return subcolumns; }
+    const Subcolumns & getSubcolumns() const { return subcolumns; }
+    Subcolumns & getSubcolumns() { return subcolumns; }
     PathsInData getKeys() const;

     /// Finalizes all subcolumns.
@@ -437,6 +437,7 @@ String FileSegment::stateToString(FileSegment::State state)
         case FileSegment::State::SKIP_CACHE:
             return "SKIP_CACHE";
     }
+    __builtin_unreachable();
 }

 String FileSegmentsHolder::toString()
@@ -285,6 +285,13 @@
     \
     M(MainConfigLoads, "Number of times the main configuration was reloaded.") \
     \
+    M(MergeTreeMetadataCacheGet, "Number of rocksdb reads(used for merge tree metadata cache)") \
+    M(MergeTreeMetadataCachePut, "Number of rocksdb puts(used for merge tree metadata cache)") \
+    M(MergeTreeMetadataCacheDelete, "Number of rocksdb deletes(used for merge tree metadata cache)") \
+    M(MergeTreeMetadataCacheSeek, "Number of rocksdb seeks(used for merge tree metadata cache)") \
+    M(MergeTreeMetadataCacheHit, "Number of times the read of meta file was done from MergeTree metadata cache") \
+    M(MergeTreeMetadataCacheMiss, "Number of times the read of meta file was not done from MergeTree metadata cache") \
+    \
     M(ScalarSubqueriesGlobalCacheHit, "Number of times a read from a scalar subquery was done using the global cache") \
     M(ScalarSubqueriesLocalCacheHit, "Number of times a read from a scalar subquery was done using the local cache") \
     M(ScalarSubqueriesCacheMiss, "Number of times a read from a scalar subquery was not cached and had to be calculated completely")
@@ -1,7 +1,18 @@
 #include "gtest_global_context.h"

 const ContextHolder & getContext()
+{
+    return getMutableContext();
+}
+
+ContextHolder & getMutableContext()
 {
     static ContextHolder holder;
     return holder;
 }
+
+void destroyContext()
+{
+    auto & holder = getMutableContext();
+    return holder.destroy();
+}

@@ -16,6 +16,17 @@ struct ContextHolder
     }

     ContextHolder(ContextHolder &&) = default;

+    void destroy()
+    {
+        context->shutdown();
+        context.reset();
+        shared_context.reset();
+    }
 };

 const ContextHolder & getContext();

+ContextHolder & getMutableContext();
+
+void destroyContext();
@@ -165,25 +165,36 @@ void registerCodecNone(CompressionCodecFactory & factory);
 void registerCodecLZ4(CompressionCodecFactory & factory);
 void registerCodecLZ4HC(CompressionCodecFactory & factory);
 void registerCodecZSTD(CompressionCodecFactory & factory);
+void registerCodecMultiple(CompressionCodecFactory & factory);
+
+
+/// Keeper use only general-purpose codecs, so we don't need these special codecs
+/// in standalone build
+#ifndef KEEPER_STANDALONE_BUILD
+
 void registerCodecDelta(CompressionCodecFactory & factory);
 void registerCodecT64(CompressionCodecFactory & factory);
 void registerCodecDoubleDelta(CompressionCodecFactory & factory);
 void registerCodecGorilla(CompressionCodecFactory & factory);
 void registerCodecEncrypted(CompressionCodecFactory & factory);
-void registerCodecMultiple(CompressionCodecFactory & factory);
+
+#endif

 CompressionCodecFactory::CompressionCodecFactory()
 {
-    registerCodecLZ4(*this);
     registerCodecNone(*this);
+    registerCodecLZ4(*this);
     registerCodecZSTD(*this);
     registerCodecLZ4HC(*this);
+    registerCodecMultiple(*this);
+
+#ifndef KEEPER_STANDALONE_BUILD
     registerCodecDelta(*this);
     registerCodecT64(*this);
     registerCodecDoubleDelta(*this);
     registerCodecGorilla(*this);
     registerCodecEncrypted(*this);
-    registerCodecMultiple(*this);
+#endif

     default_codec = get("LZ4", {});
 }
@@ -187,5 +187,4 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EnumComparingMode, FormatSettings::EnumComparin
 DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule)

 DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation)
-
 }
@@ -63,12 +63,12 @@ private:
     size_t num_dimensions_to_keep;
 };

-using Node = typename ColumnObject::SubcolumnsTree::Node;
+using Node = typename ColumnObject::Subcolumns::Node;

 /// Finds a subcolumn from the same Nested type as @entry and inserts
 /// an array with default values with consistent sizes as in Nested type.
 bool tryInsertDefaultFromNested(
-    const std::shared_ptr<Node> & entry, const ColumnObject::SubcolumnsTree & subcolumns)
+    const std::shared_ptr<Node> & entry, const ColumnObject::Subcolumns & subcolumns)
 {
     if (!entry->path.hasNested())
         return false;
@@ -198,7 +198,7 @@ void SerializationObject<Parser>::deserializeWholeText(IColumn & column, ReadBuf
 template <typename Parser>
 void SerializationObject<Parser>::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
 {
-    deserializeTextImpl(column, [&](String & s) { readEscapedStringInto(s, istr); });
+    deserializeTextImpl(column, [&](String & s) { readEscapedString(s, istr); });
 }

 template <typename Parser>
@@ -96,6 +96,7 @@ private:
             case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE:
                 return "REMOTE_FS_READ_AND_PUT_IN_CACHE";
         }
+        __builtin_unreachable();
     }

     size_t first_offset = 0;
 };
src/Functions/makeDate.cpp (new file, 155 lines)

@@ -0,0 +1,155 @@
+#include <Functions/IFunction.h>
+#include <Functions/FunctionFactory.h>
+#include <DataTypes/DataTypeDate.h>
+#include <DataTypes/DataTypeDate32.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <Columns/ColumnsNumber.h>
+#include <Interpreters/castColumn.h>
+
+#include <Common/DateLUT.h>
+#include <Common/typeid_cast.h>
+
+#include <array>
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+}
+
+namespace
+{
+
+// A helper function to simplify comparisons of valid YYYY-MM-DD values for <,>,=
+inline constexpr Int64 YearMonthDayToSingleInt(Int64 year, Int64 month, Int64 day)
+{
+    return year * 512 + month * 32 + day;
+}
+
+// Common implementation for makeDate, makeDate32
+template <typename Traits>
+class FunctionMakeDate : public IFunction
+{
+private:
+    static constexpr std::array<const char*, 3> argument_names = {"year", "month", "day"};
+
+public:
+    static constexpr auto name = Traits::name;
+
+    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionMakeDate>(); }
+
+    String getName() const override { return name; }
+
+    bool isVariadic() const override { return false; }
+
+    size_t getNumberOfArguments() const override { return argument_names.size(); }
+
+    bool isInjective(const ColumnsWithTypeAndName &) const override
+    {
+        return false; // {year,month,day} that are out of supported range are converted into a default value
+    }
+
+    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
+
+    bool useDefaultImplementationForNulls() const override { return true; }
+
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        if (arguments.size() != argument_names.size())
+            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                "Function {} requires 3 arguments, but {} given", getName(), arguments.size());
+
+        for (size_t i = 0; i < argument_names.size(); ++i)
+        {
+            DataTypePtr argument_type = arguments[i];
+            if (!isNumber(argument_type))
+                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                    "Argument '{}' for function {} must be number", std::string(argument_names[i]), getName());
+        }
+
+        return std::make_shared<typename Traits::ReturnDataType>();
+    }
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
+    {
+        const DataTypePtr converted_argument_type = std::make_shared<DataTypeFloat32>();
+        Columns converted_arguments;
+        converted_arguments.reserve(arguments.size());
+        for (const auto & argument : arguments)
+        {
+            ColumnPtr argument_column = castColumn(argument, converted_argument_type);
+            argument_column = argument_column->convertToFullColumnIfConst();
+            converted_arguments.push_back(argument_column);
+        }
+
+        auto res_column = Traits::ReturnColumnType::create(input_rows_count);
+        auto & result_data = res_column->getData();
+
+        const auto & year_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[0]).getData();
+        const auto & month_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[1]).getData();
+        const auto & day_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[2]).getData();
+
+        const auto & date_lut = DateLUT::instance();
+
+        for (size_t i = 0; i < input_rows_count; ++i)
+        {
+            const auto year = year_data[i];
+            const auto month = month_data[i];
+            const auto day = day_data[i];
+
+            Int32 day_num = 0;
+
+            if (year >= Traits::MIN_YEAR &&
+                year <= Traits::MAX_YEAR &&
+                month >= 1 && month <= 12 &&
+                day >= 1 && day <= 31 &&
+                YearMonthDayToSingleInt(year, month, day) <= Traits::MAX_DATE)
+            {
+                day_num = date_lut.makeDayNum(year, month, day);
+            }
+
+            result_data[i] = day_num;
+        }
+
+        return res_column;
+    }
+};
+
+// makeDate(year, month, day)
+struct MakeDateTraits
+{
+    static constexpr auto name = "makeDate";
+    using ReturnDataType = DataTypeDate;
+    using ReturnColumnType = ColumnUInt16;
+
+    static constexpr auto MIN_YEAR = 1970;
+    static constexpr auto MAX_YEAR = 2149;
+    // This date has the maximum day number that fits in 16-bit uint
+    static constexpr auto MAX_DATE = YearMonthDayToSingleInt(MAX_YEAR, 6, 6);
+};
+
+// makeDate32(year, month, day)
+struct MakeDate32Traits
+{
+    static constexpr auto name = "makeDate32";
+    using ReturnDataType = DataTypeDate32;
+    using ReturnColumnType = ColumnInt32;
+
+    static constexpr auto MIN_YEAR = 1925;
+    static constexpr auto MAX_YEAR = 2283;
+    static constexpr auto MAX_DATE = YearMonthDayToSingleInt(MAX_YEAR, 11, 11);
+};
+
+}
+
+void registerFunctionsMakeDate(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionMakeDate<MakeDateTraits>>();
+    factory.registerFunction<FunctionMakeDate<MakeDate32Traits>>();
+}
+
+}
@@ -8,6 +8,7 @@ namespace DB
 void registerFunctionsArithmetic(FunctionFactory &);
 void registerFunctionsArray(FunctionFactory &);
 void registerFunctionsTuple(FunctionFactory &);
+void registerFunctionsMakeDate(FunctionFactory &);
 void registerFunctionsMap(FunctionFactory &);
 void registerFunctionsBitmap(FunctionFactory &);
 void registerFunctionsBinaryRepr(FunctionFactory &);

@@ -73,6 +74,7 @@ void registerFunctions()
     registerFunctionsArithmetic(factory);
     registerFunctionsArray(factory);
     registerFunctionsTuple(factory);
+    registerFunctionsMakeDate(factory);
     registerFunctionsMap(factory);
     registerFunctionsBitmap(factory);
     registerFunctionsBinaryRepr(factory);
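Usage sketch (not part of the diff): the makeDate and makeDate32 functions registered above take numeric year, month, and day arguments; per the traits in makeDate.cpp, makeDate covers years 1970-2149 (Date) and makeDate32 covers 1925-2283 (Date32), and out-of-range triples fall back to the default value. An illustrative query, assuming a server built with this change:

    -- makeDate returns Date, makeDate32 returns Date32; invalid inputs yield the default date
    SELECT makeDate(2022, 3, 31) AS d, makeDate32(1925, 1, 1) AS d32;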
@@ -39,6 +39,10 @@ public:
     {
     }

+    virtual ~ReadBufferFromFileDescriptor() override
+    {
+    }
+
     int getFD() const
     {
         return fd;

@@ -80,6 +84,9 @@ public:
     {
         use_pread = true;
     }
+    virtual ~ReadBufferFromFileDescriptorPRead() override
+    {
+    }
 };

 }
@@ -15,4 +15,11 @@ public:

     explicit ReadBufferFromString(std::string_view s) : ReadBufferFromMemory(s.data(), s.size()) {}
 };

+class ReadBufferFromOwnString : public String, public ReadBufferFromString
+{
+public:
+    explicit ReadBufferFromOwnString(const String & s_): String(s_), ReadBufferFromString(*this) {}
+};
+
 }
@@ -663,7 +663,7 @@ public:

         Range range{from, to};
         from = to;
-        return std::move(range);
+        return range;
     }

 private:
@@ -2,18 +2,15 @@

 #include <iostream>

-#include <base/types.h>
-#include <IO/ReadHelpers.h>
+#include <IO/ReadBuffer.h>
 #include <IO/ReadBufferFromString.h>
+#include <IO/ReadHelpers.h>
+#include <base/types.h>

-int main(int, char **)
+int readAndPrint(DB::ReadBuffer & in)
 {
     try
     {
-        std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'";
-        DB::ReadBufferFromString in(s);
-
         DB::Int64 a;
         DB::Float64 b;
         DB::String c, d;

@@ -31,12 +28,32 @@ int main(int, char **)

         std::cout << a << ' ' << b << ' ' << c << '\t' << '\'' << d << '\'' << std::endl;
         std::cout << in.count() << std::endl;
+        return 0;
     }
     catch (const DB::Exception & e)
     {
         std::cerr << e.what() << ", " << e.displayText() << std::endl;
         return 1;
     }
+}
+
+int main(int, char **)
+{
+    {
+        std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'";
+        DB::ReadBufferFromString in(s);
+        if (readAndPrint(in))
+            std::cout << "readAndPrint from ReadBufferFromString failed" << std::endl;
+    }
+
+
+    std::shared_ptr<DB::ReadBufferFromOwnString> in;
+    {
+        std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'";
+        in = std::make_shared<DB::ReadBufferFromOwnString>(s);
+    }
+    if (readAndPrint(*in))
+        std::cout << "readAndPrint from ReadBufferFromOwnString failed" << std::endl;
+
     return 0;
 }
@@ -12,6 +12,7 @@
 #include <Storages/MarkCache.h>
 #include <Storages/StorageMergeTree.h>
 #include <Storages/StorageReplicatedMergeTree.h>
+#include <Storages/MergeTree/MergeTreeMetadataCache.h>
 #include <IO/UncompressedCache.h>
 #include <IO/MMappedFileCache.h>
 #include <IO/ReadHelpers.h>
@@ -607,6 +608,15 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
         }
     }
 
+#if USE_ROCKSDB
+    {
+        if (auto metadata_cache = getContext()->tryGetMergeTreeMetadataCache())
+        {
+            new_values["MergeTreeMetadataCacheSize"] = metadata_cache->getEstimateNumKeys();
+        }
+    }
+#endif
+
 #if USE_EMBEDDED_COMPILER
     {
         if (auto * compiled_expression_cache = CompiledExpressionCacheFactory::instance().tryGetCache())
@@ -617,6 +627,7 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
         }
     }
 #endif
 
+
     new_values["Uptime"] = getContext()->getUptimeSeconds();
 
     /// Process process memory usage according to OS
@@ -85,12 +85,16 @@
 #include <Storages/MergeTree/BackgroundJobsAssignee.h>
 #include <Storages/MergeTree/MergeTreeBackgroundExecutor.h>
 #include <Storages/MergeTree/MergeTreeDataPartUUID.h>
+#include <Storages/MergeTree/MergeTreeMetadataCache.h>
 #include <Interpreters/SynonymsExtensions.h>
 #include <Interpreters/Lemmatizers.h>
 #include <Interpreters/ClusterDiscovery.h>
 #include <Interpreters/TransactionLog.h>
 #include <filesystem>
 
+#if USE_ROCKSDB
+#include <rocksdb/table.h>
+#endif
+
 namespace fs = std::filesystem;
 
@@ -280,6 +284,11 @@ struct ContextSharedPart
 
     bool is_server_completely_started = false;
 
+#if USE_ROCKSDB
+    /// Global merge tree metadata cache, stored in rocksdb.
+    MergeTreeMetadataCachePtr merge_tree_metadata_cache;
+#endif
+
     ContextSharedPart()
         : access_control(std::make_unique<AccessControl>())
         , global_overcommit_tracker(&process_list)
@@ -416,6 +425,15 @@ struct ContextSharedPart
         trace_collector.reset();
         /// Stop zookeeper connection
         zookeeper.reset();
+
+#if USE_ROCKSDB
+        /// Shutdown merge tree metadata cache
+        if (merge_tree_metadata_cache)
+        {
+            merge_tree_metadata_cache->shutdown();
+            merge_tree_metadata_cache.reset();
+        }
+#endif
     }
 
     /// Can be removed w/o context lock
@@ -2056,6 +2074,23 @@ zkutil::ZooKeeperPtr Context::getAuxiliaryZooKeeper(const String & name) const
     return zookeeper->second;
 }
 
+#if USE_ROCKSDB
+MergeTreeMetadataCachePtr Context::getMergeTreeMetadataCache() const
+{
+    auto cache = tryGetMergeTreeMetadataCache();
+    if (!cache)
+        throw Exception(
+            ErrorCodes::LOGICAL_ERROR,
+            "Merge tree metadata cache is not initialized, please add config merge_tree_metadata_cache in config.xml and restart");
+    return cache;
+}
+
+MergeTreeMetadataCachePtr Context::tryGetMergeTreeMetadataCache() const
+{
+    return shared->merge_tree_metadata_cache;
+}
+#endif
+
 void Context::resetZooKeeper() const
 {
     std::lock_guard lock(shared->zookeeper_mutex);
@@ -2299,6 +2334,13 @@ void Context::initializeTraceCollector()
     shared->initializeTraceCollector(getTraceLog());
 }
 
+#if USE_ROCKSDB
+void Context::initializeMergeTreeMetadataCache(const String & dir, size_t size)
+{
+    shared->merge_tree_metadata_cache = MergeTreeMetadataCache::create(dir, size);
+}
+#endif
+
 bool Context::hasTraceCollector() const
 {
     return shared->hasTraceCollector();
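The Context changes above expose the RocksDB-backed merge tree metadata cache: initializeMergeTreeMetadataCache() creates it from a directory and size, tryGetMergeTreeMetadataCache() returns nullptr when it was never configured, and getMergeTreeMetadataCache() throws if the merge_tree_metadata_cache section is missing from config.xml. A rough, hypothetical call-site sketch under USE_ROCKSDB (the surrounding server wiring is assumed and not shown here):

    #if USE_ROCKSDB
    void reportMetadataCacheSize(DB::ContextPtr context)
    {
        // Non-throwing lookup: a null pointer simply means the cache is disabled.
        if (auto cache = context->tryGetMergeTreeMetadataCache())
        {
            auto estimated_keys = cache->getEstimateNumKeys();   // same call the new asynchronous metric uses
            (void)estimated_keys;
        }
    }
    #endif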
@@ -17,6 +17,7 @@
 #include <base/types.h>
 #include <Storages/MergeTree/ParallelReplicasReadingCoordinator.h>
 
+
 #include "config_core.h"
 
 #include <boost/container/flat_set.hpp>
@@ -156,6 +157,12 @@ using ReadTaskCallback = std::function<String()>;
 
 using MergeTreeReadTaskCallback = std::function<std::optional<PartitionReadResponse>(PartitionReadRequest)>;
 
+
+#if USE_ROCKSDB
+class MergeTreeMetadataCache;
+using MergeTreeMetadataCachePtr = std::shared_ptr<MergeTreeMetadataCache>;
+#endif
+
 /// An empty interface for an arbitrary object that may be attached by a shared pointer
 /// to query context, when using ClickHouse as a library.
 struct IHostContext
@@ -183,6 +190,7 @@ private:
     std::unique_ptr<ContextSharedPart> shared;
 };
 
+
 /** A set of known objects that can be used in the query.
   * Consists of a shared part (always common to all sessions and queries)
   * and copied part (which can be its own for each session or query).
@@ -692,6 +700,11 @@ public:
 
     UInt32 getZooKeeperSessionUptime() const;
 
+#if USE_ROCKSDB
+    MergeTreeMetadataCachePtr getMergeTreeMetadataCache() const;
+    MergeTreeMetadataCachePtr tryGetMergeTreeMetadataCache() const;
+#endif
+
 #if USE_NURAFT
     std::shared_ptr<KeeperDispatcher> & getKeeperDispatcher() const;
 #endif
@@ -781,6 +794,10 @@ public:
     /// Call after initialization before using trace collector.
     void initializeTraceCollector();
 
+#if USE_ROCKSDB
+    void initializeMergeTreeMetadataCache(const String & dir, size_t size);
+#endif
+
     bool hasTraceCollector() const;
 
     /// Nullptr if the query log is not ready for this moment.
@@ -150,12 +150,12 @@ static ColumnsDescription createColumnsDescription(const NamesAndTypesList & col
 
     ColumnsDescription columns_description;
 
-    for (
-        auto [column_name_and_type, declare_column_ast] = std::tuple{columns_name_and_type.begin(), columns_definition->children.begin()};
-        column_name_and_type != columns_name_and_type.end();
-        column_name_and_type++,
-        declare_column_ast++
-    )
+    /// FIXME: we could write it like auto [a, b] = std::tuple(x, y),
+    /// but this produce endless recursion in gcc-11, and leads to SIGSEGV
+    /// (see git blame for details).
+    auto column_name_and_type = columns_name_and_type.begin();
+    auto declare_column_ast = columns_definition->children.begin();
+    for (; column_name_and_type != columns_name_and_type.end(); column_name_and_type++, declare_column_ast++)
     {
         const auto & declare_column = (*declare_column_ast)->as<MySQLParser::ASTDeclareColumn>();
         String comment;
@@ -192,7 +192,7 @@ private:
         using Result = Element;
 
         static TKey & extractKey(Element & elem) { return elem.value; }
-        static Element extractResult(Element & elem) { return elem; }
+        static Result extractResult(Element & elem) { return elem; }
     };
 
     if constexpr (is_descending)
@@ -384,7 +384,7 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
     for (const auto & name_and_type : log_element_names_and_types)
         log_element_columns.emplace_back(name_and_type.type, name_and_type.name);
 
-    Block block(std::move(log_element_columns));
+    Block block(log_element_columns);
 
     MutableColumns columns = block.mutateColumns();
     for (const auto & elem : to_flush)
@@ -345,7 +345,10 @@ void replaceWithSumCount(String column_name, ASTFunction & func)
     {
         /// Rewrite "avg" to sumCount().1 / sumCount().2
        auto new_arg1 = makeASTFunction("tupleElement", func_base, std::make_shared<ASTLiteral>(UInt8(1)));
-        auto new_arg2 = makeASTFunction("tupleElement", func_base, std::make_shared<ASTLiteral>(UInt8(2)));
+        auto new_arg2 = makeASTFunction("CAST",
+            makeASTFunction("tupleElement", func_base, std::make_shared<ASTLiteral>(UInt8(2))),
+            std::make_shared<ASTLiteral>("Float64"));
 
         func.name = "divide";
         exp_list->children.push_back(new_arg1);
         exp_list->children.push_back(new_arg2);
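The extra CAST above matters because tupleElement(sumCount(x), 2) is the row count, an integer; wrapping it in CAST(..., 'Float64') before the divide presumably keeps the rewritten avg a floating-point division rather than an integer one for integer arguments. The underlying pitfall, illustrated in plain C++:

    #include <iostream>

    int main()
    {
        long long sum = 7;
        long long count = 2;

        std::cout << sum / count << '\n';                        // 3   -- integer division truncates
        std::cout << sum / static_cast<double>(count) << '\n';   // 3.5 -- cast the divisor first
        return 0;
    }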
@@ -626,6 +626,14 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
 
         if (async_insert)
         {
+            quota = context->getQuota();
+            if (quota)
+            {
+                quota->used(QuotaType::QUERY_INSERTS, 1);
+                quota->used(QuotaType::QUERIES, 1);
+                quota->checkExceeded(QuotaType::ERRORS);
+            }
+
             queue->push(ast, context);
 
             if (settings.wait_for_async_insert)
@@ -636,13 +644,6 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
                 res.pipeline = QueryPipeline(Pipe(std::move(source)));
             }
 
-            quota = context->getQuota();
-            if (quota)
-            {
-                quota->used(QuotaType::QUERY_INSERTS, 1);
-                quota->used(QuotaType::QUERIES, 1);
-            }
-
             const auto & table_id = insert_query->table_id;
             if (!table_id.empty())
                 context->setInsertionTable(table_id);
@@ -25,7 +25,7 @@ CallbackRunner threadPoolCallbackRunner(ThreadPool & pool)
             /// Usually it could be ok, because thread pool task is executed before user-level memory tracker is destroyed.
             /// However, thread could stay alive inside the thread pool, and it's ThreadStatus as well.
             /// When, finally, we destroy the thread (and the ThreadStatus),
-            /// it can use memory tracker in the ~ThreadStatus in order to alloc/free untracked_memory,\
+            /// it can use memory tracker in the ~ThreadStatus in order to alloc/free untracked_memory,
             /// and by this time user-level memory tracker may be already destroyed.
             ///
             /// As a work-around, reset memory tracker to total, which is always alive.
@@ -488,7 +488,7 @@ auto WindowTransform::moveRowNumberNoCheck(const RowNumber & _x, int64_t offset)
         }
     }
 
-    return std::tuple{x, offset};
+    return std::tuple<RowNumber, int64_t>{x, offset};
 }
 
 auto WindowTransform::moveRowNumber(const RowNumber & _x, int64_t offset) const
@@ -505,7 +505,7 @@ auto WindowTransform::moveRowNumber(const RowNumber & _x, int64_t offset) const
     assert(oo == 0);
 #endif
 
-    return std::tuple{x, o};
+    return std::tuple<RowNumber, int64_t>{x, o};
 }
 
 
@@ -122,7 +122,7 @@ void ColumnDescription::readText(ReadBuffer & buf)
         if (col_ast->default_expression)
         {
             default_desc.kind = columnDefaultKindFromString(col_ast->default_specifier);
-            default_desc.expression = std::move(col_ast->default_expression);
+            default_desc.expression = col_ast->default_expression;
         }
 
         if (col_ast->comment)
@@ -325,6 +325,7 @@ void URLBasedDataSourceConfiguration::set(const URLBasedDataSourceConfiguration
     compression_method = conf.compression_method;
     structure = conf.structure;
     http_method = conf.http_method;
+    headers = conf.headers;
 }
 
 
@@ -364,6 +365,10 @@ std::optional<URLBasedDataSourceConfig> getURLBasedDataSourceConfiguration(const
             {
                 configuration.structure = config.getString(config_prefix + ".structure", "");
             }
+            else if (key == "compression_method")
+            {
+                configuration.compression_method = config.getString(config_prefix + ".compression_method", "");
+            }
             else if (key == "headers")
             {
                 Poco::Util::AbstractConfiguration::Keys header_keys;
@@ -114,6 +114,12 @@ struct StorageS3Configuration : URLBasedDataSourceConfiguration
     String secret_access_key;
 };
 
+
+struct StorageS3ClusterConfiguration : StorageS3Configuration
+{
+    String cluster_name;
+};
+
 struct URLBasedDataSourceConfig
 {
     URLBasedDataSourceConfiguration configuration;
@@ -1,9 +1,11 @@
 #include "IMergeTreeDataPart.h"
 
 #include <optional>
+#include <boost/algorithm/string/join.hpp>
 #include <string_view>
 #include <Core/Defines.h>
 #include <IO/HashingWriteBuffer.h>
+#include <IO/HashingReadBuffer.h>
 #include <IO/ReadBufferFromString.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
@@ -11,6 +13,8 @@
 #include <Storages/MergeTree/localBackup.h>
 #include <Storages/MergeTree/checkDataPart.h>
 #include <Storages/StorageReplicatedMergeTree.h>
+#include <Storages/MergeTree/PartMetadataManagerOrdinary.h>
+#include <Storages/MergeTree/PartMetadataManagerWithCache.h>
 #include <Common/StringUtils/StringUtils.h>
 #include <Common/escapeForFileName.h>
 #include <Common/CurrentMetrics.h>
@@ -68,7 +72,7 @@ static std::unique_ptr<ReadBufferFromFileBase> openForReading(const DiskPtr & di
     return disk->readFile(path, ReadSettings().adjustBufferSize(file_size), file_size);
 }
 
-void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const DiskPtr & disk_, const String & part_path)
+void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const PartMetadataManagerPtr & manager)
 {
     auto metadata_snapshot = data.getInMemoryMetadataPtr();
     const auto & partition_key = metadata_snapshot->getPartitionKey();
@@ -80,8 +84,8 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Dis
     hyperrectangle.reserve(minmax_idx_size);
     for (size_t i = 0; i < minmax_idx_size; ++i)
     {
-        String file_name = fs::path(part_path) / ("minmax_" + escapeForFileName(minmax_column_names[i]) + ".idx");
-        auto file = openForReading(disk_, file_name);
+        String file_name = "minmax_" + escapeForFileName(minmax_column_names[i]) + ".idx";
+        auto file = manager->read(file_name);
         auto serialization = minmax_column_types[i]->getDefaultSerialization();
 
         Field min_val;
@@ -193,6 +197,19 @@ void IMergeTreeDataPart::MinMaxIndex::merge(const MinMaxIndex & other)
     }
 }
 
+void IMergeTreeDataPart::MinMaxIndex::appendFiles(const MergeTreeData & data, Strings & files)
+{
+    auto metadata_snapshot = data.getInMemoryMetadataPtr();
+    const auto & partition_key = metadata_snapshot->getPartitionKey();
+    auto minmax_column_names = data.getMinMaxColumnsNames(partition_key);
+    size_t minmax_idx_size = minmax_column_names.size();
+    for (size_t i = 0; i < minmax_idx_size; ++i)
+    {
+        String file_name = "minmax_" + escapeForFileName(minmax_column_names[i]) + ".idx";
+        files.push_back(file_name);
+    }
+}
+
 
 static void incrementStateMetric(IMergeTreeDataPart::State state)
 {
@@ -300,6 +317,7 @@ IMergeTreeDataPart::IMergeTreeDataPart(
     , index_granularity_info(storage_, part_type_)
     , part_type(part_type_)
     , parent_part(parent_part_)
+    , use_metadata_cache(storage.use_metadata_cache)
 {
     if (parent_part)
         state = State::Active;
@@ -307,6 +325,8 @@ IMergeTreeDataPart::IMergeTreeDataPart(
     incrementTypeMetric(part_type);
 
     minmax_idx = std::make_shared<MinMaxIndex>();
+
+    initializePartMetadataManager();
 }
 
 IMergeTreeDataPart::IMergeTreeDataPart(
@@ -325,6 +345,7 @@ IMergeTreeDataPart::IMergeTreeDataPart(
     , index_granularity_info(storage_, part_type_)
     , part_type(part_type_)
     , parent_part(parent_part_)
+    , use_metadata_cache(storage.use_metadata_cache)
 {
     if (parent_part)
         state = State::Active;
@@ -332,6 +353,8 @@ IMergeTreeDataPart::IMergeTreeDataPart(
     incrementTypeMetric(part_type);
 
     minmax_idx = std::make_shared<MinMaxIndex>();
+
+    initializePartMetadataManager();
 }
 
 IMergeTreeDataPart::~IMergeTreeDataPart()
@@ -639,6 +662,33 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
         loadDefaultCompressionCodec();
 }
 
+void IMergeTreeDataPart::appendFilesOfColumnsChecksumsIndexes(Strings & files, bool include_projection) const
+{
+    if (isStoredOnDisk())
+    {
+        appendFilesOfUUID(files);
+        appendFilesOfColumns(files);
+        appendFilesOfChecksums(files);
+        appendFilesOfIndexGranularity(files);
+        appendFilesOfIndex(files);
+        appendFilesOfRowsCount(files);
+        appendFilesOfPartitionAndMinMaxIndex(files);
+        appendFilesOfTTLInfos(files);
+        appendFilesOfDefaultCompressionCodec(files);
+    }
+
+    if (!parent_part && include_projection)
+    {
+        for (const auto & [projection_name, projection_part] : projection_parts)
+        {
+            Strings projection_files;
+            projection_part->appendFilesOfColumnsChecksumsIndexes(projection_files, true);
+            for (const auto & projection_file : projection_files)
+                files.push_back(fs::path(projection_part->relative_path) / projection_file);
+        }
+    }
+}
+
 void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency)
 {
     auto metadata_snapshot = storage.getInMemoryMetadataPtr();
@@ -659,6 +709,11 @@ void IMergeTreeDataPart::loadIndexGranularity()
     throw Exception("Method 'loadIndexGranularity' is not implemented for part with type " + getType().toString(), ErrorCodes::NOT_IMPLEMENTED);
 }
 
+/// Currently we don't cache mark files of part, because cache other meta files is enough to speed up loading.
+void IMergeTreeDataPart::appendFilesOfIndexGranularity(Strings & /* files */) const
+{
+}
+
 void IMergeTreeDataPart::loadIndex()
 {
     /// It can be empty in case of mutations
@@ -682,9 +737,9 @@ void IMergeTreeDataPart::loadIndex()
             loaded_index[i]->reserve(index_granularity.getMarksCount());
         }
 
-        String index_path = fs::path(getFullRelativePath()) / "primary.idx";
-        auto index_file = openForReading(volume->getDisk(), index_path);
+        String index_name = "primary.idx";
+        String index_path = fs::path(getFullRelativePath()) / index_name;
+        auto index_file = metadata_manager->read(index_name);
         size_t marks_count = index_granularity.getMarksCount();
 
         Serializations key_serializations(key_size);
@@ -711,6 +766,19 @@ void IMergeTreeDataPart::loadIndex()
     }
 }
 
+void IMergeTreeDataPart::appendFilesOfIndex(Strings & files) const
+{
+    auto metadata_snapshot = storage.getInMemoryMetadataPtr();
+    if (parent_part)
+        metadata_snapshot = metadata_snapshot->projections.has(name) ? metadata_snapshot->projections.get(name).metadata : nullptr;
+
+    if (!metadata_snapshot)
+        return;
+
+    if (metadata_snapshot->hasPrimaryKey())
+        files.push_back("primary.idx");
+}
+
 NameSet IMergeTreeDataPart::getFileNamesWithoutChecksums() const
 {
     if (!isStoredOnDisk())
|||||||
}
|
}
|
||||||
|
|
||||||
String path = fs::path(getFullRelativePath()) / DEFAULT_COMPRESSION_CODEC_FILE_NAME;
|
String path = fs::path(getFullRelativePath()) / DEFAULT_COMPRESSION_CODEC_FILE_NAME;
|
||||||
if (!volume->getDisk()->exists(path))
|
bool exists = metadata_manager->exists(DEFAULT_COMPRESSION_CODEC_FILE_NAME);
|
||||||
|
if (!exists)
|
||||||
{
|
{
|
||||||
default_codec = detectDefaultCompressionCodec();
|
default_codec = detectDefaultCompressionCodec();
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
auto file_buf = metadata_manager->read(DEFAULT_COMPRESSION_CODEC_FILE_NAME);
|
||||||
auto file_buf = openForReading(volume->getDisk(), path);
|
|
||||||
String codec_line;
|
String codec_line;
|
||||||
readEscapedStringUntilEOL(codec_line, *file_buf);
|
readEscapedStringUntilEOL(codec_line, *file_buf);
|
||||||
|
|
||||||
@ -754,7 +822,13 @@ void IMergeTreeDataPart::loadDefaultCompressionCodec()
|
|||||||
|
|
||||||
if (!checkString("CODEC", buf))
|
if (!checkString("CODEC", buf))
|
||||||
{
|
{
|
||||||
LOG_WARNING(storage.log, "Cannot parse default codec for part {} from file {}, content '{}'. Default compression codec will be deduced automatically, from data on disk", name, path, codec_line);
|
LOG_WARNING(
|
||||||
|
storage.log,
|
||||||
|
"Cannot parse default codec for part {} from file {}, content '{}'. Default compression codec will be deduced "
|
||||||
|
"automatically, from data on disk",
|
||||||
|
name,
|
||||||
|
path,
|
||||||
|
codec_line);
|
||||||
default_codec = detectDefaultCompressionCodec();
|
default_codec = detectDefaultCompressionCodec();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -772,6 +846,11 @@ void IMergeTreeDataPart::loadDefaultCompressionCodec()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void IMergeTreeDataPart::appendFilesOfDefaultCompressionCodec(Strings & files)
|
||||||
|
{
|
||||||
|
files.push_back(DEFAULT_COMPRESSION_CODEC_FILE_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const
|
CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const
|
||||||
{
|
{
|
||||||
/// In memory parts doesn't have any compression
|
/// In memory parts doesn't have any compression
|
||||||
@ -834,7 +913,7 @@ void IMergeTreeDataPart::loadPartitionAndMinMaxIndex()
|
|||||||
{
|
{
|
||||||
String path = getFullRelativePath();
|
String path = getFullRelativePath();
|
||||||
if (!parent_part)
|
if (!parent_part)
|
||||||
partition.load(storage, volume->getDisk(), path);
|
partition.load(storage, metadata_manager);
|
||||||
|
|
||||||
if (!isEmpty())
|
if (!isEmpty())
|
||||||
{
|
{
|
||||||
@ -842,7 +921,7 @@ void IMergeTreeDataPart::loadPartitionAndMinMaxIndex()
|
|||||||
// projection parts don't have minmax_idx, and it's always initialized
|
// projection parts don't have minmax_idx, and it's always initialized
|
||||||
minmax_idx->initialized = true;
|
minmax_idx->initialized = true;
|
||||||
else
|
else
|
||||||
minmax_idx->load(storage, volume->getDisk(), path);
|
minmax_idx->load(storage, metadata_manager);
|
||||||
}
|
}
|
||||||
if (parent_part)
|
if (parent_part)
|
||||||
return;
|
return;
|
||||||
@ -857,13 +936,26 @@ void IMergeTreeDataPart::loadPartitionAndMinMaxIndex()
|
|||||||
ErrorCodes::CORRUPTED_DATA);
|
ErrorCodes::CORRUPTED_DATA);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void IMergeTreeDataPart::appendFilesOfPartitionAndMinMaxIndex(Strings & files) const
|
||||||
|
{
|
||||||
|
if (storage.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING && !parent_part)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (!parent_part)
|
||||||
|
partition.appendFiles(storage, files);
|
||||||
|
|
||||||
|
if (!isEmpty())
|
||||||
|
if (!parent_part)
|
||||||
|
minmax_idx->appendFiles(storage, files);
|
||||||
|
}
|
||||||
|
|
||||||
void IMergeTreeDataPart::loadChecksums(bool require)
|
void IMergeTreeDataPart::loadChecksums(bool require)
|
||||||
{
|
{
|
||||||
const String path = fs::path(getFullRelativePath()) / "checksums.txt";
|
const String path = fs::path(getFullRelativePath()) / "checksums.txt";
|
||||||
|
bool exists = metadata_manager->exists("checksums.txt");
|
||||||
if (volume->getDisk()->exists(path))
|
if (exists)
|
||||||
{
|
{
|
||||||
auto buf = openForReading(volume->getDisk(), path);
|
auto buf = metadata_manager->read("checksums.txt");
|
||||||
if (checksums.read(*buf))
|
if (checksums.read(*buf))
|
||||||
{
|
{
|
||||||
assertEOF(*buf);
|
assertEOF(*buf);
|
||||||
@ -894,13 +986,18 @@ void IMergeTreeDataPart::loadChecksums(bool require)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void IMergeTreeDataPart::appendFilesOfChecksums(Strings & files)
|
||||||
|
{
|
||||||
|
files.push_back("checksums.txt");
|
||||||
|
}
|
||||||
|
|
||||||
void IMergeTreeDataPart::loadRowsCount()
|
void IMergeTreeDataPart::loadRowsCount()
|
||||||
{
|
{
|
||||||
String path = fs::path(getFullRelativePath()) / "count.txt";
|
String path = fs::path(getFullRelativePath()) / "count.txt";
|
||||||
|
|
||||||
auto read_rows_count = [&]()
|
auto read_rows_count = [&]()
|
||||||
{
|
{
|
||||||
auto buf = openForReading(volume->getDisk(), path);
|
auto buf = metadata_manager->read("count.txt");
|
||||||
readIntText(rows_count, *buf);
|
readIntText(rows_count, *buf);
|
||||||
assertEOF(*buf);
|
assertEOF(*buf);
|
||||||
};
|
};
|
||||||
@ -911,7 +1008,8 @@ void IMergeTreeDataPart::loadRowsCount()
|
|||||||
}
|
}
|
||||||
else if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || part_type == Type::COMPACT || parent_part)
|
else if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING || part_type == Type::COMPACT || parent_part)
|
||||||
{
|
{
|
||||||
if (!volume->getDisk()->exists(path))
|
bool exists = metadata_manager->exists("count.txt");
|
||||||
|
if (!exists)
|
||||||
throw Exception("No count.txt in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);
|
throw Exception("No count.txt in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART);
|
||||||
|
|
||||||
read_rows_count();
|
read_rows_count();
|
||||||
@ -1009,12 +1107,17 @@ void IMergeTreeDataPart::loadRowsCount()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void IMergeTreeDataPart::appendFilesOfRowsCount(Strings & files)
|
||||||
|
{
|
||||||
|
files.push_back("count.txt");
|
||||||
|
}
|
||||||
|
|
||||||
void IMergeTreeDataPart::loadTTLInfos()
|
void IMergeTreeDataPart::loadTTLInfos()
|
||||||
{
|
{
|
||||||
String path = fs::path(getFullRelativePath()) / "ttl.txt";
|
bool exists = metadata_manager->exists("ttl.txt");
|
||||||
if (volume->getDisk()->exists(path))
|
if (exists)
|
||||||
{
|
{
|
||||||
auto in = openForReading(volume->getDisk(), path);
|
auto in = metadata_manager->read("ttl.txt");
|
||||||
assertString("ttl format version: ", *in);
|
assertString("ttl format version: ", *in);
|
||||||
size_t format_version;
|
size_t format_version;
|
||||||
readText(format_version, *in);
|
readText(format_version, *in);
|
||||||
@ -1036,19 +1139,29 @@ void IMergeTreeDataPart::loadTTLInfos()
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void IMergeTreeDataPart::appendFilesOfTTLInfos(Strings & files)
|
||||||
|
{
|
||||||
|
files.push_back("ttl.txt");
|
||||||
|
}
|
||||||
|
|
||||||
void IMergeTreeDataPart::loadUUID()
|
void IMergeTreeDataPart::loadUUID()
|
||||||
{
|
{
|
||||||
String path = fs::path(getFullRelativePath()) / UUID_FILE_NAME;
|
bool exists = metadata_manager->exists(UUID_FILE_NAME);
|
||||||
|
if (exists)
|
||||||
if (volume->getDisk()->exists(path))
|
|
||||||
{
|
{
|
||||||
auto in = openForReading(volume->getDisk(), path);
|
auto in = metadata_manager->read(UUID_FILE_NAME);
|
||||||
readText(uuid, *in);
|
readText(uuid, *in);
|
||||||
if (uuid == UUIDHelpers::Nil)
|
if (uuid == UUIDHelpers::Nil)
|
||||||
throw Exception("Unexpected empty " + String(UUID_FILE_NAME) + " in part: " + name, ErrorCodes::LOGICAL_ERROR);
|
throw Exception("Unexpected empty " + String(UUID_FILE_NAME) + " in part: " + name, ErrorCodes::LOGICAL_ERROR);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void IMergeTreeDataPart::appendFilesOfUUID(Strings & files)
|
||||||
|
{
|
||||||
|
files.push_back(UUID_FILE_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
void IMergeTreeDataPart::loadColumns(bool require)
|
void IMergeTreeDataPart::loadColumns(bool require)
|
||||||
{
|
{
|
||||||
String path = fs::path(getFullRelativePath()) / "columns.txt";
|
String path = fs::path(getFullRelativePath()) / "columns.txt";
|
||||||
@ -1057,7 +1170,8 @@ void IMergeTreeDataPart::loadColumns(bool require)
|
|||||||
metadata_snapshot = metadata_snapshot->projections.get(name).metadata;
|
metadata_snapshot = metadata_snapshot->projections.get(name).metadata;
|
||||||
NamesAndTypesList loaded_columns;
|
NamesAndTypesList loaded_columns;
|
||||||
|
|
||||||
if (!volume->getDisk()->exists(path))
|
bool exists = metadata_manager->exists("columns.txt");
|
||||||
|
if (!exists)
|
||||||
{
|
{
|
||||||
/// We can get list of columns only from columns.txt in compact parts.
|
/// We can get list of columns only from columns.txt in compact parts.
|
||||||
if (require || part_type == Type::COMPACT)
|
if (require || part_type == Type::COMPACT)
|
||||||
@ -1080,7 +1194,8 @@ void IMergeTreeDataPart::loadColumns(bool require)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
loaded_columns.readText(*volume->getDisk()->readFile(path));
|
auto in = metadata_manager->read("columns.txt");
|
||||||
|
loaded_columns.readText(*in);
|
||||||
|
|
||||||
for (const auto & column : loaded_columns)
|
for (const auto & column : loaded_columns)
|
||||||
{
|
{
|
||||||
@ -1097,9 +1212,12 @@ void IMergeTreeDataPart::loadColumns(bool require)
|
|||||||
};
|
};
|
||||||
|
|
||||||
SerializationInfoByName infos(loaded_columns, settings);
|
SerializationInfoByName infos(loaded_columns, settings);
|
||||||
path = getFullRelativePath() + SERIALIZATION_FILE_NAME;
|
exists = metadata_manager->exists(SERIALIZATION_FILE_NAME);
|
||||||
if (volume->getDisk()->exists(path))
|
if (exists)
|
||||||
infos.readJSON(*volume->getDisk()->readFile(path));
|
{
|
||||||
|
auto in = metadata_manager->read(SERIALIZATION_FILE_NAME);
|
||||||
|
infos.readJSON(*in);
|
||||||
|
}
|
||||||
|
|
||||||
setColumns(loaded_columns);
|
setColumns(loaded_columns);
|
||||||
setSerializationInfos(infos);
|
setSerializationInfos(infos);
|
||||||
@ -1317,6 +1435,12 @@ bool IMergeTreeDataPart::assertHasValidVersionMetadata() const
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void IMergeTreeDataPart::appendFilesOfColumns(Strings & files)
|
||||||
|
{
|
||||||
|
files.push_back("columns.txt");
|
||||||
|
files.push_back(SERIALIZATION_FILE_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
bool IMergeTreeDataPart::shallParticipateInMerges(const StoragePolicyPtr & storage_policy) const
|
bool IMergeTreeDataPart::shallParticipateInMerges(const StoragePolicyPtr & storage_policy) const
|
||||||
{
|
{
|
||||||
/// `IMergeTreeDataPart::volume` describes space where current part belongs, and holds
|
/// `IMergeTreeDataPart::volume` describes space where current part belongs, and holds
|
||||||
@ -1368,9 +1492,12 @@ try
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
metadata_manager->deleteAll(true);
|
||||||
|
metadata_manager->assertAllDeleted(true);
|
||||||
volume->getDisk()->setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr)));
|
volume->getDisk()->setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr)));
|
||||||
volume->getDisk()->moveDirectory(from, to);
|
volume->getDisk()->moveDirectory(from, to);
|
||||||
relative_path = new_relative_path;
|
relative_path = new_relative_path;
|
||||||
|
metadata_manager->updateAll(true);
|
||||||
|
|
||||||
SyncGuardPtr sync_guard;
|
SyncGuardPtr sync_guard;
|
||||||
if (storage.getSettings()->fsync_part_directory)
|
if (storage.getSettings()->fsync_part_directory)
|
||||||
@ -1408,6 +1535,18 @@ std::optional<bool> IMergeTreeDataPart::keepSharedDataInDecoupledStorage() const
|
|||||||
return !storage.unlockSharedData(*this);
|
return !storage.unlockSharedData(*this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void IMergeTreeDataPart::initializePartMetadataManager()
|
||||||
|
{
|
||||||
|
#if USE_ROCKSDB
|
||||||
|
if (use_metadata_cache)
|
||||||
|
metadata_manager = std::make_shared<PartMetadataManagerWithCache>(this, storage.getContext()->getMergeTreeMetadataCache());
|
||||||
|
else
|
||||||
|
metadata_manager = std::make_shared<PartMetadataManagerOrdinary>(this);
|
||||||
|
#else
|
||||||
|
metadata_manager = std::make_shared<PartMetadataManagerOrdinary>(this);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
void IMergeTreeDataPart::remove() const
|
void IMergeTreeDataPart::remove() const
|
||||||
{
|
{
|
||||||
assert(assertHasValidVersionMetadata());
|
assert(assertHasValidVersionMetadata());
|
||||||
@ -1430,6 +1569,9 @@ void IMergeTreeDataPart::remove() const
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
metadata_manager->deleteAll(false);
|
||||||
|
metadata_manager->assertAllDeleted(false);
|
||||||
|
|
||||||
/** Atomic directory removal:
|
/** Atomic directory removal:
|
||||||
* - rename directory to temporary name;
|
* - rename directory to temporary name;
|
||||||
* - remove it recursive.
|
* - remove it recursive.
|
||||||
@ -1536,6 +1678,9 @@ void IMergeTreeDataPart::remove() const
|
|||||||
|
|
||||||
void IMergeTreeDataPart::projectionRemove(const String & parent_to, bool keep_shared_data) const
|
void IMergeTreeDataPart::projectionRemove(const String & parent_to, bool keep_shared_data) const
|
||||||
{
|
{
|
||||||
|
metadata_manager->deleteAll(false);
|
||||||
|
metadata_manager->assertAllDeleted(false);
|
||||||
|
|
||||||
String to = fs::path(parent_to) / relative_path;
|
String to = fs::path(parent_to) / relative_path;
|
||||||
auto disk = volume->getDisk();
|
auto disk = volume->getDisk();
|
||||||
if (checksums.empty())
|
if (checksums.empty())
|
||||||
@ -1883,6 +2028,35 @@ String IMergeTreeDataPart::getZeroLevelPartBlockID(std::string_view token) const
|
|||||||
return info.partition_id + "_" + toString(hash_value.words[0]) + "_" + toString(hash_value.words[1]);
|
return info.partition_id + "_" + toString(hash_value.words[0]) + "_" + toString(hash_value.words[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
IMergeTreeDataPart::uint128 IMergeTreeDataPart::getActualChecksumByFile(const String & file_path) const
|
||||||
|
{
|
||||||
|
assert(use_metadata_cache);
|
||||||
|
|
||||||
|
String file_name = std::filesystem::path(file_path).filename();
|
||||||
|
const auto filenames_without_checksums = getFileNamesWithoutChecksums();
|
||||||
|
auto it = checksums.files.find(file_name);
|
||||||
|
if (filenames_without_checksums.count(file_name) == 0 && it != checksums.files.end())
|
||||||
|
{
|
||||||
|
return it->second.file_hash;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!volume->getDisk()->exists(file_path))
|
||||||
|
{
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
std::unique_ptr<ReadBufferFromFileBase> in_file = volume->getDisk()->readFile(file_path);
|
||||||
|
HashingReadBuffer in_hash(*in_file);
|
||||||
|
|
||||||
|
String value;
|
||||||
|
readStringUntilEOF(value, in_hash);
|
||||||
|
return in_hash.getHash();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::unordered_map<String, IMergeTreeDataPart::uint128> IMergeTreeDataPart::checkMetadata() const
|
||||||
|
{
|
||||||
|
return metadata_manager->check();
|
||||||
|
}
|
||||||
|
|
||||||
bool isCompactPart(const MergeTreeDataPartPtr & data_part)
|
bool isCompactPart(const MergeTreeDataPartPtr & data_part)
|
||||||
{
|
{
|
||||||
return (data_part && data_part->getType() == MergeTreeDataPartType::COMPACT);
|
return (data_part && data_part->getType() == MergeTreeDataPartType::COMPACT);
|
||||||
|
@@ -15,6 +15,7 @@
 #include <Storages/MergeTree/KeyCondition.h>
 #include <Interpreters/TransactionVersionMetadata.h>
 #include <DataTypes/Serializations/SerializationInfo.h>
+#include <Storages/MergeTree/IPartMetadataManager.h>
 
 #include <shared_mutex>
 
@@ -62,6 +63,8 @@ public:
 
     using Type = MergeTreeDataPartType;
 
+    using uint128 = IPartMetadataManager::uint128;
+
 
     IMergeTreeDataPart(
         const MergeTreeData & storage_,
@@ -150,6 +153,7 @@ public:
     /// Initialize columns (from columns.txt if exists, or create from column files if not).
     /// Load checksums from checksums.txt if exists. Load index if required.
     void loadColumnsChecksumsIndexes(bool require_columns_checksums, bool check_consistency);
+    void appendFilesOfColumnsChecksumsIndexes(Strings & files, bool include_projection = false) const;
 
     String getMarksFileExtension() const { return index_granularity_info.marks_file_extension; }
 
@@ -245,7 +249,7 @@ public:
     using TTLInfo = MergeTreeDataPartTTLInfo;
     using TTLInfos = MergeTreeDataPartTTLInfos;
 
-    TTLInfos ttl_infos;
+    mutable TTLInfos ttl_infos;
 
     /// Current state of the part. If the part is in working set already, it should be accessed via data_parts mutex
     void setState(State new_state) const;
@@ -302,14 +306,16 @@ public:
        {
        }
 
+        void load(const MergeTreeData & data, const PartMetadataManagerPtr & manager);
+
         using WrittenFiles = std::vector<std::unique_ptr<WriteBufferFromFileBase>>;
 
-        void load(const MergeTreeData & data, const DiskPtr & disk_, const String & part_path);
         [[nodiscard]] WrittenFiles store(const MergeTreeData & data, const DiskPtr & disk_, const String & part_path, Checksums & checksums) const;
         [[nodiscard]] WrittenFiles store(const Names & column_names, const DataTypes & data_types, const DiskPtr & disk_, const String & part_path, Checksums & checksums) const;
 
         void update(const Block & block, const Names & column_names);
         void merge(const MinMaxIndex & other);
+        static void appendFiles(const MergeTreeData & data, Strings & files);
     };
 
     using MinMaxIndexPtr = std::shared_ptr<MinMaxIndex>;
@@ -460,6 +466,13 @@ public:
     /// Required for keep data on remote FS when part has shadow copies.
     UInt32 getNumberOfRefereneces() const;
 
+    /// Get checksums of metadata file in part directory
+    IMergeTreeDataPart::uint128 getActualChecksumByFile(const String & file_path) const;
+
+    /// Check metadata in cache is consistent with actual metadata on disk(if use_metadata_cache is true)
+    std::unordered_map<String, uint128> checkMetadata() const;
+
+
 protected:
 
     /// Total size of all columns, calculated once in calcuateColumnSizesOnDisk
@@ -486,6 +499,11 @@ protected:
 
     std::map<String, std::shared_ptr<IMergeTreeDataPart>> projection_parts;
 
+    /// Disabled when USE_ROCKSDB is OFF or use_metadata_cache is set to false in merge tree settings
+    bool use_metadata_cache = false;
+
+    mutable PartMetadataManagerPtr metadata_manager;
+
     void removeIfNeeded();
 
     virtual void checkConsistency(bool require_part_metadata) const;
@@ -499,6 +517,9 @@ protected:
 
     std::optional<bool> keepSharedDataInDecoupledStorage() const;
 
+    void initializePartMetadataManager();
+
+
 private:
     /// In compact parts order of columns is necessary
     NameToNumber column_name_to_position;
@@ -509,36 +530,54 @@ private:
     /// Reads part unique identifier (if exists) from uuid.txt
     void loadUUID();
 
+    static void appendFilesOfUUID(Strings & files);
+
     /// Reads columns names and types from columns.txt
     void loadColumns(bool require);
 
+    static void appendFilesOfColumns(Strings & files);
+
     /// If checksums.txt exists, reads file's checksums (and sizes) from it
     void loadChecksums(bool require);
 
+    static void appendFilesOfChecksums(Strings & files);
+
     /// Loads marks index granularity into memory
     virtual void loadIndexGranularity();
 
+    virtual void appendFilesOfIndexGranularity(Strings & files) const;
+
     /// Loads index file.
     void loadIndex();
 
+    void appendFilesOfIndex(Strings & files) const;
+
     /// Load rows count for this part from disk (for the newer storage format version).
     /// For the older format version calculates rows count from the size of a column with a fixed size.
     void loadRowsCount();
 
+    static void appendFilesOfRowsCount(Strings & files);
+
     /// Loads ttl infos in json format from file ttl.txt. If file doesn't exists assigns ttl infos with all zeros
     void loadTTLInfos();
 
+    static void appendFilesOfTTLInfos(Strings & files);
+
     void loadPartitionAndMinMaxIndex();
 
     void calculateColumnsSizesOnDisk();
 
     void calculateSecondaryIndicesSizesOnDisk();
 
+    void appendFilesOfPartitionAndMinMaxIndex(Strings & files) const;
+
     /// Load default compression codec from file default_compression_codec.txt
     /// if it not exists tries to deduce codec from compressed column without
     /// any specifial compression.
     void loadDefaultCompressionCodec();
 
+    static void appendFilesOfDefaultCompressionCodec(Strings & files);
+
     /// Found column without specific compression and return codec
     /// for this column with default parameters.
     CompressionCodecPtr detectDefaultCompressionCodec() const;
src/Storages/MergeTree/IPartMetadataManager.cpp (new file, 11 lines)
@@ -0,0 +1,11 @@
+#include "IPartMetadataManager.h"
+
+#include <Disks/IVolume.h>
+#include <Storages/MergeTree/IMergeTreeDataPart.h>
+
+namespace DB
+{
+IPartMetadataManager::IPartMetadataManager(const IMergeTreeDataPart * part_) : part(part_), disk(part->volume->getDisk())
+{
+}
+}
59 src/Storages/MergeTree/IPartMetadataManager.h Normal file
@@ -0,0 +1,59 @@
#pragma once

#include <unordered_map>
#include <city.h>
#include <base/types.h>

namespace DB
{

class IMergeTreeDataPart;

class SeekableReadBuffer;

class IDisk;
using DiskPtr = std::shared_ptr<IDisk>;

/// Interface for managing the metadata of a merge tree part.
/// IPartMetadataManager has two implementations:
/// - PartMetadataManagerOrdinary: manages metadata directly on disk. deleteAll/assertAllDeleted/updateAll/check
///   are all empty implementations because they are not needed for PartMetadataManagerOrdinary (those operations
///   are done implicitly when removing or renaming the part directory).
/// - PartMetadataManagerWithCache: manages metadata through a RocksDB cache backed by disk.
class IPartMetadataManager
{
public:
    using uint128 = CityHash_v1_0_2::uint128;

    explicit IPartMetadataManager(const IMergeTreeDataPart * part_);

    virtual ~IPartMetadataManager() = default;

    /// Read metadata content and return a SeekableReadBuffer object.
    virtual std::unique_ptr<SeekableReadBuffer> read(const String & file_name) const = 0;

    /// Return true if the metadata file exists in the part.
    virtual bool exists(const String & file_name) const = 0;

    /// Delete all metadata files in the part.
    /// If include_projection is true, also delete metadata files in projection parts.
    virtual void deleteAll(bool include_projection) = 0;

    /// Assert that all metadata files in the part are deleted.
    /// If include_projection is true, also assert that all metadata files in projection parts are deleted.
    virtual void assertAllDeleted(bool include_projection) const = 0;

    /// Update all metadata files in the part.
    /// If include_projection is true, also update metadata files in projection parts.
    virtual void updateAll(bool include_projection) = 0;

    /// Check all metadata files in the part.
    virtual std::unordered_map<String, uint128> check() const = 0;

protected:
    const IMergeTreeDataPart * part;
    const DiskPtr disk;
};

using PartMetadataManagerPtr = std::shared_ptr<IPartMetadataManager>;
}
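The interface above is implemented by PartMetadataManagerOrdinary and PartMetadataManagerWithCache, both added later in this commit. A minimal sketch of how a part might choose an implementation based on the use_metadata_cache setting; this factory function is an assumption for illustration (the actual selection code is not shown in this section), and it presumes a build with USE_ROCKSDB:

#include <Storages/MergeTree/PartMetadataManagerOrdinary.h>
#include <Storages/MergeTree/PartMetadataManagerWithCache.h>

namespace DB
{
/// Hypothetical factory (not part of this commit): pick a metadata manager for a part.
/// `use_metadata_cache` is the MergeTree setting added below; `cache` would be obtained
/// from the global context (getMergeTreeMetadataCache()).
PartMetadataManagerPtr createPartMetadataManager(
    const IMergeTreeDataPart * part, bool use_metadata_cache, const MergeTreeMetadataCachePtr & cache)
{
    if (use_metadata_cache && cache)
        return std::make_shared<PartMetadataManagerWithCache>(part, cache);  /// RocksDB-backed cache + disk
    return std::make_shared<PartMetadataManagerOrdinary>(part);              /// plain reads from the part directory
}
}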
@@ -53,7 +53,7 @@ String Range::toString() const


 /// Example: for `Hello\_World% ...` string it returns `Hello_World`, and for `%test%` returns an empty string.
-static String extractFixedPrefixFromLikePattern(const String & like_pattern)
+String extractFixedPrefixFromLikePattern(const String & like_pattern)
 {
     String fixed_prefix;

@@ -442,4 +442,6 @@ private:
     bool strict;
 };

+String extractFixedPrefixFromLikePattern(const String & like_pattern);
+
 }
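extractFixedPrefixFromLikePattern is exposed here so that the metadata-cache system table added later in this commit can turn a `key LIKE 'prefix%'` condition into a RocksDB prefix seek. A self-contained sketch of the idea (not the actual ClickHouse implementation): collect characters until the first unescaped wildcard, unescaping "\_" and "\%" along the way.

#include <iostream>
#include <string>

// Sketch of the idea behind extractFixedPrefixFromLikePattern.
static std::string extractFixedPrefix(const std::string & like_pattern)
{
    std::string prefix;
    for (size_t i = 0; i < like_pattern.size(); ++i)
    {
        char c = like_pattern[i];
        if (c == '%' || c == '_')
            break;                                   // wildcard reached: fixed prefix ends here
        if (c == '\\' && i + 1 < like_pattern.size())
            c = like_pattern[++i];                   // escaped metacharacter is a literal
        prefix += c;
    }
    return prefix;
}

int main()
{
    std::cout << extractFixedPrefix("Hello\\_World% ...") << '\n'; // prints: Hello_World
    std::cout << extractFixedPrefix("%test%") << '\n';             // prints an empty line
}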
@@ -214,6 +214,7 @@ MergeTreeData::MergeTreeData(
     , parts_mover(this)
     , background_operations_assignee(*this, BackgroundJobsAssignee::Type::DataProcessing, getContext())
     , background_moves_assignee(*this, BackgroundJobsAssignee::Type::Moving, getContext())
+    , use_metadata_cache(getSettings()->use_metadata_cache)
 {
     context_->getGlobalContext()->initializeBackgroundExecutorsIfNeeded();

@@ -333,6 +334,11 @@ MergeTreeData::MergeTreeData(
         LOG_WARNING(log, "{} Settings 'min_rows_for_wide_part', 'min_bytes_for_wide_part', "
             "'min_rows_for_compact_part' and 'min_bytes_for_compact_part' will be ignored.", reason);

+#if !USE_ROCKSDB
+    if (use_metadata_cache)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't use merge tree metadata cache if clickhouse was compiled without rocksdb");
+#endif
+
     common_assignee_trigger = [this] (bool delay) noexcept
     {
         if (delay)
@@ -1476,7 +1482,6 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks)
     LOG_DEBUG(log, "Loaded data parts ({} items)", data_parts_indexes.size());
 }

-
 /// Is the part directory old.
 /// True if its modification time and the modification time of all files inside it is less than the threshold.
 /// (Only files on the first level of nesting are considered).
@@ -969,6 +969,7 @@ protected:
     friend class StorageReplicatedMergeTree;
     friend class MergeTreeDataWriter;
     friend class MergeTask;
+    friend class IPartMetadataManager;

     bool require_part_metadata;

@@ -1054,6 +1055,7 @@ protected:
     /// And for ReplicatedMergeTree we don't have LogEntry type for this operation.
     BackgroundJobsAssignee background_operations_assignee;
     BackgroundJobsAssignee background_moves_assignee;
+    bool use_metadata_cache;

     /// Strongly connected with two fields above.
     /// Every task that is finished will ask to assign a new one into an executor.
@@ -69,6 +69,7 @@ private:
     ColumnSize getColumnSizeImpl(const NameAndTypePair & column, std::unordered_set<String> * processed_substreams) const;

    void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const override;

 };

 }
107 src/Storages/MergeTree/MergeTreeMetadataCache.cpp Normal file
@@ -0,0 +1,107 @@
#include "MergeTreeMetadataCache.h"

#if USE_ROCKSDB
#include <Common/ProfileEvents.h>
#include <base/logger_useful.h>

namespace ProfileEvents
{
    extern const Event MergeTreeMetadataCachePut;
    extern const Event MergeTreeMetadataCacheGet;
    extern const Event MergeTreeMetadataCacheDelete;
    extern const Event MergeTreeMetadataCacheSeek;
}

namespace DB
{
namespace ErrorCodes
{
    extern const int SYSTEM_ERROR;
}

std::unique_ptr<MergeTreeMetadataCache> MergeTreeMetadataCache::create(const String & dir, size_t size)
{
    assert(size != 0);
    rocksdb::Options options;
    rocksdb::BlockBasedTableOptions table_options;
    rocksdb::DB * db;

    options.create_if_missing = true;
    auto cache = rocksdb::NewLRUCache(size);
    table_options.block_cache = cache;
    options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options));
    rocksdb::Status status = rocksdb::DB::Open(options, dir, &db);
    if (status != rocksdb::Status::OK())
        throw Exception(
            ErrorCodes::SYSTEM_ERROR,
            "Fail to open rocksdb path at: {} status:{}. You can try to remove the cache (this will not affect any table data).",
            dir,
            status.ToString());
    return std::make_unique<MergeTreeMetadataCache>(db);
}

MergeTreeMetadataCache::Status MergeTreeMetadataCache::put(const String & key, const String & value)
{
    auto options = rocksdb::WriteOptions();
    options.sync = true;
    options.disableWAL = false;
    auto status = rocksdb->Put(options, key, value);
    ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCachePut);
    return status;
}

MergeTreeMetadataCache::Status MergeTreeMetadataCache::del(const String & key)
{
    auto options = rocksdb::WriteOptions();
    options.sync = true;
    options.disableWAL = false;
    auto status = rocksdb->Delete(options, key);
    ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheDelete);
    LOG_TRACE(log, "Delete key:{} from MergeTreeMetadataCache status:{}", key, status.ToString());
    return status;
}

MergeTreeMetadataCache::Status MergeTreeMetadataCache::get(const String & key, String & value)
{
    auto status = rocksdb->Get(rocksdb::ReadOptions(), key, &value);
    ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheGet);
    LOG_TRACE(log, "Get key:{} from MergeTreeMetadataCache status:{}", key, status.ToString());
    return status;
}

void MergeTreeMetadataCache::getByPrefix(const String & prefix, Strings & keys, Strings & values)
{
    auto * it = rocksdb->NewIterator(rocksdb::ReadOptions());
    rocksdb::Slice target(prefix);
    for (it->Seek(target); it->Valid(); it->Next())
    {
        const auto key = it->key();
        if (!key.starts_with(target))
            break;

        const auto value = it->value();
        keys.emplace_back(key.data(), key.size());
        values.emplace_back(value.data(), value.size());
    }
    LOG_TRACE(log, "Seek with prefix:{} from MergeTreeMetadataCache items:{}", prefix, keys.size());
    ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheSeek);
    delete it;
}

uint64_t MergeTreeMetadataCache::getEstimateNumKeys() const
{
    uint64_t keys = 0;
    rocksdb->GetAggregatedIntProperty("rocksdb.estimate-num-keys", &keys);
    return keys;
}

void MergeTreeMetadataCache::shutdown()
{
    rocksdb->Close();
    rocksdb.reset();
}

}

#endif
45 src/Storages/MergeTree/MergeTreeMetadataCache.h Normal file
@@ -0,0 +1,45 @@
#pragma once

#include "config_core.h"

#if USE_ROCKSDB
#include <base/types.h>
#include <Core/Types.h>
#include <Poco/Logger.h>
#include <rocksdb/table.h>
#include <rocksdb/db.h>

namespace DB
{
class MergeTreeMetadataCache
{
public:
    using Status = rocksdb::Status;

    static std::unique_ptr<MergeTreeMetadataCache> create(const String & dir, size_t size);

    explicit MergeTreeMetadataCache(rocksdb::DB * rocksdb_) : rocksdb{rocksdb_}
    {
        assert(rocksdb);
    }

    MergeTreeMetadataCache(const MergeTreeMetadataCache &) = delete;

    MergeTreeMetadataCache & operator=(const MergeTreeMetadataCache &) = delete;

    Status put(const String & key, const String & value);
    Status del(const String & key);
    Status get(const String & key, String & value);
    void getByPrefix(const String & prefix, Strings & keys, Strings & values);
    uint64_t getEstimateNumKeys() const;

    void shutdown();
private:
    std::unique_ptr<rocksdb::DB> rocksdb;
    Poco::Logger * log = &Poco::Logger::get("MergeTreeMetadataCache");
};

using MergeTreeMetadataCachePtr = std::shared_ptr<MergeTreeMetadataCache>;
}

#endif
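A short usage sketch of this API, mirroring the unit test added later in this commit; the directory path, key, and cache size are arbitrary example values:

#include <Storages/MergeTree/MergeTreeMetadataCache.h>

using namespace DB;

void exampleUsage()  /// illustration only; assumes a build with USE_ROCKSDB
{
    /// Open (or create) the RocksDB directory with a 256 MiB LRU block cache.
    auto cache = MergeTreeMetadataCache::create("./metadata_cache_db/", 256 * 1024 * 1024);

    cache->put("disk1:store/201806_1_1_0/columns.txt", "columns content");   /// synchronous write, WAL enabled

    String value;
    if (cache->get("disk1:store/201806_1_1_0/columns.txt", value).ok())
    {
        /// value now holds the cached file content
    }

    Strings keys, values;
    cache->getByPrefix("disk1:store/201806_1_1_0/", keys, values);           /// prefix seek over all files of the part

    cache->del("disk1:store/201806_1_1_0/columns.txt");
    cache->shutdown();
}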
@@ -178,12 +178,6 @@ namespace
     };
 }

-static std::unique_ptr<ReadBufferFromFileBase> openForReading(const DiskPtr & disk, const String & path)
-{
-    size_t file_size = disk->getFileSize(path);
-    return disk->readFile(path, ReadSettings().adjustBufferSize(file_size), file_size);
-}
-
 String MergeTreePartition::getID(const MergeTreeData & storage) const
 {
     return getID(storage.getInMemoryMetadataPtr()->getPartitionKey().sample_block);
@@ -373,15 +367,15 @@ void MergeTreePartition::serializeText(const MergeTreeData & storage, WriteBuffe
     }
 }

-void MergeTreePartition::load(const MergeTreeData & storage, const DiskPtr & disk, const String & part_path)
+void MergeTreePartition::load(const MergeTreeData & storage, const PartMetadataManagerPtr & manager)
 {
     auto metadata_snapshot = storage.getInMemoryMetadataPtr();
     if (!metadata_snapshot->hasPartitionKey())
         return;

     const auto & partition_key_sample = adjustPartitionKey(metadata_snapshot, storage.getContext()).sample_block;
-    auto partition_file_path = part_path + "partition.dat";
-    auto file = openForReading(disk, partition_file_path);
+    auto file = manager->read("partition.dat");
     value.resize(partition_key_sample.columns());
     for (size_t i = 0; i < partition_key_sample.columns(); ++i)
         partition_key_sample.getByPosition(i).type->getDefaultSerialization()->deserializeBinary(value[i], *file);
@@ -402,7 +396,9 @@ std::unique_ptr<WriteBufferFromFileBase> MergeTreePartition::store(const Block &
     auto out = disk->writeFile(part_path + "partition.dat");
     HashingWriteBuffer out_hashing(*out);
     for (size_t i = 0; i < value.size(); ++i)
+    {
         partition_key_sample.getByPosition(i).type->getDefaultSerialization()->serializeBinary(value[i], out_hashing);
+    }

     out_hashing.next();
     checksums.files["partition.dat"].file_size = out_hashing.count();
@@ -462,4 +458,14 @@ KeyDescription MergeTreePartition::adjustPartitionKey(const StorageMetadataPtr &
     return partition_key;
 }

+
+void MergeTreePartition::appendFiles(const MergeTreeData & storage, Strings & files)
+{
+    auto metadata_snapshot = storage.getInMemoryMetadataPtr();
+    if (!metadata_snapshot->hasPartitionKey())
+        return;
+
+    files.push_back("partition.dat");
+}
+
 }
@@ -4,6 +4,7 @@
 #include <Disks/IDisk.h>
 #include <IO/WriteBuffer.h>
 #include <Storages/KeyDescription.h>
+#include <Storages/MergeTree/IPartMetadataManager.h>
 #include <Core/Field.h>

 namespace DB
@@ -37,7 +38,8 @@ public:

     void serializeText(const MergeTreeData & storage, WriteBuffer & out, const FormatSettings & format_settings) const;

-    void load(const MergeTreeData & storage, const DiskPtr & disk, const String & part_path);
+    void load(const MergeTreeData & storage, const PartMetadataManagerPtr & manager);

     /// Store functions return write buffer with written but not finalized data.
     /// User must call finish() for returned object.
     [[nodiscard]] std::unique_ptr<WriteBufferFromFileBase> store(const MergeTreeData & storage, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const;
@@ -47,6 +49,8 @@ public:

     void create(const StorageMetadataPtr & metadata_snapshot, Block block, size_t row, ContextPtr context);

+    static void appendFiles(const MergeTreeData & storage, Strings & files);
+
     /// Adjust partition key and execute its expression on block. Return sample block according to used expression.
     static NamesAndTypesList executePartitionByExpression(const StorageMetadataPtr & metadata_snapshot, Block & block, ContextPtr context);

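With the new declaration above, MergeTreePartition::load no longer takes a disk and a path; callers hand it the part's metadata manager, which decides whether "partition.dat" comes from disk or from the RocksDB cache. A hypothetical call site (not shown in this commit) would change roughly like this:

/// Before: the partition loads itself straight from the part directory.
partition.load(storage, volume->getDisk(), getFullRelativePath());

/// After: the metadata manager hides where "partition.dat" is actually read from.
partition.load(storage, metadata_manager);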
@@ -140,6 +140,7 @@ struct Settings;
     /** Experimental/work in progress feature. Unsafe for production. */ \
     M(UInt64, part_moves_between_shards_enable, 0, "Experimental/Incomplete feature to move parts between shards. Does not take into account sharding expressions.", 0) \
     M(UInt64, part_moves_between_shards_delay_seconds, 30, "Time to wait before/after moving parts between shards.", 0) \
+    M(Bool, use_metadata_cache, false, "Experimental feature to speed up parts loading process by using MergeTree metadata cache", 0) \
     \
     /** Obsolete settings. Kept for backward compatibility only. */ \
     M(UInt64, min_relative_delay_to_yield_leadership, 120, "Obsolete setting, does nothing.", 0) \
33 src/Storages/MergeTree/PartMetadataManagerOrdinary.cpp Normal file
@@ -0,0 +1,33 @@
#include "PartMetadataManagerOrdinary.h"

#include <IO/ReadBufferFromFileBase.h>
#include <Disks/IDisk.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>

namespace DB
{

static std::unique_ptr<ReadBufferFromFileBase> openForReading(const DiskPtr & disk, const String & path)
{
    size_t file_size = disk->getFileSize(path);
    return disk->readFile(path, ReadSettings().adjustBufferSize(file_size), file_size);
}

PartMetadataManagerOrdinary::PartMetadataManagerOrdinary(const IMergeTreeDataPart * part_) : IPartMetadataManager(part_)
{
}


std::unique_ptr<SeekableReadBuffer> PartMetadataManagerOrdinary::read(const String & file_name) const
{
    String file_path = fs::path(part->getFullRelativePath()) / file_name;
    return openForReading(disk, file_path);
}

bool PartMetadataManagerOrdinary::exists(const String & file_name) const
{
    return disk->exists(fs::path(part->getFullRelativePath()) / file_name);
}


}
29 src/Storages/MergeTree/PartMetadataManagerOrdinary.h Normal file
@@ -0,0 +1,29 @@
#pragma once

#include <Storages/MergeTree/IPartMetadataManager.h>

namespace DB
{

class PartMetadataManagerOrdinary : public IPartMetadataManager
{
public:
    explicit PartMetadataManagerOrdinary(const IMergeTreeDataPart * part_);

    ~PartMetadataManagerOrdinary() override = default;

    std::unique_ptr<SeekableReadBuffer> read(const String & file_name) const override;

    bool exists(const String & file_name) const override;

    void deleteAll(bool /*include_projection*/) override {}

    void assertAllDeleted(bool /*include_projection*/) const override {}

    void updateAll(bool /*include_projection*/) override {}

    std::unordered_map<String, uint128> check() const override { return {}; }
};


}
273 src/Storages/MergeTree/PartMetadataManagerWithCache.cpp Normal file
@@ -0,0 +1,273 @@
#include "PartMetadataManagerWithCache.h"

#if USE_ROCKSDB
#include <Common/hex.h>
#include <Common/ErrorCodes.h>
#include <IO/HashingReadBuffer.h>
#include <IO/ReadBufferFromString.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>

namespace ProfileEvents
{
    extern const Event MergeTreeMetadataCacheHit;
    extern const Event MergeTreeMetadataCacheMiss;
}

namespace DB
{

namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int CORRUPTED_DATA;
    extern const int NO_SUCH_PROJECTION_IN_TABLE;
}

PartMetadataManagerWithCache::PartMetadataManagerWithCache(const IMergeTreeDataPart * part_, const MergeTreeMetadataCachePtr & cache_)
    : IPartMetadataManager(part_), cache(cache_)
{
}

String PartMetadataManagerWithCache::getKeyFromFilePath(const String & file_path) const
{
    return disk->getName() + ":" + file_path;
}

String PartMetadataManagerWithCache::getFilePathFromKey(const String & key) const
{
    return key.substr(disk->getName().size() + 1);
}

std::unique_ptr<SeekableReadBuffer> PartMetadataManagerWithCache::read(const String & file_name) const
{
    String file_path = fs::path(part->getFullRelativePath()) / file_name;
    String key = getKeyFromFilePath(file_path);
    String value;
    auto status = cache->get(key, value);
    if (!status.ok())
    {
        ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheMiss);
        auto in = disk->readFile(file_path);
        readStringUntilEOF(value, *in);
        cache->put(key, value);
    }
    else
    {
        ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheHit);
    }
    return std::make_unique<ReadBufferFromOwnString>(value);
}

bool PartMetadataManagerWithCache::exists(const String & file_name) const
{
    String file_path = fs::path(part->getFullRelativePath()) / file_name;
    String key = getKeyFromFilePath(file_path);
    String value;
    auto status = cache->get(key, value);
    if (status.ok())
    {
        ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheHit);
        return true;
    }
    else
    {
        ProfileEvents::increment(ProfileEvents::MergeTreeMetadataCacheMiss);
        return disk->exists(fs::path(part->getFullRelativePath()) / file_name);
    }
}

void PartMetadataManagerWithCache::deleteAll(bool include_projection)
{
    Strings file_names;
    part->appendFilesOfColumnsChecksumsIndexes(file_names, include_projection);

    String value;
    for (const auto & file_name : file_names)
    {
        String file_path = fs::path(part->getFullRelativePath()) / file_name;
        String key = getKeyFromFilePath(file_path);
        auto status = cache->del(key);
        if (!status.ok())
        {
            status = cache->get(key, value);
            if (status.IsNotFound())
                continue;

            throw Exception(
                ErrorCodes::LOGICAL_ERROR,
                "deleteAll failed include_projection:{} status:{}, file_path:{}",
                include_projection,
                status.ToString(),
                file_path);
        }
    }
}

void PartMetadataManagerWithCache::updateAll(bool include_projection)
{
    Strings file_names;
    part->appendFilesOfColumnsChecksumsIndexes(file_names, include_projection);

    String value;
    String read_value;
    for (const auto & file_name : file_names)
    {
        String file_path = fs::path(part->getFullRelativePath()) / file_name;
        if (!disk->exists(file_path))
            continue;
        auto in = disk->readFile(file_path);
        readStringUntilEOF(value, *in);

        String key = getKeyFromFilePath(file_path);
        auto status = cache->put(key, value);
        if (!status.ok())
        {
            status = cache->get(key, read_value);
            if (status.IsNotFound() || read_value == value)
                continue;

            throw Exception(
                ErrorCodes::LOGICAL_ERROR,
                "updateAll failed include_projection:{} status:{}, file_path:{}",
                include_projection,
                status.ToString(),
                file_path);
        }
    }
}

void PartMetadataManagerWithCache::assertAllDeleted(bool include_projection) const
{
    Strings keys;
    std::vector<uint128> _;
    getKeysAndCheckSums(keys, _);
    if (keys.empty())
        return;

    String file_path;
    String file_name;
    for (const auto & key : keys)
    {
        file_path = getFilePathFromKey(key);
        file_name = fs::path(file_path).filename();

        /// Metadata file belongs to current part
        if (fs::path(part->getFullRelativePath()) / file_name == file_path)
            throw Exception(
                ErrorCodes::LOGICAL_ERROR,
                "Data part {} with type {} with meta file {} still in cache",
                part->name,
                part->getType().toString(),
                file_path);

        /// File belongs to projection part of current part
        if (!part->isProjectionPart() && include_projection)
        {
            const auto & projection_parts = part->getProjectionParts();
            for (const auto & [projection_name, projection_part] : projection_parts)
            {
                if (fs::path(projection_part->getFullRelativePath()) / file_name == file_path)
                {
                    throw Exception(
                        ErrorCodes::LOGICAL_ERROR,
                        "Data part {} with type {} with meta file {} with projection name still in cache",
                        part->name,
                        part->getType().toString(),
                        file_path,
                        projection_name);
                }
            }
        }
    }
}

void PartMetadataManagerWithCache::getKeysAndCheckSums(Strings & keys, std::vector<uint128> & checksums) const
{
    String prefix = getKeyFromFilePath(fs::path(part->getFullRelativePath()) / "");
    Strings values;
    cache->getByPrefix(prefix, keys, values);
    size_t size = keys.size();
    for (size_t i = 0; i < size; ++i)
    {
        ReadBufferFromString rbuf(values[i]);
        HashingReadBuffer hbuf(rbuf);
        checksums.push_back(hbuf.getHash());
    }
}

std::unordered_map<String, IPartMetadataManager::uint128> PartMetadataManagerWithCache::check() const
{
    /// Only applies for normal part stored on disk
    if (part->isProjectionPart() || !part->isStoredOnDisk())
        return {};

    /// The directory of projection part is under the directory of its parent part
    const auto filenames_without_checksums = part->getFileNamesWithoutChecksums();

    std::unordered_map<String, uint128> results;
    Strings keys;
    std::vector<uint128> cache_checksums;
    std::vector<uint128> disk_checksums;
    getKeysAndCheckSums(keys, cache_checksums);
    for (size_t i = 0; i < keys.size(); ++i)
    {
        const auto & key = keys[i];
        String file_path = getFilePathFromKey(key);
        String file_name = fs::path(file_path).filename();
        results.emplace(file_name, cache_checksums[i]);

        /// File belongs to normal part
        if (fs::path(part->getFullRelativePath()) / file_name == file_path)
        {
            auto disk_checksum = part->getActualChecksumByFile(file_path);
            if (disk_checksum != cache_checksums[i])
                throw Exception(
                    ErrorCodes::CORRUPTED_DATA,
                    "Checksums doesn't match in part {}. Expected: {}. Found {}.",
                    part->name,
                    getHexUIntUppercase(disk_checksum.first) + getHexUIntUppercase(disk_checksum.second),
                    getHexUIntUppercase(cache_checksums[i].first) + getHexUIntUppercase(cache_checksums[i].second));

            disk_checksums.push_back(disk_checksum);
            continue;
        }

        /// File belongs to projection part
        String proj_dir_name = fs::path(file_path).parent_path().filename();
        auto pos = proj_dir_name.find_last_of('.');
        if (pos == String::npos)
        {
            throw Exception(
                ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE,
                "There is no projection in part: {} contains file: {} with directory name: {}",
                part->name,
                file_path,
                proj_dir_name);
        }

        String proj_name = proj_dir_name.substr(0, pos);
        const auto & projection_parts = part->getProjectionParts();
        auto it = projection_parts.find(proj_name);
        if (it == projection_parts.end())
        {
            throw Exception(
                ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE,
                "There is no projection {} in part: {} contains file: {}",
                proj_name, part->name, file_path);
        }

        auto disk_checksum = it->second->getActualChecksumByFile(file_path);
        if (disk_checksum != cache_checksums[i])
            throw Exception(
                ErrorCodes::CORRUPTED_DATA,
                "Checksums doesn't match in projection part {} {}. Expected: {}. Found {}.",
                part->name, proj_name,
                getHexUIntUppercase(disk_checksum.first) + getHexUIntUppercase(disk_checksum.second),
                getHexUIntUppercase(cache_checksums[i].first) + getHexUIntUppercase(cache_checksums[i].second));
        disk_checksums.push_back(disk_checksum);
    }
    return results;
}

}

#endif
56 src/Storages/MergeTree/PartMetadataManagerWithCache.h Normal file
@@ -0,0 +1,56 @@
#pragma once

#include "config_core.h"

#if USE_ROCKSDB
#include <Storages/MergeTree/IPartMetadataManager.h>
#include <Storages/MergeTree/MergeTreeMetadataCache.h>

namespace DB
{

/// PartMetadataManagerWithCache stores the metadata files of a part in RocksDB as a cache layer
/// to speed up the loading process of a merge tree table.
class PartMetadataManagerWithCache : public IPartMetadataManager
{
public:
    PartMetadataManagerWithCache(const IMergeTreeDataPart * part_, const MergeTreeMetadataCachePtr & cache_);

    ~PartMetadataManagerWithCache() override = default;

    /// First read the metadata from the RocksDB cache, then from disk.
    std::unique_ptr<SeekableReadBuffer> read(const String & file_name) const override;

    /// First check existence of the metadata in the RocksDB cache, then on disk.
    bool exists(const String & file_name) const override;

    /// Delete all metadata files of the part from the RocksDB cache.
    void deleteAll(bool include_projection) override;

    /// Assert that all metadata files of the part are deleted from the RocksDB cache.
    void assertAllDeleted(bool include_projection) const override;

    /// Update all metadata files of the part in the RocksDB cache.
    /// Needs to be called after the part directory is renamed.
    void updateAll(bool include_projection) override;

    /// Check whether all metadata files of the part in the RocksDB cache are up to date.
    std::unordered_map<String, uint128> check() const override;

private:
    /// Get cache key from the path of a metadata file.
    /// Format: <disk_name>:relative/full/path/of/metadata/file
    String getKeyFromFilePath(const String & file_path) const;

    /// Get metadata file path from a cache key.
    String getFilePathFromKey(const String & key) const;

    /// Get cache keys and checksums of the corresponding metadata in a part (including projection parts).
    void getKeysAndCheckSums(Strings & keys, std::vector<uint128> & checksums) const;


    MergeTreeMetadataCachePtr cache;
};

}

#endif
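The private helpers above map metadata file paths to cache keys of the form <disk_name>:<relative path>. A tiny self-contained sketch of that round trip; the disk name and path below are made-up example values:

#include <cassert>
#include <string>

// Key format used by PartMetadataManagerWithCache: "<disk_name>:<relative path of metadata file>".
static std::string keyFromFilePath(const std::string & disk_name, const std::string & file_path)
{
    return disk_name + ":" + file_path;
}

static std::string filePathFromKey(const std::string & disk_name, const std::string & key)
{
    return key.substr(disk_name.size() + 1);   // drop "<disk_name>:"
}

int main()
{
    const std::string disk = "default";                                     // example disk name
    const std::string path = "store/201806_1_1_0_4/partition.dat";          // example metadata file
    const std::string key = keyFromFilePath(disk, path);
    assert(key == "default:store/201806_1_1_0_4/partition.dat");
    assert(filePathFromKey(disk, key) == path);
    return 0;
}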
@@ -399,6 +399,7 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na
            LOG_WARNING(log, "We have part {} covering part {}", part->name, part_name);
        }

+        part->checkMetadata();
        return {part_name, true, ""};
    }

@@ -136,7 +136,7 @@ IMergeTreeDataPart::Checksums checkDataPart(
        IMergeTreeDataPart::Checksums projection_checksums_data;
        const auto & projection_path = file_path;

-       if (part_type == MergeTreeDataPartType::COMPACT)
+       if (projection->getType() == MergeTreeDataPartType::COMPACT)
        {
            auto proj_path = file_path + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION;
            auto file_buf = disk->readFile(proj_path);
@@ -0,0 +1,83 @@
#include "config_core.h"

#if USE_ROCKSDB
#include <gtest/gtest.h>
#include <rocksdb/table.h>
#include <rocksdb/db.h>
#include <Interpreters/Context.h>
#include <Storages/MergeTree/MergeTreeMetadataCache.h>

using namespace DB;

class MergeTreeMetadataCacheTest : public ::testing::Test
{
public:
    void SetUp() override
    {
        cache = MergeTreeMetadataCache::create("./db/", 268435456);
    }

    void TearDown() override
    {
        cache->shutdown();
        cache.reset();
    }

    MergeTreeMetadataCachePtr cache;
};

TEST_F(MergeTreeMetadataCacheTest, testCommon)
{
    std::vector<String> files
        = {"columns.txt", "checksums.txt", "primary.idx", "count.txt", "partition.dat", "minmax_p.idx", "default_compression_codec.txt"};
    String prefix = "data/test_metadata_cache/check_part_metadata_cache/201806_1_1_0_4/";

    for (const auto & file : files)
    {
        auto status = cache->put(prefix + file, prefix + file);
        ASSERT_EQ(status.code(), rocksdb::Status::Code::kOk);
    }

    for (const auto & file : files)
    {
        String value;
        auto status = cache->get(prefix + file, value);
        ASSERT_EQ(status.code(), rocksdb::Status::Code::kOk);
        ASSERT_EQ(value, prefix + file);
    }

    {
        Strings keys;
        Strings values;
        cache->getByPrefix(prefix, keys, values);
        ASSERT_EQ(keys.size(), files.size());
        ASSERT_EQ(values.size(), files.size());
        for (size_t i = 0; i < files.size(); ++i)
        {
            ASSERT_EQ(values[i], keys[i]);
        }
    }

    for (const auto & file : files)
    {
        auto status = cache->del(prefix + file);
        ASSERT_EQ(status.code(), rocksdb::Status::Code::kOk);
    }

    for (const auto & file : files)
    {
        String value;
        auto status = cache->get(prefix + file, value);
        ASSERT_EQ(status.code(), rocksdb::Status::Code::kNotFound);
    }

    {
        Strings keys;
        Strings values;
        cache->getByPrefix(prefix, keys, values);
        ASSERT_EQ(keys.size(), 0);
        ASSERT_EQ(values.size(), 0);
    }
}

#endif
@@ -98,8 +98,24 @@ MaterializedPostgreSQLConsumer::StorageData::Buffer::Buffer(
 }


+void MaterializedPostgreSQLConsumer::assertCorrectInsertion(StorageData::Buffer & buffer, size_t column_idx)
+{
+    if (column_idx >= buffer.description.sample_block.columns()
+        || column_idx >= buffer.description.types.size()
+        || column_idx >= buffer.columns.size())
+        throw Exception(
+            ErrorCodes::LOGICAL_ERROR,
+            "Attempt to insert into buffer at position: {}, but block columns size is {}, types size: {}, columns size: {}, buffer structure: {}",
+            column_idx,
+            buffer.description.sample_block.columns(), buffer.description.types.size(), buffer.columns.size(),
+            buffer.description.sample_block.dumpStructure());
+}
+
+
 void MaterializedPostgreSQLConsumer::insertValue(StorageData::Buffer & buffer, const std::string & value, size_t column_idx)
 {
+    assertCorrectInsertion(buffer, column_idx);
+
     const auto & sample = buffer.description.sample_block.getByPosition(column_idx);
     bool is_nullable = buffer.description.types[column_idx].second;

@@ -134,6 +150,8 @@ void MaterializedPostgreSQLConsumer::insertValue(StorageData::Buffer & buffer, c

 void MaterializedPostgreSQLConsumer::insertDefaultValue(StorageData::Buffer & buffer, size_t column_idx)
 {
+    assertCorrectInsertion(buffer, column_idx);
+
     const auto & sample = buffer.description.sample_block.getByPosition(column_idx);
     insertDefaultPostgreSQLValue(*buffer.columns[column_idx], *sample.column);
 }
@@ -515,13 +533,14 @@ void MaterializedPostgreSQLConsumer::processReplicationMessage(const char * repl

 void MaterializedPostgreSQLConsumer::syncTables()
 {
-    try
+    for (const auto & table_name : tables_to_sync)
     {
-        for (const auto & table_name : tables_to_sync)
-        {
-            auto & storage_data = storages.find(table_name)->second;
-            Block result_rows = storage_data.buffer.description.sample_block.cloneWithColumns(std::move(storage_data.buffer.columns));
+        auto & storage_data = storages.find(table_name)->second;
+        Block result_rows = storage_data.buffer.description.sample_block.cloneWithColumns(std::move(storage_data.buffer.columns));
+        storage_data.buffer.columns = storage_data.buffer.description.sample_block.cloneEmptyColumns();

+        try
+        {
             if (result_rows.rows())
             {
                 auto storage = storage_data.storage;
@@ -543,13 +562,18 @@ void MaterializedPostgreSQLConsumer::syncTables()

                 CompletedPipelineExecutor executor(io.pipeline);
                 executor.execute();
-
-                storage_data.buffer.columns = storage_data.buffer.description.sample_block.cloneEmptyColumns();
             }
         }
+        catch (...)
+        {
+            tryLogCurrentException(__PRETTY_FUNCTION__);
+        }
+    }

     LOG_DEBUG(log, "Table sync end for {} tables, last lsn: {} = {}, (attempted lsn {})", tables_to_sync.size(), current_lsn, getLSNValue(current_lsn), getLSNValue(final_lsn));

+    try
+    {
        auto tx = std::make_shared<pqxx::nontransaction>(connection->getRef());
        current_lsn = advanceLSN(tx);
        tables_to_sync.clear();
@@ -122,6 +122,8 @@ private:

     void markTableAsSkipped(Int32 relation_id, const String & relation_name);

+    static void assertCorrectInsertion(StorageData::Buffer & buffer, size_t column_idx);
+
     /// lsn - log sequence number, like a WAL offset (64 bit).
     static Int64 getLSNValue(const std::string & lsn)
     {
@@ -64,8 +64,8 @@ PostgreSQLReplicationHandler::PostgreSQLReplicationHandler(
     bool is_attach_,
     const MaterializedPostgreSQLSettings & replication_settings,
     bool is_materialized_postgresql_database_)
-    : log(&Poco::Logger::get("PostgreSQLReplicationHandler"))
-    , context(context_)
+    : WithContext(context_->getGlobalContext())
+    , log(&Poco::Logger::get("PostgreSQLReplicationHandler"))
     , is_attach(is_attach_)
     , postgres_database(postgres_database_)
     , postgres_schema(replication_settings.materialized_postgresql_schema)
@@ -94,9 +94,9 @@ PostgreSQLReplicationHandler::PostgreSQLReplicationHandler(
     }
     publication_name = fmt::format("{}_ch_publication", replication_identifier);

-    startup_task = context->getSchedulePool().createTask("PostgreSQLReplicaStartup", [this]{ checkConnectionAndStart(); });
-    consumer_task = context->getSchedulePool().createTask("PostgreSQLReplicaStartup", [this]{ consumerFunc(); });
-    cleanup_task = context->getSchedulePool().createTask("PostgreSQLReplicaStartup", [this]{ cleanupFunc(); });
+    startup_task = getContext()->getSchedulePool().createTask("PostgreSQLReplicaStartup", [this]{ checkConnectionAndStart(); });
+    consumer_task = getContext()->getSchedulePool().createTask("PostgreSQLReplicaStartup", [this]{ consumerFunc(); });
+    cleanup_task = getContext()->getSchedulePool().createTask("PostgreSQLReplicaStartup", [this]{ cleanupFunc(); });
 }


@@ -296,7 +296,7 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error)
     /// (Apart from the case, when shutdownFinal is called).
     /// Handler uses it only for loadFromSnapshot and shutdown methods.
     consumer = std::make_shared<MaterializedPostgreSQLConsumer>(
-        context,
+        getContext(),
         std::move(tmp_connection),
         replication_slot,
         publication_name,
@@ -921,9 +921,9 @@ void PostgreSQLReplicationHandler::reloadFromSnapshot(const std::vector<std::pai

     for (const auto & [relation_id, table_name] : relation_data)
     {
-        auto storage = DatabaseCatalog::instance().getTable(StorageID(current_database_name, table_name), context);
+        auto storage = DatabaseCatalog::instance().getTable(StorageID(current_database_name, table_name), getContext());
         auto * materialized_storage = storage->as <StorageMaterializedPostgreSQL>();
-        auto materialized_table_lock = materialized_storage->lockForShare(String(), context->getSettingsRef().lock_acquire_timeout);
+        auto materialized_table_lock = materialized_storage->lockForShare(String(), getContext()->getSettingsRef().lock_acquire_timeout);

         /// If for some reason this temporary table already exists - also drop it.
         auto temp_materialized_storage = materialized_storage->createTemporary();
@@ -13,7 +13,7 @@ namespace DB
 class StorageMaterializedPostgreSQL;
 struct SettingChange;

-class PostgreSQLReplicationHandler
+class PostgreSQLReplicationHandler : WithContext
 {
 friend class TemporaryReplicationSlot;

@@ -98,7 +98,6 @@ private:
     std::pair<String, String> getSchemaAndTableName(const String & table_name) const;

     Poco::Logger * log;
-    ContextPtr context;

     /// If it is not attach, i.e. a create query, then if publication already exists - always drop it.
     bool is_attach;
@@ -44,7 +44,7 @@ class StorageDistributed final : public shared_ptr_helper<StorageDistributed>, p
     friend class StorageSystemDistributionQueue;

 public:
-    ~StorageDistributed() override;
+    virtual ~StorageDistributed() override;

     std::string getName() const override { return "Distributed"; }

@@ -1750,6 +1750,8 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_
                auto out = disk->writeFile(tmp_checksums_path, 4096);
                part->checksums.write(*out);
                disk->moveFile(tmp_checksums_path, checksums_path);
+
+               part->checkMetadata();
                results.emplace_back(part->name, true, "Checksums recounted and written to disk.");
            }
            catch (const Exception & ex)
@@ -1766,6 +1768,7 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_
            try
            {
                checkDataPart(part, true);
+               part->checkMetadata();
                results.emplace_back(part->name, true, "");
            }
            catch (const Exception & ex)
@@ -51,6 +51,7 @@ StorageS3Cluster::StorageS3Cluster(
     const StorageID & table_id_,
     String cluster_name_,
     const String & format_name_,
+    UInt64 max_single_read_retries_,
     UInt64 max_connections_,
     const ColumnsDescription & columns_,
     const ConstraintsDescription & constraints_,
@@ -63,11 +64,26 @@ StorageS3Cluster::StorageS3Cluster(
     , format_name(format_name_)
     , compression_method(compression_method_)
 {
+    context_->getGlobalContext()->getRemoteHostFilter().checkURL(Poco::URI{filename});
     StorageInMemoryMetadata storage_metadata;
-    storage_metadata.setColumns(columns_);
+    StorageS3::updateClientAndAuthSettings(context_, client_auth);
+
+    if (columns_.empty())
+    {
+        const bool is_key_with_globs = filename.find_first_of("*?{") != std::string::npos;
+
+        /// `distributed_processing` is set to false, because this code is executed on the initiator, so there is no callback set
+        /// for asking for the next tasks.
+        /// `format_settings` is set to std::nullopt, because StorageS3Cluster is used only as table function
+        auto columns = StorageS3::getTableStructureFromDataImpl(format_name, client_auth, max_single_read_retries_, compression_method,
+            /*distributed_processing_*/false, is_key_with_globs, /*format_settings=*/std::nullopt, context_);
+        storage_metadata.setColumns(columns);
+    }
+    else
+        storage_metadata.setColumns(columns_);
+
     storage_metadata.setConstraints(constraints_);
     setInMemoryMetadata(storage_metadata);
-    StorageS3::updateClientAndAuthSettings(context_, client_auth);
 }

 /// The code executes on initiator
@@ -83,7 +99,6 @@ Pipe StorageS3Cluster::read(
     StorageS3::updateClientAndAuthSettings(context, client_auth);

     auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettingsRef());
-    StorageS3::updateClientAndAuthSettings(context, client_auth);

     auto iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(*client_auth.client, client_auth.uri);
     auto callback = std::make_shared<StorageS3Source::IteratorWrapper>([iterator]() mutable -> String
@@ -41,6 +41,7 @@ protected:
        const StorageID & table_id_,
        String cluster_name_,
        const String & format_name_,
+       UInt64 max_single_read_retries_,
        UInt64 max_connections_,
        const ColumnsDescription & columns_,
        const ConstraintsDescription & constraints_,
@@ -560,6 +560,8 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData(
     const std::optional<FormatSettings> & format_settings,
     ContextPtr context)
 {
+    context->getRemoteHostFilter().checkURL(Poco::URI(uri));
+
     Poco::Net::HTTPBasicCredentials credentials;

     std::vector<String> urls_to_check;
143 src/Storages/System/StorageSystemMergeTreeMetadataCache.cpp Normal file
@@ -0,0 +1,143 @@
#include <Storages/System/StorageSystemMergeTreeMetadataCache.h>

#if USE_ROCKSDB
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/Context.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Storages/MergeTree/KeyCondition.h>
#include <Storages/MergeTree/MergeTreeMetadataCache.h>
#include <Common/typeid_cast.h>

namespace DB
{
namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
}

NamesAndTypesList StorageSystemMergeTreeMetadataCache::getNamesAndTypes()
{
    return {
        {"key", std::make_shared<DataTypeString>()},
        {"value", std::make_shared<DataTypeString>()},
    };
}

static bool extractKeyImpl(const IAST & elem, String & res, bool & precise)
{
    const auto * function = elem.as<ASTFunction>();
    if (!function)
        return false;

    if (function->name == "and")
    {
        for (const auto & child : function->arguments->children)
        {
            bool tmp_precise = false;
            if (extractKeyImpl(*child, res, tmp_precise))
            {
                precise = tmp_precise;
                return true;
            }
        }
        return false;
    }

    if (function->name == "equals" || function->name == "like")
    {
        const auto & args = function->arguments->as<ASTExpressionList &>();
        const IAST * value;

        if (args.children.size() != 2)
            return false;

        const ASTIdentifier * ident;
        if ((ident = args.children.at(0)->as<ASTIdentifier>()))
            value = args.children.at(1).get();
        else if ((ident = args.children.at(1)->as<ASTIdentifier>()))
            value = args.children.at(0).get();
        else
            return false;

        if (ident->name() != "key")
            return false;

        const auto * literal = value->as<ASTLiteral>();
        if (!literal)
            return false;

        if (literal->value.getType() != Field::Types::String)
            return false;

        res = literal->value.safeGet<String>();
        precise = function->name == "equals";
        return true;
    }
    return false;
}


/// Retrieve from the query a condition of the form `key = 'key'`, from conjunctions in the WHERE clause.
static String extractKey(const ASTPtr & query, bool & precise)
{
    const auto & select = query->as<ASTSelectQuery &>();
    if (!select.where())
        return "";

    String res;
    return extractKeyImpl(*select.where(), res, precise) ? res : "";
}


void StorageSystemMergeTreeMetadataCache::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const
{
    bool precise = false;
    String key = extractKey(query_info.query, precise);
    if (key.empty())
        throw Exception(
            "SELECT from system.merge_tree_metadata_cache table must contain condition like key = 'key' or key LIKE 'prefix%' in WHERE clause.", ErrorCodes::BAD_ARGUMENTS);

    auto cache = context->getMergeTreeMetadataCache();
    if (precise)
    {
        String value;
        if (cache->get(key, value) != MergeTreeMetadataCache::Status::OK())
            return;

        size_t col_num = 0;
        res_columns[col_num++]->insert(key);
        res_columns[col_num++]->insert(value);
    }
    else
    {
        String target = extractFixedPrefixFromLikePattern(key);
        if (target.empty())
            throw Exception(
                "SELECT from system.merge_tree_metadata_cache table must contain condition like key = 'key' or key LIKE 'prefix%' in WHERE clause.", ErrorCodes::BAD_ARGUMENTS);

        Strings keys;
        Strings values;
        keys.reserve(4096);
        values.reserve(4096);
        cache->getByPrefix(target, keys, values);
        if (keys.empty())
            return;

        assert(keys.size() == values.size());
        for (size_t i = 0; i < keys.size(); ++i)
        {
            size_t col_num = 0;
            res_columns[col_num++]->insert(keys[i]);
            res_columns[col_num++]->insert(values[i]);
        }
    }
}

}
#endif
src/Storages/System/StorageSystemMergeTreeMetadataCache.h (new file, 32 lines)
@@ -0,0 +1,32 @@
+#pragma once
+
+#include "config_core.h"
+
+#if USE_ROCKSDB
+#include <base/shared_ptr_helper.h>
+#include <Storages/System/IStorageSystemOneBlock.h>
+
+
+namespace DB
+{
+class Context;
+
+
+/// Implements `merge_tree_metadata_cache` system table, which allows you to view the metadata cache data in rocksdb for testing purposes.
+class StorageSystemMergeTreeMetadataCache : public shared_ptr_helper<StorageSystemMergeTreeMetadataCache>, public IStorageSystemOneBlock<StorageSystemMergeTreeMetadataCache>
+{
+    friend struct shared_ptr_helper<StorageSystemMergeTreeMetadataCache>;
+
+public:
+    std::string getName() const override { return "SystemMergeTreeMetadataCache"; }
+
+    static NamesAndTypesList getNamesAndTypes();
+
+protected:
+    using IStorageSystemOneBlock::IStorageSystemOneBlock;
+
+    void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override;
+};
+
+}
+#endif
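For orientation: the new system table exposes the RocksDB-backed part metadata cache as two String columns, key and value, and fillData() only runs when the WHERE clause pins the key down with an equality or a LIKE pattern that has a fixed prefix; anything else throws BAD_ARGUMENTS. A hedged sketch of the two accepted query shapes (the key values are placeholders, not taken from this diff):

SELECT key, value
FROM system.merge_tree_metadata_cache
WHERE key = 'some_exact_key';

SELECT key, value
FROM system.merge_tree_metadata_cache
WHERE key LIKE 'some_prefix%';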
@@ -76,6 +76,7 @@

 #if USE_ROCKSDB
 #include <Storages/RocksDB/StorageSystemRocksDB.h>
+#include <Storages/System/StorageSystemMergeTreeMetadataCache.h>
 #endif


@@ -130,6 +131,7 @@ void attachSystemTablesLocal(ContextPtr context, IDatabase & system_database)
 #endif
 #if USE_ROCKSDB
     attach<StorageSystemRocksDB>(context, system_database, "rocksdb");
+    attach<StorageSystemMergeTreeMetadataCache>(context, system_database, "merge_tree_metadata_cache");
 #endif
 }

@@ -22,51 +22,29 @@ namespace ErrorCodes
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
 }

-void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr context)
+/// This is needed to avoid copy-paste: the s3Cluster arguments differ only in the additional (first) argument - the cluster name.
+void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & args, ContextPtr context, StorageS3Configuration & s3_configuration)
 {
-    /// Parse args
-    ASTs & args_func = ast_function->children;
-
-    const auto message = fmt::format(
-        "The signature of table function {} could be the following:\n" \
-        " - url\n"
-        " - url, format\n" \
-        " - url, format, structure\n" \
-        " - url, access_key_id, secret_access_key\n" \
-        " - url, format, structure, compression_method\n" \
-        " - url, access_key_id, secret_access_key, format\n"
-        " - url, access_key_id, secret_access_key, format, structure\n" \
-        " - url, access_key_id, secret_access_key, format, structure, compression_method",
-        getName());
-
-    if (args_func.size() != 1)
-        throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
-
-    ASTs & args = args_func.at(0)->children;
-    StorageS3Configuration configuration;

     if (auto named_collection = getURLBasedDataSourceConfiguration(args, context))
     {
         auto [common_configuration, storage_specific_args] = named_collection.value();
-        configuration.set(common_configuration);
+        s3_configuration.set(common_configuration);

         for (const auto & [arg_name, arg_value] : storage_specific_args)
         {
             if (arg_name == "access_key_id")
-                configuration.access_key_id = arg_value->as<ASTLiteral>()->value.safeGet<String>();
+                s3_configuration.access_key_id = arg_value->as<ASTLiteral>()->value.safeGet<String>();
             else if (arg_name == "secret_access_key")
-                configuration.secret_access_key = arg_value->as<ASTLiteral>()->value.safeGet<String>();
+                s3_configuration.secret_access_key = arg_value->as<ASTLiteral>()->value.safeGet<String>();
             else
-                throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
-                    "Unknown key-value argument `{}` for StorageS3, expected: "
-                    "url, [access_key_id, secret_access_key], name of used format, structure and [compression_method].",
-                    arg_name);
+                throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message);
         }
     }
     else
     {
         if (args.empty() || args.size() > 6)
-            throw Exception(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+            throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message);

         for (auto & arg : args)
             arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
@@ -110,53 +88,76 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con
         }

         /// This argument is always the first
-        configuration.url = args[0]->as<ASTLiteral &>().value.safeGet<String>();
+        s3_configuration.url = args[0]->as<ASTLiteral &>().value.safeGet<String>();

         if (args_to_idx.contains("format"))
-            configuration.format = args[args_to_idx["format"]]->as<ASTLiteral &>().value.safeGet<String>();
+            s3_configuration.format = args[args_to_idx["format"]]->as<ASTLiteral &>().value.safeGet<String>();

         if (args_to_idx.contains("structure"))
-            configuration.structure = args[args_to_idx["structure"]]->as<ASTLiteral &>().value.safeGet<String>();
+            s3_configuration.structure = args[args_to_idx["structure"]]->as<ASTLiteral &>().value.safeGet<String>();

         if (args_to_idx.contains("compression_method"))
-            configuration.compression_method = args[args_to_idx["compression_method"]]->as<ASTLiteral &>().value.safeGet<String>();
+            s3_configuration.compression_method = args[args_to_idx["compression_method"]]->as<ASTLiteral &>().value.safeGet<String>();

         if (args_to_idx.contains("access_key_id"))
-            configuration.access_key_id = args[args_to_idx["access_key_id"]]->as<ASTLiteral &>().value.safeGet<String>();
+            s3_configuration.access_key_id = args[args_to_idx["access_key_id"]]->as<ASTLiteral &>().value.safeGet<String>();

         if (args_to_idx.contains("secret_access_key"))
-            configuration.secret_access_key = args[args_to_idx["secret_access_key"]]->as<ASTLiteral &>().value.safeGet<String>();
+            s3_configuration.secret_access_key = args[args_to_idx["secret_access_key"]]->as<ASTLiteral &>().value.safeGet<String>();
     }

-    if (configuration.format == "auto")
-        configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url, true);
-
-    s3_configuration = std::move(configuration);
+    if (s3_configuration.format == "auto")
+        s3_configuration.format = FormatFactory::instance().getFormatFromFileName(s3_configuration.url, true);
+}
+
+
+void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr context)
+{
+    /// Parse args
+    ASTs & args_func = ast_function->children;
+
+    const auto message = fmt::format(
+        "The signature of table function {} could be the following:\n" \
+        " - url\n" \
+        " - url, format\n" \
+        " - url, format, structure\n" \
+        " - url, access_key_id, secret_access_key\n" \
+        " - url, format, structure, compression_method\n" \
+        " - url, access_key_id, secret_access_key, format\n" \
+        " - url, access_key_id, secret_access_key, format, structure\n" \
+        " - url, access_key_id, secret_access_key, format, structure, compression_method",
+        getName());
+
+    if (args_func.size() != 1)
+        throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+    auto & args = args_func.at(0)->children;
+
+    parseArgumentsImpl(message, args, context, configuration);
 }


 ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context) const
 {
-    if (s3_configuration->structure == "auto")
+    if (configuration.structure == "auto")
     {
         return StorageS3::getTableStructureFromData(
-            s3_configuration->format,
-            S3::URI(Poco::URI(s3_configuration->url)),
-            s3_configuration->access_key_id,
-            s3_configuration->secret_access_key,
+            configuration.format,
+            S3::URI(Poco::URI(configuration.url)),
+            configuration.access_key_id,
+            configuration.secret_access_key,
             context->getSettingsRef().s3_max_connections,
             context->getSettingsRef().s3_max_single_read_retries,
-            s3_configuration->compression_method,
+            configuration.compression_method,
             false,
             std::nullopt,
             context);
     }

-    return parseColumnsListFromString(s3_configuration->structure, context);
+    return parseColumnsListFromString(configuration.structure, context);
 }


 StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const
 {
-    Poco::URI uri (s3_configuration->url);
+    Poco::URI uri (configuration.url);
     S3::URI s3_uri (uri);
     UInt64 max_single_read_retries = context->getSettingsRef().s3_max_single_read_retries;
     UInt64 min_upload_part_size = context->getSettingsRef().s3_min_upload_part_size;
@@ -166,17 +167,17 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context
     UInt64 max_connections = context->getSettingsRef().s3_max_connections;

     ColumnsDescription columns;
-    if (s3_configuration->structure != "auto")
-        columns = parseColumnsListFromString(s3_configuration->structure, context);
+    if (configuration.structure != "auto")
+        columns = parseColumnsListFromString(configuration.structure, context);
     else if (!structure_hint.empty())
         columns = structure_hint;

     StoragePtr storage = StorageS3::create(
         s3_uri,
-        s3_configuration->access_key_id,
-        s3_configuration->secret_access_key,
+        configuration.access_key_id,
+        configuration.secret_access_key,
         StorageID(getDatabaseName(), table_name),
-        s3_configuration->format,
+        configuration.format,
         max_single_read_retries,
         min_upload_part_size,
         upload_part_size_multiply_factor,
@@ -189,7 +190,7 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context
         context,
         /// No format_settings for table function S3
         std::nullopt,
-        s3_configuration->compression_method);
+        configuration.compression_method);

     storage->startup();

@@ -12,6 +12,7 @@ namespace DB
 {

 class Context;
+class TableFunctionS3Cluster;

 /* s3(source, [access_key_id, secret_access_key,] format, structure[, compression]) - creates a temporary storage for a file in S3.
  */
@@ -23,13 +24,15 @@ public:
     {
         return name;
     }
-    bool hasStaticStructure() const override { return s3_configuration->structure != "auto"; }
+    bool hasStaticStructure() const override { return configuration.structure != "auto"; }

-    bool needStructureHint() const override { return s3_configuration->structure == "auto"; }
+    bool needStructureHint() const override { return configuration.structure == "auto"; }

     void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; }

 protected:
+    friend class TableFunctionS3Cluster;
+
     StoragePtr executeImpl(
         const ASTPtr & ast_function,
         ContextPtr context,
@@ -41,7 +44,9 @@ protected:
     ColumnsDescription getActualTableStructure(ContextPtr context) const override;
     void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;

-    std::optional<StorageS3Configuration> s3_configuration;
+    static void parseArgumentsImpl(const String & error_message, ASTs & args, ContextPtr context, StorageS3Configuration & configuration);
+
+    StorageS3Configuration configuration;
     ColumnsDescription structure_hint;
 };

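For orientation: with the configuration member and the shared parseArgumentsImpl() above, the s3 table function keeps accepting the signatures listed in its fmt::format message, from a bare URL up to URL, credentials, format, structure and compression. A hedged sketch of two of those forms; the bucket URL, credentials and column list are placeholders, not values from this diff:

SELECT count()
FROM s3('https://my-bucket.s3.amazonaws.com/data/*.tsv', 'TSV', 'id UInt64, name String');

SELECT count()
FROM s3('https://my-bucket.s3.amazonaws.com/data/*.tsv', 'my_access_key_id', 'my_secret_access_key', 'TSV', 'id UInt64, name String');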
@@ -32,6 +32,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int BAD_GET;
 }


@@ -45,55 +46,58 @@ void TableFunctionS3Cluster::parseArguments(const ASTPtr & ast_function, Context

     ASTs & args = args_func.at(0)->children;

+    for (auto & arg : args)
+        arg = evaluateConstantExpressionAsLiteral(arg, context);
+
     const auto message = fmt::format(
         "The signature of table function {} could be the following:\n" \
+        " - cluster, url\n"
+        " - cluster, url, format\n" \
         " - cluster, url, format, structure\n" \
+        " - cluster, url, access_key_id, secret_access_key\n" \
         " - cluster, url, format, structure, compression_method\n" \
+        " - cluster, url, access_key_id, secret_access_key, format\n"
         " - cluster, url, access_key_id, secret_access_key, format, structure\n" \
         " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method",
         getName());

-    if (args.size() < 4 || args.size() > 7)
+    if (args.size() < 2 || args.size() > 7)
         throw Exception(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

-    for (auto & arg : args)
-        arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
-
     /// This arguments are always the first
-    cluster_name = args[0]->as<ASTLiteral &>().value.safeGet<String>();
-    filename = args[1]->as<ASTLiteral &>().value.safeGet<String>();
-
-    /// Size -> argument indexes
-    static auto size_to_args = std::map<size_t, std::map<String, size_t>>
-    {
-        {4, {{"format", 2}, {"structure", 3}}},
-        {5, {{"format", 2}, {"structure", 3}, {"compression_method", 4}}},
-        {6, {{"access_key_id", 2}, {"secret_access_key", 3}, {"format", 4}, {"structure", 5}}},
-        {7, {{"access_key_id", 2}, {"secret_access_key", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}}
-    };
-
-    auto & args_to_idx = size_to_args[args.size()];
-
-    if (args_to_idx.contains("format"))
-        format = args[args_to_idx["format"]]->as<ASTLiteral &>().value.safeGet<String>();
-
-    if (args_to_idx.contains("structure"))
-        structure = args[args_to_idx["structure"]]->as<ASTLiteral &>().value.safeGet<String>();
-
-    if (args_to_idx.contains("compression_method"))
-        compression_method = args[args_to_idx["compression_method"]]->as<ASTLiteral &>().value.safeGet<String>();
-
-    if (args_to_idx.contains("access_key_id"))
-        access_key_id = args[args_to_idx["access_key_id"]]->as<ASTLiteral &>().value.safeGet<String>();
-
-    if (args_to_idx.contains("secret_access_key"))
-        secret_access_key = args[args_to_idx["secret_access_key"]]->as<ASTLiteral &>().value.safeGet<String>();
+    configuration.cluster_name = args[0]->as<ASTLiteral &>().value.safeGet<String>();
+
+    if (!context->tryGetCluster(configuration.cluster_name))
+        throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", configuration.cluster_name);
+
+    /// Just cut the first arg (cluster_name) and try to parse s3 table function arguments as is
+    ASTs clipped_args;
+    clipped_args.reserve(args.size());
+    std::copy(args.begin() + 1, args.end(), std::back_inserter(clipped_args));
+
+    /// StorageS3ClusterConfiguration inherits from StorageS3Configuration, so it is safe to upcast it.
+    TableFunctionS3::parseArgumentsImpl(message, clipped_args, context, static_cast<StorageS3Configuration & >(configuration));
 }


 ColumnsDescription TableFunctionS3Cluster::getActualTableStructure(ContextPtr context) const
 {
-    return parseColumnsListFromString(structure, context);
+    if (configuration.structure == "auto")
+    {
+        return StorageS3::getTableStructureFromData(
+            configuration.format,
+            S3::URI(Poco::URI(configuration.url)),
+            configuration.access_key_id,
+            configuration.secret_access_key,
+            context->getSettingsRef().s3_max_connections,
+            context->getSettingsRef().s3_max_single_read_retries,
+            configuration.compression_method,
+            false,
+            std::nullopt,
+            context);
+    }
+
+    return parseColumnsListFromString(configuration.structure, context);
 }


 StoragePtr TableFunctionS3Cluster::executeImpl(
@@ -101,46 +105,60 @@ StoragePtr TableFunctionS3Cluster::executeImpl(
     const std::string & table_name, ColumnsDescription /*cached_columns*/) const
 {
     StoragePtr storage;
+
+    UInt64 max_single_read_retries = context->getSettingsRef().s3_max_single_read_retries;
+    UInt64 min_upload_part_size = context->getSettingsRef().s3_min_upload_part_size;
+    UInt64 upload_part_size_multiply_factor = context->getSettingsRef().s3_upload_part_size_multiply_factor;
+    UInt64 upload_part_size_multiply_parts_count_threshold = context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold;
+    UInt64 max_single_part_upload_size = context->getSettingsRef().s3_max_single_part_upload_size;
+    UInt64 max_connections = context->getSettingsRef().s3_max_connections;
+
+    ColumnsDescription columns;
+    if (configuration.structure != "auto")
+        columns = parseColumnsListFromString(configuration.structure, context);
+    else if (!structure_hint.empty())
+        columns = structure_hint;
+
     if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY)
     {
         /// On worker node this filename won't contains globs
-        Poco::URI uri (filename);
+        Poco::URI uri (configuration.url);
         S3::URI s3_uri (uri);
-        /// Actually this parameters are not used
-        UInt64 max_single_read_retries = context->getSettingsRef().s3_max_single_read_retries;
-        UInt64 min_upload_part_size = context->getSettingsRef().s3_min_upload_part_size;
-        UInt64 upload_part_size_multiply_factor = context->getSettingsRef().s3_upload_part_size_multiply_factor;
-        UInt64 upload_part_size_multiply_parts_count_threshold = context->getSettingsRef().s3_upload_part_size_multiply_parts_count_threshold;
-        UInt64 max_single_part_upload_size = context->getSettingsRef().s3_max_single_part_upload_size;
-        UInt64 max_connections = context->getSettingsRef().s3_max_connections;
         storage = StorageS3::create(
             s3_uri,
-            access_key_id,
-            secret_access_key,
+            configuration.access_key_id,
+            configuration.secret_access_key,
             StorageID(getDatabaseName(), table_name),
-            format,
+            configuration.format,
             max_single_read_retries,
             min_upload_part_size,
             upload_part_size_multiply_factor,
             upload_part_size_multiply_parts_count_threshold,
             max_single_part_upload_size,
             max_connections,
-            getActualTableStructure(context),
+            columns,
             ConstraintsDescription{},
             String{},
             context,
             // No format_settings for S3Cluster
             std::nullopt,
-            compression_method,
+            configuration.compression_method,
             /*distributed_processing=*/true);
     }
     else
     {
         storage = StorageS3Cluster::create(
-            filename, access_key_id, secret_access_key, StorageID(getDatabaseName(), table_name),
-            cluster_name, format, context->getSettingsRef().s3_max_connections,
-            getActualTableStructure(context), ConstraintsDescription{},
-            context, compression_method);
+            configuration.url,
+            configuration.access_key_id,
+            configuration.secret_access_key,
+            StorageID(getDatabaseName(), table_name),
+            configuration.cluster_name, configuration.format,
+            max_single_read_retries,
+            max_connections,
+            columns,
+            ConstraintsDescription{},
+            context,
+            configuration.compression_method);
     }

     storage->startup();
@@ -5,6 +5,7 @@
 #if USE_AWS_S3

 #include <TableFunctions/ITableFunction.h>
+#include <Storages/ExternalDataSourceConfiguration.h>


 namespace DB
@@ -28,7 +29,12 @@ public:
     {
         return name;
     }
-    bool hasStaticStructure() const override { return true; }
+    bool hasStaticStructure() const override { return configuration.structure != "auto"; }
+
+    bool needStructureHint() const override { return configuration.structure == "auto"; }
+
+    void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; }

 protected:
     StoragePtr executeImpl(
@@ -42,13 +48,8 @@ protected:
     ColumnsDescription getActualTableStructure(ContextPtr) const override;
     void parseArguments(const ASTPtr &, ContextPtr) override;

-    String cluster_name;
-    String filename;
-    String format;
-    String structure;
-    String access_key_id;
-    String secret_access_key;
-    String compression_method = "auto";
+    StorageS3ClusterConfiguration configuration;
+    ColumnsDescription structure_hint;
 };

 }
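For orientation: after this refactoring, s3Cluster takes between two and seven arguments; the first must name a cluster known to the server (checked via tryGetCluster), and the remaining arguments are handed to TableFunctionS3::parseArgumentsImpl, so format and structure may be omitted and inferred while structure stays "auto". A hedged sketch under those assumptions; the cluster name, bucket URL and column list are placeholders:

SELECT count()
FROM s3Cluster('my_cluster', 'https://my-bucket.s3.amazonaws.com/data/*.parquet');

SELECT count()
FROM s3Cluster('my_cluster', 'https://my-bucket.s3.amazonaws.com/data/*.csv', 'CSV', 'id UInt64, name String');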
@@ -148,6 +148,17 @@ if __name__ == "__main__":
             build_name,
         )

+    some_builds_are_missing = len(build_reports_map) < len(reports_order)
+
+    if some_builds_are_missing:
+        logging.info(
+            "Expected to get %s build results, got %s",
+            len(reports_order),
+            len(build_reports_map),
+        )
+    else:
+        logging.info("Got exactly %s builds", len(build_reports_map))
+
     build_reports = [
         build_reports_map[build_name]
         for build_name in reports_order
@@ -219,10 +230,10 @@ if __name__ == "__main__":
         if build_result.status == "success":
             ok_builds += 1

-    if ok_builds == 0:
+    if ok_builds == 0 or some_builds_are_missing:
         summary_status = "error"

-    description = "{}/{} builds are OK".format(ok_builds, total_builds)
+    description = f"{ok_builds}/{total_builds} builds are OK"

     print("::notice ::Report url: {}".format(url))

@@ -206,6 +206,7 @@ CI_CONFIG = {
             "binary_freebsd",
             "binary_darwin_aarch64",
             "binary_ppc64le",
+            "binary_gcc",
         ],
     },
     "tests_config": {
@@ -379,12 +379,16 @@ def check_need_to_rerun(workflow_description):

 def rerun_workflow(workflow_description, token):
     print("Going to rerun workflow")
-    _exec_post_with_retry(workflow_description.rerun_url, token)
+    try:
+        _exec_post_with_retry(f"{workflow_description.rerun_url}-failed-jobs", token)
+    except Exception:
+        _exec_post_with_retry(workflow_description.rerun_url, token)


 def main(event):
     token = get_token_from_aws()
     event_data = json.loads(event["body"])
+    print("The body received:", event_data)
     workflow_description = get_workflow_description_from_event(event_data)

     print("Got workflow description", workflow_description)
@@ -14,5 +14,13 @@
             <user>default</user>
             <table>s</table>
         </clickhouse_dictionary>
+        <url_with_headers>
+            <headers>
+                <header>
+                    <name>X-ClickHouse-Format</name>
+                    <value>JSONEachRow</value>
+                </header>
+            </headers>
+        </url_with_headers>
     </named_collections>
 </clickhouse>
@@ -276,3 +276,8 @@ def test_HDFS(start_cluster):
     assert "not allowed" in node7.query_and_get_error(
         "SELECT * FROM hdfs('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV', 'word String')"
     )
+
+
+def test_schema_inference(start_cluster):
+    error = node7.query_and_get_error("desc url('http://test.com`, 'TSVRaw'')")
+    assert(error.find('ReadWriteBufferFromHTTPBase') == -1)
@@ -12,6 +12,8 @@ TABLE_NAME = "blob_storage_table"
 CONTAINER_NAME = "cont"
 CLUSTER_NAME = "test_cluster"

+drop_table_statement = f"DROP TABLE {TABLE_NAME} ON CLUSTER {CLUSTER_NAME} SYNC"
+

 @pytest.fixture(scope="module")
 def cluster():
@@ -55,7 +57,6 @@ def create_table(node, table_name, replica, **additional_settings):
         ORDER BY id
         SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))}"""

-    node.query(f"DROP TABLE IF EXISTS {table_name}")
     node.query(create_table_statement)
     assert node.query(f"SELECT COUNT(*) FROM {table_name} FORMAT Values") == "(0)"

@@ -104,3 +105,4 @@ def test_zero_copy_replication(cluster):
     )

     assert get_large_objects_count(blob_container_client) == 2
+    node1.query(drop_table_statement)
@@ -90,6 +90,9 @@ def test_backup_from_old_version(started_cluster):

     assert node1.query("CHECK TABLE dest_table") == "1\n"

+    node1.query("DROP TABLE source_table")
+    node1.query("DROP TABLE dest_table")
+

 def test_backup_from_old_version_setting(started_cluster):
     node2.query(
@@ -137,6 +140,9 @@ def test_backup_from_old_version_setting(started_cluster):

     assert node2.query("CHECK TABLE dest_table") == "1\n"

+    node2.query("DROP TABLE source_table")
+    node2.query("DROP TABLE dest_table")
+

 def test_backup_from_old_version_config(started_cluster):
     node3.query(
@@ -190,6 +196,9 @@ def test_backup_from_old_version_config(started_cluster):

     assert node3.query("CHECK TABLE dest_table") == "1\n"

+    node3.query("DROP TABLE source_table")
+    node3.query("DROP TABLE dest_table")
+

 def test_backup_and_alter(started_cluster):
     node4.query(
@@ -223,3 +232,6 @@ def test_backup_and_alter(started_cluster):

     assert node4.query("SELECT sum(A) FROM test.backup_table") == "2\n"
     assert node4.query("SELECT B + 2 FROM test.backup_table") == "4\n"
+
+    node4.query("DROP TABLE test.backup_table")
+    node4.query("DROP DATABASE test")
@@ -14,4 +14,8 @@
         <table>part_log</table>
         <flush_interval_milliseconds>500</flush_interval_milliseconds>
     </part_log>
+    <merge_tree_metadata_cache>
+        <lru_cache_size>268435456</lru_cache_size>
+        <continue_if_corrupted>true</continue_if_corrupted>
+    </merge_tree_metadata_cache>
 </clickhouse>
@@ -846,14 +846,15 @@ def get_paths_for_partition_from_part_log(node, table, partition_id):


 @pytest.mark.parametrize(
-    "name,engine",
+    "name,engine,use_metadata_cache",
     [
-        pytest.param("altering_mt", "MergeTree()", id="mt"),
+        pytest.param("altering_mt", "MergeTree()", "false", id="mt"),
+        pytest.param("altering_mt", "MergeTree()", "true", id="mt_use_metadata_cache"),
         # ("altering_replicated_mt","ReplicatedMergeTree('/clickhouse/altering_replicated_mt', '1')",),
         # SYSTEM STOP MERGES doesn't disable merges assignments
     ],
 )
-def test_alter_move(start_cluster, name, engine):
+def test_alter_move(start_cluster, name, engine, use_metadata_cache):
     try:
         node1.query(
             """
@@ -863,9 +864,9 @@ def test_alter_move(start_cluster, name, engine):
             ) ENGINE = {engine}
             ORDER BY tuple()
             PARTITION BY toYYYYMM(EventDate)
-            SETTINGS storage_policy='jbods_with_external'
+            SETTINGS storage_policy='jbods_with_external', use_metadata_cache={use_metadata_cache}
         """.format(
-                name=name, engine=engine
+                name=name, engine=engine, use_metadata_cache=use_metadata_cache
             )
         )

@@ -875,6 +876,8 @@ def test_alter_move(start_cluster, name, engine):
     node1.query("INSERT INTO {} VALUES(toDate('2019-03-16'), 66)".format(name))
     node1.query("INSERT INTO {} VALUES(toDate('2019-04-10'), 42)".format(name))
     node1.query("INSERT INTO {} VALUES(toDate('2019-04-11'), 43)".format(name))
+    assert node1.query("CHECK TABLE " + name) == "1\n"
+
     used_disks = get_used_disks_for_table(node1, name)
     assert all(
         d.startswith("jbod") for d in used_disks
@@ -892,6 +895,7 @@ def test_alter_move(start_cluster, name, engine):
                 name, first_part
             )
         )
+    assert node1.query("CHECK TABLE " + name) == "1\n"
     disk = node1.query(
         "SELECT disk_name FROM system.parts WHERE table = '{}' and name = '{}' and active = 1".format(
             name, first_part
@@ -906,6 +910,7 @@ def test_alter_move(start_cluster, name, engine):
     node1.query(
         "ALTER TABLE {} MOVE PART '{}' TO DISK 'jbod1'".format(name, first_part)
     )
+    assert node1.query("CHECK TABLE " + name) == "1\n"
     disk = node1.query(
         "SELECT disk_name FROM system.parts WHERE table = '{}' and name = '{}' and active = 1".format(
             name, first_part
@@ -920,6 +925,7 @@ def test_alter_move(start_cluster, name, engine):
     node1.query(
         "ALTER TABLE {} MOVE PARTITION 201904 TO VOLUME 'external'".format(name)
     )
+    assert node1.query("CHECK TABLE " + name) == "1\n"
     disks = (
         node1.query(
             "SELECT disk_name FROM system.parts WHERE table = '{}' and partition = '201904' and active = 1".format(
@@ -938,6 +944,7 @@ def test_alter_move(start_cluster, name, engine):

     time.sleep(1)
     node1.query("ALTER TABLE {} MOVE PARTITION 201904 TO DISK 'jbod2'".format(name))
+    assert node1.query("CHECK TABLE " + name) == "1\n"
     disks = (
         node1.query(
             "SELECT disk_name FROM system.parts WHERE table = '{}' and partition = '201904' and active = 1".format(
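For orientation: the parametrized test above now runs the same scenario twice, once with the part metadata cache disabled and once with use_metadata_cache=true, and checks consistency with CHECK TABLE after every move. A hedged standalone sketch of the setting being exercised; the table name and columns are hypothetical, and it assumes a server built with RocksDB support and a merge_tree_metadata_cache section configured as in the config change above:

CREATE TABLE metadata_cache_demo
(
    EventDate Date,
    number UInt64
)
ENGINE = MergeTree()
ORDER BY tuple()
PARTITION BY toYYYYMM(EventDate)
SETTINGS use_metadata_cache = true;

INSERT INTO metadata_cache_demo VALUES (toDate('2019-03-15'), 65);
CHECK TABLE metadata_cache_demo;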
Some files were not shown because too many files have changed in this diff.