mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Merge branch 'master' into Avogar-patch-3
This commit is contained in:
commit
f9a0692e2c
89
.github/workflows/master.yml
vendored
89
.github/workflows/master.yml
vendored
@ -149,7 +149,6 @@ jobs:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
SplitBuildSmokeTest:
|
||||
needs: [BuilderDebSplitted]
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
|
||||
runs-on: [self-hosted, style-checker]
|
||||
steps:
|
||||
- name: Set envs
|
||||
@ -316,7 +315,6 @@ jobs:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinRelease:
|
||||
needs: [DockerHubPush]
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
@ -360,6 +358,51 @@ jobs:
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinGCC:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
CHECK_NAME=ClickHouse build check (actions)
|
||||
BUILD_NAME=binary_gcc
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ env.IMAGES_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: 'true'
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ env.TEMP_PATH }}/${{ env.BUILD_NAME }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderDebAsan:
|
||||
needs: [DockerHubPush]
|
||||
runs-on: [self-hosted, builder]
|
||||
@ -590,7 +633,6 @@ jobs:
|
||||
##########################################################################################
|
||||
BuilderDebSplitted:
|
||||
needs: [DockerHubPush]
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
@ -636,7 +678,6 @@ jobs:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinTidy:
|
||||
needs: [DockerHubPush]
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
@ -682,7 +723,6 @@ jobs:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinDarwin:
|
||||
needs: [DockerHubPush]
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
@ -728,7 +768,6 @@ jobs:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinAarch64:
|
||||
needs: [DockerHubPush]
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
@ -774,7 +813,6 @@ jobs:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinFreeBSD:
|
||||
needs: [DockerHubPush]
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
@ -820,7 +858,6 @@ jobs:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinDarwinAarch64:
|
||||
needs: [DockerHubPush]
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
@ -866,7 +903,6 @@ jobs:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinPPC64:
|
||||
needs: [DockerHubPush]
|
||||
if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
@ -918,6 +954,7 @@ jobs:
|
||||
- BuilderDebRelease
|
||||
- BuilderDebAarch64
|
||||
- BuilderBinRelease
|
||||
- BuilderBinGCC
|
||||
- BuilderDebAsan
|
||||
- BuilderDebTsan
|
||||
- BuilderDebUBsan
|
||||
@ -2608,6 +2645,40 @@ jobs:
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
UnitTestsReleaseGCC:
|
||||
needs: [BuilderBinGCC]
|
||||
runs-on: [self-hosted, fuzzer-unit-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/unit_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Unit tests (release-gcc, actions)
|
||||
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
- name: Unit test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 unit_tests_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
UnitTestsTsan:
|
||||
needs: [BuilderDebTsan]
|
||||
runs-on: [self-hosted, fuzzer-unit-tester]
|
||||
|
77
.github/workflows/pull_request.yml
vendored
77
.github/workflows/pull_request.yml
vendored
@ -370,6 +370,48 @@ jobs:
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderBinGCC:
|
||||
needs: [DockerHubPush, FastTest]
|
||||
runs-on: [self-hosted, builder]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/build_check
|
||||
IMAGES_PATH=${{runner.temp}}/images_path
|
||||
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
|
||||
CACHES_PATH=${{runner.temp}}/../ccaches
|
||||
CHECK_NAME=ClickHouse build check (actions)
|
||||
BUILD_NAME=binary_gcc
|
||||
EOF
|
||||
- name: Download changed images
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
name: changed_images
|
||||
path: ${{ runner.temp }}/images_path
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
submodules: 'true'
|
||||
fetch-depth: 0 # otherwise we will have no info about contributors
|
||||
- name: Build
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME"
|
||||
- name: Upload build URLs to artifacts
|
||||
if: ${{ success() || failure() }}
|
||||
uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: ${{ env.BUILD_NAME }}
|
||||
path: ${{ runner.temp }}/build_check/${{ env.BUILD_NAME }}.json
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
|
||||
BuilderDebAarch64:
|
||||
needs: [DockerHubPush, FastTest]
|
||||
runs-on: [self-hosted, builder]
|
||||
@ -963,6 +1005,7 @@ jobs:
|
||||
- BuilderDebRelease
|
||||
- BuilderDebAarch64
|
||||
- BuilderBinRelease
|
||||
- BuilderBinGCC
|
||||
- BuilderDebAsan
|
||||
- BuilderDebTsan
|
||||
- BuilderDebUBsan
|
||||
@ -2808,6 +2851,40 @@ jobs:
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
UnitTestsReleaseGCC:
|
||||
needs: [BuilderBinGCC]
|
||||
runs-on: [self-hosted, fuzzer-unit-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/unit_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Unit tests (release-gcc, actions)
|
||||
REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v2
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Clear repository
|
||||
run: |
|
||||
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
|
||||
- name: Check out repository code
|
||||
uses: actions/checkout@v2
|
||||
- name: Unit test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 unit_tests_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker kill "$(docker ps -q)" ||:
|
||||
docker rm -f "$(docker ps -a -q)" ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
UnitTestsTsan:
|
||||
needs: [BuilderDebTsan]
|
||||
runs-on: [self-hosted, fuzzer-unit-tester]
|
||||
|
@ -261,8 +261,8 @@ endif ()
|
||||
# Add a section with the hash of the compiled machine code for integrity checks.
|
||||
# Only for official builds, because adding a section can be time consuming (rewrite of several GB).
|
||||
# And cross compiled binaries are not supported (since you cannot execute clickhouse hash-binary)
|
||||
if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
|
||||
set (USE_BINARY_HASH 1)
|
||||
if (OBJCOPY_PATH AND CLICKHOUSE_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE OR CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64.cmake$"))
|
||||
set (USE_BINARY_HASH 1 CACHE STRING "Calculate binary hash and store it in the separate section")
|
||||
endif ()
|
||||
|
||||
# Allows to build stripped binary in a separate directory
|
||||
|
@ -2,6 +2,7 @@ set (SRCS
|
||||
argsToConfig.cpp
|
||||
coverage.cpp
|
||||
demangle.cpp
|
||||
getAvailableMemoryAmount.cpp
|
||||
getFQDNOrHostName.cpp
|
||||
getMemoryAmount.cpp
|
||||
getPageSize.cpp
|
||||
|
44
base/base/getAvailableMemoryAmount.cpp
Normal file
44
base/base/getAvailableMemoryAmount.cpp
Normal file
@ -0,0 +1,44 @@
|
||||
#include <stdexcept>
|
||||
#include <fstream>
|
||||
#include <base/getAvailableMemoryAmount.h>
|
||||
#include <base/getPageSize.h>
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#if defined(BSD)
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/vmmeter.h>
|
||||
#endif
|
||||
|
||||
|
||||
uint64_t getAvailableMemoryAmountOrZero()
|
||||
{
|
||||
#if defined(_SC_AVPHYS_PAGES) // linux
|
||||
return getPageSize() * sysconf(_SC_AVPHYS_PAGES);
|
||||
#elif defined(__FreeBSD__)
|
||||
struct vmtotal vmt;
|
||||
size_t vmt_size = sizeof(vmt);
|
||||
if (sysctlbyname("vm.vmtotal", &vmt, &vmt_size, NULL, 0) == 0)
|
||||
return getPageSize() * vmt.t_avm;
|
||||
else
|
||||
return 0;
|
||||
#else // darwin
|
||||
unsigned int usermem;
|
||||
size_t len = sizeof(usermem);
|
||||
static int mib[2] = { CTL_HW, HW_USERMEM };
|
||||
if (sysctl(mib, 2, &usermem, &len, nullptr, 0) == 0 && len == sizeof(usermem))
|
||||
return usermem;
|
||||
else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
uint64_t getAvailableMemoryAmount()
|
||||
{
|
||||
auto res = getAvailableMemoryAmountOrZero();
|
||||
if (!res)
|
||||
throw std::runtime_error("Cannot determine available memory amount");
|
||||
return res;
|
||||
}
|
12
base/base/getAvailableMemoryAmount.h
Normal file
12
base/base/getAvailableMemoryAmount.h
Normal file
@ -0,0 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
/** Returns the size of currently available physical memory (RAM) in bytes.
|
||||
* Returns 0 on unsupported platform or if it cannot determine the size of physical memory.
|
||||
*/
|
||||
uint64_t getAvailableMemoryAmountOrZero();
|
||||
|
||||
/** Throws exception if it cannot determine the size of physical memory.
|
||||
*/
|
||||
uint64_t getAvailableMemoryAmount();
|
@ -51,6 +51,6 @@ if (GLIBC_COMPATIBILITY)
|
||||
|
||||
message (STATUS "Some symbols from glibc will be replaced for compatibility")
|
||||
|
||||
elseif (YANDEX_OFFICIAL_BUILD)
|
||||
elseif (CLICKHOUSE_OFFICIAL_BUILD)
|
||||
message (WARNING "Option GLIBC_COMPATIBILITY must be turned on for production builds.")
|
||||
endif ()
|
||||
|
@ -18,6 +18,6 @@ set (VERSION_STRING_SHORT "${VERSION_MAJOR}.${VERSION_MINOR}")
|
||||
|
||||
math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000")
|
||||
|
||||
if(YANDEX_OFFICIAL_BUILD)
|
||||
if(CLICKHOUSE_OFFICIAL_BUILD)
|
||||
set(VERSION_OFFICIAL " (official build)")
|
||||
endif()
|
||||
|
@ -69,9 +69,10 @@ endif ()
|
||||
target_compile_options(_avrocpp PRIVATE ${SUPPRESS_WARNINGS})
|
||||
|
||||
# create a symlink to include headers with <avro/...>
|
||||
set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include")
|
||||
ADD_CUSTOM_TARGET(avro_symlink_headers ALL
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory "${AVROCPP_ROOT_DIR}/include"
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVROCPP_ROOT_DIR}/include/avro"
|
||||
COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}"
|
||||
COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro"
|
||||
)
|
||||
add_dependencies(_avrocpp avro_symlink_headers)
|
||||
target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVROCPP_ROOT_DIR}/include")
|
||||
target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}")
|
||||
|
@ -27,7 +27,11 @@ target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRAR
|
||||
|
||||
# asio
|
||||
|
||||
target_compile_definitions (_boost_headers_only INTERFACE BOOST_ASIO_STANDALONE=1)
|
||||
target_compile_definitions (_boost_headers_only INTERFACE
|
||||
BOOST_ASIO_STANDALONE=1
|
||||
# Avoid using of deprecated in c++ > 17 std::result_of
|
||||
BOOST_ASIO_HAS_STD_INVOKE_RESULT=1
|
||||
)
|
||||
|
||||
# iostreams
|
||||
|
||||
|
@ -163,6 +163,7 @@ def parse_env_variables(
|
||||
cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr")
|
||||
cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc")
|
||||
cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var")
|
||||
cmake_flags.append("-DBUILD_STANDALONE_KEEPER=ON")
|
||||
if is_release_build(build_type, package_type, sanitizer, split_binary):
|
||||
cmake_flags.append("-DINSTALL_STRIPPED_BINARIES=ON")
|
||||
|
||||
@ -244,7 +245,7 @@ def parse_env_variables(
|
||||
result.append(f"AUTHOR='{author}'")
|
||||
|
||||
if official:
|
||||
cmake_flags.append("-DYANDEX_OFFICIAL_BUILD=1")
|
||||
cmake_flags.append("-DCLICKHOUSE_OFFICIAL_BUILD=1")
|
||||
|
||||
result.append('CMAKE_FLAGS="' + " ".join(cmake_flags) + '"')
|
||||
|
||||
|
@ -267,6 +267,7 @@ function run_tests
|
||||
local test_opts=(
|
||||
--hung-check
|
||||
--fast-tests-only
|
||||
--no-random-settings
|
||||
--no-long
|
||||
--testname
|
||||
--shard
|
||||
|
@ -13,7 +13,7 @@ script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
echo "$script_dir"
|
||||
repo_dir=ch
|
||||
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-13_debug_none_bundled_unsplitted_disable_False_binary"}
|
||||
BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
|
||||
BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
|
||||
|
||||
function clone
|
||||
{
|
||||
|
@ -2,7 +2,7 @@
|
||||
set -euo pipefail
|
||||
|
||||
|
||||
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-13_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"}
|
||||
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-13_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"}
|
||||
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}
|
||||
|
||||
|
||||
@ -10,7 +10,7 @@ if [ -z "$CLICKHOUSE_REPO_PATH" ]; then
|
||||
CLICKHOUSE_REPO_PATH=ch
|
||||
rm -rf ch ||:
|
||||
mkdir ch ||:
|
||||
wget -nv -nd -c "https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/repo/clickhouse_no_subs.tar.gz"
|
||||
wget -nv -nd -c "https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/repo/clickhouse_no_subs.tar.gz"
|
||||
tar -C ch --strip-components=1 -xf clickhouse_no_subs.tar.gz
|
||||
ls -lath ||:
|
||||
fi
|
||||
|
@ -1294,15 +1294,15 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv')
|
||||
select '' test_name,
|
||||
'$(sed -n 's/.*<!--message: \(.*\)-->/\1/p' report.html)' test_status,
|
||||
0 test_duration_ms,
|
||||
'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#fail1' report_url
|
||||
'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#fail1' report_url
|
||||
union all
|
||||
select test || ' #' || toString(query_index), 'slower' test_status, 0 test_duration_ms,
|
||||
'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#changes-in-performance.'
|
||||
'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#changes-in-performance.'
|
||||
|| test || '.' || toString(query_index) report_url
|
||||
from queries where changed_fail != 0 and diff > 0
|
||||
union all
|
||||
select test || ' #' || toString(query_index), 'unstable' test_status, 0 test_duration_ms,
|
||||
'https://clickhouse-test-reports.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#unstable-queries.'
|
||||
'https://clickhouse-test-reports.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/performance_comparison/report.html#unstable-queries.'
|
||||
|| test || '.' || toString(query_index) report_url
|
||||
from queries where unstable_fail != 0
|
||||
)
|
||||
@ -1378,7 +1378,7 @@ $REF_SHA $SHA_TO_TEST $(numactl --hardware | sed -n 's/^available:[[:space:]]\+/
|
||||
EOF
|
||||
|
||||
# Also insert some data about the check into the CI checks table.
|
||||
"${client[@]}" --query "INSERT INTO "'"'"gh-data"'"'".checks FORMAT TSVWithNamesAndTypes" \
|
||||
"${client[@]}" --query "INSERT INTO "'"'"default"'"'".checks FORMAT TSVWithNamesAndTypes" \
|
||||
< ci-checks.tsv
|
||||
|
||||
set -x
|
||||
|
@ -16,26 +16,17 @@ right_sha=$4
|
||||
datasets=${CHPC_DATASETS-"hits1 hits10 hits100 values"}
|
||||
|
||||
declare -A dataset_paths
|
||||
if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
|
||||
dataset_paths["hits10"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_10m_single/partitions/hits_10m_single.tar"
|
||||
dataset_paths["hits100"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_100m_single/partitions/hits_100m_single.tar"
|
||||
dataset_paths["hits1"]="https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits_v1.tar"
|
||||
dataset_paths["values"]="https://clickhouse-datasets.s3.amazonaws.com/values_with_expressions/partitions/test_values.tar"
|
||||
else
|
||||
dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar"
|
||||
dataset_paths["hits100"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar"
|
||||
dataset_paths["hits1"]="https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar"
|
||||
dataset_paths["values"]="https://clickhouse-datasets.s3.yandex.net/values_with_expressions/partitions/test_values.tar"
|
||||
fi
|
||||
dataset_paths["hits10"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_10m_single/partitions/hits_10m_single.tar"
|
||||
dataset_paths["hits100"]="https://clickhouse-private-datasets.s3.amazonaws.com/hits_100m_single/partitions/hits_100m_single.tar"
|
||||
dataset_paths["hits1"]="https://clickhouse-datasets.s3.amazonaws.com/hits/partitions/hits_v1.tar"
|
||||
dataset_paths["values"]="https://clickhouse-datasets.s3.amazonaws.com/values_with_expressions/partitions/test_values.tar"
|
||||
|
||||
|
||||
function download
|
||||
{
|
||||
# Historically there were various paths for the performance test package.
|
||||
# Test all of them.
|
||||
declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz"
|
||||
"https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/clickhouse_build_check/performance/performance.tgz"
|
||||
)
|
||||
declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/performance/performance.tgz")
|
||||
|
||||
for path in "${urls_to_try[@]}"
|
||||
do
|
||||
|
@ -4,7 +4,7 @@ set -ex
|
||||
CHPC_CHECK_START_TIMESTAMP="$(date +%s)"
|
||||
export CHPC_CHECK_START_TIMESTAMP
|
||||
|
||||
S3_URL=${S3_URL:="https://clickhouse-builds.s3.yandex.net"}
|
||||
S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"}
|
||||
|
||||
COMMON_BUILD_PREFIX="/clickhouse_build_check"
|
||||
if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
|
||||
@ -64,9 +64,7 @@ function find_reference_sha
|
||||
# Historically there were various path for the performance test package,
|
||||
# test all of them.
|
||||
unset found
|
||||
declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz"
|
||||
"https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/clickhouse_build_check/performance/performance.tgz"
|
||||
)
|
||||
declare -a urls_to_try=("https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/performance/performance.tgz")
|
||||
for path in "${urls_to_try[@]}"
|
||||
do
|
||||
if curl_with_retry "$path"
|
||||
|
@ -11,7 +11,7 @@ RUN apt-get update -y \
|
||||
|
||||
COPY s3downloader /s3downloader
|
||||
|
||||
ENV S3_URL="https://clickhouse-datasets.s3.yandex.net"
|
||||
ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
|
||||
ENV DATASETS="hits visits"
|
||||
ENV EXPORT_S3_STORAGE_POLICIES=1
|
||||
|
||||
|
@ -115,7 +115,7 @@ function run_tests()
|
||||
fi
|
||||
|
||||
set +e
|
||||
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \
|
||||
clickhouse-test -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time \
|
||||
--skip 00168_parallel_processing_on_replicas "${ADDITIONAL_OPTIONS[@]}" \
|
||||
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
|
||||
|
@ -10,7 +10,7 @@ import requests
|
||||
import tempfile
|
||||
|
||||
|
||||
DEFAULT_URL = 'https://clickhouse-datasets.s3.yandex.net'
|
||||
DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com'
|
||||
|
||||
AVAILABLE_DATASETS = {
|
||||
'hits': 'hits_v1.tar',
|
||||
|
@ -32,7 +32,9 @@ RUN apt-get update -y \
|
||||
mysql-client=8.0* \
|
||||
postgresql-client \
|
||||
sqlite3 \
|
||||
awscli
|
||||
awscli \
|
||||
openjdk-11-jre-headless
|
||||
|
||||
|
||||
RUN pip3 install numpy scipy pandas Jinja2
|
||||
|
||||
@ -58,10 +60,16 @@ RUN arch=${TARGETARCH:-amd64} \
|
||||
&& wget "https://dl.min.io/client/mc/release/linux-${arch}/mc" \
|
||||
&& chmod +x ./mc
|
||||
|
||||
|
||||
RUN wget 'https://dlcdn.apache.org/hadoop/common/hadoop-3.2.2/hadoop-3.2.2.tar.gz' \
|
||||
&& tar -xvf hadoop-3.2.2.tar.gz \
|
||||
&& rm -rf hadoop-3.2.2.tar.gz
|
||||
|
||||
ENV MINIO_ROOT_USER="clickhouse"
|
||||
ENV MINIO_ROOT_PASSWORD="clickhouse"
|
||||
ENV EXPORT_S3_STORAGE_POLICIES=1
|
||||
|
||||
COPY run.sh /
|
||||
COPY setup_minio.sh /
|
||||
COPY setup_hdfs_minicluster.sh /
|
||||
CMD ["/bin/bash", "/run.sh"]
|
||||
|
@ -19,6 +19,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
||||
./setup_minio.sh
|
||||
./setup_hdfs_minicluster.sh
|
||||
|
||||
# For flaky check we also enable thread fuzzer
|
||||
if [ "$NUM_TRIES" -gt "1" ]; then
|
||||
@ -131,8 +132,23 @@ clickhouse-client -q "system flush logs" ||:
|
||||
|
||||
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:
|
||||
pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz &
|
||||
clickhouse-client -q "select * from system.query_log format TSVWithNamesAndTypes" | pigz > /test_output/query-log.tsv.gz &
|
||||
clickhouse-client -q "select * from system.query_thread_log format TSVWithNamesAndTypes" | pigz > /test_output/query-thread-log.tsv.gz &
|
||||
|
||||
# Compress tables.
|
||||
#
|
||||
# NOTE:
|
||||
# - that due to tests with s3 storage we cannot use /var/lib/clickhouse/data
|
||||
# directly
|
||||
# - even though ci auto-compress some files (but not *.tsv) it does this only
|
||||
# for files >64MB, we want this files to be compressed explicitly
|
||||
for table in query_log zookeeper_log trace_log
|
||||
do
|
||||
clickhouse-client -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz &
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.1.tsv.gz &
|
||||
clickhouse-client --port 29000 -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.2.tsv.gz &
|
||||
fi
|
||||
done
|
||||
wait ||:
|
||||
|
||||
# Also export trace log in flamegraph-friendly format.
|
||||
for trace_type in CPU Memory Real
|
||||
@ -161,14 +177,6 @@ fi
|
||||
|
||||
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
|
||||
|
||||
# Replace the engine with Ordinary to avoid extra symlinks stuff in artifacts.
|
||||
# (so that clickhouse-local --path can read it w/o extra care).
|
||||
sed -i -e "s/ATTACH DATABASE _ UUID '[^']*'/ATTACH DATABASE system/" -e "s/Atomic/Ordinary/" /var/lib/clickhouse/metadata/system.sql
|
||||
for table in text_log query_log zookeeper_log trace_log; do
|
||||
sed -i "s/ATTACH TABLE _ UUID '[^']*'/ATTACH TABLE $table/" /var/lib/clickhouse/metadata/system/${table}.sql
|
||||
tar -chf /test_output/${table}_dump.tar /var/lib/clickhouse/metadata/system.sql /var/lib/clickhouse/metadata/system/${table}.sql /var/lib/clickhouse/data/system/${table} ||:
|
||||
done
|
||||
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server1.log ||:
|
||||
grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server2.log ||:
|
||||
@ -179,8 +187,6 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
|
||||
rm /var/log/clickhouse-server/clickhouse-server2.log
|
||||
mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:
|
||||
mv /var/log/clickhouse-server/stderr2.log /test_output/ ||:
|
||||
tar -chf /test_output/zookeeper_log_dump1.tar /var/lib/clickhouse1/data/system/zookeeper_log ||:
|
||||
tar -chf /test_output/zookeeper_log_dump2.tar /var/lib/clickhouse2/data/system/zookeeper_log ||:
|
||||
tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||:
|
||||
tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||:
|
||||
fi
|
||||
|
20
docker/test/stateless/setup_hdfs_minicluster.sh
Executable file
20
docker/test/stateless/setup_hdfs_minicluster.sh
Executable file
@ -0,0 +1,20 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e -x -a -u
|
||||
|
||||
ls -lha
|
||||
|
||||
cd hadoop-3.2.2
|
||||
|
||||
export JAVA_HOME=/usr
|
||||
mkdir -p target/test/data
|
||||
chown clickhouse ./target/test/data
|
||||
sudo -E -u clickhouse bin/mapred minicluster -format -nomr -nnport 12222 &
|
||||
|
||||
while ! nc -z localhost 12222; do
|
||||
sleep 1
|
||||
done
|
||||
|
||||
lsof -i :12222
|
||||
|
||||
sleep 5
|
@ -41,6 +41,7 @@ sleep 5
|
||||
./mc admin user add clickminio test testtest
|
||||
./mc admin policy set clickminio readwrite user=test
|
||||
./mc mb clickminio/test
|
||||
./mc policy set public clickminio/test
|
||||
|
||||
|
||||
# Upload data to Minio. By default after unpacking all tests will in
|
||||
|
@ -29,7 +29,7 @@ COPY ./download_previous_release /download_previous_release
|
||||
COPY run.sh /
|
||||
|
||||
ENV DATASETS="hits visits"
|
||||
ENV S3_URL="https://clickhouse-datasets.s3.yandex.net"
|
||||
ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
|
||||
ENV EXPORT_S3_STORAGE_POLICIES=1
|
||||
|
||||
CMD ["/bin/bash", "/run.sh"]
|
||||
|
@ -0,0 +1,48 @@
|
||||
---
|
||||
toc_priority: 108
|
||||
---
|
||||
|
||||
# groupArraySorted {#groupArraySorted}
|
||||
|
||||
Returns an array with the first N items in ascending order.
|
||||
|
||||
``` sql
|
||||
groupArraySorted(N)(column)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `N` – The number of elements to return.
|
||||
|
||||
If the parameter is omitted, default value 10 is used.
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `column` – The value.
|
||||
- `expr` — Optional. The field or expresion to sort by. If not set values are sorted by themselves.
|
||||
|
||||
**Example**
|
||||
|
||||
Gets the first 10 numbers:
|
||||
|
||||
``` sql
|
||||
SELECT groupArraySorted(10)(number) FROM numbers(100)
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─groupArraySorted(10)(number)─┐
|
||||
│ [0,1,2,3,4,5,6,7,8,9] │
|
||||
└──────────────────────────────┘
|
||||
```
|
||||
|
||||
Or the last 10:
|
||||
|
||||
``` sql
|
||||
SELECT groupArraySorted(10)(number, -number) FROM numbers(100)
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─groupArraySorted(10)(number, negate(number))─┐
|
||||
│ [99,98,97,96,95,94,93,92,91,90] │
|
||||
└──────────────────────────────────────────────┘
|
||||
```
|
@ -35,6 +35,7 @@ ClickHouse-specific aggregate functions:
|
||||
- [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md)
|
||||
- [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
|
||||
- [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
|
||||
- [groupArraySorted](../../../sql-reference/aggregate-functions/reference/grouparraysorted.md)
|
||||
- [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md)
|
||||
- [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md)
|
||||
- [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md)
|
||||
|
@ -8,7 +8,7 @@ toc_title: "版本折叠MergeTree"
|
||||
这个引擎:
|
||||
|
||||
- 允许快速写入不断变化的对象状态。
|
||||
- 删除后台中的旧对象状态。 这显着降低了存储体积。
|
||||
- 删除后台中的旧对象状态。 这显著降低了存储体积。
|
||||
|
||||
请参阅部分 [崩溃](#table_engines_versionedcollapsingmergetree) 有关详细信息。
|
||||
|
||||
|
28
packages/clickhouse-keeper-dbg.yaml
Normal file
28
packages/clickhouse-keeper-dbg.yaml
Normal file
@ -0,0 +1,28 @@
|
||||
# package sources should be placed in ${PWD}/root
|
||||
# nfpm should run from the same directory with a config
|
||||
name: "clickhouse-keeper-dbg"
|
||||
arch: "${DEB_ARCH}" # amd64, arm64
|
||||
platform: "linux"
|
||||
version: "${CLICKHOUSE_VERSION_STRING}"
|
||||
vendor: "ClickHouse Inc."
|
||||
homepage: "https://clickhouse.com"
|
||||
license: "Apache"
|
||||
section: "database"
|
||||
priority: "optional"
|
||||
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
|
||||
description: |
|
||||
debugging symbols for clickhouse-keeper
|
||||
This package contains the debugging symbols for clickhouse-keeper.
|
||||
|
||||
contents:
|
||||
- src: root/usr/lib/debug/usr/bin/clickhouse-keeper.debug
|
||||
dst: /usr/lib/debug/usr/bin/clickhouse-keeper.debug
|
||||
# docs
|
||||
- src: ../AUTHORS
|
||||
dst: /usr/share/doc/clickhouse-keeper-dbg/AUTHORS
|
||||
- src: ../CHANGELOG.md
|
||||
dst: /usr/share/doc/clickhouse-keeper-dbg/CHANGELOG.md
|
||||
- src: ../LICENSE
|
||||
dst: /usr/share/doc/clickhouse-keeper-dbg/LICENSE
|
||||
- src: ../README.md
|
||||
dst: /usr/share/doc/clickhouse-keeper-dbg/README.md
|
40
packages/clickhouse-keeper.yaml
Normal file
40
packages/clickhouse-keeper.yaml
Normal file
@ -0,0 +1,40 @@
|
||||
# package sources should be placed in ${PWD}/root
|
||||
# nfpm should run from the same directory with a config
|
||||
name: "clickhouse-keeper"
|
||||
arch: "${DEB_ARCH}" # amd64, arm64
|
||||
platform: "linux"
|
||||
version: "${CLICKHOUSE_VERSION_STRING}"
|
||||
vendor: "ClickHouse Inc."
|
||||
homepage: "https://clickhouse.com"
|
||||
license: "Apache"
|
||||
section: "database"
|
||||
priority: "optional"
|
||||
|
||||
conflicts:
|
||||
- clickhouse-server
|
||||
depends:
|
||||
- adduser
|
||||
suggests:
|
||||
- clickhouse-keeper-dbg
|
||||
|
||||
maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
|
||||
description: |
|
||||
Static clickhouse-keeper binary
|
||||
A stand-alone clickhouse-keeper package
|
||||
|
||||
|
||||
contents:
|
||||
- src: root/etc/clickhouse-keeper
|
||||
dst: /etc/clickhouse-keeper
|
||||
type: config
|
||||
- src: root/usr/bin/clickhouse-keeper
|
||||
dst: /usr/bin/clickhouse-keeper
|
||||
# docs
|
||||
- src: ../AUTHORS
|
||||
dst: /usr/share/doc/clickhouse-keeper/AUTHORS
|
||||
- src: ../CHANGELOG.md
|
||||
dst: /usr/share/doc/clickhouse-keeper/CHANGELOG.md
|
||||
- src: ../LICENSE
|
||||
dst: /usr/share/doc/clickhouse-keeper/LICENSE
|
||||
- src: ../README.md
|
||||
dst: /usr/share/doc/clickhouse-keeper/README.md
|
@ -71,17 +71,11 @@ if (BUILD_STANDALONE_KEEPER)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDelta.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDoubleDelta.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecGorilla.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecT64.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecZSTD.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionFactory.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/getCompressionCodecForFile.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/ICompressionCodec.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/LZ4_decompress_faster.cpp
|
||||
|
||||
|
@ -184,6 +184,11 @@ void LocalServer::tryInitPath()
|
||||
if (path.back() != '/')
|
||||
path += '/';
|
||||
|
||||
fs::create_directories(fs::path(path) / "user_defined/");
|
||||
fs::create_directories(fs::path(path) / "data/");
|
||||
fs::create_directories(fs::path(path) / "metadata/");
|
||||
fs::create_directories(fs::path(path) / "metadata_dropped/");
|
||||
|
||||
global_context->setPath(path);
|
||||
|
||||
global_context->setTemporaryStorage(path + "tmp");
|
||||
@ -565,7 +570,6 @@ void LocalServer::processConfig()
|
||||
/// Lock path directory before read
|
||||
status.emplace(fs::path(path) / "status", StatusFile::write_full_info);
|
||||
|
||||
fs::create_directories(fs::path(path) / "user_defined/");
|
||||
LOG_DEBUG(log, "Loading user defined objects from {}", path);
|
||||
Poco::File(path + "user_defined/").createDirectories();
|
||||
UserDefinedSQLObjectsLoader::instance().loadObjects(global_context);
|
||||
@ -573,9 +577,6 @@ void LocalServer::processConfig()
|
||||
LOG_DEBUG(log, "Loaded user defined objects.");
|
||||
|
||||
LOG_DEBUG(log, "Loading metadata from {}", path);
|
||||
fs::create_directories(fs::path(path) / "data/");
|
||||
fs::create_directories(fs::path(path) / "metadata/");
|
||||
|
||||
loadMetadataSystem(global_context);
|
||||
attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
|
||||
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include <base/phdr_cache.h>
|
||||
#include <base/ErrorHandlers.h>
|
||||
#include <base/getMemoryAmount.h>
|
||||
#include <base/getAvailableMemoryAmount.h>
|
||||
#include <base/errnoToString.h>
|
||||
#include <base/coverage.h>
|
||||
#include <base/getFQDNOrHostName.h>
|
||||
@ -45,6 +46,7 @@
|
||||
#include <Core/ServerUUID.h>
|
||||
#include <IO/HTTPCommon.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/IOThreadPool.h>
|
||||
#include <IO/UseSSL.h>
|
||||
#include <Interpreters/AsynchronousMetrics.h>
|
||||
@ -80,6 +82,7 @@
|
||||
#include <Common/SensitiveDataMasker.h>
|
||||
#include <Common/ThreadFuzzer.h>
|
||||
#include <Common/getHashOfLoadedBinary.h>
|
||||
#include <Common/filesystemHelpers.h>
|
||||
#include <Common/Elf.h>
|
||||
#include <Server/MySQLHandlerFactory.h>
|
||||
#include <Server/PostgreSQLHandlerFactory.h>
|
||||
@ -505,6 +508,101 @@ void checkForUsersNotInMainConfig(
|
||||
}
|
||||
}
|
||||
|
||||
/// Unused in other builds
|
||||
#if defined(OS_LINUX)
|
||||
static String readString(const String & path)
|
||||
{
|
||||
ReadBufferFromFile in(path);
|
||||
String contents;
|
||||
readStringUntilEOF(contents, in);
|
||||
return contents;
|
||||
}
|
||||
|
||||
static int readNumber(const String & path)
|
||||
{
|
||||
ReadBufferFromFile in(path);
|
||||
int result;
|
||||
readText(result, in);
|
||||
return result;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
static void sanityChecks(Server * server)
|
||||
{
|
||||
std::string data_path = getCanonicalPath(server->config().getString("path", DBMS_DEFAULT_PATH));
|
||||
std::string logs_path = server->config().getString("logger.log", "");
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
try
|
||||
{
|
||||
if (readString("/sys/devices/system/clocksource/clocksource0/current_clocksource").find("tsc") == std::string::npos)
|
||||
server->context()->addWarningMessage("Linux is not using fast TSC clock source. Performance can be degraded.");
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
if (readNumber("/proc/sys/vm/overcommit_memory") == 2)
|
||||
server->context()->addWarningMessage("Linux memory overcommit is disabled.");
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
if (readString("/sys/kernel/mm/transparent_hugepage/enabled").find("[always]") != std::string::npos)
|
||||
server->context()->addWarningMessage("Linux transparent hugepage are set to \"always\".");
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
if (readNumber("/proc/sys/kernel/pid_max") < 30000)
|
||||
server->context()->addWarningMessage("Linux max PID is too low.");
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
if (readNumber("/proc/sys/kernel/threads-max") < 30000)
|
||||
server->context()->addWarningMessage("Linux threads max count is too low.");
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
|
||||
std::string dev_id = getBlockDeviceId(data_path);
|
||||
if (getBlockDeviceType(dev_id) == BlockDeviceType::ROT && getBlockDeviceReadAheadBytes(dev_id) == 0)
|
||||
server->context()->addWarningMessage("Rotational disk with disabled readahead is in use. Performance can be degraded.");
|
||||
#endif
|
||||
|
||||
try
|
||||
{
|
||||
if (getAvailableMemoryAmount() < (2l << 30))
|
||||
server->context()->addWarningMessage("Available memory at server startup is too low (2GiB).");
|
||||
|
||||
if (!enoughSpaceInDirectory(data_path, 1ull << 30))
|
||||
server->context()->addWarningMessage("Available disk space at server startup is too low (1GiB).");
|
||||
|
||||
if (!logs_path.empty())
|
||||
{
|
||||
if (!enoughSpaceInDirectory(fs::path(logs_path).parent_path(), 1ull << 30))
|
||||
server->context()->addWarningMessage("Available disk space at server startup is too low (1GiB).");
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
int Server::main(const std::vector<std::string> & /*args*/)
|
||||
{
|
||||
Poco::Logger * log = &logger();
|
||||
@ -538,13 +636,14 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
global_context->addWarningMessage("Server was built in debug mode. It will work slowly.");
|
||||
#endif
|
||||
|
||||
if (ThreadFuzzer::instance().isEffective())
|
||||
global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable.");
|
||||
if (ThreadFuzzer::instance().isEffective())
|
||||
global_context->addWarningMessage("ThreadFuzzer is enabled. Application will run slowly and unstable.");
|
||||
|
||||
#if defined(SANITIZER)
|
||||
global_context->addWarningMessage("Server was built with sanitizer. It will work slowly.");
|
||||
#endif
|
||||
|
||||
sanityChecks(this);
|
||||
|
||||
// Initialize global thread pool. Do it before we fetch configs from zookeeper
|
||||
// nodes (`from_zk`), because ZooKeeper interface uses the pool. We will
|
||||
@ -766,6 +865,38 @@ if (ThreadFuzzer::instance().isEffective())
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to increase limit on number of threads.
|
||||
{
|
||||
rlimit rlim;
|
||||
if (getrlimit(RLIMIT_NPROC, &rlim))
|
||||
throw Poco::Exception("Cannot getrlimit");
|
||||
|
||||
if (rlim.rlim_cur == rlim.rlim_max)
|
||||
{
|
||||
LOG_DEBUG(log, "rlimit on number of threads is {}", rlim.rlim_cur);
|
||||
}
|
||||
else
|
||||
{
|
||||
rlim_t old = rlim.rlim_cur;
|
||||
rlim.rlim_cur = rlim.rlim_max;
|
||||
int rc = setrlimit(RLIMIT_NPROC, &rlim);
|
||||
if (rc != 0)
|
||||
{
|
||||
LOG_WARNING(log, "Cannot set max number of threads to {}. error: {}", rlim.rlim_cur, strerror(errno));
|
||||
rlim.rlim_cur = old;
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_DEBUG(log, "Set max number of threads to {} (was {}).", rlim.rlim_cur, old);
|
||||
}
|
||||
}
|
||||
|
||||
if (rlim.rlim_cur < 30000)
|
||||
{
|
||||
global_context->addWarningMessage("Maximum number of threads is lower than 30000. There could be problems with handling a lot of simultaneous queries.");
|
||||
}
|
||||
}
|
||||
|
||||
static ServerErrorHandler error_handler;
|
||||
Poco::ErrorHandler::set(&error_handler);
|
||||
|
||||
@ -829,6 +960,36 @@ if (ThreadFuzzer::instance().isEffective())
|
||||
fs::create_directories(path / "metadata_dropped/");
|
||||
}
|
||||
|
||||
#if USE_ROCKSDB
|
||||
/// Initialize merge tree metadata cache
|
||||
if (config().has("merge_tree_metadata_cache"))
|
||||
{
|
||||
fs::create_directories(path / "rocksdb/");
|
||||
size_t size = config().getUInt64("merge_tree_metadata_cache.lru_cache_size", 256 << 20);
|
||||
bool continue_if_corrupted = config().getBool("merge_tree_metadata_cache.continue_if_corrupted", false);
|
||||
try
|
||||
{
|
||||
LOG_DEBUG(
|
||||
log, "Initiailizing merge tree metadata cache lru_cache_size:{} continue_if_corrupted:{}", size, continue_if_corrupted);
|
||||
global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (continue_if_corrupted)
|
||||
{
|
||||
/// Rename rocksdb directory and reinitialize merge tree metadata cache
|
||||
time_t now = time(nullptr);
|
||||
fs::rename(path / "rocksdb", path / ("rocksdb.old." + std::to_string(now)));
|
||||
global_context->initializeMergeTreeMetadataCache(path_str + "/" + "rocksdb", size);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (config().has("interserver_http_port") && config().has("interserver_https_port"))
|
||||
throw Exception("Both http and https interserver ports are specified", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
|
||||
|
||||
|
@ -1294,4 +1294,10 @@
|
||||
</tables>
|
||||
</rocksdb>
|
||||
-->
|
||||
|
||||
<!-- Uncomment if enable merge tree metadata cache -->
|
||||
<merge_tree_metadata_cache>
|
||||
<lru_cache_size>268435456</lru_cache_size>
|
||||
<continue_if_corrupted>true</continue_if_corrupted>
|
||||
</merge_tree_metadata_cache>
|
||||
</clickhouse>
|
||||
|
@ -13,7 +13,7 @@ enum class QuotaType
|
||||
{
|
||||
QUERIES, /// Number of queries.
|
||||
QUERY_SELECTS, /// Number of select queries.
|
||||
QUERY_INSERTS, /// Number of inserts queries.
|
||||
QUERY_INSERTS, /// Number of insert queries.
|
||||
ERRORS, /// Number of queries with exceptions.
|
||||
RESULT_ROWS, /// Number of rows returned as result.
|
||||
RESULT_BYTES, /// Number of bytes returned as result.
|
||||
|
147
src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp
Normal file
147
src/AggregateFunctions/AggregateFunctionGroupArraySorted.cpp
Normal file
@ -0,0 +1,147 @@
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionGroupArraySorted.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Common/FieldVisitorConvertToNumber.h>
|
||||
|
||||
|
||||
static inline constexpr UInt64 GROUP_SORTED_ARRAY_MAX_SIZE = 0xFFFFFF;
|
||||
static inline constexpr UInt64 GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD = 10;
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
|
||||
class AggregateFunctionGroupArraySortedNumeric : public AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>
|
||||
{
|
||||
using AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>::AggregateFunctionGroupArraySorted;
|
||||
};
|
||||
|
||||
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
|
||||
class AggregateFunctionGroupArraySortedFieldType
|
||||
: public AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>
|
||||
{
|
||||
using AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>::
|
||||
AggregateFunctionGroupArraySorted;
|
||||
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<T>()); }
|
||||
};
|
||||
|
||||
template <template <typename, bool, typename, bool> class AggregateFunctionTemplate, typename TColumnA, bool expr_sorted, typename TColumnB, bool is_plain_b, typename... TArgs>
|
||||
AggregateFunctionPtr
|
||||
createAggregateFunctionGroupArraySortedTypedFinal(TArgs && ... args)
|
||||
{
|
||||
return AggregateFunctionPtr(new AggregateFunctionTemplate<TColumnA, expr_sorted, TColumnB, is_plain_b>(std::forward<TArgs>(args)...));
|
||||
}
|
||||
|
||||
template <bool expr_sorted = false, typename TColumnB = UInt64, bool is_plain_b = false>
|
||||
AggregateFunctionPtr
|
||||
createAggregateFunctionGroupArraySortedTyped(const DataTypes & argument_types, const Array & params, UInt64 threshold)
|
||||
{
|
||||
#define DISPATCH(A, C, B) \
|
||||
if (which.idx == TypeIndex::A) \
|
||||
return createAggregateFunctionGroupArraySortedTypedFinal<C, B, expr_sorted, TColumnB, is_plain_b>(threshold, argument_types, params);
|
||||
#define DISPATCH_NUMERIC(A) DISPATCH(A, AggregateFunctionGroupArraySortedNumeric, A)
|
||||
WhichDataType which(argument_types[0]);
|
||||
FOR_NUMERIC_TYPES(DISPATCH_NUMERIC)
|
||||
DISPATCH(Enum8, AggregateFunctionGroupArraySortedNumeric, Int8)
|
||||
DISPATCH(Enum16, AggregateFunctionGroupArraySortedNumeric, Int16)
|
||||
DISPATCH(Date, AggregateFunctionGroupArraySortedFieldType, DataTypeDate)
|
||||
DISPATCH(DateTime, AggregateFunctionGroupArraySortedFieldType, DataTypeDateTime)
|
||||
#undef DISPATCH
|
||||
#undef DISPATCH_NUMERIC
|
||||
|
||||
if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
|
||||
{
|
||||
return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, true, expr_sorted, TColumnB, is_plain_b>(
|
||||
threshold, argument_types, params));
|
||||
}
|
||||
else
|
||||
{
|
||||
return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, false, expr_sorted, TColumnB, is_plain_b>(
|
||||
threshold, argument_types, params));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
AggregateFunctionPtr createAggregateFunctionGroupArraySorted(
|
||||
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
|
||||
{
|
||||
UInt64 threshold = GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD;
|
||||
|
||||
if (params.size() == 1)
|
||||
{
|
||||
UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
|
||||
|
||||
if (k > GROUP_SORTED_ARRAY_MAX_SIZE)
|
||||
throw Exception(
|
||||
"Too large parameter(s) for aggregate function " + name + ". Maximum: " + toString(GROUP_SORTED_ARRAY_MAX_SIZE),
|
||||
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
|
||||
if (k == 0)
|
||||
throw Exception("Parameter 0 is illegal for aggregate function " + name, ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
|
||||
threshold = k;
|
||||
}
|
||||
else if (!params.empty())
|
||||
{
|
||||
throw Exception("Aggregate function " + name + " only supports 1 parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
}
|
||||
|
||||
if (argument_types.size() == 2)
|
||||
{
|
||||
if (isNumber(argument_types[1]))
|
||||
{
|
||||
#define DISPATCH2(A, B) \
|
||||
if (which.idx == TypeIndex::A) \
|
||||
return createAggregateFunctionGroupArraySortedTyped<true, B>(argument_types, params, threshold);
|
||||
#define DISPATCH(A) DISPATCH2(A, A)
|
||||
WhichDataType which(argument_types[1]);
|
||||
FOR_NUMERIC_TYPES(DISPATCH)
|
||||
DISPATCH2(Enum8, Int8)
|
||||
DISPATCH2(Enum16, Int16)
|
||||
#undef DISPATCH
|
||||
#undef DISPATCH2
|
||||
throw Exception("Invalid parameter type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
else if (argument_types[1]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
|
||||
{
|
||||
return createAggregateFunctionGroupArraySortedTyped<true, StringRef, true>(argument_types, params, threshold);
|
||||
}
|
||||
else
|
||||
{
|
||||
return createAggregateFunctionGroupArraySortedTyped<true, StringRef, false>(argument_types, params, threshold);
|
||||
}
|
||||
}
|
||||
else if (argument_types.size() == 1)
|
||||
{
|
||||
return createAggregateFunctionGroupArraySortedTyped<>(argument_types, params, threshold);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(
|
||||
"Aggregate function " + name + " requires one or two parameters.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
|
||||
{
|
||||
AggregateFunctionProperties properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
|
||||
factory.registerFunction("groupArraySorted", {createAggregateFunctionGroupArraySorted, properties});
|
||||
}
|
||||
}
|
310
src/AggregateFunctions/AggregateFunctionGroupArraySorted.h
Normal file
310
src/AggregateFunctions/AggregateFunctionGroupArraySorted.h
Normal file
@ -0,0 +1,310 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionGroupArraySortedData.h>
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
template <typename TColumn, bool is_plain>
|
||||
inline TColumn readItem(const IColumn * column, Arena * arena, size_t row)
|
||||
{
|
||||
if constexpr (std::is_same_v<TColumn, StringRef>)
|
||||
{
|
||||
if constexpr (is_plain)
|
||||
{
|
||||
StringRef str = column->getDataAt(row);
|
||||
auto ptr = arena->alloc(str.size);
|
||||
std::copy(str.data, str.data + str.size, ptr);
|
||||
return StringRef(ptr, str.size);
|
||||
}
|
||||
else
|
||||
{
|
||||
const char * begin = nullptr;
|
||||
return column->serializeValueIntoArena(row, *arena, begin);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if constexpr (std::is_same_v<TColumn, UInt64>)
|
||||
return column->getUInt(row);
|
||||
else
|
||||
return column->getInt(row);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TColumn, typename TFilter = void>
|
||||
size_t
|
||||
getFirstNElements_low_threshold(const TColumn * data, int num_elements, int threshold, size_t * results, const TFilter * filter = nullptr)
|
||||
{
|
||||
for (int i = 0; i < threshold; i++)
|
||||
{
|
||||
results[i] = 0;
|
||||
}
|
||||
|
||||
threshold = std::min(num_elements, threshold);
|
||||
int current_max = 0;
|
||||
int cur;
|
||||
int z;
|
||||
for (int i = 0; i < num_elements; i++)
|
||||
{
|
||||
if constexpr (!std::is_same_v<TFilter, void>)
|
||||
{
|
||||
if (filter[i] == 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
//Starting from the highest values and we look for the immediately lower than the given one
|
||||
for (cur = current_max; cur > 0; cur--)
|
||||
{
|
||||
if (data[i] > data[results[cur - 1]])
|
||||
break;
|
||||
}
|
||||
|
||||
if (cur < threshold)
|
||||
{
|
||||
//Move all the higher values 1 position to the right
|
||||
for (z = std::min(threshold - 1, current_max); z > cur; z--)
|
||||
results[z] = results[z - 1];
|
||||
|
||||
if (current_max < threshold)
|
||||
++current_max;
|
||||
|
||||
//insert element into the given position
|
||||
results[cur] = i;
|
||||
}
|
||||
}
|
||||
|
||||
return current_max;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct SortableItem
|
||||
{
|
||||
T a;
|
||||
size_t b;
|
||||
bool operator<(const SortableItem & other) const { return (this->a < other.a); }
|
||||
};
|
||||
|
||||
template <typename TColumn, typename TFilter = void>
|
||||
size_t getFirstNElements_high_threshold(
|
||||
const TColumn * data, size_t num_elements, size_t threshold, size_t * results, const TFilter * filter = nullptr)
|
||||
{
|
||||
std::vector<SortableItem<TColumn>> dataIndexed(num_elements);
|
||||
size_t num_elements_filtered = 0;
|
||||
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
if constexpr (!std::is_same_v<TFilter, void>)
|
||||
{
|
||||
if (filter[i] == 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
dataIndexed.data()[num_elements_filtered].a = data[i];
|
||||
dataIndexed.data()[num_elements_filtered].b = i;
|
||||
num_elements_filtered++;
|
||||
}
|
||||
|
||||
threshold = std::min(num_elements_filtered, threshold);
|
||||
|
||||
std::nth_element(dataIndexed.data(), dataIndexed.data() + threshold, dataIndexed.data() + num_elements_filtered);
|
||||
std::sort(dataIndexed.data(), dataIndexed.data() + threshold);
|
||||
|
||||
for (size_t i = 0; i < threshold; i++)
|
||||
{
|
||||
results[i] = dataIndexed[i].b;
|
||||
}
|
||||
|
||||
return threshold;
|
||||
}
|
||||
|
||||
static const size_t THRESHOLD_MAX_CUSTOM_FUNCTION = 1000;
|
||||
|
||||
template <typename TColumn>
|
||||
size_t getFirstNElements(const TColumn * data, size_t num_elements, size_t threshold, size_t * results, const UInt8 * filter = nullptr)
|
||||
{
|
||||
if (threshold < THRESHOLD_MAX_CUSTOM_FUNCTION)
|
||||
{
|
||||
if (filter != nullptr)
|
||||
return getFirstNElements_low_threshold(data, num_elements, threshold, results, filter);
|
||||
else
|
||||
return getFirstNElements_low_threshold(data, num_elements, threshold, results);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (filter != nullptr)
|
||||
return getFirstNElements_high_threshold(data, num_elements, threshold, results, filter);
|
||||
else
|
||||
return getFirstNElements_high_threshold(data, num_elements, threshold, results);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TColumnA, bool is_plain_a, bool use_column_b, typename TColumnB, bool is_plain_b>
|
||||
class AggregateFunctionGroupArraySorted : public IAggregateFunctionDataHelper<
|
||||
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
|
||||
AggregateFunctionGroupArraySorted<TColumnA, is_plain_a, use_column_b, TColumnB, is_plain_b>>
|
||||
{
|
||||
protected:
|
||||
using State = AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>;
|
||||
using Base = IAggregateFunctionDataHelper<
|
||||
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
|
||||
AggregateFunctionGroupArraySorted>;
|
||||
|
||||
UInt64 threshold;
|
||||
DataTypePtr & input_data_type;
|
||||
mutable std::mutex mutex;
|
||||
|
||||
static void deserializeAndInsert(StringRef str, IColumn & data_to);
|
||||
|
||||
public:
|
||||
AggregateFunctionGroupArraySorted(UInt64 threshold_, const DataTypes & argument_types_, const Array & params)
|
||||
: IAggregateFunctionDataHelper<
|
||||
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
|
||||
AggregateFunctionGroupArraySorted>(argument_types_, params)
|
||||
, threshold(threshold_)
|
||||
, input_data_type(this->argument_types[0])
|
||||
{
|
||||
}
|
||||
|
||||
void create(AggregateDataPtr place) const override
|
||||
{
|
||||
Base::create(place);
|
||||
this->data(place).threshold = threshold;
|
||||
}
|
||||
|
||||
String getName() const override { return "groupArraySorted"; }
|
||||
|
||||
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(input_data_type); }
|
||||
|
||||
bool allocatesMemoryInArena() const override
|
||||
{
|
||||
if constexpr (std::is_same_v<TColumnA, StringRef>)
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
State & data = this->data(place);
|
||||
if constexpr (use_column_b)
|
||||
{
|
||||
data.add(
|
||||
readItem<TColumnA, is_plain_a>(columns[0], arena, row_num), readItem<TColumnB, is_plain_b>(columns[1], arena, row_num));
|
||||
}
|
||||
else
|
||||
{
|
||||
data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row_num));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TColumn, bool is_plain, typename TFunc>
|
||||
void
|
||||
forFirstRows(size_t batch_size, const IColumn ** columns, size_t data_column, Arena * arena, ssize_t if_argument_pos, TFunc func) const
|
||||
{
|
||||
const TColumn * values = nullptr;
|
||||
std::unique_ptr<std::vector<TColumn>> values_vector;
|
||||
std::vector<size_t> best_rows(threshold);
|
||||
|
||||
if constexpr (std::is_same_v<TColumn, StringRef>)
|
||||
{
|
||||
values_vector.reset(new std::vector<TColumn>(batch_size));
|
||||
for (size_t i = 0; i < batch_size; i++)
|
||||
(*values_vector)[i] = readItem<TColumn, is_plain>(columns[data_column], arena, i);
|
||||
values = (*values_vector).data();
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto & column = assert_cast<const ColumnVector<TColumn> &>(*columns[data_column]);
|
||||
values = column.getData().data();
|
||||
}
|
||||
|
||||
const UInt8 * filter = nullptr;
|
||||
StringRef refFilter;
|
||||
|
||||
if (if_argument_pos >= 0)
|
||||
{
|
||||
refFilter = columns[if_argument_pos]->getRawData();
|
||||
filter = reinterpret_cast<const UInt8 *>(refFilter.data);
|
||||
}
|
||||
|
||||
size_t num_elements = getFirstNElements(values, batch_size, threshold, best_rows.data(), filter);
|
||||
for (size_t i = 0; i < num_elements; i++)
|
||||
{
|
||||
func(best_rows[i], values);
|
||||
}
|
||||
}
|
||||
|
||||
void addBatchSinglePlace(
|
||||
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos) const override
|
||||
{
|
||||
State & data = this->data(place);
|
||||
|
||||
if constexpr (use_column_b)
|
||||
{
|
||||
forFirstRows<TColumnB, is_plain_b>(
|
||||
batch_size, columns, 1, arena, if_argument_pos, [columns, &arena, &data](size_t row, const TColumnB * values)
|
||||
{
|
||||
data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row), values[row]);
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
forFirstRows<TColumnA, is_plain_a>(
|
||||
batch_size, columns, 0, arena, if_argument_pos, [&data](size_t row, const TColumnA * values)
|
||||
{
|
||||
data.add(values[row]);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
|
||||
{
|
||||
this->data(place).merge(this->data(rhs));
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
this->data(place).serialize(buf);
|
||||
}
|
||||
|
||||
void
|
||||
deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
this->data(place).deserialize(buf, arena);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * /*arena*/) const override
|
||||
{
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
|
||||
auto & values = this->data(place).values;
|
||||
offsets_to.push_back(offsets_to.back() + values.size());
|
||||
|
||||
IColumn & data_to = arr_to.getData();
|
||||
for (auto value : values)
|
||||
{
|
||||
if constexpr (std::is_same_v<TColumnA, StringRef>)
|
||||
{
|
||||
auto str = State::itemValue(value);
|
||||
if constexpr (is_plain_a)
|
||||
{
|
||||
data_to.insertData(str.data, str.size);
|
||||
}
|
||||
else
|
||||
{
|
||||
data_to.deserializeAndInsertFromArena(str.data);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
data_to.insert(State::itemValue(value));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
162
src/AggregateFunctions/AggregateFunctionGroupArraySortedData.h
Normal file
162
src/AggregateFunctions/AggregateFunctionGroupArraySortedData.h
Normal file
@ -0,0 +1,162 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/VarInt.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
static inline constexpr UInt64 GROUP_SORTED_DEFAULT_THRESHOLD = 0xFFFFFF;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
template <typename T>
|
||||
static void writeOneItem(WriteBuffer & buf, T item)
|
||||
{
|
||||
if constexpr (std::numeric_limits<T>::is_signed)
|
||||
{
|
||||
writeVarInt(item, buf);
|
||||
}
|
||||
else
|
||||
{
|
||||
writeVarUInt(item, buf);
|
||||
}
|
||||
}
|
||||
|
||||
static void writeOneItem(WriteBuffer & buf, const StringRef & item)
|
||||
{
|
||||
writeBinary(item, buf);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void readOneItem(ReadBuffer & buf, Arena * /*arena*/, T & item)
|
||||
{
|
||||
if constexpr (std::numeric_limits<T>::is_signed)
|
||||
{
|
||||
DB::Int64 val;
|
||||
readVarT(val, buf);
|
||||
item = val;
|
||||
}
|
||||
else
|
||||
{
|
||||
DB::UInt64 val;
|
||||
readVarT(val, buf);
|
||||
item = val;
|
||||
}
|
||||
}
|
||||
|
||||
static void readOneItem(ReadBuffer & buf, Arena * arena, StringRef & item)
|
||||
{
|
||||
item = readStringBinaryInto(*arena, buf);
|
||||
}
|
||||
|
||||
template <typename Storage>
|
||||
struct AggregateFunctionGroupArraySortedDataBase
|
||||
{
|
||||
typedef typename Storage::value_type ValueType;
|
||||
AggregateFunctionGroupArraySortedDataBase(UInt64 threshold_ = GROUP_SORTED_DEFAULT_THRESHOLD) : threshold(threshold_) { }
|
||||
|
||||
virtual ~AggregateFunctionGroupArraySortedDataBase() { }
|
||||
inline void narrowDown()
|
||||
{
|
||||
while (values.size() > threshold)
|
||||
values.erase(--values.end());
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionGroupArraySortedDataBase & other)
|
||||
{
|
||||
values.merge(Storage(other.values));
|
||||
narrowDown();
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
writeOneItem(buf, UInt64(values.size()));
|
||||
for (auto value : values)
|
||||
{
|
||||
serializeItem(buf, value);
|
||||
}
|
||||
}
|
||||
|
||||
virtual void serializeItem(WriteBuffer & buf, ValueType & val) const = 0;
|
||||
virtual ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const = 0;
|
||||
|
||||
void deserialize(ReadBuffer & buf, Arena * arena)
|
||||
{
|
||||
values.clear();
|
||||
UInt64 length;
|
||||
readOneItem(buf, nullptr, length);
|
||||
|
||||
while (length--)
|
||||
{
|
||||
values.insert(deserializeItem(buf, arena));
|
||||
}
|
||||
|
||||
narrowDown();
|
||||
}
|
||||
|
||||
UInt64 threshold;
|
||||
Storage values;
|
||||
};
|
||||
|
||||
template <typename T, bool expr_sorted, typename TIndex>
|
||||
struct AggregateFunctionGroupArraySortedData
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T, typename TIndex>
|
||||
struct AggregateFunctionGroupArraySortedData<T, true, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>
|
||||
{
|
||||
using Base = AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>;
|
||||
using Base::Base;
|
||||
|
||||
void add(T item, TIndex weight)
|
||||
{
|
||||
Base::values.insert({weight, item});
|
||||
Base::narrowDown();
|
||||
}
|
||||
|
||||
void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override
|
||||
{
|
||||
writeOneItem(buf, value.first);
|
||||
writeOneItem(buf, value.second);
|
||||
}
|
||||
|
||||
virtual typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
|
||||
{
|
||||
TIndex first;
|
||||
T second;
|
||||
readOneItem(buf, arena, first);
|
||||
readOneItem(buf, arena, second);
|
||||
|
||||
return {first, second};
|
||||
}
|
||||
|
||||
static T itemValue(typename Base::ValueType & value) { return value.second; }
|
||||
};
|
||||
|
||||
template <typename T, typename TIndex>
|
||||
struct AggregateFunctionGroupArraySortedData<T, false, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>
|
||||
{
|
||||
using Base = AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>;
|
||||
using Base::Base;
|
||||
|
||||
void add(T item)
|
||||
{
|
||||
Base::values.insert(item);
|
||||
Base::narrowDown();
|
||||
}
|
||||
|
||||
void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override { writeOneItem(buf, value); }
|
||||
|
||||
typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
|
||||
{
|
||||
T value;
|
||||
readOneItem(buf, arena, value);
|
||||
return value;
|
||||
}
|
||||
|
||||
static T itemValue(typename Base::ValueType & value) { return value; }
|
||||
};
|
||||
}
|
@ -67,7 +67,7 @@ auto parseArguments(const std::string & name, const DataTypes & arguments)
|
||||
values_types.push_back(array_type->getNestedType());
|
||||
}
|
||||
|
||||
return std::tuple{std::move(keys_type), std::move(values_types), tuple_argument};
|
||||
return std::tuple<DataTypePtr, DataTypes, bool>{std::move(keys_type), std::move(values_types), tuple_argument};
|
||||
}
|
||||
|
||||
// This function instantiates a particular overload of the sumMap family of
|
||||
|
@ -59,6 +59,7 @@ void registerAggregateFunctionNothing(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);
|
||||
|
||||
class AggregateFunctionCombinatorFactory;
|
||||
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
|
||||
@ -130,6 +131,7 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionIntervalLengthSum(factory);
|
||||
registerAggregateFunctionExponentialMovingAverage(factory);
|
||||
registerAggregateFunctionSparkbar(factory);
|
||||
registerAggregateFunctionGroupArraySorted(factory);
|
||||
|
||||
registerWindowFunctions(factory);
|
||||
}
|
||||
|
@ -494,6 +494,11 @@ endif()
|
||||
|
||||
target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::fast_float)
|
||||
|
||||
if (USE_ORC)
|
||||
dbms_target_link_libraries(PUBLIC ${ORC_LIBRARIES})
|
||||
dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ORC_INCLUDE_DIR} "${CMAKE_BINARY_DIR}/contrib/orc/c++/include")
|
||||
endif ()
|
||||
|
||||
if (TARGET ch_contrib::rocksdb)
|
||||
dbms_target_link_libraries(PUBLIC ch_contrib::rocksdb)
|
||||
endif()
|
||||
@ -573,10 +578,6 @@ if (ENABLE_TESTS)
|
||||
target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::simdjson)
|
||||
endif()
|
||||
|
||||
if(TARGET ch_contrib::rapidjson)
|
||||
target_include_directories(unit_tests_dbms PRIVATE ch_contrib::rapidjson)
|
||||
endif()
|
||||
|
||||
if (TARGET ch_contrib::yaml_cpp)
|
||||
target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::yaml_cpp)
|
||||
endif()
|
||||
|
@ -521,7 +521,7 @@ ColumnObject::ColumnObject(bool is_nullable_)
|
||||
{
|
||||
}
|
||||
|
||||
ColumnObject::ColumnObject(SubcolumnsTree && subcolumns_, bool is_nullable_)
|
||||
ColumnObject::ColumnObject(Subcolumns && subcolumns_, bool is_nullable_)
|
||||
: is_nullable(is_nullable_)
|
||||
, subcolumns(std::move(subcolumns_))
|
||||
, num_rows(subcolumns.empty() ? 0 : (*subcolumns.begin())->data.size())
|
||||
@ -696,7 +696,7 @@ const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & ke
|
||||
ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key)
|
||||
{
|
||||
if (const auto * node = subcolumns.findLeaf(key))
|
||||
return const_cast<SubcolumnsTree::Node *>(node)->data;
|
||||
return const_cast<Subcolumns::Node *>(node)->data;
|
||||
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in ColumnObject", key.getPath());
|
||||
}
|
||||
@ -794,7 +794,7 @@ bool ColumnObject::isFinalized() const
|
||||
void ColumnObject::finalize()
|
||||
{
|
||||
size_t old_size = size();
|
||||
SubcolumnsTree new_subcolumns;
|
||||
Subcolumns new_subcolumns;
|
||||
for (auto && entry : subcolumns)
|
||||
{
|
||||
const auto & least_common_type = entry->data.getLeastCommonType();
|
||||
|
@ -138,20 +138,20 @@ public:
|
||||
size_t num_of_defaults_in_prefix = 0;
|
||||
};
|
||||
|
||||
using SubcolumnsTree = SubcolumnsTree<Subcolumn>;
|
||||
using Subcolumns = SubcolumnsTree<Subcolumn>;
|
||||
|
||||
private:
|
||||
/// If true then all subcolumns are nullable.
|
||||
const bool is_nullable;
|
||||
|
||||
SubcolumnsTree subcolumns;
|
||||
Subcolumns subcolumns;
|
||||
size_t num_rows;
|
||||
|
||||
public:
|
||||
static constexpr auto COLUMN_NAME_DUMMY = "_dummy";
|
||||
|
||||
explicit ColumnObject(bool is_nullable_);
|
||||
ColumnObject(SubcolumnsTree && subcolumns_, bool is_nullable_);
|
||||
ColumnObject(Subcolumns && subcolumns_, bool is_nullable_);
|
||||
|
||||
/// Checks that all subcolumns have consistent sizes.
|
||||
void checkConsistency() const;
|
||||
@ -173,8 +173,8 @@ public:
|
||||
/// It cares about consistency of sizes of Nested arrays.
|
||||
void addNestedSubcolumn(const PathInData & key, const FieldInfo & field_info, size_t new_size);
|
||||
|
||||
const SubcolumnsTree & getSubcolumns() const { return subcolumns; }
|
||||
SubcolumnsTree & getSubcolumns() { return subcolumns; }
|
||||
const Subcolumns & getSubcolumns() const { return subcolumns; }
|
||||
Subcolumns & getSubcolumns() { return subcolumns; }
|
||||
PathsInData getKeys() const;
|
||||
|
||||
/// Finalizes all subcolumns.
|
||||
|
@ -35,10 +35,10 @@ public:
|
||||
{}
|
||||
|
||||
// Format message with fmt::format, like the logging functions.
|
||||
template <typename ...Args>
|
||||
Exception(int code, const std::string & fmt, Args&&... args)
|
||||
: Exception(fmt::format(fmt::runtime(fmt), std::forward<Args>(args)...), code)
|
||||
{}
|
||||
template <typename... Args>
|
||||
Exception(int code, fmt::format_string<Args...> fmt, Args &&... args) : Exception(fmt::format(fmt, std::forward<Args>(args)...), code)
|
||||
{
|
||||
}
|
||||
|
||||
struct CreateFromPocoTag {};
|
||||
struct CreateFromSTDTag {};
|
||||
@ -52,10 +52,10 @@ public:
|
||||
const char * what() const throw() override { return message().data(); }
|
||||
|
||||
/// Add something to the existing message.
|
||||
template <typename ...Args>
|
||||
void addMessage(const std::string& format, Args&&... args)
|
||||
template <typename... Args>
|
||||
void addMessage(fmt::format_string<Args...> format, Args &&... args)
|
||||
{
|
||||
extendedMessage(fmt::format(fmt::runtime(format), std::forward<Args>(args)...));
|
||||
extendedMessage(fmt::format(format, std::forward<Args>(args)...));
|
||||
}
|
||||
|
||||
void addMessage(const std::string& message)
|
||||
@ -117,10 +117,10 @@ public:
|
||||
ParsingException(int code, const std::string & message);
|
||||
|
||||
// Format message with fmt::format, like the logging functions.
|
||||
template <typename ...Args>
|
||||
ParsingException(int code, const std::string & fmt, Args&&... args)
|
||||
: Exception(fmt::format(fmt::runtime(fmt), std::forward<Args>(args)...), code)
|
||||
{}
|
||||
template <typename... Args>
|
||||
ParsingException(int code, fmt::format_string<Args...> fmt, Args &&... args) : Exception(code, fmt, std::forward<Args>(args)...)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
std::string displayText() const
|
||||
|
@ -437,6 +437,7 @@ String FileSegment::stateToString(FileSegment::State state)
|
||||
case FileSegment::State::SKIP_CACHE:
|
||||
return "SKIP_CACHE";
|
||||
}
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
String FileSegmentsHolder::toString()
|
||||
|
@ -67,6 +67,9 @@ struct FixedHashTableCalculatedSize
|
||||
{
|
||||
size_t getSize(const Cell * buf, const typename Cell::State & state, size_t num_cells) const
|
||||
{
|
||||
if (!buf)
|
||||
return 0;
|
||||
|
||||
size_t res = 0;
|
||||
for (const Cell * end = buf + num_cells; buf != end; ++buf)
|
||||
if (!buf->isZero(state))
|
||||
@ -76,6 +79,9 @@ struct FixedHashTableCalculatedSize
|
||||
|
||||
bool isEmpty(const Cell * buf, const typename Cell::State & state, size_t num_cells) const
|
||||
{
|
||||
if (!buf)
|
||||
return true;
|
||||
|
||||
for (const Cell * end = buf + num_cells; buf != end; ++buf)
|
||||
if (!buf->isZero(state))
|
||||
return false;
|
||||
|
@ -94,6 +94,12 @@ public:
|
||||
|
||||
TwoLevelHashTable() = default;
|
||||
|
||||
explicit TwoLevelHashTable(size_t size_hint)
|
||||
{
|
||||
for (auto & impl : impls)
|
||||
impl.reserve(size_hint / NUM_BUCKETS);
|
||||
}
|
||||
|
||||
/// Copy the data from another (normal) hash table. It should have the same hash function.
|
||||
template <typename Source>
|
||||
explicit TwoLevelHashTable(const Source & src)
|
||||
|
@ -9,6 +9,7 @@
|
||||
M(SelectQuery, "Same as Query, but only for SELECT queries.") \
|
||||
M(InsertQuery, "Same as Query, but only for INSERT queries.") \
|
||||
M(AsyncInsertQuery, "Same as InsertQuery, but only for asynchronous INSERT queries.") \
|
||||
M(AsyncInsertBytes, "Data size in bytes of asynchronous INSERT queries.") \
|
||||
M(FailedQuery, "Number of failed queries.") \
|
||||
M(FailedSelectQuery, "Same as FailedQuery, but only for SELECT queries.") \
|
||||
M(FailedInsertQuery, "Same as FailedQuery, but only for INSERT queries.") \
|
||||
@ -284,6 +285,16 @@
|
||||
\
|
||||
M(MainConfigLoads, "Number of times the main configuration was reloaded.") \
|
||||
\
|
||||
M(AggregationPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for aggregation.") \
|
||||
M(AggregationHashTablesInitializedAsTwoLevel, "How many hash tables were inited as two-level for aggregation.") \
|
||||
\
|
||||
M(MergeTreeMetadataCacheGet, "Number of rocksdb reads(used for merge tree metadata cache)") \
|
||||
M(MergeTreeMetadataCachePut, "Number of rocksdb puts(used for merge tree metadata cache)") \
|
||||
M(MergeTreeMetadataCacheDelete, "Number of rocksdb deletes(used for merge tree metadata cache)") \
|
||||
M(MergeTreeMetadataCacheSeek, "Number of rocksdb seeks(used for merge tree metadata cache)") \
|
||||
M(MergeTreeMetadataCacheHit, "Number of times the read of meta file was done from MergeTree metadata cache") \
|
||||
M(MergeTreeMetadataCacheMiss, "Number of times the read of meta file was not done from MergeTree metadata cache") \
|
||||
\
|
||||
M(ScalarSubqueriesGlobalCacheHit, "Number of times a read from a scalar subquery was done using the global cache") \
|
||||
M(ScalarSubqueriesLocalCacheHit, "Number of times a read from a scalar subquery was done using the local cache") \
|
||||
M(ScalarSubqueriesCacheMiss, "Number of times a read from a scalar subquery was not cached and had to be calculated completely")
|
||||
|
46
src/Common/RangeGenerator.h
Normal file
46
src/Common/RangeGenerator.h
Normal file
@ -0,0 +1,46 @@
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <cmath>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class RangeGenerator
|
||||
{
|
||||
public:
|
||||
explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
|
||||
: from(range_start), range_step(range_step_), total_size(total_size_)
|
||||
{
|
||||
}
|
||||
|
||||
size_t totalRanges() const { return static_cast<size_t>(round(static_cast<float>(total_size - from) / range_step)); }
|
||||
|
||||
using Range = std::pair<size_t, size_t>;
|
||||
|
||||
// return upper exclusive range of values, i.e. [from_range, to_range>
|
||||
std::optional<Range> nextRange()
|
||||
{
|
||||
if (from >= total_size)
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
auto to = from + range_step;
|
||||
if (to >= total_size)
|
||||
{
|
||||
to = total_size;
|
||||
}
|
||||
|
||||
Range range{from, to};
|
||||
from = to;
|
||||
return range;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t from;
|
||||
size_t range_step;
|
||||
size_t total_size;
|
||||
};
|
||||
|
||||
}
|
@ -4,6 +4,8 @@
|
||||
#if defined(__linux__)
|
||||
# include <cstdio>
|
||||
# include <mntent.h>
|
||||
# include <sys/stat.h>
|
||||
# include <sys/sysmacros.h>
|
||||
#endif
|
||||
#include <cerrno>
|
||||
#include <Poco/Version.h>
|
||||
@ -13,6 +15,9 @@
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <utime.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
@ -24,6 +29,7 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int SYSTEM_ERROR;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int CANNOT_STAT;
|
||||
extern const int CANNOT_STATVFS;
|
||||
extern const int PATH_ACCESS_DENIED;
|
||||
extern const int CANNOT_CREATE_FILE;
|
||||
@ -57,6 +63,68 @@ std::unique_ptr<TemporaryFile> createTemporaryFile(const std::string & path)
|
||||
return std::make_unique<TemporaryFile>(path);
|
||||
}
|
||||
|
||||
#if !defined(__linux__)
|
||||
[[noreturn]]
|
||||
#endif
|
||||
String getBlockDeviceId([[maybe_unused]] const String & path)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
struct stat sb;
|
||||
if (lstat(path.c_str(), &sb))
|
||||
throwFromErrnoWithPath("Cannot lstat " + path, path, ErrorCodes::CANNOT_STAT);
|
||||
WriteBufferFromOwnString ss;
|
||||
ss << major(sb.st_dev) << ":" << minor(sb.st_dev);
|
||||
return ss.str();
|
||||
#else
|
||||
throw DB::Exception("The function getDeviceId is supported on Linux only", ErrorCodes::NOT_IMPLEMENTED);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if !defined(__linux__)
|
||||
[[noreturn]]
|
||||
#endif
|
||||
BlockDeviceType getBlockDeviceType([[maybe_unused]] const String & device_id)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
try
|
||||
{
|
||||
ReadBufferFromFile in("/sys/dev/block/" + device_id + "/queue/rotational");
|
||||
int rotational;
|
||||
readText(rotational, in);
|
||||
return rotational ? BlockDeviceType::ROT : BlockDeviceType::NONROT;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
return BlockDeviceType::UNKNOWN;
|
||||
}
|
||||
#else
|
||||
throw DB::Exception("The function getDeviceType is supported on Linux only", ErrorCodes::NOT_IMPLEMENTED);
|
||||
#endif
|
||||
}
|
||||
|
||||
#if !defined(__linux__)
|
||||
[[noreturn]]
|
||||
#endif
|
||||
UInt64 getBlockDeviceReadAheadBytes([[maybe_unused]] const String & device_id)
|
||||
{
|
||||
#if defined(__linux__)
|
||||
try
|
||||
{
|
||||
ReadBufferFromFile in("/sys/dev/block/" + device_id + "/queue/read_ahead_kb");
|
||||
int read_ahead_kb;
|
||||
readText(read_ahead_kb, in);
|
||||
return read_ahead_kb * 1024;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
return static_cast<UInt64>(-1);
|
||||
}
|
||||
#else
|
||||
throw DB::Exception("The function getDeviceType is supported on Linux only", ErrorCodes::NOT_IMPLEMENTED);
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Returns name of filesystem mounted to mount_point
|
||||
std::filesystem::path getMountPoint(std::filesystem::path absolute_path)
|
||||
{
|
||||
if (absolute_path.is_relative())
|
||||
|
@ -18,6 +18,31 @@ using TemporaryFile = Poco::TemporaryFile;
|
||||
bool enoughSpaceInDirectory(const std::string & path, size_t data_size);
|
||||
std::unique_ptr<TemporaryFile> createTemporaryFile(const std::string & path);
|
||||
|
||||
// Determine what block device is responsible for specified path
|
||||
#if !defined(__linux__)
|
||||
[[noreturn]]
|
||||
#endif
|
||||
String getBlockDeviceId([[maybe_unused]] const String & path);
|
||||
|
||||
enum class BlockDeviceType
|
||||
{
|
||||
UNKNOWN = 0, // we were unable to determine device type
|
||||
NONROT = 1, // not a rotational device (SSD, NVME, etc)
|
||||
ROT = 2 // rotational device (HDD)
|
||||
};
|
||||
|
||||
// Try to determine block device type
|
||||
#if !defined(__linux__)
|
||||
[[noreturn]]
|
||||
#endif
|
||||
BlockDeviceType getBlockDeviceType([[maybe_unused]] const String & device_id);
|
||||
|
||||
// Get size of read-ahead in bytes for specified block device
|
||||
#if !defined(__linux__)
|
||||
[[noreturn]]
|
||||
#endif
|
||||
UInt64 getBlockDeviceReadAheadBytes([[maybe_unused]] const String & device_id);
|
||||
|
||||
/// Returns mount point of filesystem where absolute_path (must exist) is located
|
||||
std::filesystem::path getMountPoint(std::filesystem::path absolute_path);
|
||||
|
||||
|
178
src/Common/format.h
Normal file
178
src/Common/format.h
Normal file
@ -0,0 +1,178 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/types.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/PODArray.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
namespace Format
|
||||
{
|
||||
using IndexPositions = PODArrayWithStackMemory<UInt64, 64>;
|
||||
|
||||
static inline void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res, UInt64 argument_number)
|
||||
{
|
||||
res = 0;
|
||||
for (UInt64 pos = l; pos < r; ++pos)
|
||||
{
|
||||
if (!isNumericASCII(description[pos]))
|
||||
throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::BAD_ARGUMENTS);
|
||||
res = res * 10 + description[pos] - '0';
|
||||
if (res >= argument_number)
|
||||
throw Exception(
|
||||
"Too big number for arguments, must be at most " + std::to_string(argument_number - 1), ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void init(
|
||||
const String & pattern,
|
||||
size_t argument_number,
|
||||
const std::vector<std::optional<String>> & constant_strings,
|
||||
IndexPositions & index_positions,
|
||||
std::vector<String> & substrings)
|
||||
{
|
||||
/// Is current position after open curly brace.
|
||||
bool is_open_curly = false;
|
||||
/// The position of last open token.
|
||||
size_t last_open = -1;
|
||||
|
||||
/// Is formatting in a plain {} token.
|
||||
std::optional<bool> is_plain_numbering;
|
||||
UInt64 index_if_plain = 0;
|
||||
|
||||
/// Left position of adding substrings, just to the closed brace position or the start of the string.
|
||||
/// Invariant --- the start of substring is in this position.
|
||||
size_t start_pos = 0;
|
||||
|
||||
/// A flag to decide whether we should glue the constant strings.
|
||||
bool glue_to_next = false;
|
||||
|
||||
/// Handling double braces (escaping).
|
||||
auto double_brace_removal = [](String & str)
|
||||
{
|
||||
size_t i = 0;
|
||||
bool should_delete = true;
|
||||
str.erase(
|
||||
std::remove_if(
|
||||
str.begin(),
|
||||
str.end(),
|
||||
[&i, &should_delete, &str](char)
|
||||
{
|
||||
bool is_double_brace = (str[i] == '{' && str[i + 1] == '{') || (str[i] == '}' && str[i + 1] == '}');
|
||||
++i;
|
||||
if (is_double_brace && should_delete)
|
||||
{
|
||||
should_delete = false;
|
||||
return true;
|
||||
}
|
||||
should_delete = true;
|
||||
return false;
|
||||
}),
|
||||
str.end());
|
||||
};
|
||||
|
||||
index_positions.emplace_back();
|
||||
|
||||
for (size_t i = 0; i < pattern.size(); ++i)
|
||||
{
|
||||
if (pattern[i] == '{')
|
||||
{
|
||||
/// Escaping handling
|
||||
/// It is safe to access because of null termination
|
||||
if (pattern[i + 1] == '{')
|
||||
{
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (is_open_curly)
|
||||
throw Exception("Two open curly braces without close one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
String to_add = String(pattern.data() + start_pos, i - start_pos);
|
||||
double_brace_removal(to_add);
|
||||
if (!glue_to_next)
|
||||
substrings.emplace_back(to_add);
|
||||
else
|
||||
substrings.back() += to_add;
|
||||
|
||||
glue_to_next = false;
|
||||
|
||||
is_open_curly = true;
|
||||
last_open = i + 1;
|
||||
}
|
||||
else if (pattern[i] == '}')
|
||||
{
|
||||
if (pattern[i + 1] == '}')
|
||||
{
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!is_open_curly)
|
||||
throw Exception("Closed curly brace without open one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
is_open_curly = false;
|
||||
|
||||
if (last_open == i)
|
||||
{
|
||||
if (is_plain_numbering && !*is_plain_numbering)
|
||||
throw Exception(
|
||||
"Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
|
||||
is_plain_numbering = true;
|
||||
if (index_if_plain >= argument_number)
|
||||
throw Exception("Argument is too big for formatting", ErrorCodes::BAD_ARGUMENTS);
|
||||
index_positions.back() = index_if_plain++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is_plain_numbering && *is_plain_numbering)
|
||||
throw Exception(
|
||||
"Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
|
||||
is_plain_numbering = false;
|
||||
|
||||
UInt64 arg;
|
||||
parseNumber(pattern, last_open, i, arg, argument_number);
|
||||
|
||||
if (arg >= argument_number)
|
||||
throw Exception(
|
||||
"Argument is too big for formatting. Note that indexing starts from zero", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
index_positions.back() = arg;
|
||||
}
|
||||
|
||||
if (!constant_strings.empty() && constant_strings[index_positions.back()])
|
||||
{
|
||||
/// The next string should be glued to last `A {} C`.format('B') -> `A B C`.
|
||||
glue_to_next = true;
|
||||
substrings.back() += *constant_strings[index_positions.back()];
|
||||
}
|
||||
else
|
||||
index_positions.emplace_back(); /// Otherwise we commit arg number and proceed.
|
||||
|
||||
start_pos = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_open_curly)
|
||||
throw Exception("Last open curly brace is not closed", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
String to_add = String(pattern.data() + start_pos, pattern.size() - start_pos);
|
||||
double_brace_removal(to_add);
|
||||
|
||||
if (!glue_to_next)
|
||||
substrings.emplace_back(to_add);
|
||||
else
|
||||
substrings.back() += to_add;
|
||||
|
||||
index_positions.pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,7 +1,18 @@
|
||||
#include "gtest_global_context.h"
|
||||
|
||||
const ContextHolder & getContext()
|
||||
{
|
||||
return getMutableContext();
|
||||
}
|
||||
|
||||
ContextHolder & getMutableContext()
|
||||
{
|
||||
static ContextHolder holder;
|
||||
return holder;
|
||||
}
|
||||
|
||||
void destroyContext()
|
||||
{
|
||||
auto & holder = getMutableContext();
|
||||
return holder.destroy();
|
||||
}
|
||||
|
@ -16,6 +16,17 @@ struct ContextHolder
|
||||
}
|
||||
|
||||
ContextHolder(ContextHolder &&) = default;
|
||||
|
||||
void destroy()
|
||||
{
|
||||
context->shutdown();
|
||||
context.reset();
|
||||
shared_context.reset();
|
||||
}
|
||||
};
|
||||
|
||||
const ContextHolder & getContext();
|
||||
|
||||
ContextHolder & getMutableContext();
|
||||
|
||||
void destroyContext();
|
||||
|
@ -165,25 +165,36 @@ void registerCodecNone(CompressionCodecFactory & factory);
|
||||
void registerCodecLZ4(CompressionCodecFactory & factory);
|
||||
void registerCodecLZ4HC(CompressionCodecFactory & factory);
|
||||
void registerCodecZSTD(CompressionCodecFactory & factory);
|
||||
void registerCodecMultiple(CompressionCodecFactory & factory);
|
||||
|
||||
|
||||
/// Keeper use only general-purpose codecs, so we don't need these special codecs
|
||||
/// in standalone build
|
||||
#ifndef KEEPER_STANDALONE_BUILD
|
||||
|
||||
void registerCodecDelta(CompressionCodecFactory & factory);
|
||||
void registerCodecT64(CompressionCodecFactory & factory);
|
||||
void registerCodecDoubleDelta(CompressionCodecFactory & factory);
|
||||
void registerCodecGorilla(CompressionCodecFactory & factory);
|
||||
void registerCodecEncrypted(CompressionCodecFactory & factory);
|
||||
void registerCodecMultiple(CompressionCodecFactory & factory);
|
||||
|
||||
#endif
|
||||
|
||||
CompressionCodecFactory::CompressionCodecFactory()
|
||||
{
|
||||
registerCodecLZ4(*this);
|
||||
registerCodecNone(*this);
|
||||
registerCodecLZ4(*this);
|
||||
registerCodecZSTD(*this);
|
||||
registerCodecLZ4HC(*this);
|
||||
registerCodecMultiple(*this);
|
||||
|
||||
#ifndef KEEPER_STANDALONE_BUILD
|
||||
registerCodecDelta(*this);
|
||||
registerCodecT64(*this);
|
||||
registerCodecDoubleDelta(*this);
|
||||
registerCodecGorilla(*this);
|
||||
registerCodecEncrypted(*this);
|
||||
registerCodecMultiple(*this);
|
||||
#endif
|
||||
|
||||
default_codec = get("LZ4", {});
|
||||
}
|
||||
|
@ -500,6 +500,10 @@ class IColumn;
|
||||
M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
|
||||
M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
|
||||
\
|
||||
M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \
|
||||
M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \
|
||||
M(UInt64, max_size_to_preallocate_for_aggregation, 10'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before aggregation", 0) \
|
||||
\
|
||||
/** Experimental feature for moving data between shards. */ \
|
||||
\
|
||||
M(Bool, allow_experimental_query_deduplication, false, "Experimental data deduplication for SELECT queries based on part UUIDs", 0) \
|
||||
|
@ -187,5 +187,4 @@ DECLARE_SETTING_ENUM_WITH_RENAME(EnumComparingMode, FormatSettings::EnumComparin
|
||||
DECLARE_SETTING_ENUM_WITH_RENAME(EscapingRule, FormatSettings::EscapingRule)
|
||||
|
||||
DECLARE_SETTING_ENUM_WITH_RENAME(MsgPackUUIDRepresentation, FormatSettings::MsgPackUUIDRepresentation)
|
||||
|
||||
}
|
||||
|
@ -63,12 +63,12 @@ private:
|
||||
size_t num_dimensions_to_keep;
|
||||
};
|
||||
|
||||
using Node = typename ColumnObject::SubcolumnsTree::Node;
|
||||
using Node = typename ColumnObject::Subcolumns::Node;
|
||||
|
||||
/// Finds a subcolumn from the same Nested type as @entry and inserts
|
||||
/// an array with default values with consistent sizes as in Nested type.
|
||||
bool tryInsertDefaultFromNested(
|
||||
const std::shared_ptr<Node> & entry, const ColumnObject::SubcolumnsTree & subcolumns)
|
||||
const std::shared_ptr<Node> & entry, const ColumnObject::Subcolumns & subcolumns)
|
||||
{
|
||||
if (!entry->path.hasNested())
|
||||
return false;
|
||||
@ -198,7 +198,7 @@ void SerializationObject<Parser>::deserializeWholeText(IColumn & column, ReadBuf
|
||||
template <typename Parser>
|
||||
void SerializationObject<Parser>::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
|
||||
{
|
||||
deserializeTextImpl(column, [&](String & s) { readEscapedStringInto(s, istr); });
|
||||
deserializeTextImpl(column, [&](String & s) { readEscapedString(s, istr); });
|
||||
}
|
||||
|
||||
template <typename Parser>
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/InterpreterCreateQuery.h>
|
||||
#include <Interpreters/ApplyWithSubqueryVisitor.h>
|
||||
#include <Parsers/ASTCreateQuery.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ParserCreateQuery.h>
|
||||
@ -55,6 +56,9 @@ std::pair<String, StoragePtr> createTableFromAST(
|
||||
ast_create_query.attach = true;
|
||||
ast_create_query.setDatabase(database_name);
|
||||
|
||||
if (ast_create_query.select && ast_create_query.isView())
|
||||
ApplyWithSubqueryVisitor().visit(*ast_create_query.select);
|
||||
|
||||
if (ast_create_query.as_table_function)
|
||||
{
|
||||
const auto & factory = TableFunctionFactory::instance();
|
||||
|
@ -179,8 +179,12 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr
|
||||
|
||||
if (!task->was_executed)
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry {} was executed, but was not committed: code {}: {}",
|
||||
task->execution_status.code, task->execution_status.message);
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Entry {} was executed, but was not committed: code {}: {}",
|
||||
task->entry_name,
|
||||
task->execution_status.code,
|
||||
task->execution_status.message);
|
||||
}
|
||||
|
||||
try_node->setAlreadyRemoved();
|
||||
|
@ -50,7 +50,7 @@ namespace
|
||||
{
|
||||
if (!qualified_name.database.empty())
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Dictionary source of type {} specifies a schema but schema is not supported by {}-driver",
|
||||
"Dictionary source specifies a schema but schema is not supported by {}-driver",
|
||||
bridge_.getName());
|
||||
}
|
||||
|
||||
|
@ -392,8 +392,13 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment)
|
||||
if (bytes_to_predownload)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Failed to predownload remaining {} bytes. Current file segment: {}, current download offset: {}, expected: {}, eof: {}",
|
||||
file_segment->range().toString(), file_segment->getDownloadOffset(), file_offset_of_buffer_end, implementation_buffer->eof());
|
||||
"Failed to predownload remaining {} bytes. Current file segment: {}, current download offset: {}, expected: {}, "
|
||||
"eof: {}",
|
||||
bytes_to_predownload,
|
||||
file_segment->range().toString(),
|
||||
file_segment->getDownloadOffset(),
|
||||
file_offset_of_buffer_end,
|
||||
implementation_buffer->eof());
|
||||
|
||||
auto result = implementation_buffer->hasPendingData();
|
||||
|
||||
|
@ -96,6 +96,7 @@ private:
|
||||
case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE:
|
||||
return "REMOTE_FS_READ_AND_PUT_IN_CACHE";
|
||||
}
|
||||
__builtin_unreachable();
|
||||
}
|
||||
size_t first_offset = 0;
|
||||
};
|
||||
|
@ -44,7 +44,7 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S
|
||||
{
|
||||
return std::make_unique<ReadBufferFromS3>(
|
||||
client_ptr, bucket, fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries,
|
||||
settings, /* use_external_buffer */true, read_until_position, /* restricted_seek */true);
|
||||
settings, /* use_external_buffer */true, /* offset */ 0, read_until_position, /* restricted_seek */true);
|
||||
};
|
||||
|
||||
if (with_cache)
|
||||
|
@ -85,9 +85,12 @@ FormatSchemaInfo::FormatSchemaInfo(const String & format_schema, const String &
|
||||
else if (path.has_parent_path() && !fs::weakly_canonical(default_schema_directory_path / path).string().starts_with(fs::weakly_canonical(default_schema_directory_path).string()))
|
||||
{
|
||||
if (is_server)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Path in the 'format_schema' setting shouldn't go outside the 'format_schema_path' directory: {} ({} not in {})",
|
||||
path.string());
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"Path in the 'format_schema' setting shouldn't go outside the 'format_schema_path' directory: {} ({} not in {})",
|
||||
default_schema_directory(),
|
||||
path.string(),
|
||||
default_schema_directory());
|
||||
path = default_schema_directory_path / path;
|
||||
schema_path = path.filename();
|
||||
schema_directory = path.parent_path() / "";
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeObject.h>
|
||||
#include <Common/JSONParsers/SimdJSONParser.h>
|
||||
#include <Common/JSONParsers/RapidJSONParser.h>
|
||||
#include <Common/JSONParsers/DummyJSONParser.h>
|
||||
@ -158,22 +159,37 @@ DataTypePtr getDataTypeFromJSONFieldImpl(const Element & field)
|
||||
{
|
||||
auto object = field.getObject();
|
||||
DataTypePtr value_type;
|
||||
bool is_object = false;
|
||||
for (const auto key_value_pair : object)
|
||||
{
|
||||
auto type = getDataTypeFromJSONFieldImpl(key_value_pair.second);
|
||||
if (!type)
|
||||
return nullptr;
|
||||
continue;
|
||||
|
||||
if (value_type && value_type->getName() != type->getName())
|
||||
return nullptr;
|
||||
if (isObject(type))
|
||||
{
|
||||
is_object = true;
|
||||
break;
|
||||
}
|
||||
|
||||
value_type = type;
|
||||
if (!value_type)
|
||||
{
|
||||
value_type = type;
|
||||
}
|
||||
else if (!value_type->equals(*type))
|
||||
{
|
||||
is_object = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!value_type)
|
||||
return nullptr;
|
||||
if (is_object)
|
||||
return std::make_shared<DataTypeObject>("json", false);
|
||||
|
||||
return std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), value_type);
|
||||
if (value_type)
|
||||
return std::make_shared<DataTypeMap>(std::make_shared<DataTypeString>(), value_type);
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
throw Exception{ErrorCodes::INCORRECT_DATA, "Unexpected JSON type"};
|
||||
|
@ -7,6 +7,8 @@
|
||||
#include <Formats/ReadSchemaUtils.h>
|
||||
#include <Processors/Formats/ISchemaReader.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Storages/IStorage.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -17,6 +19,28 @@ namespace ErrorCodes
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
static std::optional<NamesAndTypesList> getOrderedColumnsList(
|
||||
const NamesAndTypesList & columns_list, const Names & columns_order_hint)
|
||||
{
|
||||
if (columns_list.size() != columns_order_hint.size())
|
||||
return {};
|
||||
|
||||
std::unordered_map<String, DataTypePtr> available_columns;
|
||||
for (const auto & [name, type] : columns_list)
|
||||
available_columns.emplace(name, type);
|
||||
|
||||
NamesAndTypesList res;
|
||||
for (const auto & name : columns_order_hint)
|
||||
{
|
||||
auto it = available_columns.find(name);
|
||||
if (it == available_columns.end())
|
||||
return {};
|
||||
|
||||
res.emplace_back(name, it->second);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
ColumnsDescription readSchemaFromFormat(
|
||||
const String & format_name,
|
||||
const std::optional<FormatSettings> & format_settings,
|
||||
@ -52,6 +76,22 @@ ColumnsDescription readSchemaFromFormat(
|
||||
{
|
||||
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}", format_name, e.message());
|
||||
}
|
||||
|
||||
/// If we have "INSERT SELECT" query then try to order
|
||||
/// columns as they are ordered in table schema for formats
|
||||
/// without strict column order (like JSON and TSKV).
|
||||
/// It will allow to execute simple data loading with query
|
||||
/// "INSERT INTO table SELECT * FROM ..."
|
||||
const auto & insertion_table = context->getInsertionTable();
|
||||
if (!schema_reader->hasStrictOrderOfColumns() && !insertion_table.empty())
|
||||
{
|
||||
auto storage = DatabaseCatalog::instance().getTable(insertion_table, context);
|
||||
auto metadata = storage->getInMemoryMetadataPtr();
|
||||
auto names_in_storage = metadata->getColumns().getNamesOfPhysical();
|
||||
auto ordered_list = getOrderedColumnsList(names_and_types, names_in_storage);
|
||||
if (ordered_list)
|
||||
names_and_types = *ordered_list;
|
||||
}
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} file format doesn't support schema inference", format_name);
|
||||
|
@ -13,6 +13,7 @@ void registerFileSegmentationEngineCSV(FormatFactory & factory);
|
||||
void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory);
|
||||
void registerFileSegmentationEngineRegexp(FormatFactory & factory);
|
||||
void registerFileSegmentationEngineJSONAsString(FormatFactory & factory);
|
||||
void registerFileSegmentationEngineJSONAsObject(FormatFactory & factory);
|
||||
void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory);
|
||||
|
||||
/// Formats for both input/output.
|
||||
@ -103,6 +104,7 @@ void registerProtobufSchemaReader(FormatFactory & factory);
|
||||
void registerProtobufListSchemaReader(FormatFactory & factory);
|
||||
void registerLineAsStringSchemaReader(FormatFactory & factory);
|
||||
void registerJSONAsStringSchemaReader(FormatFactory & factory);
|
||||
void registerJSONAsObjectSchemaReader(FormatFactory & factory);
|
||||
void registerRawBLOBSchemaReader(FormatFactory & factory);
|
||||
void registerMsgPackSchemaReader(FormatFactory & factory);
|
||||
void registerCapnProtoSchemaReader(FormatFactory & factory);
|
||||
@ -123,6 +125,7 @@ void registerFormats()
|
||||
registerFileSegmentationEngineJSONEachRow(factory);
|
||||
registerFileSegmentationEngineRegexp(factory);
|
||||
registerFileSegmentationEngineJSONAsString(factory);
|
||||
registerFileSegmentationEngineJSONAsObject(factory);
|
||||
registerFileSegmentationEngineJSONCompactEachRow(factory);
|
||||
|
||||
registerInputFormatNative(factory);
|
||||
@ -207,6 +210,7 @@ void registerFormats()
|
||||
registerProtobufListSchemaReader(factory);
|
||||
registerLineAsStringSchemaReader(factory);
|
||||
registerJSONAsStringSchemaReader(factory);
|
||||
registerJSONAsObjectSchemaReader(factory);
|
||||
registerRawBLOBSchemaReader(factory);
|
||||
registerMsgPackSchemaReader(factory);
|
||||
registerCapnProtoSchemaReader(factory);
|
||||
|
@ -53,6 +53,7 @@
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -886,7 +887,7 @@ struct ConvertImplGenericToString
|
||||
const IColumn & col_from = *col_with_type_and_name.column;
|
||||
|
||||
size_t size = col_from.size();
|
||||
auto col_to = result_type->createColumn();
|
||||
auto col_to = removeNullable(result_type)->createColumn();
|
||||
|
||||
{
|
||||
ColumnStringHelpers::WriteHelper write_helper(
|
||||
@ -3140,52 +3141,138 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
WrapperType createTupleToObjectWrapper(const DataTypeTuple & from_tuple, bool has_nullable_subcolumns) const
|
||||
{
|
||||
if (!from_tuple.haveExplicitNames())
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH,
|
||||
"Cast to Object can be performed only from flatten Named Tuple. Got: {}", from_tuple.getName());
|
||||
|
||||
PathsInData paths;
|
||||
DataTypes from_types;
|
||||
|
||||
std::tie(paths, from_types) = flattenTuple(from_tuple.getPtr());
|
||||
auto to_types = from_types;
|
||||
|
||||
for (auto & type : to_types)
|
||||
{
|
||||
if (isTuple(type) || isNested(type))
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH,
|
||||
"Cast to Object can be performed only from flatten Named Tuple. Got: {}",
|
||||
from_tuple.getName());
|
||||
|
||||
type = recursiveRemoveLowCardinality(type);
|
||||
}
|
||||
|
||||
return [element_wrappers = getElementWrappers(from_types, to_types),
|
||||
has_nullable_subcolumns, from_types, to_types, paths]
|
||||
(ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count)
|
||||
{
|
||||
size_t tuple_size = to_types.size();
|
||||
auto flattened_column = flattenTuple(arguments.front().column);
|
||||
const auto & column_tuple = assert_cast<const ColumnTuple &>(*flattened_column);
|
||||
|
||||
if (tuple_size != column_tuple.getColumns().size())
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH,
|
||||
"Expected tuple with {} subcolumn, but got {} subcolumns",
|
||||
tuple_size, column_tuple.getColumns().size());
|
||||
|
||||
auto res = ColumnObject::create(has_nullable_subcolumns);
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
{
|
||||
ColumnsWithTypeAndName element = {{column_tuple.getColumns()[i], from_types[i], "" }};
|
||||
auto converted_column = element_wrappers[i](element, to_types[i], nullable_source, input_rows_count);
|
||||
res->addSubcolumn(paths[i], converted_column->assumeMutable());
|
||||
}
|
||||
|
||||
return res;
|
||||
};
|
||||
}
|
||||
|
||||
WrapperType createMapToObjectWrapper(const DataTypeMap & from_map, bool has_nullable_subcolumns) const
|
||||
{
|
||||
auto key_value_types = from_map.getKeyValueTypes();
|
||||
|
||||
if (!isStringOrFixedString(key_value_types[0]))
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH,
|
||||
"Cast to Object from Map can be performed only from Map "
|
||||
"with String or FixedString key. Got: {}", from_map.getName());
|
||||
|
||||
const auto & value_type = key_value_types[1];
|
||||
auto to_value_type = value_type;
|
||||
|
||||
if (!has_nullable_subcolumns && value_type->isNullable())
|
||||
to_value_type = removeNullable(value_type);
|
||||
|
||||
if (has_nullable_subcolumns && !value_type->isNullable())
|
||||
to_value_type = makeNullable(value_type);
|
||||
|
||||
DataTypes to_key_value_types{std::make_shared<DataTypeString>(), std::move(to_value_type)};
|
||||
auto element_wrappers = getElementWrappers(key_value_types, to_key_value_types);
|
||||
|
||||
return [has_nullable_subcolumns, element_wrappers, key_value_types, to_key_value_types]
|
||||
(ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t) -> ColumnPtr
|
||||
{
|
||||
const auto & column_map = assert_cast<const ColumnMap &>(*arguments.front().column);
|
||||
const auto & offsets = column_map.getNestedColumn().getOffsets();
|
||||
auto key_value_columns = column_map.getNestedData().getColumnsCopy();
|
||||
|
||||
for (size_t i = 0; i < 2; ++i)
|
||||
{
|
||||
ColumnsWithTypeAndName element{{key_value_columns[i], key_value_types[i], ""}};
|
||||
key_value_columns[i] = element_wrappers[i](element, to_key_value_types[i], nullable_source, key_value_columns[i]->size());
|
||||
}
|
||||
|
||||
const auto & key_column_str = assert_cast<const ColumnString &>(*key_value_columns[0]);
|
||||
const auto & value_column = *key_value_columns[1];
|
||||
|
||||
using SubcolumnsMap = HashMap<StringRef, MutableColumnPtr, StringRefHash>;
|
||||
SubcolumnsMap subcolumns;
|
||||
|
||||
for (size_t row = 0; row < offsets.size(); ++row)
|
||||
{
|
||||
for (size_t i = offsets[static_cast<ssize_t>(row) - 1]; i < offsets[row]; ++i)
|
||||
{
|
||||
auto ref = key_column_str.getDataAt(i);
|
||||
|
||||
bool inserted;
|
||||
SubcolumnsMap::LookupResult it;
|
||||
subcolumns.emplace(ref, it, inserted);
|
||||
auto & subcolumn = it->getMapped();
|
||||
|
||||
if (inserted)
|
||||
subcolumn = value_column.cloneEmpty()->cloneResized(row);
|
||||
|
||||
/// Map can have duplicated keys. We insert only first one.
|
||||
if (subcolumn->size() == row)
|
||||
subcolumn->insertFrom(value_column, i);
|
||||
}
|
||||
|
||||
/// Insert default values for keys missed in current row.
|
||||
for (const auto & [_, subcolumn] : subcolumns)
|
||||
if (subcolumn->size() == row)
|
||||
subcolumn->insertDefault();
|
||||
}
|
||||
|
||||
auto column_object = ColumnObject::create(has_nullable_subcolumns);
|
||||
for (auto && [key, subcolumn] : subcolumns)
|
||||
{
|
||||
PathInData path(key.toView());
|
||||
column_object->addSubcolumn(path, std::move(subcolumn));
|
||||
}
|
||||
|
||||
return column_object;
|
||||
};
|
||||
}
|
||||
|
||||
WrapperType createObjectWrapper(const DataTypePtr & from_type, const DataTypeObject * to_type) const
|
||||
{
|
||||
if (const auto * from_tuple = checkAndGetDataType<DataTypeTuple>(from_type.get()))
|
||||
{
|
||||
if (!from_tuple->haveExplicitNames())
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH,
|
||||
"Cast to Object can be performed only from flatten Named Tuple. Got: {}", from_type->getName());
|
||||
|
||||
PathsInData paths;
|
||||
DataTypes from_types;
|
||||
|
||||
std::tie(paths, from_types) = flattenTuple(from_type);
|
||||
auto to_types = from_types;
|
||||
|
||||
for (auto & type : to_types)
|
||||
{
|
||||
if (isTuple(type) || isNested(type))
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH,
|
||||
"Cast to Object can be performed only from flatten Named Tuple. Got: {}", from_type->getName());
|
||||
|
||||
type = recursiveRemoveLowCardinality(type);
|
||||
}
|
||||
|
||||
return [element_wrappers = getElementWrappers(from_types, to_types),
|
||||
has_nullable_subcolumns = to_type->hasNullableSubcolumns(), from_types, to_types, paths]
|
||||
(ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable * nullable_source, size_t input_rows_count)
|
||||
{
|
||||
size_t tuple_size = to_types.size();
|
||||
auto flattened_column = flattenTuple(arguments.front().column);
|
||||
const auto & column_tuple = assert_cast<const ColumnTuple &>(*flattened_column);
|
||||
|
||||
if (tuple_size != column_tuple.getColumns().size())
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH,
|
||||
"Expected tuple with {} subcolumn, but got {} subcolumns",
|
||||
tuple_size, column_tuple.getColumns().size());
|
||||
|
||||
auto res = ColumnObject::create(has_nullable_subcolumns);
|
||||
for (size_t i = 0; i < tuple_size; ++i)
|
||||
{
|
||||
ColumnsWithTypeAndName element = {{column_tuple.getColumns()[i], from_types[i], "" }};
|
||||
auto converted_column = element_wrappers[i](element, to_types[i], nullable_source, input_rows_count);
|
||||
res->addSubcolumn(paths[i], converted_column->assumeMutable());
|
||||
}
|
||||
|
||||
return res;
|
||||
};
|
||||
return createTupleToObjectWrapper(*from_tuple, to_type->hasNullableSubcolumns());
|
||||
}
|
||||
else if (const auto * from_map = checkAndGetDataType<DataTypeMap>(from_type.get()))
|
||||
{
|
||||
return createMapToObjectWrapper(*from_map, to_type->hasNullableSubcolumns());
|
||||
}
|
||||
else if (checkAndGetDataType<DataTypeString>(from_type.get()))
|
||||
{
|
||||
@ -3199,7 +3286,7 @@ private:
|
||||
}
|
||||
|
||||
throw Exception(ErrorCodes::TYPE_MISMATCH,
|
||||
"Cast to Object can be performed only from flatten named tuple or string. Got: {}", from_type->getName());
|
||||
"Cast to Object can be performed only from flatten named Tuple, Map or String. Got: {}", from_type->getName());
|
||||
}
|
||||
|
||||
template <typename FieldType>
|
||||
|
@ -259,7 +259,7 @@ public:
|
||||
throw Exception(
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Function '{}' needs at least 2 arguments, at most 3 arguments; passed {}.",
|
||||
arguments.size());
|
||||
name, arguments.size());
|
||||
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.",
|
||||
|
@ -181,9 +181,12 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
|
||||
// Default implementation for nulls returns null result for null arguments,
|
||||
// so the result type must be nullable.
|
||||
if (!result_type->isNullable())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Function {} with Null argument and default implementation for Nulls "
|
||||
"is expected to return Nullable result, got {}", result_type->getName());
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Function {} with Null argument and default implementation for Nulls "
|
||||
"is expected to return Nullable result, got {}",
|
||||
getName(),
|
||||
result_type->getName());
|
||||
|
||||
return result_type->createColumnConstWithDefaultValue(input_rows_count);
|
||||
}
|
||||
|
@ -231,7 +231,7 @@ private:
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Function {} decimal scale should have native UInt type. Actual {}",
|
||||
scale_argument.type->getName());
|
||||
getName(), scale_argument.type->getName());
|
||||
}
|
||||
|
||||
scale = arguments[additional_argument_index].column->getUInt(0);
|
||||
|
@ -52,23 +52,21 @@ public:
|
||||
{
|
||||
if (arguments.size() < 2)
|
||||
throw Exception(
|
||||
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
|
||||
+ ", should be at least 2.",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
if (arguments.size() > FormatImpl::argument_threshold)
|
||||
throw Exception(
|
||||
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
|
||||
+ ", should be at most " + std::to_string(FormatImpl::argument_threshold),
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Number of arguments for function {} doesn't match: passed {}, should be at least 2",
|
||||
getName(),
|
||||
arguments.size());
|
||||
|
||||
for (const auto arg_idx : collections::range(0, arguments.size()))
|
||||
{
|
||||
const auto * arg = arguments[arg_idx].get();
|
||||
if (!isStringOrFixedString(arg))
|
||||
throw Exception{"Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function "
|
||||
+ getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of argument {} of function {}",
|
||||
arg->getName(),
|
||||
arg_idx + 1,
|
||||
getName());
|
||||
}
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
@ -125,7 +123,7 @@ private:
|
||||
std::vector<const ColumnString::Chars *> data(num_arguments);
|
||||
std::vector<const ColumnString::Offsets *> offsets(num_arguments);
|
||||
std::vector<size_t> fixed_string_sizes(num_arguments);
|
||||
std::vector<String> constant_strings(num_arguments);
|
||||
std::vector<std::optional<String>> constant_strings(num_arguments);
|
||||
bool has_column_string = false;
|
||||
bool has_column_fixed_string = false;
|
||||
for (size_t i = 0; i < num_arguments; ++i)
|
||||
|
@ -112,7 +112,7 @@ public:
|
||||
|| (res = executeType<DataTypeDateTime64>(arguments, result_type))))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Illegal column {} of function {], must be Date or DateTime.",
|
||||
"Illegal column {} of function {}, must be Date or DateTime.",
|
||||
arguments[1].column->getName(),
|
||||
getName());
|
||||
|
||||
|
68
src/Functions/flattenTuple.cpp
Normal file
68
src/Functions/flattenTuple.cpp
Normal file
@ -0,0 +1,68 @@
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <DataTypes/ObjectUtils.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
class FunctionFlattenTuple : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "flattenTuple";
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionFlattenTuple>(); }
|
||||
|
||||
String getName() const override { return name; }
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; }
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
const auto & type = arguments[0];
|
||||
const auto * type_tuple = checkAndGetDataType<DataTypeTuple>(type.get());
|
||||
if (!type_tuple || !type_tuple->haveExplicitNames())
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Argument for function '{}' must be Named Tuple. Got '{}'",
|
||||
getName(), type->getName());
|
||||
|
||||
auto [paths, types] = flattenTuple(type);
|
||||
Names names;
|
||||
names.reserve(paths.size());
|
||||
for (const auto & path : paths)
|
||||
names.push_back(path.getPath());
|
||||
|
||||
return std::make_shared<DataTypeTuple>(types, names);
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
auto column = arguments.at(0).column;
|
||||
if (!checkAndGetColumn<ColumnTuple>(column.get()))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Illegal column {} of first argument of function {}. Expected ColumnTuple",
|
||||
column->getName(), getName());
|
||||
|
||||
return flattenTuple(column);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void registerFunctionFlattenTuple(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionFlattenTuple>();
|
||||
}
|
||||
|
||||
}
|
@ -45,25 +45,23 @@ public:
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (arguments.empty())
|
||||
if (arguments.size() < 2)
|
||||
throw Exception(
|
||||
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
|
||||
+ ", should be at least 1",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
if (arguments.size() > FormatImpl::argument_threshold)
|
||||
throw Exception(
|
||||
"Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size())
|
||||
+ ", should be at most " + std::to_string(FormatImpl::argument_threshold),
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Number of arguments for function {} doesn't match: passed {}, should be at least 2",
|
||||
getName(),
|
||||
arguments.size());
|
||||
|
||||
for (const auto arg_idx : collections::range(0, arguments.size()))
|
||||
{
|
||||
const auto * arg = arguments[arg_idx].get();
|
||||
if (!isStringOrFixedString(arg))
|
||||
throw Exception(
|
||||
"Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of argument {} of function {}",
|
||||
arg->getName(),
|
||||
arg_idx + 1,
|
||||
getName());
|
||||
}
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
@ -84,7 +82,7 @@ public:
|
||||
std::vector<const ColumnString::Chars *> data(arguments.size() - 1);
|
||||
std::vector<const ColumnString::Offsets *> offsets(arguments.size() - 1);
|
||||
std::vector<size_t> fixed_string_sizes(arguments.size() - 1);
|
||||
std::vector<String> constant_strings(arguments.size() - 1);
|
||||
std::vector<std::optional<String>> constant_strings(arguments.size() - 1);
|
||||
|
||||
bool has_column_string = false;
|
||||
bool has_column_fixed_string = false;
|
||||
|
@ -4,8 +4,10 @@
|
||||
#include <base/types.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/format.h>
|
||||
#include <Common/memcpySmall.h>
|
||||
|
||||
|
||||
#include <algorithm>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
@ -15,15 +17,9 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
struct FormatImpl
|
||||
{
|
||||
static constexpr size_t small_argument_threshold = 1024;
|
||||
static constexpr size_t argument_threshold = std::numeric_limits<UInt32>::max();
|
||||
static constexpr size_t right_padding = 15;
|
||||
|
||||
template <typename... Args>
|
||||
@ -39,165 +35,10 @@ struct FormatImpl
|
||||
format<false, false>(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
static void parseNumber(const String & description, UInt64 l, UInt64 r, UInt64 & res)
|
||||
{
|
||||
res = 0;
|
||||
for (UInt64 pos = l; pos < r; ++pos)
|
||||
{
|
||||
if (!isNumericASCII(description[pos]))
|
||||
throw Exception("Not a number in curly braces at position " + std::to_string(pos), ErrorCodes::BAD_ARGUMENTS);
|
||||
res = res * 10 + description[pos] - '0';
|
||||
if (res >= argument_threshold)
|
||||
throw Exception(
|
||||
"Too big number for arguments, must be at most " + std::to_string(argument_threshold), ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void init(
|
||||
const String & pattern,
|
||||
const std::vector<const ColumnString::Chars *> & data,
|
||||
size_t argument_number,
|
||||
const std::vector<String> & constant_strings,
|
||||
UInt64 * index_positions_ptr,
|
||||
std::vector<String> & substrings)
|
||||
{
|
||||
/// Is current position after open curly brace.
|
||||
bool is_open_curly = false;
|
||||
/// The position of last open token.
|
||||
size_t last_open = -1;
|
||||
|
||||
/// Is formatting in a plain {} token.
|
||||
std::optional<bool> is_plain_numbering;
|
||||
UInt64 index_if_plain = 0;
|
||||
|
||||
/// Left position of adding substrings, just to the closed brace position or the start of the string.
|
||||
/// Invariant --- the start of substring is in this position.
|
||||
size_t start_pos = 0;
|
||||
|
||||
/// A flag to decide whether we should glue the constant strings.
|
||||
bool glue_to_next = false;
|
||||
|
||||
/// Handling double braces (escaping).
|
||||
auto double_brace_removal = [](String & str)
|
||||
{
|
||||
size_t i = 0;
|
||||
bool should_delete = true;
|
||||
str.erase(
|
||||
std::remove_if(
|
||||
str.begin(),
|
||||
str.end(),
|
||||
[&i, &should_delete, &str](char)
|
||||
{
|
||||
bool is_double_brace = (str[i] == '{' && str[i + 1] == '{') || (str[i] == '}' && str[i + 1] == '}');
|
||||
++i;
|
||||
if (is_double_brace && should_delete)
|
||||
{
|
||||
should_delete = false;
|
||||
return true;
|
||||
}
|
||||
should_delete = true;
|
||||
return false;
|
||||
}),
|
||||
str.end());
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < pattern.size(); ++i)
|
||||
{
|
||||
if (pattern[i] == '{')
|
||||
{
|
||||
/// Escaping handling
|
||||
/// It is safe to access because of null termination
|
||||
if (pattern[i + 1] == '{')
|
||||
{
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (is_open_curly)
|
||||
throw Exception("Two open curly braces without close one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
String to_add = String(pattern.data() + start_pos, i - start_pos);
|
||||
double_brace_removal(to_add);
|
||||
if (!glue_to_next)
|
||||
substrings.emplace_back(to_add);
|
||||
else
|
||||
substrings.back() += to_add;
|
||||
|
||||
glue_to_next = false;
|
||||
|
||||
is_open_curly = true;
|
||||
last_open = i + 1;
|
||||
}
|
||||
else if (pattern[i] == '}')
|
||||
{
|
||||
if (pattern[i + 1] == '}')
|
||||
{
|
||||
++i;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!is_open_curly)
|
||||
throw Exception("Closed curly brace without open one at position " + std::to_string(i), ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
is_open_curly = false;
|
||||
|
||||
if (last_open == i)
|
||||
{
|
||||
if (is_plain_numbering && !*is_plain_numbering)
|
||||
throw Exception(
|
||||
"Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
|
||||
is_plain_numbering = true;
|
||||
if (index_if_plain >= argument_number)
|
||||
throw Exception("Argument is too big for formatting", ErrorCodes::BAD_ARGUMENTS);
|
||||
*index_positions_ptr = index_if_plain++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is_plain_numbering && *is_plain_numbering)
|
||||
throw Exception(
|
||||
"Cannot switch from automatic field numbering to manual field specification", ErrorCodes::BAD_ARGUMENTS);
|
||||
is_plain_numbering = false;
|
||||
|
||||
UInt64 arg;
|
||||
parseNumber(pattern, last_open, i, arg);
|
||||
|
||||
if (arg >= argument_number)
|
||||
throw Exception(
|
||||
"Argument is too big for formatting. Note that indexing starts from zero", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
*index_positions_ptr = arg;
|
||||
}
|
||||
|
||||
/// Constant string.
|
||||
if (!data[*index_positions_ptr])
|
||||
{
|
||||
/// The next string should be glued to last `A {} C`.format('B') -> `A B C`.
|
||||
glue_to_next = true;
|
||||
substrings.back() += constant_strings[*index_positions_ptr];
|
||||
}
|
||||
else
|
||||
++index_positions_ptr; /// Otherwise we commit arg number and proceed.
|
||||
|
||||
start_pos = i + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_open_curly)
|
||||
throw Exception("Last open curly brace is not closed", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
String to_add = String(pattern.data() + start_pos, pattern.size() - start_pos);
|
||||
double_brace_removal(to_add);
|
||||
|
||||
if (!glue_to_next)
|
||||
substrings.emplace_back(to_add);
|
||||
else
|
||||
substrings.back() += to_add;
|
||||
}
|
||||
|
||||
/// data for ColumnString and ColumnFixed. Nullptr means no data, it is const string.
|
||||
/// offsets for ColumnString, nullptr is an indicator that there is a fixed string rather than ColumnString.
|
||||
/// fixed_string_N for savings N to fixed strings.
|
||||
/// constant_strings for constant strings. If data[i] is nullptr, than it is constant string.
|
||||
/// constant_strings for constant strings. If data[i] is nullptr, it is constant string.
|
||||
/// res_data is result_data, res_offsets is offset result.
|
||||
/// input_rows_count is the number of rows processed.
|
||||
/// Precondition: data.size() == offsets.size() == fixed_string_N.size() == constant_strings.size().
|
||||
@ -207,29 +48,22 @@ struct FormatImpl
|
||||
const std::vector<const ColumnString::Chars *> & data,
|
||||
const std::vector<const ColumnString::Offsets *> & offsets,
|
||||
[[maybe_unused]] /* Because sometimes !has_column_fixed_string */ const std::vector<size_t> & fixed_string_N,
|
||||
const std::vector<String> & constant_strings,
|
||||
const std::vector<std::optional<String>> & constant_strings,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets,
|
||||
size_t input_rows_count)
|
||||
{
|
||||
const size_t argument_number = offsets.size();
|
||||
|
||||
UInt64 small_index_positions_buffer[small_argument_threshold];
|
||||
/// The subsequent indexes of strings we should use. e.g `Hello world {1} {3} {1} {0}` this array will be filled with [1, 3, 1, 0, ... (garbage)] but without constant string indices.
|
||||
UInt64 * index_positions = small_index_positions_buffer;
|
||||
|
||||
std::unique_ptr<UInt64[]> big_index_positions_buffer;
|
||||
if (argument_number > small_argument_threshold)
|
||||
{
|
||||
big_index_positions_buffer.reset(new UInt64[argument_number]);
|
||||
index_positions = big_index_positions_buffer.get();
|
||||
}
|
||||
/// The subsequent indexes of strings we should use. e.g `Hello world {1} {3} {1} {0}` this
|
||||
/// array will be filled with [1, 3, 1, 0] but without constant string indices.
|
||||
Format::IndexPositions index_positions;
|
||||
|
||||
/// Vector of substrings of pattern that will be copied to the answer, not string view because of escaping and iterators invalidation.
|
||||
/// These are exactly what is between {} tokens, for `Hello {} world {}` we will have [`Hello `, ` world `, ``].
|
||||
std::vector<String> substrings;
|
||||
|
||||
init(pattern, data, argument_number, constant_strings, index_positions, substrings);
|
||||
Format::init(pattern, argument_number, constant_strings, index_positions, substrings);
|
||||
|
||||
UInt64 final_size = 0;
|
||||
|
||||
@ -271,7 +105,7 @@ struct FormatImpl
|
||||
for (size_t j = 1; j < substrings.size(); ++j)
|
||||
{
|
||||
UInt64 arg = index_positions[j - 1];
|
||||
auto offset_ptr = offsets[arg];
|
||||
const auto * offset_ptr = offsets[arg];
|
||||
UInt64 arg_offset = 0;
|
||||
UInt64 size = 0;
|
||||
|
||||
|
155
src/Functions/makeDate.cpp
Normal file
155
src/Functions/makeDate.cpp
Normal file
@ -0,0 +1,155 @@
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDate32.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
|
||||
#include <Common/DateLUT.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
#include <array>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
// A helper function to simplify comparisons of valid YYYY-MM-DD values for <,>,=
|
||||
inline constexpr Int64 YearMonthDayToSingleInt(Int64 year, Int64 month, Int64 day)
|
||||
{
|
||||
return year * 512 + month * 32 + day;
|
||||
}
|
||||
|
||||
// Common implementation for makeDate, makeDate32
|
||||
template <typename Traits>
|
||||
class FunctionMakeDate : public IFunction
|
||||
{
|
||||
private:
|
||||
static constexpr std::array<const char*, 3> argument_names = {"year", "month", "day"};
|
||||
|
||||
public:
|
||||
static constexpr auto name = Traits::name;
|
||||
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionMakeDate>(); }
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isVariadic() const override { return false; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return argument_names.size(); }
|
||||
|
||||
bool isInjective(const ColumnsWithTypeAndName &) const override
|
||||
{
|
||||
return false; // {year,month,day} that are out of supported range are converted into a default value
|
||||
}
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
|
||||
bool useDefaultImplementationForNulls() const override { return true; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (arguments.size() != argument_names.size())
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Function {} requires 3 arguments, but {} given", getName(), arguments.size());
|
||||
|
||||
for (size_t i = 0; i < argument_names.size(); ++i)
|
||||
{
|
||||
DataTypePtr argument_type = arguments[i];
|
||||
if (!isNumber(argument_type))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Argument '{}' for function {} must be number", std::string(argument_names[i]), getName());
|
||||
}
|
||||
|
||||
return std::make_shared<typename Traits::ReturnDataType>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
const DataTypePtr converted_argument_type = std::make_shared<DataTypeFloat32>();
|
||||
Columns converted_arguments;
|
||||
converted_arguments.reserve(arguments.size());
|
||||
for (const auto & argument : arguments)
|
||||
{
|
||||
ColumnPtr argument_column = castColumn(argument, converted_argument_type);
|
||||
argument_column = argument_column->convertToFullColumnIfConst();
|
||||
converted_arguments.push_back(argument_column);
|
||||
}
|
||||
|
||||
auto res_column = Traits::ReturnColumnType::create(input_rows_count);
|
||||
auto & result_data = res_column->getData();
|
||||
|
||||
const auto & year_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[0]).getData();
|
||||
const auto & month_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[1]).getData();
|
||||
const auto & day_data = typeid_cast<const ColumnFloat32 &>(*converted_arguments[2]).getData();
|
||||
|
||||
const auto & date_lut = DateLUT::instance();
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
const auto year = year_data[i];
|
||||
const auto month = month_data[i];
|
||||
const auto day = day_data[i];
|
||||
|
||||
Int32 day_num = 0;
|
||||
|
||||
if (year >= Traits::MIN_YEAR &&
|
||||
year <= Traits::MAX_YEAR &&
|
||||
month >= 1 && month <= 12 &&
|
||||
day >= 1 && day <= 31 &&
|
||||
YearMonthDayToSingleInt(year, month, day) <= Traits::MAX_DATE)
|
||||
{
|
||||
day_num = date_lut.makeDayNum(year, month, day);
|
||||
}
|
||||
|
||||
result_data[i] = day_num;
|
||||
}
|
||||
|
||||
return res_column;
|
||||
}
|
||||
};
|
||||
|
||||
// makeDate(year, month, day)
|
||||
struct MakeDateTraits
|
||||
{
|
||||
static constexpr auto name = "makeDate";
|
||||
using ReturnDataType = DataTypeDate;
|
||||
using ReturnColumnType = ColumnUInt16;
|
||||
|
||||
static constexpr auto MIN_YEAR = 1970;
|
||||
static constexpr auto MAX_YEAR = 2149;
|
||||
// This date has the maximum day number that fits in 16-bit uint
|
||||
static constexpr auto MAX_DATE = YearMonthDayToSingleInt(MAX_YEAR, 6, 6);
|
||||
};
|
||||
|
||||
// makeDate32(year, month, day)
|
||||
struct MakeDate32Traits
|
||||
{
|
||||
static constexpr auto name = "makeDate32";
|
||||
using ReturnDataType = DataTypeDate32;
|
||||
using ReturnColumnType = ColumnInt32;
|
||||
|
||||
static constexpr auto MIN_YEAR = 1925;
|
||||
static constexpr auto MAX_YEAR = 2283;
|
||||
static constexpr auto MAX_DATE = YearMonthDayToSingleInt(MAX_YEAR, 11, 11);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void registerFunctionsMakeDate(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionMakeDate<MakeDateTraits>>();
|
||||
factory.registerFunction<FunctionMakeDate<MakeDate32Traits>>();
|
||||
}
|
||||
|
||||
}
|
@ -8,6 +8,7 @@ namespace DB
|
||||
void registerFunctionsArithmetic(FunctionFactory &);
|
||||
void registerFunctionsArray(FunctionFactory &);
|
||||
void registerFunctionsTuple(FunctionFactory &);
|
||||
void registerFunctionsMakeDate(FunctionFactory &);
|
||||
void registerFunctionsMap(FunctionFactory &);
|
||||
void registerFunctionsBitmap(FunctionFactory &);
|
||||
void registerFunctionsBinaryRepr(FunctionFactory &);
|
||||
@ -73,6 +74,7 @@ void registerFunctions()
|
||||
registerFunctionsArithmetic(factory);
|
||||
registerFunctionsArray(factory);
|
||||
registerFunctionsTuple(factory);
|
||||
registerFunctionsMakeDate(factory);
|
||||
registerFunctionsMap(factory);
|
||||
registerFunctionsBitmap(factory);
|
||||
registerFunctionsBinaryRepr(factory);
|
||||
|
@ -80,6 +80,7 @@ void registerFunctionInitialQueryID(FunctionFactory & factory);
|
||||
void registerFunctionServerUUID(FunctionFactory &);
|
||||
void registerFunctionZooKeeperSessionUptime(FunctionFactory &);
|
||||
void registerFunctionGetOSKernelVersion(FunctionFactory &);
|
||||
void registerFunctionFlattenTuple(FunctionFactory &);
|
||||
|
||||
#if USE_ICU
|
||||
void registerFunctionConvertCharset(FunctionFactory &);
|
||||
@ -166,6 +167,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
|
||||
registerFunctionServerUUID(factory);
|
||||
registerFunctionZooKeeperSessionUptime(factory);
|
||||
registerFunctionGetOSKernelVersion(factory);
|
||||
registerFunctionFlattenTuple(factory);
|
||||
|
||||
#if USE_ICU
|
||||
registerFunctionConvertCharset(factory);
|
||||
|
@ -13,18 +13,11 @@ namespace ErrorCodes
|
||||
|
||||
}
|
||||
|
||||
ParallelReadBuffer::ParallelReadBuffer(
|
||||
std::unique_ptr<ReadBufferFactory> reader_factory_,
|
||||
ThreadPool * pool_,
|
||||
size_t max_working_readers_,
|
||||
WorkerSetup worker_setup_,
|
||||
WorkerCleanup worker_cleanup_)
|
||||
ParallelReadBuffer::ParallelReadBuffer(std::unique_ptr<ReadBufferFactory> reader_factory_, CallbackRunner schedule_, size_t max_working_readers_)
|
||||
: SeekableReadBufferWithSize(nullptr, 0)
|
||||
, pool(pool_)
|
||||
, max_working_readers(max_working_readers_)
|
||||
, schedule(std::move(schedule_))
|
||||
, reader_factory(std::move(reader_factory_))
|
||||
, worker_setup(std::move(worker_setup_))
|
||||
, worker_cleanup(std::move(worker_cleanup_))
|
||||
{
|
||||
std::unique_lock<std::mutex> lock{mutex};
|
||||
addReaders(lock);
|
||||
@ -40,30 +33,8 @@ bool ParallelReadBuffer::addReaderToPool(std::unique_lock<std::mutex> & /*buffer
|
||||
|
||||
auto worker = read_workers.emplace_back(std::make_shared<ReadWorker>(std::move(reader)));
|
||||
|
||||
pool->scheduleOrThrow(
|
||||
[&, this, worker = std::move(worker)]() mutable
|
||||
{
|
||||
ThreadStatus thread_status;
|
||||
schedule([this, worker = std::move(worker)]() mutable { readerThreadFunction(std::move(worker)); });
|
||||
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
++active_working_reader;
|
||||
}
|
||||
|
||||
SCOPE_EXIT({
|
||||
worker_cleanup(thread_status);
|
||||
|
||||
std::lock_guard lock{mutex};
|
||||
--active_working_reader;
|
||||
if (active_working_reader == 0)
|
||||
{
|
||||
readers_done.notify_all();
|
||||
}
|
||||
});
|
||||
worker_setup(thread_status);
|
||||
|
||||
readerThreadFunction(std::move(worker));
|
||||
});
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -232,12 +203,27 @@ bool ParallelReadBuffer::nextImpl()
|
||||
|
||||
void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker)
|
||||
{
|
||||
{
|
||||
std::lock_guard lock{mutex};
|
||||
++active_working_reader;
|
||||
}
|
||||
|
||||
SCOPE_EXIT({
|
||||
std::lock_guard lock{mutex};
|
||||
--active_working_reader;
|
||||
if (active_working_reader == 0)
|
||||
{
|
||||
readers_done.notify_all();
|
||||
}
|
||||
});
|
||||
|
||||
try
|
||||
{
|
||||
while (!emergency_stop && !read_worker->cancel)
|
||||
{
|
||||
if (!read_worker->reader->next())
|
||||
throw Exception("Failed to read all the data from the reader", ErrorCodes::LOGICAL_ERROR);
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR, "Failed to read all the data from the reader, missing {} bytes", read_worker->bytes_left);
|
||||
|
||||
if (emergency_stop || read_worker->cancel)
|
||||
break;
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/SeekableReadBuffer.h>
|
||||
#include <Interpreters/threadPoolCallbackRunner.h>
|
||||
#include <Common/ArenaWithFreeLists.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
|
||||
@ -76,14 +77,7 @@ public:
|
||||
virtual std::optional<size_t> getTotalSize() = 0;
|
||||
};
|
||||
|
||||
using WorkerSetup = std::function<void(ThreadStatus &)>;
|
||||
using WorkerCleanup = std::function<void(ThreadStatus &)>;
|
||||
explicit ParallelReadBuffer(
|
||||
std::unique_ptr<ReadBufferFactory> reader_factory_,
|
||||
ThreadPool * pool,
|
||||
size_t max_working_readers,
|
||||
WorkerSetup worker_setup = {},
|
||||
WorkerCleanup worker_cleanup = {});
|
||||
explicit ParallelReadBuffer(std::unique_ptr<ReadBufferFactory> reader_factory_, CallbackRunner schedule_, size_t max_working_readers);
|
||||
|
||||
~ParallelReadBuffer() override { finishAndWait(); }
|
||||
|
||||
@ -140,16 +134,14 @@ private:
|
||||
|
||||
Segment current_segment;
|
||||
|
||||
ThreadPool * pool;
|
||||
size_t max_working_readers;
|
||||
size_t active_working_reader{0};
|
||||
// Triggered when all reader workers are done
|
||||
std::condition_variable readers_done;
|
||||
|
||||
std::unique_ptr<ReadBufferFactory> reader_factory;
|
||||
CallbackRunner schedule;
|
||||
|
||||
WorkerSetup worker_setup;
|
||||
WorkerCleanup worker_cleanup;
|
||||
std::unique_ptr<ReadBufferFactory> reader_factory;
|
||||
|
||||
/**
|
||||
* FIFO queue of readers.
|
||||
|
@ -39,6 +39,10 @@ public:
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~ReadBufferFromFileDescriptor() override
|
||||
{
|
||||
}
|
||||
|
||||
int getFD() const
|
||||
{
|
||||
return fd;
|
||||
@ -80,6 +84,9 @@ public:
|
||||
{
|
||||
use_pread = true;
|
||||
}
|
||||
virtual ~ReadBufferFromFileDescriptorPRead() override
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Common/config.h>
|
||||
#include "IO/S3Common.h"
|
||||
|
||||
#if USE_AWS_S3
|
||||
|
||||
@ -42,6 +43,7 @@ ReadBufferFromS3::ReadBufferFromS3(
|
||||
UInt64 max_single_read_retries_,
|
||||
const ReadSettings & settings_,
|
||||
bool use_external_buffer_,
|
||||
size_t offset_,
|
||||
size_t read_until_position_,
|
||||
bool restricted_seek_)
|
||||
: SeekableReadBufferWithSize(nullptr, 0)
|
||||
@ -49,9 +51,10 @@ ReadBufferFromS3::ReadBufferFromS3(
|
||||
, bucket(bucket_)
|
||||
, key(key_)
|
||||
, max_single_read_retries(max_single_read_retries_)
|
||||
, offset(offset_)
|
||||
, read_until_position(read_until_position_)
|
||||
, read_settings(settings_)
|
||||
, use_external_buffer(use_external_buffer_)
|
||||
, read_until_position(read_until_position_)
|
||||
, restricted_seek(restricted_seek_)
|
||||
{
|
||||
}
|
||||
@ -210,13 +213,14 @@ std::optional<size_t> ReadBufferFromS3::getTotalSize()
|
||||
if (file_size)
|
||||
return file_size;
|
||||
|
||||
Aws::S3::Model::HeadObjectRequest request;
|
||||
request.SetBucket(bucket);
|
||||
request.SetKey(key);
|
||||
auto object_size = S3::getObjectSize(client_ptr, bucket, key, false);
|
||||
|
||||
auto outcome = client_ptr->HeadObject(request);
|
||||
auto head_result = outcome.GetResultWithOwnership();
|
||||
file_size = head_result.GetContentLength();
|
||||
if (!object_size)
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
file_size = object_size;
|
||||
return file_size;
|
||||
}
|
||||
|
||||
@ -234,6 +238,11 @@ void ReadBufferFromS3::setReadUntilPosition(size_t position)
|
||||
}
|
||||
}
|
||||
|
||||
SeekableReadBuffer::Range ReadBufferFromS3::getRemainingReadRange() const
|
||||
{
|
||||
return Range{.left = static_cast<size_t>(offset), .right = read_until_position ? std::optional{read_until_position - 1} : std::nullopt};
|
||||
}
|
||||
|
||||
std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
|
||||
{
|
||||
Aws::S3::Model::GetObjectRequest req;
|
||||
@ -272,6 +281,36 @@ std::unique_ptr<ReadBuffer> ReadBufferFromS3::initialize()
|
||||
throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
|
||||
}
|
||||
|
||||
SeekableReadBufferPtr ReadBufferS3Factory::getReader()
|
||||
{
|
||||
const auto next_range = range_generator.nextRange();
|
||||
if (!next_range)
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto reader = std::make_shared<ReadBufferFromS3>(
|
||||
client_ptr,
|
||||
bucket,
|
||||
key,
|
||||
s3_max_single_read_retries,
|
||||
read_settings,
|
||||
false /*use_external_buffer*/,
|
||||
next_range->first,
|
||||
next_range->second);
|
||||
return reader;
|
||||
}
|
||||
|
||||
off_t ReadBufferS3Factory::seek(off_t off, [[maybe_unused]] int whence)
|
||||
{
|
||||
range_generator = RangeGenerator{object_size, range_step, static_cast<size_t>(off)};
|
||||
return off;
|
||||
}
|
||||
|
||||
std::optional<size_t> ReadBufferS3Factory::getTotalSize()
|
||||
{
|
||||
return object_size;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/RangeGenerator.h>
|
||||
#include <Common/config.h>
|
||||
|
||||
#if USE_AWS_S3
|
||||
@ -7,6 +8,7 @@
|
||||
#include <memory>
|
||||
|
||||
#include <IO/HTTPCommon.h>
|
||||
#include <IO/ParallelReadBuffer.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/ReadSettings.h>
|
||||
#include <IO/SeekableReadBuffer.h>
|
||||
@ -30,7 +32,9 @@ private:
|
||||
String bucket;
|
||||
String key;
|
||||
UInt64 max_single_read_retries;
|
||||
|
||||
off_t offset = 0;
|
||||
off_t read_until_position = 0;
|
||||
|
||||
Aws::S3::Model::GetObjectResult read_result;
|
||||
std::unique_ptr<ReadBuffer> impl;
|
||||
@ -45,6 +49,7 @@ public:
|
||||
UInt64 max_single_read_retries_,
|
||||
const ReadSettings & settings_,
|
||||
bool use_external_buffer = false,
|
||||
size_t offset_ = 0,
|
||||
size_t read_until_position_ = 0,
|
||||
bool restricted_seek_ = false);
|
||||
|
||||
@ -58,7 +63,7 @@ public:
|
||||
|
||||
void setReadUntilPosition(size_t position) override;
|
||||
|
||||
Range getRemainingReadRange() const override { return Range{ .left = static_cast<size_t>(offset), .right = read_until_position }; }
|
||||
Range getRemainingReadRange() const override;
|
||||
|
||||
size_t getFileOffsetOfBufferEnd() const override { return offset; }
|
||||
|
||||
@ -69,13 +74,55 @@ private:
|
||||
|
||||
bool use_external_buffer;
|
||||
|
||||
off_t read_until_position = 0;
|
||||
|
||||
/// There is different seek policy for disk seek and for non-disk seek
|
||||
/// (non-disk seek is applied for seekable input formats: orc, arrow, parquet).
|
||||
bool restricted_seek;
|
||||
};
|
||||
|
||||
/// Creates separate ReadBufferFromS3 for sequence of ranges of particular object
|
||||
class ReadBufferS3Factory : public ParallelReadBuffer::ReadBufferFactory
|
||||
{
|
||||
public:
|
||||
explicit ReadBufferS3Factory(
|
||||
std::shared_ptr<Aws::S3::S3Client> client_ptr_,
|
||||
const String & bucket_,
|
||||
const String & key_,
|
||||
size_t range_step_,
|
||||
size_t object_size_,
|
||||
UInt64 s3_max_single_read_retries_,
|
||||
const ReadSettings & read_settings_)
|
||||
: client_ptr(client_ptr_)
|
||||
, bucket(bucket_)
|
||||
, key(key_)
|
||||
, read_settings(read_settings_)
|
||||
, range_generator(object_size_, range_step_)
|
||||
, range_step(range_step_)
|
||||
, object_size(object_size_)
|
||||
, s3_max_single_read_retries(s3_max_single_read_retries_)
|
||||
{
|
||||
assert(range_step > 0);
|
||||
assert(range_step < object_size);
|
||||
}
|
||||
|
||||
SeekableReadBufferPtr getReader() override;
|
||||
|
||||
off_t seek(off_t off, [[maybe_unused]] int whence) override;
|
||||
|
||||
std::optional<size_t> getTotalSize() override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<Aws::S3::S3Client> client_ptr;
|
||||
const String bucket;
|
||||
const String key;
|
||||
ReadSettings read_settings;
|
||||
|
||||
RangeGenerator range_generator;
|
||||
size_t range_step;
|
||||
size_t object_size;
|
||||
|
||||
UInt64 s3_max_single_read_retries;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -15,4 +15,11 @@ public:
|
||||
|
||||
explicit ReadBufferFromString(std::string_view s) : ReadBufferFromMemory(s.data(), s.size()) {}
|
||||
};
|
||||
|
||||
class ReadBufferFromOwnString : public String, public ReadBufferFromString
|
||||
{
|
||||
public:
|
||||
explicit ReadBufferFromOwnString(const String & s_): String(s_), ReadBufferFromString(*this) {}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
#include <Common/RangeGenerator.h>
|
||||
#include <IO/ConnectionTimeouts.h>
|
||||
#include <IO/HTTPCommon.h>
|
||||
#include <IO/ParallelReadBuffer.h>
|
||||
@ -635,43 +636,6 @@ public:
|
||||
void buildNewSession(const Poco::URI & uri) override { session = makeHTTPSession(uri, timeouts); }
|
||||
};
|
||||
|
||||
class RangeGenerator
|
||||
{
|
||||
public:
|
||||
explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
|
||||
: from(range_start), range_step(range_step_), total_size(total_size_)
|
||||
{
|
||||
}
|
||||
|
||||
size_t totalRanges() const { return static_cast<size_t>(round(static_cast<float>(total_size - from) / range_step)); }
|
||||
|
||||
using Range = std::pair<size_t, size_t>;
|
||||
|
||||
// return upper exclusive range of values, i.e. [from_range, to_range>
|
||||
std::optional<Range> nextRange()
|
||||
{
|
||||
if (from >= total_size)
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
auto to = from + range_step;
|
||||
if (to >= total_size)
|
||||
{
|
||||
to = total_size;
|
||||
}
|
||||
|
||||
Range range{from, to};
|
||||
from = to;
|
||||
return std::move(range);
|
||||
}
|
||||
|
||||
private:
|
||||
size_t from;
|
||||
size_t range_step;
|
||||
size_t total_size;
|
||||
};
|
||||
|
||||
class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>
|
||||
{
|
||||
using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>;
|
||||
|
@ -24,6 +24,7 @@
|
||||
# include <aws/core/utils/UUID.h>
|
||||
# include <aws/core/http/HttpClientFactory.h>
|
||||
# include <aws/s3/S3Client.h>
|
||||
# include <aws/s3/model/HeadObjectRequest.h> // Y_IGNORE
|
||||
|
||||
# include <IO/S3/PocoHTTPClientFactory.h>
|
||||
# include <IO/S3/PocoHTTPClient.h>
|
||||
@ -682,6 +683,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int S3_ERROR;
|
||||
}
|
||||
|
||||
namespace S3
|
||||
@ -839,6 +841,26 @@ namespace S3
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket name length is out of bounds in virtual hosted style S3 URI: {}{}",
|
||||
quoteString(bucket), !uri.empty() ? " (" + uri.toString() + ")" : "");
|
||||
}
|
||||
|
||||
size_t getObjectSize(std::shared_ptr<Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, bool throw_on_error)
|
||||
{
|
||||
Aws::S3::Model::HeadObjectRequest req;
|
||||
req.SetBucket(bucket);
|
||||
req.SetKey(key);
|
||||
|
||||
Aws::S3::Model::HeadObjectOutcome outcome = client_ptr->HeadObject(req);
|
||||
|
||||
if (outcome.IsSuccess())
|
||||
{
|
||||
auto read_result = outcome.GetResultWithOwnership();
|
||||
return static_cast<size_t>(read_result.GetContentLength());
|
||||
}
|
||||
else if (throw_on_error)
|
||||
{
|
||||
throw DB::Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -75,6 +75,8 @@ struct URI
|
||||
static void validateBucket(const String & bucket, const Poco::URI & uri);
|
||||
};
|
||||
|
||||
size_t getObjectSize(std::shared_ptr<Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, bool throw_on_error = true);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -2,18 +2,15 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <base/types.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <base/types.h>
|
||||
|
||||
|
||||
int main(int, char **)
|
||||
int readAndPrint(DB::ReadBuffer & in)
|
||||
{
|
||||
try
|
||||
{
|
||||
std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'";
|
||||
DB::ReadBufferFromString in(s);
|
||||
|
||||
DB::Int64 a;
|
||||
DB::Float64 b;
|
||||
DB::String c, d;
|
||||
@ -31,12 +28,32 @@ int main(int, char **)
|
||||
|
||||
std::cout << a << ' ' << b << ' ' << c << '\t' << '\'' << d << '\'' << std::endl;
|
||||
std::cout << in.count() << std::endl;
|
||||
return 0;
|
||||
}
|
||||
catch (const DB::Exception & e)
|
||||
{
|
||||
std::cerr << e.what() << ", " << e.displayText() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
int main(int, char **)
|
||||
{
|
||||
{
|
||||
std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'";
|
||||
DB::ReadBufferFromString in(s);
|
||||
if (readAndPrint(in))
|
||||
std::cout << "readAndPrint from ReadBufferFromString failed" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
std::shared_ptr<DB::ReadBufferFromOwnString> in;
|
||||
{
|
||||
std::string s = "-123456 123.456 вася пе\\tтя\t'\\'xyz\\\\'";
|
||||
in = std::make_shared<DB::ReadBufferFromOwnString>(s);
|
||||
}
|
||||
if (readAndPrint(*in))
|
||||
std::cout << "readAndPrint from ReadBufferFromOwnString failed" << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,4 +1,6 @@
|
||||
#include <algorithm>
|
||||
#include <future>
|
||||
#include <numeric>
|
||||
#include <Poco/Util/Application.h>
|
||||
|
||||
#include <base/sort.h>
|
||||
@ -15,6 +17,7 @@
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <Compression/CompressedWriteBuffer.h>
|
||||
#include <Interpreters/Aggregator.h>
|
||||
#include <Common/LRUCache.h>
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
@ -27,12 +30,236 @@
|
||||
#include <Interpreters/JIT/CompiledExpressionCache.h>
|
||||
#include <Core/ProtocolDefines.h>
|
||||
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event ExternalAggregationWritePart;
|
||||
extern const Event ExternalAggregationCompressedBytes;
|
||||
extern const Event ExternalAggregationUncompressedBytes;
|
||||
extern const Event ExternalAggregationWritePart;
|
||||
extern const Event ExternalAggregationCompressedBytes;
|
||||
extern const Event ExternalAggregationUncompressedBytes;
|
||||
extern const Event AggregationPreallocatedElementsInHashTables;
|
||||
extern const Event AggregationHashTablesInitializedAsTwoLevel;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
/** Collects observed HashMap-s sizes to avoid redundant intermediate resizes.
|
||||
*/
|
||||
class HashTablesStatistics
|
||||
{
|
||||
public:
|
||||
struct Entry
|
||||
{
|
||||
size_t sum_of_sizes; // used to determine if it's better to convert aggregation to two-level from the beginning
|
||||
size_t median_size; // roughly the size we're going to preallocate on each thread
|
||||
};
|
||||
|
||||
using Cache = DB::LRUCache<UInt64, Entry>;
|
||||
using CachePtr = std::shared_ptr<Cache>;
|
||||
using Params = DB::Aggregator::Params::StatsCollectingParams;
|
||||
|
||||
/// Collection and use of the statistics should be enabled.
|
||||
std::optional<Entry> getSizeHint(const Params & params)
|
||||
{
|
||||
if (!params.isCollectionAndUseEnabled())
|
||||
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled.");
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
const auto cache = getHashTableStatsCache(params, lock);
|
||||
if (const auto hint = cache->get(params.key))
|
||||
{
|
||||
LOG_DEBUG(
|
||||
&Poco::Logger::get("Aggregator"),
|
||||
"An entry for key={} found in cache: sum_of_sizes={}, median_size={}",
|
||||
params.key,
|
||||
hint->sum_of_sizes,
|
||||
hint->median_size);
|
||||
return *hint;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/// Collection and use of the statistics should be enabled.
|
||||
void update(size_t sum_of_sizes, size_t median_size, const Params & params)
|
||||
{
|
||||
if (!params.isCollectionAndUseEnabled())
|
||||
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled.");
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
const auto cache = getHashTableStatsCache(params, lock);
|
||||
const auto hint = cache->get(params.key);
|
||||
// We'll maintain the maximum among all the observed values until the next prediction turns out to be too wrong.
|
||||
if (!hint || sum_of_sizes < hint->sum_of_sizes / 2 || hint->sum_of_sizes < sum_of_sizes || median_size < hint->median_size / 2
|
||||
|| hint->median_size < median_size)
|
||||
{
|
||||
LOG_DEBUG(
|
||||
&Poco::Logger::get("Aggregator"),
|
||||
"Statistics updated for key={}: new sum_of_sizes={}, median_size={}",
|
||||
params.key,
|
||||
sum_of_sizes,
|
||||
median_size);
|
||||
cache->set(params.key, std::make_shared<Entry>(Entry{.sum_of_sizes = sum_of_sizes, .median_size = median_size}));
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<DB::HashTablesCacheStatistics> getCacheStats() const
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
if (hash_table_stats)
|
||||
{
|
||||
size_t hits = 0, misses = 0;
|
||||
hash_table_stats->getStats(hits, misses);
|
||||
return DB::HashTablesCacheStatistics{.entries = hash_table_stats->count(), .hits = hits, .misses = misses};
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
static size_t calculateCacheKey(const DB::ASTPtr & select_query)
|
||||
{
|
||||
if (!select_query)
|
||||
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Query ptr cannot be null");
|
||||
|
||||
const auto & select = select_query->as<DB::ASTSelectQuery &>();
|
||||
|
||||
// It may happen in some corner cases like `select 1 as num group by num`.
|
||||
if (!select.tables())
|
||||
return 0;
|
||||
|
||||
SipHash hash;
|
||||
hash.update(select.tables()->getTreeHash());
|
||||
if (const auto where = select.where())
|
||||
hash.update(where->getTreeHash());
|
||||
if (const auto group_by = select.groupBy())
|
||||
hash.update(group_by->getTreeHash());
|
||||
return hash.get64();
|
||||
}
|
||||
|
||||
private:
|
||||
CachePtr getHashTableStatsCache(const Params & params, const std::lock_guard<std::mutex> &)
|
||||
{
|
||||
if (!hash_table_stats || hash_table_stats->maxSize() != params.max_entries_for_hash_table_stats)
|
||||
hash_table_stats = std::make_shared<Cache>(params.max_entries_for_hash_table_stats);
|
||||
return hash_table_stats;
|
||||
}
|
||||
|
||||
mutable std::mutex mutex;
|
||||
CachePtr hash_table_stats;
|
||||
};
|
||||
|
||||
HashTablesStatistics & getHashTablesStatistics()
|
||||
{
|
||||
static HashTablesStatistics hash_tables_stats;
|
||||
return hash_tables_stats;
|
||||
}
|
||||
|
||||
bool worthConvertToTwoLevel(
|
||||
size_t group_by_two_level_threshold, size_t result_size, size_t group_by_two_level_threshold_bytes, auto result_size_bytes)
|
||||
{
|
||||
// params.group_by_two_level_threshold will be equal to 0 if we have only one thread to execute aggregation (refer to AggregatingStep::transformPipeline).
|
||||
return (group_by_two_level_threshold && result_size >= group_by_two_level_threshold)
|
||||
|| (group_by_two_level_threshold_bytes && result_size_bytes >= static_cast<Int64>(group_by_two_level_threshold_bytes));
|
||||
}
|
||||
|
||||
DB::AggregatedDataVariants::Type convertToTwoLevelTypeIfPossible(DB::AggregatedDataVariants::Type type)
|
||||
{
|
||||
using Type = DB::AggregatedDataVariants::Type;
|
||||
switch (type)
|
||||
{
|
||||
#define M(NAME) \
|
||||
case Type::NAME: \
|
||||
return Type::NAME##_two_level;
|
||||
APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M)
|
||||
#undef M
|
||||
default:
|
||||
return type;
|
||||
}
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
void initDataVariantsWithSizeHint(
|
||||
DB::AggregatedDataVariants & result, DB::AggregatedDataVariants::Type method_chosen, const DB::Aggregator::Params & params)
|
||||
{
|
||||
const auto & stats_collecting_params = params.stats_collecting_params;
|
||||
if (stats_collecting_params.isCollectionAndUseEnabled())
|
||||
{
|
||||
if (auto hint = getHashTablesStatistics().getSizeHint(stats_collecting_params))
|
||||
{
|
||||
const auto max_threads = params.group_by_two_level_threshold != 0 ? std::max(params.max_threads, 1ul) : 1;
|
||||
const auto lower_limit = hint->sum_of_sizes / max_threads;
|
||||
const auto upper_limit = stats_collecting_params.max_size_to_preallocate_for_aggregation / max_threads;
|
||||
const auto adjusted = std::min(std::max(lower_limit, hint->median_size), upper_limit);
|
||||
if (worthConvertToTwoLevel(
|
||||
params.group_by_two_level_threshold,
|
||||
hint->sum_of_sizes,
|
||||
/*group_by_two_level_threshold_bytes*/ 0,
|
||||
/*result_size_bytes*/ 0))
|
||||
method_chosen = convertToTwoLevelTypeIfPossible(method_chosen);
|
||||
result.init(method_chosen, adjusted);
|
||||
ProfileEvents::increment(ProfileEvents::AggregationHashTablesInitializedAsTwoLevel, result.isTwoLevel());
|
||||
return;
|
||||
}
|
||||
}
|
||||
result.init(method_chosen);
|
||||
}
|
||||
|
||||
/// Collection and use of the statistics should be enabled.
|
||||
void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, const DB::Aggregator::Params::StatsCollectingParams & params)
|
||||
{
|
||||
if (!params.isCollectionAndUseEnabled())
|
||||
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled.");
|
||||
|
||||
std::vector<size_t> sizes(data_variants.size());
|
||||
for (size_t i = 0; i < data_variants.size(); ++i)
|
||||
sizes[i] = data_variants[i]->size();
|
||||
const auto median_size = sizes.begin() + sizes.size() / 2; // not precisely though...
|
||||
std::nth_element(sizes.begin(), median_size, sizes.end());
|
||||
const auto sum_of_sizes = std::accumulate(sizes.begin(), sizes.end(), 0ull);
|
||||
getHashTablesStatistics().update(sum_of_sizes, *median_size, params);
|
||||
}
|
||||
|
||||
// The std::is_constructible trait isn't suitable here because some classes have template constructors with semantics different from providing size hints.
|
||||
// Also string hash table variants are not supported due to the fact that both local perf tests and tests in CI showed slowdowns for them.
|
||||
template <typename...>
|
||||
struct HasConstructorOfNumberOfElements : std::false_type
|
||||
{
|
||||
};
|
||||
|
||||
template <typename... Ts>
|
||||
struct HasConstructorOfNumberOfElements<HashMapTable<Ts...>> : std::true_type
|
||||
{
|
||||
};
|
||||
|
||||
template <typename Key, typename Cell, typename Hash, typename Grower, typename Allocator, template <typename...> typename ImplTable>
|
||||
struct HasConstructorOfNumberOfElements<TwoLevelHashMapTable<Key, Cell, Hash, Grower, Allocator, ImplTable>> : std::true_type
|
||||
{
|
||||
};
|
||||
|
||||
template <typename... Ts>
|
||||
struct HasConstructorOfNumberOfElements<HashTable<Ts...>> : std::true_type
|
||||
{
|
||||
};
|
||||
|
||||
template <typename... Ts>
|
||||
struct HasConstructorOfNumberOfElements<TwoLevelHashTable<Ts...>> : std::true_type
|
||||
{
|
||||
};
|
||||
|
||||
template <template <typename> typename Method, typename Base>
|
||||
struct HasConstructorOfNumberOfElements<Method<Base>> : HasConstructorOfNumberOfElements<Base>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename Method>
|
||||
auto constructWithReserveIfPossible(size_t size_hint)
|
||||
{
|
||||
if constexpr (HasConstructorOfNumberOfElements<typename Method::Data>::value)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::AggregationPreallocatedElementsInHashTables, size_hint);
|
||||
return std::make_unique<Method>(size_hint);
|
||||
}
|
||||
else
|
||||
return std::make_unique<Method>();
|
||||
}
|
||||
}
|
||||
|
||||
namespace DB
|
||||
@ -64,6 +291,10 @@ AggregatedDataVariants::~AggregatedDataVariants()
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<HashTablesCacheStatistics> getHashTablesCacheStatistics()
|
||||
{
|
||||
return getHashTablesStatistics().getCacheStats();
|
||||
}
|
||||
|
||||
void AggregatedDataVariants::convertToTwoLevel()
|
||||
{
|
||||
@ -88,6 +319,47 @@ void AggregatedDataVariants::convertToTwoLevel()
|
||||
}
|
||||
}
|
||||
|
||||
void AggregatedDataVariants::init(Type type_, std::optional<size_t> size_hint)
|
||||
{
|
||||
switch (type_)
|
||||
{
|
||||
case Type::EMPTY:
|
||||
break;
|
||||
case Type::without_key:
|
||||
break;
|
||||
|
||||
#define M(NAME, IS_TWO_LEVEL) \
|
||||
case Type::NAME: \
|
||||
if (size_hint) \
|
||||
(NAME) = constructWithReserveIfPossible<decltype(NAME)::element_type>(*size_hint); \
|
||||
else \
|
||||
(NAME) = std::make_unique<decltype(NAME)::element_type>(); \
|
||||
break;
|
||||
APPLY_FOR_AGGREGATED_VARIANTS(M)
|
||||
#undef M
|
||||
}
|
||||
|
||||
type = type_;
|
||||
}
|
||||
|
||||
Aggregator::Params::StatsCollectingParams::StatsCollectingParams() = default;
|
||||
|
||||
Aggregator::Params::StatsCollectingParams::StatsCollectingParams(
|
||||
const ASTPtr & select_query_,
|
||||
bool collect_hash_table_stats_during_aggregation_,
|
||||
size_t max_entries_for_hash_table_stats_,
|
||||
size_t max_size_to_preallocate_for_aggregation_)
|
||||
: key(collect_hash_table_stats_during_aggregation_ ? HashTablesStatistics::calculateCacheKey(select_query_) : 0)
|
||||
, max_entries_for_hash_table_stats(max_entries_for_hash_table_stats_)
|
||||
, max_size_to_preallocate_for_aggregation(max_size_to_preallocate_for_aggregation_)
|
||||
{
|
||||
}
|
||||
|
||||
bool Aggregator::Params::StatsCollectingParams::isCollectionAndUseEnabled() const
|
||||
{
|
||||
return key != 0;
|
||||
}
|
||||
|
||||
Block Aggregator::getHeader(bool final) const
|
||||
{
|
||||
return params.getHeader(final);
|
||||
@ -237,8 +509,7 @@ public:
|
||||
|
||||
#endif
|
||||
|
||||
Aggregator::Aggregator(const Params & params_)
|
||||
: params(params_)
|
||||
Aggregator::Aggregator(const Params & params_) : params(params_)
|
||||
{
|
||||
/// Use query-level memory tracker
|
||||
if (auto * memory_tracker_child = CurrentThread::getMemoryTracker())
|
||||
@ -292,7 +563,6 @@ Aggregator::Aggregator(const Params & params_)
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
compileAggregateFunctionsIfNeeded();
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
@ -958,7 +1228,7 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData
|
||||
/// How to perform the aggregation?
|
||||
if (result.empty())
|
||||
{
|
||||
result.init(method_chosen);
|
||||
initDataVariantsWithSizeHint(result, method_chosen, params);
|
||||
result.keys_size = params.keys_size;
|
||||
result.key_sizes = key_sizes;
|
||||
LOG_TRACE(log, "Aggregation method: {}", result.getMethodName());
|
||||
@ -1038,9 +1308,8 @@ bool Aggregator::executeOnBlock(Columns columns, UInt64 num_rows, AggregatedData
|
||||
/// Here all the results in the sum are taken into account, from different threads.
|
||||
auto result_size_bytes = current_memory_usage - memory_usage_before_aggregation;
|
||||
|
||||
bool worth_convert_to_two_level
|
||||
= (params.group_by_two_level_threshold && result_size >= params.group_by_two_level_threshold)
|
||||
|| (params.group_by_two_level_threshold_bytes && result_size_bytes >= static_cast<Int64>(params.group_by_two_level_threshold_bytes));
|
||||
bool worth_convert_to_two_level = worthConvertToTwoLevel(
|
||||
params.group_by_two_level_threshold, result_size, params.group_by_two_level_threshold_bytes, result_size_bytes);
|
||||
|
||||
/** Converting to a two-level data structure.
|
||||
* It allows you to make, in the subsequent, an effective merge - either economical from memory or parallel.
|
||||
@ -1327,10 +1596,7 @@ void Aggregator::convertToBlockImpl(
|
||||
|
||||
|
||||
template <typename Mapped>
|
||||
inline void Aggregator::insertAggregatesIntoColumns(
|
||||
Mapped & mapped,
|
||||
MutableColumns & final_aggregate_columns,
|
||||
Arena * arena) const
|
||||
inline void Aggregator::insertAggregatesIntoColumns(Mapped & mapped, MutableColumns & final_aggregate_columns, Arena * arena) const
|
||||
{
|
||||
/** Final values of aggregate functions are inserted to columns.
|
||||
* Then states of aggregate functions, that are not longer needed, are destroyed.
|
||||
@ -2179,6 +2445,9 @@ ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedData
|
||||
|
||||
LOG_TRACE(log, "Merging aggregated data");
|
||||
|
||||
if (params.stats_collecting_params.isCollectionAndUseEnabled())
|
||||
updateStatistics(data_variants, params.stats_collecting_params);
|
||||
|
||||
ManyAggregatedDataVariants non_empty_data;
|
||||
non_empty_data.reserve(data_variants.size());
|
||||
for (auto & data : data_variants)
|
||||
@ -2388,9 +2657,8 @@ bool Aggregator::mergeOnBlock(Block block, AggregatedDataVariants & result, bool
|
||||
/// Here all the results in the sum are taken into account, from different threads.
|
||||
auto result_size_bytes = current_memory_usage - memory_usage_before_aggregation;
|
||||
|
||||
bool worth_convert_to_two_level
|
||||
= (params.group_by_two_level_threshold && result_size >= params.group_by_two_level_threshold)
|
||||
|| (params.group_by_two_level_threshold_bytes && result_size_bytes >= static_cast<Int64>(params.group_by_two_level_threshold_bytes));
|
||||
bool worth_convert_to_two_level = worthConvertToTwoLevel(
|
||||
params.group_by_two_level_threshold, result_size, params.group_by_two_level_threshold_bytes, result_size_bytes);
|
||||
|
||||
/** Converting to a two-level data structure.
|
||||
* It allows you to make, in the subsequent, an effective merge - either economical from memory or parallel.
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
|
||||
#include <Parsers/IAST_fwd.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -129,6 +130,7 @@ private:
|
||||
template <typename Base>
|
||||
struct AggregationDataWithNullKeyTwoLevel : public Base
|
||||
{
|
||||
using Base::Base;
|
||||
using Base::impls;
|
||||
|
||||
AggregationDataWithNullKeyTwoLevel() = default;
|
||||
@ -183,6 +185,8 @@ struct AggregationMethodOneNumber
|
||||
|
||||
AggregationMethodOneNumber() = default;
|
||||
|
||||
explicit AggregationMethodOneNumber(size_t size_hint) : data(size_hint) { }
|
||||
|
||||
template <typename Other>
|
||||
explicit AggregationMethodOneNumber(const Other & other) : data(other.data)
|
||||
{
|
||||
@ -225,6 +229,8 @@ struct AggregationMethodString
|
||||
{
|
||||
}
|
||||
|
||||
explicit AggregationMethodString(size_t size_hint) : data(size_hint) { }
|
||||
|
||||
using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped>;
|
||||
|
||||
static const bool low_cardinality_optimization = false;
|
||||
@ -250,6 +256,8 @@ struct AggregationMethodStringNoCache
|
||||
|
||||
AggregationMethodStringNoCache() = default;
|
||||
|
||||
explicit AggregationMethodStringNoCache(size_t size_hint) : data(size_hint) { }
|
||||
|
||||
template <typename Other>
|
||||
explicit AggregationMethodStringNoCache(const Other & other) : data(other.data)
|
||||
{
|
||||
@ -280,6 +288,8 @@ struct AggregationMethodFixedString
|
||||
|
||||
AggregationMethodFixedString() = default;
|
||||
|
||||
explicit AggregationMethodFixedString(size_t size_hint) : data(size_hint) { }
|
||||
|
||||
template <typename Other>
|
||||
explicit AggregationMethodFixedString(const Other & other) : data(other.data)
|
||||
{
|
||||
@ -309,6 +319,8 @@ struct AggregationMethodFixedStringNoCache
|
||||
|
||||
AggregationMethodFixedStringNoCache() = default;
|
||||
|
||||
explicit AggregationMethodFixedStringNoCache(size_t size_hint) : data(size_hint) { }
|
||||
|
||||
template <typename Other>
|
||||
explicit AggregationMethodFixedStringNoCache(const Other & other) : data(other.data)
|
||||
{
|
||||
@ -382,6 +394,8 @@ struct AggregationMethodKeysFixed
|
||||
|
||||
AggregationMethodKeysFixed() = default;
|
||||
|
||||
explicit AggregationMethodKeysFixed(size_t size_hint) : data(size_hint) { }
|
||||
|
||||
template <typename Other>
|
||||
explicit AggregationMethodKeysFixed(const Other & other) : data(other.data)
|
||||
{
|
||||
@ -473,6 +487,8 @@ struct AggregationMethodSerialized
|
||||
|
||||
AggregationMethodSerialized() = default;
|
||||
|
||||
explicit AggregationMethodSerialized(size_t size_hint) : data(size_hint) { }
|
||||
|
||||
template <typename Other>
|
||||
explicit AggregationMethodSerialized(const Other & other) : data(other.data)
|
||||
{
|
||||
@ -652,21 +668,7 @@ struct AggregatedDataVariants : private boost::noncopyable
|
||||
|
||||
~AggregatedDataVariants();
|
||||
|
||||
void init(Type type_)
|
||||
{
|
||||
switch (type_)
|
||||
{
|
||||
case Type::EMPTY: break;
|
||||
case Type::without_key: break;
|
||||
|
||||
#define M(NAME, IS_TWO_LEVEL) \
|
||||
case Type::NAME: (NAME) = std::make_unique<decltype(NAME)::element_type>(); break;
|
||||
APPLY_FOR_AGGREGATED_VARIANTS(M)
|
||||
#undef M
|
||||
}
|
||||
|
||||
type = type_;
|
||||
}
|
||||
void init(Type type_, std::optional<size_t> size_hint = std::nullopt);
|
||||
|
||||
/// Number of rows (different keys).
|
||||
size_t size() const
|
||||
@ -929,29 +931,61 @@ public:
|
||||
bool compile_aggregate_expressions;
|
||||
size_t min_count_to_compile_aggregate_expression;
|
||||
|
||||
struct StatsCollectingParams
|
||||
{
|
||||
StatsCollectingParams();
|
||||
|
||||
StatsCollectingParams(
|
||||
const ASTPtr & select_query_,
|
||||
bool collect_hash_table_stats_during_aggregation_,
|
||||
size_t max_entries_for_hash_table_stats_,
|
||||
size_t max_size_to_preallocate_for_aggregation_);
|
||||
|
||||
bool isCollectionAndUseEnabled() const;
|
||||
|
||||
const UInt64 key = 0;
|
||||
const size_t max_entries_for_hash_table_stats = 0;
|
||||
const size_t max_size_to_preallocate_for_aggregation = 0;
|
||||
};
|
||||
StatsCollectingParams stats_collecting_params;
|
||||
|
||||
Params(
|
||||
const Block & src_header_,
|
||||
const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_,
|
||||
bool overflow_row_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_,
|
||||
size_t group_by_two_level_threshold_, size_t group_by_two_level_threshold_bytes_,
|
||||
const ColumnNumbers & keys_,
|
||||
const AggregateDescriptions & aggregates_,
|
||||
bool overflow_row_,
|
||||
size_t max_rows_to_group_by_,
|
||||
OverflowMode group_by_overflow_mode_,
|
||||
size_t group_by_two_level_threshold_,
|
||||
size_t group_by_two_level_threshold_bytes_,
|
||||
size_t max_bytes_before_external_group_by_,
|
||||
bool empty_result_for_aggregation_by_empty_set_,
|
||||
VolumePtr tmp_volume_, size_t max_threads_,
|
||||
VolumePtr tmp_volume_,
|
||||
size_t max_threads_,
|
||||
size_t min_free_disk_space_,
|
||||
bool compile_aggregate_expressions_,
|
||||
size_t min_count_to_compile_aggregate_expression_,
|
||||
const Block & intermediate_header_ = {})
|
||||
: src_header(src_header_),
|
||||
intermediate_header(intermediate_header_),
|
||||
keys(keys_), aggregates(aggregates_), keys_size(keys.size()), aggregates_size(aggregates.size()),
|
||||
overflow_row(overflow_row_), max_rows_to_group_by(max_rows_to_group_by_), group_by_overflow_mode(group_by_overflow_mode_),
|
||||
group_by_two_level_threshold(group_by_two_level_threshold_), group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_),
|
||||
max_bytes_before_external_group_by(max_bytes_before_external_group_by_),
|
||||
empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_),
|
||||
tmp_volume(tmp_volume_), max_threads(max_threads_),
|
||||
min_free_disk_space(min_free_disk_space_),
|
||||
compile_aggregate_expressions(compile_aggregate_expressions_),
|
||||
min_count_to_compile_aggregate_expression(min_count_to_compile_aggregate_expression_)
|
||||
const Block & intermediate_header_ = {},
|
||||
const StatsCollectingParams & stats_collecting_params_ = {})
|
||||
: src_header(src_header_)
|
||||
, intermediate_header(intermediate_header_)
|
||||
, keys(keys_)
|
||||
, aggregates(aggregates_)
|
||||
, keys_size(keys.size())
|
||||
, aggregates_size(aggregates.size())
|
||||
, overflow_row(overflow_row_)
|
||||
, max_rows_to_group_by(max_rows_to_group_by_)
|
||||
, group_by_overflow_mode(group_by_overflow_mode_)
|
||||
, group_by_two_level_threshold(group_by_two_level_threshold_)
|
||||
, group_by_two_level_threshold_bytes(group_by_two_level_threshold_bytes_)
|
||||
, max_bytes_before_external_group_by(max_bytes_before_external_group_by_)
|
||||
, empty_result_for_aggregation_by_empty_set(empty_result_for_aggregation_by_empty_set_)
|
||||
, tmp_volume(tmp_volume_)
|
||||
, max_threads(max_threads_)
|
||||
, min_free_disk_space(min_free_disk_space_)
|
||||
, compile_aggregate_expressions(compile_aggregate_expressions_)
|
||||
, min_count_to_compile_aggregate_expression(min_count_to_compile_aggregate_expression_)
|
||||
, stats_collecting_params(stats_collecting_params_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -1350,4 +1384,13 @@ APPLY_FOR_AGGREGATED_VARIANTS(M)
|
||||
|
||||
#undef M
|
||||
|
||||
|
||||
struct HashTablesCacheStatistics
|
||||
{
|
||||
size_t entries = 0;
|
||||
size_t hits = 0;
|
||||
size_t misses = 0;
|
||||
};
|
||||
|
||||
std::optional<HashTablesCacheStatistics> getHashTablesCacheStatistics();
|
||||
}
|
||||
|
@ -32,6 +32,7 @@ namespace CurrentMetrics
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event AsyncInsertQuery;
|
||||
extern const Event AsyncInsertBytes;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
@ -222,7 +223,9 @@ void AsynchronousInsertQueue::pushImpl(InsertData::EntryPtr entry, QueueIterator
|
||||
if (!data)
|
||||
data = std::make_unique<InsertData>();
|
||||
|
||||
data->size += entry->bytes.size();
|
||||
size_t entry_data_size = entry->bytes.size();
|
||||
|
||||
data->size += entry_data_size;
|
||||
data->last_update = std::chrono::steady_clock::now();
|
||||
data->entries.emplace_back(entry);
|
||||
|
||||
@ -239,6 +242,7 @@ void AsynchronousInsertQueue::pushImpl(InsertData::EntryPtr entry, QueueIterator
|
||||
|
||||
CurrentMetrics::add(CurrentMetrics::PendingAsyncInsert);
|
||||
ProfileEvents::increment(ProfileEvents::AsyncInsertQuery);
|
||||
ProfileEvents::increment(ProfileEvents::AsyncInsertBytes, entry_data_size);
|
||||
}
|
||||
|
||||
void AsynchronousInsertQueue::waitForProcessingQuery(const String & query_id, const Milliseconds & timeout)
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <Interpreters/Aggregator.h>
|
||||
#include <Interpreters/AsynchronousMetrics.h>
|
||||
#include <Interpreters/AsynchronousMetricLog.h>
|
||||
#include <Interpreters/JIT/CompiledExpressionCache.h>
|
||||
@ -12,6 +13,7 @@
|
||||
#include <Storages/MarkCache.h>
|
||||
#include <Storages/StorageMergeTree.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/MergeTree/MergeTreeMetadataCache.h>
|
||||
#include <IO/UncompressedCache.h>
|
||||
#include <IO/MMappedFileCache.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
@ -607,6 +609,15 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
|
||||
}
|
||||
}
|
||||
|
||||
#if USE_ROCKSDB
|
||||
{
|
||||
if (auto metadata_cache = getContext()->tryGetMergeTreeMetadataCache())
|
||||
{
|
||||
new_values["MergeTreeMetadataCacheSize"] = metadata_cache->getEstimateNumKeys();
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if USE_EMBEDDED_COMPILER
|
||||
{
|
||||
if (auto * compiled_expression_cache = CompiledExpressionCacheFactory::instance().tryGetCache())
|
||||
@ -617,8 +628,18 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
new_values["Uptime"] = getContext()->getUptimeSeconds();
|
||||
|
||||
{
|
||||
if (const auto stats = getHashTablesCacheStatistics())
|
||||
{
|
||||
new_values["HashTableStatsCacheEntries"] = stats->entries;
|
||||
new_values["HashTableStatsCacheHits"] = stats->hits;
|
||||
new_values["HashTableStatsCacheMisses"] = stats->misses;
|
||||
}
|
||||
}
|
||||
|
||||
/// Process process memory usage according to OS
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD)
|
||||
{
|
||||
|
@ -169,6 +169,7 @@ public:
|
||||
if (columns.size() != float_features_count + cat_features_count)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Number of columns is different with number of features: columns size {} float features size {} + cat features size {}",
|
||||
columns.size(),
|
||||
float_features_count,
|
||||
cat_features_count);
|
||||
|
||||
|
@ -85,11 +85,15 @@
|
||||
#include <Storages/MergeTree/BackgroundJobsAssignee.h>
|
||||
#include <Storages/MergeTree/MergeTreeBackgroundExecutor.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
|
||||
#include <Storages/MergeTree/MergeTreeMetadataCache.h>
|
||||
#include <Interpreters/SynonymsExtensions.h>
|
||||
#include <Interpreters/Lemmatizers.h>
|
||||
#include <Interpreters/ClusterDiscovery.h>
|
||||
#include <filesystem>
|
||||
|
||||
#if USE_ROCKSDB
|
||||
#include <rocksdb/table.h>
|
||||
#endif
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
@ -276,6 +280,11 @@ struct ContextSharedPart
|
||||
|
||||
Context::ConfigReloadCallback config_reload_callback;
|
||||
|
||||
#if USE_ROCKSDB
|
||||
/// Global merge tree metadata cache, stored in rocksdb.
|
||||
MergeTreeMetadataCachePtr merge_tree_metadata_cache;
|
||||
#endif
|
||||
|
||||
ContextSharedPart()
|
||||
: access_control(std::make_unique<AccessControl>())
|
||||
, global_overcommit_tracker(&process_list)
|
||||
@ -410,6 +419,15 @@ struct ContextSharedPart
|
||||
trace_collector.reset();
|
||||
/// Stop zookeeper connection
|
||||
zookeeper.reset();
|
||||
|
||||
#if USE_ROCKSDB
|
||||
/// Shutdown merge tree metadata cache
|
||||
if (merge_tree_metadata_cache)
|
||||
{
|
||||
merge_tree_metadata_cache->shutdown();
|
||||
merge_tree_metadata_cache.reset();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Can be removed w/o context lock
|
||||
@ -2048,6 +2066,23 @@ zkutil::ZooKeeperPtr Context::getAuxiliaryZooKeeper(const String & name) const
|
||||
return zookeeper->second;
|
||||
}
|
||||
|
||||
#if USE_ROCKSDB
|
||||
MergeTreeMetadataCachePtr Context::getMergeTreeMetadataCache() const
|
||||
{
|
||||
auto cache = tryGetMergeTreeMetadataCache();
|
||||
if (!cache)
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Merge tree metadata cache is not initialized, please add config merge_tree_metadata_cache in config.xml and restart");
|
||||
return cache;
|
||||
}
|
||||
|
||||
MergeTreeMetadataCachePtr Context::tryGetMergeTreeMetadataCache() const
|
||||
{
|
||||
return shared->merge_tree_metadata_cache;
|
||||
}
|
||||
#endif
|
||||
|
||||
void Context::resetZooKeeper() const
|
||||
{
|
||||
std::lock_guard lock(shared->zookeeper_mutex);
|
||||
@ -2291,6 +2326,13 @@ void Context::initializeTraceCollector()
|
||||
shared->initializeTraceCollector(getTraceLog());
|
||||
}
|
||||
|
||||
#if USE_ROCKSDB
|
||||
void Context::initializeMergeTreeMetadataCache(const String & dir, size_t size)
|
||||
{
|
||||
shared->merge_tree_metadata_cache = MergeTreeMetadataCache::create(dir, size);
|
||||
}
|
||||
#endif
|
||||
|
||||
bool Context::hasTraceCollector() const
|
||||
{
|
||||
return shared->hasTraceCollector();
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <base/types.h>
|
||||
#include <Storages/MergeTree/ParallelReplicasReadingCoordinator.h>
|
||||
|
||||
|
||||
#include "config_core.h"
|
||||
|
||||
#include <boost/container/flat_set.hpp>
|
||||
@ -152,6 +153,12 @@ using ReadTaskCallback = std::function<String()>;
|
||||
|
||||
using MergeTreeReadTaskCallback = std::function<std::optional<PartitionReadResponse>(PartitionReadRequest)>;
|
||||
|
||||
|
||||
#if USE_ROCKSDB
|
||||
class MergeTreeMetadataCache;
|
||||
using MergeTreeMetadataCachePtr = std::shared_ptr<MergeTreeMetadataCache>;
|
||||
#endif
|
||||
|
||||
/// An empty interface for an arbitrary object that may be attached by a shared pointer
|
||||
/// to query context, when using ClickHouse as a library.
|
||||
struct IHostContext
|
||||
@ -179,6 +186,7 @@ private:
|
||||
std::unique_ptr<ContextSharedPart> shared;
|
||||
};
|
||||
|
||||
|
||||
/** A set of known objects that can be used in the query.
|
||||
* Consists of a shared part (always common to all sessions and queries)
|
||||
* and copied part (which can be its own for each session or query).
|
||||
@ -680,6 +688,11 @@ public:
|
||||
|
||||
UInt32 getZooKeeperSessionUptime() const;
|
||||
|
||||
#if USE_ROCKSDB
|
||||
MergeTreeMetadataCachePtr getMergeTreeMetadataCache() const;
|
||||
MergeTreeMetadataCachePtr tryGetMergeTreeMetadataCache() const;
|
||||
#endif
|
||||
|
||||
#if USE_NURAFT
|
||||
std::shared_ptr<KeeperDispatcher> & getKeeperDispatcher() const;
|
||||
#endif
|
||||
@ -769,6 +782,10 @@ public:
|
||||
/// Call after initialization before using trace collector.
|
||||
void initializeTraceCollector();
|
||||
|
||||
#if USE_ROCKSDB
|
||||
void initializeMergeTreeMetadataCache(const String & dir, size_t size);
|
||||
#endif
|
||||
|
||||
bool hasTraceCollector() const;
|
||||
|
||||
/// Nullptr if the query log is not ready for this moment.
|
||||
|
@ -233,7 +233,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
|
||||
{
|
||||
assert(!db_and_table.first && !db_and_table.second);
|
||||
if (exception)
|
||||
exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs());
|
||||
exception->emplace(fmt::format("Table {} doesn't exist", table_id.getNameForLogs()), ErrorCodes::UNKNOWN_TABLE);
|
||||
return {};
|
||||
}
|
||||
|
||||
@ -263,7 +263,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
|
||||
/// If table_id has no UUID, then the name of database was specified by user and table_id was not resolved through context.
|
||||
/// Do not allow access to TEMPORARY_DATABASE because it contains all temporary tables of all contexts and users.
|
||||
if (exception)
|
||||
exception->emplace(ErrorCodes::DATABASE_ACCESS_DENIED, "Direct access to `{}` database is not allowed", String(TEMPORARY_DATABASE));
|
||||
exception->emplace(fmt::format("Direct access to `{}` database is not allowed", TEMPORARY_DATABASE), ErrorCodes::DATABASE_ACCESS_DENIED);
|
||||
return {};
|
||||
}
|
||||
|
||||
@ -274,7 +274,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
|
||||
if (databases.end() == it)
|
||||
{
|
||||
if (exception)
|
||||
exception->emplace(ErrorCodes::UNKNOWN_DATABASE, "Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName()));
|
||||
exception->emplace(fmt::format("Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName())), ErrorCodes::UNKNOWN_DATABASE);
|
||||
return {};
|
||||
}
|
||||
database = it->second;
|
||||
@ -282,7 +282,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl(
|
||||
|
||||
auto table = database->tryGetTable(table_id.table_name, context_);
|
||||
if (!table && exception)
|
||||
exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Table {} doesn't exist", table_id.getNameForLogs());
|
||||
exception->emplace(fmt::format("Table {} doesn't exist", table_id.getNameForLogs()), ErrorCodes::UNKNOWN_TABLE);
|
||||
if (!table)
|
||||
database = nullptr;
|
||||
|
||||
|
@ -358,6 +358,7 @@ BlockIO InterpreterInsertQuery::execute()
|
||||
|
||||
auto new_context = Context::createCopy(context);
|
||||
new_context->setSettings(new_settings);
|
||||
new_context->setInsertionTable(getContext()->getInsertionTable());
|
||||
|
||||
InterpreterSelectWithUnionQuery interpreter_select{
|
||||
query.select, new_context, SelectQueryOptions(QueryProcessingStage::Complete, 1)};
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user