Merge branch 'master' into fix-read-settings-in-index-loading
Commit dddf7d3c86
3 .github/PULL_REQUEST_TEMPLATE.md vendored
@@ -59,6 +59,9 @@ At a minimum, the following information should be added (but add more as needed)
- [ ] <!---ci_exclude_tsan|msan|ubsan|coverage--> Exclude: All with TSAN, MSAN, UBSAN, Coverage
- [ ] <!---ci_exclude_aarch64|release|debug--> Exclude: All with aarch64, release, debug
---
- [ ] <!---ci_include_fuzzer--> Run only fuzzers related jobs (libFuzzer fuzzers, AST fuzzers, etc.)
- [ ] <!---ci_exclude_ast--> Exclude: AST fuzzers
---
- [ ] <!---do_not_test--> Do not test
- [ ] <!---woolen_wolfdog--> Woolen Wolfdog
- [ ] <!---upload_all--> Upload binaries for special builds
18 .github/actions/debug/action.yml vendored Normal file
@@ -0,0 +1,18 @@
name: DebugInfo
description: Prints workflow debug info

runs:
  using: "composite"
  steps:
    - name: Print envs
      shell: bash
      run: |
        echo "::group::Envs"
        env
        echo "::endgroup::"
    - name: Print Event.json
      shell: bash
      run: |
        echo "::group::Event.json"
        python3 -m json.tool "$GITHUB_EVENT_PATH"
        echo "::endgroup::"
109 .github/workflows/auto_releases.yml vendored Normal file
@@ -0,0 +1,109 @@
name: AutoReleases

env:
  PYTHONUNBUFFERED: 1

concurrency:
  group: autoreleases

on:
  # schedule:
  #   - cron: '0 9 * * *'
  workflow_dispatch:
    inputs:
      dry-run:
        description: 'Dry run'
        required: false
        default: true
        type: boolean

jobs:
  AutoReleaseInfo:
    runs-on: [self-hosted, style-checker-aarch64]
    outputs:
      data: ${{ steps.info.outputs.AUTO_RELEASE_PARAMS }}
      dry_run: ${{ steps.info.outputs.DRY_RUN }}
    steps:
      - name: Debug Info
        uses: ./.github/actions/debug
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          ROBOT_CLICKHOUSE_SSH_KEY<<RCSK
          ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
          RCSK
          EOF
          echo "DRY_RUN=true" >> "$GITHUB_ENV"
      - name: Check out repository code
        uses: ClickHouse/checkout@v1
      - name: Prepare Info
        id: info
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 auto_release.py --prepare
          echo "::group::Auto Release Info"
          python3 -m json.tool /tmp/autorelease_info.json
          echo "::endgroup::"
          {
            echo 'AUTO_RELEASE_PARAMS<<EOF'
            cat /tmp/autorelease_info.json
            echo 'EOF'
          } >> "$GITHUB_ENV"
          {
            echo 'AUTO_RELEASE_PARAMS<<EOF'
            cat /tmp/autorelease_info.json
            echo 'EOF'
          } >> "$GITHUB_OUTPUT"
          echo "DRY_RUN=true" >> "$GITHUB_OUTPUT"
      - name: Post Release Branch statuses
        run: |
          cd "$GITHUB_WORKSPACE/tests/ci"
          python3 auto_release.py --post-status
      - name: Clean up
        uses: ./.github/actions/clean

  Release_0:
    needs: AutoReleaseInfo
    name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].release_branch }}
    if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].ready }}
    uses: ./.github/workflows/create_release.yml
    with:
      ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].commit_sha }}
      type: patch
      dry-run: ${{ needs.AutoReleaseInfo.outputs.dry_run }}
#
# Release_1:
#   needs: [AutoReleaseInfo, Release_0]
#   name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].release_branch }}
#   if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].ready }}
#   uses: ./.github/workflows/create_release.yml
#   with:
#     ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[1].commit_sha }}
#     type: patch
#     dry-run: ${{ env.DRY_RUN }}
#
# Release_2:
#   needs: [AutoReleaseInfo, Release_1]
#   name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[2].release_branch }}
#   if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[2].ready }}
#   uses: ./.github/workflow/create_release.yml
#   with:
#     ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[0].commit_sha }}
#     type: patch
#     dry-run: ${{ env.DRY_RUN }}
#
# Release_3:
#   needs: [AutoReleaseInfo, Release_2]
#   name: Release ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].release_branch }}
#   if: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3] && fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].ready }}
#   uses: ./.github/workflow/create_release.yml
#   with:
#     ref: ${{ fromJson(needs.AutoReleaseInfo.outputs.data).releases[3].commit_sha }}
#     type: patch
#     dry-run: ${{ env.DRY_RUN }}

# - name: Post Slack Message
#   if: ${{ !cancelled() }}
#   run: |
#     cd "$GITHUB_WORKSPACE/tests/ci"
#     python3 auto_release.py --post-auto-release-complete --wf-status ${{ job.status }}
24 .github/workflows/create_release.yml vendored
@@ -2,6 +2,7 @@ name: CreateRelease

concurrency:
group: release

'on':
workflow_dispatch:
inputs:
@@ -26,6 +27,26 @@ concurrency:
required: false
default: false
type: boolean
workflow_call:
inputs:
ref:
description: 'Git reference (branch or commit sha) from which to create the release'
required: true
type: string
type:
description: 'The type of release: "new" for a new release or "patch" for a patch release'
required: true
type: string
only-repo:
description: 'Run only repos updates including docker (repo-recovery, tests)'
required: false
default: false
type: boolean
dry-run:
description: 'Dry run'
required: false
default: false
type: boolean

jobs:
CreateRelease:
@@ -101,6 +122,7 @@ jobs:
--volume=".:/wd" --workdir="/wd" \
clickhouse/style-test \
./tests/ci/changelog.py -v --debug-helpers \
--gh-user-or-token ${{ secrets.ROBOT_CLICKHOUSE_COMMIT_TOKEN }} \
--jobs=5 \
--output="./docs/changelogs/${{ env.RELEASE_TAG }}.md" ${{ env.RELEASE_TAG }}
git add ./docs/changelogs/${{ env.RELEASE_TAG }}.md
@@ -129,9 +151,9 @@ jobs:
if: ${{ inputs.type == 'patch' && ! inputs.only-repo }}
shell: bash
run: |
python3 ./tests/ci/create_release.py --set-progress-completed
git reset --hard HEAD
git checkout "$GITHUB_REF_NAME"
python3 ./tests/ci/create_release.py --set-progress-completed
- name: Create GH Release
if: ${{ inputs.type == 'patch' && ! inputs.only-repo }}
shell: bash
3 .github/workflows/release_branches.yml vendored
@@ -482,7 +482,7 @@ jobs:
if: ${{ !failure() }}
run: |
# update overall ci report
python3 finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
python3 ./tests/ci/finish_check.py --wf-status ${{ contains(needs.*.result, 'failure') && 'failure' || 'success' }}
- name: Check Workflow results
if: ${{ !cancelled() }}
run: |
@@ -490,5 +490,4 @@ jobs:
cat > "$WORKFLOW_RESULT_FILE" << 'EOF'
${{ toJson(needs) }}
EOF

python3 ./tests/ci/ci_buddy.py --check-wf-status
6 .gitmodules vendored
@@ -230,9 +230,6 @@
[submodule "contrib/minizip-ng"]
path = contrib/minizip-ng
url = https://github.com/zlib-ng/minizip-ng
[submodule "contrib/annoy"]
path = contrib/annoy
url = https://github.com/ClickHouse/annoy
[submodule "contrib/qpl"]
path = contrib/qpl
url = https://github.com/intel/qpl
@@ -348,9 +345,6 @@
[submodule "contrib/FP16"]
path = contrib/FP16
url = https://github.com/Maratyszcza/FP16.git
[submodule "contrib/robin-map"]
path = contrib/robin-map
url = https://github.com/Tessil/robin-map.git
[submodule "contrib/aklomp-base64"]
path = contrib/aklomp-base64
url = https://github.com/aklomp/base64.git
@@ -322,17 +322,21 @@ if (DISABLE_OMIT_FRAME_POINTER)
set (CMAKE_ASM_FLAGS_ADD "${CMAKE_ASM_FLAGS_ADD} -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer")
endif()

# Before you start hating your debugger because it refuses to show variables ('<optimized out>'), try building with -DDEBUG_O_LEVEL="0"
# https://stackoverflow.com/questions/63386189/whats-the-difference-between-a-compilers-o0-option-and-og-option/63386263#63386263
set(DEBUG_O_LEVEL "g" CACHE STRING "The -Ox level used for debug builds")

set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${CMAKE_CXX_FLAGS_ADD}")
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}")
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}")
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O${DEBUG_O_LEVEL} ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}")

set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${CMAKE_C_FLAGS_ADD}")
set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}")
set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}")
set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O${DEBUG_O_LEVEL} ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}")

set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMPILER_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")
set (CMAKE_ASM_FLAGS_RELWITHDEBINFO "${CMAKE_ASM_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")
set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -Og ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")
set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O${DEBUG_O_LEVEL} ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}")

if (OS_DARWIN)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
@@ -1,4 +1,4 @@
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
add_compile_options("$<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>")

if (USE_CLANG_TIDY)
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
@@ -8,6 +8,8 @@ endif ()
# when instantiated from JSON.cpp. Try again when libcxx(abi) and Clang are upgraded to 16.
set (CMAKE_CXX_STANDARD 20)

configure_file(GitHash.cpp.in GitHash.generated.cpp)

set (SRCS
argsToConfig.cpp
cgroupsv2.cpp
@@ -33,6 +35,7 @@ set (SRCS
safeExit.cpp
throwError.cpp
Numa.cpp
GitHash.generated.cpp
)

add_library (common ${SRCS})
@@ -27,27 +27,6 @@ bool cgroupsV2Enabled()
#endif
}

bool cgroupsV2MemoryControllerEnabled()
{
#if defined(OS_LINUX)
chassert(cgroupsV2Enabled());
/// According to https://docs.kernel.org/admin-guide/cgroup-v2.html, file "cgroup.controllers" defines which controllers are available
/// for the current + child cgroups. The set of available controllers can be restricted from level to level using file
/// "cgroups.subtree_control". It is therefore sufficient to check the bottom-most nested "cgroup.controllers" file.
fs::path cgroup_dir = cgroupV2PathOfProcess();
if (cgroup_dir.empty())
return false;
std::ifstream controllers_file(cgroup_dir / "cgroup.controllers");
if (!controllers_file.is_open())
return false;
std::string controllers;
std::getline(controllers_file, controllers);
return controllers.find("memory") != std::string::npos;
#else
return false;
#endif
}

fs::path cgroupV2PathOfProcess()
{
#if defined(OS_LINUX)
@@ -71,3 +50,28 @@ fs::path cgroupV2PathOfProcess()
return {};
#endif
}

std::optional<std::string> getCgroupsV2PathContainingFile([[maybe_unused]] std::string_view file_name)
{
#if defined(OS_LINUX)
if (!cgroupsV2Enabled())
return {};

fs::path current_cgroup = cgroupV2PathOfProcess();
if (current_cgroup.empty())
return {};

/// Return the bottom-most nested file. If there is no such file at the current
/// level, try again at the parent level as settings are inherited.
while (current_cgroup != default_cgroups_mount.parent_path())
{
const auto path = current_cgroup / file_name;
if (fs::exists(path))
return {current_cgroup};
current_cgroup = current_cgroup.parent_path();
}
return {};
#else
return {};
#endif
}
@@ -1,6 +1,7 @@
#pragma once

#include <filesystem>
#include <string_view>

#if defined(OS_LINUX)
/// I think it is possible to mount the cgroups hierarchy somewhere else (e.g. when in containers).
@@ -11,11 +12,11 @@ static inline const std::filesystem::path default_cgroups_mount = "/sys/fs/cgrou
/// Is cgroups v2 enabled on the system?
bool cgroupsV2Enabled();

/// Is the memory controller of cgroups v2 enabled on the system?
/// Assumes that cgroupsV2Enabled() is enabled.
bool cgroupsV2MemoryControllerEnabled();

/// Detects which cgroup v2 the process belongs to and returns the filesystem path to the cgroup.
/// Returns an empty path the cgroup cannot be determined.
/// Assumes that cgroupsV2Enabled() is enabled.
std::filesystem::path cgroupV2PathOfProcess();

/// Returns the most nested cgroup dir containing the specified file.
/// If cgroups v2 is not enabled - returns an empty optional.
std::optional<std::string> getCgroupsV2PathContainingFile([[maybe_unused]] std::string_view file_name);
@@ -19,9 +19,6 @@ std::optional<uint64_t> getCgroupsV2MemoryLimit()
if (!cgroupsV2Enabled())
return {};

if (!cgroupsV2MemoryControllerEnabled())
return {};

std::filesystem::path current_cgroup = cgroupV2PathOfProcess();
if (current_cgroup.empty())
return {};
@@ -58,6 +58,10 @@ namespace Net

void setKeepAliveTimeout(Poco::Timespan keepAliveTimeout);

size_t getKeepAliveTimeout() const { return _keepAliveTimeout.totalSeconds(); }

size_t getMaxKeepAliveRequests() const { return _maxKeepAliveRequests; }

private:
bool _firstRequest;
Poco::Timespan _keepAliveTimeout;
@@ -19,11 +19,11 @@ namespace Poco {
namespace Net {

HTTPServerSession::HTTPServerSession(const StreamSocket& socket, HTTPServerParams::Ptr pParams):
HTTPSession(socket, pParams->getKeepAlive()),
_firstRequest(true),
_keepAliveTimeout(pParams->getKeepAliveTimeout()),
_maxKeepAliveRequests(pParams->getMaxKeepAliveRequests())
HTTPServerSession::HTTPServerSession(const StreamSocket & socket, HTTPServerParams::Ptr pParams)
: HTTPSession(socket, pParams->getKeepAlive())
, _firstRequest(true)
, _keepAliveTimeout(pParams->getKeepAliveTimeout())
, _maxKeepAliveRequests(pParams->getMaxKeepAliveRequests())
{
setTimeout(pParams->getTimeout());
}
@@ -52,11 +52,12 @@ bool HTTPServerSession::hasMoreRequests()
}
else if (_maxKeepAliveRequests != 0 && getKeepAlive())
{
if (_maxKeepAliveRequests > 0)
--_maxKeepAliveRequests;
return buffered() > 0 || socket().poll(_keepAliveTimeout, Socket::SELECT_READ);
}
else return false;
if (_maxKeepAliveRequests > 0)
--_maxKeepAliveRequests;
return buffered() > 0 || socket().poll(_keepAliveTimeout, Socket::SELECT_READ);
}
else
return false;
}
@@ -2,11 +2,11 @@

# NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54489)
SET(VERSION_REVISION 54490)
SET(VERSION_MAJOR 24)
SET(VERSION_MINOR 8)
SET(VERSION_MINOR 9)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 3f8b27d7accd2b5ec4afe7d0dd459115323304af)
SET(VERSION_DESCRIBE v24.8.1.1-testing)
SET(VERSION_STRING 24.8.1.1)
SET(VERSION_GITHASH e02b434d2fc0c4fbee29ca675deab7474d274608)
SET(VERSION_DESCRIBE v24.9.1.1-testing)
SET(VERSION_STRING 24.9.1.1)
# end of autochange
@@ -42,19 +42,9 @@ endif ()
# But use 2 parallel jobs, since:
# - this is what llvm does
# - and I've verfied that lld-11 does not use all available CPU time (in peak) while linking one binary
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO)
if (ARCH_AARCH64)
# aarch64 builds start to often fail with OOMs (reason not yet clear), for now let's limit the concurrency
message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 1.")
set (PARALLEL_LINK_JOBS 1)
if (LINKER_NAME MATCHES "lld")
math(EXPR LTO_JOBS ${NUMBER_OF_LOGICAL_CORES}/4)
set (CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} -Wl,--thinlto-jobs=${LTO_JOBS}")
endif()
elseif (PARALLEL_LINK_JOBS GREATER 2)
message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.")
set (PARALLEL_LINK_JOBS 2)
endif ()
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLEL_LINK_JOBS GREATER 2)
message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.")
set (PARALLEL_LINK_JOBS 2)
endif()

message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).")
@@ -57,8 +57,8 @@ option(WITH_COVERAGE "Instrumentation for code coverage with default implementat

if (WITH_COVERAGE)
message (STATUS "Enabled instrumentation for code coverage")
set(COVERAGE_FLAGS "SHELL:-fprofile-instr-generate -fcoverage-mapping")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
set (COVERAGE_FLAGS -fprofile-instr-generate -fcoverage-mapping)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")
endif()

option (SANITIZE_COVERAGE "Instrumentation for code coverage with custom callbacks" OFF)
6 contrib/CMakeLists.txt vendored
@@ -205,14 +205,12 @@ add_contrib (morton-nd-cmake morton-nd)
if (ARCH_S390X)
add_contrib(crc32-s390x-cmake crc32-s390x)
endif()
add_contrib (annoy-cmake annoy)

option(ENABLE_USEARCH "Enable USearch (Approximate Neighborhood Search, HNSW) support" ${ENABLE_LIBRARIES})
option(ENABLE_USEARCH "Enable USearch" ${ENABLE_LIBRARIES})
if (ENABLE_USEARCH)
add_contrib (FP16-cmake FP16)
add_contrib (robin-map-cmake robin-map)
add_contrib (SimSIMD-cmake SimSIMD)
add_contrib (usearch-cmake usearch) # requires: FP16, robin-map, SimdSIMD
add_contrib (usearch-cmake usearch) # requires: FP16, SimdSIMD
else ()
message(STATUS "Not using USearch")
endif ()
2 contrib/SimSIMD vendored
@@ -1 +1 @@
Subproject commit de2cb75b9e9e3389d5e1e51fd9f8ed151f3c17cf
Subproject commit 91a76d1ac519b3b9dc8957734a3dabd985f00c26
1 contrib/annoy vendored
@@ -1 +0,0 @@
Subproject commit f2ac8e7b48f9a9cf676d3b58286e5455aba8e956
@@ -1,24 +0,0 @@
option(ENABLE_ANNOY "Enable Annoy index support" ${ENABLE_LIBRARIES})

# Annoy index should be disabled with undefined sanitizer. Because of memory storage optimizations
# (https://github.com/ClickHouse/annoy/blob/9d8a603a4cd252448589e84c9846f94368d5a289/src/annoylib.h#L442-L463)
# UBSan fails and leads to crash. Simmilar issue is already opened in Annoy repo
# https://github.com/spotify/annoy/issues/456
# Problem with aligment can lead to errors like
# (https://stackoverflow.com/questions/46790550/c-undefined-behavior-strict-aliasing-rule-or-incorrect-alignment)
# or will lead to crash on arm https://developer.arm.com/documentation/ka003038/latest
# This issues should be resolved before annoy became non-experimental (--> setting "allow_experimental_annoy_index")
if ((NOT ENABLE_ANNOY) OR (SANITIZE STREQUAL "undefined") OR (ARCH_AARCH64))
message (STATUS "Not using annoy")
return()
endif()

set(ANNOY_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/annoy")
set(ANNOY_SOURCE_DIR "${ANNOY_PROJECT_DIR}/src")

add_library(_annoy INTERFACE)
target_include_directories(_annoy SYSTEM INTERFACE ${ANNOY_SOURCE_DIR})

add_library(ch_contrib::annoy ALIAS _annoy)
target_compile_definitions(_annoy INTERFACE ENABLE_ANNOY)
target_compile_definitions(_annoy INTERFACE ANNOYLIB_MULTITHREADED_BUILD)
2 contrib/aws vendored
@@ -1 +1 @@
Subproject commit 1c2946bfcb7f1e3ae0a858de0b59d4f1a7b4ccaf
Subproject commit d5450d76abda556ce145ddabe7e0cc6a7644ec59
2 contrib/aws-crt-cpp vendored
@@ -1 +1 @@
Subproject commit f532d6abc0d2b0d8b5d6fe9e7c51eaedbe4afbd0
Subproject commit e5aa45cacfdcda7719ead38760e7c61076f5745f
2 contrib/libprotobuf-mutator vendored
@@ -1 +1 @@
Subproject commit 1f95f8083066f5b38fd2db172e7e7f9aa7c49d2d
Subproject commit b922c8ab9004ef9944982e4f165e2747b13223fa
2 contrib/libunwind vendored
@@ -1 +1 @@
Subproject commit a89d904befea07814628c6ce0b44083c4e149c62
Subproject commit 601db0b0e03018c01710470a37703b618f9cf08b
1 contrib/robin-map vendored
@@ -1 +0,0 @@
Subproject commit 851a59e0e3063ee0e23089062090a73fd3de482d
@@ -1 +0,0 @@
# See contrib/usearch-cmake/CMakeLists.txt
2 contrib/usearch vendored
@@ -1 +1 @@
Subproject commit 30810452bec5d3d3aa0931bb5d761e2f09aa6356
Subproject commit e21a5778a0d4469ddaf38c94b7be0196bb701ee4
@@ -1,17 +1,22 @@
set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch")
set(USEARCH_SOURCE_DIR "${USEARCH_PROJECT_DIR}/include")

set(FP16_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/FP16")
set(ROBIN_MAP_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/robin-map")
set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD-map")
set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD")
set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch")

add_library(_usearch INTERFACE)

target_include_directories(_usearch SYSTEM INTERFACE
${FP16_PROJECT_DIR}/include
${ROBIN_MAP_PROJECT_DIR}/include
${SIMSIMD_PROJECT_DIR}/include
${USEARCH_SOURCE_DIR})
${USEARCH_PROJECT_DIR}/include)

target_compile_definitions(_usearch INTERFACE USEARCH_USE_FP16LIB)

# target_compile_definitions(_usearch INTERFACE USEARCH_USE_SIMSIMD)
# ^^ simsimd is not enabled at the moment. Reasons:
# - Vectorization is important for raw scans but not so much for HNSW. We use usearch only for HNSW.
# - Simsimd does compile-time dispatch (choice of SIMD kernels determined by capabilities of the build machine) or dynamic dispatch (SIMD
#   kernels chosen at runtime based on cpuid instruction). Since current builds are limited to SSE 4.2 (x86) and NEON (ARM), the speedup of
#   the former would be moderate compared to AVX-512 / SVE. The latter is at the moment too fragile with respect to portability across x86
#   and ARM machines ... certain conbinations of quantizations / distance functions / SIMD instructions are not implemented at the moment.

add_library(ch_contrib::usearch ALIAS _usearch)
target_compile_definitions(_usearch INTERFACE ENABLE_USEARCH)
@@ -108,7 +108,8 @@ if [ -n "$MAKE_DEB" ]; then
bash -x /build/packages/build
fi

mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output
mv ./programs/clickhouse* /output ||:
mv ./programs/*_fuzzer /output ||:
[ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output
[ -x ./programs/self-extracting/clickhouse-stripped ] && mv ./programs/self-extracting/clickhouse-stripped /output
[ -x ./programs/self-extracting/clickhouse-keeper ] && mv ./programs/self-extracting/clickhouse-keeper /output
@@ -256,22 +256,6 @@ function configure
rm -f "$FASTTEST_DATA/config.d/secure_ports.xml"
}

function timeout_with_logging() {
local exit_code=0

timeout -s TERM --preserve-status "${@}" || exit_code="${?}"

echo "Checking if it is a timeout. The code 124 will indicate a timeout."
if [[ "${exit_code}" -eq "124" ]]
then
echo "The command 'timeout ${*}' has been killed by timeout."
else
echo "No, it isn't a timeout."
fi

return $exit_code
}

function run_tests
{
clickhouse-server --version
@@ -340,7 +324,7 @@ case "$stage" in
configure 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/install_log.txt"
;&
"run_tests")
timeout_with_logging 35m bash -c run_tests ||:
run_tests ||:
/process_functional_tests_result.py --in-results-dir "$FASTTEST_OUTPUT/" \
--out-results-file "$FASTTEST_OUTPUT/test_results.tsv" \
--out-status-file "$FASTTEST_OUTPUT/check_status.tsv" || echo -e "failure\tCannot parse results" > "$FASTTEST_OUTPUT/check_status.tsv"
@@ -35,7 +35,6 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \

ENV TZ=Europe/Amsterdam
ENV MAX_RUN_TIME=9000
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git"
@@ -94,7 +94,7 @@ function run_tests()

export -f run_tests

timeout "${MAX_RUN_TIME:-9000}" bash -c run_tests || echo "timeout reached" >&2
run_tests

#/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv
@@ -22,7 +22,6 @@ ARG sqltest_repo="https://github.com/elliotchance/sqltest/"
RUN git clone ${sqltest_repo}

ENV TZ=UTC
ENV MAX_RUN_TIME=900
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

COPY run.sh /
@@ -4,9 +4,6 @@
source /setup_export_logs.sh
set -e -x

MAX_RUN_TIME=${MAX_RUN_TIME:-3600}
MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 3600 : MAX_RUN_TIME))

# Choose random timezone for this test run
TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
echo "Choosen random timezone $TZ"
@@ -118,14 +115,11 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
clickhouse-client --query "CREATE TABLE test.hits AS datasets.hits_v1"
clickhouse-client --query "CREATE TABLE test.visits AS datasets.visits_v1"

clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1"
clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1"
clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1"

clickhouse-client --query "DROP TABLE datasets.hits_v1"
clickhouse-client --query "DROP TABLE datasets.visits_v1"

MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours)
MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited)
else
clickhouse-client --query "CREATE DATABASE test"
clickhouse-client --query "SHOW TABLES FROM test"
@@ -191,8 +185,8 @@ else
ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"

clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --max_memory_usage 10G --query "INSERT INTO test.visits SELECT * FROM datasets.visits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
clickhouse-client --query "DROP TABLE datasets.visits_v1 SYNC"
clickhouse-client --query "DROP TABLE datasets.hits_v1 SYNC"
else
@@ -200,7 +194,8 @@ else
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
fi
clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'"
clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
# AWS S3 is very inefficient, so increase memory even further:
clickhouse-client --max_memory_usage 30G --max_memory_usage_for_user 30G --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits SETTINGS enable_filesystem_cache_on_write_operations=0, max_insert_threads=16"
fi

clickhouse-client --query "SHOW TABLES FROM test"
@@ -257,24 +252,7 @@ function run_tests()

export -f run_tests

function timeout_with_logging() {
local exit_code=0

timeout -s TERM --preserve-status "${@}" || exit_code="${?}"

echo "Checking if it is a timeout. The code 124 will indicate a timeout."
if [[ "${exit_code}" -eq "124" ]]
then
echo "The command 'timeout ${*}' has been killed by timeout."
else
echo "No, it isn't a timeout."
fi

return $exit_code
}

TIMEOUT=$((MAX_RUN_TIME - 700))
timeout_with_logging "$TIMEOUT" bash -c run_tests ||:
run_tests ||:

echo "Files in current directory"
ls -la ./
@@ -65,7 +65,6 @@ ENV TZ=Europe/Amsterdam
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

ENV NUM_TRIES=1
ENV MAX_RUN_TIME=0

# Unrelated to vars in setup_minio.sh, but should be the same there
# to have the same binaries for local running scenario
@@ -12,9 +12,6 @@ dmesg --clear
# fail on errors, verbose and export all env variables
set -e -x -a

MAX_RUN_TIME=${MAX_RUN_TIME:-9000}
MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 9000 : MAX_RUN_TIME))

USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0}
USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0}

@@ -54,8 +51,6 @@ source /utils.lib
/usr/share/clickhouse-test/config/install.sh

./setup_minio.sh stateless
./mc admin trace clickminio > /test_output/minio.log &
MC_ADMIN_PID=$!

./setup_hdfs_minicluster.sh

@@ -176,6 +171,55 @@ done
setup_logs_replication
attach_gdb_to_clickhouse

# create tables for minio log webhooks
clickhouse-client --query "CREATE TABLE minio_audit_logs
(
log String,
event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC')
)
ENGINE = MergeTree
ORDER BY tuple()"

clickhouse-client --query "CREATE TABLE minio_server_logs
(
log String,
event_time DateTime64(9) MATERIALIZED parseDateTime64BestEffortOrZero(trim(BOTH '\"' FROM JSONExtractRaw(log, 'time')), 9, 'UTC')
)
ENGINE = MergeTree
ORDER BY tuple()"

# create minio log webhooks for both audit and server logs
# use async inserts to avoid creating too many parts
./mc admin config set clickminio logger_webhook:ch_server_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_server_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500
./mc admin config set clickminio audit_webhook:ch_audit_webhook endpoint="http://localhost:8123/?async_insert=1&wait_for_async_insert=0&async_insert_busy_timeout_min_ms=5000&async_insert_busy_timeout_max_ms=5000&async_insert_max_query_number=1000&async_insert_max_data_size=10485760&query=INSERT%20INTO%20minio_audit_logs%20FORMAT%20LineAsString" queue_size=1000000 batch_size=500

max_retries=100
retry=1
while [ $retry -le $max_retries ]; do
echo "clickminio restart attempt $retry:"

output=$(./mc admin service restart clickminio --wait --json 2>&1 | jq -r .status)
echo "Output of restart status: $output"

expected_output="success
success"
if [ "$output" = "$expected_output" ]; then
echo "Restarted clickminio successfully."
break
fi

sleep 1

retry=$((retry + 1))
done

if [ $retry -gt $max_retries ]; then
echo "Failed to restart clickminio after $max_retries attempts."
fi

./mc admin trace clickminio > /test_output/minio.log &
MC_ADMIN_PID=$!

function fn_exists() {
declare -F "$1" > /dev/null;
}
@@ -261,8 +305,6 @@ function run_tests()

try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')"

TIMEOUT=$((MAX_RUN_TIME - 800 > 8400 ? 8400 : MAX_RUN_TIME - 800))
START_TIME=${SECONDS}
set +e

TEST_ARGS=(
@@ -277,32 +319,22 @@ function run_tests()
--test-runs "$NUM_TRIES"
"${ADDITIONAL_OPTIONS[@]}"
)
timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s clickhouse-test "${TEST_ARGS[@]}" 2>&1 \
clickhouse-test "${TEST_ARGS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt
set -e
DURATION=$((SECONDS - START_TIME))

echo "Elapsed ${DURATION} seconds."
if [[ $DURATION -ge $TIMEOUT ]]
then
echo "It looks like the command is terminated by the timeout, which is ${TIMEOUT} seconds."
fi
}

export -f run_tests

# This should be enough to setup job and collect artifacts
TIMEOUT=$((MAX_RUN_TIME - 700))
if [ "$NUM_TRIES" -gt "1" ]; then
# We don't run tests with Ordinary database in PRs, only in master.
# So run new/changed tests with Ordinary at least once in flaky check.
timeout_with_logging "$TIMEOUT" bash -c 'NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests' \
NUM_TRIES=1; USE_DATABASE_ORDINARY=1; run_tests \
| sed 's/All tests have finished/Redacted: a message about tests finish is deleted/' | sed 's/No tests were run/Redacted: a message about no tests run is deleted/' ||:
fi

timeout_with_logging "$TIMEOUT" bash -c run_tests ||:
run_tests ||:

echo "Files in current directory"
ls -la ./
@@ -339,6 +371,14 @@ do
fi
done

# collect minio audit and server logs
# wait for minio to flush its batch if it has any
sleep 1
clickhouse-client -q "SYSTEM FLUSH ASYNC INSERT QUEUE"
clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_audit_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_audit_logs.jsonl.zst' FORMAT JSONEachRow"
clickhouse-client --max_block_size 8192 --max_memory_usage 10G --max_threads 1 --max_result_bytes 0 --max_result_rows 0 --max_rows_to_read 0 --max_bytes_to_read 0 -q "SELECT log FROM minio_server_logs ORDER BY event_time INTO OUTFILE '/test_output/minio_server_logs.jsonl.zst' FORMAT JSONEachRow"

# Stop server so we can safely read data with clickhouse-local.
# Why do we read data with clickhouse-local?
# Because it's the simplest way to read it when server has crashed.
@@ -99,10 +99,9 @@ upload_data() {
# iterating over globs will cause redundant file variable to be
# a path to a file, not a filename
# shellcheck disable=SC2045
for file in $(ls "${data_path}"); do
echo "${file}";
./mc cp "${data_path}"/"${file}" clickminio/test/"${file}";
done
if [ -d "${data_path}" ]; then
./mc cp --recursive "${data_path}"/ clickminio/test/
fi
}

setup_aws_credentials() {
@@ -40,22 +40,6 @@ function fn_exists() {
declare -F "$1" > /dev/null;
}

function timeout_with_logging() {
local exit_code=0

timeout -s TERM --preserve-status "${@}" || exit_code="${?}"

echo "Checking if it is a timeout. The code 124 will indicate a timeout."
if [[ "${exit_code}" -eq "124" ]]
then
echo "The command 'timeout ${*}' has been killed by timeout."
else
echo "No, it isn't a timeout."
fi

return $exit_code
}

function collect_core_dumps()
{
find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do
@@ -129,6 +129,7 @@ configure

# Check that all new/changed setting were added in settings changes history.
# Some settings can be different for builds with sanitizers, so we check
# Also the automatic value of 'max_threads' and similar was displayed as "'auto(...)'" in previous versions instead of "auto(...)".
# settings changes only for non-sanitizer builds.
IS_SANITIZED=$(clickhouse-local --query "SELECT value LIKE '%-fsanitize=%' FROM system.build_options WHERE name = 'CXX_FLAGS'")
if [ "${IS_SANITIZED}" -eq "0" ]
@@ -145,7 +146,9 @@ then
old_settings.value AS old_value
FROM new_settings
LEFT JOIN old_settings ON new_settings.name = old_settings.name
WHERE (new_settings.value != old_settings.value) AND (name NOT IN (
WHERE (new_value != old_value)
AND NOT (startsWith(new_value, 'auto(') AND old_value LIKE '%auto(%')
AND (name NOT IN (
SELECT arrayJoin(tupleElement(changes, 'name'))
FROM
(
@@ -177,7 +180,7 @@ then
if [ -s changed_settings.txt ]
then
mv changed_settings.txt /test_output/
echo -e "Changed settings are not reflected in settings changes history (see changed_settings.txt)$FAIL$(head_escaped /test_output/changed_settings.txt)" >> /test_output/test_results.tsv
echo -e "Changed settings are not reflected in the settings changes history (see changed_settings.txt)$FAIL$(head_escaped /test_output/changed_settings.txt)" >> /test_output/test_results.tsv
else
echo -e "There are no changed settings or they are reflected in settings changes history$OK" >> /test_output/test_results.tsv
fi
29 docs/changelogs/v24.3.7.30-lts.md Normal file
@@ -0,0 +1,29 @@
---
sidebar_position: 1
sidebar_label: 2024
---

# 2024 Changelog

### ClickHouse release v24.3.7.30-lts (c8a28cf4331) FIXME as compared to v24.3.6.48-lts (b2d33c3c45d)

#### Improvement
* Backported in [#68103](https://github.com/ClickHouse/ClickHouse/issues/68103): Distinguish booleans and integers while parsing values for custom settings: ``` SET custom_a = true; SET custom_b = 1; ```. [#62206](https://github.com/ClickHouse/ClickHouse/pull/62206) ([Vitaly Baranov](https://github.com/vitlibar)).

#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#67931](https://github.com/ClickHouse/ClickHouse/issues/67931): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#68062](https://github.com/ClickHouse/ClickHouse/issues/68062): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
* Backported in [#67812](https://github.com/ClickHouse/ClickHouse/issues/67812): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#67848](https://github.com/ClickHouse/ClickHouse/issues/67848): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#68271](https://github.com/ClickHouse/ClickHouse/issues/68271): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#67806](https://github.com/ClickHouse/ClickHouse/issues/67806): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#67834](https://github.com/ClickHouse/ClickHouse/issues/67834): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#68206](https://github.com/ClickHouse/ClickHouse/issues/68206): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#68089](https://github.com/ClickHouse/ClickHouse/issues/68089): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#68120](https://github.com/ClickHouse/ClickHouse/issues/68120): Fixed skipping of untouched parts in mutations with new analyzer. Previously with enabled analyzer data in part could be rewritten by mutation even if mutation doesn't affect this part according to predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)).

#### NOT FOR CHANGELOG / INSIGNIFICANT

* Update version after release. [#67676](https://github.com/ClickHouse/ClickHouse/pull/67676) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Backported in [#68074](https://github.com/ClickHouse/ClickHouse/issues/68074): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)).
@@ -14,7 +14,7 @@ Each functional test sends one or multiple queries to the running ClickHouse ser

Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse and it is available to general public.

Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.
Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`.

:::note
A common mistake when testing data types `DateTime` and `DateTime64` is assuming that the server uses a specific time zone (e.g. "UTC"). This is not the case, time zones in CI test runs
@@ -38,7 +38,7 @@ For more options, see `tests/clickhouse-test --help`. You can simply run all tes

### Adding a New Test

To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client --multiquery < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.
To add new test, create a `.sql` or `.sh` file in `queries/0_stateless` directory, check it manually and then generate `.reference` file in the following way: `clickhouse-client < 00000_test.sql > 00000_test.reference` or `./00000_test.sh > ./00000_test.reference`.

Tests should use (create, drop, etc) only tables in `test` database that is assumed to be created beforehand; also tests can use temporary tables.
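As an illustration of the workflow described above, a minimal stateless test might look like the sketch below (the test number, file name and table name are made up for this example; the matching `.reference` file would simply contain the output these queries produce):

```sql
-- queries/0_stateless/03000_example_count.sql (hypothetical file name)
DROP TABLE IF EXISTS t_example;
CREATE TABLE t_example (id UInt64, s String) ENGINE = MergeTree ORDER BY id;
INSERT INTO t_example VALUES (1, 'a'), (2, 'b'), (3, 'c');
-- The only output of the test is the result of this query, which goes into the .reference file.
SELECT count() FROM t_example;
DROP TABLE t_example;
```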
@@ -17,85 +17,121 @@ In terms of SQL, the nearest neighborhood problem can be expressed as follows:

``` sql
SELECT *
FROM table_with_ann_index
FROM table
ORDER BY Distance(vectors, Point)
LIMIT N
```

`vectors` contains N-dimensional values of type [Array(Float32)](../../../sql-reference/data-types/array.md), for example embeddings.
Function `Distance` computes the distance between two vectors. Often, the Euclidean (L2) distance is chosen as distance function but [other
distance functions](/docs/en/sql-reference/functions/distance-functions.md) are also possible. `Point` is the reference point, e.g. `(0.17,
0.33, ...)`, and `N` limits the number of search results.
`vectors` contains N-dimensional values of type [Array(Float32)](../../../sql-reference/data-types/array.md) or Array(Float64), for example
embeddings. Function `Distance` computes the distance between two vectors. Often, the Euclidean (L2) distance is chosen as distance function
but [other distance functions](/docs/en/sql-reference/functions/distance-functions.md) are also possible. `Point` is the reference point,
e.g. `(0.17, 0.33, ...)`, and `N` limits the number of search results.

An alternative formulation of the nearest neighborhood search problem looks as follows:
This query returns the top-`N` closest points to the reference point. Parameter `N` limits the number of returned values which is useful for
situations where `MaxDistance` is difficult to determine in advance.

``` sql
SELECT *
FROM table_with_ann_index
WHERE Distance(vectors, Point) < MaxDistance
LIMIT N
```

While the first query returns the top-`N` closest points to the reference point, the second query returns all points closer to the reference
point than a maximally allowed radius `MaxDistance`. Parameter `N` limits the number of returned values which is useful for situations where
`MaxDistance` is difficult to determine in advance.

With brute force search, both queries are expensive (linear in the number of points) because the distance between all points in `vectors` and
With brute force search, the query is expensive (linear in the number of points) because the distance between all points in `vectors` and
`Point` must be computed. To speed this process up, Approximate Nearest Neighbor Search Indexes (ANN indexes) store a compact representation
of the search space (using clustering, search trees, etc.) which allows to compute an approximate answer much quicker (in sub-linear time).

# Creating and Using ANN Indexes {#creating_using_ann_indexes}
# Creating and Using Vector Similarity Indexes

Syntax to create an ANN index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column:
Syntax to create a vector similarity index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column:

```sql
CREATE TABLE table_with_ann_index
CREATE TABLE table
(
`id` Int64,
`vectors` Array(Float32),
INDEX [ann_index_name vectors TYPE [ann_index_type]([ann_index_parameters]) [GRANULARITY [N]]
id Int64,
vectors Array(Float32),
INDEX index_name vectors TYPE vector_similarity(method, distance_function[, quantization, connectivity, expansion_add, expansion_search]) [GRANULARITY N]
)
ENGINE = MergeTree
ORDER BY id;
```

Parameters:
- `method`: Supports currently only `hnsw`.
- `distance_function`: either `L2Distance` (the [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) - the length of a
  line between two points in Euclidean space), or `cosineDistance` (the [cosine
  distance](https://en.wikipedia.org/wiki/Cosine_similarity#Cosine_distance)- the angle between two non-zero vectors).
- `quantization`: either `f32`, `f16`, or `i8` for storing the vector with reduced precision (optional, default: `f32`)
- `m`: the number of neighbors per graph node (optional, default: 16)
- `ef_construction`: (optional, default: 128)
- `ef_search`: (optional, default: 64)

Value 0 for parameters `m`, `ef_construction`, and `ef_search` refers to the default value.

Example:

```sql
CREATE TABLE table
(
id Int64,
vectors Array(Float32),
INDEX idx vectors TYPE vector_similarity('hnsw', 'L2Distance') -- Alternative syntax: TYPE vector_similarity(hnsw, L2Distance)
)
ENGINE = MergeTree
ORDER BY id;
```

Vector similarity indexes are based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
algorithm](https://arxiv.org/abs/1603.09320), i.e., a hierarchical graph where each point represents a vector and the edges represent
similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the
overall dataset, while still providing 99% recall. This is especially useful when working with high-dimensional vectors, that are expensive
to load and compare. The library also has several hardware-specific SIMD optimizations to accelerate further distance computations on modern
Arm (NEON and SVE) and x86 (AVX2 and AVX-512) CPUs and OS-specific optimizations to allow efficient navigation around immutable persistent
files, without loading them into RAM.

USearch indexes are currently experimental, to use them you first need to `SET allow_experimental_vector_similarity_index = 1`.

Vector similarity indexes currently support two distance functions:
- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
  ([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
  ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
Vector similarity indexes allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`.
|
||||
If no scalar kind was specified during index creation, `f16` is used as default.
|
||||
|
||||
For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
|
||||
distance function was specified during index creation, `L2Distance` is used as default.
|
||||
|
||||
:::note
|
||||
All arrays must have same length. To avoid errors, you can use a
|
||||
[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT constraint_name_1 CHECK
|
||||
length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default values) are not supported.
|
||||
:::
|
||||
|
||||
:::note
|
||||
The vector similarity index currently does not work with per-table, non-default `index_granularity` settings (see
|
||||
[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
|
||||
:::
|
||||
|
||||
ANN indexes are built during column insertion and merge. As a result, `INSERT` and `OPTIMIZE` statements will be slower than for ordinary
|
||||
tables. ANNIndexes are ideally used only with immutable or rarely changed data, respectively when are far more read requests than write
|
||||
requests.

ANN indexes support two types of queries:

- ORDER BY queries:

``` sql
SELECT *
FROM table_with_ann_index
[WHERE ...]
ORDER BY Distance(vectors, Point)
LIMIT N
```

- WHERE queries:

``` sql
SELECT *
FROM table_with_ann_index
WHERE Distance(vectors, Point) < MaxDistance
LIMIT N
```

:::tip
To avoid writing out large vectors, you can use [query
parameters](/docs/en/interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters), e.g.

```bash
clickhouse-client --param_vec='hello' --query="SELECT * FROM table_with_ann_index WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0"
```
:::

**Restrictions**: Queries that contain both a `WHERE Distance(vectors, Point) < MaxDistance` and an `ORDER BY Distance(vectors, Point)`
clause cannot use ANN indexes. Also, the approximate algorithms used to determine the nearest neighbors require a limit, hence queries
without a `LIMIT` clause cannot utilize ANN indexes. Also, ANN indexes are only used if the query has a `LIMIT` value smaller than setting
`max_limit_for_ann_queries` (default: 1 million rows). This is a safeguard to prevent large memory allocations by external libraries for
approximate neighbor search.
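
To make these restrictions concrete, here is a small sketch (reusing the `table_with_ann_index` and `vectors` names from the queries above; the reference vector is invented) of a query shape that can use the index and one that cannot:

```sql
-- Can use the ANN index: ORDER BY a distance function plus an explicit LIMIT
-- below max_limit_for_ann_queries.
SELECT id
FROM table_with_ann_index
ORDER BY L2Distance(vectors, [0.1, 0.2, 0.3])
LIMIT 10;

-- Cannot use the ANN index: without a LIMIT clause, the approximate algorithm
-- has no bound on the number of neighbors, so a regular scan is performed instead.
SELECT id
FROM table_with_ann_index
ORDER BY L2Distance(vectors, [0.1, 0.2, 0.3]);
```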

@ -122,128 +158,3 @@ brute-force distance calculation over all rows of the granules. With a small `GR
equally good, only the processing performance differs. It is generally recommended to use a large `GRANULARITY` for ANN indexes and fall
back to smaller `GRANULARITY` values only in case of problems like excessive memory consumption of the ANN structures. If no `GRANULARITY`
was specified for ANN indexes, the default value is 100 million.
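
As a sketch, an index declaration with an explicit `GRANULARITY` might look as follows (the table name and the value are only illustrative):

```sql
CREATE TABLE table_with_coarse_index
(
    id Int64,
    vectors Array(Float32),
    -- A large GRANULARITY is generally recommended; if omitted, the default is 100 million.
    INDEX idx vectors TYPE vector_similarity('hnsw', 'L2Distance') GRANULARITY 100000000
)
ENGINE = MergeTree
ORDER BY id;
```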
|
||||
|
||||
|
||||
# Available ANN Indexes {#available_ann_indexes}
|
||||
|
||||
- [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
|
||||
|
||||
- [USearch](/docs/en/engines/table-engines/mergetree-family/annindexes.md#usearch-usearch)
|
||||
|
||||
## Annoy {#annoy}
|
||||
|
||||
Annoy indexes are currently experimental; to use them, you first need to `SET allow_experimental_annoy_index = 1`. They are also currently
disabled on ARM due to memory safety problems with the algorithm.
|
||||
|
||||
This type of ANN index is based on the [Annoy library](https://github.com/spotify/annoy) which recursively divides the space into random
|
||||
linear surfaces (lines in 2D, planes in 3D etc.).
|
||||
|
||||
<div class='vimeo-container'>
|
||||
<iframe src="//www.youtube.com/embed/QkCCyLW0ehU"
|
||||
width="640"
|
||||
height="360"
|
||||
frameborder="0"
|
||||
allow="autoplay;
|
||||
fullscreen;
|
||||
picture-in-picture"
|
||||
allowfullscreen>
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
Syntax to create an Annoy index over an [Array(Float32)](../../../sql-reference/data-types/array.md) column:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table_with_annoy_index
|
||||
(
|
||||
id Int64,
|
||||
vectors Array(Float32),
|
||||
INDEX [ann_index_name] vectors TYPE annoy([Distance[, NumTrees]]) [GRANULARITY N]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
Annoy currently supports two distance functions:
|
||||
- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
|
||||
- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
|
||||
|
||||
For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
|
||||
distance function was specified during index creation, `L2Distance` is used as default.
|
||||
|
||||
Parameter `NumTrees` is the number of trees which the algorithm creates (default if not specified: 100). Higher values of `NumTree` mean
|
||||
more accurate search results but slower index creation / query times (approximately linearly) as well as larger index sizes.
|
||||
|
||||
:::note
|
||||
All arrays must have same length. To avoid errors, you can use a
|
||||
[CONSTRAINT](/docs/en/sql-reference/statements/create/table.md#constraints), for example, `CONSTRAINT constraint_name_1 CHECK
|
||||
length(vectors) = 256`. Also, empty `Arrays` and unspecified `Array` values in INSERT statements (i.e. default values) are not supported.
|
||||
:::
|
||||
|
||||
The creation of Annoy indexes (whenever a new part is built, e.g. at the end of a merge) is a relatively slow process. You can increase
setting `max_threads_for_annoy_index_creation` (default: 4) which controls how many threads are used to create an Annoy index. Please be
careful with this setting; it is possible that multiple indexes are created in parallel, in which case there can be overparallelization.
|
||||
|
||||
Setting `annoy_index_search_k_nodes` (default: `NumTrees * LIMIT`) determines how many tree nodes are inspected during SELECTs. Larger
|
||||
values mean more accurate results at the cost of longer query runtime:
|
||||
|
||||
```sql
|
||||
SELECT *
|
||||
FROM table_name
|
||||
ORDER BY L2Distance(vectors, Point)
|
||||
LIMIT N
|
||||
SETTINGS annoy_index_search_k_nodes=100;
|
||||
```
|
||||
|
||||
:::note
|
||||
The Annoy index currently does not work with per-table, non-default `index_granularity` settings (see
|
||||
[here](https://github.com/ClickHouse/ClickHouse/pull/51325#issuecomment-1605920475)). If necessary, the value must be changed in config.xml.
|
||||
:::
|
||||
|
||||
## USearch {#usearch}
|
||||
|
||||
This type of ANN index is based on the [USearch library](https://github.com/unum-cloud/usearch), which implements the [HNSW
|
||||
algorithm](https://arxiv.org/abs/1603.09320), i.e., builds a hierarchical graph where each point represents a vector and the edges represent
|
||||
similarity. Such hierarchical structures can be very efficient on large collections. They may often fetch 0.05% or less data from the
|
||||
overall dataset, while still providing 99% recall. This is especially useful when working with high-dimensional vectors,
|
||||
that are expensive to load and compare. The library also has several hardware-specific SIMD optimizations to accelerate further
|
||||
distance computations on modern Arm (NEON and SVE) and x86 (AVX2 and AVX-512) CPUs and OS-specific optimizations to allow efficient
|
||||
navigation around immutable persistent files, without loading them into RAM.
|
||||
|
||||
<div class='vimeo-container'>
|
||||
<iframe src="//www.youtube.com/embed/UMrhB3icP9w"
|
||||
width="640"
|
||||
height="360"
|
||||
frameborder="0"
|
||||
allow="autoplay;
|
||||
fullscreen;
|
||||
picture-in-picture"
|
||||
allowfullscreen>
|
||||
</iframe>
|
||||
</div>
|
||||
|
||||
Syntax to create a USearch index over an [Array](../../../sql-reference/data-types/array.md) column:
|
||||
|
||||
```sql
|
||||
CREATE TABLE table_with_usearch_index
|
||||
(
|
||||
id Int64,
|
||||
vectors Array(Float32),
|
||||
INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N]
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY id;
|
||||
```
|
||||
|
||||
USearch currently supports two distance functions:
|
||||
- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
|
||||
- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
|
||||
([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
|
||||
|
||||
USearch allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`. If no scalar kind
|
||||
was specified during index creation, `f16` is used as default.
|
||||
|
||||
For normalized data, `L2Distance` is usually a better choice, otherwise `cosineDistance` is recommended to compensate for scale. If no
|
||||
distance function was specified during index creation, `L2Distance` is used as default.
|
||||
|
@ -359,13 +359,14 @@ DESC format(JSONEachRow, '{"int" : 42, "float" : 42.42, "string" : "Hello, World
|
||||
Dates, DateTimes:
|
||||
|
||||
```sql
|
||||
DESC format(JSONEachRow, '{"date" : "2022-01-01", "datetime" : "2022-01-01 00:00:00"}')
|
||||
DESC format(JSONEachRow, '{"date" : "2022-01-01", "datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}')
|
||||
```
|
||||
```response
|
||||
┌─name─────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ date │ Nullable(Date) │ │ │ │ │ │
|
||||
│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
└──────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ date │ Nullable(Date) │ │ │ │ │ │
|
||||
│ datetime │ Nullable(DateTime) │ │ │ │ │ │
|
||||
│ datetime64 │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
Arrays:
|
||||
@ -759,12 +760,13 @@ DESC format(CSV, 'Hello world!,World hello!')
|
||||
Dates, DateTimes:
|
||||
|
||||
```sql
|
||||
DESC format(CSV, '"2020-01-01","2020-01-01 00:00:00"')
|
||||
DESC format(CSV, '"2020-01-01","2020-01-01 00:00:00","2022-01-01 00:00:00.000"')
|
||||
```
|
||||
```response
|
||||
┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ c1   │ Nullable(Date)          │              │                    │         │                  │                │
│ c2   │ Nullable(DateTime)      │              │                    │         │                  │                │
│ c3   │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
@ -956,12 +958,13 @@ DESC format(TSKV, 'int=42 float=42.42 bool=true string=Hello,World!\n')
|
||||
Dates, DateTimes:
|
||||
|
||||
```sql
|
||||
DESC format(TSV, '2020-01-01 2020-01-01 00:00:00 2022-01-01 00:00:00.000')
|
||||
```
|
||||
```response
|
||||
┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ c1   │ Nullable(Date)          │              │                    │         │                  │                │
│ c2   │ Nullable(DateTime)      │              │                    │         │                  │                │
│ c3   │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
@ -1126,12 +1129,13 @@ DESC format(Values, $$(42, 42.42, true, 'Hello,World!')$$)
|
||||
Dates, DateTimes:
|
||||
|
||||
```sql
|
||||
DESC format(Values, $$('2020-01-01', '2020-01-01 00:00:00', '2022-01-01 00:00:00.000')$$)
```
|
||||
```response
|
||||
┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ c1   │ Nullable(Date)          │              │                    │         │                  │                │
│ c2   │ Nullable(DateTime)      │              │                    │         │                  │                │
│ c3   │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
@ -1504,8 +1508,8 @@ DESC format(JSONEachRow, $$
|
||||
|
||||
#### input_format_try_infer_datetimes
|
||||
|
||||
If enabled, ClickHouse will try to infer type `DateTime` or `DateTime64` from string fields in schema inference for text formats.
If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime` or `DateTime64(9)` (if any datetime had fractional part),
|
||||
if at least one field was not parsed as datetime, the result type will be `String`.
|
||||
|
||||
Enabled by default.
|
||||
@ -1513,39 +1517,66 @@ Enabled by default.
|
||||
**Examples**
|
||||
|
||||
```sql
|
||||
SET input_format_try_infer_datetimes = 0;
DESC format(JSONEachRow, $$
    {"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"}
    {"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}
$$)
```
```response
┌─name───────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ datetime   │ Nullable(String) │              │                    │         │                  │                │
│ datetime64 │ Nullable(String) │              │                    │         │                  │                │
└────────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
```sql
|
||||
SET input_format_try_infer_datetimes = 1;
DESC format(JSONEachRow, $$
    {"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"}
    {"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}
$$)
```
```response
┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ datetime   │ Nullable(DateTime)      │              │                    │         │                  │                │
│ datetime64 │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
```sql
|
||||
DESC format(JSONEachRow, $$
|
||||
{"datetime" : "2021-01-01 00:00:00.000"}
|
||||
{"datetime" : "unknown"}
|
||||
{"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"}
|
||||
{"datetime" : "unknown", "datetime64" : "unknown"}
|
||||
$$)
|
||||
```
|
||||
```response
|
||||
┌─name─────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ datetime │ Nullable(String) │ │ │ │ │ │
|
||||
└──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
┌─name───────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ datetime │ Nullable(String) │ │ │ │ │ │
|
||||
│ datetime64 │ Nullable(String) │ │ │ │ │ │
|
||||
└────────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
#### input_format_try_infer_datetimes_only_datetime64
|
||||
|
||||
If enabled, ClickHouse will always infer `DateTime64(9)` when `input_format_try_infer_datetimes` is enabled even if datetime values don't contain fractional part.
|
||||
|
||||
Disabled by default.
|
||||
|
||||
**Examples**
|
||||
|
||||
```sql
|
||||
SET input_format_try_infer_datetimes = 1;
|
||||
SET input_format_try_infer_datetimes_only_datetime64 = 1;
|
||||
DESC format(JSONEachRow, $$
|
||||
{"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"}
|
||||
{"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}
|
||||
$$)
|
||||
```
|
||||
|
||||
```text
|
||||
┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
│ datetime64 │ Nullable(DateTime64(9)) │ │ │ │ │ │
|
||||
└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
|
||||
```
|
||||
|
||||
Note: Parsing datetimes during schema inference respects the setting [date_time_input_format](/docs/en/operations/settings/settings-formats.md#date_time_input_format)
|
||||
|
26
docs/en/interfaces/third-party/gui.md
vendored
26
docs/en/interfaces/third-party/gui.md
vendored
@ -10,7 +10,7 @@ sidebar_label: Visual Interfaces
|
||||
|
||||
### ch-ui {#ch-ui}
|
||||
|
||||
[ch-ui](https://github.com/caioricciuti/ch-ui) is a simple React.js app interface for ClickHouse databases, designed for executing queries and visualizing data. Built with React and the ClickHouse client for web, it offers a sleek and user-friendly UI for easy database interactions.
|
||||
[ch-ui](https://github.com/caioricciuti/ch-ui) is a simple React.js app interface for ClickHouse databases designed for executing queries and visualizing data. Built with React and the ClickHouse client for web, it offers a sleek and user-friendly UI for easy database interactions.
|
||||
|
||||
Features:
|
||||
|
||||
@ -25,7 +25,7 @@ Web interface for ClickHouse in the [Tabix](https://github.com/tabixio/tabix) pr
|
||||
|
||||
Features:
|
||||
|
||||
- Works with ClickHouse directly from the browser without the need to install additional software.
|
||||
- Query editor with syntax highlighting.
|
||||
- Auto-completion of commands.
|
||||
- Tools for graphical analysis of query execution.
|
||||
@ -63,7 +63,7 @@ Features:
|
||||
|
||||
- Table list with filtering and metadata.
|
||||
- Table preview with filtering and sorting.
|
||||
- Read-only query execution.
|
||||
|
||||
### Redash {#redash}
|
||||
|
||||
@ -75,23 +75,23 @@ Features:
|
||||
|
||||
- Powerful editor of queries.
|
||||
- Database explorer.
|
||||
- Visualization tools, that allow you to represent data in different forms.
|
||||
- Visualization tool that allows you to represent data in different forms.
|
||||
|
||||
### Grafana {#grafana}
|
||||
|
||||
[Grafana](https://grafana.com/grafana/plugins/grafana-clickhouse-datasource/) is a platform for monitoring and visualization.
|
||||
|
||||
"Grafana allows you to query, visualize, alert on and understand your metrics no matter where they are stored. Create, explore, and share dashboards with your team and foster a data driven culture. Trusted and loved by the community" — grafana.com.
|
||||
"Grafana allows you to query, visualize, alert on and understand your metrics no matter where they are stored. Create, explore, and share dashboards with your team and foster a data-driven culture. Trusted and loved by the community" — grafana.com.
|
||||
|
||||
ClickHouse data source plugin provides support for ClickHouse as a backend database.
|
||||
|
||||
### qryn {#qryn}
|
||||
|
||||
[qryn](https://metrico.in) is a polyglot, high-performance observability stack for ClickHouse _(formerly cLoki)_ with native Grafana integrations allowing users to ingest and analyze logs, metrics and telemetry traces from any agent supporting Loki/LogQL, Prometheus/PromQL, OTLP/Tempo, Elastic, InfluxDB and many more.
|
||||
|
||||
Features:
|
||||
|
||||
- Built-in Explore UI and LogQL CLI for querying, extracting and visualizing data
|
||||
- Native Grafana APIs support for querying, processing, ingesting, tracing and alerting without plugins
|
||||
- Powerful pipeline to dynamically search, filter and extract data from logs, events, traces and beyond
|
||||
- Ingestion and PUSH APIs transparently compatible with LogQL, PromQL, InfluxDB, Elastic and many more
|
||||
@ -139,7 +139,7 @@ Features:
|
||||
|
||||
### DBM {#dbm}
|
||||
|
||||
[DBM](https://github.com/devlive-community/dbm) is a visual management tool for ClickHouse!
|
||||
|
||||
Features:
|
||||
|
||||
@ -151,7 +151,7 @@ Features:
|
||||
- Support custom query
|
||||
- Support multiple data sources management(connection test, monitoring)
|
||||
- Support monitor (processor, connection, query)
|
||||
- Support migrating data
|
||||
|
||||
### Bytebase {#bytebase}
|
||||
|
||||
@ -169,7 +169,7 @@ Features:
|
||||
|
||||
### Zeppelin-Interpreter-for-ClickHouse {#zeppelin-interpreter-for-clickhouse}
|
||||
|
||||
[Zeppelin-Interpreter-for-ClickHouse](https://github.com/SiderZhang/Zeppelin-Interpreter-for-ClickHouse) is a [Zeppelin](https://zeppelin.apache.org) interpreter for ClickHouse. Compared with JDBC interpreter, it can provide better timeout control for long running queries.
|
||||
[Zeppelin-Interpreter-for-ClickHouse](https://github.com/SiderZhang/Zeppelin-Interpreter-for-ClickHouse) is a [Zeppelin](https://zeppelin.apache.org) interpreter for ClickHouse. Compared with the JDBC interpreter, it can provide better timeout control for long-running queries.
|
||||
|
||||
### ClickCat {#clickcat}
|
||||
|
||||
@ -179,7 +179,7 @@ Features:
|
||||
|
||||
- An online SQL editor which can run your SQL code without any installation.
- You can observe all processes and mutations. For those unfinished processes, you can kill them in the UI.
|
||||
- The Metrics contain Cluster Analysis, Data Analysis, and Query Analysis.
|
||||
|
||||
### ClickVisual {#clickvisual}
|
||||
|
||||
@ -332,7 +332,7 @@ Learn more about the product at [TABLUM.IO](https://tablum.io/)
|
||||
|
||||
### CKMAN {#ckman}
|
||||
|
||||
[CKMAN](https://www.github.com/housepower/ckman) is a tool for managing and monitoring ClickHouse clusters!
|
||||
|
||||
Features:
|
||||
|
||||
|
@ -307,8 +307,22 @@ SELECT dictGet('dict', 'B', 2);
|
||||
|
||||
## Named collections for accessing PostgreSQL database
|
||||
|
||||
For the description of parameters see [postgresql](../sql-reference/table-functions/postgresql.md). Additionally, there are aliases:
|
||||
|
||||
- `username` for `user`
|
||||
- `db` for `database`.
|
||||
|
||||
Parameter `addresses_expr` is used in a collection instead of `host:port`. The parameter is optional, because there are other optional ones: `host`, `hostname`, `port`. The following pseudo code explains the priority:
|
||||
|
||||
```sql
|
||||
CASE
|
||||
WHEN collection['addresses_expr'] != '' THEN collection['addresses_expr']
|
||||
WHEN collection['host'] != '' THEN collection['host'] || ':' || if(collection['port'] != '', collection['port'], '5432')
|
||||
WHEN collection['hostname'] != '' THEN collection['hostname'] || ':' || if(collection['port'] != '', collection['port'], '5432')
|
||||
END
|
||||
```
|
||||
|
||||
Example of creation:
|
||||
```sql
|
||||
CREATE NAMED COLLECTION mypg AS
|
||||
user = 'pguser',
|
||||
@ -316,7 +330,7 @@ password = 'jw8s0F4',
|
||||
host = '127.0.0.1',
|
||||
port = 5432,
|
||||
database = 'test',
|
||||
schema = 'test_schema'
|
||||
```
|
||||
|
||||
Example of configuration:
|
||||
@ -369,6 +383,10 @@ SELECT * FROM mypgtable;
|
||||
└───┘
|
||||
```
|
||||
|
||||
:::note
|
||||
PostgreSQL copies data from the named collection when the table is being created. A change in the collection does not affect the existing tables.
|
||||
:::
|
||||
|
||||
### Example of using named collections with database with engine PostgreSQL
|
||||
|
||||
```sql
|
||||
|
@ -143,6 +143,18 @@ value can be specified at session, profile or query level using setting [query_c
|
||||
Entries in the query cache are compressed by default. This reduces the overall memory consumption at the cost of slower writes into / reads
|
||||
from the query cache. To disable compression, use setting [query_cache_compress_entries](settings/settings.md#query-cache-compress-entries).
|
||||
|
||||
Sometimes it is useful to keep multiple results for the same query cached. This can be achieved using setting
[query_cache_tag](settings/settings.md#query-cache-tag) that acts as a label (or namespace) for query cache entries. The query cache
considers results of the same query with different tags different.
|
||||
|
||||
Example for creating three different query cache entries for the same query:
|
||||
|
||||
```sql
|
||||
SELECT 1 SETTINGS use_query_cache = true; -- query_cache_tag is implicitly '' (empty string)
|
||||
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 1';
|
||||
SELECT 1 SETTINGS use_query_cache = true, query_cache_tag = 'tag 2';
|
||||
```
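
The cached entries and their tags can then be inspected via the `tag` column of the `system.query_cache` system table; a small sketch:

```sql
-- Each tagged result appears as a separate entry in the query cache.
SELECT query, tag
FROM system.query_cache
ORDER BY tag;
```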
|
||||
|
||||
ClickHouse reads table data in blocks of [max_block_size](settings/settings.md#setting-max_block_size) rows. Due to filtering, aggregation,
|
||||
etc., result blocks are typically much smaller than 'max_block_size' but there are also cases where they are much bigger. Setting
|
||||
[query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results) (enabled by default) controls if result blocks
|
||||
|
@ -1400,6 +1400,16 @@ The number of seconds that ClickHouse waits for incoming requests before closing
|
||||
<keep_alive_timeout>10</keep_alive_timeout>
|
||||
```
|
||||
|
||||
## max_keep_alive_requests {#max-keep-alive-requests}
|
||||
|
||||
Maximum number of requests served through a single keep-alive connection before it is closed by the ClickHouse server. Defaults to 10000.
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<max_keep_alive_requests>10</max_keep_alive_requests>
|
||||
```
|
||||
|
||||
## listen_host {#listen_host}
|
||||
|
||||
Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`.
|
||||
|
@ -1041,3 +1041,27 @@ Compression rates of LZ4 or ZSTD improve on average by 20-40%.
|
||||
|
||||
This setting works best for tables with no primary key or a low-cardinality primary key, i.e. a table with only few distinct primary key values.
|
||||
High-cardinality primary keys, e.g. involving timestamp columns of type `DateTime64`, are not expected to benefit from this setting.
|
||||
|
||||
## lightweight_mutation_projection_mode
|
||||
|
||||
By default, lightweight delete `DELETE` does not work for tables with projections, because rows in a projection may be affected by a `DELETE` operation; therefore, the default value is `throw`.
However, this option can change the behavior: with the value `drop` or `rebuild`, deletes work with projections. `drop` deletes the projection, so the current query may be fast because the projection is simply dropped, but future queries may be slower because no projection is attached anymore.
`rebuild` rebuilds the projection, which may affect the performance of the current query but may speed up future queries. Both options work only at the part level,
which means projections in parts that are not touched by the `DELETE` stay intact instead of triggering a drop or rebuild.
|
||||
|
||||
Possible values:
|
||||
|
||||
- throw, drop, rebuild
|
||||
|
||||
Default value: throw
|
||||
|
||||
## deduplicate_merge_projection_mode
|
||||
|
||||
Whether to allow creating projections for tables with non-classic MergeTree engines, that is, engines other than (Replicated, Shared) MergeTree. If allowed, it also determines the action taken when projections are merged: either drop or rebuild. Classic MergeTree ignores this setting.
It also controls `OPTIMIZE DEDUPLICATE`, in which case it affects all MergeTree family members. Similar to the option `lightweight_mutation_projection_mode`, it works at the part level.
|
||||
|
||||
Possible values:
|
||||
|
||||
- throw, drop, rebuild
|
||||
|
||||
Default value: throw
|
@ -1381,7 +1381,7 @@ Default value: `2`.
|
||||
|
||||
Close connection before returning connection to the pool.
|
||||
|
||||
Default value: true.
|
||||
Default value: false.
|
||||
|
||||
## odbc_bridge_connection_pool_size {#odbc-bridge-connection-pool-size}
|
||||
|
||||
@ -1800,6 +1800,17 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## query_cache_tag {#query-cache-tag}
|
||||
|
||||
A string which acts as a label for [query cache](../query-cache.md) entries.
|
||||
The same queries with different tags are considered different by the query cache.
|
||||
|
||||
Possible values:
|
||||
|
||||
- Any string
|
||||
|
||||
Default value: `''`
|
||||
|
||||
## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes}
|
||||
|
||||
The maximum amount of memory (in bytes) the current user may allocate in the [query cache](../query-cache.md). 0 means unlimited.
|
||||
@ -5609,6 +5620,19 @@ Minimal size of block to compress in CROSS JOIN. Zero value means - disable this
|
||||
|
||||
Default value: `1GiB`.
|
||||
|
||||
## use_json_alias_for_old_object_type
|
||||
|
||||
When enabled, `JSON` data type alias will be used to create an old [Object('json')](../../sql-reference/data-types/json.md) type instead of the new [JSON](../../sql-reference/data-types/newjson.md) type.
|
||||
This setting requires server restart to take effect when changed.
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
## type_json_skip_duplicated_paths
|
||||
|
||||
When enabled, ClickHouse will skip duplicated paths during parsing of [JSON](../../sql-reference/data-types/newjson.md) object. Only the value of the first occurrence of each path will be inserted.
|
||||
|
||||
Default value: `false`
|
||||
|
||||
## restore_replace_external_engines_to_null
|
||||
|
||||
For testing purposes. Replaces all external table engines with Null so that external connections are not initiated.
|
||||
@ -5627,6 +5651,12 @@ Disable all insert and mutations (alter table update / alter table delete / alte
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
## use_hive_partitioning
|
||||
|
||||
When enabled, ClickHouse will detect Hive-style partitioning in path (`/name=value/`) in file-like table engines [File](../../engines/table-engines/special/file.md#hive-style-partitioning)/[S3](../../engines/table-engines/integrations/s3.md#hive-style-partitioning)/[URL](../../engines/table-engines/special/url.md#hive-style-partitioning)/[HDFS](../../engines/table-engines/integrations/hdfs.md#hive-style-partitioning)/[AzureBlobStorage](../../engines/table-engines/integrations/azureBlobStorage.md#hive-style-partitioning) and will allow to use partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`.
|
||||
|
||||
Default value: `false`.
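
**Example** (illustrative sketch with the `file` table function; the path is made up, and the partition value from the `/date=.../` segment becomes the virtual column `_date`):

```sql
SET use_hive_partitioning = 1;

-- 'data/date=2024-01-01/*.parquet' is an illustrative Hive-style partitioned path.
SELECT _date, count()
FROM file('data/date=2024-01-01/*.parquet')
GROUP BY _date;
```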
|
||||
|
||||
## allow_experimental_time_series_table {#allow-experimental-time-series-table}
|
||||
|
||||
Allows creation of tables with the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine.
|
||||
@ -5637,3 +5667,9 @@ Possible values:
|
||||
- 1 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is enabled.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## create_if_not_exists
|
||||
|
||||
Enable `IF NOT EXISTS` for `CREATE` statement by default. If either this setting or `IF NOT EXISTS` is specified and a table with the provided name already exists, no exception will be thrown.
|
||||
|
||||
Default value: `false`.
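
**Example** (minimal sketch of the effect):

```sql
SET create_if_not_exists = 1;

CREATE TABLE t (x UInt64) ENGINE = Memory;
-- Without the setting (and without IF NOT EXISTS) this second statement would
-- fail because the table already exists; with it, no exception is thrown.
CREATE TABLE t (x UInt64) ENGINE = Memory;
```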
|
||||
|
@ -24,6 +24,7 @@ Columns:
|
||||
- `num_rebalance_revocations`, (UInt64) - number of times the consumer was revoked its partitions
|
||||
- `num_rebalance_assignments`, (UInt64) - number of times the consumer was assigned to Kafka cluster
|
||||
- `is_currently_used`, (UInt8) - consumer is in use
|
||||
- `last_used`, (UInt64) - last time this consumer was in use, unix time in microseconds
|
||||
- `rdkafka_stat` (String) - library internal statistic. See https://github.com/ClickHouse/librdkafka/blob/master/STATISTICS.md . Set `statistics_interval_ms` to 0 to disable it; the default is 3000 (once every three seconds).
|
||||
|
||||
Example:
|
||||
|
@ -9,6 +9,7 @@ Columns:
|
||||
|
||||
- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
|
||||
- `result_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Size of the query cache entry.
|
||||
- `tag` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Tag of the query cache entry.
|
||||
- `stale` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is stale.
|
||||
- `shared` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is shared between multiple users.
|
||||
- `compressed` ([UInt8](../../sql-reference/data-types/int-uint.md)) — If the query cache entry is compressed.
|
||||
@ -26,6 +27,7 @@ Row 1:
|
||||
──────
|
||||
query: SELECT 1 SETTINGS use_query_cache = 1
|
||||
result_size: 128
|
||||
tag:
|
||||
stale: 0
|
||||
shared: 0
|
||||
compressed: 1
|
||||
|
@ -17,7 +17,8 @@ Columns:
|
||||
- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — How long the last refresh attempt took.
|
||||
- `next_refresh_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time at which the next refresh is scheduled to start.
|
||||
- `remaining_dependencies` ([Array(String)](../../sql-reference/data-types/array.md)) — If the view has [refresh dependencies](../../sql-reference/statements/create/view.md#refresh-dependencies), this array contains the subset of those dependencies that are not satisfied for the current refresh yet. If `status = 'WaitingForDependencies'`, a refresh is ready to start as soon as these dependencies are fulfilled.
|
||||
- `exception` ([String](../../sql-reference/data-types/string.md)) — if `last_refresh_result = 'Error'`, i.e. the last refresh attempt failed, this column contains the corresponding error message and stack trace.
|
||||
- `retry` ([UInt64](../../sql-reference/data-types/int-uint.md)) — If nonzero, the current or next refresh is a retry (see `refresh_retries` refresh setting), and `retry` is the 1-based index of that retry.
|
||||
- `refresh_count` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of successful refreshes since last server restart or table creation.
|
||||
- `progress` ([Float64](../../sql-reference/data-types/float.md)) — Progress of the current refresh, between 0 and 1.
|
||||
- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of rows read by the current refresh so far.
|
||||
|
@ -12,57 +12,59 @@ This specification describes the binary format that can be used for binary encod
|
||||
The table below describes how each data type is represented in binary format. Each data type encoding consists of 1 byte that indicates the type and some optional additional information.
|
||||
`var_uint` in the binary encoding means that the size is encoded using Variable-Length Quantity compression.
|
||||
|
||||
| ClickHouse data type | Binary encoding |
|
||||
|-----------------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `Nothing` | `0x00` |
|
||||
| `UInt8` | `0x01` |
|
||||
| `UInt16` | `0x02` |
|
||||
| `UInt32` | `0x03` |
|
||||
| `UInt64` | `0x04` |
|
||||
| `UInt128` | `0x05` |
|
||||
| `UInt256` | `0x06` |
|
||||
| `Int8` | `0x07` |
|
||||
| `Int16` | `0x08` |
|
||||
| `Int32` | `0x09` |
|
||||
| `Int64` | `0x0A` |
|
||||
| `Int128` | `0x0B` |
|
||||
| `Int256` | `0x0C` |
|
||||
| `Float32` | `0x0D` |
|
||||
| `Float64` | `0x0E` |
|
||||
| `Date` | `0x0F` |
|
||||
| `Date32` | `0x10` |
|
||||
| `DateTime` | `0x11` |
|
||||
| `DateTime(time_zone)` | `0x12<var_uint_time_zone_name_size><time_zone_name_data>` |
|
||||
| `DateTime64(P)` | `0x13<uint8_precision>` |
|
||||
| `DateTime64(P, time_zone)` | `0x14<uint8_precision><var_uint_time_zone_name_size><time_zone_name_data>` |
|
||||
| `String` | `0x15` |
|
||||
| `FixedString(N)` | `0x16<var_uint_size>` |
|
||||
| `Enum8` | `0x17<var_uint_number_of_elements><var_uint_name_size_1><name_data_1><int8_value_1>...<var_uint_name_size_N><name_data_N><int8_value_N>` |
|
||||
| `Enum16` | `0x18<var_uint_number_of_elements><var_uint_name_size_1><name_data_1><int16_little_endian_value_1>...><var_uint_name_size_N><name_data_N><int16_little_endian_value_N>` |
|
||||
| `Decimal32(P, S)` | `0x19<uint8_precision><uint8_scale>` |
|
||||
| `Decimal64(P, S)` | `0x1A<uint8_precision><uint8_scale>` |
|
||||
| `Decimal128(P, S)` | `0x1B<uint8_precision><uint8_scale>` |
|
||||
| `Decimal256(P, S)` | `0x1C<uint8_precision><uint8_scale>` |
|
||||
| `UUID` | `0x1D` |
|
||||
| `Array(T)` | `0x1E<nested_type_encoding>` |
|
||||
| `Tuple(T1, ..., TN)` | `0x1F<var_uint_number_of_elements><nested_type_encoding_1>...<nested_type_encoding_N>` |
|
||||
| `Tuple(name1 T1, ..., nameN TN)` | `0x20<var_uint_number_of_elements><var_uint_name_size_1><name_data_1><nested_type_encoding_1>...<var_uint_name_size_N><name_data_N><nested_type_encoding_N>` |
|
||||
| `Set` | `0x21` |
|
||||
| `Interval` | `0x22<interval_kind>` (see [interval kind binary encoding](#interval-kind-binary-encoding)) |
|
||||
| `Nullable(T)` | `0x23<nested_type_encoding>` |
|
||||
| `Function` | `0x24<var_uint_number_of_arguments><argument_type_encoding_1>...<argument_type_encoding_N><return_type_encoding>` |
|
||||
| `AggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x25<var_uint_version><var_uint_function_name_size><function_name_data><var_uint_number_of_parameters><param_1>...<param_N><var_uint_number_of_arguments><argument_type_encoding_1>...<argument_type_encoding_N>` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) |
|
||||
| `LowCardinality(T)` | `0x26<nested_type_encoding>` |
|
||||
| `Map(K, V)` | `0x27<key_type_encoding><value_type_encoding>` |
|
||||
| `IPv4` | `0x28` |
|
||||
| `IPv6` | `0x29` |
|
||||
| `Variant(T1, ..., TN)` | `0x2A<var_uint_number_of_variants><variant_type_encoding_1>...<variant_type_encoding_N>` |
|
||||
| `Dynamic(max_types=N)` | `0x2B<uint8_max_types>` |
|
||||
| `Custom type` (`Ring`, `Polygon`, etc) | `0x2C<var_uint_type_name_size><type_name_data>` |
|
||||
| `Bool` | `0x2D` |
|
||||
| `SimpleAggregateFunction(function_name(param_1, ..., param_N), arg_T1, ..., arg_TN)` | `0x2E<var_uint_function_name_size><function_name_data><var_uint_number_of_parameters><param_1>...<param_N><var_uint_number_of_arguments><argument_type_encoding_1>...<argument_type_encoding_N>` (see [aggregate function parameter binary encoding](#aggregate-function-parameter-binary-encoding)) |
|
||||
| `Nested(name1 T1, ..., nameN TN)` | `0x2F<var_uint_number_of_elements><var_uint_name_size_1><name_data_1><nested_type_encoding_1>...<var_uint_name_size_N><name_data_N><nested_type_encoding_N>` |
|
||||
| `JSON(max_dynamic_paths=N, max_dynamic_types=M, path Type, SKIP skip_path, SKIP REGEXP skip_path_regexp)` | `0x30<uint8_serialization_version><var_int_max_dynamic_paths><uint8_max_dynamic_types><var_uint_number_of_typed_paths><var_uint_path_name_size_1><path_name_data_1><encoded_type_1>...<var_uint_number_of_skip_paths><var_uint_skip_path_size_1><skip_path_data_1>...<var_uint_number_of_skip_path_regexps><var_uint_skip_path_regexp_size_1><skip_path_data_regexp_1>...` |
|
||||
|
||||
For the `JSON` type, the byte `uint8_serialization_version` indicates the version of the serialization. Right now the version is always 0, but it may change in the future if new arguments are introduced for the `JSON` type.
|
||||
|
||||
### Interval kind binary encoding
|
||||
|
||||
|
@ -14,7 +14,7 @@ To declare a column of `Dynamic` type, use the following syntax:
|
||||
<column_name> Dynamic(max_types=N)
|
||||
```
|
||||
|
||||
Where `N` is an optional parameter between `0` and `254` indicating how many different data types can be stored as separate subcolumns inside a column with type `Dynamic` across single block of data that is stored separately (for example across single data part for MergeTree table). If this limit is exceeded, all values with new types will be stored together in a special shared data structure in binary form. Default value of `max_types` is `32`.
|
||||
|
||||
:::note
|
||||
The Dynamic data type is an experimental feature. To use it, set `allow_experimental_dynamic_type = 1`.
|
||||
@ -224,41 +224,43 @@ SELECT d::Dynamic(max_types=5) as d2, dynamicType(d2) FROM test;
|
||||
└───────┴────────────────┘
|
||||
```
|
||||
|
||||
If `K < N`, then the values with the rarest types are converted to `String`:
|
||||
If `K < N`, then the values with the rarest types will be inserted into a single special subcolumn, but still will be accessible:
|
||||
```text
|
||||
CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory;
|
||||
INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]);
|
||||
SELECT d, dynamicType(d), d::Dynamic(max_types=2) as d2, dynamicType(d2) FROM test;
|
||||
SELECT d, dynamicType(d), d::Dynamic(max_types=2) as d2, dynamicType(d2), isDynamicElementInSharedData(d2) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐
|
||||
│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │
|
||||
│ 42 │ Int64 │ 42 │ Int64 │
|
||||
│ 43 │ Int64 │ 43 │ Int64 │
|
||||
│ 42.42 │ String │ 42.42 │ String │
|
||||
│ true │ Bool │ true │ String │
|
||||
│ [1,2,3] │ Array(Int64) │ [1,2,3] │ String │
|
||||
└─────────┴────────────────┴─────────┴─────────────────┘
|
||||
┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┬─isDynamicElementInSharedData(d2)─┐
|
||||
│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ false │
|
||||
│ 42 │ Int64 │ 42 │ Int64 │ false │
|
||||
│ 43 │ Int64 │ 43 │ Int64 │ false │
|
||||
│ 42.42 │ String │ 42.42 │ String │ false │
|
||||
│ true │ Bool │ true │ Bool │ true │
|
||||
│ [1,2,3] │ Array(Int64) │ [1,2,3] │ Array(Int64) │ true │
|
||||
└─────────┴────────────────┴─────────┴─────────────────┴──────────────────────────────────┘
|
||||
```
|
||||
|
||||
If `K=1`, all types are converted to `String`:
|
||||
Functions `isDynamicElementInSharedData` returns `true` for rows that are stored in a special shared data structure inside `Dynamic` and as we can see, resulting column contains only 2 types that are not stored in shared data structure.
|
||||
|
||||
If `K=0`, all types will be inserted into single special subcolumn:
|
||||
|
||||
```text
|
||||
CREATE TABLE test (d Dynamic(max_types=4)) ENGINE = Memory;
|
||||
INSERT INTO test VALUES (NULL), (42), (43), ('42.42'), (true), ([1, 2, 3]);
|
||||
SELECT d, dynamicType(d), d::Dynamic(max_types=1) as d2, dynamicType(d2) FROM test;
|
||||
SELECT d, dynamicType(d), d::Dynamic(max_types=0) as d2, dynamicType(d2), isDynamicElementInSharedData(d2) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┐
|
||||
│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │
|
||||
│ 42 │ Int64 │ 42 │ String │
|
||||
│ 43 │ Int64 │ 43 │ String │
|
||||
│ 42.42 │ String │ 42.42 │ String │
|
||||
│ true │ Bool │ true │ String │
|
||||
│ [1,2,3] │ Array(Int64) │ [1,2,3] │ String │
|
||||
└─────────┴────────────────┴─────────┴─────────────────┘
|
||||
┌─d───────┬─dynamicType(d)─┬─d2──────┬─dynamicType(d2)─┬─isDynamicElementInSharedData(d2)─┐
|
||||
│ ᴺᵁᴸᴸ │ None │ ᴺᵁᴸᴸ │ None │ false │
|
||||
│ 42 │ Int64 │ 42 │ Int64 │ true │
|
||||
│ 43 │ Int64 │ 43 │ Int64 │ true │
|
||||
│ 42.42 │ String │ 42.42 │ String │ true │
|
||||
│ true │ Bool │ true │ Bool │ true │
|
||||
│ [1,2,3] │ Array(Int64) │ [1,2,3] │ Array(Int64) │ true │
|
||||
└─────────┴────────────────┴─────────┴─────────────────┴──────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Reading Dynamic type from the data
|
||||
@ -411,17 +413,17 @@ SELECT d, dynamicType(d) FROM test ORDER by d;
|
||||
|
||||
## Reaching the limit in number of different data types stored inside Dynamic
|
||||
|
||||
`Dynamic` data type can store only a limited number of different data types as separate subcolumns. By default, this limit is 32, but you can change it in type declaration using syntax `Dynamic(max_types=N)` where N is between 0 and 254 (due to implementation details, it's impossible to have more than 254 different data types that can be stored as separate subcolumns inside Dynamic).
When the limit is reached, all new data types inserted to a `Dynamic` column will be inserted into a single shared data structure that stores values with different data types in binary form.
|
||||
|
||||
Let's see what happens when the limit is reached in different scenarios.
|
||||
|
||||
### Reaching the limit during data parsing
|
||||
|
||||
During parsing of `Dynamic` values from the data, when the limit is reached for current block of data, all new values will be inserted as `String` values:
|
||||
During parsing of `Dynamic` values from the data, when the limit is reached for current block of data, all new values will be inserted into shared data structure:
|
||||
|
||||
```sql
|
||||
SELECT d, dynamicType(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', '
|
||||
SELECT d, dynamicType(d), isDynamicElementInSharedData(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', '
|
||||
{"d" : 42}
|
||||
{"d" : [1, 2, 3]}
|
||||
{"d" : "Hello, World!"}
|
||||
@ -432,22 +434,22 @@ SELECT d, dynamicType(d) FROM format(JSONEachRow, 'd Dynamic(max_types=3)', '
|
||||
```
|
||||
|
||||
```text
|
||||
┌─d──────────────────────────┬─dynamicType(d)─┐
|
||||
│ 42 │ Int64 │
|
||||
│ [1,2,3] │ Array(Int64) │
|
||||
│ Hello, World! │ String │
|
||||
│ 2020-01-01 │ String │
|
||||
│ ["str1", "str2", "str3"] │ String │
|
||||
│ {"a" : 1, "b" : [1, 2, 3]} │ String │
|
||||
└────────────────────────────┴────────────────┘
|
||||
┌─d──────────────────────┬─dynamicType(d)─────────────────┬─isDynamicElementInSharedData(d)─┐
|
||||
│ 42 │ Int64 │ false │
|
||||
│ [1,2,3] │ Array(Int64) │ false │
|
||||
│ Hello, World! │ String │ false │
|
||||
│ 2020-01-01 │ Date │ true │
|
||||
│ ['str1','str2','str3'] │ Array(String) │ true │
|
||||
│ (1,[1,2,3]) │ Tuple(a Int64, b Array(Int64)) │ true │
|
||||
└────────────────────────┴────────────────────────────────┴─────────────────────────────────┘
|
||||
```
|
||||
|
||||
As we can see, after inserting 3 different data types `Int64`, `Array(Int64)` and `String` all new types were converted to `String`.
|
||||
As we can see, after inserting 3 different data types `Int64`, `Array(Int64)` and `String` all new types were inserted into special shared data structure.
|
||||
|
||||
### During merges of data parts in MergeTree table engines
|
||||
|
||||
During merge of several data parts in MergeTree table the `Dynamic` column in the resulting data part can reach the limit of different data types inside and won't be able to store all types from source parts.
|
||||
In this case ClickHouse chooses what types will remain after merge and what types will be casted to `String`. In most cases ClickHouse tries to keep the most frequent types and cast the rarest types to `String`, but it depends on the implementation.
|
||||
During a merge of several data parts in a MergeTree table, the `Dynamic` column in the resulting data part can reach the limit of different data types that can be stored as separate subcolumns and won't be able to store all types from the source parts as subcolumns.
|
||||
In this case ClickHouse chooses which types will remain as separate subcolumns after the merge and which types will be inserted into the shared data structure. In most cases ClickHouse tries to keep the most frequent types and store the rarest types in the shared data structure, but it depends on the implementation.
|
||||
|
||||
Let's see an example of such a merge. First, let's create a table with a `Dynamic` column, set the limit of different data types to `3` and insert values with `5` different types:
|
||||
|
||||
@ -463,17 +465,17 @@ INSERT INTO test SELECT number, 'str_' || toString(number) FROM numbers(1);
|
||||
|
||||
Each insert will create a separate data part with a `Dynamic` column containing a single type:
|
||||
```sql
|
||||
SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part;
|
||||
SELECT count(), dynamicType(d), isDynamicElementInSharedData(d), _part FROM test GROUP BY _part, dynamicType(d), isDynamicElementInSharedData(d) ORDER BY _part, count();
|
||||
```
|
||||
|
||||
```text
|
||||
┌─count()─┬─dynamicType(d)──────┬─_part─────┐
|
||||
│ 5 │ UInt64 │ all_1_1_0 │
|
||||
│ 4 │ Array(UInt64) │ all_2_2_0 │
|
||||
│ 3 │ Date │ all_3_3_0 │
|
||||
│ 2 │ Map(UInt64, UInt64) │ all_4_4_0 │
|
||||
│ 1 │ String │ all_5_5_0 │
|
||||
└─────────┴─────────────────────┴───────────┘
|
||||
┌─count()─┬─dynamicType(d)──────┬─isDynamicElementInSharedData(d)─┬─_part─────┐
|
||||
│ 5 │ UInt64 │ false │ all_1_1_0 │
|
||||
│ 4 │ Array(UInt64) │ false │ all_2_2_0 │
|
||||
│ 3 │ Date │ false │ all_3_3_0 │
|
||||
│ 2 │ Map(UInt64, UInt64) │ false │ all_4_4_0 │
|
||||
│ 1 │ String │ false │ all_5_5_0 │
|
||||
└─────────┴─────────────────────┴─────────────────────────────────┴───────────┘
|
||||
```
|
||||
|
||||
Now, let's merge all parts into one and see what will happen:
|
||||
@ -481,18 +483,20 @@ Now, let's merge all parts into one and see what will happen:
|
||||
```sql
|
||||
SYSTEM START MERGES test;
|
||||
OPTIMIZE TABLE test FINAL;
|
||||
SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) ORDER BY _part;
|
||||
SELECT count(), dynamicType(d), isDynamicElementInSharedData(d), _part FROM test GROUP BY _part, dynamicType(d), isDynamicElementInSharedData(d) ORDER BY _part, count() desc;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─count()─┬─dynamicType(d)─┬─_part─────┐
|
||||
│ 5 │ UInt64 │ all_1_5_2 │
|
||||
│ 6 │ String │ all_1_5_2 │
|
||||
│ 4 │ Array(UInt64) │ all_1_5_2 │
|
||||
└─────────┴────────────────┴───────────┘
|
||||
┌─count()─┬─dynamicType(d)──────┬─isDynamicElementInSharedData(d)─┬─_part─────┐
|
||||
│ 5 │ UInt64 │ false │ all_1_5_2 │
|
||||
│ 4 │ Array(UInt64) │ false │ all_1_5_2 │
|
||||
│ 3 │ Date │ false │ all_1_5_2 │
|
||||
│ 2 │ Map(UInt64, UInt64) │ true │ all_1_5_2 │
|
||||
│ 1 │ String │ true │ all_1_5_2 │
|
||||
└─────────┴─────────────────────┴─────────────────────────────────┴───────────┘
|
||||
```
|
||||
|
||||
As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and casted all other types to `String`.
|
||||
As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` as subcolumns and inserted all other types into the shared data structure.
|
||||
|
||||
## JSONExtract functions with Dynamic
|
||||
|
||||
@ -509,22 +513,23 @@ SELECT JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Dynamic') AS dynamic, dynamicType(
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_dynamics, mapApply((k, v) -> (k, variantType(v)), map_of_dynamics) AS map_of_dynamic_types```
|
||||
SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Dynamic)') AS map_of_dynamics, mapApply((k, v) -> (k, dynamicType(v)), map_of_dynamics) AS map_of_dynamic_types
|
||||
```
|
||||
|
||||
```text
|
||||
┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────┐
|
||||
│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │
|
||||
└──────────────────────────────────┴─────────────────────────────────────────────────┘
|
||||
┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────────────┐
|
||||
│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'Int64','b':'String','c':'Array(Nullable(Int64))'} │
|
||||
└──────────────────────────────────┴─────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS dynamics, arrayMap(x -> (x.1, variantType(x.2)), dynamics) AS dynamic_types```
|
||||
SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Dynamic') AS dynamics, arrayMap(x -> (x.1, dynamicType(x.2)), dynamics) AS dynamic_types```
|
||||
```
|
||||
|
||||
```text
|
||||
┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────┐
|
||||
│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │
|
||||
└────────────────────────────────────────┴───────────────────────────────────────────────────────┘
|
||||
┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────────────┐
|
||||
│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','Int64'),('b','String'),('c','Array(Nullable(Int64))')] │
|
||||
└────────────────────────────────────────┴───────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Binary output format
|
||||
|
@ -19,7 +19,8 @@ ClickHouse data types include:
|
||||
- **Boolean**: ClickHouse has a [`Boolean` type](./boolean.md)
|
||||
- **Strings**: [`String`](./string.md) and [`FixedString`](./fixedstring.md)
|
||||
- **Dates**: use [`Date`](./date.md) and [`Date32`](./date32.md) for days, and [`DateTime`](./datetime.md) and [`DateTime64`](./datetime64.md) for instances in time
|
||||
- **JSON**: the [`JSON` object](./json.md) stores a JSON document in a single column
|
||||
- **Object**: the [`Object`](./json.md) stores a JSON document in a single column (deprecated)
|
||||
- **JSON**: the [`JSON` object](./newjson.md) stores a JSON document in a single column
|
||||
- **UUID**: a performant option for storing [`UUID` values](./uuid.md)
|
||||
- **Low cardinality types**: use an [`Enum`](./enum.md) when you have a handful of unique values, or use [`LowCardinality`](./lowcardinality.md) when you have up to 10,000 unique values of a column
|
||||
- **Arrays**: any column can be defined as an [`Array` of values](./array.md)
|
||||
|
@ -13,7 +13,7 @@ keywords: [object, data type]
|
||||
|
||||
Stores JavaScript Object Notation (JSON) documents in a single column.
|
||||
|
||||
`JSON` is an alias for `Object('json')`.
|
||||
`JSON` can be used as an alias for `Object('json')` when the setting [use_json_alias_for_old_object_type](../../operations/settings/settings.md#usejsonaliasforoldobjecttype) is enabled.
|
||||
|
||||
## Example
|
||||
|
||||
@ -79,5 +79,5 @@ SELECT * FROM json FORMAT JSONEachRow
|
||||
|
||||
## Related Content
|
||||
|
||||
- [Using JSON in ClickHouse](/docs/en/integrations/data-formats/json)
|
||||
- [Using JSON in ClickHouse](/en/integrations/data-formats/json/overview)
|
||||
- [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json)
|
||||
|
516
docs/en/sql-reference/data-types/newjson.md
Normal file
516
docs/en/sql-reference/data-types/newjson.md
Normal file
@ -0,0 +1,516 @@
|
||||
---
|
||||
slug: /en/sql-reference/data-types/newjson
|
||||
sidebar_position: 63
|
||||
sidebar_label: JSON
|
||||
keywords: [json, data type]
|
||||
---
|
||||
|
||||
# JSON
|
||||
|
||||
Stores JavaScript Object Notation (JSON) documents in a single column.
|
||||
|
||||
:::note
|
||||
This feature is experimental and is not production-ready. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-formats/json/overview) instead.
|
||||
If you want to use JSON type, set `allow_experimental_json_type = 1`.
|
||||
:::
|
||||
|
||||
To declare a column of `JSON` type, use the following syntax:
|
||||
|
||||
``` sql
|
||||
<column_name> JSON(max_dynamic_paths=N, max_dynamic_types=M, some.path TypeName, SKIP path.to.skip, SKIP REGEXP 'paths_regexp')
|
||||
```
|
||||
Where:
|
||||
- `max_dynamic_paths` is an optional parameter indicating how many paths can be stored separately as subcolumns across a single block of data that is stored separately (for example, across a single data part for a MergeTree table). If this limit is exceeded, all other paths will be stored together in a single structure. The default value of `max_dynamic_paths` is `1024`.
|
||||
- `max_dynamic_types` is an optional parameter between `1` and `255` indicating how many different data types can be stored inside a single path column with type `Dynamic` across a single block of data that is stored separately (for example, across a single data part for a MergeTree table). If this limit is exceeded, all new types will be converted to type `String`. The default value of `max_dynamic_types` is `32`.
|
||||
- `some.path TypeName` is an optional type hint for a particular path in the JSON. Such paths will always be stored as subcolumns with the specified type.
|
||||
- `SKIP path.to.skip` is an optional hint for a particular path that should be skipped during JSON parsing. Such paths will never be stored in the JSON column. If the specified path is a nested JSON object, the whole nested object will be skipped.
|
||||
- `SKIP REGEXP 'path_regexp'` is an optional hint with a regular expression that is used to skip paths during JSON parsing. All paths that match this regular expression will never be stored in the JSON column.
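
Combining these parameters, a declaration could look like the following sketch (the column name, hinted paths and regular expression are placeholders):

```sql
CREATE TABLE test_json
(
    -- a.b is always stored as a UInt32 subcolumn; a.e and any path matching 'tmp.*' are never stored
    json JSON(max_dynamic_paths=16, max_dynamic_types=4, a.b UInt32, SKIP a.e, SKIP REGEXP 'tmp.*')
)
ENGINE = Memory;
```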
|
||||
|
||||
## Creating JSON
|
||||
|
||||
Using `JSON` type in table column definition:
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (json JSON) ENGINE = Memory;
|
||||
INSERT INTO test VALUES ('{"a" : {"b" : 42}, "c" : [1, 2, 3]}'), ('{"f" : "Hello, World!"}'), ('{"a" : {"b" : 43, "e" : 10}, "c" : [4, 5, 6]}');
|
||||
SELECT json FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json────────────────────────────────────────┐
|
||||
│ {"a":{"b":"42"},"c":["1","2","3"]} │
|
||||
│ {"f":"Hello, World!"} │
|
||||
│ {"a":{"b":"43","e":"10"},"c":["4","5","6"]} │
|
||||
└─────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (json JSON(a.b UInt32, SKIP a.e)) ENGINE = Memory;
|
||||
INSERT INTO test VALUES ('{"a" : {"b" : 42}, "c" : [1, 2, 3]}'), ('{"f" : "Hello, World!"}'), ('{"a" : {"b" : 43, "e" : 10}, "c" : [4, 5, 6]}');
|
||||
SELECT json FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json──────────────────────────────┐
|
||||
│ {"a":{"b":42},"c":[1,2,3]} │
|
||||
│ {"a":{"b":0},"f":"Hello, World!"} │
|
||||
│ {"a":{"b":43},"c":[4,5,6]} │
|
||||
└───────────────────────────────────┘
|
||||
```
|
||||
|
||||
Using CAST from 'String':
|
||||
|
||||
```sql
|
||||
SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON as json;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json───────────────────────────────────────────┐
|
||||
│ {"a":{"b":42},"c":[1,2,3],"d":"Hello, World!"} │
|
||||
└────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
CAST from named `Tuple`, `Map` and `Object('json')` to `JSON` type will be supported later.
|
||||
|
||||
## Reading JSON paths as subcolumns
|
||||
|
||||
The JSON type supports reading every path as a separate subcolumn. If the type of the requested path was not specified in the JSON type declaration, the subcolumn of the path will always have type [Dynamic](/docs/en/sql-reference/data-types/dynamic.md).
|
||||
|
||||
For example:
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (json JSON(a.b UInt32, SKIP a.e)) ENGINE = Memory;
|
||||
INSERT INTO test VALUES ('{"a" : {"b" : 42, "g" : 42.42}, "c" : [1, 2, 3], "d" : "2020-01-01"}'), ('{"f" : "Hello, World!", "d" : "2020-01-02"}'), ('{"a" : {"b" : 43, "e" : 10, "g" : 43.43}, "c" : [4, 5, 6]}');
|
||||
SELECT json FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json──────────────────────────────────────────────────┐
|
||||
│ {"a":{"b":42,"g":42.42},"c":[1,2,3],"d":"2020-01-01"} │
|
||||
│ {"a":{"b":0},"d":"2020-01-02","f":"Hello, World!"} │
|
||||
│ {"a":{"b":43,"g":43.43},"c":[4,5,6]} │
|
||||
└───────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT json.a.b, json.a.g, json.c, json.d FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json.a.b─┬─json.a.g─┬─json.c──┬─json.d─────┐
|
||||
│ 42 │ 42.42 │ [1,2,3] │ 2020-01-01 │
|
||||
│ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 2020-01-02 │
|
||||
│ 43 │ 43.43 │ [4,5,6] │ ᴺᵁᴸᴸ │
|
||||
└──────────┴──────────┴─────────┴────────────┘
|
||||
```
|
||||
|
||||
If the requested path wasn't found in the data, it will be filled with `NULL` values:
|
||||
|
||||
```sql
|
||||
SELECT json.non.existing.path FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json.non.existing.path─┐
|
||||
│ ᴺᵁᴸᴸ │
|
||||
│ ᴺᵁᴸᴸ │
|
||||
│ ᴺᵁᴸᴸ │
|
||||
└────────────────────────┘
|
||||
```
|
||||
|
||||
Let's check the data types of returned subcolumns:
|
||||
```sql
|
||||
SELECT toTypeName(json.a.b), toTypeName(json.a.g), toTypeName(json.c), toTypeName(json.d) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─toTypeName(json.a.b)─┬─toTypeName(json.a.g)─┬─toTypeName(json.c)─┬─toTypeName(json.d)─┐
|
||||
│ UInt32 │ Dynamic │ Dynamic │ Dynamic │
|
||||
│ UInt32 │ Dynamic │ Dynamic │ Dynamic │
|
||||
│ UInt32 │ Dynamic │ Dynamic │ Dynamic │
|
||||
└──────────────────────┴──────────────────────┴────────────────────┴────────────────────┘
|
||||
```
|
||||
|
||||
As we can see, for `a.b` the type is `UInt32` as we specified in the JSON type declaration, and for all other subcolumns the type is `Dynamic`.
|
||||
|
||||
It is also possible to read subcolumns of a `Dynamic` type using special syntax `json.some.path.:TypeName`:
|
||||
|
||||
```sql
|
||||
select json.a.g.:Float64, dynamicType(json.a.g), json.d.:Date, dynamicType(json.d) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json.a.g.:`Float64`─┬─dynamicType(json.a.g)─┬─json.d.:`Date`─┬─dynamicType(json.d)─┐
|
||||
│ 42.42 │ Float64 │ 2020-01-01 │ Date │
|
||||
│ ᴺᵁᴸᴸ │ None │ 2020-01-02 │ Date │
|
||||
│ 43.43 │ Float64 │ ᴺᵁᴸᴸ │ None │
|
||||
└─────────────────────┴───────────────────────┴────────────────┴─────────────────────┘
|
||||
```
|
||||
|
||||
`Dynamic` subcolumns can be cast to any data type. In this case an exception will be thrown if the internal type inside `Dynamic` cannot be cast to the requested type:
|
||||
|
||||
```sql
|
||||
select json.a.g::UInt64 as uint FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─uint─┐
|
||||
│ 42 │
|
||||
│ 0 │
|
||||
│ 43 │
|
||||
└──────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
select json.a.g::UUID as uuid FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
Received exception:
|
||||
Code: 48. DB::Exception: Conversion between numeric types and UUID is not supported. Probably the passed UUID is unquoted: while executing 'FUNCTION CAST(__table1.json.a.g :: 2, 'UUID'_String :: 1) -> CAST(__table1.json.a.g, 'UUID'_String) UUID : 0'. (NOT_IMPLEMENTED)
|
||||
```
|
||||
|
||||
## Reading JSON sub-objects as subcolumns
|
||||
|
||||
JSON type supports reading nested objects as subcolumns with type `JSON` using special syntax `json.^some.path`:
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (json JSON) ENGINE = Memory;
|
||||
INSERT INTO test VALUES ('{"a" : {"b" : {"c" : 42, "g" : 42.42}}, "c" : [1, 2, 3], "d" : {"e" : {"f" : {"g" : "Hello, World", "h" : [1, 2, 3]}}}}'), ('{"f" : "Hello, World!", "d" : {"e" : {"f" : {"h" : [4, 5, 6]}}}}'), ('{"a" : {"b" : {"c" : 43, "e" : 10, "g" : 43.43}}, "c" : [4, 5, 6]}');
|
||||
SELECT json FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ {"a":{"b":{"c":42,"g":42.42}},"c":[1,2,3],"d":{"e":{"f":{"g":"Hello, World","h":[1,2,3]}}}} │
|
||||
│ {"d":{"e":{"f":{"h":[4,5,6]}}},"f":"Hello, World!"} │
|
||||
│ {"a":{"b":{"c":43,"e":10,"g":43.43}},"c":[4,5,6]} │
|
||||
└─────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT json.^a.b, json.^d.e.f FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json.^`a`.b───────────────┬─json.^`d`.e.f────────────────────┐
|
||||
│ {"c":42,"g":42.42} │ {"g":"Hello, World","h":[1,2,3]} │
|
||||
│ {} │ {"h":[4,5,6]} │
|
||||
│ {"c":43,"e":10,"g":43.43} │ {} │
|
||||
└───────────────────────────┴──────────────────────────────────┘
|
||||
```
|
||||
|
||||
:::note
|
||||
Reading sub-objects as subcolumns may be inefficient, as it may require an almost full scan of the JSON data.
|
||||
:::
|
||||
|
||||
## Types inference for paths
|
||||
|
||||
During JSON parsing ClickHouse tries to detect the most appropriate data type for each JSON path. It works similarly to [automatic schema inference from input data](/docs/en/interfaces/schema-inference.md) and is controlled by the same settings:
|
||||
|
||||
- [input_format_try_infer_integers](/docs/en/interfaces/schema-inference.md#inputformattryinferintegers)
|
||||
- [input_format_try_infer_dates](/docs/en/interfaces/schema-inference.md#inputformattryinferdates)
|
||||
- [input_format_try_infer_datetimes](/docs/en/interfaces/schema-inference.md#inputformattryinferdatetimes)
|
||||
- [schema_inference_make_columns_nullable](/docs/en/interfaces/schema-inference.md#schemainferencemakecolumnsnullable)
|
||||
- [input_format_json_try_infer_numbers_from_strings](/docs/en/interfaces/schema-inference.md#inputformatjsontryinfernumbersfromstrings)
|
||||
- [input_format_json_infer_incomplete_types_as_strings](/docs/en/interfaces/schema-inference.md#inputformatjsoninferincompletetypesasstrings)
|
||||
- [input_format_json_read_numbers_as_strings](/docs/en/interfaces/schema-inference.md#inputformatjsonreadnumbersasstrings)
|
||||
- [input_format_json_read_bools_as_strings](/docs/en/interfaces/schema-inference.md#inputformatjsonreadboolsasstrings)
|
||||
- [input_format_json_read_bools_as_numbers](/docs/en/interfaces/schema-inference.md#inputformatjsonreadboolsasnumbers)
|
||||
- [input_format_json_read_arrays_as_strings](/docs/en/interfaces/schema-inference.md#inputformatjsonreadarraysasstrings)
|
||||
|
||||
Let's see some examples:
|
||||
|
||||
```sql
|
||||
SELECT JSONAllPathsWithTypes('{"a" : "2020-01-01", "b" : "2020-01-01 10:00:00"}'::JSON) AS paths_with_types settings input_format_try_infer_dates=1, input_format_try_infer_datetimes=1;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─paths_with_types─────────────────┐
|
||||
│ {'a':'Date','b':'DateTime64(9)'} │
|
||||
└──────────────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT JSONAllPathsWithTypes('{"a" : "2020-01-01", "b" : "2020-01-01 10:00:00"}'::JSON) AS paths_with_types settings input_format_try_infer_dates=0, input_format_try_infer_datetimes=0;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─paths_with_types────────────┐
|
||||
│ {'a':'String','b':'String'} │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT JSONAllPathsWithTypes('{"a" : [1, 2, 3]}'::JSON) AS paths_with_types settings schema_inference_make_columns_nullable=1;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─paths_with_types───────────────┐
|
||||
│ {'a':'Array(Nullable(Int64))'} │
|
||||
└────────────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT JSONAllPathsWithTypes('{"a" : [1, 2, 3]}'::JSON) AS paths_with_types settings schema_inference_make_columns_nullable=0;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─paths_with_types─────┐
|
||||
│ {'a':'Array(Int64)'} │
|
||||
└──────────────────────┘
|
||||
```
|
||||
|
||||
## Handling arrays of JSON objects
|
||||
|
||||
JSON paths that contain an array of objects are parsed as type `Array(JSON)` and inserted into the `Dynamic` column for this path. To read an array of objects, you can extract it from the `Dynamic` column as a subcolumn:
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (json JSON) ENGINE = Memory;
|
||||
INSERT INTO test VALUES
|
||||
('{"a" : {"b" : [{"c" : 42, "d" : "Hello", "f" : [[{"g" : 42.42}]], "k" : {"j" : 1000}}, {"c" : 43}, {"e" : [1, 2, 3], "d" : "My", "f" : [[{"g" : 43.43, "h" : "2020-01-01"}]], "k" : {"j" : 2000}}]}}'),
|
||||
('{"a" : {"b" : [1, 2, 3]}}'),
|
||||
('{"a" : {"b" : [{"c" : 44, "f" : [[{"h" : "2020-01-02"}]]}, {"e" : [4, 5, 6], "d" : "World", "f" : [[{"g" : 44.44}]], "k" : {"j" : 3000}}]}}');
|
||||
SELECT json FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
|
||||
│ {"a":{"b":[{"c":"42","d":"Hello","f":[[{"g":42.42}]],"k":{"j":"1000"}},{"c":"43"},{"d":"My","e":["1","2","3"],"f":[[{"g":43.43,"h":"2020-01-01"}]],"k":{"j":"2000"}}]}} │
|
||||
│ {"a":{"b":["1","2","3"]}} │
|
||||
│ {"a":{"b":[{"c":"44","f":[[{"h":"2020-01-02"}]]},{"d":"World","e":["4","5","6"],"f":[[{"g":44.44}]],"k":{"j":"3000"}}]}} │
|
||||
└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
```sql
|
||||
SELECT json.a.b, dynamicType(json.a.b) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json.a.b──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─dynamicType(json.a.b)────────────────────────────────────┐
|
||||
│ ['{"c":"42","d":"Hello","f":[[{"g":42.42}]],"k":{"j":"1000"}}','{"c":"43"}','{"d":"My","e":["1","2","3"],"f":[[{"g":43.43,"h":"2020-01-01"}]],"k":{"j":"2000"}}'] │ Array(JSON(max_dynamic_types=16, max_dynamic_paths=256)) │
|
||||
│ [1,2,3] │ Array(Nullable(Int64)) │
|
||||
│ ['{"c":"44","f":[[{"h":"2020-01-02"}]]}','{"d":"World","e":["4","5","6"],"f":[[{"g":44.44}]],"k":{"j":"3000"}}'] │ Array(JSON(max_dynamic_types=16, max_dynamic_paths=256)) │
|
||||
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
As you can notice, the `max_dynamic_types`/`max_dynamic_paths` parameters of the nested `JSON` type were reduced compared to the default values. This is needed to prevent the number of subcolumns from growing uncontrollably in nested arrays of JSON objects.
|
||||
|
||||
Let's try to read subcolumns from this nested `JSON` column:
|
||||
|
||||
```sql
|
||||
SELECT json.a.b.:`Array(JSON)`.c, json.a.b.:`Array(JSON)`.f, json.a.b.:`Array(JSON)`.d FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json.a.b.:`Array(JSON)`.c─┬─json.a.b.:`Array(JSON)`.f───────────────────────────────────┬─json.a.b.:`Array(JSON)`.d─┐
|
||||
│ [42,43,NULL] │ [[['{"g":42.42}']],NULL,[['{"g":43.43,"h":"2020-01-01"}']]] │ ['Hello',NULL,'My'] │
|
||||
│ [] │ [] │ [] │
|
||||
│ [44,NULL] │ [[['{"h":"2020-01-02"}']],[['{"g":44.44}']]] │ [NULL,'World'] │
|
||||
└───────────────────────────┴─────────────────────────────────────────────────────────────┴───────────────────────────┘
|
||||
```
|
||||
|
||||
We can avoid writing the `Array(JSON)` subcolumn name by using special syntax:
|
||||
|
||||
```sql
|
||||
SELECT json.a.b[].c, json.a.b[].f, json.a.b[].d FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json.a.b.:`Array(JSON)`.c─┬─json.a.b.:`Array(JSON)`.f───────────────────────────────────┬─json.a.b.:`Array(JSON)`.d─┐
|
||||
│ [42,43,NULL] │ [[['{"g":42.42}']],NULL,[['{"g":43.43,"h":"2020-01-01"}']]] │ ['Hello',NULL,'My'] │
|
||||
│ [] │ [] │ [] │
|
||||
│ [44,NULL] │ [[['{"h":"2020-01-02"}']],[['{"g":44.44}']]] │ [NULL,'World'] │
|
||||
└───────────────────────────┴─────────────────────────────────────────────────────────────┴───────────────────────────┘
|
||||
```
|
||||
|
||||
The number of `[]` after the path indicates the array level. For example, `json.path[][]` will be transformed to `json.path.:Array(Array(JSON))`.
|
||||
|
||||
Let's check the paths and types inside our `Array(JSON)`:
|
||||
|
||||
```sql
|
||||
SELECT DISTINCT arrayJoin(JSONAllPathsWithTypes(arrayJoin(json.a.b[]))) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─arrayJoin(JSONAllPathsWithTypes(arrayJoin(json.a.b.:`Array(JSON)`)))──┐
|
||||
│ ('c','Int64') │
|
||||
│ ('d','String') │
|
||||
│ ('f','Array(Array(JSON(max_dynamic_types=8, max_dynamic_paths=64)))') │
|
||||
│ ('k.j','Int64') │
|
||||
│ ('e','Array(Nullable(Int64))') │
|
||||
└───────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Let's read subcolumns from `Array(JSON)` column:
|
||||
|
||||
```sql
|
||||
SELECT json.a.b[].c.:Int64, json.a.b[].f[][].g.:Float64, json.a.b[].f[][].h.:Date FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json.a.b.:`Array(JSON)`.c.:`Int64`─┬─json.a.b.:`Array(JSON)`.f.:`Array(Array(JSON))`.g.:`Float64`─┬─json.a.b.:`Array(JSON)`.f.:`Array(Array(JSON))`.h.:`Date`─┐
|
||||
│ [42,43,NULL] │ [[[42.42]],[],[[43.43]]] │ [[[NULL]],[],[['2020-01-01']]] │
|
||||
│ [] │ [] │ [] │
|
||||
│ [44,NULL] │ [[[NULL]],[[44.44]]] │ [[['2020-01-02']],[[NULL]]] │
|
||||
└────────────────────────────────────┴──────────────────────────────────────────────────────────────┴───────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
We can also read sub-object subcolumns from nested `JSON` column:
|
||||
|
||||
```sql
|
||||
SELECT json.a.b[].^k FROM test
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json.a.b.:`Array(JSON)`.^`k`─────────┐
|
||||
│ ['{"j":"1000"}','{}','{"j":"2000"}'] │
|
||||
│ [] │
|
||||
│ ['{}','{"j":"3000"}'] │
|
||||
└──────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Reading JSON type from the data
|
||||
|
||||
All text formats (JSONEachRow, TSV, CSV, CustomSeparated, Values, etc.) support reading the `JSON` type.
|
||||
|
||||
Examples:
|
||||
|
||||
```sql
|
||||
SELECT json FROM format(JSONEachRow, 'json JSON(a.b.c UInt32, SKIP a.b.d, SKIP d.e, SKIP REGEXP \'b.*\')', '
|
||||
{"json" : {"a" : {"b" : {"c" : 1, "d" : [0, 1]}}, "b" : "2020-01-01", "c" : 42, "d" : {"e" : {"f" : ["s1", "s2"]}, "i" : [1, 2, 3]}}}
|
||||
{"json" : {"a" : {"b" : {"c" : 2, "d" : [2, 3]}}, "b" : [1, 2, 3], "c" : null, "d" : {"e" : {"g" : 43}, "i" : [4, 5, 6]}}}
|
||||
{"json" : {"a" : {"b" : {"c" : 3, "d" : [4, 5]}}, "b" : {"c" : 10}, "e" : "Hello, World!"}}
|
||||
{"json" : {"a" : {"b" : {"c" : 4, "d" : [6, 7]}}, "c" : 43}}
|
||||
{"json" : {"a" : {"b" : {"c" : 5, "d" : [8, 9]}}, "b" : {"c" : 11, "j" : [1, 2, 3]}, "d" : {"e" : {"f" : ["s3", "s4"], "g" : 44}, "h" : "2020-02-02 10:00:00"}}}
|
||||
')
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json──────────────────────────────────────────────────────────┐
|
||||
│ {"a":{"b":{"c":1}},"c":"42","d":{"i":["1","2","3"]}} │
|
||||
│ {"a":{"b":{"c":2}},"d":{"i":["4","5","6"]}} │
|
||||
│ {"a":{"b":{"c":3}},"e":"Hello, World!"} │
|
||||
│ {"a":{"b":{"c":4}},"c":"43"} │
|
||||
│ {"a":{"b":{"c":5}},"d":{"h":"2020-02-02 10:00:00.000000000"}} │
|
||||
└───────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
For text formats like CSV/TSV/etc., `JSON` is parsed from a string containing a JSON object:
|
||||
|
||||
```sql
|
||||
SELECT json FROM format(TSV, 'json JSON(a.b.c UInt32, SKIP a.b.d, SKIP REGEXP \'b.*\')',
|
||||
'{"a" : {"b" : {"c" : 1, "d" : [0, 1]}}, "b" : "2020-01-01", "c" : 42, "d" : {"e" : {"f" : ["s1", "s2"]}, "i" : [1, 2, 3]}}
|
||||
{"a" : {"b" : {"c" : 2, "d" : [2, 3]}}, "b" : [1, 2, 3], "c" : null, "d" : {"e" : {"g" : 43}, "i" : [4, 5, 6]}}
|
||||
{"a" : {"b" : {"c" : 3, "d" : [4, 5]}}, "b" : {"c" : 10}, "e" : "Hello, World!"}
|
||||
{"a" : {"b" : {"c" : 4, "d" : [6, 7]}}, "c" : 43}
|
||||
{"a" : {"b" : {"c" : 5, "d" : [8, 9]}}, "b" : {"c" : 11, "j" : [1, 2, 3]}, "d" : {"e" : {"f" : ["s3", "s4"], "g" : 44}, "h" : "2020-02-02 10:00:00"}}')
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json──────────────────────────────────────────────────────────┐
|
||||
│ {"a":{"b":{"c":1}},"c":"42","d":{"i":["1","2","3"]}} │
|
||||
│ {"a":{"b":{"c":2}},"d":{"i":["4","5","6"]}} │
|
||||
│ {"a":{"b":{"c":3}},"e":"Hello, World!"} │
|
||||
│ {"a":{"b":{"c":4}},"c":"43"} │
|
||||
│ {"a":{"b":{"c":5}},"d":{"h":"2020-02-02 10:00:00.000000000"}} │
|
||||
└───────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## Reaching the limit of dynamic paths inside JSON
|
||||
|
||||
The `JSON` data type can store only a limited number of paths as separate subcolumns. By default, this limit is 1024, but you can change it in the type declaration using the `max_dynamic_paths` parameter.
|
||||
When the limit is reached, all new paths inserted into the `JSON` column will be stored in a single shared data structure. It's still possible to read such paths as subcolumns, but it will require reading the whole
|
||||
shared data structure to extract the values of this path. This limit is needed to avoid an enormous number of different subcolumns that could make the table unusable.
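
The limit is set in the type declaration, for example (hypothetical table name):

```sql
-- Allow at most 512 paths to be stored as separate subcolumns per block of data
CREATE TABLE test_paths (json JSON(max_dynamic_paths=512)) ENGINE = MergeTree ORDER BY tuple();
```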
|
||||
|
||||
Let's see what happens when the limit is reached in different scenarios.
|
||||
|
||||
### Reaching the limit during data parsing
|
||||
|
||||
During parsing of a `JSON` object from the data, when the limit is reached for the current block of data, all new paths will be stored in a shared data structure. We can check it using the introspection functions `JSONDynamicPaths` and `JSONSharedDataPaths`:
|
||||
|
||||
```sql
|
||||
SELECT json, JSONDynamicPaths(json), JSONSharedDataPaths(json) FROM format(JSONEachRow, 'json JSON(max_dynamic_paths=3)', '
|
||||
{"json" : {"a" : {"b" : 42}, "c" : [1, 2, 3]}}
|
||||
{"json" : {"a" : {"b" : 43}, "d" : "2020-01-01"}}
|
||||
{"json" : {"a" : {"b" : 44}, "c" : [4, 5, 6]}}
|
||||
{"json" : {"a" : {"b" : 43}, "d" : "2020-01-02", "e" : "Hello", "f" : {"g" : 42.42}}}
|
||||
{"json" : {"a" : {"b" : 43}, "c" : [7, 8, 9], "f" : {"g" : 43.43}, "h" : "World"}}
|
||||
')
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json───────────────────────────────────────────────────────────┬─JSONDynamicPaths(json)─┬─JSONSharedDataPaths(json)─┐
|
||||
│ {"a":{"b":"42"},"c":["1","2","3"]} │ ['a.b','c','d'] │ [] │
|
||||
│ {"a":{"b":"43"},"d":"2020-01-01"} │ ['a.b','c','d'] │ [] │
|
||||
│ {"a":{"b":"44"},"c":["4","5","6"]} │ ['a.b','c','d'] │ [] │
|
||||
│ {"a":{"b":"43"},"d":"2020-01-02","e":"Hello","f":{"g":42.42}} │ ['a.b','c','d'] │ ['e','f.g'] │
|
||||
│ {"a":{"b":"43"},"c":["7","8","9"],"f":{"g":43.43},"h":"World"} │ ['a.b','c','d'] │ ['f.g','h'] │
|
||||
└────────────────────────────────────────────────────────────────┴────────────────────────┴───────────────────────────┘
|
||||
```
|
||||
|
||||
As we can see, after inserting paths `e` and `f.g` the limit was reached and they were inserted into the shared data structure.
|
||||
|
||||
### During merges of data parts in MergeTree table engines
|
||||
|
||||
During a merge of several data parts in a MergeTree table, the `JSON` column in the resulting data part can reach the limit of dynamic paths and won't be able to store all paths from the source parts as subcolumns.
|
||||
In this case ClickHouse chooses which paths will remain as subcolumns after the merge and which paths will be stored in the shared data structure. In most cases ClickHouse tries to keep paths that contain
|
||||
the largest number of non-null values and move the rarest paths to the shared data structure, but it depends on the implementation.
|
||||
|
||||
Let's see an example of such a merge. First, let's create a table with a `JSON` column, set the limit of dynamic paths to `3` and insert values with `5` different paths:
|
||||
|
||||
```sql
|
||||
CREATE TABLE test (id UInt64, json JSON(max_dynamic_paths=3)) engine=MergeTree ORDER BY id;
|
||||
SYSTEM STOP MERGES test;
|
||||
INSERT INTO test SELECT number, formatRow('JSONEachRow', number as a) FROM numbers(5);
|
||||
INSERT INTO test SELECT number, formatRow('JSONEachRow', number as b) FROM numbers(4);
|
||||
INSERT INTO test SELECT number, formatRow('JSONEachRow', number as c) FROM numbers(3);
|
||||
INSERT INTO test SELECT number, formatRow('JSONEachRow', number as d) FROM numbers(2);
|
||||
INSERT INTO test SELECT number, formatRow('JSONEachRow', number as e) FROM numbers(1);
|
||||
```
|
||||
|
||||
Each insert will create a separate data part with a `JSON` column containing a single path:
|
||||
```sql
|
||||
SELECT count(), JSONDynamicPaths(json) AS dynamic_paths, JSONSharedDataPaths(json) AS shared_data_paths, _part FROM test GROUP BY _part, dynamic_paths, shared_data_paths ORDER BY _part ASC
|
||||
```
|
||||
|
||||
```text
|
||||
┌─count()─┬─dynamic_paths─┬─shared_data_paths─┬─_part─────┐
|
||||
│ 5 │ ['a'] │ [] │ all_1_1_0 │
|
||||
│ 4 │ ['b'] │ [] │ all_2_2_0 │
|
||||
│ 3 │ ['c'] │ [] │ all_3_3_0 │
|
||||
│ 2 │ ['d'] │ [] │ all_4_4_0 │
|
||||
│ 1 │ ['e'] │ [] │ all_5_5_0 │
|
||||
└─────────┴───────────────┴───────────────────┴───────────┘
|
||||
|
||||
```
|
||||
|
||||
Now, let's merge all parts into one and see what will happen:
|
||||
|
||||
```sql
|
||||
SYSTEM START MERGES test;
|
||||
OPTIMIZE TABLE test FINAL;
|
||||
SELECT count(), JSONDynamicPaths(json) AS dynamic_paths, JSONSharedDataPaths(json) AS shared_data_paths, _part FROM test GROUP BY _part, dynamic_paths, shared_data_paths ORDER BY _part;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─count()─┬─dynamic_paths─┬─shared_data_paths─┬─_part─────┐
|
||||
│ 1 │ ['a','b','c'] │ ['e'] │ all_1_5_2 │
|
||||
│ 2 │ ['a','b','c'] │ ['d'] │ all_1_5_2 │
|
||||
│ 12 │ ['a','b','c'] │ [] │ all_1_5_2 │
|
||||
└─────────┴───────────────┴───────────────────┴───────────┘
|
||||
```
|
||||
|
||||
As we can see, ClickHouse kept the most frequent paths `a`, `b` and `c` and moved paths `e` and `d` to the shared data structure.
|
||||
|
||||
## Introspection functions
|
||||
|
||||
There are several functions that can help inspect the contents of a JSON column: [JSONAllPaths](../functions/json-functions.md#jsonallpaths), [JSONAllPathsWithTypes](../functions/json-functions.md#jsonallpathswithtypes), [JSONDynamicPaths](../functions/json-functions.md#jsondynamicpaths), [JSONDynamicPathsWithTypes](../functions/json-functions.md#jsondynamicpathswithtypes), [JSONSharedDataPaths](../functions/json-functions.md#jsonshareddatapaths), [JSONSharedDataPathsWithTypes](../functions/json-functions.md#jsonshareddatapathswithtypes).
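
As a quick sketch, several of them can be combined in a single query, here over the `test` table from the merge example above:

```sql
-- Inspect all paths, the paths stored as subcolumns, and the paths stored in shared data
SELECT json, JSONAllPaths(json), JSONDynamicPaths(json), JSONSharedDataPaths(json)
FROM test
LIMIT 3;
```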
|
||||
|
||||
## Tips for better usage of the JSON type
|
||||
|
||||
Before creating a `JSON` column and loading data into it, consider the following tips:
|
||||
|
||||
- Investigate your data and specify as many path hints with types as you can. It will make storage and reading much more efficient.
|
||||
- Think about which paths you will need and which paths you will never need. Specify the paths that you won't need in the SKIP section or with SKIP REGEXP if needed. It will improve the storage efficiency.
|
||||
- Don't set the `max_dynamic_paths` parameter to very high values; it can make storage and reading less efficient (see the sketch below).
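
A hypothetical declaration putting these tips together (the paths and types are placeholders, not taken from this page):

```sql
CREATE TABLE logs
(
    -- typed hints for known, frequently queried paths; SKIP for paths that are never queried
    json JSON(timestamp DateTime64(3), user.id UInt64, SKIP debug.trace, SKIP REGEXP 'internal.*')
)
ENGINE = MergeTree ORDER BY tuple();
```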
|
@ -1155,3 +1155,207 @@ SELECT jsonMergePatch('{"a":1}', '{"name": "joey"}', '{"name": "tom"}', '{"name"
|
||||
│ {"a":1,"name":"zoey"} │
|
||||
└───────────────────────┘
|
||||
```
|
||||
|
||||
### JSONAllPaths
|
||||
|
||||
Returns the list of all paths stored in each row in [JSON](../data-types/newjson.md) column.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
JSONAllPaths(json)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `json` — [JSON](../data-types/newjson.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- An array of paths. [Array(String)](../data-types/array.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory;
|
||||
INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}}
|
||||
SELECT json, JSONAllPaths(json) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json─────────────────────────────────┬─JSONAllPaths(json)─┐
|
||||
│ {"a":"42"} │ ['a'] │
|
||||
│ {"b":"Hello"} │ ['b'] │
|
||||
│ {"a":["1","2","3"],"c":"2020-01-01"} │ ['a','c'] │
|
||||
└──────────────────────────────────────┴────────────────────┘
|
||||
```
|
||||
|
||||
### JSONAllPathsWithTypes
|
||||
|
||||
Returns the map of all paths and their data types stored in each row in [JSON](../data-types/newjson.md) column.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
JSONAllPathsWithTypes(json)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `json` — [JSON](../data-types/newjson.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A map of paths and their data types. [Map(String, String)](../data-types/map.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory;
|
||||
INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}}
|
||||
SELECT json, JSONAllPathsWithTypes(json) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json─────────────────────────────────┬─JSONAllPathsWithTypes(json)───────────────┐
|
||||
│ {"a":"42"} │ {'a':'Int64'} │
|
||||
│ {"b":"Hello"} │ {'b':'String'} │
|
||||
│ {"a":["1","2","3"],"c":"2020-01-01"} │ {'a':'Array(Nullable(Int64))','c':'Date'} │
|
||||
└──────────────────────────────────────┴───────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### JSONDynamicPaths
|
||||
|
||||
Returns the list of dynamic paths that are stored as separate subcolumns in [JSON](../data-types/newjson.md) column.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
JSONDynamicPaths(json)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `json` — [JSON](../data-types/newjson.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- An array of paths. [Array(String)](../data-types/array.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory;
|
||||
INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}}
|
||||
SELECT json, JSONDynamicPaths(json) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json─────────────────────────────────┬─JSONDynamicPaths(json)─┐
|
||||
| {"a":"42"} │ ['a'] │
|
||||
│ {"b":"Hello"} │ [] │
|
||||
│ {"a":["1","2","3"],"c":"2020-01-01"} │ ['a'] │
|
||||
└──────────────────────────────────────┴────────────────────────┘
|
||||
```
|
||||
|
||||
### JSONDynamicPathsWithTypes
|
||||
|
||||
Returns the map of dynamic paths that are stored as separate subcolumns and their types in each row in [JSON](../data-types/newjson.md) column.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
JSONDynamicPathsWithTypes(json)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `json` — [JSON](../data-types/newjson.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A map of paths and their data types. [Map(String, String)](../data-types/map.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory;
|
||||
INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}}
|
||||
SELECT json, JSONDynamicPathsWithTypes(json) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json─────────────────────────────────┬─JSONDynamicPathsWithTypes(json)─┐
|
||||
│ {"a":"42"} │ {'a':'Int64'} │
|
||||
│ {"b":"Hello"} │ {} │
|
||||
│ {"a":["1","2","3"],"c":"2020-01-01"} │ {'a':'Array(Nullable(Int64))'} │
|
||||
└──────────────────────────────────────┴─────────────────────────────────┘
|
||||
```
|
||||
|
||||
### JSONSharedDataPaths
|
||||
|
||||
Returns the list of paths that are stored in shared data structure in [JSON](../data-types/newjson.md) column.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
JSONSharedDataPaths(json)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `json` — [JSON](../data-types/newjson.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- An array of paths. [Array(String)](../data-types/array.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory;
|
||||
INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}}
|
||||
SELECT json, JSONSharedDataPaths(json) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json─────────────────────────────────┬─JSONSharedDataPaths(json)─┐
|
||||
│ {"a":"42"} │ [] │
|
||||
│ {"b":"Hello"} │ ['b'] │
|
||||
│ {"a":["1","2","3"],"c":"2020-01-01"} │ ['c'] │
|
||||
└──────────────────────────────────────┴───────────────────────────┘
|
||||
```
|
||||
|
||||
### JSONSharedDataPathsWithTypes
|
||||
|
||||
Returns the map of paths that are stored in shared data structure and their types in each row in [JSON](../data-types/newjson.md) column.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
JSONSharedDataPathsWithTypes(json)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `json` — [JSON](../data-types/newjson.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- A map of paths and their data types. [Map(String, String)](../data-types/map.md).
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
CREATE TABLE test (json JSON(max_dynamic_paths=1)) ENGINE = Memory;
|
||||
INSERT INTO test FORMAT JSONEachRow {"json" : {"a" : 42}}, {"json" : {"b" : "Hello"}}, {"json" : {"a" : [1, 2, 3], "c" : "2020-01-01"}}
|
||||
SELECT json, JSONSharedDataPathsWithTypes(json) FROM test;
|
||||
```
|
||||
|
||||
```text
|
||||
┌─json─────────────────────────────────┬─JSONSharedDataPathsWithTypes(json)─┐
|
||||
│ {"a":"42"} │ {} │
|
||||
│ {"b":"Hello"} │ {'b':'String'} │
|
||||
│ {"a":["1","2","3"],"c":"2020-01-01"} │ {'c':'Date'} │
|
||||
└──────────────────────────────────────┴────────────────────────────────────┘
|
||||
```
|
||||
|
@ -4189,3 +4189,94 @@ Result:
|
||||
│ 32 │
|
||||
└─────────────────────────────┘
|
||||
```
|
||||
|
||||
## getSubcolumn
|
||||
|
||||
Takes a table expression or identifier and constant string with the name of the sub-column, and returns the requested sub-column extracted from the expression.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
getSubcolumn(col_name, subcol_name)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `col_name` — Table expression or identifier. [Expression](../syntax.md/#expressions), [Identifier](../syntax.md/#identifiers).
|
||||
- `subcol_name` — The name of the sub-column. [String](../data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns the extracted sub-column.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
CREATE TABLE t_arr (arr Array(Tuple(subcolumn1 UInt32, subcolumn2 String))) ENGINE = MergeTree ORDER BY tuple();
|
||||
INSERT INTO t_arr VALUES ([(1, 'Hello'), (2, 'World')]), ([(3, 'This'), (4, 'is'), (5, 'subcolumn')]);
|
||||
SELECT getSubcolumn(arr, 'subcolumn1'), getSubcolumn(arr, 'subcolumn2') FROM t_arr;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─getSubcolumn(arr, 'subcolumn1')─┬─getSubcolumn(arr, 'subcolumn2')─┐
|
||||
1. │ [1,2] │ ['Hello','World'] │
|
||||
2. │ [3,4,5] │ ['This','is','subcolumn'] │
|
||||
└─────────────────────────────────┴─────────────────────────────────┘
|
||||
```
|
||||
|
||||
## getTypeSerializationStreams
|
||||
|
||||
Enumerates stream paths of a data type.
|
||||
|
||||
:::note
|
||||
This function is intended for use by developers.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
getTypeSerializationStreams(col)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `col` — Column or string representation of a data-type from which the data type will be detected.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- Returns an array with all the serialization sub-stream paths. [Array](../data-types/array.md)([String](../data-types/string.md)).
|
||||
|
||||
**Examples**
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT getTypeSerializationStreams(tuple('a', 1, 'b', 2));
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─getTypeSerializationStreams(('a', 1, 'b', 2))─────────────────────────────────────────────────────────────────────────┐
|
||||
1. │ ['{TupleElement(1), Regular}','{TupleElement(2), Regular}','{TupleElement(3), Regular}','{TupleElement(4), Regular}'] │
|
||||
└───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT getTypeSerializationStreams('Map(String, Int64)');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
```response
|
||||
┌─getTypeSerializationStreams('Map(String, Int64)')────────────────────────────────────────────────────────────────┐
|
||||
1. │ ['{ArraySizes}','{ArrayElements, TupleElement(keys), Regular}','{ArrayElements, TupleElement(values), Regular}'] │
|
||||
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
@ -8,26 +8,28 @@ sidebar_label: STATISTICS
|
||||
|
||||
The following operations are available:
|
||||
|
||||
- `ALTER TABLE [db].table ADD STATISTICS (columns list) TYPE (type list)` - Adds statistic description to tables metadata.
|
||||
- `ALTER TABLE [db].table ADD STATISTICS [IF NOT EXISTS] (column list) TYPE (type list)` - Adds a statistics description to the table's metadata.
|
||||
|
||||
- `ALTER TABLE [db].table MODIFY STATISTICS (columns list) TYPE (type list)` - Modifies statistic description to tables metadata.
|
||||
- `ALTER TABLE [db].table MODIFY STATISTICS (column list) TYPE (type list)` - Modifies the statistics description in the table's metadata.
|
||||
|
||||
- `ALTER TABLE [db].table DROP STATISTICS (columns list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns.
|
||||
- `ALTER TABLE [db].table DROP STATISTICS [IF EXISTS] (column list)` - Removes statistics from the metadata of the specified columns and deletes all statistics objects in all parts for the specified columns.
|
||||
|
||||
- `ALTER TABLE [db].table CLEAR STATISTICS (columns list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuild using `ALTER TABLE MATERIALIZE STATISTICS`.
|
||||
- `ALTER TABLE [db].table CLEAR STATISTICS [IF EXISTS] (column list)` - Deletes all statistics objects in all parts for the specified columns. Statistics objects can be rebuilt using `ALTER TABLE MATERIALIZE STATISTICS`.
|
||||
|
||||
- `ALTER TABLE [db.]table MATERIALIZE STATISTICS (columns list)` - Rebuilds the statistic for columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
|
||||
- `ALTER TABLE [db.]table MATERIALIZE STATISTICS [IF EXISTS] (column list)` - Rebuilds statistics for the specified columns. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
|
||||
|
||||
The first two commands are lightweight in the sense that they only change metadata or remove files.
|
||||
|
||||
Also, they are replicated, syncing statistics metadata via ZooKeeper.
|
||||
|
||||
There is an example adding two statistics types to two columns:
|
||||
## Example:
|
||||
|
||||
Adding two statistics types to two columns:
|
||||
|
||||
```
|
||||
ALTER TABLE t1 MODIFY STATISTICS c, d TYPE TDigest, Uniq;
|
||||
```
|
||||
|
||||
:::note
|
||||
Statistic manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
|
||||
Statistics are supported only for [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
|
||||
:::
|
||||
|
@ -13,8 +13,8 @@ Creates a new view. Views can be [normal](#normal-view), [materialized](#materia
|
||||
Syntax:
|
||||
|
||||
``` sql
|
||||
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name]
|
||||
[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }]
|
||||
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster_name]
|
||||
[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }]
|
||||
AS SELECT ...
|
||||
[COMMENT 'comment']
|
||||
```
|
||||
@ -55,8 +55,8 @@ SELECT * FROM view(column1=value1, column2=value2 ...)
|
||||
## Materialized View
|
||||
|
||||
``` sql
|
||||
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE]
|
||||
[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }]
|
||||
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE]
|
||||
[DEFINER = { user | CURRENT_USER }] [SQL SECURITY { DEFINER | INVOKER | NONE }]
|
||||
AS SELECT ...
|
||||
[COMMENT 'comment']
|
||||
```
|
||||
@ -92,7 +92,7 @@ Given that `POPULATE` works like `CREATE TABLE ... AS SELECT ...` it has limitat
|
||||
- It is not supported with Replicated database
|
||||
- It is not supported in ClickHouse cloud
|
||||
|
||||
Instead a separate `INSERT ... SELECT` can be used.
|
||||
Instead a separate `INSERT ... SELECT` can be used.
|
||||
:::
|
||||
|
||||
A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`. Note that the corresponding conversions are performed independently on each block of inserted data. For example, if `GROUP BY` is set, data is aggregated during insertion, but only within a single packet of inserted data. The data won’t be further aggregated. The exception is when using an `ENGINE` that independently performs data aggregation, such as `SummingMergeTree`.
|
||||
@ -110,7 +110,7 @@ To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop
|
||||
`DEFINER` and `SQL SECURITY` allow you to specify which ClickHouse user to use when executing the view's underlying query.
|
||||
`SQL SECURITY` has three legal values: `DEFINER`, `INVOKER`, or `NONE`. You can specify any existing user or `CURRENT_USER` in the `DEFINER` clause.
|
||||
|
||||
The following table will explain which rights are required for which user in order to select from view.
|
||||
The following table will explain which rights are required for which user in order to select from view.
|
||||
Note that regardless of the SQL security option, in every case it is still required to have `GRANT SELECT ON <view>` in order to read from it.
|
||||
|
||||
| SQL security option | View | Materialized View |
|
||||
@ -130,7 +130,7 @@ If `DEFINER`/`SQL SECURITY` aren't specified, the default values are used:
|
||||
|
||||
If a view is attached without `DEFINER`/`SQL SECURITY` specified, the default value is `SQL SECURITY NONE` for the materialized view and `SQL SECURITY INVOKER` for the normal view.
|
||||
|
||||
To change SQL security for an existing view, use
|
||||
To change SQL security for an existing view, use
|
||||
```sql
|
||||
ALTER TABLE MODIFY SQL SECURITY { DEFINER | INVOKER | NONE } [DEFINER = { user | CURRENT_USER }]
|
||||
```
|
||||
@ -161,6 +161,8 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name
|
||||
REFRESH EVERY|AFTER interval [OFFSET interval]
|
||||
RANDOMIZE FOR interval
|
||||
DEPENDS ON [db.]name [, [db.]name [, ...]]
|
||||
SETTINGS name = value [, name = value [, ...]]
|
||||
[APPEND]
|
||||
[TO[db.]name] [(columns)] [ENGINE = engine] [EMPTY]
|
||||
AS SELECT ...
|
||||
[COMMENT 'comment']
|
||||
@ -170,18 +172,23 @@ where `interval` is a sequence of simple intervals:
|
||||
number SECOND|MINUTE|HOUR|DAY|WEEK|MONTH|YEAR
|
||||
```
|
||||
|
||||
Periodically runs the corresponding query and stores its result in a table, atomically replacing the table's previous contents.
|
||||
Periodically runs the corresponding query and stores its result in a table.
|
||||
* If the query says `APPEND`, each refresh inserts rows into the table without deleting existing rows. The insert is not atomic, just like a regular INSERT SELECT.
|
||||
* Otherwise each refresh atomically replaces the table's previous contents.
|
||||
|
||||
Differences from regular non-refreshable materialized views:
|
||||
* No insert trigger. I.e. when new data is inserted into the table specified in SELECT, it's *not* automatically pushed to the refreshable materialized view. The periodic refresh runs the entire query and replaces the entire table.
|
||||
* No insert trigger. I.e. when new data is inserted into the table specified in SELECT, it's *not* automatically pushed to the refreshable materialized view. The periodic refresh runs the entire query.
|
||||
* No restrictions on the SELECT query. Table functions (e.g. `url()`), views, UNION, JOIN, are all allowed.
|
||||
|
||||
:::note
|
||||
The settings in the `REFRESH ... SETTINGS` part of the query are refresh settings (e.g. `refresh_retries`), distinct from regular settings (e.g. `max_threads`). Regular settings can be specified using `SETTINGS` at the end of the query.
|
||||
:::
|
||||
|
||||
:::note
|
||||
Refreshable materialized views are a work in progress. Setting `allow_experimental_refreshable_materialized_view = 1` is required for creating one. Current limitations:
|
||||
* not compatible with Replicated database or table engines
|
||||
* It is not supported in ClickHouse Cloud
|
||||
* require [Atomic database engine](../../../engines/database-engines/atomic.md),
|
||||
* no retries for failed refresh - we just skip to the next scheduled refresh time,
|
||||
* no limit on number of concurrent refreshes.
|
||||
:::
|
||||
|
||||
@ -246,15 +253,22 @@ A few more examples:
|
||||
`DEPENDS ON` only works between refreshable materialized views. Listing a regular table in the `DEPENDS ON` list will prevent the view from ever refreshing (dependencies can be removed with `ALTER`, see below).
|
||||
:::
|
||||
|
||||
### Settings
|
||||
|
||||
Available refresh settings:
|
||||
* `refresh_retries` - How many times to retry if refresh query fails with an exception. If all retries fail, skip to the next scheduled refresh time. 0 means no retries, -1 means infinite retries. Default: 0.
|
||||
* `refresh_retry_initial_backoff_ms` - Delay before the first retry, if `refresh_retries` is not zero. Each subsequent retry doubles the delay, up to `refresh_retry_max_backoff_ms`. Default: 100 ms.
|
||||
* `refresh_retry_max_backoff_ms` - Limit on the exponential growth of delay between refresh attempts. Default: 60000 ms (1 minute).
|
||||
|
||||
### Changing Refresh Parameters {#changing-refresh-parameters}
|
||||
|
||||
To change refresh parameters:
|
||||
```
|
||||
ALTER TABLE [db.]name MODIFY REFRESH EVERY|AFTER ... [RANDOMIZE FOR ...] [DEPENDS ON ...]
|
||||
ALTER TABLE [db.]name MODIFY REFRESH EVERY|AFTER ... [RANDOMIZE FOR ...] [DEPENDS ON ...] [SETTINGS ...]
|
||||
```
|
||||
|
||||
:::note
|
||||
This replaces refresh schedule *and* dependencies. If the table had a `DEPENDS ON`, doing a `MODIFY REFRESH` without `DEPENDS ON` will remove the dependencies.
|
||||
This replaces *all* refresh parameters at once: schedule, dependencies, settings, and APPEND-ness. E.g. if the table had a `DEPENDS ON`, doing a `MODIFY REFRESH` without `DEPENDS ON` will remove the dependencies.
|
||||
:::
|
||||
|
||||
### Other operations
|
||||
@ -263,6 +277,10 @@ The status of all refreshable materialized views is available in table [`system.
|
||||
|
||||
To manually stop, start, trigger, or cancel refreshes use [`SYSTEM STOP|START|REFRESH|CANCEL VIEW`](../system.md#refreshable-materialized-views).
|
||||
|
||||
:::note
|
||||
Fun fact: the refresh query is allowed to read from the view that's being refreshed, seeing pre-refresh version of the data. This means you can implement Conway's game of life: https://pastila.nl/?00021a4b/d6156ff819c83d490ad2dcec05676865#O0LGWTO7maUQIA4AcGUtlA==
|
||||
:::
|
||||
|
||||
## Window View [Experimental]
|
||||
|
||||
:::info
|
||||
|
@ -38,8 +38,7 @@ If you anticipate frequent deletes, consider using a [custom partitioning key](/
|
||||
|
||||
### Lightweight `DELETE`s with projections
|
||||
|
||||
By default, `DELETE` does not work for tables with projections. This is because rows in a projection may be affected by a `DELETE` operation and may require the projection to be rebuilt, negatively affecting `DELETE` performance.
|
||||
However, there is an option to change this behavior. By changing setting `lightweight_mutation_projection_mode = 'drop'`, deletes will work with projections.
|
||||
By default, `DELETE` does not work for tables with projections. This is because rows in a projection may be affected by a `DELETE` operation. However, there is a [MergeTree setting](https://clickhouse.com/docs/en/operations/settings/merge-tree-settings), `lightweight_mutation_projection_mode`, that can change this behavior.
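
A minimal sketch, assuming a hypothetical MergeTree table `tbl` that has a projection, and treating `lightweight_mutation_projection_mode` as a table-level MergeTree setting as described above:

```sql
ALTER TABLE tbl MODIFY SETTING lightweight_mutation_projection_mode = 'drop';
DELETE FROM tbl WHERE id = 42;
```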
|
||||
|
||||
## Performance considerations when using lightweight `DELETE`
|
||||
|
||||
|
@ -400,7 +400,7 @@ SYSTEM SYNC REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_
|
||||
After running this statement the `[db.]replicated_merge_tree_family_table_name` fetches commands from the common replicated log into its own replication queue, and then the query waits until the replica processes all of the fetched commands. The following modifiers are supported:
|
||||
|
||||
- If a `STRICT` modifier was specified then the query waits for the replication queue to become empty. The `STRICT` version may never succeed if new entries constantly appear in the replication queue.
|
||||
- If a `LIGHTWEIGHT` modifier was specified then the query waits only for `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` and `DROP_PART` entries to be processed.
|
||||
Additionally, the `LIGHTWEIGHT` modifier supports an optional `FROM 'srcReplicas'` clause, where `srcReplicas` is a comma-separated list of source replica names. This extension allows for more targeted synchronization by focusing only on replication tasks originating from the specified source replicas; a usage sketch follows this list.
|
||||
- If a `PULL` modifier was specified then the query pulls new replication queue entries from ZooKeeper, but does not wait for anything to be processed.
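
A hedged sketch of the modifiers with hypothetical database, table, and replica names:

```sql
SYSTEM SYNC REPLICA db.replicated_table STRICT;
SYSTEM SYNC REPLICA db.replicated_table LIGHTWEIGHT FROM 'replica_1,replica_2';
SYSTEM SYNC REPLICA db.replicated_table PULL;
```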
|
||||
|
||||
@ -526,6 +526,10 @@ Trigger an immediate out-of-schedule refresh of a given view.
|
||||
SYSTEM REFRESH VIEW [db.]name
|
||||
```
|
||||
|
||||
### WAIT VIEW
|
||||
|
||||
Waits for the currently running refresh to complete. If the refresh fails, an exception is thrown. If no refresh is running, it completes immediately, throwing an exception if the previous refresh failed.
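
A sketch of the invocation, assuming it follows the same form as the other view commands:

```sql
SYSTEM WAIT VIEW [db.]name
```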
|
||||
|
||||
### STOP VIEW, STOP VIEWS
|
||||
|
||||
Disable periodic refreshing of the given view or all refreshable views. If a refresh is in progress, cancel it too.
|
||||
|
@ -77,3 +77,16 @@ SELECT count(*) FROM azureBlobStorage('DefaultEndpointsProtocol=https;AccountNam
|
||||
**See Also**
|
||||
|
||||
- [AzureBlobStorage Table Engine](/docs/en/engines/table-engines/integrations/azureBlobStorage.md)
|
||||
|
||||
## Hive-style partitioning {#hive-style-partitioning}
|
||||
|
||||
When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow the use of partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`.
|
||||
|
||||
**Example**
|
||||
|
||||
Use virtual columns created with Hive-style partitioning:
|
||||
|
||||
``` sql
|
||||
SET use_hive_partitioning = 1;
|
||||
SELECT * FROM azureBlobStorage(config, storage_account_url='...', container='...', blob_path='http://data/path/date=*/country=*/code=*/*.parquet') WHERE _date > '2020-01-01' AND _country = 'Netherlands' AND _code = 42;
|
||||
```
|
||||
|
@ -103,7 +103,7 @@ LIMIT 2;
|
||||
└─────────┴─────────┴─────────┘
|
||||
```
|
||||
|
||||
### Inserting data from a file into a table:
|
||||
### Inserting data from a file into a table
|
||||
|
||||
``` sql
|
||||
INSERT INTO FUNCTION
|
||||
@ -206,6 +206,19 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
|
||||
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
|
||||
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
|
||||
|
||||
## Hive-style partitioning {#hive-style-partitioning}
|
||||
|
||||
When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow the use of partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`.
|
||||
|
||||
**Example**
|
||||
|
||||
Use virtual columns created with Hive-style partitioning:
|
||||
|
||||
``` sql
|
||||
SET use_hive_partitioning = 1;
|
||||
SELECT * FROM file('data/path/date=*/country=*/code=*/*.parquet') WHERE _date > '2020-01-01' AND _country = 'Netherlands' AND _code = 42;
|
||||
```
|
||||
|
||||
## Settings {#settings}
|
||||
|
||||
- [engine_file_empty_if_not_exists](/docs/en/operations/settings/settings.md#engine-file-empty_if-not-exists) - allows selecting empty data from a file that doesn't exist. Disabled by default.
|
||||
|
@ -100,6 +100,19 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
|
||||
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
|
||||
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
|
||||
|
||||
## Hive-style partitioning {#hive-style-partitioning}
|
||||
|
||||
When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow the use of partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`.
|
||||
|
||||
**Example**
|
||||
|
||||
Use virtual columns created with Hive-style partitioning:
|
||||
|
||||
``` sql
|
||||
SET use_hive_partitioning = 1;
|
||||
SELECT * FROM HDFS('hdfs://hdfs1:9000/data/path/date=*/country=*/code=*/*.parquet') WHERE _date > '2020-01-01' AND _country = 'Netherlands' AND _code = 42;
|
||||
```
|
||||
|
||||
## Storage Settings {#storage-settings}
|
||||
|
||||
- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs_truncate_on_insert) - allows truncating the file before inserting into it. Disabled by default.
|
||||
|
@ -274,6 +274,19 @@ FROM s3(
|
||||
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. In the case of an archive, it shows the uncompressed size of the file inside the archive.
|
||||
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
|
||||
|
||||
## Hive-style partitioning {#hive-style-partitioning}
|
||||
|
||||
When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow the use of partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`.
|
||||
|
||||
**Example**
|
||||
|
||||
Use virtual columns created with Hive-style partitioning:
|
||||
|
||||
``` sql
|
||||
SET use_hive_partitioning = 1;
|
||||
SELECT * FROM s3('s3://data/path/date=*/country=*/code=*/*.parquet') WHERE _date > '2020-01-01' AND _country = 'Netherlands' AND _code = 42;
|
||||
```
|
||||
|
||||
## Storage Settings {#storage-settings}
|
||||
|
||||
- [s3_truncate_on_insert](/docs/en/operations/settings/settings.md#s3_truncate_on_insert) - allows truncating the file before inserting into it. Disabled by default.
|
||||
|
@ -55,6 +55,19 @@ Character `|` inside patterns is used to specify failover addresses. They are it
|
||||
- `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
|
||||
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
|
||||
|
||||
## Hive-style partitioning {#hive-style-partitioning}
|
||||
|
||||
When the setting `use_hive_partitioning` is set to 1, ClickHouse will detect Hive-style partitioning in the path (`/name=value/`) and will allow the use of partition columns as virtual columns in the query. These virtual columns will have the same names as in the partitioned path, but starting with `_`.
|
||||
|
||||
**Example**
|
||||
|
||||
Use virtual columns created with Hive-style partitioning:
|
||||
|
||||
``` sql
|
||||
SET use_hive_partitioning = 1;
|
||||
SELECT * FROM url('http://data/path/date=*/country=*/code=*/*.parquet') WHERE _date > '2020-01-01' AND _country = 'Netherlands' AND _code = 42;
|
||||
```
|
||||
|
||||
## Storage Settings {#storage-settings}
|
||||
|
||||
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows skipping empty files while reading. Disabled by default.
|
||||
|
@ -146,7 +146,30 @@ SELECT dictGet('dict', 'B', 2);
|
||||
|
||||
## Example of using named collections with a PostgreSQL database
|
||||
|
||||
For a description of the parameters, see [postgresql](../sql-reference/table-functions/postgresql.md).
|
||||
For a description of the parameters, see [postgresql](../sql-reference/table-functions/postgresql.md). Additionally, the following aliases are available:
|
||||
- `username` for `user`
|
||||
- `db` for `database`.
|
||||
|
||||
The `addresses_expr` parameter is used in a collection instead of `host:port`. It is optional, because other parameters also exist: `host`, `hostname`, `port`. The following pseudocode shows the priority:
|
||||
|
||||
```sql
|
||||
CASE
|
||||
WHEN collection['addresses_expr'] != '' THEN collection['addresses_expr']
|
||||
WHEN collection['host'] != '' THEN collection['host'] || ':' || if(collection['port'] != '', collection['port'], '5432')
|
||||
WHEN collection['hostname'] != '' THEN collection['hostname'] || ':' || if(collection['port'] != '', collection['port'], '5432')
|
||||
END
|
||||
```
|
||||
|
||||
Example of creating a named collection:
|
||||
```sql
|
||||
CREATE NAMED COLLECTION mypg AS
|
||||
user = 'pguser',
|
||||
password = 'jw8s0F4',
|
||||
host = '127.0.0.1',
|
||||
port = 5432,
|
||||
database = 'test',
|
||||
schema = 'test_schema'
|
||||
```
|
||||
|
||||
Example configuration:
|
||||
```xml
|
||||
@ -199,6 +222,10 @@ SELECT * FROM mypgtable;
|
||||
└───┘
|
||||
```
|
||||
|
||||
:::note
|
||||
PostgreSQL copies data from the named collection when the table is created. Changes to the collection do not affect existing tables.
|
||||
:::
|
||||
|
||||
### Example of using named collections with a database with the PostgreSQL engine
|
||||
|
||||
```sql
|
||||
|
@ -23,30 +23,30 @@ slug: /zh/operations/external-authenticators/kerberos
|
||||
|
||||
Example (goes into `config.xml`):
|
||||
```xml
|
||||
<yandex>
|
||||
<clickhouse>
|
||||
<!- ... -->
|
||||
<kerberos />
|
||||
</yandex>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
Principal specification:
|
||||
```xml
|
||||
<yandex>
|
||||
<clickhouse>
|
||||
<!- ... -->
|
||||
<kerberos>
|
||||
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
|
||||
</kerberos>
|
||||
</yandex>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
Filtering by realm:
|
||||
```xml
|
||||
<yandex>
|
||||
<clickhouse>
|
||||
<!- ... -->
|
||||
<kerberos>
|
||||
<realm>EXAMPLE.COM</realm>
|
||||
</kerberos>
|
||||
</yandex>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
!!! warning "Note"
|
||||
@ -74,7 +74,7 @@ The Kerberos principal name format usually follows this pattern:
|
||||
|
||||
Example (goes into `users.xml`):
|
||||
```
|
||||
<yandex>
|
||||
<clickhouse>
|
||||
<!- ... -->
|
||||
<users>
|
||||
<!- ... -->
|
||||
@ -85,7 +85,7 @@ Kerberos主体名称格式通常遵循以下模式:
|
||||
</kerberos>
|
||||
</my_user>
|
||||
</users>
|
||||
</yandex>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
!!! warning "Warning"
|
||||
|
@ -1,4 +1,4 @@
|
||||
add_compile_options($<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>)
|
||||
add_compile_options("$<$<OR:$<COMPILE_LANGUAGE:C>,$<COMPILE_LANGUAGE:CXX>>:${COVERAGE_FLAGS}>")
|
||||
|
||||
if (USE_CLANG_TIDY)
|
||||
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
|
||||
|
@ -75,6 +75,8 @@ public:
|
||||
const String & default_database_,
|
||||
const String & user_,
|
||||
const String & password_,
|
||||
const String & proto_send_chunked_,
|
||||
const String & proto_recv_chunked_,
|
||||
const String & quota_key_,
|
||||
const String & stage,
|
||||
bool randomize_,
|
||||
@ -128,7 +130,9 @@ public:
|
||||
connections.emplace_back(std::make_unique<ConnectionPool>(
|
||||
concurrency,
|
||||
cur_host, cur_port,
|
||||
default_database_, user_, password_, quota_key_,
|
||||
default_database_, user_, password_,
|
||||
proto_send_chunked_, proto_recv_chunked_,
|
||||
quota_key_,
|
||||
/* cluster_= */ "",
|
||||
/* cluster_secret_= */ "",
|
||||
/* client_name_= */ std::string(DEFAULT_CLIENT_NAME),
|
||||
@ -662,6 +666,50 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
|
||||
|
||||
Strings hosts = options.count("host") ? options["host"].as<Strings>() : Strings({"localhost"});
|
||||
|
||||
String proto_send_chunked {"notchunked"};
|
||||
String proto_recv_chunked {"notchunked"};
|
||||
|
||||
if (options.count("proto_caps"))
|
||||
{
|
||||
std::string proto_caps_str = options["proto_caps"].as<std::string>();
|
||||
|
||||
std::vector<std::string_view> proto_caps;
|
||||
splitInto<','>(proto_caps, proto_caps_str);
|
||||
|
||||
for (auto cap_str : proto_caps)
|
||||
{
|
||||
std::string direction;
|
||||
|
||||
if (cap_str.starts_with("send_"))
|
||||
{
|
||||
direction = "send";
|
||||
cap_str = cap_str.substr(std::string_view("send_").size());
|
||||
}
|
||||
else if (cap_str.starts_with("recv_"))
|
||||
{
|
||||
direction = "recv";
|
||||
cap_str = cap_str.substr(std::string_view("recv_").size());
|
||||
}
|
||||
|
||||
if (cap_str != "chunked" && cap_str != "notchunked" && cap_str != "chunked_optional" && cap_str != "notchunked_optional")
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "proto_caps option is incorrect ({})", proto_caps_str);
|
||||
|
||||
if (direction.empty())
|
||||
{
|
||||
proto_send_chunked = cap_str;
|
||||
proto_recv_chunked = cap_str;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (direction == "send")
|
||||
proto_send_chunked = cap_str;
|
||||
else
|
||||
proto_recv_chunked = cap_str;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Benchmark benchmark(
|
||||
options["concurrency"].as<unsigned>(),
|
||||
options["delay"].as<double>(),
|
||||
@ -673,6 +721,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
|
||||
options["database"].as<std::string>(),
|
||||
options["user"].as<std::string>(),
|
||||
options["password"].as<std::string>(),
|
||||
proto_send_chunked,
|
||||
proto_recv_chunked,
|
||||
options["quota_key"].as<std::string>(),
|
||||
options["stage"].as<std::string>(),
|
||||
options.count("randomize"),
|
||||
|
@ -223,7 +223,7 @@ std::vector<String> Client::loadWarningMessages()
|
||||
|
||||
size_t rows = packet.block.rows();
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
messages.emplace_back(column[i].get<String>());
|
||||
messages.emplace_back(column[i].safeGet<String>());
|
||||
}
|
||||
continue;
|
||||
|
||||
@ -1164,6 +1164,9 @@ void Client::processOptions(const OptionsDescription & options_description,
|
||||
/// (There is no need to copy the context because clickhouse-client has no background tasks so it won't use that context in parallel.)
|
||||
client_context = global_context;
|
||||
initClientContext();
|
||||
|
||||
/// Allow to pass-through unknown settings to the server.
|
||||
client_context->getAccessControl().allowAllSettings();
|
||||
}
|
||||
|
||||
|
||||
|
@ -38,6 +38,21 @@
|
||||
<production>{display_name} \e[1;31m:)\e[0m </production> <!-- if it matched to the substring "production" in the server display name -->
|
||||
</prompt_by_server_display_name>
|
||||
|
||||
<!-- Chunked capabilities for native protocol by client.
|
||||
Can be enabled separately for send and receive channels.
|
||||
Supported modes:
|
||||
- chunked - client will only work with server supporting chunked protocol;
|
||||
- chunked_optional - client prefer server to enable chunked protocol, but can switch to notchunked if server does not support this;
|
||||
- notchunked - client will only work with server supporting notchunked protocol (current default);
|
||||
- notchunked_optional - client prefer server notchunked protocol, but can switch to chunked if server does not support this.
|
||||
-->
|
||||
<!--
|
||||
<proto_caps>
|
||||
<send>chunked_optional</send>
|
||||
<recv>chunked_optional</recv>
|
||||
</proto_caps>
|
||||
-->
|
||||
|
||||
<!--
|
||||
Settings adjustable via command-line parameters
|
||||
can take their defaults from that config file, see examples:
|
||||
|
@ -175,6 +175,11 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
|
||||
hash_func.update(options["seed"].as<std::string>());
|
||||
}
|
||||
|
||||
SharedContextHolder shared_context = Context::createShared();
|
||||
auto context = Context::createGlobal(shared_context.get());
|
||||
auto context_const = WithContext(context).getContext();
|
||||
context->makeGlobalContext();
|
||||
|
||||
registerInterpreters();
|
||||
registerFunctions();
|
||||
registerAggregateFunctions();
|
||||
@ -259,7 +264,11 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
|
||||
if (!backslash)
|
||||
{
|
||||
WriteBufferFromOwnString str_buf;
|
||||
formatAST(*res, str_buf, hilite, oneline || approx_query_length < max_line_length);
|
||||
bool oneline_current_query = oneline || approx_query_length < max_line_length;
|
||||
IAST::FormatSettings settings(str_buf, oneline_current_query, hilite);
|
||||
settings.show_secrets = true;
|
||||
settings.print_pretty_type_names = !oneline_current_query;
|
||||
res->format(settings);
|
||||
|
||||
if (insert_query_payload)
|
||||
{
|
||||
@ -302,7 +311,11 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
|
||||
else
|
||||
{
|
||||
WriteBufferFromOwnString str_buf;
|
||||
formatAST(*res, str_buf, hilite, oneline);
|
||||
bool oneline_current_query = oneline || approx_query_length < max_line_length;
|
||||
IAST::FormatSettings settings(str_buf, oneline_current_query, hilite);
|
||||
settings.show_secrets = true;
|
||||
settings.print_pretty_type_names = !oneline_current_query;
|
||||
res->format(settings);
|
||||
|
||||
auto res_string = str_buf.str();
|
||||
WriteBufferFromOStream res_cout(std::cout, 4096);
|
||||
|
@ -95,7 +95,7 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
|
||||
client->zookeeper->set(
|
||||
client->getAbsolutePath(query->args[0].safeGet<String>()),
|
||||
query->args[1].safeGet<String>(),
|
||||
static_cast<Int32>(query->args[2].get<Int32>()));
|
||||
static_cast<Int32>(query->args[2].safeGet<Int32>()));
|
||||
}
|
||||
|
||||
bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
|
||||
@ -494,7 +494,7 @@ void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
|
||||
{
|
||||
Int32 version{-1};
|
||||
if (query->args.size() == 2)
|
||||
version = static_cast<Int32>(query->args[1].get<Int32>());
|
||||
version = static_cast<Int32>(query->args[1].safeGet<Int32>());
|
||||
|
||||
client->zookeeper->remove(client->getAbsolutePath(query->args[0].safeGet<String>()), version);
|
||||
}
|
||||
@ -549,7 +549,7 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient
|
||||
String leaving;
|
||||
String new_members;
|
||||
|
||||
auto operation = query->args[0].get<ReconfigCommand::Operation>();
|
||||
auto operation = query->args[0].safeGet<ReconfigCommand::Operation>();
|
||||
switch (operation)
|
||||
{
|
||||
case static_cast<UInt8>(ReconfigCommand::Operation::ADD):
|
||||
|
@ -66,6 +66,8 @@
|
||||
/// A minimal file used when the keeper is run without installation
|
||||
INCBIN(keeper_resource_embedded_xml, SOURCE_DIR "/programs/keeper/keeper_embedded.xml");
|
||||
|
||||
extern const char * GIT_HASH;
|
||||
|
||||
int mainEntryClickHouseKeeper(int argc, char ** argv)
|
||||
{
|
||||
DB::Keeper app;
|
||||
@ -675,7 +677,7 @@ void Keeper::logRevision() const
|
||||
"Starting ClickHouse Keeper {} (revision: {}, git hash: {}, build id: {}), PID {}",
|
||||
VERSION_STRING,
|
||||
ClickHouseRevision::getVersionRevision(),
|
||||
git_hash.empty() ? "<unknown>" : git_hash,
|
||||
GIT_HASH,
|
||||
build_id.empty() ? "<unknown>" : build_id,
|
||||
getpid());
|
||||
}
|
||||
|
@ -27,7 +27,7 @@ std::string LibraryBridge::bridgeName() const
|
||||
|
||||
LibraryBridge::HandlerFactoryPtr LibraryBridge::getHandlerFactoryPtr(ContextPtr context) const
|
||||
{
|
||||
return std::make_shared<LibraryBridgeHandlerFactory>("LibraryRequestHandlerFactory", keep_alive_timeout, context);
|
||||
return std::make_shared<LibraryBridgeHandlerFactory>("LibraryRequestHandlerFactory", context);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -9,12 +9,10 @@ namespace DB
|
||||
{
|
||||
LibraryBridgeHandlerFactory::LibraryBridgeHandlerFactory(
|
||||
const std::string & name_,
|
||||
size_t keep_alive_timeout_,
|
||||
ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, log(getLogger(name_))
|
||||
, name(name_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -26,17 +24,17 @@ std::unique_ptr<HTTPRequestHandler> LibraryBridgeHandlerFactory::createRequestHa
|
||||
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
|
||||
{
|
||||
if (uri.getPath() == "/extdict_ping")
|
||||
return std::make_unique<ExternalDictionaryLibraryBridgeExistsHandler>(keep_alive_timeout, getContext());
|
||||
return std::make_unique<ExternalDictionaryLibraryBridgeExistsHandler>(getContext());
|
||||
else if (uri.getPath() == "/catboost_ping")
|
||||
return std::make_unique<CatBoostLibraryBridgeExistsHandler>(keep_alive_timeout, getContext());
|
||||
return std::make_unique<CatBoostLibraryBridgeExistsHandler>(getContext());
|
||||
}
|
||||
|
||||
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
|
||||
{
|
||||
if (uri.getPath() == "/extdict_request")
|
||||
return std::make_unique<ExternalDictionaryLibraryBridgeRequestHandler>(keep_alive_timeout, getContext());
|
||||
return std::make_unique<ExternalDictionaryLibraryBridgeRequestHandler>(getContext());
|
||||
else if (uri.getPath() == "/catboost_request")
|
||||
return std::make_unique<CatBoostLibraryBridgeRequestHandler>(keep_alive_timeout, getContext());
|
||||
return std::make_unique<CatBoostLibraryBridgeRequestHandler>(getContext());
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
|
@ -13,7 +13,6 @@ class LibraryBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContex
|
||||
public:
|
||||
LibraryBridgeHandlerFactory(
|
||||
const std::string & name_,
|
||||
size_t keep_alive_timeout_,
|
||||
ContextPtr context_);
|
||||
|
||||
std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;
|
||||
@ -21,7 +20,6 @@ public:
|
||||
private:
|
||||
LoggerPtr log;
|
||||
const std::string name;
|
||||
const size_t keep_alive_timeout;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -87,10 +87,8 @@ static void writeData(Block data, OutputFormatPtr format)
|
||||
}
|
||||
|
||||
|
||||
ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, log(getLogger("ExternalDictionaryLibraryBridgeRequestHandler"))
|
||||
ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRequestHandler(ContextPtr context_)
|
||||
: WithContext(context_), log(getLogger("ExternalDictionaryLibraryBridgeRequestHandler"))
|
||||
{
|
||||
}
|
||||
|
||||
@ -137,7 +135,7 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
|
||||
const String & dictionary_id = params.get("dictionary_id");
|
||||
|
||||
LOG_TRACE(log, "Library method: '{}', dictionary id: {}", method, dictionary_id);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
|
||||
|
||||
try
|
||||
{
|
||||
@ -374,10 +372,8 @@ void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequ
|
||||
}
|
||||
|
||||
|
||||
ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, log(getLogger("ExternalDictionaryLibraryBridgeExistsHandler"))
|
||||
ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExistsHandler(ContextPtr context_)
|
||||
: WithContext(context_), log(getLogger("ExternalDictionaryLibraryBridgeExistsHandler"))
|
||||
{
|
||||
}
|
||||
|
||||
@ -401,7 +397,7 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque
|
||||
|
||||
String res = library_handler ? "1" : "0";
|
||||
|
||||
setResponseDefaultHeaders(response, keep_alive_timeout);
|
||||
setResponseDefaultHeaders(response);
|
||||
LOG_TRACE(log, "Sending ping response: {} (dictionary id: {})", res, dictionary_id);
|
||||
response.sendBuffer(res.data(), res.size());
|
||||
}
|
||||
@ -412,11 +408,8 @@ void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerReque
|
||||
}
|
||||
|
||||
|
||||
CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler(
|
||||
size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, log(getLogger("CatBoostLibraryBridgeRequestHandler"))
|
||||
CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler(ContextPtr context_)
|
||||
: WithContext(context_), log(getLogger("CatBoostLibraryBridgeRequestHandler"))
|
||||
{
|
||||
}
|
||||
|
||||
@ -455,7 +448,7 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
|
||||
const String & method = params.get("method");
|
||||
|
||||
LOG_TRACE(log, "Library method: '{}'", method);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
|
||||
|
||||
try
|
||||
{
|
||||
@ -617,10 +610,8 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
|
||||
}
|
||||
|
||||
|
||||
CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, log(getLogger("CatBoostLibraryBridgeExistsHandler"))
|
||||
CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(ContextPtr context_)
|
||||
: WithContext(context_), log(getLogger("CatBoostLibraryBridgeExistsHandler"))
|
||||
{
|
||||
}
|
||||
|
||||
@ -634,7 +625,7 @@ void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & reque
|
||||
|
||||
String res = "1";
|
||||
|
||||
setResponseDefaultHeaders(response, keep_alive_timeout);
|
||||
setResponseDefaultHeaders(response);
|
||||
LOG_TRACE(log, "Sending ping response: {}", res);
|
||||
response.sendBuffer(res.data(), res.size());
|
||||
}
|
||||
|
@ -18,14 +18,13 @@ namespace DB
|
||||
class ExternalDictionaryLibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
|
||||
explicit ExternalDictionaryLibraryBridgeRequestHandler(ContextPtr context_);
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
static constexpr auto FORMAT = "RowBinary";
|
||||
|
||||
const size_t keep_alive_timeout;
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
@ -34,12 +33,11 @@ private:
|
||||
class ExternalDictionaryLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
|
||||
explicit ExternalDictionaryLibraryBridgeExistsHandler(ContextPtr context_);
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
const size_t keep_alive_timeout;
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
@ -63,12 +61,11 @@ private:
|
||||
class CatBoostLibraryBridgeRequestHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
CatBoostLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
|
||||
explicit CatBoostLibraryBridgeRequestHandler(ContextPtr context_);
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
const size_t keep_alive_timeout;
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
@ -77,12 +74,11 @@ private:
|
||||
class CatBoostLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
|
||||
explicit CatBoostLibraryBridgeExistsHandler(ContextPtr context_);
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
const size_t keep_alive_timeout;
|
||||
LoggerPtr log;
|
||||
};
|
||||
|
||||
|
@ -367,7 +367,7 @@ std::string LocalServer::getInitialCreateTableQuery()
|
||||
else
|
||||
table_structure = "(" + table_structure + ")";
|
||||
|
||||
return fmt::format("CREATE TABLE {} {} ENGINE = File({}, {});",
|
||||
return fmt::format("CREATE TEMPORARY TABLE {} {} ENGINE = File({}, {});",
|
||||
table_name, table_structure, data_format, table_file);
|
||||
}
|
||||
|
||||
|
@ -1307,6 +1307,7 @@ try
|
||||
throw ErrnoException(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Input must be seekable file (it will be read twice)");
|
||||
|
||||
SingleReadBufferIterator read_buffer_iterator(std::move(file));
|
||||
|
||||
schema_columns = readSchemaFromFormat(input_format, {}, read_buffer_iterator, context_const);
|
||||
}
|
||||
else
|
||||
|
@ -202,10 +202,7 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
|
||||
if (columns.empty())
|
||||
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Columns definition was not returned");
|
||||
|
||||
WriteBufferFromHTTPServerResponse out(
|
||||
response,
|
||||
request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD,
|
||||
keep_alive_timeout);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
|
||||
try
|
||||
{
|
||||
writeStringBinary(columns.toString(), out);
|
||||
|
@ -15,18 +15,12 @@ namespace DB
|
||||
class ODBCColumnsInfoHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
ODBCColumnsInfoHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, log(getLogger("ODBCColumnsInfoHandler"))
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
{
|
||||
}
|
||||
explicit ODBCColumnsInfoHandler(ContextPtr context_) : WithContext(context_), log(getLogger("ODBCColumnsInfoHandler")) { }
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
LoggerPtr log;
|
||||
size_t keep_alive_timeout;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -74,7 +74,7 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ
|
||||
|
||||
auto identifier = getIdentifierQuote(std::move(connection));
|
||||
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
|
||||
try
|
||||
{
|
||||
writeStringBinary(identifier, out);
|
||||
|
@ -14,18 +14,12 @@ namespace DB
|
||||
class IdentifierQuoteHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
IdentifierQuoteHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, log(getLogger("IdentifierQuoteHandler"))
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
{
|
||||
}
|
||||
explicit IdentifierQuoteHandler(ContextPtr context_) : WithContext(context_), log(getLogger("IdentifierQuoteHandler")) { }
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
LoggerPtr log;
|
||||
size_t keep_alive_timeout;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -132,7 +132,7 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
|
||||
return;
|
||||
}
|
||||
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
|
||||
|
||||
try
|
||||
{
|
||||
|
@ -20,12 +20,10 @@ class ODBCHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
ODBCHandler(
|
||||
size_t keep_alive_timeout_,
|
||||
ContextPtr context_,
|
||||
const String & mode_)
|
||||
: WithContext(context_)
|
||||
, log(getLogger("ODBCHandler"))
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
, mode(mode_)
|
||||
{
|
||||
}
|
||||
@ -35,7 +33,6 @@ public:
|
||||
private:
|
||||
LoggerPtr log;
|
||||
|
||||
size_t keep_alive_timeout;
|
||||
String mode;
|
||||
|
||||
static inline std::mutex mutex;
|
||||
|
@ -27,7 +27,7 @@ std::string ODBCBridge::bridgeName() const
|
||||
|
||||
ODBCBridge::HandlerFactoryPtr ODBCBridge::getHandlerFactoryPtr(ContextPtr context) const
|
||||
{
|
||||
return std::make_shared<ODBCBridgeHandlerFactory>("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context);
|
||||
return std::make_shared<ODBCBridgeHandlerFactory>("ODBCRequestHandlerFactory-factory", context);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -9,11 +9,8 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
ODBCBridgeHandlerFactory::ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, log(getLogger(name_))
|
||||
, name(name_)
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
ODBCBridgeHandlerFactory::ODBCBridgeHandlerFactory(const std::string & name_, ContextPtr context_)
|
||||
: WithContext(context_), log(getLogger(name_)), name(name_)
|
||||
{
|
||||
}
|
||||
|
||||
@ -23,33 +20,33 @@ std::unique_ptr<HTTPRequestHandler> ODBCBridgeHandlerFactory::createRequestHandl
|
||||
LOG_TRACE(log, "Request URI: {}", uri.toString());
|
||||
|
||||
if (uri.getPath() == "/ping" && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
|
||||
return std::make_unique<PingHandler>(keep_alive_timeout);
|
||||
return std::make_unique<PingHandler>();
|
||||
|
||||
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST)
|
||||
{
|
||||
|
||||
if (uri.getPath() == "/columns_info")
|
||||
#if USE_ODBC
|
||||
return std::make_unique<ODBCColumnsInfoHandler>(keep_alive_timeout, getContext());
|
||||
return std::make_unique<ODBCColumnsInfoHandler>(getContext());
|
||||
#else
|
||||
return nullptr;
|
||||
#endif
|
||||
else if (uri.getPath() == "/identifier_quote")
|
||||
#if USE_ODBC
|
||||
return std::make_unique<IdentifierQuoteHandler>(keep_alive_timeout, getContext());
|
||||
return std::make_unique<IdentifierQuoteHandler>(getContext());
|
||||
#else
|
||||
return nullptr;
|
||||
#endif
|
||||
else if (uri.getPath() == "/schema_allowed")
|
||||
#if USE_ODBC
|
||||
return std::make_unique<SchemaAllowedHandler>(keep_alive_timeout, getContext());
|
||||
return std::make_unique<SchemaAllowedHandler>(getContext());
|
||||
#else
|
||||
return nullptr;
|
||||
#endif
|
||||
else if (uri.getPath() == "/write")
|
||||
return std::make_unique<ODBCHandler>(keep_alive_timeout, getContext(), "write");
|
||||
return std::make_unique<ODBCHandler>(getContext(), "write");
|
||||
else
|
||||
return std::make_unique<ODBCHandler>(keep_alive_timeout, getContext(), "read");
|
||||
return std::make_unique<ODBCHandler>(getContext(), "read");
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -17,14 +17,13 @@ namespace DB
|
||||
class ODBCBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContext
|
||||
{
|
||||
public:
|
||||
ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_);
|
||||
ODBCBridgeHandlerFactory(const std::string & name_, ContextPtr context_);
|
||||
|
||||
std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;
|
||||
|
||||
private:
|
||||
LoggerPtr log;
|
||||
std::string name;
|
||||
size_t keep_alive_timeout;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -10,7 +10,7 @@ void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerRes
|
||||
{
|
||||
try
|
||||
{
|
||||
setResponseDefaultHeaders(response, keep_alive_timeout);
|
||||
setResponseDefaultHeaders(response);
|
||||
const char * data = "Ok.\n";
|
||||
response.sendBuffer(data, strlen(data));
|
||||
}
|
||||
|
@ -9,11 +9,7 @@ namespace DB
|
||||
class PingHandler : public HTTPRequestHandler
|
||||
{
|
||||
public:
|
||||
explicit PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {}
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
size_t keep_alive_timeout;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -88,7 +88,7 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer
|
||||
|
||||
bool result = isSchemaAllowed(std::move(connection));
|
||||
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD);
|
||||
try
|
||||
{
|
||||
writeBoolText(result, out);
|
||||
|
@ -17,18 +17,12 @@ class Context;
|
||||
class SchemaAllowedHandler : public HTTPRequestHandler, WithContext
|
||||
{
|
||||
public:
|
||||
SchemaAllowedHandler(size_t keep_alive_timeout_, ContextPtr context_)
|
||||
: WithContext(context_)
|
||||
, log(getLogger("SchemaAllowedHandler"))
|
||||
, keep_alive_timeout(keep_alive_timeout_)
|
||||
{
|
||||
}
|
||||
explicit SchemaAllowedHandler(ContextPtr context_) : WithContext(context_), log(getLogger("SchemaAllowedHandler")) { }
|
||||
|
||||
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
|
||||
|
||||
private:
|
||||
LoggerPtr log;
|
||||
size_t keep_alive_timeout;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1769,6 +1769,8 @@ try
|
||||
new_server_settings.http_connections_store_limit,
|
||||
});
|
||||
|
||||
DNSResolver::instance().setFilterSettings(new_server_settings.dns_allow_resolve_names_to_ipv4, new_server_settings.dns_allow_resolve_names_to_ipv6);
|
||||
|
||||
if (global_context->isServerCompletelyStarted())
|
||||
CannotAllocateThreadFaultInjector::setFaultProbability(new_server_settings.cannot_allocate_thread_fault_injection_probability);
|
||||
|
||||
@ -1920,7 +1922,7 @@ try
|
||||
auto & access_control = global_context->getAccessControl();
|
||||
try
|
||||
{
|
||||
access_control.setUpFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
|
||||
access_control.setupFromMainConfig(config(), config_path, [&] { return global_context->getZooKeeper(); });
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
@ -2428,6 +2430,7 @@ void Server::createServers(
|
||||
Poco::Net::HTTPServerParams::Ptr http_params = new Poco::Net::HTTPServerParams;
|
||||
http_params->setTimeout(settings.http_receive_timeout);
|
||||
http_params->setKeepAliveTimeout(global_context->getServerSettings().keep_alive_timeout);
|
||||
http_params->setMaxKeepAliveRequests(static_cast<int>(global_context->getServerSettings().max_keep_alive_requests));
|
||||
|
||||
Poco::Util::AbstractConfiguration::Keys protocols;
|
||||
config.keys("protocols", protocols);
|
||||
|
1
programs/server/config.d/transactions.xml
Symbolic link
@ -0,0 +1 @@
|
||||
../../../tests/config/config.d/transactions.xml
|
Some files were not shown because too many files have changed in this diff.