mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Merge branch 'master' into last_removal_attempt_time_typo
This commit is contained in:
commit
ed5310ce48
216
.github/workflows/master.yml
vendored
216
.github/workflows/master.yml
vendored
@ -2870,6 +2870,216 @@ jobs:
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan0:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=0
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan1:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=1
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan2:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=2
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan3:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=3
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan4:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=4
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsAnalyzerAsan5:
|
||||
needs: [BuilderDebAsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
steps:
|
||||
- name: Set envs
|
||||
run: |
|
||||
cat >> "$GITHUB_ENV" << 'EOF'
|
||||
TEMP_PATH=${{runner.temp}}/integration_tests_asan
|
||||
REPORTS_PATH=${{runner.temp}}/reports_dir
|
||||
CHECK_NAME=Integration tests (asan, analyzer)
|
||||
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse
|
||||
RUN_BY_HASH_NUM=5
|
||||
RUN_BY_HASH_TOTAL=6
|
||||
EOF
|
||||
- name: Download json reports
|
||||
uses: actions/download-artifact@v3
|
||||
with:
|
||||
path: ${{ env.REPORTS_PATH }}
|
||||
- name: Check out repository code
|
||||
uses: ClickHouse/checkout@v1
|
||||
with:
|
||||
clear-repository: true
|
||||
- name: Integration test
|
||||
run: |
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
mkdir -p "$TEMP_PATH"
|
||||
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
|
||||
cd "$REPO_COPY/tests/ci"
|
||||
python3 integration_test_check.py "$CHECK_NAME"
|
||||
- name: Cleanup
|
||||
if: always()
|
||||
run: |
|
||||
docker ps --quiet | xargs --no-run-if-empty docker kill ||:
|
||||
docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
|
||||
sudo rm -fr "$TEMP_PATH"
|
||||
IntegrationTestsTsan0:
|
||||
needs: [BuilderDebTsan]
|
||||
runs-on: [self-hosted, stress-tester]
|
||||
@ -3963,6 +4173,12 @@ jobs:
|
||||
- IntegrationTestsAsan3
|
||||
- IntegrationTestsAsan4
|
||||
- IntegrationTestsAsan5
|
||||
- IntegrationTestsAnalyzerAsan0
|
||||
- IntegrationTestsAnalyzerAsan1
|
||||
- IntegrationTestsAnalyzerAsan2
|
||||
- IntegrationTestsAnalyzerAsan3
|
||||
- IntegrationTestsAnalyzerAsan4
|
||||
- IntegrationTestsAnalyzerAsan5
|
||||
- IntegrationTestsRelease0
|
||||
- IntegrationTestsRelease1
|
||||
- IntegrationTestsRelease2
|
||||
|
6
.github/workflows/pull_request.yml
vendored
6
.github/workflows/pull_request.yml
vendored
@ -5099,6 +5099,12 @@ jobs:
|
||||
- IntegrationTestsAsan3
|
||||
- IntegrationTestsAsan4
|
||||
- IntegrationTestsAsan5
|
||||
- IntegrationTestsAnalyzerAsan0
|
||||
- IntegrationTestsAnalyzerAsan1
|
||||
- IntegrationTestsAnalyzerAsan2
|
||||
- IntegrationTestsAnalyzerAsan3
|
||||
- IntegrationTestsAnalyzerAsan4
|
||||
- IntegrationTestsAnalyzerAsan5
|
||||
- IntegrationTestsRelease0
|
||||
- IntegrationTestsRelease1
|
||||
- IntegrationTestsRelease2
|
||||
|
@ -23,7 +23,7 @@ curl https://clickhouse.com/ | sh
|
||||
|
||||
## Upcoming Events
|
||||
|
||||
* [**v23.6 Release Webinar**](https://clickhouse.com/company/events/v23-6-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-06) - Jun 29 - 23.6 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
|
||||
* [**v23.7 Release Webinar**](https://clickhouse.com/company/events/v23-7-community-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-07) - Jul 27 - 23.7 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release.
|
||||
* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/293913596) - Jul 18
|
||||
* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/293913441) - Jul 19
|
||||
* [**ClickHouse Meetup in Toronto**](https://www.meetup.com/clickhouse-toronto-user-group/events/294183127) - Jul 20
|
||||
@ -34,13 +34,13 @@ Also, keep an eye out for upcoming meetups around the world. Somewhere else you
|
||||
|
||||
## Recent Recordings
|
||||
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
|
||||
* **Recording available**: [**v23.4 Release Webinar**](https://www.youtube.com/watch?v=4rrf6bk_mOg) Faster Parquet Reading, Asynchonous Connections to Reoplicas, Trailing Comma before FROM, extractKeyValuePairs, integrations updates, and so much more! Watch it now!
|
||||
* **Recording available**: [**v23.6 Release Webinar**](https://www.youtube.com/watch?v=cuf_hYn7dqU) All the features of 23.6, one convenient video! Watch it now!
|
||||
* **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU)
|
||||
|
||||
|
||||
## Interested in joining ClickHouse and making it your full time job?
|
||||
## Interested in joining ClickHouse and making it your full-time job?
|
||||
|
||||
We are a globally diverse and distributed team, united behind a common goal of creating industry-leading, real-time analytics. Here, you will have an opportunity to solve some of the most cutting edge technical challenges and have direct ownership of your work and vision. If you are a contributor by nature, a thinker as well as a doer - we’ll definitely click!
|
||||
We are a globally diverse and distributed team, united behind a common goal of creating industry-leading, real-time analytics. Here, you will have an opportunity to solve some of the most cutting-edge technical challenges and have direct ownership of your work and vision. If you are a contributor by nature, a thinker and a doer - we’ll definitely click!
|
||||
|
||||
Check out our **current openings** here: https://clickhouse.com/company/careers
|
||||
|
||||
|
@ -1,43 +1,38 @@
|
||||
# Usage:
|
||||
# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # In megabytes
|
||||
# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "")
|
||||
# include (cmake/limit_jobs.cmake)
|
||||
# Limit compiler/linker job concurrency to avoid OOMs on subtrees where compilation/linking is memory-intensive.
|
||||
#
|
||||
# Usage from CMake:
|
||||
# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # megabyte
|
||||
# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") # megabyte
|
||||
# include (cmake/limit_jobs.cmake)
|
||||
#
|
||||
# (bigger values mean fewer jobs)
|
||||
|
||||
cmake_host_system_information(RESULT TOTAL_PHYSICAL_MEMORY QUERY TOTAL_PHYSICAL_MEMORY) # Not available under freebsd
|
||||
cmake_host_system_information(RESULT TOTAL_PHYSICAL_MEMORY QUERY TOTAL_PHYSICAL_MEMORY)
|
||||
cmake_host_system_information(RESULT NUMBER_OF_LOGICAL_CORES QUERY NUMBER_OF_LOGICAL_CORES)
|
||||
|
||||
# 1 if not set
|
||||
option(PARALLEL_COMPILE_JOBS "Maximum number of concurrent compilation jobs" "")
|
||||
# Set to disable the automatic job-limiting
|
||||
option(PARALLEL_COMPILE_JOBS "Maximum number of concurrent compilation jobs" OFF)
|
||||
option(PARALLEL_LINK_JOBS "Maximum number of concurrent link jobs" OFF)
|
||||
|
||||
# 1 if not set
|
||||
option(PARALLEL_LINK_JOBS "Maximum number of concurrent link jobs" "")
|
||||
|
||||
if (NOT PARALLEL_COMPILE_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_COMPILER_MEMORY)
|
||||
if (NOT PARALLEL_COMPILE_JOBS AND MAX_COMPILER_MEMORY)
|
||||
math(EXPR PARALLEL_COMPILE_JOBS ${TOTAL_PHYSICAL_MEMORY}/${MAX_COMPILER_MEMORY})
|
||||
|
||||
if (NOT PARALLEL_COMPILE_JOBS)
|
||||
set (PARALLEL_COMPILE_JOBS 1)
|
||||
endif ()
|
||||
if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
|
||||
set (PARALLEL_COMPILE_JOBS_LESS TRUE)
|
||||
if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
|
||||
message(WARNING "The auto-calculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.")
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
if (PARALLEL_COMPILE_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES))
|
||||
set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR})
|
||||
string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE})
|
||||
set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS})
|
||||
endif ()
|
||||
|
||||
|
||||
if (NOT PARALLEL_LINK_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY)
|
||||
if (NOT PARALLEL_LINK_JOBS AND MAX_LINKER_MEMORY)
|
||||
math(EXPR PARALLEL_LINK_JOBS ${TOTAL_PHYSICAL_MEMORY}/${MAX_LINKER_MEMORY})
|
||||
|
||||
if (NOT PARALLEL_LINK_JOBS)
|
||||
set (PARALLEL_LINK_JOBS 1)
|
||||
endif ()
|
||||
if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)
|
||||
set (PARALLEL_LINK_JOBS_LESS TRUE)
|
||||
if (PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)
|
||||
message(WARNING "The auto-calculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.")
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
@ -52,20 +47,16 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLE
|
||||
set (PARALLEL_LINK_JOBS 2)
|
||||
endif()
|
||||
|
||||
if (PARALLEL_LINK_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES))
|
||||
message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB DRAM, 'OFF' means the native core count).")
|
||||
|
||||
if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
|
||||
set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR})
|
||||
string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE})
|
||||
set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS})
|
||||
endif ()
|
||||
|
||||
if (PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)
|
||||
set(CMAKE_JOB_POOL_LINK link_job_pool${CMAKE_CURRENT_SOURCE_DIR})
|
||||
string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_LINK ${CMAKE_JOB_POOL_LINK})
|
||||
set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_LINK}=${PARALLEL_LINK_JOBS})
|
||||
endif ()
|
||||
|
||||
if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS)
|
||||
message(STATUS
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}: Have ${TOTAL_PHYSICAL_MEMORY} megabytes of memory.
|
||||
Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)")
|
||||
if (PARALLEL_COMPILE_JOBS_LESS)
|
||||
message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.")
|
||||
endif()
|
||||
if (PARALLEL_LINK_JOBS_LESS)
|
||||
message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.")
|
||||
endif()
|
||||
endif ()
|
||||
|
2
contrib/cctz
vendored
2
contrib/cctz
vendored
@ -1 +1 @@
|
||||
Subproject commit 5e05432420f9692418e2e12aff09859e420b14a2
|
||||
Subproject commit 8529bcef5cd996b7c0f4d7475286b76b5d126c4c
|
@ -1,5 +1,5 @@
|
||||
if (SANITIZE OR NOT (
|
||||
((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE OR ARCH_RISCV64)) OR
|
||||
((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE OR ARCH_RISCV64 OR ARCH_S390X)) OR
|
||||
(OS_DARWIN AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG"))
|
||||
))
|
||||
if (ENABLE_JEMALLOC)
|
||||
@ -17,17 +17,17 @@ if (NOT ENABLE_JEMALLOC)
|
||||
endif ()
|
||||
|
||||
if (NOT OS_LINUX)
|
||||
message (WARNING "jemalloc support on non-linux is EXPERIMENTAL")
|
||||
message (WARNING "jemalloc support on non-Linux is EXPERIMENTAL")
|
||||
endif()
|
||||
|
||||
if (OS_LINUX)
|
||||
# ThreadPool select job randomly, and there can be some threads that had been
|
||||
# performed some memory heavy task before and will be inactive for some time,
|
||||
# but until it will became active again, the memory will not be freed since by
|
||||
# default each thread has it's own arena, but there should be not more then
|
||||
# ThreadPool select job randomly, and there can be some threads that have been
|
||||
# performed some memory-heavy tasks before and will be inactive for some time,
|
||||
# but until it becomes active again, the memory will not be freed since, by
|
||||
# default, each thread has its arena, but there should be no more than
|
||||
# 4*CPU arenas (see opt.nareans description).
|
||||
#
|
||||
# By enabling percpu_arena number of arenas limited to number of CPUs and hence
|
||||
# By enabling percpu_arena number of arenas is limited to the number of CPUs, and hence
|
||||
# this problem should go away.
|
||||
#
|
||||
# muzzy_decay_ms -- use MADV_FREE when available on newer Linuxes, to
|
||||
@ -38,7 +38,7 @@ if (OS_LINUX)
|
||||
else()
|
||||
set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000")
|
||||
endif()
|
||||
# CACHE variable is empty, to allow changing defaults without necessity
|
||||
# CACHE variable is empty to allow changing defaults without the necessity
|
||||
# to purge cache
|
||||
set (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE "" CACHE STRING "Change default configuration string of JEMalloc" )
|
||||
if (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE)
|
||||
@ -148,6 +148,8 @@ elseif (ARCH_PPC64LE)
|
||||
set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_ppc64le")
|
||||
elseif (ARCH_RISCV64)
|
||||
set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_riscv64")
|
||||
elseif (ARCH_S390X)
|
||||
set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_s390x")
|
||||
else ()
|
||||
message (FATAL_ERROR "internal jemalloc: This arch is not supported")
|
||||
endif ()
|
||||
@ -172,7 +174,7 @@ target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1)
|
||||
|
||||
# jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++.
|
||||
# The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`.
|
||||
# At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracing.
|
||||
# At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracking.
|
||||
#
|
||||
# ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1).
|
||||
target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1)
|
||||
|
@ -0,0 +1,435 @@
|
||||
/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */
|
||||
#ifndef JEMALLOC_INTERNAL_DEFS_H_
|
||||
#define JEMALLOC_INTERNAL_DEFS_H_
|
||||
/*
|
||||
* If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
|
||||
* public APIs to be prefixed. This makes it possible, with some care, to use
|
||||
* multiple allocators simultaneously.
|
||||
*/
|
||||
/* #undef JEMALLOC_PREFIX */
|
||||
/* #undef JEMALLOC_CPREFIX */
|
||||
|
||||
/*
|
||||
* Define overrides for non-standard allocator-related functions if they are
|
||||
* present on the system.
|
||||
*/
|
||||
#define JEMALLOC_OVERRIDE___LIBC_CALLOC
|
||||
#define JEMALLOC_OVERRIDE___LIBC_FREE
|
||||
#define JEMALLOC_OVERRIDE___LIBC_MALLOC
|
||||
#define JEMALLOC_OVERRIDE___LIBC_MEMALIGN
|
||||
#define JEMALLOC_OVERRIDE___LIBC_REALLOC
|
||||
#define JEMALLOC_OVERRIDE___LIBC_VALLOC
|
||||
#define JEMALLOC_OVERRIDE___LIBC_PVALLOC
|
||||
/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */
|
||||
|
||||
/*
|
||||
* JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
|
||||
* For shared libraries, symbol visibility mechanisms prevent these symbols
|
||||
* from being exported, but for static libraries, naming collisions are a real
|
||||
* possibility.
|
||||
*/
|
||||
#define JEMALLOC_PRIVATE_NAMESPACE je_
|
||||
|
||||
/*
|
||||
* Hyper-threaded CPUs may need a special instruction inside spin loops in
|
||||
* order to yield to another virtual CPU.
|
||||
*/
|
||||
#define CPU_SPINWAIT
|
||||
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
|
||||
#define HAVE_CPU_SPINWAIT 0
|
||||
|
||||
/*
|
||||
* Number of significant bits in virtual addresses. This may be less than the
|
||||
* total number of bits in a pointer, e.g. on x64, for which the uppermost 16
|
||||
* bits are the same as bit 47.
|
||||
*/
|
||||
#define LG_VADDR 64
|
||||
|
||||
/* Defined if C11 atomics are available. */
|
||||
#define JEMALLOC_C11_ATOMICS
|
||||
|
||||
/* Defined if GCC __atomic atomics are available. */
|
||||
#define JEMALLOC_GCC_ATOMIC_ATOMICS
|
||||
/* and the 8-bit variant support. */
|
||||
#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS
|
||||
|
||||
/* Defined if GCC __sync atomics are available. */
|
||||
#define JEMALLOC_GCC_SYNC_ATOMICS
|
||||
/* and the 8-bit variant support. */
|
||||
#define JEMALLOC_GCC_U8_SYNC_ATOMICS
|
||||
|
||||
/*
|
||||
* Defined if __builtin_clz() and __builtin_clzl() are available.
|
||||
*/
|
||||
#define JEMALLOC_HAVE_BUILTIN_CLZ
|
||||
|
||||
/*
|
||||
* Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
|
||||
*/
|
||||
/* #undef JEMALLOC_OS_UNFAIR_LOCK */
|
||||
|
||||
/* Defined if syscall(2) is usable. */
|
||||
#define JEMALLOC_USE_SYSCALL
|
||||
|
||||
/*
|
||||
* Defined if secure_getenv(3) is available.
|
||||
*/
|
||||
#define JEMALLOC_HAVE_SECURE_GETENV
|
||||
|
||||
/*
|
||||
* Defined if issetugid(2) is available.
|
||||
*/
|
||||
/* #undef JEMALLOC_HAVE_ISSETUGID */
|
||||
|
||||
/* Defined if pthread_atfork(3) is available. */
|
||||
#define JEMALLOC_HAVE_PTHREAD_ATFORK
|
||||
|
||||
/* Defined if pthread_setname_np(3) is available. */
|
||||
#define JEMALLOC_HAVE_PTHREAD_SETNAME_NP
|
||||
|
||||
/* Defined if pthread_getname_np(3) is available. */
|
||||
#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP
|
||||
|
||||
/* Defined if pthread_get_name_np(3) is available. */
|
||||
/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */
|
||||
|
||||
/*
|
||||
* Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
|
||||
*/
|
||||
#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE
|
||||
|
||||
/*
|
||||
* Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
|
||||
*/
|
||||
#define JEMALLOC_HAVE_CLOCK_MONOTONIC
|
||||
|
||||
/*
|
||||
* Defined if mach_absolute_time() is available.
|
||||
*/
|
||||
/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */
|
||||
|
||||
/*
|
||||
* Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
|
||||
*/
|
||||
#define JEMALLOC_HAVE_CLOCK_REALTIME
|
||||
|
||||
/*
|
||||
* Defined if _malloc_thread_cleanup() exists. At least in the case of
|
||||
* FreeBSD, pthread_key_create() allocates, which if used during malloc
|
||||
* bootstrapping will cause recursion into the pthreads library. Therefore, if
|
||||
* _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
|
||||
* malloc_tsd.
|
||||
*/
|
||||
/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */
|
||||
|
||||
/*
|
||||
* Defined if threaded initialization is known to be safe on this platform.
|
||||
* Among other things, it must be possible to initialize a mutex without
|
||||
* triggering allocation in order for threaded allocation to be safe.
|
||||
*/
|
||||
#define JEMALLOC_THREADED_INIT
|
||||
|
||||
/*
|
||||
* Defined if the pthreads implementation defines
|
||||
* _pthread_mutex_init_calloc_cb(), in which case the function is used in order
|
||||
* to avoid recursive allocation during mutex initialization.
|
||||
*/
|
||||
/* #undef JEMALLOC_MUTEX_INIT_CB */
|
||||
|
||||
/* Non-empty if the tls_model attribute is supported. */
|
||||
#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
|
||||
|
||||
/*
|
||||
* JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
|
||||
* inline functions.
|
||||
*/
|
||||
/* #undef JEMALLOC_DEBUG */
|
||||
|
||||
/* JEMALLOC_STATS enables statistics calculation. */
|
||||
#define JEMALLOC_STATS
|
||||
|
||||
/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
|
||||
/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
|
||||
|
||||
/* JEMALLOC_PROF enables allocation profiling. */
|
||||
/* #undef JEMALLOC_PROF */
|
||||
|
||||
/* Use libunwind for profile backtracing if defined. */
|
||||
/* #undef JEMALLOC_PROF_LIBUNWIND */
|
||||
|
||||
/* Use libgcc for profile backtracing if defined. */
|
||||
/* #undef JEMALLOC_PROF_LIBGCC */
|
||||
|
||||
/* Use gcc intrinsics for profile backtracing if defined. */
|
||||
/* #undef JEMALLOC_PROF_GCC */
|
||||
|
||||
/* JEMALLOC_PAGEID enabled page id */
|
||||
/* #undef JEMALLOC_PAGEID */
|
||||
|
||||
/* JEMALLOC_HAVE_PRCTL checks prctl */
|
||||
#define JEMALLOC_HAVE_PRCTL
|
||||
|
||||
/*
|
||||
* JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
|
||||
* segment (DSS).
|
||||
*/
|
||||
#define JEMALLOC_DSS
|
||||
|
||||
/* Support memory filling (junk/zero). */
|
||||
#define JEMALLOC_FILL
|
||||
|
||||
/* Support utrace(2)-based tracing. */
|
||||
/* #undef JEMALLOC_UTRACE */
|
||||
|
||||
/* Support utrace(2)-based tracing (label based signature). */
|
||||
/* #undef JEMALLOC_UTRACE_LABEL */
|
||||
|
||||
/* Support optional abort() on OOM. */
|
||||
/* #undef JEMALLOC_XMALLOC */
|
||||
|
||||
/* Support lazy locking (avoid locking unless a second thread is launched). */
|
||||
/* #undef JEMALLOC_LAZY_LOCK */
|
||||
|
||||
/*
|
||||
* Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
|
||||
* classes).
|
||||
*/
|
||||
/* #undef LG_QUANTUM */
|
||||
|
||||
/* One page is 2^LG_PAGE bytes. */
|
||||
#define LG_PAGE 12
|
||||
|
||||
/* Maximum number of regions in a slab. */
|
||||
/* #undef CONFIG_LG_SLAB_MAXREGS */
|
||||
|
||||
/*
|
||||
* One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the
|
||||
* system does not explicitly support huge pages; system calls that require
|
||||
* explicit huge page support are separately configured.
|
||||
*/
|
||||
#define LG_HUGEPAGE 20
|
||||
|
||||
/*
|
||||
* If defined, adjacent virtual memory mappings with identical attributes
|
||||
* automatically coalesce, and they fragment when changes are made to subranges.
|
||||
* This is the normal order of things for mmap()/munmap(), but on Windows
|
||||
* VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
|
||||
* mappings do *not* coalesce/fragment.
|
||||
*/
|
||||
#define JEMALLOC_MAPS_COALESCE
|
||||
|
||||
/*
|
||||
* If defined, retain memory for later reuse by default rather than using e.g.
|
||||
* munmap() to unmap freed extents. This is enabled on 64-bit Linux because
|
||||
* common sequences of mmap()/munmap() calls will cause virtual memory map
|
||||
* holes.
|
||||
*/
|
||||
#define JEMALLOC_RETAIN
|
||||
|
||||
/* TLS is used to map arenas and magazine caches to threads. */
|
||||
#define JEMALLOC_TLS
|
||||
|
||||
/*
|
||||
* Used to mark unreachable code to quiet "end of non-void" compiler warnings.
|
||||
* Don't use this directly; instead use unreachable() from util.h
|
||||
*/
|
||||
#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable
|
||||
|
||||
/*
|
||||
* ffs*() functions to use for bitmapping. Don't use these directly; instead,
|
||||
* use ffs_*() from util.h.
|
||||
*/
|
||||
#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll
|
||||
#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
|
||||
#define JEMALLOC_INTERNAL_FFS __builtin_ffs
|
||||
|
||||
/*
|
||||
* popcount*() functions to use for bitmapping.
|
||||
*/
|
||||
#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
|
||||
#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
|
||||
|
||||
/*
|
||||
* If defined, explicitly attempt to more uniformly distribute large allocation
|
||||
* pointer alignments across all cache indices.
|
||||
*/
|
||||
#define JEMALLOC_CACHE_OBLIVIOUS
|
||||
|
||||
/*
|
||||
* If defined, enable logging facilities. We make this a configure option to
|
||||
* avoid taking extra branches everywhere.
|
||||
*/
|
||||
/* #undef JEMALLOC_LOG */
|
||||
|
||||
/*
|
||||
* If defined, use readlinkat() (instead of readlink()) to follow
|
||||
* /etc/malloc_conf.
|
||||
*/
|
||||
/* #undef JEMALLOC_READLINKAT */
|
||||
|
||||
/*
|
||||
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
|
||||
*/
|
||||
/* #undef JEMALLOC_ZONE */
|
||||
|
||||
/*
|
||||
* Methods for determining whether the OS overcommits.
|
||||
* JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
|
||||
* /proc/sys/vm.overcommit_memory file.
|
||||
* JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
|
||||
*/
|
||||
/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */
|
||||
#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
|
||||
|
||||
/* Defined if madvise(2) is available. */
|
||||
#define JEMALLOC_HAVE_MADVISE
|
||||
|
||||
/*
|
||||
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
|
||||
* arguments to madvise(2).
|
||||
*/
|
||||
#define JEMALLOC_HAVE_MADVISE_HUGE
|
||||
|
||||
/*
|
||||
* Methods for purging unused pages differ between operating systems.
|
||||
*
|
||||
* madvise(..., MADV_FREE) : This marks pages as being unused, such that they
|
||||
* will be discarded rather than swapped out.
|
||||
* madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
|
||||
* defined, this immediately discards pages,
|
||||
* such that new pages will be demand-zeroed if
|
||||
* the address region is later touched;
|
||||
* otherwise this behaves similarly to
|
||||
* MADV_FREE, though typically with higher
|
||||
* system overhead.
|
||||
*/
|
||||
#define JEMALLOC_PURGE_MADVISE_FREE
|
||||
#define JEMALLOC_PURGE_MADVISE_DONTNEED
|
||||
#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
|
||||
|
||||
/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
|
||||
/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
|
||||
|
||||
/*
|
||||
* Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
|
||||
*/
|
||||
#define JEMALLOC_MADVISE_DONTDUMP
|
||||
|
||||
/*
|
||||
* Defined if MADV_[NO]CORE is supported as an argument to madvise.
|
||||
*/
|
||||
/* #undef JEMALLOC_MADVISE_NOCORE */
|
||||
|
||||
/* Defined if mprotect(2) is available. */
|
||||
#define JEMALLOC_HAVE_MPROTECT
|
||||
|
||||
/*
|
||||
* Defined if transparent huge pages (THPs) are supported via the
|
||||
* MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
|
||||
*/
|
||||
/* #undef JEMALLOC_THP */
|
||||
|
||||
/* Defined if posix_madvise is available. */
|
||||
/* #undef JEMALLOC_HAVE_POSIX_MADVISE */
|
||||
|
||||
/*
|
||||
* Method for purging unused pages using posix_madvise.
|
||||
*
|
||||
* posix_madvise(..., POSIX_MADV_DONTNEED)
|
||||
*/
|
||||
/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */
|
||||
/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */
|
||||
|
||||
/*
|
||||
* Defined if memcntl page admin call is supported
|
||||
*/
|
||||
/* #undef JEMALLOC_HAVE_MEMCNTL */
|
||||
|
||||
/*
|
||||
* Defined if malloc_size is supported
|
||||
*/
|
||||
/* #undef JEMALLOC_HAVE_MALLOC_SIZE */
|
||||
|
||||
/* Define if operating system has alloca.h header. */
|
||||
#define JEMALLOC_HAS_ALLOCA_H
|
||||
|
||||
/* C99 restrict keyword supported. */
|
||||
#define JEMALLOC_HAS_RESTRICT
|
||||
|
||||
/* For use by hash code. */
|
||||
#define JEMALLOC_BIG_ENDIAN
|
||||
|
||||
/* sizeof(int) == 2^LG_SIZEOF_INT. */
|
||||
#define LG_SIZEOF_INT 2
|
||||
|
||||
/* sizeof(long) == 2^LG_SIZEOF_LONG. */
|
||||
#define LG_SIZEOF_LONG 3
|
||||
|
||||
/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
|
||||
#define LG_SIZEOF_LONG_LONG 3
|
||||
|
||||
/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
|
||||
#define LG_SIZEOF_INTMAX_T 3
|
||||
|
||||
/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
|
||||
/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */
|
||||
|
||||
/* glibc memalign hook. */
|
||||
/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */
|
||||
|
||||
/* pthread support */
|
||||
#define JEMALLOC_HAVE_PTHREAD
|
||||
|
||||
/* dlsym() support */
|
||||
#define JEMALLOC_HAVE_DLSYM
|
||||
|
||||
/* Adaptive mutex support in pthreads. */
|
||||
#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
|
||||
|
||||
/* GNU specific sched_getcpu support */
|
||||
#define JEMALLOC_HAVE_SCHED_GETCPU
|
||||
|
||||
/* GNU specific sched_setaffinity support */
|
||||
#define JEMALLOC_HAVE_SCHED_SETAFFINITY
|
||||
|
||||
/*
|
||||
* If defined, all the features necessary for background threads are present.
|
||||
*/
|
||||
#define JEMALLOC_BACKGROUND_THREAD
|
||||
|
||||
/*
|
||||
* If defined, jemalloc symbols are not exported (doesn't work when
|
||||
* JEMALLOC_PREFIX is not defined).
|
||||
*/
|
||||
/* #undef JEMALLOC_EXPORT */
|
||||
|
||||
/* config.malloc_conf options string. */
|
||||
#define JEMALLOC_CONFIG_MALLOC_CONF ""
|
||||
|
||||
/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
|
||||
#define JEMALLOC_IS_MALLOC
|
||||
|
||||
/*
|
||||
* Defined if strerror_r returns char * if _GNU_SOURCE is defined.
|
||||
*/
|
||||
#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE
|
||||
|
||||
/* Performs additional safety checks when defined. */
|
||||
/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
|
||||
|
||||
/* Is C++ support being built? */
|
||||
#define JEMALLOC_ENABLE_CXX
|
||||
|
||||
/* Performs additional size checks when defined. */
|
||||
/* #undef JEMALLOC_OPT_SIZE_CHECKS */
|
||||
|
||||
/* Allows sampled junk and stash for checking use-after-free when defined. */
|
||||
/* #undef JEMALLOC_UAF_DETECTION */
|
||||
|
||||
/* Darwin VM_MAKE_TAG support */
|
||||
/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */
|
||||
|
||||
/* If defined, realloc(ptr, 0) defaults to "free" instead of "alloc". */
|
||||
#define JEMALLOC_ZERO_REALLOC_DEFAULT_FREE
|
||||
|
||||
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
|
@ -98,6 +98,7 @@ RUN python3 -m pip install --no-cache-dir \
|
||||
redis \
|
||||
requests-kerberos \
|
||||
tzlocal==2.1 \
|
||||
retry \
|
||||
urllib3
|
||||
|
||||
# Hudi supports only spark 3.3.*, not 3.4
|
||||
|
@ -11,7 +11,8 @@ Supported platforms:
|
||||
|
||||
- x86_64
|
||||
- AArch64
|
||||
- Power9 (experimental)
|
||||
- PowerPC 64 LE (experimental)
|
||||
- RISC-V 64 (experimental)
|
||||
|
||||
## Building on Ubuntu
|
||||
|
||||
@ -42,7 +43,7 @@ sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
|
||||
For other Linux distribution - check the availability of LLVM's [prebuild packages](https://releases.llvm.org/download.html).
|
||||
|
||||
As of April 2023, any version of Clang >= 15 will work.
|
||||
As of April 2023, clang-16 or higher will work.
|
||||
GCC as a compiler is not supported.
|
||||
To build with a specific Clang version:
|
||||
|
||||
@ -86,8 +87,8 @@ The build requires the following components:
|
||||
|
||||
- Git (used to checkout the sources, not needed for the build)
|
||||
- CMake 3.20 or newer
|
||||
- Compiler: Clang 15 or newer
|
||||
- Linker: lld 15 or newer
|
||||
- Compiler: clang-16 or newer
|
||||
- Linker: lld-16 or newer
|
||||
- Ninja
|
||||
- Yasm
|
||||
- Gawk
|
||||
|
@ -1,11 +1,11 @@
|
||||
---
|
||||
slug: /en/operations/server-configuration-parameters/settings
|
||||
sidebar_position: 57
|
||||
sidebar_label: Server Settings
|
||||
sidebar_label: Global Server Settings
|
||||
description: This section contains descriptions of server settings that cannot be changed at the session or query level.
|
||||
---
|
||||
|
||||
# Server Settings
|
||||
# Global Server Settings
|
||||
|
||||
This section contains descriptions of server settings that cannot be changed at the session or query level.
|
||||
|
||||
@ -1201,13 +1201,58 @@ Keys:
|
||||
- `console` – Send `log` and `errorlog` to the console instead of file. To enable, set to `1` or `true`.
|
||||
- `stream_compress` – Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`.
|
||||
|
||||
Both log and error log file names (only file names, not directories) support date and time format specifiers.
|
||||
|
||||
**Format specifiers**
|
||||
Using the following format specifiers, you can define a pattern for the resulting file name. “Example” column shows possible results for `2023-07-06 18:32:07`.
|
||||
|
||||
| Specifier | Description | Example |
|
||||
|-------------|---------------------------------------------------------------------------------------------------------------------|--------------------------|
|
||||
| %% | Literal % | % |
|
||||
| %n | New-line character | |
|
||||
| %t | Horizontal tab character | |
|
||||
| %Y | Year as a decimal number, e.g. 2017 | 2023 |
|
||||
| %y | Last 2 digits of year as a decimal number (range [00,99]) | 23 |
|
||||
| %C | First 2 digits of year as a decimal number (range [00,99]) | 20 |
|
||||
| %G | Four-digit [ISO 8601 week-based year](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), i.e. the year that contains the specified week. Normally useful only with %V | 2023 |
|
||||
| %g | Last 2 digits of [ISO 8601 week-based year](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), i.e. the year that contains the specified week. | 23 |
|
||||
| %b | Abbreviated month name, e.g. Oct (locale dependent) | Jul |
|
||||
| %h | Synonym of %b | Jul |
|
||||
| %B | Full month name, e.g. October (locale dependent) | July |
|
||||
| %m | Month as a decimal number (range [01,12]) | 07 |
|
||||
| %U | Week of the year as a decimal number (Sunday is the first day of the week) (range [00,53]) | 27 |
|
||||
| %W | Week of the year as a decimal number (Monday is the first day of the week) (range [00,53]) | 27 |
|
||||
| %V | ISO 8601 week number (range [01,53]) | 27 |
|
||||
| %j | Day of the year as a decimal number (range [001,366]) | 187 |
|
||||
| %d | Day of the month as a zero-padded decimal number (range [01,31]). Single digit is preceded by zero. | 06 |
|
||||
| %e | Day of the month as a space-padded decimal number (range [1,31]). Single digit is preceded by a space. | 6 |
|
||||
| %a | Abbreviated weekday name, e.g. Fri (locale dependent) | Thu |
|
||||
| %A | Full weekday name, e.g. Friday (locale dependent) | Thursday |
|
||||
| %w | Weekday as a integer number with Sunday as 0 (range [0-6]) | 4 |
|
||||
| %u | Weekday as a decimal number, where Monday is 1 (ISO 8601 format) (range [1-7]) | 4 |
|
||||
| %H | Hour as a decimal number, 24 hour clock (range [00-23]) | 18 |
|
||||
| %I | Hour as a decimal number, 12 hour clock (range [01,12]) | 06 |
|
||||
| %M | Minute as a decimal number (range [00,59]) | 32 |
|
||||
| %S | Second as a decimal number (range [00,60]) | 07 |
|
||||
| %c | Standard date and time string, e.g. Sun Oct 17 04:41:13 2010 (locale dependent) | Thu Jul 6 18:32:07 2023 |
|
||||
| %x | Localized date representation (locale dependent) | 07/06/23 |
|
||||
| %X | Localized time representation, e.g. 18:40:20 or 6:40:20 PM (locale dependent) | 18:32:07 |
|
||||
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 07/06/23 |
|
||||
| %F | Short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2023-07-06 |
|
||||
| %r | Localized 12-hour clock time (locale dependent) | 06:32:07 PM |
|
||||
| %R | Equivalent to "%H:%M" | 18:32 |
|
||||
| %T | Equivalent to "%H:%M:%S" (the ISO 8601 time format) | 18:32:07 |
|
||||
| %p | Localized a.m. or p.m. designation (locale dependent) | PM |
|
||||
| %z | Offset from UTC in the ISO 8601 format (e.g. -0430), or no characters if the time zone information is not available | +0800 |
|
||||
| %Z | Locale-dependent time zone name or abbreviation, or no characters if the time zone information is not available | Z AWST |
|
||||
|
||||
**Example**
|
||||
|
||||
``` xml
|
||||
<logger>
|
||||
<level>trace</level>
|
||||
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
|
||||
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
|
||||
<log>/var/log/clickhouse-server/clickhouse-server-%F-%T.log</log>
|
||||
<errorlog>/var/log/clickhouse-server/clickhouse-server-%F-%T.err.log</errorlog>
|
||||
<size>1000M</size>
|
||||
<count>10</count>
|
||||
<stream_compress>true</stream_compress>
|
||||
|
@ -7,90 +7,16 @@ pagination_next: en/operations/settings/settings
|
||||
|
||||
# Settings Overview
|
||||
|
||||
There are multiple ways to define ClickHouse settings. Settings are configured in layers, and each subsequent layer redefines the previous values of a setting.
|
||||
There are two main groups of ClickHouse settings:
|
||||
|
||||
The order of priority for defining a setting is:
|
||||
- Global server settings
|
||||
- Query-level settings
|
||||
|
||||
1. Settings in the `users.xml` server configuration file
|
||||
The main distinction between global server settings and query-level settings is that
|
||||
global server settings must be set in configuration files while query-level settings
|
||||
can be set in configuration files or with SQL queries.
|
||||
|
||||
- Set in the element `<profiles>`.
|
||||
Read about [global server settings](/docs/en/operations/server-configuration-parameters/settings.md) to learn more about configuring your ClickHouse server at the global server level.
|
||||
|
||||
2. Session settings
|
||||
Read about [query-level settings](/docs/en/operations/settings/settings-query-level.md) to learn more about configuring your ClickHouse server at the query-level.
|
||||
|
||||
- Send `SET setting=value` from the ClickHouse console client in interactive mode.
|
||||
Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you need to specify the `session_id` HTTP parameter.
|
||||
|
||||
3. Query settings
|
||||
|
||||
- When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`.
|
||||
- When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`).
|
||||
- Define settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) clause of the SELECT query. The setting value is applied only to that query and is reset to the default or previous value after the query is executed.
|
||||
|
||||
View the [Settings](./settings.md) page for a description of the ClickHouse settings.
|
||||
|
||||
## Converting a Setting to its Default Value
|
||||
|
||||
If you change a setting and would like to revert it back to its default value, set the value to `DEFAULT`. The syntax looks like:
|
||||
|
||||
```sql
|
||||
SET setting_name = DEFAULT
|
||||
```
|
||||
|
||||
For example, the default value of `max_insert_block_size` is 1048449. Suppose you change its value to 100000:
|
||||
|
||||
```sql
|
||||
SET max_insert_block_size=100000;
|
||||
|
||||
SELECT value FROM system.settings where name='max_insert_block_size';
|
||||
```
|
||||
|
||||
The response is:
|
||||
|
||||
```response
|
||||
┌─value──┐
|
||||
│ 100000 │
|
||||
└────────┘
|
||||
```
|
||||
|
||||
The following command sets its value back to 1048449:
|
||||
|
||||
```sql
|
||||
SET max_insert_block_size=DEFAULT;
|
||||
|
||||
SELECT value FROM system.settings where name='max_insert_block_size';
|
||||
```
|
||||
|
||||
The setting is now back to its default:
|
||||
|
||||
```response
|
||||
┌─value───┐
|
||||
│ 1048449 │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
|
||||
## Custom Settings {#custom_settings}
|
||||
|
||||
In addition to the common [settings](../../operations/settings/settings.md), users can define custom settings.
|
||||
|
||||
A custom setting name must begin with one of predefined prefixes. The list of these prefixes must be declared in the [custom_settings_prefixes](../../operations/server-configuration-parameters/settings.md#custom_settings_prefixes) parameter in the server configuration file.
|
||||
|
||||
```xml
|
||||
<custom_settings_prefixes>custom_</custom_settings_prefixes>
|
||||
```
|
||||
|
||||
To define a custom setting use `SET` command:
|
||||
|
||||
```sql
|
||||
SET custom_a = 123;
|
||||
```
|
||||
|
||||
To get the current value of a custom setting use `getSetting()` function:
|
||||
|
||||
```sql
|
||||
SELECT getSetting('custom_a');
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Server Configuration Settings](../../operations/server-configuration-parameters/settings.md)
|
||||
|
@ -242,6 +242,26 @@ See also:
|
||||
- [DateTime data type.](../../sql-reference/data-types/datetime.md)
|
||||
- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md)
|
||||
|
||||
## interval_output_format {#interval_output_format}
|
||||
|
||||
Allows choosing different output formats of the text representation of interval types.
|
||||
|
||||
Possible values:
|
||||
|
||||
- `kusto` - KQL-style output format.
|
||||
|
||||
ClickHouse outputs intervals in [KQL format](https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-timespan-format-strings#the-constant-c-format-specifier). For example, `toIntervalDay(2)` would be formatted as `2.00:00:00`. Please note that for interval types of varying length (ie. `IntervalMonth` and `IntervalYear`) the average number of seconds per interval is taken into account.
|
||||
|
||||
- `numeric` - Numeric output format.
|
||||
|
||||
ClickHouse outputs intervals as their underlying numeric representation. For example, `toIntervalDay(2)` would be formatted as `2`.
|
||||
|
||||
Default value: `numeric`.
|
||||
|
||||
See also:
|
||||
|
||||
- [Interval](../../sql-reference/data-types/special-data-types/interval.md)
|
||||
|
||||
## input_format_ipv4_default_on_conversion_error {#input_format_ipv4_default_on_conversion_error}
|
||||
|
||||
Deserialization of IPv4 will use default values instead of throwing exception on conversion error.
|
||||
|
217
docs/en/operations/settings/settings-query-level.md
Normal file
217
docs/en/operations/settings/settings-query-level.md
Normal file
@ -0,0 +1,217 @@
|
||||
---
|
||||
sidebar_label: Query-level Settings
|
||||
title: Query-level Settings
|
||||
slug: /en/operations/settings/query-level
|
||||
---
|
||||
|
||||
There are multiple ways to set ClickHouse query-level settings. Settings are configured in layers, and each subsequent layer redefines the previous values of a setting.
|
||||
|
||||
The order of priority for defining a setting is:
|
||||
|
||||
1. Applying a setting to a user directly, or within a settings profile
|
||||
|
||||
- SQL (recommended)
|
||||
- adding one or more XML or YAML files to `/etc/clickhouse-server/users.d`
|
||||
|
||||
2. Session settings
|
||||
|
||||
- Send `SET setting=value` from the ClickHouse Cloud SQL console or
|
||||
`clickhouse client` in interactive mode. Similarly, you can use ClickHouse
|
||||
sessions in the HTTP protocol. To do this, you need to specify the
|
||||
`session_id` HTTP parameter.
|
||||
|
||||
3. Query settings
|
||||
|
||||
- When starting `clickhouse client` in non-interactive mode, set the startup
|
||||
parameter `--setting=value`.
|
||||
- When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`).
|
||||
- Define settings in the
|
||||
[SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query)
|
||||
clause of the SELECT query. The setting value is applied only to that query
|
||||
and is reset to the default or previous value after the query is executed.
|
||||
|
||||
## Examples
|
||||
|
||||
These examples all set the value of the `async_insert` setting to `1`, and
|
||||
show how to examine the settings in a running system.
|
||||
|
||||
### Using SQL to apply a setting to a user directly
|
||||
|
||||
This creates the user `ingester` with the setting `async_inset = 1`:
|
||||
|
||||
```sql
|
||||
CREATE USER ingester
|
||||
IDENTIFIED WITH sha256_hash BY '7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3'
|
||||
# highlight-next-line
|
||||
SETTINGS async_insert = 1
|
||||
```
|
||||
|
||||
#### Examine the settings profile and assignment
|
||||
|
||||
```sql
|
||||
SHOW ACCESS
|
||||
```
|
||||
|
||||
```response
|
||||
┌─ACCESS─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ ... │
|
||||
# highlight-next-line
|
||||
│ CREATE USER ingester IDENTIFIED WITH sha256_password SETTINGS async_insert = true │
|
||||
│ ... │
|
||||
└────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
### Using SQL to create a settings profile and assign to a user
|
||||
|
||||
This creates the profile `log_ingest` with the setting `async_inset = 1`:
|
||||
|
||||
```sql
|
||||
CREATE
|
||||
SETTINGS PROFILE log_ingest SETTINGS async_insert = 1
|
||||
```
|
||||
|
||||
This creates the user `ingester` and assigns the user the settings profile `log_ingest`:
|
||||
|
||||
```sql
|
||||
CREATE USER ingester
|
||||
IDENTIFIED WITH sha256_hash BY '7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3'
|
||||
# highlight-next-line
|
||||
SETTINGS PROFILE log_ingest
|
||||
```
|
||||
|
||||
|
||||
### Using XML to create a settings profile and user
|
||||
|
||||
```xml title=/etc/clickhouse-server/users.d/users.xml
|
||||
<clickhouse>
|
||||
# highlight-start
|
||||
<profiles>
|
||||
<log_ingest>
|
||||
<async_insert>1</async_insert>
|
||||
</log_ingest>
|
||||
</profiles>
|
||||
# highlight-end
|
||||
|
||||
<users>
|
||||
<ingester>
|
||||
<password_sha256_hex>7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3</password_sha256_hex>
|
||||
# highlight-start
|
||||
<profile>log_ingest</profile>
|
||||
# highlight-end
|
||||
</ingester>
|
||||
<default replace="true">
|
||||
<password_sha256_hex>7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3</password_sha256_hex>
|
||||
<access_management>1</access_management>
|
||||
<named_collection_control>1</named_collection_control>
|
||||
</default>
|
||||
</users>
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
#### Examine the settings profile and assignment
|
||||
|
||||
```sql
|
||||
SHOW ACCESS
|
||||
```
|
||||
|
||||
```response
|
||||
┌─ACCESS─────────────────────────────────────────────────────────────────────────────┐
|
||||
│ CREATE USER default IDENTIFIED WITH sha256_password │
|
||||
# highlight-next-line
|
||||
│ CREATE USER ingester IDENTIFIED WITH sha256_password SETTINGS PROFILE log_ingest │
|
||||
│ CREATE SETTINGS PROFILE default │
|
||||
# highlight-next-line
|
||||
│ CREATE SETTINGS PROFILE log_ingest SETTINGS async_insert = true │
|
||||
│ CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1 │
|
||||
│ ... │
|
||||
└────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Assign a setting to a session
|
||||
|
||||
```sql
|
||||
SET async_insert =1;
|
||||
SELECT value FROM system.settings where name='async_insert';
|
||||
```
|
||||
|
||||
```response
|
||||
┌─value──┐
|
||||
│ 1 │
|
||||
└────────┘
|
||||
```
|
||||
|
||||
### Assign a setting during a query
|
||||
|
||||
```sql
|
||||
INSERT INTO YourTable
|
||||
# highlight-next-line
|
||||
SETTINGS async_insert=1
|
||||
VALUES (...)
|
||||
```
|
||||
|
||||
|
||||
## Converting a Setting to its Default Value
|
||||
|
||||
If you change a setting and would like to revert it back to its default value, set the value to `DEFAULT`. The syntax looks like:
|
||||
|
||||
```sql
|
||||
SET setting_name = DEFAULT
|
||||
```
|
||||
|
||||
For example, the default value of `async_insert` is `0`. Suppose you change its value to `1`:
|
||||
|
||||
```sql
|
||||
SET async_insert = 1;
|
||||
|
||||
SELECT value FROM system.settings where name='async_insert';
|
||||
```
|
||||
|
||||
The response is:
|
||||
|
||||
```response
|
||||
┌─value──┐
|
||||
│ 1 │
|
||||
└────────┘
|
||||
```
|
||||
|
||||
The following command sets its value back to 0:
|
||||
|
||||
```sql
|
||||
SET async_insert = DEFAULT;
|
||||
|
||||
SELECT value FROM system.settings where name='async_insert';
|
||||
```
|
||||
|
||||
The setting is now back to its default:
|
||||
|
||||
```response
|
||||
┌─value───┐
|
||||
│ 0 │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
## Custom Settings {#custom_settings}
|
||||
|
||||
In addition to the common [settings](../../operations/settings/settings.md), users can define custom settings.
|
||||
|
||||
A custom setting name must begin with one of predefined prefixes. The list of these prefixes must be declared in the [custom_settings_prefixes](../../operations/server-configuration-parameters/settings.md#custom_settings_prefixes) parameter in the server configuration file.
|
||||
|
||||
```xml
|
||||
<custom_settings_prefixes>custom_</custom_settings_prefixes>
|
||||
```
|
||||
|
||||
To define a custom setting use `SET` command:
|
||||
|
||||
```sql
|
||||
SET custom_a = 123;
|
||||
```
|
||||
|
||||
To get the current value of a custom setting use `getSetting()` function:
|
||||
|
||||
```sql
|
||||
SELECT getSetting('custom_a');
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- View the [Settings](./settings.md) page for a description of the ClickHouse settings.
|
||||
- [Global server settings](../../operations/server-configuration-parameters/settings.md)
|
@ -4524,6 +4524,7 @@ This setting allows to specify renaming pattern for files processed by `file` ta
|
||||
|
||||
### Placeholders
|
||||
|
||||
- `%a` — Full original filename (e.g., "sample.csv").
|
||||
- `%f` — Original filename without extension (e.g., "sample").
|
||||
- `%e` — Original file extension with dot (e.g., ".csv").
|
||||
- `%t` — Timestamp (in microseconds).
|
||||
|
@ -39,6 +39,8 @@ Columns:
|
||||
|
||||
- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
|
||||
|
||||
- `primary_key_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The amount of memory (in bytes) used by primary key values in the primary.idx/cidx file on disk.
|
||||
|
||||
- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks.
|
||||
|
||||
- `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included.
|
||||
|
@ -300,7 +300,7 @@ SELECT groupArrayResample(30, 75, 30)(name, age) FROM people
|
||||
|
||||
Consider the results.
|
||||
|
||||
`Jonh` is out of the sample because he’s too young. Other people are distributed according to the specified age intervals.
|
||||
`John` is out of the sample because he’s too young. Other people are distributed according to the specified age intervals.
|
||||
|
||||
Now let’s count the total number of people and their average wage in the specified age intervals.
|
||||
|
||||
|
@ -0,0 +1,32 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/array_concat_agg
|
||||
sidebar_position: 110
|
||||
---
|
||||
|
||||
# array_concat_agg
|
||||
- Alias of `groupArrayArray`. The function is case insensitive.
|
||||
|
||||
**Example**
|
||||
|
||||
```text
|
||||
SELECT *
|
||||
FROM t
|
||||
|
||||
┌─a───────┐
|
||||
│ [1,2,3] │
|
||||
│ [4,5] │
|
||||
│ [6] │
|
||||
└─────────┘
|
||||
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT array_concat_agg(a) AS a
|
||||
FROM t
|
||||
|
||||
┌─a─────────────┐
|
||||
│ [1,2,3,4,5,6] │
|
||||
└───────────────┘
|
||||
```
|
@ -44,3 +44,5 @@ Result:
|
||||
```
|
||||
|
||||
The groupArray function will remove ᴺᵁᴸᴸ value based on the above results.
|
||||
|
||||
- Alias: `array_agg`.
|
||||
|
@ -143,5 +143,6 @@ Time shifts for multiple days. Some pacific islands changed their timezone offse
|
||||
- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format)
|
||||
- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format)
|
||||
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
|
||||
- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
|
||||
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
|
||||
- [The `Date` data type](../../sql-reference/data-types/date.md)
|
||||
|
@ -119,6 +119,7 @@ FROM dt;
|
||||
- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format)
|
||||
- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format)
|
||||
- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
|
||||
- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
|
||||
- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-for-working-with-dates-and-times)
|
||||
- [`Date` data type](../../sql-reference/data-types/date.md)
|
||||
- [`DateTime` data type](../../sql-reference/data-types/datetime.md)
|
||||
|
@ -102,6 +102,8 @@ The function also works for strings.
|
||||
|
||||
Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT length(arr) FROM table` transforms to `SELECT arr.size0 FROM TABLE`.
|
||||
|
||||
Alias: `OCTET_LENGTH`
|
||||
|
||||
## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64
|
||||
|
||||
## emptyArrayInt8, emptyArrayInt16, emptyArrayInt32, emptyArrayInt64
|
||||
@ -142,6 +144,7 @@ range([start, ] end [, step])
|
||||
|
||||
- All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments.
|
||||
- An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting.
|
||||
- Returns Null if any argument has Nullable(Nothing) type. An exception is thrown if any argument has Null value (Nullable(T) type).
|
||||
|
||||
**Examples**
|
||||
|
||||
@ -878,7 +881,7 @@ A special function. See the section [“ArrayJoin function”](../../sql-referen
|
||||
|
||||
## arrayDifference
|
||||
|
||||
Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`).
|
||||
Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -996,6 +999,24 @@ SELECT
|
||||
└──────────────┴───────────┘
|
||||
```
|
||||
|
||||
## arrayJaccardIndex
|
||||
|
||||
Returns the [Jaccard index](https://en.wikipedia.org/wiki/Jaccard_index) of two arrays.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
``` sql
|
||||
SELECT arrayJaccardIndex([1, 2], [2, 3]) AS res
|
||||
```
|
||||
|
||||
Result:
|
||||
``` text
|
||||
┌─res────────────────┐
|
||||
│ 0.3333333333333333 │
|
||||
└────────────────────┘
|
||||
```
|
||||
|
||||
## arrayReduce
|
||||
|
||||
Applies an aggregate function to array elements and returns its result. The name of the aggregation function is passed as a string in single quotes `'max'`, `'sum'`. When using parametric aggregate functions, the parameter is indicated after the function name in parentheses `'uniqUpTo(6)'`.
|
||||
|
@ -694,10 +694,14 @@ SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS we
|
||||
|
||||
Returns year and week for a date. The year in the result may be different from the year in the date argument for the first and the last week of the year.
|
||||
|
||||
The mode argument works exactly like the mode argument to `toWeek()`. For the single-argument syntax, a mode value of 0 is used.
|
||||
The mode argument works like the mode argument to `toWeek()`. For the single-argument syntax, a mode value of 0 is used.
|
||||
|
||||
`toISOYear()` is a compatibility function that is equivalent to `intDiv(toYearWeek(date,3),100)`.
|
||||
|
||||
:::warning
|
||||
The week number returned by `toYearWeek()` can be different from what the `toWeek()` returns. `toWeek()` always returns week number in the context of the given year, and in case `toWeek()` returns `0`, `toYearWeek()` returns the value corresponding to the last week of previous year. See `prev_yearWeek` in example below.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
@ -707,18 +711,18 @@ toYearWeek(t[, mode[, timezone]])
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9;
|
||||
SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9, toYearWeek(toDate('2022-01-01')) AS prev_yearWeek;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┐
|
||||
│ 2016-12-27 │ 201652 │ 201652 │ 201701 │
|
||||
└────────────┴───────────┴───────────┴───────────┘
|
||||
┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┬─prev_yearWeek─┐
|
||||
│ 2016-12-27 │ 201652 │ 201652 │ 201701 │ 202152 │
|
||||
└────────────┴───────────┴───────────┴───────────┴───────────────┘
|
||||
```
|
||||
|
||||
## age
|
||||
|
||||
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond.
|
||||
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second.
|
||||
E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit.
|
||||
|
||||
For an alternative to `age`, see function `date\_diff`.
|
||||
@ -734,8 +738,6 @@ age('unit', startdate, enddate, [timezone])
|
||||
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
|
||||
Possible values:
|
||||
|
||||
- `microsecond` (possible abbreviations: `us`, `u`)
|
||||
- `millisecond` (possible abbreviations: `ms`)
|
||||
- `second` (possible abbreviations: `ss`, `s`)
|
||||
- `minute` (possible abbreviations: `mi`, `n`)
|
||||
- `hour` (possible abbreviations: `hh`, `h`)
|
||||
@ -811,8 +813,6 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_
|
||||
- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
|
||||
Possible values:
|
||||
|
||||
- `microsecond` (possible abbreviations: `us`, `u`)
|
||||
- `millisecond` (possible abbreviations: `ms`)
|
||||
- `second` (possible abbreviations: `ss`, `s`)
|
||||
- `minute` (possible abbreviations: `mi`, `n`)
|
||||
- `hour` (possible abbreviations: `hh`, `h`)
|
||||
|
@ -90,6 +90,8 @@ Returns the length of a string in bytes (not: in characters or Unicode code poin
|
||||
|
||||
The function also works for arrays.
|
||||
|
||||
Alias: `OCTET_LENGTH`
|
||||
|
||||
## lengthUTF8
|
||||
|
||||
Returns the length of a string in Unicode code points (not: in bytes or characters). It assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
@ -1253,3 +1255,15 @@ Result:
|
||||
│ A240 │
|
||||
└──────────────────┘
|
||||
```
|
||||
|
||||
## initcap
|
||||
|
||||
Convert the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
|
||||
|
||||
## initcapUTF8
|
||||
|
||||
Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
|
||||
|
||||
Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
|
||||
|
||||
If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
|
||||
|
@ -399,7 +399,11 @@ toDateTime(expr[, time_zone ])
|
||||
- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [Int](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md).
|
||||
- `time_zone` — Time zone. [String](/docs/en/sql-reference/data-types/string.md).
|
||||
|
||||
If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp).
|
||||
:::note
|
||||
If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp).
|
||||
If `expr` is a [String](/docs/en/sql-reference/data-types/string.md), it may be interpreted as a Unix timestamp or as a string representation of date / date with time.
|
||||
Thus, parsing of short numbers' string representations (up to 4 digits) is explicitly disabled due to ambiguity, e.g. a string `'1999'` may be both a year (an incomplete string representation of Date / DateTime) or a unix timestamp. Longer numeric strings are allowed.
|
||||
:::
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -5,7 +5,27 @@ sidebar_label: WITH
|
||||
|
||||
# WITH Clause
|
||||
|
||||
ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)), that is provides to use results of `WITH` clause in the rest of `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression.
|
||||
ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)) and substitutes the code defined in the `WITH` clause in all places of use for the rest of `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression.
|
||||
|
||||
Please note that CTEs do not guarantee the same results in all places they are called because the query will be re-executed for each use case.
|
||||
|
||||
An example of such behavior is below
|
||||
``` sql
|
||||
with cte_numbers as
|
||||
(
|
||||
select
|
||||
num
|
||||
from generateRandom('num UInt64', NULL)
|
||||
limit 1000000
|
||||
)
|
||||
select
|
||||
count()
|
||||
from cte_numbers
|
||||
where num in (select num from cte_numbers)
|
||||
```
|
||||
If CTEs were to pass exactly the results and not just a piece of code, you would always see `1000000`
|
||||
|
||||
However, due to the fact that we are referring `cte_numbers` twice, random numbers are generated each time and, accordingly, we see different random results, `280501, 392454, 261636, 196227` and so on...
|
||||
|
||||
## Syntax
|
||||
|
||||
|
@ -134,7 +134,7 @@ Multiple path components can have globs. For being processed file must exist and
|
||||
|
||||
- `*` — Substitutes any number of any characters except `/` including empty string.
|
||||
- `?` — Substitutes any single character.
|
||||
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
|
||||
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`.
|
||||
- `{N..M}` — Substitutes any number in range from N to M including both borders.
|
||||
- `**` - Fetches all files inside the folder recursively.
|
||||
|
||||
|
@ -575,14 +575,60 @@ ClickHouse поддерживает динамическое изменение
|
||||
- `errorlog` - Файл лога ошибок.
|
||||
- `size` - Размер файла. Действует для `log` и `errorlog`. Как только файл достиг размера `size`, ClickHouse архивирует и переименовывает его, а на его месте создает новый файл лога.
|
||||
- `count` - Количество заархивированных файлов логов, которые сохраняет ClickHouse.
|
||||
- `stream_compress` – Сжимать `log` и `errorlog` с помощью алгоритма `lz4`. Чтобы активировать, узтановите значение `1` или `true`.
|
||||
|
||||
Имена файлов `log` и `errorlog` (только имя файла, а не директорий) поддерживают спецификаторы шаблонов даты и времени.
|
||||
|
||||
**Спецификаторы форматирования**
|
||||
С помощью следующих спецификаторов, можно определить шаблон для формирования имени файла. Столбец “Пример” показывает возможные значения на момент времени `2023-07-06 18:32:07`.
|
||||
|
||||
| Спецификатор | Описание | Пример |
|
||||
|--------------|---------------------------------------------------------------------------------------------------------------------|--------------------------|
|
||||
| %% | Литерал % | % |
|
||||
| %n | Символ новой строки | |
|
||||
| %t | Символ горизонтальной табуляции | |
|
||||
| %Y | Год как десятичное число, например, 2017 | 2023 |
|
||||
| %y | Последние 2 цифры года в виде десятичного числа (диапазон [00,99]) | 23 |
|
||||
| %C | Первые 2 цифры года в виде десятичного числа (диапазон [00,99]) | 20 |
|
||||
| %G | Год по неделям согласно [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), то есть год, который содержит указанную неделю. Обычно используется вместе с %V. | 2023 |
|
||||
| %g | Последние 2 цифры [года по неделям ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), т.е. года, содержащего указанную неделю (диапазон [00,99]). | 23 |
|
||||
| %b | Сокращённое название месяца, например Oct (зависит от локали) | Jul |
|
||||
| %h | Синоним %b | Jul |
|
||||
| %B | Полное название месяца, например, October (зависит от локали) | July |
|
||||
| %m | Месяц в виде десятичного числа (диапазон [01,12]) | 07 |
|
||||
| %U | Неделя года в виде десятичного числа (воскресенье - первый день недели) (диапазон [00,53]) | 27 |
|
||||
| %W | Неделя года в виде десятичного числа (понедельник - первый день недели) (диапазон [00,53]) | 27 |
|
||||
| %V | Неделя года ISO 8601 (диапазон [01,53]) | 27 |
|
||||
| %j | День года в виде десятичного числа (диапазон [001,366]) | 187 |
|
||||
| %d | День месяца в виде десятичного числа (диапазон [01,31]) Перед одиночной цифрой ставится ноль. | 06 |
|
||||
| %e | День месяца в виде десятичного числа (диапазон [1,31]). Перед одиночной цифрой ставится пробел. | 6 |
|
||||
| %a | Сокращённое название дня недели, например, Fri (зависит от локали) | Thu |
|
||||
| %A | Полный день недели, например, Friday (зависит от локали) | Thursday |
|
||||
| %w | День недели в виде десятичного числа, где воскресенье равно 0 (диапазон [0-6]) | 4 |
|
||||
| %u | День недели в виде десятичного числа, где понедельник равен 1 (формат ISO 8601) (диапазон [1-7]) | 4 |
|
||||
| %H | Час в виде десятичного числа, 24-часовой формат (диапазон [00-23]) | 18 |
|
||||
| %I | Час в виде десятичного числа, 12-часовой формат (диапазон [01,12]) | 06 |
|
||||
| %M | Минуты в виде десятичного числа (диапазон [00,59]) | 32 |
|
||||
| %S | Секунды как десятичное число (диапазон [00,60]) | 07 |
|
||||
| %c | Стандартная строка даты и времени, например, Sun Oct 17 04:41:13 2010 (зависит от локали) | Thu Jul 6 18:32:07 2023 |
|
||||
| %x | Локализованное представление даты (зависит от локали) | 07/06/23 |
|
||||
| %X | Локализованное представление времени, например, 18:40:20 или 6:40:20 PM (зависит от локали) | 18:32:07 |
|
||||
| %D | Эквивалентно "%m/%d/%y" | 07/06/23 |
|
||||
| %F | Эквивалентно "%Y-%m-%d" (формат даты ISO 8601) | 2023-07-06 |
|
||||
| %r | Локализованное 12-часовое время (зависит от локали) | 06:32:07 PM |
|
||||
| %R | Эквивалентно "%H:%M" | 18:32 |
|
||||
| %T | Эквивалентно "%H:%M:%S" (формат времени ISO 8601) | 18:32:07 |
|
||||
| %p | Локализованное обозначение a.m. или p.m. (зависит от локали) | PM |
|
||||
| %z | Смещение от UTC в формате ISO 8601 (например, -0430), или без символов, если информация о часовом поясе недоступна | +0800 |
|
||||
| %Z | Зависящее от локали название или аббревиатура часового пояса, если информация о часовом поясе доступна | Z AWST |
|
||||
|
||||
**Пример**
|
||||
|
||||
``` xml
|
||||
<logger>
|
||||
<level>trace</level>
|
||||
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
|
||||
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
|
||||
<log>/var/log/clickhouse-server/clickhouse-server-%F-%T.log</log>
|
||||
<errorlog>/var/log/clickhouse-server/clickhouse-server-%F-%T.err.log</errorlog>
|
||||
<size>1000M</size>
|
||||
<count>10</count>
|
||||
</logger>
|
||||
|
@ -4201,6 +4201,7 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi
|
||||
### Шаблон
|
||||
Шаблон поддерживает следующие виды плейсхолдеров:
|
||||
|
||||
- `%a` — Полное исходное имя файла (например "sample.csv").
|
||||
- `%f` — Исходное имя файла без расширения (например "sample").
|
||||
- `%e` — Оригинальное расширение файла с точкой (например ".csv").
|
||||
- `%t` — Текущее время (в микросекундах).
|
||||
|
@ -122,6 +122,7 @@ FROM dt
|
||||
- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_input_format)
|
||||
- [Настройка `date_time_output_format`](../../operations/settings/index.md)
|
||||
- [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
|
||||
- [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone)
|
||||
- [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime)
|
||||
- [Тип данных `Date`](date.md)
|
||||
- [Тип данных `DateTime64`](datetime64.md)
|
||||
|
@ -102,6 +102,7 @@ FROM dt;
|
||||
- [Настройка `date_time_input_format`](../../operations/settings/settings.md#settings-date_time_input_format)
|
||||
- [Настройка `date_time_output_format`](../../operations/settings/settings.md)
|
||||
- [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
|
||||
- [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone)
|
||||
- [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime)
|
||||
- [Тип данных `Date`](date.md)
|
||||
- [Тип данных `DateTime`](datetime.md)
|
||||
|
@ -145,6 +145,8 @@ range([start, ] end [, step])
|
||||
|
||||
- Если в результате запроса создаются массивы суммарной длиной больше, чем количество элементов, указанное настройкой [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block), то генерируется исключение.
|
||||
|
||||
- Возвращает Null если любой аргумент Nullable(Nothing) типа. Генерируется исключение если любой аргумент Null (Nullable(T) тип).
|
||||
|
||||
**Примеры**
|
||||
|
||||
Запрос:
|
||||
|
@ -599,29 +599,33 @@ SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS we
|
||||
## toYearWeek(date[,mode]) {#toyearweek}
|
||||
Возвращает год и неделю для даты. Год в результате может отличаться от года в аргументе даты для первой и последней недели года.
|
||||
|
||||
Аргумент mode работает точно так же, как аргумент mode [toWeek()](#toweek). Если mode не задан, используется режим 0.
|
||||
Аргумент mode работает так же, как аргумент mode [toWeek()](#toweek), значение mode по умолчанию -- `0`.
|
||||
|
||||
`toISOYear() ` эквивалентно `intDiv(toYearWeek(date,3),100)`.
|
||||
`toISOYear() ` эквивалентно `intDiv(toYearWeek(date,3),100)`
|
||||
|
||||
:::warning
|
||||
Однако, есть отличие в работе функций `toWeek()` и `toYearWeek()`. `toWeek()` возвращает номер недели в контексте заданного года, и в случае, когда `toWeek()` вернёт `0`, `toYearWeek()` вернёт значение, соответствующее последней неделе предыдущего года (см. `prev_yearWeek` в примере).
|
||||
:::
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9;
|
||||
SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9, toYearWeek(toDate('2022-01-01')) AS prev_yearWeek;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
```text
|
||||
┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┐
|
||||
│ 2016-12-27 │ 201652 │ 201652 │ 201701 │
|
||||
└────────────┴───────────┴───────────┴───────────┘
|
||||
┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┬─prev_yearWeek─┐
|
||||
│ 2016-12-27 │ 201652 │ 201652 │ 201701 │ 202152 │
|
||||
└────────────┴───────────┴───────────┴───────────┴───────────────┘
|
||||
```
|
||||
|
||||
## age
|
||||
|
||||
Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 микросекунду.
|
||||
Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 секунду.
|
||||
Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`.
|
||||
|
||||
**Синтаксис**
|
||||
@ -635,8 +639,6 @@ age('unit', startdate, enddate, [timezone])
|
||||
- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
|
||||
Возможные значения:
|
||||
|
||||
- `microsecond` (возможные сокращения: `us`, `u`)
|
||||
- `millisecond` (возможные сокращения: `ms`)
|
||||
- `second` (возможные сокращения: `ss`, `s`)
|
||||
- `minute` (возможные сокращения: `mi`, `n`)
|
||||
- `hour` (возможные сокращения: `hh`, `h`)
|
||||
@ -710,8 +712,6 @@ date_diff('unit', startdate, enddate, [timezone])
|
||||
- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
|
||||
Возможные значения:
|
||||
|
||||
- `microsecond` (возможные сокращения: `us`, `u`)
|
||||
- `millisecond` (возможные сокращения: `ms`)
|
||||
- `second` (возможные сокращения: `ss`, `s`)
|
||||
- `minute` (возможные сокращения: `mi`, `n`)
|
||||
- `hour` (возможные сокращения: `hh`, `h`)
|
||||
|
@ -1113,3 +1113,14 @@ A text with tags .
|
||||
The content within <b>CDATA</b>
|
||||
Do Nothing for 2 Minutes 2:00
|
||||
```
|
||||
|
||||
## initcap {#initcap}
|
||||
|
||||
Переводит первую букву каждого слова в строке в верхний регистр, а остальные — в нижний. Словами считаются последовательности алфавитно-цифровых символов, разделённые любыми другими символами.
|
||||
|
||||
## initcapUTF8 {#initcapUTF8}
|
||||
|
||||
Как [initcap](#initcap), предполагая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8.
|
||||
Не учитывает язык. То есть, для турецкого языка, результат может быть не совсем верным.
|
||||
Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным.
|
||||
Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено.
|
||||
|
@ -284,7 +284,13 @@ toDateTime(expr[, time_zone ])
|
||||
- `expr` — Значение для преобразования. [String](/docs/ru/sql-reference/data-types/string.md), [Int](/docs/ru/sql-reference/data-types/int-uint.md), [Date](/docs/ru/sql-reference/data-types/date.md) или [DateTime](/docs/ru/sql-reference/data-types/datetime.md).
|
||||
- `time_zone` — Часовой пояс. [String](/docs/ru/sql-reference/data-types/string.md).
|
||||
|
||||
Если `expr` является числом, оно интерпретируется как количество секунд от начала unix эпохи.
|
||||
:::note
|
||||
Если `expr` является числом, то оно интерпретируется как число секунд с начала Unix-эпохи (Unix Timestamp).
|
||||
|
||||
Если же `expr` -- [строка (String)](/docs/ru/sql-reference/data-types/string.md), то оно может быть интерпретировано и как Unix Timestamp, и как строковое представление даты / даты со временем.
|
||||
Ввиду неоднозначности запрещён парсинг строк длиной 4 и меньше. Так, строка `'1999'` могла бы представлять собой как год (неполное строковое представление даты или даты со временем), так и Unix Timestamp.
|
||||
Строки длиной 5 символов и более не несут неоднозначности, а следовательно, их парсинг разрешён.
|
||||
:::
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
|
@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
|
||||
|
||||
- `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
|
||||
- `?` — заменяет ровно один любой символ.
|
||||
- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`.
|
||||
- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`, причём строка может содержать `/`.
|
||||
- `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
|
||||
|
||||
Конструкция с `{}` аналогична табличной функции [remote](remote.md).
|
||||
|
@ -643,8 +643,6 @@ date_diff('unit', startdate, enddate, [timezone])
|
||||
- `unit` — `value`对应的时间单位。类型为[String](../../sql-reference/data-types/string.md)。
|
||||
可能的值:
|
||||
|
||||
- `microsecond`
|
||||
- `millisecond`
|
||||
- `second`
|
||||
- `minute`
|
||||
- `hour`
|
||||
|
@ -67,29 +67,38 @@ struct AggregateFunctionBoundingRatioData
|
||||
}
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
writeBinary(empty, buf);
|
||||
|
||||
if (!empty)
|
||||
{
|
||||
writePODBinary(left, buf);
|
||||
writePODBinary(right, buf);
|
||||
}
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
readBinary(empty, buf);
|
||||
|
||||
if (!empty)
|
||||
{
|
||||
readPODBinary(left, buf);
|
||||
readPODBinary(right, buf);
|
||||
}
|
||||
}
|
||||
void serialize(WriteBuffer & buf) const;
|
||||
void deserialize(ReadBuffer & buf);
|
||||
};
|
||||
|
||||
template <std::endian endian>
|
||||
inline void transformEndianness(AggregateFunctionBoundingRatioData::Point & p)
|
||||
{
|
||||
transformEndianness<endian>(p.x);
|
||||
transformEndianness<endian>(p.y);
|
||||
}
|
||||
|
||||
void AggregateFunctionBoundingRatioData::serialize(WriteBuffer & buf) const
|
||||
{
|
||||
writeBinaryLittleEndian(empty, buf);
|
||||
|
||||
if (!empty)
|
||||
{
|
||||
writeBinaryLittleEndian(left, buf);
|
||||
writeBinaryLittleEndian(right, buf);
|
||||
}
|
||||
}
|
||||
|
||||
void AggregateFunctionBoundingRatioData::deserialize(ReadBuffer & buf)
|
||||
{
|
||||
readBinaryLittleEndian(empty, buf);
|
||||
|
||||
if (!empty)
|
||||
{
|
||||
readBinaryLittleEndian(left, buf);
|
||||
readBinaryLittleEndian(right, buf);
|
||||
}
|
||||
}
|
||||
|
||||
class AggregateFunctionBoundingRatio final : public IAggregateFunctionDataHelper<AggregateFunctionBoundingRatioData, AggregateFunctionBoundingRatio>
|
||||
{
|
||||
|
@ -103,18 +103,18 @@ public:
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
writeIntBinary(this->data(place).sum, buf);
|
||||
writeIntBinary(this->data(place).first, buf);
|
||||
writeIntBinary(this->data(place).last, buf);
|
||||
writePODBinary<bool>(this->data(place).seen, buf);
|
||||
writeBinaryLittleEndian(this->data(place).sum, buf);
|
||||
writeBinaryLittleEndian(this->data(place).first, buf);
|
||||
writeBinaryLittleEndian(this->data(place).last, buf);
|
||||
writeBinaryLittleEndian(this->data(place).seen, buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
readIntBinary(this->data(place).sum, buf);
|
||||
readIntBinary(this->data(place).first, buf);
|
||||
readIntBinary(this->data(place).last, buf);
|
||||
readPODBinary<bool>(this->data(place).seen, buf);
|
||||
readBinaryLittleEndian(this->data(place).sum, buf);
|
||||
readBinaryLittleEndian(this->data(place).first, buf);
|
||||
readBinaryLittleEndian(this->data(place).last, buf);
|
||||
readBinaryLittleEndian(this->data(place).seen, buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
|
@ -144,22 +144,22 @@ public:
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
writeIntBinary(this->data(place).sum, buf);
|
||||
writeIntBinary(this->data(place).first, buf);
|
||||
writeIntBinary(this->data(place).first_ts, buf);
|
||||
writeIntBinary(this->data(place).last, buf);
|
||||
writeIntBinary(this->data(place).last_ts, buf);
|
||||
writePODBinary<bool>(this->data(place).seen, buf);
|
||||
writeBinaryLittleEndian(this->data(place).sum, buf);
|
||||
writeBinaryLittleEndian(this->data(place).first, buf);
|
||||
writeBinaryLittleEndian(this->data(place).first_ts, buf);
|
||||
writeBinaryLittleEndian(this->data(place).last, buf);
|
||||
writeBinaryLittleEndian(this->data(place).last_ts, buf);
|
||||
writeBinaryLittleEndian(this->data(place).seen, buf);
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
|
||||
{
|
||||
readIntBinary(this->data(place).sum, buf);
|
||||
readIntBinary(this->data(place).first, buf);
|
||||
readIntBinary(this->data(place).first_ts, buf);
|
||||
readIntBinary(this->data(place).last, buf);
|
||||
readIntBinary(this->data(place).last_ts, buf);
|
||||
readPODBinary<bool>(this->data(place).seen, buf);
|
||||
readBinaryLittleEndian(this->data(place).sum, buf);
|
||||
readBinaryLittleEndian(this->data(place).first, buf);
|
||||
readBinaryLittleEndian(this->data(place).first_ts, buf);
|
||||
readBinaryLittleEndian(this->data(place).last, buf);
|
||||
readBinaryLittleEndian(this->data(place).last_ts, buf);
|
||||
readBinaryLittleEndian(this->data(place).seen, buf);
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
|
@ -222,7 +222,6 @@ AggregateFunctionPtr AggregateFunctionFactory::tryGet(
|
||||
: nullptr;
|
||||
}
|
||||
|
||||
|
||||
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(String name) const
|
||||
{
|
||||
if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
|
||||
|
@ -125,6 +125,8 @@ void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory)
|
||||
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
|
||||
|
||||
factory.registerFunction("groupArray", { createAggregateFunctionGroupArray<false>, properties });
|
||||
factory.registerAlias("array_agg", "groupArray", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerAliasUnchecked("array_concat_agg", "groupArrayArray", AggregateFunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction("groupArraySample", { createAggregateFunctionGroupArraySample, properties });
|
||||
factory.registerFunction("groupArrayLast", { createAggregateFunctionGroupArray<true>, properties });
|
||||
}
|
||||
|
@ -266,19 +266,20 @@ public:
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
const auto & value = this->data(place).value;
|
||||
size_t size = value.size();
|
||||
const size_t size = value.size();
|
||||
writeVarUInt(size, buf);
|
||||
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
|
||||
for (const auto & element : value)
|
||||
writeBinaryLittleEndian(element, buf);
|
||||
|
||||
if constexpr (Trait::last)
|
||||
DB::writeIntBinary<size_t>(this->data(place).total_values, buf);
|
||||
writeBinaryLittleEndian(this->data(place).total_values, buf);
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
DB::writeIntBinary<size_t>(this->data(place).total_values, buf);
|
||||
writeBinaryLittleEndian(this->data(place).total_values, buf);
|
||||
WriteBufferFromOwnString rng_buf;
|
||||
rng_buf << this->data(place).rng;
|
||||
DB::writeStringBinary(rng_buf.str(), buf);
|
||||
writeStringBinary(rng_buf.str(), buf);
|
||||
}
|
||||
}
|
||||
|
||||
@ -297,16 +298,17 @@ public:
|
||||
auto & value = this->data(place).value;
|
||||
|
||||
value.resize_exact(size, arena);
|
||||
buf.readStrict(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
|
||||
for (auto & element : value)
|
||||
readBinaryLittleEndian(element, buf);
|
||||
|
||||
if constexpr (Trait::last)
|
||||
DB::readIntBinary<size_t>(this->data(place).total_values, buf);
|
||||
readBinaryLittleEndian(this->data(place).total_values, buf);
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
DB::readIntBinary<size_t>(this->data(place).total_values, buf);
|
||||
readBinaryLittleEndian(this->data(place).total_values, buf);
|
||||
std::string rng_string;
|
||||
DB::readStringBinary(rng_string, buf);
|
||||
readStringBinary(rng_string, buf);
|
||||
ReadBufferFromString rng_buf(rng_string);
|
||||
rng_buf >> this->data(place).rng;
|
||||
}
|
||||
@ -603,14 +605,14 @@ public:
|
||||
node->write(buf);
|
||||
|
||||
if constexpr (Trait::last)
|
||||
DB::writeIntBinary<size_t>(data(place).total_values, buf);
|
||||
writeBinaryLittleEndian(data(place).total_values, buf);
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
DB::writeIntBinary<size_t>(data(place).total_values, buf);
|
||||
writeBinaryLittleEndian(data(place).total_values, buf);
|
||||
WriteBufferFromOwnString rng_buf;
|
||||
rng_buf << data(place).rng;
|
||||
DB::writeStringBinary(rng_buf.str(), buf);
|
||||
writeStringBinary(rng_buf.str(), buf);
|
||||
}
|
||||
}
|
||||
|
||||
@ -636,13 +638,13 @@ public:
|
||||
value[i] = Node::read(buf, arena);
|
||||
|
||||
if constexpr (Trait::last)
|
||||
DB::readIntBinary<size_t>(data(place).total_values, buf);
|
||||
readBinaryLittleEndian(data(place).total_values, buf);
|
||||
|
||||
if constexpr (Trait::sampler == Sampler::RNG)
|
||||
{
|
||||
DB::readIntBinary<size_t>(data(place).total_values, buf);
|
||||
readBinaryLittleEndian(data(place).total_values, buf);
|
||||
std::string rng_string;
|
||||
DB::readStringBinary(rng_string, buf);
|
||||
readStringBinary(rng_string, buf);
|
||||
ReadBufferFromString rng_buf(rng_string);
|
||||
rng_buf >> data(place).rng;
|
||||
}
|
||||
|
@ -1,10 +1,25 @@
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/AggregateFunctionGroupArrayMoving.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeDateTime64.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#define AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE 0xFFFFFF
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -13,11 +28,186 @@ struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct MovingData
|
||||
{
|
||||
/// For easy serialization.
|
||||
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
|
||||
|
||||
using Accumulator = T;
|
||||
|
||||
/// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
|
||||
using Array = PODArray<T, 32, Allocator>;
|
||||
|
||||
Array value; /// Prefix sums.
|
||||
T sum{};
|
||||
|
||||
void NO_SANITIZE_UNDEFINED add(T val, Arena * arena)
|
||||
{
|
||||
sum += val;
|
||||
value.push_back(sum, arena);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct MovingSumData : public MovingData<T>
|
||||
{
|
||||
static constexpr auto name = "groupArrayMovingSum";
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
|
||||
{
|
||||
if (idx < window_size)
|
||||
return this->value[idx];
|
||||
else
|
||||
return this->value[idx] - this->value[idx - window_size];
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct MovingAvgData : public MovingData<T>
|
||||
{
|
||||
static constexpr auto name = "groupArrayMovingAvg";
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
|
||||
{
|
||||
if (idx < window_size)
|
||||
return this->value[idx] / T(window_size);
|
||||
else
|
||||
return (this->value[idx] - this->value[idx - window_size]) / T(window_size);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename T, typename LimitNumElements, typename Data>
|
||||
class MovingImpl final
|
||||
: public IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>
|
||||
{
|
||||
static constexpr bool limit_num_elems = LimitNumElements::value;
|
||||
UInt64 window_size;
|
||||
|
||||
public:
|
||||
using ResultT = typename Data::Accumulator;
|
||||
|
||||
using ColumnSource = ColumnVectorOrDecimal<T>;
|
||||
|
||||
/// Probably for overflow function in the future.
|
||||
using ColumnResult = ColumnVectorOrDecimal<ResultT>;
|
||||
|
||||
explicit MovingImpl(const DataTypePtr & data_type_, UInt64 window_size_ = std::numeric_limits<UInt64>::max())
|
||||
: IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>({data_type_}, {}, createResultType(data_type_))
|
||||
, window_size(window_size_) {}
|
||||
|
||||
String getName() const override { return Data::name; }
|
||||
|
||||
static DataTypePtr createResultType(const DataTypePtr & argument)
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(getReturnTypeElement(argument));
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
|
||||
this->data(place).add(static_cast<ResultT>(value), arena);
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & cur_elems = this->data(place);
|
||||
auto & rhs_elems = this->data(rhs);
|
||||
|
||||
size_t cur_size = cur_elems.value.size();
|
||||
|
||||
if (rhs_elems.value.size())
|
||||
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
|
||||
|
||||
for (size_t i = cur_size; i < cur_elems.value.size(); ++i)
|
||||
{
|
||||
cur_elems.value[i] += cur_elems.sum;
|
||||
}
|
||||
|
||||
cur_elems.sum += rhs_elems.sum;
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
const auto & value = this->data(place).value;
|
||||
size_t size = value.size();
|
||||
writeVarUInt(size, buf);
|
||||
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
if (unlikely(size > AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE);
|
||||
|
||||
if (size > 0)
|
||||
{
|
||||
auto & value = this->data(place).value;
|
||||
value.resize(size, arena);
|
||||
buf.readStrict(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
|
||||
this->data(place).sum = value.back();
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
const auto & data = this->data(place);
|
||||
size_t size = data.value.size();
|
||||
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
|
||||
offsets_to.push_back(offsets_to.back() + size);
|
||||
|
||||
if (size)
|
||||
{
|
||||
typename ColumnResult::Container & data_to = assert_cast<ColumnResult &>(arr_to.getData()).getData();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if (!limit_num_elems)
|
||||
{
|
||||
data_to.push_back(data.get(i, size));
|
||||
}
|
||||
else
|
||||
{
|
||||
data_to.push_back(data.get(i, window_size));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
static auto getReturnTypeElement(const DataTypePtr & argument)
|
||||
{
|
||||
if constexpr (!is_decimal<ResultT>)
|
||||
return std::make_shared<DataTypeNumber<ResultT>>();
|
||||
else
|
||||
{
|
||||
using Res = DataTypeDecimal<ResultT>;
|
||||
return std::make_shared<Res>(Res::maxPrecision(), getDecimalScale(*argument));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
@ -79,7 +269,7 @@ AggregateFunctionPtr createAggregateFunctionMoving(
|
||||
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name);
|
||||
|
||||
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() < 0) ||
|
||||
if ((type == Field::Types::Int64 && parameters[0].get<Int64>() <= 0) ||
|
||||
(type == Field::Types::UInt64 && parameters[0].get<UInt64>() == 0))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter for aggregate function {} should be positive integer", name);
|
||||
|
||||
|
@ -1,207 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypesDecimal.h>
|
||||
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
|
||||
#include <Common/ArenaAllocator.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#define AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE 0xFFFFFF
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct Settings;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int TOO_LARGE_ARRAY_SIZE;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct MovingData
|
||||
{
|
||||
/// For easy serialization.
|
||||
static_assert(std::has_unique_object_representations_v<T> || std::is_floating_point_v<T>);
|
||||
|
||||
using Accumulator = T;
|
||||
|
||||
/// Switch to ordinary Allocator after 4096 bytes to avoid fragmentation and trash in Arena
|
||||
using Allocator = MixedAlignedArenaAllocator<alignof(T), 4096>;
|
||||
using Array = PODArray<T, 32, Allocator>;
|
||||
|
||||
Array value; /// Prefix sums.
|
||||
T sum{};
|
||||
|
||||
void NO_SANITIZE_UNDEFINED add(T val, Arena * arena)
|
||||
{
|
||||
sum += val;
|
||||
value.push_back(sum, arena);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct MovingSumData : public MovingData<T>
|
||||
{
|
||||
static constexpr auto name = "groupArrayMovingSum";
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
|
||||
{
|
||||
if (idx < window_size)
|
||||
return this->value[idx];
|
||||
else
|
||||
return this->value[idx] - this->value[idx - window_size];
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct MovingAvgData : public MovingData<T>
|
||||
{
|
||||
static constexpr auto name = "groupArrayMovingAvg";
|
||||
|
||||
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
|
||||
{
|
||||
if (idx < window_size)
|
||||
return this->value[idx] / T(window_size);
|
||||
else
|
||||
return (this->value[idx] - this->value[idx - window_size]) / T(window_size);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename T, typename LimitNumElements, typename Data>
|
||||
class MovingImpl final
|
||||
: public IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>
|
||||
{
|
||||
static constexpr bool limit_num_elems = LimitNumElements::value;
|
||||
UInt64 window_size;
|
||||
|
||||
public:
|
||||
using ResultT = typename Data::Accumulator;
|
||||
|
||||
using ColumnSource = ColumnVectorOrDecimal<T>;
|
||||
|
||||
/// Probably for overflow function in the future.
|
||||
using ColumnResult = ColumnVectorOrDecimal<ResultT>;
|
||||
|
||||
explicit MovingImpl(const DataTypePtr & data_type_, UInt64 window_size_ = std::numeric_limits<UInt64>::max())
|
||||
: IAggregateFunctionDataHelper<Data, MovingImpl<T, LimitNumElements, Data>>({data_type_}, {}, createResultType(data_type_))
|
||||
, window_size(window_size_) {}
|
||||
|
||||
String getName() const override { return Data::name; }
|
||||
|
||||
static DataTypePtr createResultType(const DataTypePtr & argument)
|
||||
{
|
||||
return std::make_shared<DataTypeArray>(getReturnTypeElement(argument));
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
|
||||
this->data(place).add(static_cast<ResultT>(value), arena);
|
||||
}
|
||||
|
||||
void NO_SANITIZE_UNDEFINED merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
|
||||
{
|
||||
auto & cur_elems = this->data(place);
|
||||
auto & rhs_elems = this->data(rhs);
|
||||
|
||||
size_t cur_size = cur_elems.value.size();
|
||||
|
||||
if (rhs_elems.value.size())
|
||||
cur_elems.value.insert(rhs_elems.value.begin(), rhs_elems.value.end(), arena);
|
||||
|
||||
for (size_t i = cur_size; i < cur_elems.value.size(); ++i)
|
||||
{
|
||||
cur_elems.value[i] += cur_elems.sum;
|
||||
}
|
||||
|
||||
cur_elems.sum += rhs_elems.sum;
|
||||
}
|
||||
|
||||
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
|
||||
{
|
||||
const auto & value = this->data(place).value;
|
||||
size_t size = value.size();
|
||||
writeVarUInt(size, buf);
|
||||
buf.write(reinterpret_cast<const char *>(value.data()), size * sizeof(value[0]));
|
||||
}
|
||||
|
||||
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
|
||||
{
|
||||
size_t size = 0;
|
||||
readVarUInt(size, buf);
|
||||
|
||||
if (unlikely(size > AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
|
||||
"Too large array size (maximum: {})", AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE);
|
||||
|
||||
if (size > 0)
|
||||
{
|
||||
auto & value = this->data(place).value;
|
||||
value.resize(size, arena);
|
||||
buf.readStrict(reinterpret_cast<char *>(value.data()), size * sizeof(value[0]));
|
||||
this->data(place).sum = value.back();
|
||||
}
|
||||
}
|
||||
|
||||
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
|
||||
{
|
||||
const auto & data = this->data(place);
|
||||
size_t size = data.value.size();
|
||||
|
||||
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
|
||||
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
|
||||
|
||||
offsets_to.push_back(offsets_to.back() + size);
|
||||
|
||||
if (size)
|
||||
{
|
||||
typename ColumnResult::Container & data_to = assert_cast<ColumnResult &>(arr_to.getData()).getData();
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
if (!limit_num_elems)
|
||||
{
|
||||
data_to.push_back(data.get(i, size));
|
||||
}
|
||||
else
|
||||
{
|
||||
data_to.push_back(data.get(i, window_size));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool allocatesMemoryInArena() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
static auto getReturnTypeElement(const DataTypePtr & argument)
|
||||
{
|
||||
if constexpr (!is_decimal<ResultT>)
|
||||
return std::make_shared<DataTypeNumber<ResultT>>();
|
||||
else
|
||||
{
|
||||
using Res = DataTypeDecimal<ResultT>;
|
||||
return std::make_shared<Res>(Res::maxPrecision(), getDecimalScale(*argument));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#undef AGGREGATE_FUNCTION_MOVING_MAX_ARRAY_SIZE
|
||||
|
||||
}
|
@ -233,35 +233,35 @@ public:
|
||||
|
||||
void write(WriteBuffer & buf) const
|
||||
{
|
||||
writeIntBinary<size_t>(compress_threshold, buf);
|
||||
writeFloatBinary<double>(relative_error, buf);
|
||||
writeIntBinary<size_t>(count, buf);
|
||||
writeIntBinary<size_t>(sampled.size(), buf);
|
||||
writeBinaryLittleEndian(compress_threshold, buf);
|
||||
writeBinaryLittleEndian(relative_error, buf);
|
||||
writeBinaryLittleEndian(count, buf);
|
||||
writeBinaryLittleEndian(sampled.size(), buf);
|
||||
|
||||
for (const auto & stats : sampled)
|
||||
{
|
||||
writeFloatBinary<T>(stats.value, buf);
|
||||
writeIntBinary<Int64>(stats.g, buf);
|
||||
writeIntBinary<Int64>(stats.delta, buf);
|
||||
writeBinaryLittleEndian(stats.value, buf);
|
||||
writeBinaryLittleEndian(stats.g, buf);
|
||||
writeBinaryLittleEndian(stats.delta, buf);
|
||||
}
|
||||
}
|
||||
|
||||
void read(ReadBuffer & buf)
|
||||
{
|
||||
readIntBinary<size_t>(compress_threshold, buf);
|
||||
readFloatBinary<double>(relative_error, buf);
|
||||
readIntBinary<size_t>(count, buf);
|
||||
readBinaryLittleEndian(compress_threshold, buf);
|
||||
readBinaryLittleEndian(relative_error, buf);
|
||||
readBinaryLittleEndian(count, buf);
|
||||
|
||||
size_t sampled_len = 0;
|
||||
readIntBinary<size_t>(sampled_len, buf);
|
||||
readBinaryLittleEndian(sampled_len, buf);
|
||||
sampled.resize(sampled_len);
|
||||
|
||||
for (size_t i = 0; i < sampled_len; ++i)
|
||||
{
|
||||
auto stats = sampled[i];
|
||||
readFloatBinary<T>(stats.value, buf);
|
||||
readIntBinary<Int64>(stats.g, buf);
|
||||
readIntBinary<Int64>(stats.delta, buf);
|
||||
readBinaryLittleEndian(stats.value, buf);
|
||||
readBinaryLittleEndian(stats.g, buf);
|
||||
readBinaryLittleEndian(stats.delta, buf);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -207,8 +207,8 @@ public:
|
||||
|
||||
void read(DB::ReadBuffer & buf)
|
||||
{
|
||||
DB::readIntBinary<size_t>(sample_count, buf);
|
||||
DB::readIntBinary<size_t>(total_values, buf);
|
||||
DB::readBinaryLittleEndian(sample_count, buf);
|
||||
DB::readBinaryLittleEndian(total_values, buf);
|
||||
|
||||
size_t size = std::min(total_values, sample_count);
|
||||
static constexpr size_t MAX_RESERVOIR_SIZE = 1_GiB;
|
||||
@ -224,22 +224,22 @@ public:
|
||||
rng_buf >> rng;
|
||||
|
||||
for (size_t i = 0; i < samples.size(); ++i)
|
||||
DB::readBinary(samples[i], buf);
|
||||
DB::readBinaryLittleEndian(samples[i], buf);
|
||||
|
||||
sorted = false;
|
||||
}
|
||||
|
||||
void write(DB::WriteBuffer & buf) const
|
||||
{
|
||||
DB::writeIntBinary<size_t>(sample_count, buf);
|
||||
DB::writeIntBinary<size_t>(total_values, buf);
|
||||
DB::writeBinaryLittleEndian(sample_count, buf);
|
||||
DB::writeBinaryLittleEndian(total_values, buf);
|
||||
|
||||
DB::WriteBufferFromOwnString rng_buf;
|
||||
rng_buf << rng;
|
||||
DB::writeStringBinary(rng_buf.str(), buf);
|
||||
|
||||
for (size_t i = 0; i < std::min(sample_count, total_values); ++i)
|
||||
DB::writeBinary(samples[i], buf);
|
||||
DB::writeBinaryLittleEndian(samples[i], buf);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -6223,7 +6223,11 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
|
||||
const auto & insertion_table = scope_context->getInsertionTable();
|
||||
if (!insertion_table.empty())
|
||||
{
|
||||
const auto & insert_structure = DatabaseCatalog::instance().getTable(insertion_table, scope_context)->getInMemoryMetadataPtr()->getColumns();
|
||||
const auto & insert_structure = DatabaseCatalog::instance()
|
||||
.getTable(insertion_table, scope_context)
|
||||
->getInMemoryMetadataPtr()
|
||||
->getColumns()
|
||||
.getInsertable();
|
||||
DB::ColumnsDescription structure_hint;
|
||||
|
||||
bool use_columns_from_insert_query = true;
|
||||
|
@ -35,6 +35,7 @@ public:
|
||||
std::shared_ptr<IBackupCoordination> backup_coordination;
|
||||
std::optional<UUID> backup_uuid;
|
||||
bool deduplicate_files = true;
|
||||
bool allow_s3_native_copy = true;
|
||||
};
|
||||
|
||||
static BackupFactory & instance();
|
||||
|
@ -101,14 +101,16 @@ namespace
|
||||
|
||||
|
||||
BackupReaderS3::BackupReaderS3(
|
||||
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_)
|
||||
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_)
|
||||
: BackupReaderDefault(&Poco::Logger::get("BackupReaderS3"), context_)
|
||||
, s3_uri(s3_uri_)
|
||||
, client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
|
||||
, request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings)
|
||||
, data_source_description{DataSourceType::S3, s3_uri.endpoint, false, false}
|
||||
{
|
||||
request_settings.updateFromSettings(context_->getSettingsRef());
|
||||
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
|
||||
request_settings.allow_native_copy = allow_s3_native_copy;
|
||||
}
|
||||
|
||||
BackupReaderS3::~BackupReaderS3() = default;
|
||||
@ -141,8 +143,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s
|
||||
if (destination_data_source_description.sameKind(data_source_description)
|
||||
&& (destination_data_source_description.is_encrypted == encrypted_in_backup))
|
||||
{
|
||||
/// Use native copy, the more optimal way.
|
||||
LOG_TRACE(log, "Copying {} from S3 to disk {} using native copy", path_in_backup, destination_disk->getName());
|
||||
LOG_TRACE(log, "Copying {} from S3 to disk {}", path_in_backup, destination_disk->getName());
|
||||
auto write_blob_function = [&](const Strings & blob_path, WriteMode mode, const std::optional<ObjectAttributes> & object_attributes) -> size_t
|
||||
{
|
||||
/// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files.
|
||||
@ -177,7 +178,7 @@ void BackupReaderS3::copyFileToDisk(const String & path_in_backup, size_t file_s
|
||||
|
||||
|
||||
BackupWriterS3::BackupWriterS3(
|
||||
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_)
|
||||
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_)
|
||||
: BackupWriterDefault(&Poco::Logger::get("BackupWriterS3"), context_)
|
||||
, s3_uri(s3_uri_)
|
||||
, client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
|
||||
@ -186,6 +187,7 @@ BackupWriterS3::BackupWriterS3(
|
||||
{
|
||||
request_settings.updateFromSettings(context_->getSettingsRef());
|
||||
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
|
||||
request_settings.allow_native_copy = allow_s3_native_copy;
|
||||
}
|
||||
|
||||
void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src_disk, const String & src_path,
|
||||
@ -200,8 +202,7 @@ void BackupWriterS3::copyFileFromDisk(const String & path_in_backup, DiskPtr src
|
||||
/// In this case we can't use the native copy.
|
||||
if (auto blob_path = src_disk->getBlobPath(src_path); blob_path.size() == 2)
|
||||
{
|
||||
/// Use native copy, the more optimal way.
|
||||
LOG_TRACE(log, "Copying file {} from disk {} to S3 using native copy", src_path, src_disk->getName());
|
||||
LOG_TRACE(log, "Copying file {} from disk {} to S3", src_path, src_disk->getName());
|
||||
copyS3File(
|
||||
client,
|
||||
/* src_bucket */ blob_path[1],
|
||||
|
@ -17,7 +17,7 @@ namespace DB
|
||||
class BackupReaderS3 : public BackupReaderDefault
|
||||
{
|
||||
public:
|
||||
BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_);
|
||||
BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_);
|
||||
~BackupReaderS3() override;
|
||||
|
||||
bool fileExists(const String & file_name) override;
|
||||
@ -38,7 +38,7 @@ private:
|
||||
class BackupWriterS3 : public BackupWriterDefault
|
||||
{
|
||||
public:
|
||||
BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_);
|
||||
BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ContextPtr & context_);
|
||||
~BackupWriterS3() override;
|
||||
|
||||
bool fileExists(const String & file_name) override;
|
||||
|
@ -25,6 +25,7 @@ namespace ErrorCodes
|
||||
M(Bool, async) \
|
||||
M(Bool, decrypt_files_from_encrypted_disks) \
|
||||
M(Bool, deduplicate_files) \
|
||||
M(Bool, allow_s3_native_copy) \
|
||||
M(UInt64, shard_num) \
|
||||
M(UInt64, replica_num) \
|
||||
M(Bool, internal) \
|
||||
|
@ -38,6 +38,9 @@ struct BackupSettings
|
||||
/// Whether the BACKUP will omit similar files (within one backup only).
|
||||
bool deduplicate_files = true;
|
||||
|
||||
/// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs)
|
||||
bool allow_s3_native_copy = true;
|
||||
|
||||
/// 1-based shard index to store in the backup. 0 means all shards.
|
||||
/// Can only be used with BACKUP ON CLUSTER.
|
||||
size_t shard_num = 0;
|
||||
|
@ -348,6 +348,7 @@ void BackupsWorker::doBackup(
|
||||
backup_create_params.backup_coordination = backup_coordination;
|
||||
backup_create_params.backup_uuid = backup_settings.backup_uuid;
|
||||
backup_create_params.deduplicate_files = backup_settings.deduplicate_files;
|
||||
backup_create_params.allow_s3_native_copy = backup_settings.allow_s3_native_copy;
|
||||
BackupMutablePtr backup = BackupFactory::instance().createBackup(backup_create_params);
|
||||
|
||||
/// Write the backup.
|
||||
@ -647,6 +648,7 @@ void BackupsWorker::doRestore(
|
||||
backup_open_params.backup_info = backup_info;
|
||||
backup_open_params.base_backup_info = restore_settings.base_backup_info;
|
||||
backup_open_params.password = restore_settings.password;
|
||||
backup_open_params.allow_s3_native_copy = restore_settings.allow_s3_native_copy;
|
||||
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
|
||||
|
||||
String current_database = context->getCurrentDatabase();
|
||||
|
@ -161,6 +161,7 @@ namespace
|
||||
M(RestoreAccessCreationMode, create_access) \
|
||||
M(Bool, allow_unresolved_access_dependencies) \
|
||||
M(RestoreUDFCreationMode, create_function) \
|
||||
M(Bool, allow_s3_native_copy) \
|
||||
M(Bool, internal) \
|
||||
M(String, host_id) \
|
||||
M(OptionalUUID, restore_uuid)
|
||||
|
@ -107,6 +107,9 @@ struct RestoreSettings
|
||||
/// How the RESTORE command will handle if a user-defined function which it's going to restore already exists.
|
||||
RestoreUDFCreationMode create_function = RestoreUDFCreationMode::kCreateIfNotExists;
|
||||
|
||||
/// Whether native copy is allowed (optimization for cloud storages, that sometimes could have bugs)
|
||||
bool allow_s3_native_copy = true;
|
||||
|
||||
/// Internal, should not be specified by user.
|
||||
bool internal = false;
|
||||
|
||||
|
@ -107,12 +107,12 @@ void registerBackupEngineS3(BackupFactory & factory)
|
||||
|
||||
if (params.open_mode == IBackup::OpenMode::READ)
|
||||
{
|
||||
auto reader = std::make_shared<BackupReaderS3>(S3::URI{s3_uri}, access_key_id, secret_access_key, params.context);
|
||||
auto reader = std::make_shared<BackupReaderS3>(S3::URI{s3_uri}, access_key_id, secret_access_key, params.allow_s3_native_copy, params.context);
|
||||
return std::make_unique<BackupImpl>(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto writer = std::make_shared<BackupWriterS3>(S3::URI{s3_uri}, access_key_id, secret_access_key, params.context);
|
||||
auto writer = std::make_shared<BackupWriterS3>(S3::URI{s3_uri}, access_key_id, secret_access_key, params.allow_s3_native_copy, params.context);
|
||||
return std::make_unique<BackupImpl>(
|
||||
backup_name_for_logging,
|
||||
archive_params,
|
||||
|
@ -848,6 +848,9 @@ ASTs QueryFuzzer::getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query)
|
||||
|
||||
void QueryFuzzer::notifyQueryFailed(ASTPtr ast)
|
||||
{
|
||||
if (ast == nullptr)
|
||||
return;
|
||||
|
||||
auto remove_fuzzed_table = [this](const auto & table_name)
|
||||
{
|
||||
auto pos = table_name.find("__fuzz_");
|
||||
|
@ -10,7 +10,6 @@
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
|
||||
|
@ -47,6 +47,7 @@ String FileRenamer::generateNewFilename(const String & filename) const
|
||||
// Define placeholders and their corresponding values
|
||||
std::map<String, String> placeholders =
|
||||
{
|
||||
{"%a", filename},
|
||||
{"%f", file_base},
|
||||
{"%e", file_ext},
|
||||
{"%t", timestamp},
|
||||
@ -69,16 +70,17 @@ bool FileRenamer::isEmpty() const
|
||||
bool FileRenamer::validateRenamingRule(const String & rule, bool throw_on_error)
|
||||
{
|
||||
// Check if the rule contains invalid placeholders
|
||||
re2::RE2 invalid_placeholder_pattern("^([^%]|%[fet%])*$");
|
||||
re2::RE2 invalid_placeholder_pattern("^([^%]|%[afet%])*$");
|
||||
if (!re2::RE2::FullMatch(rule, invalid_placeholder_pattern))
|
||||
{
|
||||
if (throw_on_error)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid renaming rule: Allowed placeholders only %f, %e, %t, and %%");
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid renaming rule: Allowed placeholders only %a, %f, %e, %t, and %%");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Replace valid placeholders with empty strings and count remaining percentage signs.
|
||||
String replaced_rule = rule;
|
||||
boost::replace_all(replaced_rule, "%a", "");
|
||||
boost::replace_all(replaced_rule, "%f", "");
|
||||
boost::replace_all(replaced_rule, "%e", "");
|
||||
boost::replace_all(replaced_rule, "%t", "");
|
||||
|
@ -9,6 +9,7 @@ namespace DB
|
||||
/**
|
||||
* The FileRenamer class provides functionality for renaming files based on given pattern with placeholders
|
||||
* The supported placeholders are:
|
||||
* %a - Full original file name ("sample.csv")
|
||||
* %f - Original filename without extension ("sample")
|
||||
* %e - Original file extension with dot (".csv")
|
||||
* %t - Timestamp (in microseconds)
|
||||
|
@ -57,28 +57,25 @@ inline DB::UInt64 intHash64(DB::UInt64 x)
|
||||
|
||||
inline uint32_t s390x_crc32_u8(uint32_t crc, uint8_t v)
|
||||
{
|
||||
return crc32_be(crc, reinterpret_cast<unsigned char *>(&v), sizeof(v));
|
||||
return crc32c_le_vx(crc, reinterpret_cast<unsigned char *>(&v), sizeof(v));
|
||||
}
|
||||
|
||||
inline uint32_t s390x_crc32_u16(uint32_t crc, uint16_t v)
|
||||
{
|
||||
return crc32_be(crc, reinterpret_cast<unsigned char *>(&v), sizeof(v));
|
||||
v = std::byteswap(v);
|
||||
return crc32c_le_vx(crc, reinterpret_cast<unsigned char *>(&v), sizeof(v));
|
||||
}
|
||||
|
||||
inline uint32_t s390x_crc32_u32(uint32_t crc, uint32_t v)
|
||||
{
|
||||
return crc32_be(crc, reinterpret_cast<unsigned char *>(&v), sizeof(v));
|
||||
v = std::byteswap(v);
|
||||
return crc32c_le_vx(crc, reinterpret_cast<unsigned char *>(&v), sizeof(v));
|
||||
}
|
||||
|
||||
inline uint64_t s390x_crc32(uint64_t crc, uint64_t v)
|
||||
{
|
||||
uint64_t _crc = crc;
|
||||
uint32_t value_h, value_l;
|
||||
value_h = (v >> 32) & 0xffffffff;
|
||||
value_l = v & 0xffffffff;
|
||||
_crc = crc32_be(static_cast<uint32_t>(_crc), reinterpret_cast<unsigned char *>(&value_h), sizeof(uint32_t));
|
||||
_crc = crc32_be(static_cast<uint32_t>(_crc), reinterpret_cast<unsigned char *>(&value_l), sizeof(uint32_t));
|
||||
return _crc;
|
||||
v = std::byteswap(v);
|
||||
return crc32c_le_vx(static_cast<uint32_t>(crc), reinterpret_cast<unsigned char *>(&v), sizeof(uint64_t));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -52,35 +52,38 @@ public:
|
||||
{
|
||||
const auto & creator_map = getMap();
|
||||
const auto & case_insensitive_creator_map = getCaseInsensitiveMap();
|
||||
const String factory_name = getFactoryName();
|
||||
|
||||
String real_dict_name;
|
||||
if (creator_map.count(real_name))
|
||||
real_dict_name = real_name;
|
||||
else if (auto real_name_lowercase = Poco::toLower(real_name); case_insensitive_creator_map.count(real_name_lowercase))
|
||||
real_dict_name = real_name_lowercase;
|
||||
else
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: can't create alias '{}', the real name '{}' is not registered",
|
||||
factory_name, alias_name, real_name);
|
||||
auto real_name_lowercase = Poco::toLower(real_name);
|
||||
if (!creator_map.contains(real_name) && !case_insensitive_creator_map.contains(real_name_lowercase))
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"{}: can't create alias '{}', the real name '{}' is not registered",
|
||||
getFactoryName(),
|
||||
alias_name,
|
||||
real_name);
|
||||
|
||||
registerAliasUnchecked(alias_name, real_name, case_sensitiveness);
|
||||
}
|
||||
|
||||
/// We need sure the real_name exactly exists when call the function directly.
|
||||
void registerAliasUnchecked(const String & alias_name, const String & real_name, CaseSensitiveness case_sensitiveness = CaseSensitive)
|
||||
{
|
||||
String alias_name_lowercase = Poco::toLower(alias_name);
|
||||
|
||||
if (creator_map.count(alias_name) || case_insensitive_creator_map.count(alias_name_lowercase))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: the alias name '{}' is already registered as real name",
|
||||
factory_name, alias_name);
|
||||
String real_name_lowercase = Poco::toLower(real_name);
|
||||
const String factory_name = getFactoryName();
|
||||
|
||||
if (case_sensitiveness == CaseInsensitive)
|
||||
{
|
||||
if (!case_insensitive_aliases.emplace(alias_name_lowercase, real_dict_name).second)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: case insensitive alias name '{}' is not unique",
|
||||
factory_name, alias_name);
|
||||
if (!case_insensitive_aliases.emplace(alias_name_lowercase, real_name).second)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: case insensitive alias name '{}' is not unique", factory_name, alias_name);
|
||||
case_insensitive_name_mapping[alias_name_lowercase] = real_name;
|
||||
}
|
||||
|
||||
if (!aliases.emplace(alias_name, real_dict_name).second)
|
||||
if (!aliases.emplace(alias_name, real_name).second)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: alias name '{}' is not unique", factory_name, alias_name);
|
||||
}
|
||||
|
||||
|
||||
std::vector<String> getAllRegisteredNames() const override
|
||||
{
|
||||
std::vector<String> result;
|
||||
@ -93,7 +96,7 @@ public:
|
||||
bool isCaseInsensitive(const String & name) const
|
||||
{
|
||||
String name_lowercase = Poco::toLower(name);
|
||||
return getCaseInsensitiveMap().count(name_lowercase) || case_insensitive_aliases.count(name_lowercase);
|
||||
return getCaseInsensitiveMap().contains(name_lowercase) || case_insensitive_aliases.contains(name_lowercase);
|
||||
}
|
||||
|
||||
const String & aliasTo(const String & name) const
|
||||
@ -106,14 +109,11 @@ public:
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "{}: name '{}' is not alias", getFactoryName(), name);
|
||||
}
|
||||
|
||||
bool isAlias(const String & name) const
|
||||
{
|
||||
return aliases.count(name) || case_insensitive_aliases.contains(name);
|
||||
}
|
||||
bool isAlias(const String & name) const { return aliases.contains(name) || case_insensitive_aliases.contains(name); }
|
||||
|
||||
bool hasNameOrAlias(const String & name) const
|
||||
{
|
||||
return getMap().count(name) || getCaseInsensitiveMap().count(name) || isAlias(name);
|
||||
return getMap().contains(name) || getCaseInsensitiveMap().contains(name) || isAlias(name);
|
||||
}
|
||||
|
||||
/// Return the canonical name (the name used in registration) if it's different from `name`.
|
||||
@ -129,7 +129,7 @@ public:
|
||||
|
||||
private:
|
||||
using InnerMap = std::unordered_map<String, Value>; // name -> creator
|
||||
using AliasMap = std::unordered_map<String, String>; // alias -> original type
|
||||
using AliasMap = std::unordered_map<String, String>; // alias -> original name
|
||||
|
||||
virtual const InnerMap & getMap() const = 0;
|
||||
virtual const InnerMap & getCaseInsensitiveMap() const = 0;
|
||||
|
@ -10,6 +10,27 @@ namespace ErrorCodes
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
Int64 IntervalKind::toAvgNanoseconds() const
|
||||
{
|
||||
static constexpr Int64 NANOSECONDS_PER_MICROSECOND = 1000;
|
||||
static constexpr auto NANOSECONDS_PER_MILLISECOND = NANOSECONDS_PER_MICROSECOND * 1000;
|
||||
static constexpr auto NANOSECONDS_PER_SECOND = NANOSECONDS_PER_MILLISECOND * 1000;
|
||||
|
||||
switch (kind)
|
||||
{
|
||||
case IntervalKind::Millisecond:
|
||||
return NANOSECONDS_PER_MILLISECOND;
|
||||
case IntervalKind::Microsecond:
|
||||
return NANOSECONDS_PER_MICROSECOND;
|
||||
case IntervalKind::Nanosecond:
|
||||
return 1;
|
||||
default:
|
||||
return toAvgSeconds() * NANOSECONDS_PER_SECOND;
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
Int32 IntervalKind::toAvgSeconds() const
|
||||
{
|
||||
switch (kind)
|
||||
|
@ -29,6 +29,10 @@ struct IntervalKind
|
||||
|
||||
constexpr std::string_view toString() const { return magic_enum::enum_name(kind); }
|
||||
|
||||
/// Returns number of nanoseconds in one interval.
|
||||
/// For `Month`, `Quarter` and `Year` the function returns an average number of nanoseconds.
|
||||
Int64 toAvgNanoseconds() const;
|
||||
|
||||
/// Returns number of seconds in one interval.
|
||||
/// For `Month`, `Quarter` and `Year` the function returns an average number of seconds.
|
||||
Int32 toAvgSeconds() const;
|
||||
|
@ -6,17 +6,13 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
struct MemoryTrackerSwitcher
|
||||
{
|
||||
explicit MemoryTrackerSwitcher(MemoryTracker * new_tracker)
|
||||
{
|
||||
/// current_thread is not initialized for the main thread, so simply do not switch anything
|
||||
if (!current_thread)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "current_thread is not initialized");
|
||||
return;
|
||||
|
||||
auto * thread_tracker = CurrentThread::getMemoryTracker();
|
||||
prev_untracked_memory = current_thread->untracked_memory;
|
||||
@ -28,6 +24,10 @@ struct MemoryTrackerSwitcher
|
||||
|
||||
~MemoryTrackerSwitcher()
|
||||
{
|
||||
/// current_thread is not initialized for the main thread, so simply do not switch anything
|
||||
if (!current_thread)
|
||||
return;
|
||||
|
||||
CurrentThread::flushUntrackedMemory();
|
||||
auto * thread_tracker = CurrentThread::getMemoryTracker();
|
||||
|
||||
@ -35,6 +35,7 @@ struct MemoryTrackerSwitcher
|
||||
thread_tracker->setParent(prev_memory_tracker_parent);
|
||||
}
|
||||
|
||||
private:
|
||||
MemoryTracker * prev_memory_tracker_parent = nullptr;
|
||||
Int64 prev_untracked_memory = 0;
|
||||
};
|
||||
|
@ -59,4 +59,10 @@ inline void transformEndianness(std::pair<A, B> & pair)
|
||||
transformEndianness<endian>(pair.first);
|
||||
transformEndianness<endian>(pair.second);
|
||||
}
|
||||
|
||||
template <std::endian endian, typename T, typename Tag>
|
||||
inline void transformEndianness(StrongTypedef<T, Tag> & x)
|
||||
{
|
||||
transformEndianness<endian>(x.toUnderType());
|
||||
}
|
||||
}
|
||||
|
@ -25,8 +25,6 @@ void Pool::Entry::incrementRefCount()
|
||||
/// First reference, initialize thread
|
||||
if (data->ref_count.fetch_add(1) == 0)
|
||||
mysql_thread_init();
|
||||
|
||||
chassert(!data->removed_from_pool);
|
||||
}
|
||||
|
||||
|
||||
@ -43,7 +41,10 @@ void Pool::Entry::decrementRefCount()
|
||||
/// In Pool::Entry::disconnect() we remove connection from the list of pool's connections.
|
||||
/// So now we must deallocate the memory.
|
||||
if (data->removed_from_pool)
|
||||
{
|
||||
data->conn.disconnect();
|
||||
::delete data;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -230,8 +231,6 @@ void Pool::removeConnection(Connection* connection)
|
||||
std::lock_guard lock(mutex);
|
||||
if (connection)
|
||||
{
|
||||
if (!connection->removed_from_pool)
|
||||
connection->conn.disconnect();
|
||||
connections.remove(connection);
|
||||
connection->removed_from_pool = true;
|
||||
}
|
||||
@ -240,6 +239,7 @@ void Pool::removeConnection(Connection* connection)
|
||||
|
||||
void Pool::Entry::disconnect()
|
||||
{
|
||||
// Remove the Entry from the Pool. Actual disconnection is delayed until refcount == 0.
|
||||
pool->removeConnection(data);
|
||||
}
|
||||
|
||||
|
@ -18,6 +18,20 @@
|
||||
#include <unistd.h>
|
||||
#include <bit>
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
String formatZxid(int64_t zxid)
|
||||
{
|
||||
/// ZooKeeper print zxid in hex and
|
||||
String hex = getHexUIntLowercase(zxid);
|
||||
/// without leading zeros
|
||||
trimLeft(hex, '0');
|
||||
return "0x" + hex;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -348,7 +362,7 @@ String ServerStatCommand::run()
|
||||
write("Sent", toString(stats.getPacketsSent()));
|
||||
write("Connections", toString(keeper_info.alive_connections_count));
|
||||
write("Outstanding", toString(keeper_info.outstanding_requests_count));
|
||||
write("Zxid", toString(keeper_info.last_zxid));
|
||||
write("Zxid", formatZxid(keeper_info.last_zxid));
|
||||
write("Mode", keeper_info.getRole());
|
||||
write("Node count", toString(keeper_info.total_nodes_count));
|
||||
|
||||
@ -381,7 +395,7 @@ String StatCommand::run()
|
||||
write("Sent", toString(stats.getPacketsSent()));
|
||||
write("Connections", toString(keeper_info.alive_connections_count));
|
||||
write("Outstanding", toString(keeper_info.outstanding_requests_count));
|
||||
write("Zxid", toString(keeper_info.last_zxid));
|
||||
write("Zxid", formatZxid(keeper_info.last_zxid));
|
||||
write("Mode", keeper_info.getRole());
|
||||
write("Node count", toString(keeper_info.total_nodes_count));
|
||||
|
||||
|
@ -48,11 +48,7 @@ inline auto scaleMultiplier(UInt32 scale)
|
||||
|
||||
/** Components of DecimalX value:
|
||||
* whole - represents whole part of decimal, can be negative or positive.
|
||||
* fractional - for fractional part of decimal.
|
||||
*
|
||||
* 0.123 represents 0 / 0.123
|
||||
* -0.123 represents 0 / -0.123
|
||||
* -1.123 represents -1 / 0.123
|
||||
* fractional - for fractional part of decimal, always positive.
|
||||
*/
|
||||
template <typename DecimalType>
|
||||
struct DecimalComponents
|
||||
|
@ -577,6 +577,7 @@ class IColumn;
|
||||
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
|
||||
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
|
||||
M(Bool, optimize_use_projections, true, "Automatically choose projections to perform SELECT query", 0) ALIAS(allow_experimental_projection_optimization) \
|
||||
M(Bool, optimize_use_implicit_projections, false, "Automatically choose implicit projections to perform SELECT query", 0) \
|
||||
M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
|
||||
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
|
||||
M(Bool, async_query_sending_for_remote, true, "Asynchronously create connections and send query to shards in remote query", 0) \
|
||||
@ -629,7 +630,7 @@ class IColumn;
|
||||
M(Bool, database_replicated_allow_only_replicated_engine, false, "Allow to create only Replicated tables in database with engine Replicated", 0) \
|
||||
M(Bool, database_replicated_allow_replicated_engine_arguments, true, "Allow to create only Replicated tables in database with engine Replicated with explicit arguments", 0) \
|
||||
M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result", 0) \
|
||||
M(UInt64, distributed_ddl_entry_format_version, 3, "Compatibility version of distributed DDL (ON CLUSTER) queries", 0) \
|
||||
M(UInt64, distributed_ddl_entry_format_version, 5, "Compatibility version of distributed DDL (ON CLUSTER) queries", 0) \
|
||||
\
|
||||
M(UInt64, external_storage_max_read_rows, 0, "Limit maximum number of rows when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled", 0) \
|
||||
M(UInt64, external_storage_max_read_bytes, 0, "Limit maximum number of bytes when table with external engine should flush history data. Now supported only for MySQL table engine, database engine, dictionary and MaterializedMySQL. If equal to 0, this setting is disabled", 0) \
|
||||
@ -736,7 +737,7 @@ class IColumn;
|
||||
M(String, workload, "default", "Name of workload to be used to access resources", 0) \
|
||||
M(Milliseconds, storage_system_stack_trace_pipe_read_timeout_ms, 100, "Maximum time to read from a pipe for receiving information from the threads when querying the `system.stack_trace` table. This setting is used for testing purposes and not meant to be changed by users.", 0) \
|
||||
\
|
||||
M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \
|
||||
M(String, rename_files_after_processing, "", "Rename successfully processed files according to the specified pattern; Pattern can include the following placeholders: `%a` (full original file name), `%f` (original filename without extension), `%e` (file extension with dot), `%t` (current timestamp in µs), and `%%` (% sign)", 0) \
|
||||
\
|
||||
M(Bool, parallelize_output_from_storages, true, "Parallelize output for reading step from storage. It allows parallelizing query processing right after reading from storage if possible", 0) \
|
||||
M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
|
||||
@ -774,6 +775,7 @@ class IColumn;
|
||||
M(Bool, keeper_map_strict_mode, false, "Enforce additional checks during operations on KeeperMap. E.g. throw an exception on an insert for already existing key", 0) \
|
||||
M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
|
||||
M(Timezone, session_timezone, "", "The default timezone for current session or query. The server default timezone if empty.", 0) \
|
||||
M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\
|
||||
// End of COMMON_SETTINGS
|
||||
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
|
||||
|
||||
@ -906,6 +908,7 @@ class IColumn;
|
||||
\
|
||||
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \
|
||||
M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \
|
||||
M(IntervalOutputFormat, interval_output_format, FormatSettings::IntervalOutputFormat::Numeric, "Textual representation of Interval. Possible values: 'kusto', 'numeric'.", 0) \
|
||||
\
|
||||
M(Bool, input_format_ipv4_default_on_conversion_error, false, "Deserialization of IPv4 will use default values instead of throwing exception on conversion error.", 0) \
|
||||
M(Bool, input_format_ipv6_default_on_conversion_error, false, "Deserialization of IPV6 will use default values instead of throwing exception on conversion error.", 0) \
|
||||
|
@ -80,6 +80,7 @@ namespace SettingsChangesHistory
|
||||
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
|
||||
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
||||
{
|
||||
{"23.7", {{"optimize_use_implicit_projections", true, false, "Disable implicit projections due to unexpected results."}}},
|
||||
{"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
|
||||
{"http_receive_timeout", 180, 30, "See http_send_timeout."}}},
|
||||
{"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."},
|
||||
|
@ -79,6 +79,10 @@ IMPLEMENT_SETTING_ENUM(DateTimeOutputFormat, ErrorCodes::BAD_ARGUMENTS,
|
||||
{"iso", FormatSettings::DateTimeOutputFormat::ISO},
|
||||
{"unix_timestamp", FormatSettings::DateTimeOutputFormat::UnixTimestamp}})
|
||||
|
||||
IMPLEMENT_SETTING_ENUM(IntervalOutputFormat, ErrorCodes::BAD_ARGUMENTS,
|
||||
{{"kusto", FormatSettings::IntervalOutputFormat::Kusto},
|
||||
{"numeric", FormatSettings::IntervalOutputFormat::Numeric}})
|
||||
|
||||
IMPLEMENT_SETTING_AUTO_ENUM(LogsLevel, ErrorCodes::BAD_ARGUMENTS)
|
||||
|
||||
IMPLEMENT_SETTING_AUTO_ENUM(LogQueriesType, ErrorCodes::BAD_ARGUMENTS)
|
||||
|
@ -72,6 +72,8 @@ DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeInputFormat, FormatSettings::DateTimeIn
|
||||
|
||||
DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeOutputFormat, FormatSettings::DateTimeOutputFormat)
|
||||
|
||||
DECLARE_SETTING_ENUM_WITH_RENAME(IntervalOutputFormat, FormatSettings::IntervalOutputFormat)
|
||||
|
||||
DECLARE_SETTING_ENUM_WITH_RENAME(ParquetVersion, FormatSettings::ParquetVersion)
|
||||
|
||||
enum class LogsLevel
|
||||
|
@ -1,16 +1,18 @@
|
||||
#include <DataTypes/DataTypeInterval.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/Serializations/SerializationInterval.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
SerializationPtr DataTypeInterval::doGetDefaultSerialization() const { return std::make_shared<SerializationInterval>(kind); }
|
||||
|
||||
bool DataTypeInterval::equals(const IDataType & rhs) const
|
||||
{
|
||||
return typeid(rhs) == typeid(*this) && kind == static_cast<const DataTypeInterval &>(rhs).kind;
|
||||
}
|
||||
|
||||
|
||||
void registerDataTypeInterval(DataTypeFactory & factory)
|
||||
{
|
||||
factory.registerSimpleDataType("IntervalNanosecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Nanosecond)); });
|
||||
|
@ -24,6 +24,7 @@ public:
|
||||
|
||||
explicit DataTypeInterval(IntervalKind kind_) : kind(kind_) {}
|
||||
|
||||
SerializationPtr doGetDefaultSerialization() const override;
|
||||
std::string doGetName() const override { return fmt::format("Interval{}", kind.toString()); }
|
||||
const char * getFamilyName() const override { return "Interval"; }
|
||||
String getSQLCompatibleName() const override { return "TEXT"; }
|
||||
|
@ -410,21 +410,29 @@ inline bool isDateTime(const T & data_type) { return WhichDataType(data_type).is
|
||||
template <typename T>
|
||||
inline bool isDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTime64(); }
|
||||
|
||||
inline bool isEnum(const DataTypePtr & data_type) { return WhichDataType(data_type).isEnum(); }
|
||||
inline bool isDecimal(const DataTypePtr & data_type) { return WhichDataType(data_type).isDecimal(); }
|
||||
inline bool isTuple(const DataTypePtr & data_type) { return WhichDataType(data_type).isTuple(); }
|
||||
inline bool isArray(const DataTypePtr & data_type) { return WhichDataType(data_type).isArray(); }
|
||||
inline bool isMap(const DataTypePtr & data_type) {return WhichDataType(data_type).isMap(); }
|
||||
inline bool isInterval(const DataTypePtr & data_type) {return WhichDataType(data_type).isInterval(); }
|
||||
inline bool isNothing(const DataTypePtr & data_type) { return WhichDataType(data_type).isNothing(); }
|
||||
inline bool isUUID(const DataTypePtr & data_type) { return WhichDataType(data_type).isUUID(); }
|
||||
inline bool isIPv4(const DataTypePtr & data_type) { return WhichDataType(data_type).isIPv4(); }
|
||||
inline bool isIPv6(const DataTypePtr & data_type) { return WhichDataType(data_type).isIPv6(); }
|
||||
template <typename T>
|
||||
inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); }
|
||||
template <typename T>
|
||||
inline bool isDecimal(const T & data_type) { return WhichDataType(data_type).isDecimal(); }
|
||||
template <typename T>
|
||||
inline bool isTuple(const T & data_type) { return WhichDataType(data_type).isTuple(); }
|
||||
template <typename T>
|
||||
inline bool isArray(const T & data_type) { return WhichDataType(data_type).isArray(); }
|
||||
template <typename T>
|
||||
inline bool isMap(const T & data_type) {return WhichDataType(data_type).isMap(); }
|
||||
template <typename T>
|
||||
inline bool isInterval(const T & data_type) {return WhichDataType(data_type).isInterval(); }
|
||||
template <typename T>
|
||||
inline bool isNothing(const T & data_type) { return WhichDataType(data_type).isNothing(); }
|
||||
template <typename T>
|
||||
inline bool isUUID(const T & data_type) { return WhichDataType(data_type).isUUID(); }
|
||||
template <typename T>
|
||||
inline bool isIPv4(const T & data_type) { return WhichDataType(data_type).isIPv4(); }
|
||||
template <typename T>
|
||||
inline bool isIPv6(const T & data_type) { return WhichDataType(data_type).isIPv6(); }
|
||||
|
||||
template <typename T>
|
||||
inline bool isObject(const T & data_type)
|
||||
{
|
||||
return WhichDataType(data_type).isObject();
|
||||
inline bool isObject(const T & data_type) { return WhichDataType(data_type).isObject();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
209
src/DataTypes/Serializations/SerializationInterval.cpp
Normal file
209
src/DataTypes/Serializations/SerializationInterval.cpp
Normal file
@ -0,0 +1,209 @@
|
||||
#include "SerializationInterval.h"
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <Parsers/Kusto/Formatters.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using ColumnInterval = DataTypeInterval::ColumnType;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
void SerializationKustoInterval::serializeText(
|
||||
const IColumn & column, const size_t row, WriteBuffer & ostr, const FormatSettings &) const
|
||||
{
|
||||
const auto * interval_column = checkAndGetColumn<ColumnInterval>(column);
|
||||
if (!interval_column)
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Expected column of underlying type of Interval");
|
||||
|
||||
const auto & value = interval_column->getData()[row];
|
||||
const auto ticks = kind.toAvgNanoseconds() * value / 100;
|
||||
const auto interval_as_string = formatKQLTimespan(ticks);
|
||||
ostr.write(interval_as_string.c_str(), interval_as_string.length());
|
||||
}
|
||||
|
||||
void SerializationKustoInterval::deserializeText(
|
||||
[[maybe_unused]] IColumn & column,
|
||||
[[maybe_unused]] ReadBuffer & istr,
|
||||
[[maybe_unused]] const FormatSettings & settings,
|
||||
[[maybe_unused]] const bool whole) const
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::NOT_IMPLEMENTED, "Deserialization is not implemented for {}", kind.toNameOfFunctionToIntervalDataType());
|
||||
}
|
||||
|
||||
SerializationInterval::SerializationInterval(IntervalKind interval_kind_) : interval_kind(std::move(interval_kind_))
|
||||
{
|
||||
}
|
||||
|
||||
void SerializationInterval::deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(
|
||||
static_cast<void (ISerialization::*)(Field &, ReadBuffer &, const FormatSettings &) const>(&ISerialization::deserializeBinary),
|
||||
settings.interval.output_format,
|
||||
field,
|
||||
istr,
|
||||
settings);
|
||||
}
|
||||
|
||||
void SerializationInterval::deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(
|
||||
static_cast<void (ISerialization::*)(IColumn &, ReadBuffer &, const FormatSettings &) const>(&ISerialization::deserializeBinary),
|
||||
settings.interval.output_format,
|
||||
column,
|
||||
istr,
|
||||
settings);
|
||||
}
|
||||
|
||||
void SerializationInterval::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const
|
||||
{
|
||||
dispatch(
|
||||
&ISerialization::deserializeBinaryBulk, FormatSettings::IntervalOutputFormat::Numeric, column, istr, limit, avg_value_size_hint);
|
||||
}
|
||||
|
||||
void SerializationInterval::deserializeBinaryBulkStatePrefix(
|
||||
DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
dispatch(&ISerialization::deserializeBinaryBulkStatePrefix, FormatSettings::IntervalOutputFormat::Numeric, settings, state);
|
||||
}
|
||||
|
||||
|
||||
void SerializationInterval::deserializeBinaryBulkWithMultipleStreams(
|
||||
ColumnPtr & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const
|
||||
{
|
||||
dispatch(
|
||||
&ISerialization::deserializeBinaryBulkWithMultipleStreams,
|
||||
FormatSettings::IntervalOutputFormat::Numeric,
|
||||
column,
|
||||
limit,
|
||||
settings,
|
||||
state,
|
||||
cache);
|
||||
}
|
||||
|
||||
|
||||
void SerializationInterval::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(&ISerialization::deserializeTextCSV, settings.interval.output_format, column, istr, settings);
|
||||
}
|
||||
|
||||
void SerializationInterval::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(&ISerialization::deserializeTextEscaped, settings.interval.output_format, column, istr, settings);
|
||||
}
|
||||
|
||||
void SerializationInterval::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(&ISerialization::deserializeTextJSON, settings.interval.output_format, column, istr, settings);
|
||||
}
|
||||
|
||||
void SerializationInterval::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(&ISerialization::deserializeTextQuoted, settings.interval.output_format, column, istr, settings);
|
||||
}
|
||||
|
||||
void SerializationInterval::deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(&ISerialization::deserializeTextRaw, settings.interval.output_format, column, istr, settings);
|
||||
}
|
||||
|
||||
|
||||
void SerializationInterval::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(&ISerialization::deserializeWholeText, settings.interval.output_format, column, istr, settings);
|
||||
}
|
||||
|
||||
void SerializationInterval::serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(
|
||||
static_cast<void (ISerialization::*)(const Field &, WriteBuffer &, const FormatSettings &) const>(&ISerialization::serializeBinary),
|
||||
settings.interval.output_format,
|
||||
field,
|
||||
ostr,
|
||||
settings);
|
||||
}
|
||||
|
||||
void SerializationInterval::serializeBinary(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(
|
||||
static_cast<void (ISerialization::*)(const IColumn &, size_t, WriteBuffer &, const FormatSettings &) const>(
|
||||
&ISerialization::serializeBinary),
|
||||
settings.interval.output_format,
|
||||
column,
|
||||
row,
|
||||
ostr,
|
||||
settings);
|
||||
}
|
||||
|
||||
void SerializationInterval::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
|
||||
{
|
||||
dispatch(&ISerialization::serializeBinaryBulk, FormatSettings::IntervalOutputFormat::Numeric, column, ostr, offset, limit);
|
||||
}
|
||||
|
||||
void SerializationInterval::serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column, SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
dispatch(&ISerialization::serializeBinaryBulkStatePrefix, FormatSettings::IntervalOutputFormat::Numeric, column, settings, state);
|
||||
}
|
||||
|
||||
void SerializationInterval::serializeBinaryBulkStateSuffix(
|
||||
SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
dispatch(&ISerialization::serializeBinaryBulkStateSuffix, FormatSettings::IntervalOutputFormat::Numeric, settings, state);
|
||||
}
|
||||
|
||||
void SerializationInterval::serializeBinaryBulkWithMultipleStreams(
|
||||
const IColumn & column, size_t offset, size_t limit, SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const
|
||||
{
|
||||
dispatch(
|
||||
&ISerialization::serializeBinaryBulkWithMultipleStreams,
|
||||
FormatSettings::IntervalOutputFormat::Numeric,
|
||||
column,
|
||||
offset,
|
||||
limit,
|
||||
settings,
|
||||
state);
|
||||
}
|
||||
|
||||
void SerializationInterval::serializeText(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(&ISerialization::serializeText, settings.interval.output_format, column, row, ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationInterval::serializeTextCSV(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(&ISerialization::serializeTextCSV, settings.interval.output_format, column, row, ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationInterval::serializeTextEscaped(
|
||||
const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(&ISerialization::serializeTextEscaped, settings.interval.output_format, column, row, ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationInterval::serializeTextJSON(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(&ISerialization::serializeTextJSON, settings.interval.output_format, column, row, ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationInterval::serializeTextQuoted(
|
||||
const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(&ISerialization::serializeTextQuoted, settings.interval.output_format, column, row, ostr, settings);
|
||||
}
|
||||
|
||||
void SerializationInterval::serializeTextRaw(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
dispatch(&ISerialization::serializeTextRaw, settings.interval.output_format, column, row, ostr, settings);
|
||||
}
|
||||
}
|
90
src/DataTypes/Serializations/SerializationInterval.h
Normal file
90
src/DataTypes/Serializations/SerializationInterval.h
Normal file
@ -0,0 +1,90 @@
|
||||
#pragma once
|
||||
|
||||
#include "ISerialization.h"
|
||||
#include "SerializationCustomSimpleText.h"
|
||||
|
||||
#include <DataTypes/DataTypeInterval.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Common/IntervalKind.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
class SerializationKustoInterval : public SerializationCustomSimpleText
|
||||
{
|
||||
public:
|
||||
explicit SerializationKustoInterval(IntervalKind kind_) : SerializationCustomSimpleText(nullptr), kind(kind_) { }
|
||||
|
||||
void serializeText(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override;
|
||||
|
||||
private:
|
||||
IntervalKind kind;
|
||||
};
|
||||
|
||||
class SerializationInterval : public ISerialization
|
||||
{
|
||||
public:
|
||||
explicit SerializationInterval(IntervalKind kind_);
|
||||
|
||||
void deserializeBinary(Field & field, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void deserializeBinary(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
|
||||
void deserializeBinaryBulkStatePrefix(DeserializeBinaryBulkSettings & settings, DeserializeBinaryBulkStatePtr & state) const override;
|
||||
void deserializeBinaryBulkWithMultipleStreams(
|
||||
ColumnPtr & column,
|
||||
size_t limit,
|
||||
DeserializeBinaryBulkSettings & settings,
|
||||
DeserializeBinaryBulkStatePtr & state,
|
||||
SubstreamsCache * cache) const override;
|
||||
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void deserializeTextRaw(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
|
||||
|
||||
void serializeBinary(const Field & field, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void serializeBinary(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
|
||||
void serializeBinaryBulkStatePrefix(
|
||||
const IColumn & column, SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const override;
|
||||
void serializeBinaryBulkStateSuffix(SerializeBinaryBulkSettings & settings, SerializeBinaryBulkStatePtr & state) const override;
|
||||
void serializeBinaryBulkWithMultipleStreams(
|
||||
const IColumn & column,
|
||||
size_t offset,
|
||||
size_t limit,
|
||||
SerializeBinaryBulkSettings & settings,
|
||||
SerializeBinaryBulkStatePtr & state) const override;
|
||||
void serializeText(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void serializeTextCSV(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void serializeTextEscaped(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void serializeTextJSON(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void serializeTextQuoted(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
void serializeTextRaw(const IColumn & column, size_t row, WriteBuffer & ostr, const FormatSettings & settings) const override;
|
||||
|
||||
private:
|
||||
template <typename... Args, std::invocable<const ISerialization *, Args...> Method>
|
||||
void dispatch(const Method method, const FormatSettings::IntervalOutputFormat format, Args &&... args) const
|
||||
{
|
||||
const ISerialization * serialization = nullptr;
|
||||
if (format == FormatSettings::IntervalOutputFormat::Kusto)
|
||||
serialization = &serialization_kusto;
|
||||
else if (format == FormatSettings::IntervalOutputFormat::Numeric)
|
||||
serialization = &serialization_numeric;
|
||||
|
||||
if (!serialization)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Option {} is not implemented", magic_enum::enum_name(format));
|
||||
|
||||
(serialization->*method)(std::forward<Args>(args)...);
|
||||
}
|
||||
|
||||
IntervalKind interval_kind;
|
||||
SerializationKustoInterval serialization_kusto{interval_kind};
|
||||
SerializationNumber<typename DataTypeInterval::FieldType> serialization_numeric;
|
||||
};
|
||||
}
|
@ -2,7 +2,6 @@
|
||||
#include <Core/Field.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <DataTypes/getMostSubtype.h>
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
|
@ -87,6 +87,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.custom.skip_trailing_empty_lines = settings.input_format_custom_skip_trailing_empty_lines;
|
||||
format_settings.date_time_input_format = settings.date_time_input_format;
|
||||
format_settings.date_time_output_format = settings.date_time_output_format;
|
||||
format_settings.interval.output_format = settings.interval_output_format;
|
||||
format_settings.input_format_ipv4_default_on_conversion_error = settings.input_format_ipv4_default_on_conversion_error;
|
||||
format_settings.input_format_ipv6_default_on_conversion_error = settings.input_format_ipv6_default_on_conversion_error;
|
||||
format_settings.bool_true_representation = settings.bool_true_representation;
|
||||
|
@ -77,6 +77,17 @@ struct FormatSettings
|
||||
|
||||
DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple;
|
||||
|
||||
enum class IntervalOutputFormat
|
||||
{
|
||||
Kusto,
|
||||
Numeric
|
||||
};
|
||||
|
||||
struct
|
||||
{
|
||||
IntervalOutputFormat output_format = IntervalOutputFormat::Numeric;
|
||||
} interval;
|
||||
|
||||
bool input_format_ipv4_default_on_conversion_error = false;
|
||||
bool input_format_ipv6_default_on_conversion_error = false;
|
||||
|
||||
|
@ -19,9 +19,6 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
static constexpr auto microsecond_multiplier = 1000000;
|
||||
static constexpr auto millisecond_multiplier = 1000;
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
@ -1380,36 +1377,6 @@ struct ToRelativeSecondNumImpl
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
template <Int64 scale_multiplier>
|
||||
struct ToRelativeSubsecondNumImpl
|
||||
{
|
||||
static constexpr auto name = "toRelativeSubsecondNumImpl";
|
||||
|
||||
static inline Int64 execute(const DateTime64 & t, DateTime64::NativeType scale, const DateLUTImpl &)
|
||||
{
|
||||
static_assert(scale_multiplier == 1000 || scale_multiplier == 1000000);
|
||||
if (scale == scale_multiplier)
|
||||
return t.value;
|
||||
if (scale > scale_multiplier)
|
||||
return t.value / (scale / scale_multiplier);
|
||||
return t.value * (scale_multiplier / scale);
|
||||
}
|
||||
static inline Int64 execute(UInt32 t, const DateLUTImpl &)
|
||||
{
|
||||
return t * scale_multiplier;
|
||||
}
|
||||
static inline Int64 execute(Int32 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return static_cast<Int64>(time_zone.fromDayNum(ExtendedDayNum(d))) * scale_multiplier;
|
||||
}
|
||||
static inline Int64 execute(UInt16 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return static_cast<Int64>(time_zone.fromDayNum(DayNum(d)) * scale_multiplier);
|
||||
}
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
struct ToYYYYMMImpl
|
||||
{
|
||||
static constexpr auto name = "toYYYYMM";
|
||||
@ -1509,47 +1476,25 @@ struct ToYYYYMMDDhhmmssImpl
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
struct DateTimeComponentsWithFractionalPart : public DateLUTImpl::DateTimeComponents
|
||||
{
|
||||
UInt16 millisecond;
|
||||
UInt16 microsecond;
|
||||
};
|
||||
|
||||
struct ToDateTimeComponentsImpl
|
||||
{
|
||||
static constexpr auto name = "toDateTimeComponents";
|
||||
|
||||
static inline DateTimeComponentsWithFractionalPart execute(const DateTime64 & t, DateTime64::NativeType scale_multiplier, const DateLUTImpl & time_zone)
|
||||
static inline DateLUTImpl::DateTimeComponents execute(Int64 t, const DateLUTImpl & time_zone)
|
||||
{
|
||||
auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
|
||||
|
||||
if (t.value < 0 && components.fractional)
|
||||
{
|
||||
components.fractional = scale_multiplier + (components.whole ? Int64(-1) : Int64(1)) * components.fractional;
|
||||
--components.whole;
|
||||
}
|
||||
Int64 fractional = components.fractional;
|
||||
if (scale_multiplier > microsecond_multiplier)
|
||||
fractional = fractional / (scale_multiplier / microsecond_multiplier);
|
||||
else if (scale_multiplier < microsecond_multiplier)
|
||||
fractional = fractional * (microsecond_multiplier / scale_multiplier);
|
||||
|
||||
constexpr Int64 divider = microsecond_multiplier/ millisecond_multiplier;
|
||||
UInt16 millisecond = static_cast<UInt16>(fractional / divider);
|
||||
UInt16 microsecond = static_cast<UInt16>(fractional % divider);
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(components.whole), millisecond, microsecond};
|
||||
return time_zone.toDateTimeComponents(t);
|
||||
}
|
||||
static inline DateTimeComponentsWithFractionalPart execute(UInt32 t, const DateLUTImpl & time_zone)
|
||||
static inline DateLUTImpl::DateTimeComponents execute(UInt32 t, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(static_cast<DateLUTImpl::Time>(t)), 0, 0};
|
||||
return time_zone.toDateTimeComponents(static_cast<DateLUTImpl::Time>(t));
|
||||
}
|
||||
static inline DateTimeComponentsWithFractionalPart execute(Int32 d, const DateLUTImpl & time_zone)
|
||||
static inline DateLUTImpl::DateTimeComponents execute(Int32 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(ExtendedDayNum(d)), 0, 0};
|
||||
return time_zone.toDateTimeComponents(ExtendedDayNum(d));
|
||||
}
|
||||
static inline DateTimeComponentsWithFractionalPart execute(UInt16 d, const DateLUTImpl & time_zone)
|
||||
static inline DateLUTImpl::DateTimeComponents execute(UInt16 d, const DateLUTImpl & time_zone)
|
||||
{
|
||||
return DateTimeComponentsWithFractionalPart{time_zone.toDateTimeComponents(DayNum(d)), 0, 0};
|
||||
return time_zone.toDateTimeComponents(DayNum(d));
|
||||
}
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
|
@ -1112,6 +1112,11 @@ private:
|
||||
bool c0_const = isColumnConst(*c0);
|
||||
bool c1_const = isColumnConst(*c1);
|
||||
|
||||
/// This is a paranoid check to protect from a broken query analysis.
|
||||
if (c0->isNullable() != c1->isNullable())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Logical error: columns are assumed to be of identical types, but they are different in Nullable");
|
||||
|
||||
if (c0_const && c1_const)
|
||||
{
|
||||
UInt8 res = 0;
|
||||
|
@ -203,6 +203,21 @@ struct ConvertImpl
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<FromDataType, DataTypeUUID> && std::is_same_v<ToDataType,DataTypeUInt128>)
|
||||
{
|
||||
static_assert(std::is_same_v<DataTypeUInt128::FieldType, DataTypeUUID::FieldType::UnderlyingType>, "UInt128 and UUID types must be same");
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
{
|
||||
vec_to[i].items[1] = vec_from[i].toUnderType().items[0];
|
||||
vec_to[i].items[0] = vec_from[i].toUnderType().items[1];
|
||||
}
|
||||
else
|
||||
{
|
||||
vec_to[i] = vec_from[i].toUnderType();
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<FromDataType, DataTypeUUID> != std::is_same_v<ToDataType, DataTypeUUID>)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
|
||||
|
@ -39,6 +39,9 @@ struct HasTokenImpl
|
||||
if (start_pos != nullptr)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function '{}' does not support start_pos argument", name);
|
||||
|
||||
if (pattern.empty())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Needle cannot be empty, because empty string isn't a token");
|
||||
|
||||
if (haystack_offsets.empty())
|
||||
return;
|
||||
|
||||
|
@ -133,8 +133,6 @@ struct LowerUpperUTF8Impl
|
||||
}
|
||||
else
|
||||
{
|
||||
static const Poco::UTF8Encoding utf8;
|
||||
|
||||
size_t src_sequence_length = UTF8::seqLength(*src);
|
||||
/// In case partial buffer was passed (due to SSE optimization)
|
||||
/// we cannot convert it with current src_end, but we may have more
|
||||
|
@ -5,7 +5,7 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/** Transform-type wrapper for DateTime64, simplifies DateTime64 support for given Transform.
|
||||
/** Tansform-type wrapper for DateTime64, simplifies DateTime64 support for given Transform.
|
||||
*
|
||||
* Depending on what overloads of Transform::execute() are available, when called with DateTime64 value,
|
||||
* invokes Transform::execute() with either:
|
||||
@ -80,10 +80,7 @@ public:
|
||||
}
|
||||
else
|
||||
{
|
||||
auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
|
||||
if (t.value < 0 && components.fractional)
|
||||
--components.whole;
|
||||
|
||||
const auto components = DecimalUtils::splitWithScaleMultiplier(t, scale_multiplier);
|
||||
return wrapped_transform.execute(static_cast<Int64>(components.whole), std::forward<Args>(args)...);
|
||||
}
|
||||
}
|
||||
|
@ -1,6 +1,5 @@
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <Core/Types_fwd.h>
|
||||
#include <DataTypes/Serializations/ISerialization.h>
|
||||
#include <Functions/castTypeToEither.h>
|
||||
|
161
src/Functions/array/arrayJaccardIndex.cpp
Normal file
161
src/Functions/array/arrayJaccardIndex.cpp
Normal file
@ -0,0 +1,161 @@
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypeNothing.h>
|
||||
#include <DataTypes/getMostSubtype.h>
|
||||
#include <Core/ColumnsWithTypeAndName.h>
|
||||
#include <Core/ColumnWithTypeAndName.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
#include <base/types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
class FunctionArrayJaccardIndex : public IFunction
|
||||
{
|
||||
private:
|
||||
using ResultType = Float64;
|
||||
|
||||
struct LeftAndRightSizes
|
||||
{
|
||||
size_t left_size;
|
||||
size_t right_size;
|
||||
};
|
||||
|
||||
template <bool left_is_const, bool right_is_const>
|
||||
static LeftAndRightSizes getArraySizes(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t i)
|
||||
{
|
||||
size_t left_size;
|
||||
size_t right_size;
|
||||
|
||||
if constexpr (left_is_const)
|
||||
left_size = left_offsets[0];
|
||||
else
|
||||
left_size = left_offsets[i] - left_offsets[i - 1];
|
||||
|
||||
if constexpr (right_is_const)
|
||||
right_size = right_offsets[0];
|
||||
else
|
||||
right_size = right_offsets[i] - right_offsets[i - 1];
|
||||
|
||||
return {left_size, right_size};
|
||||
}
|
||||
|
||||
template <bool left_is_const, bool right_is_const>
|
||||
static void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray<ResultType> & res)
|
||||
{
|
||||
for (size_t i = 0; i < res.size(); ++i)
|
||||
{
|
||||
LeftAndRightSizes sizes = getArraySizes<left_is_const, right_is_const>(left_offsets, right_offsets, i);
|
||||
size_t intersect_size = intersect_offsets[i] - intersect_offsets[i - 1];
|
||||
res[i] = static_cast<ResultType>(intersect_size) / (sizes.left_size + sizes.right_size - intersect_size);
|
||||
}
|
||||
}
|
||||
|
||||
template <bool left_is_const, bool right_is_const>
|
||||
static void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray<ResultType> & res)
|
||||
{
|
||||
for (size_t i = 0; i < res.size(); ++i)
|
||||
{
|
||||
LeftAndRightSizes sizes = getArraySizes<left_is_const, right_is_const>(left_offsets, right_offsets, i);
|
||||
if (sizes.left_size == 0 && sizes.right_size == 0)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "array aggregate functions cannot be performed on two empty arrays");
|
||||
res[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
static constexpr auto name = "arrayJaccardIndex";
|
||||
String getName() const override { return name; }
|
||||
static FunctionPtr create(ContextPtr context_) { return std::make_shared<FunctionArrayJaccardIndex>(context_); }
|
||||
explicit FunctionArrayJaccardIndex(ContextPtr context_) : context(context_) {}
|
||||
size_t getNumberOfArguments() const override { return 2; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; }
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
FunctionArgumentDescriptors args{
|
||||
{"array_1", &isArray<IDataType>, nullptr, "Array"},
|
||||
{"array_2", &isArray<IDataType>, nullptr, "Array"},
|
||||
};
|
||||
validateFunctionArgumentTypes(*this, arguments, args);
|
||||
return std::make_shared<DataTypeNumber<ResultType>>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
auto cast_to_array = [&](const ColumnWithTypeAndName & col) -> std::pair<const ColumnArray *, bool>
|
||||
{
|
||||
if (const ColumnConst * col_const = typeid_cast<const ColumnConst *>(col.column.get()))
|
||||
{
|
||||
const ColumnArray * col_const_array = checkAndGetColumn<ColumnArray>(col_const->getDataColumnPtr().get());
|
||||
return {col_const_array, true};
|
||||
}
|
||||
else if (const ColumnArray * col_non_const_array = checkAndGetColumn<ColumnArray>(col.column.get()))
|
||||
return {col_non_const_array, false};
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument for function {} must be array but it has type {}.", col.column->getName(), getName());
|
||||
};
|
||||
|
||||
const auto & [left_array, left_is_const] = cast_to_array(arguments[0]);
|
||||
const auto & [right_array, right_is_const] = cast_to_array(arguments[1]);
|
||||
|
||||
auto intersect_array = FunctionFactory::instance().get("arrayIntersect", context)->build(arguments);
|
||||
|
||||
ColumnWithTypeAndName intersect_column;
|
||||
intersect_column.type = intersect_array->getResultType();
|
||||
intersect_column.column = intersect_array->execute(arguments, intersect_column.type, input_rows_count);
|
||||
|
||||
const auto * intersect_column_type = checkAndGetDataType<DataTypeArray>(intersect_column.type.get());
|
||||
if (!intersect_column_type)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected return type for function arrayIntersect");
|
||||
|
||||
auto col_res = ColumnVector<ResultType>::create();
|
||||
typename ColumnVector<ResultType>::Container & vec_res = col_res->getData();
|
||||
vec_res.resize(input_rows_count);
|
||||
|
||||
#define EXECUTE_VECTOR(left_is_const, right_is_const) \
|
||||
if (typeid_cast<const DataTypeNothing *>(intersect_column_type->getNestedType().get())) \
|
||||
vectorWithEmptyIntersect<left_is_const, right_is_const>(left_array->getOffsets(), right_array->getOffsets(), vec_res); \
|
||||
else \
|
||||
{ \
|
||||
const ColumnArray * intersect_column_array = checkAndGetColumn<ColumnArray>(intersect_column.column.get()); \
|
||||
vector<left_is_const, right_is_const>(intersect_column_array->getOffsets(), left_array->getOffsets(), right_array->getOffsets(), vec_res); \
|
||||
}
|
||||
|
||||
if (!left_is_const && !right_is_const)
|
||||
EXECUTE_VECTOR(false, false)
|
||||
else if (!left_is_const && right_is_const)
|
||||
EXECUTE_VECTOR(false, true)
|
||||
else if (left_is_const && !right_is_const)
|
||||
EXECUTE_VECTOR(true, false)
|
||||
else
|
||||
EXECUTE_VECTOR(true, true)
|
||||
|
||||
#undef EXECUTE_VECTOR
|
||||
|
||||
return col_res;
|
||||
}
|
||||
|
||||
private:
|
||||
ContextPtr context;
|
||||
};
|
||||
|
||||
REGISTER_FUNCTION(ArrayJaccardIndex)
|
||||
{
|
||||
factory.registerFunction<FunctionArrayJaccardIndex>();
|
||||
}
|
||||
|
||||
}
|
@ -5,7 +5,6 @@
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
|
@ -101,6 +101,7 @@ It is ok to have ASCII NUL bytes in strings, and they will be counted as well.
|
||||
.categories{"String", "Array"}
|
||||
},
|
||||
FunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("OCTET_LENGTH", "length", FunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -3,9 +3,12 @@
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeNothing.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <numeric>
|
||||
@ -21,6 +24,7 @@ namespace ErrorCodes
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
|
||||
@ -43,6 +47,7 @@ private:
|
||||
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
bool isVariadic() const override { return true; }
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
|
||||
@ -55,13 +60,18 @@ private:
|
||||
getName(), arguments.size());
|
||||
}
|
||||
|
||||
if (std::find_if (arguments.cbegin(), arguments.cend(), [](const auto & arg) { return arg->onlyNull(); }) != arguments.cend())
|
||||
return makeNullable(std::make_shared<DataTypeNothing>());
|
||||
|
||||
DataTypes arg_types;
|
||||
for (size_t i = 0, size = arguments.size(); i < size; ++i)
|
||||
{
|
||||
if (i < 2 && WhichDataType(arguments[i]).isIPv4())
|
||||
DataTypePtr type_no_nullable = removeNullable(arguments[i]);
|
||||
|
||||
if (i < 2 && WhichDataType(type_no_nullable).isIPv4())
|
||||
arg_types.emplace_back(std::make_shared<DataTypeUInt32>());
|
||||
else if (isInteger(arguments[i]))
|
||||
arg_types.push_back(arguments[i]);
|
||||
else if (isInteger(type_no_nullable))
|
||||
arg_types.push_back(type_no_nullable);
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}",
|
||||
arguments[i]->getName(), getName());
|
||||
@ -376,6 +386,10 @@ private:
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
NullPresence null_presence = getNullPresense(arguments);
|
||||
if (null_presence.has_null_constant)
|
||||
return result_type->createColumnConstWithDefaultValue(input_rows_count);
|
||||
|
||||
DataTypePtr elem_type = checkAndGetDataType<DataTypeArray>(result_type.get())->getNestedType();
|
||||
WhichDataType which(elem_type);
|
||||
|
||||
@ -386,10 +400,31 @@ private:
|
||||
"for unsigned/signed integers up to 64 bit", getName());
|
||||
}
|
||||
|
||||
auto throwIfNullValue = [&](const ColumnWithTypeAndName & col)
|
||||
{
|
||||
if (!col.type->isNullable())
|
||||
return;
|
||||
const ColumnNullable * nullable_col = checkAndGetColumn<ColumnNullable>(*col.column);
|
||||
if (!nullable_col)
|
||||
nullable_col = checkAndGetColumnConstData<ColumnNullable>(col.column.get());
|
||||
if (!nullable_col)
|
||||
return;
|
||||
const auto & null_map = nullable_col->getNullMapData();
|
||||
if (!memoryIsZero(null_map.data(), 0, null_map.size()))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal (null) value column {} of argument of function {}", col.column->getName(), getName());
|
||||
};
|
||||
|
||||
ColumnPtr res;
|
||||
if (arguments.size() == 1)
|
||||
{
|
||||
throwIfNullValue(arguments[0]);
|
||||
const auto * col = arguments[0].column.get();
|
||||
if (arguments[0].type->isNullable())
|
||||
{
|
||||
const auto * nullable = checkAndGetColumn<ColumnNullable>(*arguments[0].column);
|
||||
col = nullable->getNestedColumnPtr().get();
|
||||
}
|
||||
|
||||
if (!((res = executeInternal<UInt8>(col)) || (res = executeInternal<UInt16>(col)) || (res = executeInternal<UInt32>(col))
|
||||
|| (res = executeInternal<UInt64>(col)) || (res = executeInternal<Int8>(col)) || (res = executeInternal<Int16>(col))
|
||||
|| (res = executeInternal<Int32>(col)) || (res = executeInternal<Int64>(col))))
|
||||
@ -404,6 +439,7 @@ private:
|
||||
|
||||
for (size_t i = 0; i < arguments.size(); ++i)
|
||||
{
|
||||
throwIfNullValue(arguments[i]);
|
||||
if (i == 1)
|
||||
columns_holder[i] = castColumn(arguments[i], elem_type)->convertToFullColumnIfConst();
|
||||
else
|
||||
|
@ -1,6 +1,5 @@
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/GatherUtils/Algorithms.h>
|
||||
|
@ -174,13 +174,12 @@ public:
|
||||
{
|
||||
auto res = static_cast<Int64>(transform_y.execute(y, timezone_y))
|
||||
- static_cast<Int64>(transform_x.execute(x, timezone_x));
|
||||
DateTimeComponentsWithFractionalPart a_comp;
|
||||
DateTimeComponentsWithFractionalPart b_comp;
|
||||
DateLUTImpl::DateTimeComponents a_comp;
|
||||
DateLUTImpl::DateTimeComponents b_comp;
|
||||
Int64 adjust_value;
|
||||
auto x_microseconds = TransformDateTime64<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
|
||||
auto y_microseconds = TransformDateTime64<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
|
||||
|
||||
if (x_microseconds <= y_microseconds)
|
||||
auto x_seconds = TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
|
||||
auto y_seconds = TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
|
||||
if (x_seconds <= y_seconds)
|
||||
{
|
||||
a_comp = TransformDateTime64<ToDateTimeComponentsImpl>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
|
||||
b_comp = TransformDateTime64<ToDateTimeComponentsImpl>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
|
||||
@ -193,16 +192,14 @@ public:
|
||||
adjust_value = 1;
|
||||
}
|
||||
|
||||
|
||||
if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeYearNumImpl<ResultPrecision::Extended>>>)
|
||||
{
|
||||
if ((a_comp.date.month > b_comp.date.month)
|
||||
|| ((a_comp.date.month == b_comp.date.month) && ((a_comp.date.day > b_comp.date.day)
|
||||
|| ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
|
||||
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))
|
||||
)))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeQuarterNumImpl<ResultPrecision::Extended>>>)
|
||||
@ -213,9 +210,8 @@ public:
|
||||
|| ((x_month_in_quarter == y_month_in_quarter) && ((a_comp.date.day > b_comp.date.day)
|
||||
|| ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
|
||||
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))
|
||||
)))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeMonthNumImpl<ResultPrecision::Extended>>>)
|
||||
@ -223,9 +219,8 @@ public:
|
||||
if ((a_comp.date.day > b_comp.date.day)
|
||||
|| ((a_comp.date.day == b_comp.date.day) && ((a_comp.time.hour > b_comp.time.hour)
|
||||
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second))))
|
||||
)))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeWeekNumImpl<ResultPrecision::Extended>>>)
|
||||
@ -235,44 +230,25 @@ public:
|
||||
if ((x_day_of_week > y_day_of_week)
|
||||
|| ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour))
|
||||
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeDayNumImpl<ResultPrecision::Extended>>>)
|
||||
{
|
||||
if ((a_comp.time.hour > b_comp.time.hour)
|
||||
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeHourNumImpl<ResultPrecision::Extended>>>)
|
||||
{
|
||||
if ((a_comp.time.minute > b_comp.time.minute)
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))))
|
||||
|| ((a_comp.time.minute == b_comp.time.minute) && (a_comp.time.second > b_comp.time.second)))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>>)
|
||||
{
|
||||
if ((a_comp.time.second > b_comp.time.second)
|
||||
|| ((a_comp.time.second == b_comp.time.second) && ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeSecondNumImpl<ResultPrecision::Extended>>>)
|
||||
{
|
||||
if ((a_comp.millisecond > b_comp.millisecond)
|
||||
|| ((a_comp.millisecond == b_comp.millisecond) && (a_comp.microsecond > b_comp.microsecond)))
|
||||
res += adjust_value;
|
||||
}
|
||||
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeSubsecondNumImpl<1000>>>)
|
||||
{
|
||||
if (a_comp.microsecond > b_comp.microsecond)
|
||||
if (a_comp.time.second > b_comp.time.second)
|
||||
res += adjust_value;
|
||||
}
|
||||
return res;
|
||||
@ -397,10 +373,6 @@ public:
|
||||
impl.template dispatchForColumns<ToRelativeMinuteNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
|
||||
else if (unit == "second" || unit == "ss" || unit == "s")
|
||||
impl.template dispatchForColumns<ToRelativeSecondNumImpl<ResultPrecision::Extended>>(x, y, timezone_x, timezone_y, res->getData());
|
||||
else if (unit == "millisecond" || unit == "ms")
|
||||
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<millisecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData());
|
||||
else if (unit == "microsecond" || unit == "us" || unit == "u")
|
||||
impl.template dispatchForColumns<ToRelativeSubsecondNumImpl<microsecond_multiplier>>(x, y, timezone_x, timezone_y, res->getData());
|
||||
else
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Function {} does not support '{}' unit", getName(), unit);
|
||||
|
@ -2,7 +2,6 @@
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <Core/ColumnNumbers.h>
|
||||
|
||||
|
||||
|
66
src/Functions/initcap.cpp
Normal file
66
src/Functions/initcap.cpp
Normal file
@ -0,0 +1,66 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace
|
||||
{
|
||||
|
||||
struct InitcapImpl
|
||||
{
|
||||
static void vector(const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
if (data.empty())
|
||||
return;
|
||||
res_data.resize(data.size());
|
||||
res_offsets.assign(offsets);
|
||||
array(data.data(), data.data() + data.size(), res_data.data());
|
||||
}
|
||||
|
||||
static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data)
|
||||
{
|
||||
res_data.resize(data.size());
|
||||
array(data.data(), data.data() + data.size(), res_data.data());
|
||||
}
|
||||
|
||||
private:
|
||||
static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst)
|
||||
{
|
||||
bool prev_alphanum = false;
|
||||
|
||||
for (; src < src_end; ++src, ++dst)
|
||||
{
|
||||
char c = *src;
|
||||
bool alphanum = isAlphaNumericASCII(c);
|
||||
if (alphanum && !prev_alphanum)
|
||||
if (isAlphaASCII(c))
|
||||
*dst = toUpperIfAlphaASCII(c);
|
||||
else
|
||||
*dst = c;
|
||||
else if (isAlphaASCII(c))
|
||||
*dst = toLowerIfAlphaASCII(c);
|
||||
else
|
||||
*dst = c;
|
||||
prev_alphanum = alphanum;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct NameInitcap
|
||||
{
|
||||
static constexpr auto name = "initcap";
|
||||
};
|
||||
using FunctionInitcap = FunctionStringToString<InitcapImpl, NameInitcap>;
|
||||
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(Initcap)
|
||||
{
|
||||
factory.registerFunction<FunctionInitcap>({}, FunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
}
|
114
src/Functions/initcapUTF8.cpp
Normal file
114
src/Functions/initcapUTF8.cpp
Normal file
@ -0,0 +1,114 @@
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionStringToString.h>
|
||||
#include <Functions/LowerUpperUTF8Impl.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Poco/Unicode.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
struct InitcapUTF8Impl
|
||||
{
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
if (data.empty())
|
||||
return;
|
||||
res_data.resize(data.size());
|
||||
res_offsets.assign(offsets);
|
||||
array(data.data(), data.data() + data.size(), offsets, res_data.data());
|
||||
}
|
||||
|
||||
[[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function initcapUTF8 cannot work with FixedString argument");
|
||||
}
|
||||
|
||||
static void processCodePoint(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool& prev_alphanum)
|
||||
{
|
||||
size_t src_sequence_length = UTF8::seqLength(*src);
|
||||
auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src);
|
||||
|
||||
if (src_code_point)
|
||||
{
|
||||
bool alpha = Poco::Unicode::isAlpha(*src_code_point);
|
||||
bool alphanum = alpha || Poco::Unicode::isDigit(*src_code_point);
|
||||
|
||||
int dst_code_point = *src_code_point;
|
||||
if (alphanum && !prev_alphanum)
|
||||
{
|
||||
if (alpha)
|
||||
dst_code_point = Poco::Unicode::toUpper(*src_code_point);
|
||||
}
|
||||
else if (alpha)
|
||||
{
|
||||
dst_code_point = Poco::Unicode::toLower(*src_code_point);
|
||||
}
|
||||
prev_alphanum = alphanum;
|
||||
if (dst_code_point > 0)
|
||||
{
|
||||
size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src);
|
||||
assert(dst_sequence_length <= 4);
|
||||
|
||||
if (dst_sequence_length == src_sequence_length)
|
||||
{
|
||||
src += dst_sequence_length;
|
||||
dst += dst_sequence_length;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
*dst = *src;
|
||||
++dst;
|
||||
++src;
|
||||
prev_alphanum = false;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst)
|
||||
{
|
||||
const auto * offset_it = offsets.begin();
|
||||
const UInt8 * begin = src;
|
||||
|
||||
/// handle remaining symbols, row by row (to avoid influence of bad UTF8 symbols from one row, to another)
|
||||
while (src < src_end)
|
||||
{
|
||||
const UInt8 * row_end = begin + *offset_it;
|
||||
chassert(row_end >= src);
|
||||
bool prev_alphanum = false;
|
||||
while (src < row_end)
|
||||
processCodePoint(src, row_end, dst, prev_alphanum);
|
||||
++offset_it;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct NameInitcapUTF8
|
||||
{
|
||||
static constexpr auto name = "initcapUTF8";
|
||||
};
|
||||
|
||||
using FunctionInitcapUTF8 = FunctionStringToString<InitcapUTF8Impl, NameInitcapUTF8>;
|
||||
|
||||
}
|
||||
|
||||
REGISTER_FUNCTION(InitcapUTF8)
|
||||
{
|
||||
factory.registerFunction<FunctionInitcapUTF8>();
|
||||
}
|
||||
|
||||
}
|
@ -10,6 +10,7 @@
|
||||
#include <Functions/DateTimeTransforms.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Functions/TransformDateTime64.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
|
@ -1005,8 +1005,8 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re
|
||||
}
|
||||
}
|
||||
}
|
||||
/// 9908870400 is time_t value for 2184-01-01 UTC (a bit over the last year supported by DateTime64)
|
||||
else if (whole >= 9908870400LL)
|
||||
/// 10413792000 is time_t value for 2300-01-01 UTC (a bit over the last year supported by DateTime64)
|
||||
else if (whole >= 10413792000LL)
|
||||
{
|
||||
/// Unix timestamp with subsecond precision, already scaled to integer.
|
||||
/// For disambiguation we support only time since 2001-09-09 01:46:40 UTC and less than 30 000 years in future.
|
||||
|
@ -822,8 +822,19 @@ void copyS3File(
|
||||
ThreadPoolCallbackRunner<void> schedule,
|
||||
bool for_disk_s3)
|
||||
{
|
||||
CopyFileHelper helper{s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3};
|
||||
helper.performCopy();
|
||||
if (settings.allow_native_copy)
|
||||
{
|
||||
CopyFileHelper helper{s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3};
|
||||
helper.performCopy();
|
||||
}
|
||||
else
|
||||
{
|
||||
auto create_read_buffer = [&]
|
||||
{
|
||||
return std::make_unique<ReadBufferFromS3>(s3_client, src_bucket, src_key, "", settings, Context::getGlobalContextInstance()->getReadSettings());
|
||||
};
|
||||
copyDataToS3File(create_read_buffer, src_offset, src_size, s3_client, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user