Merge branch 'master' into merge-filter-steps

This commit is contained in:
Nikolai Kochetov 2024-06-12 15:06:12 +02:00
commit f564ec3b69
389 changed files with 10358 additions and 2953 deletions

View File

@ -37,7 +37,6 @@ Checks: [
'-cert-oop54-cpp',
'-cert-oop57-cpp',
'-clang-analyzer-optin.core.EnumCastOutOfRange', # https://github.com/abseil/abseil-cpp/issues/1667
'-clang-analyzer-optin.performance.Padding',
'-clang-analyzer-unix.Malloc',

View File

@ -19,3 +19,7 @@ charset = utf-8
indent_style = space
indent_size = 4
trim_trailing_whitespace = true
# Some SQL results have trailing whitespace which is removed by IDEs
[tests/queries/**.reference]
trim_trailing_whitespace = false

View File

@ -1,20 +0,0 @@
---
name: Question
about: Ask a question about ClickHouse
title: ''
labels: question
assignees: ''
---
> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in [community Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
> If you still prefer GitHub issues, remove all this text and ask your question here.
**Company or project name**
Put your company name or project description here
**Question**
Your question

20
.github/ISSUE_TEMPLATE/10_question.yaml vendored Normal file
View File

@ -0,0 +1,20 @@
name: Question
description: Ask a question about ClickHouse
labels: ["question"]
body:
- type: markdown
attributes:
value: |
> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in [community Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
- type: textarea
attributes:
label: Company or project name
description: Put your company name or project description here.
validations:
required: false
- type: textarea
attributes:
label: Question
description: Please put your question here.
validations:
required: true

View File

@ -48,19 +48,17 @@ At a minimum, the following information should be added (but add more as needed)
- [ ] <!---ci_include_stateful--> Allow: Stateful tests
- [ ] <!---ci_include_integration--> Allow: Integration Tests
- [ ] <!---ci_include_performance--> Allow: Performance tests
- [ ] <!---ci_set_normal_builds--> Allow: Normal Builds
- [ ] <!---ci_set_special_builds--> Allow: Special Builds
- [ ] <!---ci_set_non_required--> Allow: All NOT Required Checks
- [ ] <!---batch_0_1--> Allow: batch 1, 2 for multi-batch jobs
- [ ] <!---batch_2_3--> Allow: batch 3, 4, 5, 6 for multi-batch jobs
---
- [ ] <!---ci_exclude_style--> Exclude: Style check
- [ ] <!---ci_exclude_fast--> Exclude: Fast test
- [ ] <!---ci_exclude_integration--> Exclude: Integration Tests
- [ ] <!---ci_exclude_stateless--> Exclude: Stateless tests
- [ ] <!---ci_exclude_stateful--> Exclude: Stateful tests
- [ ] <!---ci_exclude_performance--> Exclude: Performance tests
- [ ] <!---ci_exclude_asan--> Exclude: All with ASAN
- [ ] <!---ci_exclude_aarch64--> Exclude: All with Aarch64
- [ ] <!---ci_exclude_tsan|msan|ubsan|coverage--> Exclude: All with TSAN, MSAN, UBSAN, Coverage
- [ ] <!---ci_exclude_aarch64|release|debug--> Exclude: All with aarch64, release, debug
---
- [ ] <!---do_not_test--> Do not test
- [ ] <!---upload_all--> Upload binaries for special builds

4
.gitmodules vendored
View File

@ -161,9 +161,9 @@
[submodule "contrib/xz"]
path = contrib/xz
url = https://github.com/xz-mirror/xz
[submodule "contrib/abseil-cpp"]
[submodule "abseil"]
path = contrib/abseil-cpp
url = https://github.com/abseil/abseil-cpp
url = https://github.com/ClickHouse/abseil-cpp.git
[submodule "contrib/dragonbox"]
path = contrib/dragonbox
url = https://github.com/ClickHouse/dragonbox

2
contrib/abseil-cpp vendored

@ -1 +1 @@
Subproject commit 3bd86026c93da5a40006fd53403dff9d5f5e30e3
Subproject commit a3c4dd3e77f28b526efbb0eb394b72e29c633936

View File

@ -1,6 +1,8 @@
set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
set(ABSL_COMMON_INCLUDE_DIRS "${ABSL_ROOT_DIR}")
# This is a minimized version of the function definition in CMake/AbseilHelpers.cmake
#
# Copyright 2017 The Abseil Authors.
#
@ -16,7 +18,6 @@ set(ABSL_COMMON_INCLUDE_DIRS "${ABSL_ROOT_DIR}")
# See the License for the specific language governing permissions and
# limitations under the License.
#
function(absl_cc_library)
cmake_parse_arguments(ABSL_CC_LIB
"DISABLE_INSTALL;PUBLIC;TESTONLY"
@ -76,6 +77,12 @@ function(absl_cc_library)
add_library(absl::${ABSL_CC_LIB_NAME} ALIAS ${_NAME})
endfunction()
# The following definitions are an amalgamation of the CMakeLists.txt files in absl/*/
# To refresh them when upgrading to a new version:
# - copy them over from upstream
# - remove calls of 'absl_cc_test'
# - remove calls of `absl_cc_library` that contain `TESTONLY`
# - append '${DIR}' to the file definitions
set(DIR ${ABSL_ROOT_DIR}/absl/algorithm)
@ -102,12 +109,12 @@ absl_cc_library(
absl::algorithm
absl::core_headers
absl::meta
absl::nullability
PUBLIC
)
set(DIR ${ABSL_ROOT_DIR}/absl/base)
# Internal-only target, do not depend on directly.
absl_cc_library(
NAME
atomic_hook
@ -146,6 +153,18 @@ absl_cc_library(
${ABSL_DEFAULT_COPTS}
)
absl_cc_library(
NAME
no_destructor
HDRS
"${DIR}/no_destructor.h"
DEPS
absl::config
absl::nullability
COPTS
${ABSL_DEFAULT_COPTS}
)
absl_cc_library(
NAME
nullability
@ -305,6 +324,8 @@ absl_cc_library(
${ABSL_DEFAULT_COPTS}
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
$<$<BOOL:${LIBRT}>:-lrt>
$<$<BOOL:${MINGW}>:-ladvapi32>
DEPS
absl::atomic_hook
absl::base_internal
@ -312,6 +333,7 @@ absl_cc_library(
absl::core_headers
absl::dynamic_annotations
absl::log_severity
absl::nullability
absl::raw_logging_internal
absl::spinlock_wait
absl::type_traits
@ -357,6 +379,7 @@ absl_cc_library(
absl::base
absl::config
absl::core_headers
absl::nullability
PUBLIC
)
@ -467,10 +490,11 @@ absl_cc_library(
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
DEPS
absl::container_common
absl::common_policy_traits
absl::compare
absl::compressed_tuple
absl::config
absl::container_common
absl::container_memory
absl::cord
absl::core_headers
@ -480,7 +504,6 @@ absl_cc_library(
absl::strings
absl::throw_delegate
absl::type_traits
absl::utility
)
# Internal-only target, do not depend on directly.
@ -523,7 +546,9 @@ absl_cc_library(
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::base_internal
absl::compressed_tuple
absl::config
absl::core_headers
absl::memory
absl::span
@ -548,18 +573,6 @@ absl_cc_library(
PUBLIC
)
# Internal-only target, do not depend on directly.
absl_cc_library(
NAME
counting_allocator
HDRS
"${DIR}/internal/counting_allocator.h"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::config
)
absl_cc_library(
NAME
flat_hash_map
@ -570,7 +583,7 @@ absl_cc_library(
DEPS
absl::container_memory
absl::core_headers
absl::hash_function_defaults
absl::hash_container_defaults
absl::raw_hash_map
absl::algorithm_container
absl::memory
@ -586,7 +599,7 @@ absl_cc_library(
${ABSL_DEFAULT_COPTS}
DEPS
absl::container_memory
absl::hash_function_defaults
absl::hash_container_defaults
absl::raw_hash_set
absl::algorithm_container
absl::core_headers
@ -604,7 +617,7 @@ absl_cc_library(
DEPS
absl::container_memory
absl::core_headers
absl::hash_function_defaults
absl::hash_container_defaults
absl::node_slot_policy
absl::raw_hash_map
absl::algorithm_container
@ -620,8 +633,9 @@ absl_cc_library(
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::container_memory
absl::core_headers
absl::hash_function_defaults
absl::hash_container_defaults
absl::node_slot_policy
absl::raw_hash_set
absl::algorithm_container
@ -629,6 +643,19 @@ absl_cc_library(
PUBLIC
)
absl_cc_library(
NAME
hash_container_defaults
HDRS
"${DIR}/hash_container_defaults.h"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::config
absl::hash_function_defaults
PUBLIC
)
# Internal-only target, do not depend on directly.
absl_cc_library(
NAME
@ -655,9 +682,11 @@ absl_cc_library(
${ABSL_DEFAULT_COPTS}
DEPS
absl::config
absl::container_common
absl::cord
absl::hash
absl::strings
absl::type_traits
PUBLIC
)
@ -703,6 +732,7 @@ absl_cc_library(
absl::base
absl::config
absl::exponential_biased
absl::no_destructor
absl::raw_logging_internal
absl::sample_recorder
absl::synchronization
@ -756,7 +786,9 @@ absl_cc_library(
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::config
absl::container_memory
absl::core_headers
absl::raw_hash_set
absl::throw_delegate
PUBLIC
@ -817,6 +849,7 @@ absl_cc_library(
DEPS
absl::config
absl::core_headers
absl::debugging_internal
absl::meta
absl::strings
absl::span
@ -931,6 +964,7 @@ absl_cc_library(
absl::crc32c
absl::config
absl::strings
absl::no_destructor
)
set(DIR ${ABSL_ROOT_DIR}/absl/debugging)
@ -954,6 +988,8 @@ absl_cc_library(
"${DIR}/stacktrace.cc"
COPTS
${ABSL_DEFAULT_COPTS}
LINKOPTS
$<$<BOOL:${EXECINFO_LIBRARY}>:${EXECINFO_LIBRARY}>
DEPS
absl::debugging_internal
absl::config
@ -980,6 +1016,7 @@ absl_cc_library(
${ABSL_DEFAULT_COPTS}
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
$<$<BOOL:${MINGW}>:-ldbghelp>
DEPS
absl::debugging_internal
absl::demangle_internal
@ -1058,8 +1095,10 @@ absl_cc_library(
demangle_internal
HDRS
"${DIR}/internal/demangle.h"
"${DIR}/internal/demangle_rust.h"
SRCS
"${DIR}/internal/demangle.cc"
"${DIR}/internal/demangle_rust.cc"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
@ -1252,6 +1291,7 @@ absl_cc_library(
absl::strings
absl::synchronization
absl::flat_hash_map
absl::no_destructor
)
# Internal-only target, do not depend on directly.
@ -1283,12 +1323,9 @@ absl_cc_library(
absl_cc_library(
NAME
flags
SRCS
"${DIR}/flag.cc"
HDRS
"${DIR}/declare.h"
"${DIR}/flag.h"
"${DIR}/internal/flag_msvc.inc"
COPTS
${ABSL_DEFAULT_COPTS}
LINKOPTS
@ -1299,7 +1336,6 @@ absl_cc_library(
absl::flags_config
absl::flags_internal
absl::flags_reflection
absl::base
absl::core_headers
absl::strings
)
@ -1379,6 +1415,9 @@ absl_cc_library(
absl::synchronization
)
############################################################################
# Unit tests in alphabetical order.
set(DIR ${ABSL_ROOT_DIR}/absl/functional)
absl_cc_library(
@ -1431,6 +1470,18 @@ absl_cc_library(
PUBLIC
)
absl_cc_library(
NAME
overload
HDRS
"${DIR}/overload.h"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::meta
PUBLIC
)
set(DIR ${ABSL_ROOT_DIR}/absl/hash)
absl_cc_library(
@ -1640,6 +1691,7 @@ absl_cc_library(
absl::log_internal_conditions
absl::log_internal_message
absl::log_internal_strip
absl::absl_vlog_is_on
)
absl_cc_library(
@ -1721,6 +1773,7 @@ absl_cc_library(
absl::log_entry
absl::log_severity
absl::log_sink
absl::no_destructor
absl::raw_logging_internal
absl::synchronization
absl::span
@ -1771,6 +1824,7 @@ absl_cc_library(
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
DEPS
absl::core_headers
absl::log_internal_message
absl::log_internal_nullstream
absl::log_severity
@ -1876,6 +1930,11 @@ absl_cc_library(
PUBLIC
)
# Warning: Many linkers will strip the contents of this library because its
# symbols are only used in a global constructor. A workaround is for clients
# to link this using $<LINK_LIBRARY:WHOLE_ARCHIVE,absl::log_flags> instead of
# the plain absl::log_flags.
# TODO(b/320467376): Implement the equivalent of Bazel's alwayslink=True.
absl_cc_library(
NAME
log_flags
@ -1897,6 +1956,7 @@ absl_cc_library(
absl::flags
absl::flags_marshalling
absl::strings
absl::vlog_config_internal
PUBLIC
)
@ -1919,6 +1979,7 @@ absl_cc_library(
absl::log_severity
absl::raw_logging_internal
absl::strings
absl::vlog_config_internal
)
absl_cc_library(
@ -1952,6 +2013,7 @@ absl_cc_library(
${ABSL_DEFAULT_LINKOPTS}
DEPS
absl::log_internal_log_impl
absl::vlog_is_on
PUBLIC
)
@ -2064,21 +2126,75 @@ absl_cc_library(
)
absl_cc_library(
NAME
log_internal_fnmatch
SRCS
"${DIR}/internal/fnmatch.cc"
HDRS
"${DIR}/internal/fnmatch.h"
COPTS
${ABSL_DEFAULT_COPTS}
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
DEPS
absl::config
absl::strings
NAME
vlog_config_internal
SRCS
"${DIR}/internal/vlog_config.cc"
HDRS
"${DIR}/internal/vlog_config.h"
COPTS
${ABSL_DEFAULT_COPTS}
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
DEPS
absl::base
absl::config
absl::core_headers
absl::log_internal_fnmatch
absl::memory
absl::no_destructor
absl::strings
absl::synchronization
absl::optional
)
absl_cc_library(
NAME
absl_vlog_is_on
COPTS
${ABSL_DEFAULT_COPTS}
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
HDRS
"${DIR}/absl_vlog_is_on.h"
DEPS
absl::vlog_config_internal
absl::config
absl::core_headers
absl::strings
)
absl_cc_library(
NAME
vlog_is_on
COPTS
${ABSL_DEFAULT_COPTS}
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
HDRS
"${DIR}/vlog_is_on.h"
DEPS
absl::absl_vlog_is_on
)
absl_cc_library(
NAME
log_internal_fnmatch
SRCS
"${DIR}/internal/fnmatch.cc"
HDRS
"${DIR}/internal/fnmatch.h"
COPTS
${ABSL_DEFAULT_COPTS}
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
DEPS
absl::config
absl::strings
)
# Test targets
set(DIR ${ABSL_ROOT_DIR}/absl/memory)
absl_cc_library(
@ -2147,6 +2263,7 @@ absl_cc_library(
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::compare
absl::config
absl::core_headers
absl::bits
@ -2176,6 +2293,8 @@ absl_cc_library(
PUBLIC
)
set(DIR ${ABSL_ROOT_DIR}/absl/profiling)
absl_cc_library(
NAME
sample_recorder
@ -2188,8 +2307,6 @@ absl_cc_library(
absl::synchronization
)
set(DIR ${ABSL_ROOT_DIR}/absl/profiling)
absl_cc_library(
NAME
exponential_biased
@ -2265,6 +2382,7 @@ absl_cc_library(
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
DEPS
absl::config
absl::fast_type_id
absl::optional
)
@ -2336,11 +2454,13 @@ absl_cc_library(
DEPS
absl::config
absl::inlined_vector
absl::nullability
absl::random_internal_pool_urbg
absl::random_internal_salted_seed_seq
absl::random_internal_seed_material
absl::random_seed_gen_exception
absl::span
absl::string_view
)
# Internal-only target, do not depend on directly.
@ -2399,6 +2519,7 @@ absl_cc_library(
${ABSL_DEFAULT_COPTS}
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
$<$<BOOL:${MINGW}>:-lbcrypt>
DEPS
absl::core_headers
absl::optional
@ -2658,6 +2779,29 @@ absl_cc_library(
absl::config
)
# Internal-only target, do not depend on directly.
absl_cc_library(
NAME
random_internal_distribution_test_util
SRCS
"${DIR}/internal/chi_square.cc"
"${DIR}/internal/distribution_test_util.cc"
HDRS
"${DIR}/internal/chi_square.h"
"${DIR}/internal/distribution_test_util.h"
COPTS
${ABSL_DEFAULT_COPTS}
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
DEPS
absl::config
absl::core_headers
absl::raw_logging_internal
absl::strings
absl::str_format
absl::span
)
# Internal-only target, do not depend on directly.
absl_cc_library(
NAME
@ -2699,6 +2843,8 @@ absl_cc_library(
absl::function_ref
absl::inlined_vector
absl::memory
absl::no_destructor
absl::nullability
absl::optional
absl::raw_logging_internal
absl::span
@ -2724,8 +2870,11 @@ absl_cc_library(
absl::base
absl::config
absl::core_headers
absl::has_ostream_operator
absl::nullability
absl::raw_logging_internal
absl::status
absl::str_format
absl::strings
absl::type_traits
absl::utility
@ -2748,6 +2897,7 @@ absl_cc_library(
absl::base
absl::config
absl::core_headers
absl::nullability
absl::throw_delegate
PUBLIC
)
@ -2762,6 +2912,7 @@ absl_cc_library(
"${DIR}/has_absl_stringify.h"
"${DIR}/internal/damerau_levenshtein_distance.h"
"${DIR}/internal/string_constant.h"
"${DIR}/internal/has_absl_stringify.h"
"${DIR}/match.h"
"${DIR}/numbers.h"
"${DIR}/str_cat.h"
@ -2805,6 +2956,7 @@ absl_cc_library(
absl::endian
absl::int128
absl::memory
absl::nullability
absl::raw_logging_internal
absl::throw_delegate
absl::type_traits
@ -2824,6 +2976,18 @@ absl_cc_library(
PUBLIC
)
absl_cc_library(
NAME
has_ostream_operator
HDRS
"${DIR}/has_ostream_operator.h"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::config
PUBLIC
)
# Internal-only target, do not depend on directly.
absl_cc_library(
NAME
@ -2855,7 +3019,12 @@ absl_cc_library(
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::config
absl::core_headers
absl::nullability
absl::span
absl::str_format_internal
absl::string_view
PUBLIC
)
@ -2886,6 +3055,7 @@ absl_cc_library(
absl::strings
absl::config
absl::core_headers
absl::fixed_array
absl::inlined_vector
absl::numeric_representation
absl::type_traits
@ -2989,6 +3159,7 @@ absl_cc_library(
DEPS
absl::base
absl::config
absl::no_destructor
absl::raw_logging_internal
absl::synchronization
)
@ -3079,6 +3250,7 @@ absl_cc_library(
absl::endian
absl::function_ref
absl::inlined_vector
absl::nullability
absl::optional
absl::raw_logging_internal
absl::span
@ -3246,6 +3418,8 @@ absl_cc_library(
${ABSL_DEFAULT_COPTS}
DEPS
Threads::Threads
# TODO(#1495): Use $<LINK_LIBRARY:FRAMEWORK,CoreFoundation> once our
# minimum CMake version >= 3.24
$<$<PLATFORM_ID:Darwin>:-Wl,-framework,CoreFoundation>
)
@ -3286,8 +3460,8 @@ absl_cc_library(
NAME
bad_any_cast_impl
SRCS
"${DIR}/bad_any_cast.h"
"${DIR}/bad_any_cast.cc"
"${DIR}/bad_any_cast.h"
"${DIR}/bad_any_cast.cc"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
@ -3307,6 +3481,7 @@ absl_cc_library(
DEPS
absl::algorithm
absl::core_headers
absl::nullability
absl::throw_delegate
absl::type_traits
PUBLIC
@ -3327,6 +3502,7 @@ absl_cc_library(
absl::config
absl::core_headers
absl::memory
absl::nullability
absl::type_traits
absl::utility
PUBLIC
@ -3389,6 +3565,7 @@ absl_cc_library(
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::config
absl::core_headers
absl::type_traits
PUBLIC

2
contrib/cld2 vendored

@ -1 +1 @@
Subproject commit bc6d493a2f64ed1fc1c4c4b4294a542a04e04217
Subproject commit 217ba8b8805b41557faadaa47bb6e99f2242eea3

2
contrib/googletest vendored

@ -1 +1 @@
Subproject commit e47544ad31cb3ceecd04cc13e8fe556f8df9fe0b
Subproject commit a7f443b80b105f940225332ed3c31f2790092f47

2
contrib/orc vendored

@ -1 +1 @@
Subproject commit e24f2c2a3ca0769c96704ab20ad6f512a83ea2ad
Subproject commit 947cebaf9432d708253ac08dc3012daa6b4ede6f

View File

@ -41,8 +41,7 @@
"docker/test/stateless": {
"name": "clickhouse/stateless-test",
"dependent": [
"docker/test/stateful",
"docker/test/unit"
"docker/test/stateful"
]
},
"docker/test/stateful": {
@ -122,15 +121,16 @@
"docker/test/base": {
"name": "clickhouse/test-base",
"dependent": [
"docker/test/clickbench",
"docker/test/fuzzer",
"docker/test/libfuzzer",
"docker/test/integration/base",
"docker/test/keeper-jepsen",
"docker/test/libfuzzer",
"docker/test/server-jepsen",
"docker/test/sqllogic",
"docker/test/sqltest",
"docker/test/clickbench",
"docker/test/stateless"
"docker/test/stateless",
"docker/test/unit"
]
},
"docker/test/integration/kerberized_hadoop": {

View File

@ -285,7 +285,7 @@ stop_logs_replication
# Try to get logs while server is running
failed_to_save_logs=0
for table in query_log zookeeper_log trace_log transactions_info_log metric_log
for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log
do
err=$(clickhouse-client -q "select * from system.$table into outfile '/test_output/$table.tsv.gz' format TSVWithNamesAndTypes")
echo "$err"
@ -339,7 +339,7 @@ if [ $failed_to_save_logs -ne 0 ]; then
# directly
# - even though ci auto-compress some files (but not *.tsv) it does this only
# for files >64MB, we want these files to be compressed explicitly
for table in query_log zookeeper_log trace_log transactions_info_log metric_log
for table in query_log zookeeper_log trace_log transactions_info_log metric_log blob_storage_log
do
clickhouse-local "$data_path_config" --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||:
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then

View File

@ -30,6 +30,7 @@ RUN pip3 install \
mypy==1.8.0 \
pylint==3.1.0 \
python-magic==0.4.24 \
flake8==4.0.1 \
requests \
thefuzz \
types-requests \

View File

@ -9,6 +9,8 @@ echo "Check style" | ts
./check-style -n |& tee /test_output/style_output.txt
echo "Check python formatting with black" | ts
./check-black -n |& tee /test_output/black_output.txt
echo "Check python with flake8" | ts
./check-flake8 |& tee /test_output/flake8_output.txt
echo "Check python type hinting with mypy" | ts
./check-mypy -n |& tee /test_output/mypy_output.txt
echo "Check typos" | ts

View File

@ -1,9 +1,7 @@
# rebuild in #33610
# docker build -t clickhouse/unit-test .
ARG FROM_TAG=latest
FROM clickhouse/stateless-test:$FROM_TAG
RUN apt-get install gdb
FROM clickhouse/test-base:$FROM_TAG
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]

View File

@ -25,7 +25,8 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
./setup_minio.sh stateless # to have a proper environment
echo "Get previous release tag"
previous_release_tag=$(dpkg --info package_folder/clickhouse-client*.deb | grep "Version: " | awk '{print $2}' | cut -f1 -d'+' | get_previous_release_tag)
# shellcheck disable=SC2016
previous_release_tag=$(dpkg-deb --showformat='${Version}' --show package_folder/clickhouse-client*.deb | get_previous_release_tag)
echo $previous_release_tag
echo "Clone previous release repository"

View File

@ -0,0 +1,101 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.4.2.141-stable (9e23d27bd11) FIXME as compared to v24.4.1.2088-stable (6d4b31322d1)
#### Improvement
* Backported in [#63467](https://github.com/ClickHouse/ClickHouse/issues/63467): Make rabbitmq nack broken messages. Closes [#45350](https://github.com/ClickHouse/ClickHouse/issues/45350). [#60312](https://github.com/ClickHouse/ClickHouse/pull/60312) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### Build/Testing/Packaging Improvement
* Backported in [#63612](https://github.com/ClickHouse/ClickHouse/issues/63612): The Dockerfile is reviewed by the docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#64279](https://github.com/ClickHouse/ClickHouse/issues/64279): Fix queries with FINAL giving wrong results when the table does not use adaptive granularity. [#62432](https://github.com/ClickHouse/ClickHouse/pull/62432) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#63295](https://github.com/ClickHouse/ClickHouse/issues/63295): Fix crash with untuple and unresolved lambda. [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#63978](https://github.com/ClickHouse/ClickHouse/issues/63978): Fix intersect parts when restart after drop range. [#63202](https://github.com/ClickHouse/ClickHouse/pull/63202) ([Han Fei](https://github.com/hanfei1991)).
* Backported in [#63413](https://github.com/ClickHouse/ClickHouse/issues/63413): Fix a misbehavior when SQL security defaults don't load for old tables during server startup. [#63209](https://github.com/ClickHouse/ClickHouse/pull/63209) ([pufit](https://github.com/pufit)).
* Backported in [#63388](https://github.com/ClickHouse/ClickHouse/issues/63388): JOIN filter push down filled join fix. Closes [#63228](https://github.com/ClickHouse/ClickHouse/issues/63228). [#63234](https://github.com/ClickHouse/ClickHouse/pull/63234) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#63618](https://github.com/ClickHouse/ClickHouse/issues/63618): Fix bug which could potentially lead to rare LOGICAL_ERROR during SELECT query with message: `Unexpected return type from materialize. Expected type_XXX. Got type_YYY.` Introduced in [#59379](https://github.com/ClickHouse/ClickHouse/issues/59379). [#63353](https://github.com/ClickHouse/ClickHouse/pull/63353) ([alesapin](https://github.com/alesapin)).
* Backported in [#63451](https://github.com/ClickHouse/ClickHouse/issues/63451): Fix `X-ClickHouse-Timezone` header returning wrong timezone when using `session_timezone` as query level setting. [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)).
* Backported in [#63605](https://github.com/ClickHouse/ClickHouse/issues/63605): Fix backup of projection part in case projection was removed from table metadata, but part still has projection. [#63426](https://github.com/ClickHouse/ClickHouse/pull/63426) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#63510](https://github.com/ClickHouse/ClickHouse/issues/63510): Fix 'Every derived table must have its own alias' error for MYSQL dictionary source, close [#63341](https://github.com/ClickHouse/ClickHouse/issues/63341). [#63481](https://github.com/ClickHouse/ClickHouse/pull/63481) ([vdimir](https://github.com/vdimir)).
* Backported in [#63592](https://github.com/ClickHouse/ClickHouse/issues/63592): Avoid segfault in `MergeTreePrefetchedReadPool` while fetching projection parts. [#63513](https://github.com/ClickHouse/ClickHouse/pull/63513) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#63750](https://github.com/ClickHouse/ClickHouse/issues/63750): Read only the necessary columns from VIEW (new analyzer). Closes [#62594](https://github.com/ClickHouse/ClickHouse/issues/62594). [#63688](https://github.com/ClickHouse/ClickHouse/pull/63688) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#63772](https://github.com/ClickHouse/ClickHouse/issues/63772): Fix [#63539](https://github.com/ClickHouse/ClickHouse/issues/63539). Forbid WINDOW redefinition in new analyzer. [#63694](https://github.com/ClickHouse/ClickHouse/pull/63694) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#63872](https://github.com/ClickHouse/ClickHouse/issues/63872): Flatten_nested is broken with replicated database. [#63695](https://github.com/ClickHouse/ClickHouse/pull/63695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#63854](https://github.com/ClickHouse/ClickHouse/issues/63854): Fix `Not found column` and `CAST AS Map from array requires nested tuple of 2 elements` exceptions for distributed queries which use `Map(Nothing, Nothing)` type. Fixes [#63637](https://github.com/ClickHouse/ClickHouse/issues/63637). [#63753](https://github.com/ClickHouse/ClickHouse/pull/63753) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#63847](https://github.com/ClickHouse/ClickHouse/issues/63847): Fix possible `ILLEGAL_COLUMN` error in `partial_merge` join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#63755](https://github.com/ClickHouse/ClickHouse/pull/63755) ([vdimir](https://github.com/vdimir)).
* Backported in [#63908](https://github.com/ClickHouse/ClickHouse/issues/63908): `query_plan_remove_redundant_distinct` can break queries with WINDOW FUNCTIONS (when `allow_experimental_analyzer` is on). Fixes [#62820](https://github.com/ClickHouse/ClickHouse/issues/62820). [#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)).
* Backported in [#63955](https://github.com/ClickHouse/ClickHouse/issues/63955): Fix possible crash with SYSTEM UNLOAD PRIMARY KEY. [#63778](https://github.com/ClickHouse/ClickHouse/pull/63778) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#63938](https://github.com/ClickHouse/ClickHouse/issues/63938): Allow JOIN filter push down to both streams if only single equivalent column is used in query. Closes [#63799](https://github.com/ClickHouse/ClickHouse/issues/63799). [#63819](https://github.com/ClickHouse/ClickHouse/pull/63819) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#63991](https://github.com/ClickHouse/ClickHouse/issues/63991): Fix incorrect select query result when parallel replicas were used to read from a Materialized View. [#63861](https://github.com/ClickHouse/ClickHouse/pull/63861) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#64033](https://github.com/ClickHouse/ClickHouse/issues/64033): Fix an error `Database name is empty` for remote queries with lambdas over the cluster with modified default database. Fixes [#63471](https://github.com/ClickHouse/ClickHouse/issues/63471). [#63864](https://github.com/ClickHouse/ClickHouse/pull/63864) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#64561](https://github.com/ClickHouse/ClickHouse/issues/64561): Fix SIGSEGV due to CPU/Real (`query_profiler_real_time_period_ns`/`query_profiler_cpu_time_period_ns`) profiler (has been an issue since 2022, that leads to periodic server crashes, especially if you were using distributed engine). [#63865](https://github.com/ClickHouse/ClickHouse/pull/63865) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#64011](https://github.com/ClickHouse/ClickHouse/issues/64011): Fix analyzer - IN function with arbitrary deep sub-selects in materialized view to use insertion block. [#63930](https://github.com/ClickHouse/ClickHouse/pull/63930) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#64238](https://github.com/ClickHouse/ClickHouse/issues/64238): Fix resolve of unqualified COLUMNS matcher. Preserve the input columns order and forbid usage of unknown identifiers. [#63962](https://github.com/ClickHouse/ClickHouse/pull/63962) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#64103](https://github.com/ClickHouse/ClickHouse/issues/64103): Deserialize untrusted binary inputs in a safer way. [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#64170](https://github.com/ClickHouse/ClickHouse/issues/64170): Add missing settings to recoverLostReplica. [#64040](https://github.com/ClickHouse/ClickHouse/pull/64040) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#64322](https://github.com/ClickHouse/ClickHouse/issues/64322): This fix will use a proper redefined context with the correct definer for each individual view in the query pipeline. Closes [#63777](https://github.com/ClickHouse/ClickHouse/issues/63777). [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)).
* Backported in [#64382](https://github.com/ClickHouse/ClickHouse/issues/64382): Fix analyzer: "Not found column" error is fixed when using INTERPOLATE. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#64568](https://github.com/ClickHouse/ClickHouse/issues/64568): Fix creating backups to S3 buckets with different credentials from the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#64272](https://github.com/ClickHouse/ClickHouse/issues/64272): Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView. [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#64330](https://github.com/ClickHouse/ClickHouse/issues/64330): The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#64254](https://github.com/ClickHouse/ClickHouse/issues/64254): Ignore `text_log` config when using Keeper. [#64218](https://github.com/ClickHouse/ClickHouse/pull/64218) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#64690](https://github.com/ClickHouse/ClickHouse/issues/64690): Fix Query Tree size validation. Closes [#63701](https://github.com/ClickHouse/ClickHouse/issues/63701). [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#64409](https://github.com/ClickHouse/ClickHouse/issues/64409): Fix `Logical error: Bad cast` for `Buffer` table with `PREWHERE`. Fixes [#64172](https://github.com/ClickHouse/ClickHouse/issues/64172). [#64388](https://github.com/ClickHouse/ClickHouse/pull/64388) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#64727](https://github.com/ClickHouse/ClickHouse/issues/64727): Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#64623](https://github.com/ClickHouse/ClickHouse/issues/64623): Fix an error `Cannot find column` in distributed queries with constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#64680](https://github.com/ClickHouse/ClickHouse/issues/64680): Fix [#64612](https://github.com/ClickHouse/ClickHouse/issues/64612). Do not rewrite aggregation if `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#64942](https://github.com/ClickHouse/ClickHouse/issues/64942): Fix OrderByLimitByDuplicateEliminationVisitor across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#64871](https://github.com/ClickHouse/ClickHouse/issues/64871): Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via HTTP protocol, asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)).
#### CI Fix or Improvement (changelog entry is not required)
* Backported in [#63364](https://github.com/ClickHouse/ClickHouse/issues/63364): Implement cumulative A Sync status. [#61464](https://github.com/ClickHouse/ClickHouse/pull/61464) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#63338](https://github.com/ClickHouse/ClickHouse/issues/63338): Use `/commit/` to have the URLs in [reports](https://play.clickhouse.com/play?user=play#c2VsZWN0IGRpc3RpbmN0IGNvbW1pdF91cmwgZnJvbSBjaGVja3Mgd2hlcmUgY2hlY2tfc3RhcnRfdGltZSA+PSBub3coKSAtIGludGVydmFsIDEgbW9udGggYW5kIHB1bGxfcmVxdWVzdF9udW1iZXI9NjA1MzI=) like https://github.com/ClickHouse/ClickHouse/commit/44f8bc5308b53797bec8cccc3bd29fab8a00235d and not like https://github.com/ClickHouse/ClickHouse/commits/44f8bc5308b53797bec8cccc3bd29fab8a00235d. [#63331](https://github.com/ClickHouse/ClickHouse/pull/63331) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#63376](https://github.com/ClickHouse/ClickHouse/issues/63376):. [#63366](https://github.com/ClickHouse/ClickHouse/pull/63366) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#63571](https://github.com/ClickHouse/ClickHouse/issues/63571):. [#63551](https://github.com/ClickHouse/ClickHouse/pull/63551) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Backported in [#63651](https://github.com/ClickHouse/ClickHouse/issues/63651): Fix 02362_part_log_merge_algorithm flaky test. [#63635](https://github.com/ClickHouse/ClickHouse/pull/63635) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Backported in [#63828](https://github.com/ClickHouse/ClickHouse/issues/63828): Fix test_odbc_interaction from aarch64 [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63787](https://github.com/ClickHouse/ClickHouse/pull/63787) ([alesapin](https://github.com/alesapin)).
* Backported in [#63897](https://github.com/ClickHouse/ClickHouse/issues/63897): Fix test `test_catboost_evaluate` for aarch64. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63789](https://github.com/ClickHouse/ClickHouse/pull/63789) ([alesapin](https://github.com/alesapin)).
* Backported in [#63889](https://github.com/ClickHouse/ClickHouse/issues/63889): Remove HDFS from disks config for one integration test for arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63832](https://github.com/ClickHouse/ClickHouse/pull/63832) ([alesapin](https://github.com/alesapin)).
* Backported in [#63881](https://github.com/ClickHouse/ClickHouse/issues/63881): Bump version for old image in test_short_strings_aggregation to make it work on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63836](https://github.com/ClickHouse/ClickHouse/pull/63836) ([alesapin](https://github.com/alesapin)).
* Backported in [#63919](https://github.com/ClickHouse/ClickHouse/issues/63919): Disable test `test_non_default_compression/test.py::test_preconfigured_deflateqpl_codec` on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63839](https://github.com/ClickHouse/ClickHouse/pull/63839) ([alesapin](https://github.com/alesapin)).
* Backported in [#63971](https://github.com/ClickHouse/ClickHouse/issues/63971): Fix 02124_insert_deduplication_token_multiple_blocks. [#63950](https://github.com/ClickHouse/ClickHouse/pull/63950) ([Han Fei](https://github.com/hanfei1991)).
* Backported in [#64049](https://github.com/ClickHouse/ClickHouse/issues/64049): Add `ClickHouseVersion.copy` method. Create a branch release in advance without spinning out the release to increase the stability. [#64039](https://github.com/ClickHouse/ClickHouse/pull/64039) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#64078](https://github.com/ClickHouse/ClickHouse/issues/64078): The mime type is not 100% reliable for Python and shell scripts without shebangs; add a check for file extension. [#64062](https://github.com/ClickHouse/ClickHouse/pull/64062) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#64161](https://github.com/ClickHouse/ClickHouse/issues/64161): Add retries in git submodule update. [#64125](https://github.com/ClickHouse/ClickHouse/pull/64125) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
* Backported in [#64589](https://github.com/ClickHouse/ClickHouse/issues/64589): Disabled `enable_vertical_final` setting by default. This feature should not be used because it has a bug: [#64543](https://github.com/ClickHouse/ClickHouse/issues/64543). [#64544](https://github.com/ClickHouse/ClickHouse/pull/64544) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#64880](https://github.com/ClickHouse/ClickHouse/issues/64880): This PR fixes an error when a user in a specific situation can escalate their privileges on the default database without necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)).
#### NO CL CATEGORY
* Backported in [#63306](https://github.com/ClickHouse/ClickHouse/issues/63306):. [#63297](https://github.com/ClickHouse/ClickHouse/pull/63297) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#63710](https://github.com/ClickHouse/ClickHouse/issues/63710):. [#63415](https://github.com/ClickHouse/ClickHouse/pull/63415) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Backport [#64363](https://github.com/ClickHouse/ClickHouse/issues/64363) to 24.4: Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts"'. [#64905](https://github.com/ClickHouse/ClickHouse/pull/64905) ([Raúl Marín](https://github.com/Algunenano)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* group_by_use_nulls strikes back [#62922](https://github.com/ClickHouse/ClickHouse/pull/62922) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Add `FROM` keyword to `TRUNCATE ALL TABLES` [#63241](https://github.com/ClickHouse/ClickHouse/pull/63241) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* More checks for concurrently deleted files and dirs in system.remote_data_paths [#63274](https://github.com/ClickHouse/ClickHouse/pull/63274) ([Alexander Gololobov](https://github.com/davenger)).
* Try fix segfault in `MergeTreeReadPoolBase::createTask` [#63323](https://github.com/ClickHouse/ClickHouse/pull/63323) ([Antonio Andelic](https://github.com/antonio2368)).
* Skip unaccessible table dirs in system.remote_data_paths [#63330](https://github.com/ClickHouse/ClickHouse/pull/63330) ([Alexander Gololobov](https://github.com/davenger)).
* Workaround for `oklch()` inside canvas bug for firefox [#63404](https://github.com/ClickHouse/ClickHouse/pull/63404) ([Sergei Trifonov](https://github.com/serxa)).
* Cancel S3 reads properly when parallel reads are used [#63687](https://github.com/ClickHouse/ClickHouse/pull/63687) ([Antonio Andelic](https://github.com/antonio2368)).
* Userspace page cache: don't collect stats if cache is unused [#63730](https://github.com/ClickHouse/ClickHouse/pull/63730) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix sanitizers [#64090](https://github.com/ClickHouse/ClickHouse/pull/64090) ([Azat Khuzhin](https://github.com/azat)).
* Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts [#64363](https://github.com/ClickHouse/ClickHouse/pull/64363) ([Kruglov Pavel](https://github.com/Avogar)).
* CI: Critical bugfix category in PR template [#64480](https://github.com/ClickHouse/ClickHouse/pull/64480) ([Max K.](https://github.com/maxknv)).

View File

@ -91,6 +91,9 @@ cd ./utils/check-style
# Check python type hinting with mypy
./check-mypy
# Check python with flake8
./check-flake8
# Check code with codespell
./check-typos

View File

@ -54,6 +54,7 @@ SELECT * FROM test_table;
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
## See also

View File

@ -235,6 +235,7 @@ libhdfs3 support HDFS namenode HA.
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}

View File

@ -53,14 +53,14 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
This example uses the [docker compose recipe](https://github.com/ClickHouse/examples/tree/5fdc6ff72f4e5137e23ea075c88d3f44b0202490/docker-compose-recipes/recipes/ch-and-minio-S3), which integrates ClickHouse and MinIO. You should be able to reproduce the same queries using S3 by replacing the endpoint and authentication values.
Notice that the S3 endpoint in the `ENGINE` configuration uses the parameter token `{_partition_id}` as part of the S3 object (filename), and that the SELECT queries select against those resulting object names (e.g., `test_3.csv`).
Notice that the S3 endpoint in the `ENGINE` configuration uses the parameter token `{_partition_id}` as part of the S3 object (filename), and that the SELECT queries select against those resulting object names (e.g., `test_3.csv`).
:::note
As shown in the example, querying from S3 tables that are partitioned is
not directly supported at this time, but can be accomplished by querying the individual partitions
using the S3 table function.
The primary use-case for writing
The primary use-case for writing
partitioned data in S3 is to enable transferring that data into another
ClickHouse system (for example, moving from on-prem systems to ClickHouse
Cloud). Because ClickHouse datasets are often very large, and network
@ -78,9 +78,9 @@ CREATE TABLE p
)
ENGINE = S3(
# highlight-next-line
'http://minio:10000/clickhouse//test_{_partition_id}.csv',
'minioadmin',
'minioadminpassword',
'http://minio:10000/clickhouse//test_{_partition_id}.csv',
'minioadmin',
'minioadminpassword',
'CSV')
PARTITION BY column3
```
@ -145,6 +145,7 @@ Code: 48. DB::Exception: Received from localhost:9000. DB::Exception: Reading fr
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns).

View File

@ -267,7 +267,7 @@ For introspection use `system.s3queue` stateless table and `system.s3queue_log`
`exception` String
)
ENGINE = SystemS3Queue
COMMENT 'SYSTEM TABLE is built on the fly.' │
COMMENT 'Contains in-memory state of S3Queue metadata and currently processed rows per file.' │
└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

View File

@ -6,35 +6,26 @@ sidebar_label: MergeTree
# MergeTree
The `MergeTree` engine and other engines of this family (`*MergeTree`) are the most commonly used and most robust ClickHouse table engines.
The `MergeTree` engine and other engines of the `MergeTree` family (e.g. `ReplacingMergeTree`, `AggregatingMergeTree`) are the most commonly used and most robust table engines in ClickHouse.
Engines in the `MergeTree` family are designed for inserting a very large amount of data into a table. The data is quickly written to the table part by part, then rules are applied for merging the parts in the background. This method is much more efficient than continually rewriting the data in storage during insert.
`MergeTree`-family table engines are designed for high data ingest rates and huge data volumes.
Insert operations create table parts which are merged by a background process with other table parts.
Main features:
Main features of `MergeTree`-family table engines:
- Stores data sorted by primary key.
- The table's primary key determines the sort order within each table part (clustered index). The primary key also does not reference individual rows but blocks of 8192 rows called granules. This makes primary keys of huge data sets small enough to remain loaded in main memory, while still providing fast access to on-disk data.
This allows you to create a small sparse index that helps find data faster.
- Tables can be partitioned using an arbitrary partition expression. Partition pruning ensures partitions are omitted from reading when the query allows it.
- Partitions can be used if the [partitioning key](/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md) is specified.
- Data can be replicated across multiple cluster nodes for high availability, failover, and zero downtime upgrades. See [Data replication](/docs/en/engines/table-engines/mergetree-family/replication.md).
ClickHouse supports certain operations with partitions that are more efficient than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query.
- `MergeTree` table engines support various statistics kinds and sampling methods to help query optimization.
- Data replication support.
The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](/docs/en/engines/table-engines/mergetree-family/replication.md).
- Data sampling support.
If necessary, you can set the data sampling method in the table.
:::info
The [Merge](/docs/en/engines/table-engines/special/merge.md/#merge) engine does not belong to the `*MergeTree` family.
:::note
Despite a similar name, the [Merge](/docs/en/engines/table-engines/special/merge.md/#merge) engine is different from `*MergeTree` engines.
:::
If you need to update rows frequently, we recommend using the [`ReplacingMergeTree`](/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md) table engine. Using `ALTER TABLE my_table UPDATE` to update rows triggers a mutation, which causes parts to be re-written and uses IO/resources. With `ReplacingMergeTree`, you can simply insert the updated rows and the old rows will be replaced according to the table sorting key.
## Creating a Table {#table_engine-mergetree-creating-a-table}
## Creating Tables {#table_engine-mergetree-creating-a-table}
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@ -59,23 +50,24 @@ ORDER BY expr
[SETTINGS name = value, ...]
```
For a description of parameters, see the [CREATE query description](/docs/en/sql-reference/statements/create/table.md).
For a detailed description of the parameters, see the [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md) statement.
### Query Clauses {#mergetree-query-clauses}
#### ENGINE
`ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters.
`ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine has no parameters.
#### ORDER BY
`ORDER BY` — The sorting key.
A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`.
A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID + 1, EventDate)`.
ClickHouse uses the sorting key as a primary key if the primary key is not defined explicitly by the `PRIMARY KEY` clause.
If no primary key is defined (i.e. `PRIMARY KEY` was not specified), ClickHouse uses the sorting key as the primary key.
Use the `ORDER BY tuple()` syntax, if you do not need sorting, or set `create_table_empty_primary_key_by_default` to `true` to use the `ORDER BY tuple()` syntax by default. See [Selecting the Primary Key](#selecting-the-primary-key).
If no sorting is required, you can use syntax `ORDER BY tuple()`.
Alternatively, if setting `create_table_empty_primary_key_by_default` is enabled, `ORDER BY tuple()` is implicitly added to `CREATE TABLE` statements. See [Selecting a Primary Key](#selecting-a-primary-key).
#### PARTITION BY
@ -87,100 +79,32 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
`PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional.
By default the primary key is the same as the sorting key (which is specified by the `ORDER BY` clause). Thus in most cases it is unnecessary to specify a separate `PRIMARY KEY` clause.
Specifying a sorting key (using `ORDER BY` clause) implicitly specifies a primary key.
It is usually not necessary to specify the primary key in addition to the sorting key.
#### SAMPLE BY
`SAMPLE BY` — An expression for sampling. Optional.
`SAMPLE BY` — A sampling expression. Optional.
If a sampling expression is used, the primary key must contain it. The result of a sampling expression must be an unsigned integer. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.
If specified, it must be contained in the primary key.
The sampling expression must result in an unsigned integer.
Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.
#### TTL
`TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional.
`TTL` — A list of rules that specify the storage duration of rows and the logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional.
Expression must have one `Date` or `DateTime` column as a result. Example:
```
TTL date + INTERVAL 1 DAY
```
Expression must result in a `Date` or `DateTime`, e.g. `TTL date + INTERVAL 1 DAY`.
Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`), or aggregating values in expired rows. Default type of the rule is removal (`DELETE`). List of multiple rules can be specified, but there should be no more than one `DELETE` rule.
For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl).
### SETTINGS
Additional parameters that control the behavior of the `MergeTree` (optional):
#### SETTINGS
#### index_granularity
`index_granularity` — Maximum number of data rows between the marks of an index. Default value: 8192. See [Data Storage](#mergetree-data-storage).
#### index_granularity_bytes
`index_granularity_bytes` — Maximum size of data granules in bytes. Default value: 10Mb. To restrict the granule size only by number of rows, set to 0 (not recommended). See [Data Storage](#mergetree-data-storage).
#### min_index_granularity_bytes
`min_index_granularity_bytes` — Min allowed size of data granules in bytes. Default value: 1024b. To provide a safeguard against accidentally creating tables with very low index_granularity_bytes. See [Data Storage](#mergetree-data-storage).
#### enable_mixed_granularity_parts
`enable_mixed_granularity_parts` — Enables or disables transitioning to control the granule size with the `index_granularity_bytes` setting. Before version 19.11, there was only the `index_granularity` setting for restricting granule size. The `index_granularity_bytes` setting improves ClickHouse performance when selecting data from tables with big rows (tens and hundreds of megabytes). If you have tables with big rows, you can enable this setting for the tables to improve the efficiency of `SELECT` queries.
#### use_minimalistic_part_header_in_zookeeper
`use_minimalistic_part_header_in_zookeeper` — Storage method of the data parts headers in ZooKeeper. If `use_minimalistic_part_header_in_zookeeper=1`, then ZooKeeper stores less data. For more information, see the [setting description](/docs/en/operations/server-configuration-parameters/settings.md/#server-settings-use_minimalistic_part_header_in_zookeeper) in “Server configuration parameters”.
#### min_merge_bytes_to_use_direct_io
`min_merge_bytes_to_use_direct_io` — The minimum data volume for merge operation that is required for using direct I/O access to the storage disk. When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. If the volume exceeds `min_merge_bytes_to_use_direct_io` bytes, ClickHouse reads and writes the data to the storage disk using the direct I/O interface (`O_DIRECT` option). If `min_merge_bytes_to_use_direct_io = 0`, then direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes.
#### merge_with_ttl_timeout
`merge_with_ttl_timeout` — Minimum delay in seconds before repeating a merge with delete TTL. Default value: `14400` seconds (4 hours).
#### merge_with_recompression_ttl_timeout
`merge_with_recompression_ttl_timeout` — Minimum delay in seconds before repeating a merge with recompression TTL. Default value: `14400` seconds (4 hours).
#### try_fetch_recompressed_part_timeout
`try_fetch_recompressed_part_timeout` — Timeout (in seconds) before starting merge with recompression. During this time ClickHouse tries to fetch recompressed part from replica which assigned this merge with recompression. Default value: `7200` seconds (2 hours).
#### write_final_mark
`write_final_mark` — Enables or disables writing the final index mark at the end of data part (after the last byte). Default value: 1. Don't turn it off.
#### merge_max_block_size
`merge_max_block_size` — Maximum number of rows in block for merge operations. Default value: 8192.
#### storage_policy
`storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes).
#### min_bytes_for_wide_part
`min_bytes_for_wide_part`, `min_rows_for_wide_part` — Minimum number of bytes/rows in a data part that can be stored in `Wide` format. You can set one, both or none of these settings. See [Data Storage](#mergetree-data-storage).
#### max_parts_in_total
`max_parts_in_total` — Maximum number of parts in all partitions.
#### max_compress_block_size
`max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also specify this setting in the global settings (see [max_compress_block_size](/docs/en/operations/settings/settings.md/#max-compress-block-size) setting). The value specified when table is created overrides the global value for this setting.
#### min_compress_block_size
`min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](/docs/en/operations/settings/settings.md/#min-compress-block-size) setting). The value specified when table is created overrides the global value for this setting.
#### max_partitions_to_read
`max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. You can also specify setting [max_partitions_to_read](/docs/en/operations/settings/merge-tree-settings.md/#max-partitions-to-read) in the global setting.
#### allow_experimental_optimized_row_order
`allow_experimental_optimized_row_order` - Experimental. Enables the optimization of the row order during inserts to improve the compressibility of the data for compression codecs (e.g. LZ4). Analyzes and reorders the data, and thus increases the CPU overhead of inserts.
See [MergeTree Settings](../../../operations/settings/merge-tree-settings.md).
**Example of Sections Setting**
@ -270,7 +194,7 @@ ClickHouse does not require a unique primary key. You can insert multiple rows w
You can use `Nullable`-typed expressions in the `PRIMARY KEY` and `ORDER BY` clauses but it is strongly discouraged. To allow this feature, turn on the [allow_nullable_key](/docs/en/operations/settings/settings.md/#allow-nullable-key) setting. The [NULLS_LAST](/docs/en/sql-reference/statements/select/order-by.md/#sorting-of-special-values) principle applies for `NULL` values in the `ORDER BY` clause.
### Selecting the Primary Key {#selecting-the-primary-key}
### Selecting a Primary Key {#selecting-a-primary-key}
The number of columns in the primary key is not explicitly limited. Depending on the data structure, you can include more or fewer columns in the primary key. This may:

View File

@ -102,6 +102,7 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
## Settings {#settings}

View File

@ -108,6 +108,7 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
- `_path` — Path to the `URL`. Type: `LowCardinality(String)`.
- `_file` — Resource name of the `URL`. Type: `LowCardinality(String)`.
- `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}

View File

@ -2165,6 +2165,8 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `lz4`.
- [input_format_parquet_max_block_size](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_max_block_size) - Max block row size for parquet reader. Default value - `65409`.
- [input_format_parquet_prefer_block_bytes](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_prefer_block_bytes) - Average block bytes output by parquet reader. Default value - `16744704`.
## ParquetMetadata {#data-format-parquet-metadata}

View File

@ -0,0 +1,194 @@
---
slug: /en/operations/analyzer
sidebar_label: Analyzer
title: Analyzer
description: Details about ClickHouse's query analyzer
keywords: [analyzer]
---
# Analyzer
<BetaBadge />
## Known incompatibilities
In ClickHouse version `24.3`, the new query analyzer was enabled by default.
Despite fixing a large number of bugs and introducing new optimizations, it also introduces some breaking changes in ClickHouse behaviour. Please read the following changes to determine how to rewrite your queries for the new analyzer.
### Invalid queries are no longer optimized
The previous query planning infrastructure applied AST-level optimizations before the query validation step.
Optimizations could rewrite the initial query so it becomes valid and can be executed.
In the new analyzer, query validation takes place before the optimization step.
This means that invalid queries that were possible to execute before are now unsupported.
In such cases, the query must be fixed manually.
**Example 1:**
```sql
SELECT number
FROM numbers(1)
GROUP BY toString(number)
```
The query above uses column `number` in the projection list when only `toString(number)` is available after the aggregation.
In the old analyzer, `GROUP BY toString(number)` was optimized into `GROUP BY number`, making the query valid.
**Example 2:**
```sql
SELECT
number % 2 AS n,
sum(number)
FROM numbers(10)
GROUP BY n
HAVING number > 5
```
The same problem occurs in this query: column `number` is used after aggregation with another key.
The previous query analyzer fixed this query by moving the `number > 5` filter from the `HAVING` clause to the `WHERE` clause.
To fix the query, you should move all conditions that apply to non-aggregated columns to the `WHERE` section to conform to standard SQL syntax:
```sql
SELECT
number % 2 AS n,
sum(number)
FROM numbers(10)
WHERE number > 5
GROUP BY n
```
### CREATE VIEW with invalid query
The new analyzer always performs type-checking.
Previously, it was possible to create a `VIEW` with an invalid `SELECT` query. It would then fail during the first `SELECT` or `INSERT` (in the case of `MATERIALIZED VIEW`).
Now, it's not possible to create such `VIEW`s anymore.
**Example:**
```sql
CREATE TABLE source (data String) ENGINE=MergeTree ORDER BY tuple();
CREATE VIEW some_view
AS SELECT JSONExtract(data, 'test', 'DateTime64(3)')
FROM source;
```
### Known incompatibilities of the `JOIN` clause
#### Join using column from projection
An alias from the `SELECT` list cannot be used as a `JOIN USING` key by default.
A new setting, `analyzer_compatibility_join_using_top_level_identifier`, when enabled, alters the behavior of `JOIN USING` to prefer to resolve identifiers based on expressions from the projection list of the `SELECT` query, rather than using the columns from the left table directly.
**Example:**
```sql
SELECT a + 1 AS b, t2.s
FROM Values('a UInt64, b UInt64', (1, 1)) AS t1
JOIN Values('b UInt64, s String', (1, 'one'), (2, 'two')) t2
USING (b);
```
With `analyzer_compatibility_join_using_top_level_identifier` set to `true`, the join condition is interpreted as `t1.a + 1 = t2.b`, matching the behavior of earlier versions. So, the result will be `2, 'two'`.
When the setting is `false`, the join condition defaults to `t1.b = t2.b`, and the query will return `2, 'one'`.
If `b` is not present in `t1`, the query will fail with an error.
#### Changes in behavior with `JOIN USING` and `ALIAS`/`MATERIALIZED` columns
In the new analyzer, using `*` in a `JOIN USING` query that involves `ALIAS` or `MATERIALIZED` columns will include those columns in the result set by default.
**Example:**
```sql
CREATE TABLE t1 (id UInt64, payload ALIAS sipHash64(id)) ENGINE = MergeTree ORDER BY id;
INSERT INTO t1 VALUES (1), (2);
CREATE TABLE t2 (id UInt64, payload ALIAS sipHash64(id)) ENGINE = MergeTree ORDER BY id;
INSERT INTO t2 VALUES (2), (3);
SELECT * FROM t1
FULL JOIN t2 USING (payload);
```
In the new analyzer, the result of this query will include the `payload` column along with `id` from both tables. In contrast, the previous analyzer would only include these `ALIAS` columns if specific settings (`asterisk_include_alias_columns` or `asterisk_include_materialized_columns`) were enabled, and the columns might appear in a different order.
To ensure consistent and expected results, especially when migrating old queries to the new analyzer, it is advisable to specify columns explicitly in the `SELECT` clause rather than using `*`.
#### Handling of Type Modifiers for columns in `USING` Clause
In the new version of the analyzer, the rules for determining the common supertype for columns specified in the `USING` clause have been standardized to produce more predictable outcomes, especially when dealing with type modifiers like `LowCardinality` and `Nullable`.
- `LowCardinality(T)` and `T`: When a column of type `LowCardinality(T)` is joined with a column of type `T`, the resulting common supertype will be `T`, effectively discarding the `LowCardinality` modifier.
- `Nullable(T)` and `T`: When a column of type `Nullable(T)` is joined with a column of type `T`, the resulting common supertype will be `Nullable(T)`, ensuring that the nullable property is preserved.
**Example:**
```sql
SELECT id, toTypeName(id) FROM Values('id LowCardinality(String)', ('a')) AS t1
FULL OUTER JOIN Values('id String', ('b')) AS t2
USING (id);
```
In this query, the common supertype for `id` is determined as `String`, discarding the `LowCardinality` modifier from `t1`.
### Projection column names changes
During projection names computation, aliases are not substituted.
```sql
SELECT
1 + 1 AS x,
x + 1
SETTINGS allow_experimental_analyzer = 0
FORMAT PrettyCompact
┌─x─┬─plus(plus(1, 1), 1)─┐
1. │ 2 │ 3 │
└───┴─────────────────────┘
SELECT
1 + 1 AS x,
x + 1
SETTINGS allow_experimental_analyzer = 1
FORMAT PrettyCompact
┌─x─┬─plus(x, 1)─┐
1. │ 2 │ 3 │
└───┴────────────┘
```
### Incompatible function arguments types
In the new analyzer, type inference happens during initial query analysis.
This change means that type checks are done before short-circuit evaluation; thus, `if` function arguments must always have a common supertype.
**Example:**
The following query fails with `There is no supertype for types Array(UInt8), String because some of them are Array and some of them are not`:
```sql
SELECT toTypeName(if(0, [2, 3, 4], 'String'))
```
### Heterogeneous clusters
The new analyzer significantly changed the communication protocol between servers in the cluster. Thus, it's impossible to run distributed queries on servers with different `allow_experimental_analyzer` setting values.
### Mutations are interpreted by previous analyzer
Mutations are still using the old analyzer.
This means some new ClickHouse SQL features can't be used in mutations. For example, the `QUALIFY` clause.
Status can be checked [here](https://github.com/ClickHouse/ClickHouse/issues/61563).
### Unsupported features
The list of features the new analyzer currently doesn't support:
- Annoy index.
- Hypothesis index. Work in progress [here](https://github.com/ClickHouse/ClickHouse/pull/48381).
- Window view is not supported. There are no plans to support it in the future.

View File

@ -67,6 +67,23 @@ To manage named collections with DDL a user must have the `named_control_collect
In the above example the `password_sha256_hex` value is the hexadecimal representation of the SHA256 hash of the password. This configuration for the user `default` has the attribute `replace=true` because the default configuration has a plain text `password` set, and it is not possible to have both plain text and sha256 hex passwords set for a user.
:::
### Storage for named collections
Named collections can either be stored on local disk or in zookeeper/keeper. By default local storage is used.
To configure named collections storage in keeper, add a `type` (equal to either `keeper` or `zookeeper`) and a `path` (path in keeper, where named collections will be stored) to the `named_collections_storage` section in the configuration file:
```
<clickhouse>
<named_collections_storage>
<type>zookeeper</type>
<path>/named_collections_path/</path>
<update_timeout_ms>1000</update_timeout_ms>
</named_collections_storage>
</clickhouse>
```
An optional configuration parameter `update_timeout_ms` by default is equal to `5000`.
## Storing named collections in configuration files
### XML example
@ -443,3 +460,59 @@ SELECT dictGet('dict', 'b', 1);
│ a │
└─────────────────────────┘
```
## Named collections for accessing Kafka
For a description of the parameters, see [Kafka](../engines/table-engines/integrations/kafka.md).
### DDL example
```sql
CREATE NAMED COLLECTION my_kafka_cluster AS
kafka_broker_list = 'localhost:9092',
kafka_topic_list = 'kafka_topic',
kafka_group_name = 'consumer_group',
kafka_format = 'JSONEachRow',
kafka_max_block_size = '1048576';
```
### XML example
```xml
<clickhouse>
<named_collections>
<my_kafka_cluster>
<kafka_broker_list>localhost:9092</kafka_broker_list>
<kafka_topic_list>kafka_topic</kafka_topic_list>
<kafka_group_name>consumer_group</kafka_group_name>
<kafka_format>JSONEachRow</kafka_format>
<kafka_max_block_size>1048576</kafka_max_block_size>
</my_kafka_cluster>
</named_collections>
</clickhouse>
```
### Example of using named collections with a Kafka table
Both of the following examples use the same named collection `my_kafka_cluster`:
```sql
CREATE TABLE queue
(
timestamp UInt64,
level String,
message String
)
ENGINE = Kafka(my_kafka_cluster);
CREATE TABLE queue
(
timestamp UInt64,
level String,
message String
)
ENGINE = Kafka(my_kafka_cluster)
SETTINGS kafka_num_consumers = 4,
kafka_thread_per_consumer = 1;
```

View File

@ -1206,6 +1206,16 @@ Expired time for HSTS in seconds. The default value is 0 means clickhouse disabl
<hsts_max_age>600000</hsts_max_age>
```
## mlock_executable {#mlock_executable}
Perform mlockall after startup to lower first queries latency and to prevent clickhouse executable from being paged out under high IO load. Enabling this option is recommended but will lead to increased startup time for up to a few seconds.
Keep in mind that this parameter would not work without "CAP_IPC_LOCK" capability.
**Example**
``` xml
<mlock_executable>false</mlock_executable>
```
## include_from {#include_from}
The path to the file with substitutions. Both XML and YAML formats are supported.
@ -1353,6 +1363,26 @@ Examples:
<listen_host>127.0.0.1</listen_host>
```
## listen_try {#listen_try}
The server will not exit if IPv6 or IPv4 networks are unavailable while trying to listen.
Examples:
``` xml
<listen_try>0</listen_try>
```
## listen_reuse_port {#listen_reuse_port}
Allow multiple servers to listen on the same address:port. Requests will be routed to a random server by the operating system. Enabling this setting is not recommended.
Examples:
``` xml
<listen_reuse_port>0</listen_reuse_port>
```
## listen_backlog {#listen_backlog}
Backlog (queue size of pending connections) of the listen socket.

View File

@ -3,9 +3,126 @@ slug: /en/operations/settings/merge-tree-settings
title: "MergeTree tables settings"
---
The values of `merge_tree` settings (for all MergeTree tables) can be viewed in the table `system.merge_tree_settings`, they can be overridden in `config.xml` in the `merge_tree` section, or set in the `SETTINGS` section of each table.
System table `system.merge_tree_settings` shows the globally set MergeTree settings.
These are example overrides for `max_suspicious_broken_parts`:
MergeTree settings can be set in the `merge_tree` section of the server config file, or specified for each `MergeTree` table individually in
the `SETTINGS` clause of the `CREATE TABLE` statement.
Example for customizing setting `max_suspicious_broken_parts`:
Configure the default for all `MergeTree` tables in the server configuration file:
``` text
<merge_tree>
<max_suspicious_broken_parts>5</max_suspicious_broken_parts>
</merge_tree>
```
Set for a particular table:
``` sql
CREATE TABLE tab
(
`A` Int64
)
ENGINE = MergeTree
ORDER BY tuple()
SETTINGS max_suspicious_broken_parts = 500;
```
Change the settings for a particular table using `ALTER TABLE ... MODIFY SETTING`:
```sql
ALTER TABLE tab MODIFY SETTING max_suspicious_broken_parts = 100;
-- reset to global default (value from system.merge_tree_settings)
ALTER TABLE tab RESET SETTING max_suspicious_broken_parts;
```
## index_granularity
Maximum number of data rows between the marks of an index.
Default value: 8192.
## index_granularity_bytes
Maximum size of data granules in bytes.
Default value: 10Mb.
To restrict the granule size only by number of rows, set to 0 (not recommended).
## min_index_granularity_bytes
Min allowed size of data granules in bytes.
Default value: 1024b.
This setting provides a safeguard against accidentally creating tables with a very low `index_granularity_bytes`.
## enable_mixed_granularity_parts
Enables or disables transitioning to control the granule size with the `index_granularity_bytes` setting. Before version 19.11, there was only the `index_granularity` setting for restricting granule size. The `index_granularity_bytes` setting improves ClickHouse performance when selecting data from tables with big rows (tens and hundreds of megabytes). If you have tables with big rows, you can enable this setting for the tables to improve the efficiency of `SELECT` queries.
## use_minimalistic_part_header_in_zookeeper
Storage method of the data parts headers in ZooKeeper. If enabled, ZooKeeper stores less data. For details, see [here](../server-configuration-parameters/settings.md/#server-settings-use_minimalistic_part_header_in_zookeeper).
## min_merge_bytes_to_use_direct_io
The minimum data volume for merge operation that is required for using direct I/O access to the storage disk.
When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged.
If the volume exceeds `min_merge_bytes_to_use_direct_io` bytes, ClickHouse reads and writes the data to the storage disk using the direct I/O interface (`O_DIRECT` option).
If `min_merge_bytes_to_use_direct_io = 0`, then direct I/O is disabled.
Default value: `10 * 1024 * 1024 * 1024` bytes.
## merge_with_ttl_timeout
Minimum delay in seconds before repeating a merge with delete TTL.
Default value: `14400` seconds (4 hours).
## merge_with_recompression_ttl_timeout
Minimum delay in seconds before repeating a merge with recompression TTL.
Default value: `14400` seconds (4 hours).
## write_final_mark
Enables or disables writing the final index mark at the end of data part (after the last byte).
Default value: 1.
Don't change or bad things will happen.
## storage_policy
Storage policy.
## min_bytes_for_wide_part
Minimum number of bytes/rows in a data part that can be stored in `Wide` format.
You can set one, both or none of these settings.
## max_compress_block_size
Maximum size of blocks of uncompressed data before compressing for writing to a table.
You can also specify this setting in the global settings (see [max_compress_block_size](/docs/en/operations/settings/settings.md/#max-compress-block-size) setting).
The value specified when table is created overrides the global value for this setting.
## min_compress_block_size
Minimum size of blocks of uncompressed data required for compression when writing the next mark.
You can also specify this setting in the global settings (see [min_compress_block_size](/docs/en/operations/settings/settings.md/#min-compress-block-size) setting).
The value specified when table is created overrides the global value for this setting.
## max_partitions_to_read
Limits the maximum number of partitions that can be accessed in one query.
You can also specify setting [max_partitions_to_read](/docs/en/operations/settings/merge-tree-settings.md/#max-partitions-to-read) in the global setting.
## max_suspicious_broken_parts
@ -17,37 +134,6 @@ Possible values:
Default value: 100.
Override example in `config.xml`:
``` text
<merge_tree>
<max_suspicious_broken_parts>5</max_suspicious_broken_parts>
</merge_tree>
```
An example to set in `SETTINGS` for a particular table:
``` sql
CREATE TABLE foo
(
`A` Int64
)
ENGINE = MergeTree
ORDER BY tuple()
SETTINGS max_suspicious_broken_parts = 500;
```
An example of changing the settings for a specific table with the `ALTER TABLE ... MODIFY SETTING` command:
``` sql
ALTER TABLE foo
MODIFY SETTING max_suspicious_broken_parts = 100;
-- reset to default (use value from system.merge_tree_settings)
ALTER TABLE foo
RESET SETTING max_suspicious_broken_parts;
```
## parts_to_throw_insert {#parts-to-throw-insert}
If the number of active parts in a single partition exceeds the `parts_to_throw_insert` value, `INSERT` is interrupted with the `Too many parts (N). Merges are processing significantly slower than inserts` exception.
@ -301,6 +387,8 @@ Default value: 10800
## try_fetch_recompressed_part_timeout
Timeout (in seconds) before starting merge with recompression. During this time ClickHouse tries to fetch recompressed part from replica which assigned this merge with recompression.
Recompression is slow in most cases, so we don't start a merge with recompression until this timeout expires; until then, we try to fetch the recompressed part from the replica which was assigned this merge with recompression.
Possible values:
@ -886,10 +974,12 @@ Default value: false
- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting
### allow_experimental_optimized_row_order
### optimize_row_order
Controls if the row order should be optimized during inserts to improve the compressibility of the newly inserted table part.
Only has an effect for ordinary MergeTree-engine tables. Does nothing for specialized MergeTree engine tables (e.g. CollapsingMergeTree).
MergeTree tables are (optionally) compressed using [compression codecs](../../sql-reference/statements/create/table.md#column_compression_codec).
Generic compression codecs such as LZ4 and ZSTD achieve maximum compression rates if the data exposes patterns.
Long runs of the same value typically compress very well.

View File

@ -1417,6 +1417,17 @@ Compression method used in output Parquet format. Supported codecs: `snappy`, `l
Default value: `lz4`.
### input_format_parquet_max_block_size {#input_format_parquet_max_block_size}
Max block row size for parquet reader. By controlling the number of rows in each block, you can control the memory usage,
and in some operators that cache blocks, you can improve the accuracy of the operator's memory control.
Default value: `65409`.
### input_format_parquet_prefer_block_bytes {#input_format_parquet_prefer_block_bytes}
Average block bytes output by parquet reader. Lowering the configuration in the case of reading some high compression parquet relieves the memory pressure.
Default value: `65409 * 256 = 16744704`
## Hive format settings {#hive-format-settings}
### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}

View File

@ -1590,6 +1590,22 @@ Possible values:
Default value: `default`.
## parallel_replicas_custom_key_range_lower {#parallel_replicas_custom_key_range_lower}
Allows the filter type `range` to split the work evenly between replicas based on the custom range `[parallel_replicas_custom_key_range_lower, INT_MAX]`.
When used in conjunction with [parallel_replicas_custom_key_range_upper](#parallel_replicas_custom_key_range_upper), it lets the filter evenly split the work over replicas for the range `[parallel_replicas_custom_key_range_lower, parallel_replicas_custom_key_range_upper]`.
Note: This setting will not cause any additional data to be filtered during query processing, rather it changes the points at which the range filter breaks up the range `[0, INT_MAX]` for parallel processing.
## parallel_replicas_custom_key_range_upper {#parallel_replicas_custom_key_range_upper}
Allows the filter type `range` to split the work evenly between replicas based on the custom range `[0, parallel_replicas_custom_key_range_upper]`. A value of 0 disables the upper bound, setting it to the max value of the custom key expression.
When used in conjunction with [parallel_replicas_custom_key_range_lower](#parallel_replicas_custom_key_range_lower), it lets the filter evenly split the work over replicas for the range `[parallel_replicas_custom_key_range_lower, parallel_replicas_custom_key_range_upper]`.
Note: This setting will not cause any additional data to be filtered during query processing, rather it changes the points at which the range filter breaks up the range `[0, INT_MAX]` for parallel processing.
## allow_experimental_parallel_reading_from_replicas
Enables or disables sending SELECT queries to all replicas of a table (up to `max_parallel_replicas`). Reading is parallelized and coordinated dynamically. It will work for any kind of MergeTree table.
@ -3170,6 +3186,18 @@ Possible values:
Default value: `0`.
## lightweight_deletes_sync {#lightweight_deletes_sync}
The same as 'mutation_sync', but controls only execution of lightweight deletes.
Possible values:
- 0 - Mutations execute asynchronously.
- 1 - The query waits for the lightweight deletes to complete on the current server.
- 2 - The query waits for the lightweight deletes to complete on all replicas (if they exist).
Default value: `2`.
**See Also**
- [Synchronicity of ALTER Queries](../../sql-reference/statements/alter/index.md#synchronicity-of-alter-queries)
@ -3850,6 +3878,10 @@ Possible values:
Default value: 30.
:::note
It's applicable only to the default profile. A server reboot is required for the changes to take effect.
:::
## http_receive_timeout {#http_receive_timeout}
HTTP receive timeout (in seconds).

View File

@ -18,7 +18,7 @@ This tool works via HTTP, not via pipes, shared memory, or TCP because:
However it can be used as standalone tool from command line with the following
parameters in POST-request URL:
- `connection_string` -- ODBC connection string.
- `columns` -- columns in ClickHouse NamesAndTypesList format, name in backticks,
- `sample_block` -- columns description in ClickHouse NamesAndTypesList format, name in backticks,
type as string. Name and type are space separated, rows separated with
newline.
- `max_block_size` -- optional parameter, sets maximum size of single block.

View File

@ -0,0 +1,95 @@
---
slug: /en/sql-reference/aggregate-functions/reference/flamegraph
sidebar_position: 110
---
# flameGraph
Aggregate function which builds a [flamegraph](https://www.brendangregg.com/flamegraphs.html) using the list of stacktraces. Outputs an array of strings which can be used by [flamegraph.pl utility](https://github.com/brendangregg/FlameGraph) to render an SVG of the flamegraph.
## Syntax
```sql
flameGraph(traces, [size], [ptr])
```
## Parameters
- `traces` — a stacktrace. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)).
- `size` — an allocation size for memory profiling. (optional - default `1`). [UInt64](../../data-types/int-uint.md).
- `ptr` — an allocation address. (optional - default `0`). [UInt64](../../data-types/int-uint.md).
:::note
In the case where `ptr != 0`, a flameGraph will map allocations (size > 0) and deallocations (size < 0) with the same size and ptr.
Only allocations which were not freed are shown. Non mapped deallocations are ignored.
:::
## Returned value
- An array of strings for use with [flamegraph.pl utility](https://github.com/brendangregg/FlameGraph). [Array](../../data-types/array.md)([String](../../data-types/string.md)).
## Examples
### Building a flamegraph based on a CPU query profiler
```sql
SET query_profiler_cpu_time_period_ns=10000000;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
```
```text
clickhouse client --allow_introspection_functions=1 -q "select arrayJoin(flameGraph(arrayReverse(trace))) from system.trace_log where trace_type = 'CPU' and query_id = 'xxx'" | ~/dev/FlameGraph/flamegraph.pl > flame_cpu.svg
```
### Building a flamegraph based on a memory query profiler, showing all allocations
```sql
SET memory_profiler_sample_probability=1, max_untracked_memory=1;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
```
```text
clickhouse client --allow_introspection_functions=1 -q "select arrayJoin(flameGraph(trace, size)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem.svg
```
### Building a flamegraph based on a memory query profiler, showing allocations which were not deallocated in query context
```sql
SET memory_profiler_sample_probability=1, max_untracked_memory=1, use_uncompressed_cache=1, merge_tree_max_rows_to_use_cache=100000000000, merge_tree_max_bytes_to_use_cache=1000000000000;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
```
```text
clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, size, ptr)) FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx'" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_untracked.svg
```
### Build a flamegraph based on memory query profiler, showing active allocations at the fixed point of time
```sql
SET memory_profiler_sample_probability=1, max_untracked_memory=1;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
```
- 1 - Memory usage per second
```sql
SELECT event_time, m, formatReadableSize(max(s) as m) FROM (SELECT event_time, sum(size) OVER (ORDER BY event_time) AS s FROM system.trace_log WHERE query_id = 'xxx' AND trace_type = 'MemorySample') GROUP BY event_time ORDER BY event_time;
```
- 2 - Find a time point with maximal memory usage
```sql
SELECT argMax(event_time, s), max(s) FROM (SELECT event_time, sum(size) OVER (ORDER BY event_time) AS s FROM system.trace_log WHERE query_id = 'xxx' AND trace_type = 'MemorySample');
```
- 3 - Fix active allocations at fixed point of time
```text
clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, size, ptr)) FROM (SELECT * FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx' AND event_time <= 'yyy' ORDER BY event_time)" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_pos.svg
```
- 4 - Find deallocations at fixed point of time
```text
clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, -size, ptr)) FROM (SELECT * FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx' AND event_time > 'yyy' ORDER BY event_time desc)" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_neg.svg
```

View File

@ -58,6 +58,7 @@ ClickHouse-specific aggregate functions:
- [topKWeighted](../reference/topkweighted.md)
- [deltaSum](../reference/deltasum.md)
- [deltaSumTimestamp](../reference/deltasumtimestamp.md)
- [flameGraph](../reference/flame_graph.md)
- [groupArray](../reference/grouparray.md)
- [groupArrayLast](../reference/grouparraylast.md)
- [groupUniqArray](../reference/groupuniqarray.md)

View File

@ -24,6 +24,8 @@ Alias: `lttb`.
- `x` — x coordinate. [Integer](../../../sql-reference/data-types/int-uint.md) , [Float](../../../sql-reference/data-types/float.md) , [Decimal](../../../sql-reference/data-types/decimal.md) , [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md).
- `y` — y coordinate. [Integer](../../../sql-reference/data-types/int-uint.md) , [Float](../../../sql-reference/data-types/float.md) , [Decimal](../../../sql-reference/data-types/decimal.md) , [Date](../../../sql-reference/data-types/date.md), [Date32](../../../sql-reference/data-types/date32.md), [DateTime](../../../sql-reference/data-types/datetime.md), [DateTime64](../../../sql-reference/data-types/datetime64.md).
NaNs are ignored in the provided series, meaning that any NaN values will be excluded from the analysis. This ensures that the function operates only on valid numerical data.
**Parameters**
- `n` — number of points in the resulting series. [UInt64](../../../sql-reference/data-types/int-uint.md).
@ -61,7 +63,7 @@ Result:
``` text
┌────────largestTriangleThreeBuckets(4)(x, y)───────────┐
│ [(1,10),(3,15),(5,40),(10,70)] │
│ [(1,10),(3,15),(9,55),(10,70)] │
└───────────────────────────────────────────────────────┘
```

View File

@ -142,6 +142,34 @@ SELECT readWKTPoint('POINT (1.2 3.4)');
(1.2,3.4)
```
## readWKTLineString
Parses a Well-Known Text (WKT) representation of a LineString geometry and returns it in the internal ClickHouse format.
### Syntax
```sql
readWKTLineString(wkt_string)
```
### Arguments
- `wkt_string`: The input WKT string representing a LineString geometry.
### Returned value
The function returns a ClickHouse internal representation of the linestring geometry.
### Example
```sql
SELECT readWKTLineString('LINESTRING (1 1, 2 2, 3 3, 1 1)');
```
```response
[(1,1),(2,2),(3,3),(1,1)]
```
## readWKTRing
Parses a Well-Known Text (WKT) representation of a Polygon geometry and returns a ring (closed linestring) in the internal ClickHouse format.
@ -163,7 +191,7 @@ The function returns a ClickHouse internal representation of the ring (closed li
### Example
```sql
SELECT readWKTRing('LINESTRING (1 1, 2 2, 3 3, 1 1)');
SELECT readWKTRing('POLYGON ((1 1, 2 2, 3 3, 1 1))');
```
```response

View File

@ -212,7 +212,7 @@ toTypeName(x)
## blockSize {#blockSize}
In ClickHouse, queries are processed in blocks (chunks).
In ClickHouse, queries are processed in [blocks](../../development/architecture.md/#block-block) (chunks).
This function returns the size (row count) of the block the function is called on.
**Syntax**
@ -221,6 +221,33 @@ This function returns the size (row count) of the block the function is called o
blockSize()
```
**Example**
Query:
```sql
DROP TABLE IF EXISTS test;
CREATE TABLE test (n UInt8) ENGINE = Memory;
INSERT INTO test
SELECT * FROM system.numbers LIMIT 5;
SELECT blockSize()
FROM test;
```
Result:
```response
┌─blockSize()─┐
1. │ 5 │
2. │ 5 │
3. │ 5 │
4. │ 5 │
5. │ 5 │
└─────────────┘
```
## byteSize
Returns an estimation of uncompressed byte size of its arguments in memory.
@ -3688,3 +3715,108 @@ Result:
```response
{'version':'1','serial_number':'2D9071D64530052D48308473922C7ADAFA85D6C5','signature_algo':'sha256WithRSAEncryption','issuer':'/CN=marsnet.local CA','not_before':'May 7 17:01:21 2024 GMT','not_after':'May 7 17:01:21 2025 GMT','subject':'/CN=chnode1','pkey_algo':'rsaEncryption'}
```
## lowCardinalityIndices
Returns the position of a value in the dictionary of a [LowCardinality](../data-types/lowcardinality.md) column. Positions start at 1. Since LowCardinality columns have per-part dictionaries, this function may return different positions for the same value in different parts.
**Syntax**
```sql
lowCardinalityIndices(col)
```
**Arguments**
- `col` — a low cardinality column. [LowCardinality](../data-types/lowcardinality.md).
**Returned value**
- The position of the value in the dictionary of the current part. [UInt64](../data-types/int-uint.md).
**Example**
Query:
```sql
DROP TABLE IF EXISTS test;
CREATE TABLE test (s LowCardinality(String)) ENGINE = Memory;
-- create two parts:
INSERT INTO test VALUES ('ab'), ('cd'), ('ab'), ('ab'), ('df');
INSERT INTO test VALUES ('ef'), ('cd'), ('ab'), ('cd'), ('ef');
SELECT s, lowCardinalityIndices(s) FROM test;
```
Result:
```response
┌─s──┬─lowCardinalityIndices(s)─┐
1. │ ab │ 1 │
2. │ cd │ 2 │
3. │ ab │ 1 │
4. │ ab │ 1 │
5. │ df │ 3 │
└────┴──────────────────────────┘
┌─s──┬─lowCardinalityIndices(s)─┐
6. │ ef │ 1 │
7. │ cd │ 2 │
8. │ ab │ 3 │
9. │ cd │ 2 │
10. │ ef │ 1 │
└────┴──────────────────────────┘
```
## lowCardinalityKeys
Returns the dictionary values of a [LowCardinality](../data-types/lowcardinality.md) column. If the block is smaller or larger than the dictionary size, the result will be truncated or extended with default values. Since LowCardinality columns have per-part dictionaries, this function may return different dictionary values in different parts.
**Syntax**
```sql
lowCardinalityKeys(col)
```
**Arguments**
- `col` — a low cardinality column. [LowCardinality](../data-types/lowcardinality.md).
**Returned value**
- The dictionary keys. [UInt64](../data-types/int-uint.md).
**Example**
Query:
```sql
DROP TABLE IF EXISTS test;
CREATE TABLE test (s LowCardinality(String)) ENGINE = Memory;
-- create two parts:
INSERT INTO test VALUES ('ab'), ('cd'), ('ab'), ('ab'), ('df');
INSERT INTO test VALUES ('ef'), ('cd'), ('ab'), ('cd'), ('ef');
SELECT s, lowCardinalityKeys(s) FROM test;
```
Result:
```response
┌─s──┬─lowCardinalityKeys(s)─┐
1. │ ef │ │
2. │ cd │ ef │
3. │ ab │ cd │
4. │ cd │ ab │
5. │ ef │ │
└────┴───────────────────────┘
┌─s──┬─lowCardinalityKeys(s)─┐
6. │ ab │ │
7. │ cd │ ab │
8. │ ab │ cd │
9. │ ab │ df │
10. │ df │ │
└────┴───────────────────────┘
```

View File

@ -6,49 +6,90 @@ sidebar_label: Rounding
# Rounding Functions
## floor(x\[, N\])
## floor
Returns the largest round number that is less than or equal to `x`. A round number is a multiple of 1/10N, or the nearest number of the appropriate data type if 1 / 10N isnt exact.
N is an integer constant, optional parameter. By default it is zero, which means to round to an integer.
N may be negative.
Returns the largest rounded number less than or equal to `x`.
A rounded number is a multiple of 1 / 10^N, or the nearest number of the appropriate data type if 1 / 10^N isn't exact.
Examples: `floor(123.45, 1) = 123.4, floor(123.45, -1) = 120.`
Integer arguments may be rounded with a negative `N` argument; with a non-negative `N`, the function returns `x` unchanged, i.e. does nothing.
`x` is any numeric type. The result is a number of the same type.
For integer arguments, it makes sense to round with a negative `N` value (for non-negative `N`, the function does not do anything).
If rounding causes overflow (for example, floor(-128, -1)), an implementation-specific result is returned.
If rounding causes an overflow (for example, `floor(-128, -1)`), the result is undefined.
## ceil(x\[, N\]), ceiling(x\[, N\])
**Syntax**
Returns the smallest round number that is greater than or equal to `x`. In every other way, it is the same as the `floor` function (see above).
``` sql
floor(x[, N])
```
## trunc(x\[, N\]), truncate(x\[, N\])
**Parameters**
Returns the round number with largest absolute value that has an absolute value less than or equal to `x`s. In every other way, it is the same as the floor function (see above).
- `x` - The value to round. [Float*](../data-types/float.md), [Decimal*](../data-types/decimal.md), or [(U)Int*](../data-types/int-uint.md).
- `N` — The number of decimal places to round to. [(U)Int*](../data-types/int-uint.md). The default is zero, which means rounding to an integer. Can be negative.
**Returned value**
A rounded number of the same type as `x`.
**Examples**
Query:
```sql
SELECT floor(123.45, 1) AS rounded
```
Result:
```
┌─rounded─┐
│ 123.4 │
└─────────┘
```
Query:
```sql
SELECT floor(123.45, -1) AS rounded
```
Result:
```
┌─rounded─┐
│ 120 │
└─────────┘
```
## ceiling
Like `floor` but returns the smallest rounded number greater than or equal to `x`.
**Syntax**
``` sql
ceiling(x[, N])
```
Alias: `ceil`
## truncate
Like `floor` but returns the rounded number with the largest absolute value whose absolute value is less than or equal to that of `x`.
**Syntax**
```sql
trunc(input, precision)
truncate(x[, N])
```
Alias: `truncate`.
**Parameters**
- `input`: A numeric type ([Float](../data-types/float.md), [Decimal](../data-types/decimal.md) or [Integer](../data-types/int-uint.md)).
- `precision`: An [Integer](../data-types/int-uint.md) type.
**Returned value**
- A data type of `input`.
Alias: `trunc`.
**Example**
Query:
```sql
SELECT trunc(123.499, 1) as res;
SELECT truncate(123.499, 1) as res;
```
```response
@ -57,37 +98,40 @@ SELECT trunc(123.499, 1) as res;
└───────┘
```
## round(x\[, N\])
## round
Rounds a value to a specified number of decimal places.
The function returns the nearest number of the specified order. In case when given number has equal distance to surrounding numbers, the function uses bankers rounding for float number types and rounds away from zero for the other number types (Decimal).
The function returns the nearest number of the specified order.
If the input value has equal distance to two neighboring numbers, the function uses banker's rounding for [Float*](../data-types/float.md) inputs and rounds away from zero for the other number types ([Decimal*](../data-types/decimal.md)).
**Syntax**
``` sql
round(expression [, decimal_places])
round(x[, N])
```
**Arguments**
- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../data-types/index.md#data_types).
- `decimal-places` — An integer value.
- If `decimal-places > 0` then the function rounds the value to the right of the decimal point.
- If `decimal-places < 0` then the function rounds the value to the left of the decimal point.
- If `decimal-places = 0` then the function rounds the value to integer. In this case the argument can be omitted.
- `x` — A number to round. [Float*](../data-types/float.md), [Decimal*](../data-types/decimal.md), or [(U)Int*](../data-types/int-uint.md).
- `N` — The number of decimal places to round to. Integer. Defaults to `0`.
- If `N > 0`, the function rounds to the right of the decimal point.
- If `N < 0`, the function rounds to the left of the decimal point.
- If `N = 0`, the function rounds to the nearest integer.
**Returned value:**
The rounded number of the same type as the input number.
A rounded number of the same type as `x`.
**Examples**
Example of usage with Float:
Example with `Float` inputs:
``` sql
```sql
SELECT number / 2 AS x, round(x) FROM system.numbers LIMIT 3;
```
``` text
```
┌───x─┬─round(divide(number, 2))─┐
│ 0 │ 0 │
│ 0.5 │ 0 │
@ -95,13 +139,13 @@ SELECT number / 2 AS x, round(x) FROM system.numbers LIMIT 3;
└─────┴──────────────────────────┘
```
Example of usage with Decimal:
Example with `Decimal` inputs:
``` sql
```sql
SELECT cast(number / 2 AS Decimal(10,4)) AS x, round(x) FROM system.numbers LIMIT 3;
```
``` text
```
┌───x─┬─round(CAST(divide(number, 2), 'Decimal(10, 4)'))─┐
│ 0 │ 0 │
│ 0.5 │ 1 │
@ -109,14 +153,14 @@ SELECT cast(number / 2 AS Decimal(10,4)) AS x, round(x) FROM system.numbers LIM
└─────┴──────────────────────────────────────────────────┘
```
If you want to keep the trailing zeros, you need to enable `output_format_decimal_trailing_zeros`
To retain trailing zeros, enable setting `output_format_decimal_trailing_zeros`:
``` sql
```sql
SELECT cast(number / 2 AS Decimal(10,4)) AS x, round(x) FROM system.numbers LIMIT 3 settings output_format_decimal_trailing_zeros=1;
```
``` text
```
┌──────x─┬─round(CAST(divide(number, 2), 'Decimal(10, 4)'))─┐
│ 0.0000 │ 0.0000 │
│ 0.5000 │ 1.0000 │
@ -151,9 +195,15 @@ round(3.65, 1) = 3.6
Rounds a number to a specified decimal position.
- If the rounding number is halfway between two numbers, the function uses bankers rounding. Banker's rounding is a method of rounding fractional numbers. When the rounding number is halfway between two numbers, it's rounded to the nearest even digit at the specified decimal position. For example: 3.5 rounds up to 4, 2.5 rounds down to 2. It's the default rounding method for floating point numbers defined in [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest). The [round](#rounding_functions-round) function performs the same rounding for floating point numbers. The `roundBankers` function also rounds integers the same way, for example, `roundBankers(45, -1) = 40`.
If the rounding number is halfway between two numbers, the function uses banker's rounding.
Banker's rounding is a method of rounding fractional numbers.
When the rounding number is halfway between two numbers, it's rounded to the nearest even digit at the specified decimal position.
For example: 3.5 rounds up to 4, 2.5 rounds down to 2.
It's the default rounding method for floating point numbers defined in [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest).
The [round](#rounding_functions-round) function performs the same rounding for floating point numbers.
The `roundBankers` function also rounds integers the same way, for example, `roundBankers(45, -1) = 40`.
- In other cases, the function rounds numbers to the nearest integer.
In other cases, the function rounds numbers to the nearest integer.
Using bankers rounding, you can reduce the effect that rounding numbers has on the results of summing or subtracting these numbers.
@ -166,16 +216,20 @@ For example, sum numbers 1.5, 2.5, 3.5, 4.5 with different rounding:
**Syntax**
``` sql
roundBankers(expression [, decimal_places])
roundBankers(x [, N])
```
**Arguments**
- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../data-types/index.md#data_types).
- `decimal-places` — Decimal places. An integer number.
- `decimal-places > 0` — The function rounds the number to the given position right of the decimal point. Example: `roundBankers(3.55, 1) = 3.6`.
- `decimal-places < 0` — The function rounds the number to the given position left of the decimal point. Example: `roundBankers(24.55, -1) = 20`.
- `decimal-places = 0` — The function rounds the number to an integer. In this case the argument can be omitted. Example: `roundBankers(2.5) = 2`.
- `N > 0` — The function rounds the number to the given position right of the decimal point. Example: `roundBankers(3.55, 1) = 3.6`.
- `N < 0` — The function rounds the number to the given position left of the decimal point. Example: `roundBankers(24.55, -1) = 20`.
- `N = 0` — The function rounds the number to an integer. In this case the argument can be omitted. Example: `roundBankers(2.5) = 2`.
- `x` — A number to round. [Float*](../data-types/float.md), [Decimal*](../data-types/decimal.md), or [(U)Int*](../data-types/int-uint.md).
- `N` — The number of decimal places to round to. Integer. Defaults to `0`.
- If `N > 0`, the function rounds to the right of the decimal point.
- If `N < 0`, the function rounds to the left of the decimal point.
- If `N = 0`, the function rounds to the nearest integer.
**Returned value**
@ -185,13 +239,13 @@ A value rounded by the bankers rounding method.
Query:
``` sql
```sql
SELECT number / 2 AS x, roundBankers(x, 0) AS b FROM system.numbers LIMIT 10
```
Result:
``` text
```
┌───x─┬─b─┐
│ 0 │ 0 │
│ 0.5 │ 0 │
@ -208,7 +262,7 @@ Result:
Examples of Bankers rounding:
``` text
```
roundBankers(0.4) = 0
roundBankers(-3.5) = -4
roundBankers(4.5) = 4
@ -264,7 +318,7 @@ Result:
## roundDuration
Accepts a number. If the number is less than one, it returns `0`. Otherwise, it rounds the number down to numbers from the set of commonly used durations: `1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000`.
Accepts a number. If the number is less than one, it returns `0`. Otherwise, it rounds the number down to numbers from the set of commonly used durations: `1, 10, 30, 60, 120, 180, 240, 300, 600, 1200, 1800, 3600, 7200, 18000, 36000`.
**Syntax**

View File

@ -2423,11 +2423,7 @@ Result:
## toUnixTimestamp64Milli
## toUnixTimestamp64Micro
## toUnixTimestamp64Nano
Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Input value is scaled up or down appropriately depending on it precision.
Converts a `DateTime64` to an `Int64` value with fixed millisecond precision. The input value is scaled up or down appropriately depending on its precision.
:::note
The output value is a timestamp in UTC, not in the timezone of `DateTime64`.
@ -2437,24 +2433,22 @@ The output value is a timestamp in UTC, not in the timezone of `DateTime64`.
```sql
toUnixTimestamp64Milli(value)
toUnixTimestamp64Micro(value)
toUnixTimestamp64Nano(value)
```
**Arguments**
- `value` — DateTime64 value with any precision.
- `value` — DateTime64 value with any precision. [DateTime64](../data-types/datetime64.md).
**Returned value**
- `value` converted to the `Int64` data type.
- `value` converted to the `Int64` data type. [Int64](../data-types/int-uint.md).
**Examples**
**Example**
Query:
```sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
WITH toDateTime64('2009-02-13 23:31:31.011', 3, 'UTC') AS dt64
SELECT toUnixTimestamp64Milli(dt64);
```
@ -2462,14 +2456,77 @@ Result:
```response
┌─toUnixTimestamp64Milli(dt64)─┐
│ 1568650812345
│ 1234567891011
└──────────────────────────────┘
```
## toUnixTimestamp64Micro
Converts a `DateTime64` to an `Int64` value with fixed microsecond precision. The input value is scaled up or down appropriately depending on its precision.
:::note
The output value is a timestamp in UTC, not in the timezone of `DateTime64`.
:::
**Syntax**
```sql
toUnixTimestamp64Micro(value)
```
**Arguments**
- `value` — DateTime64 value with any precision. [DateTime64](../data-types/datetime64.md).
**Returned value**
- `value` converted to the `Int64` data type. [Int64](../data-types/int-uint.md).
**Example**
Query:
``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
```sql
WITH toDateTime64('1970-01-15 06:56:07.891011', 6, 'UTC') AS dt64
SELECT toUnixTimestamp64Micro(dt64);
```
Result:
```response
┌─toUnixTimestamp64Micro(dt64)─┐
│ 1234567891011 │
└──────────────────────────────┘
```
## toUnixTimestamp64Nano
Converts a `DateTime64` to an `Int64` value with fixed nanosecond precision. The input value is scaled up or down appropriately depending on its precision.
:::note
The output value is a timestamp in UTC, not in the timezone of `DateTime64`.
:::
**Syntax**
```sql
toUnixTimestamp64Nano(value)
```
**Arguments**
- `value` — DateTime64 value with any precision. [DateTime64](../data-types/datetime64.md).
**Returned value**
- `value` converted to the `Int64` data type. [Int64](../data-types/int-uint.md).
**Example**
Query:
```sql
WITH toDateTime64('1970-01-01 00:20:34.567891011', 9, 'UTC') AS dt64
SELECT toUnixTimestamp64Nano(dt64);
```
@ -2477,34 +2534,32 @@ Result:
```response
┌─toUnixTimestamp64Nano(dt64)─┐
1568650812345678000
1234567891011
└─────────────────────────────┘
```
## fromUnixTimestamp64Milli
## fromUnixTimestamp64Micro
Converts an `Int64` to a `DateTime64` value with fixed millisecond precision and optional timezone. The input value is scaled up or down appropriately depending on its precision.
## fromUnixTimestamp64Nano
Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and optional timezone. Input value is scaled up or down appropriately depending on its precision. Please note that input value is treated as UTC timestamp, not timestamp at given (or implicit) timezone.
:::note
The input value is treated as a UTC timestamp, not as a timestamp in the given (or implicit) timezone.
:::
**Syntax**
``` sql
fromUnixTimestamp64Milli(value[, timezone])
fromUnixTimestamp64Micro(value[, timezone])
fromUnixTimestamp64Nano(value[, timezone])
```
**Arguments**
- `value``Int64` value with any precision.
- `timezone``String` (optional) timezone name of the result.
- `value` — value with any precision. [Int64](../data-types/int-uint.md).
- `timezone` — (optional) timezone name of the result. [String](../data-types/string.md).
**Returned value**
- `value` converted to the `DateTime64` data type.
- `value` converted to DateTime64 with precision `3`. [DateTime64](../data-types/datetime64.md).
**Example**
@ -2512,15 +2567,101 @@ Query:
``` sql
WITH CAST(1234567891011, 'Int64') AS i64
SELECT fromUnixTimestamp64Milli(i64, 'UTC');
SELECT
fromUnixTimestamp64Milli(i64, 'UTC') AS x,
toTypeName(x);
```
Result:
```response
┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐
│ 2009-02-13 23:31:31.011 │
└──────────────────────────────────────┘
┌───────────────────────x─┬─toTypeName(x)────────┐
│ 2009-02-13 23:31:31.011 │ DateTime64(3, 'UTC') │
└─────────────────────────┴──────────────────────┘
```
## fromUnixTimestamp64Micro
Converts an `Int64` to a `DateTime64` value with fixed microsecond precision and optional timezone. The input value is scaled up or down appropriately depending on its precision.
:::note
The input value is treated as a UTC timestamp, not as a timestamp in the given (or implicit) timezone.
:::
**Syntax**
``` sql
fromUnixTimestamp64Micro(value[, timezone])
```
**Arguments**
- `value` — value with any precision. [Int64](../data-types/int-uint.md).
- `timezone` — (optional) timezone name of the result. [String](../data-types/string.md).
**Returned value**
- `value` converted to DateTime64 with precision `6`. [DateTime64](../data-types/datetime64.md).
**Example**
Query:
``` sql
WITH CAST(1234567891011, 'Int64') AS i64
SELECT
fromUnixTimestamp64Micro(i64, 'UTC') AS x,
toTypeName(x);
```
Result:
```response
┌──────────────────────────x─┬─toTypeName(x)────────┐
│ 1970-01-15 06:56:07.891011 │ DateTime64(6, 'UTC') │
└────────────────────────────┴──────────────────────┘
```
## fromUnixTimestamp64Nano
Converts an `Int64` to a `DateTime64` value with fixed nanosecond precision and optional timezone. The input value is scaled up or down appropriately depending on its precision.
:::note
The input value is treated as a UTC timestamp, not as a timestamp in the given (or implicit) timezone.
:::
**Syntax**
``` sql
fromUnixTimestamp64Nano(value[, timezone])
```
**Arguments**
- `value` — value with any precision. [Int64](../data-types/int-uint.md).
- `timezone` — (optional) timezone name of the result. [String](../data-types/string.md).
**Returned value**
- `value` converted to DateTime64 with precision `9`. [DateTime64](../data-types/datetime64.md).
**Example**
Query:
``` sql
WITH CAST(1234567891011, 'Int64') AS i64
SELECT
fromUnixTimestamp64Nano(i64, 'UTC') AS x,
toTypeName(x);
```
Result:
```response
┌─────────────────────────────x─┬─toTypeName(x)────────┐
│ 1970-01-01 00:20:34.567891011 │ DateTime64(9, 'UTC') │
└───────────────────────────────┴──────────────────────┘
```
## formatRow

View File

@ -139,7 +139,7 @@ For the query to run successfully, the following conditions must be met:
ALTER TABLE table2 [ON CLUSTER cluster] REPLACE PARTITION partition_expr FROM table1
```
This query copies the data partition from the `table1` to `table2` and replaces existing partition in the `table2`.
This query copies the data partition from `table1` to `table2` and replaces the existing partition in `table2`. The operation is atomic.
Note that:

View File

@ -72,6 +72,7 @@ SELECT count(*) FROM azureBlobStorage('DefaultEndpointsProtocol=https;AccountNam
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
**See Also**

View File

@ -196,6 +196,7 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
## Settings {#settings}

View File

@ -97,6 +97,7 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}

View File

@ -272,6 +272,7 @@ FROM s3(
- `_path` — Path to the file. Type: `LowCardinality(String)`.
- `_file` — Name of the file. Type: `LowCardinality(String)`.
- `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}

View File

@ -53,6 +53,7 @@ Character `|` inside patterns is used to specify failover addresses. They are it
- `_path` — Path to the `URL`. Type: `LowCardinality(String)`.
- `_file` — Resource name of the `URL`. Type: `LowCardinality(String)`.
- `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`.
- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`.
## Storage Settings {#storage-settings}

View File

@ -10,7 +10,7 @@ sidebar_label: "Функции округления"
Возвращает наибольшее круглое число, которое меньше или равно, чем x.
Круглым называется число, кратное 1 / 10N или ближайшее к нему число соответствующего типа данных, если 1 / 10N не представимо точно.
N - целочисленная константа, не обязательный параметр. По умолчанию - ноль, что означает - округлять до целого числа.
N - целочисленный аргумент, не обязательный параметр. По умолчанию - ноль, что означает - округлять до целого числа.
N может быть отрицательным.
Примеры: `floor(123.45, 1) = 123.4, floor(123.45, -1) = 120.`

View File

@ -48,6 +48,7 @@
#include <Common/FailPoint.h>
#include <Common/CPUID.h>
#include <Common/HTTPConnectionPool.h>
#include <Common/NamedCollections/NamedCollectionsFactory.h>
#include <Server/waitServersToFinish.h>
#include <Interpreters/Cache/FileCacheFactory.h>
#include <Core/ServerUUID.h>
@ -70,7 +71,6 @@
#include <Storages/System/attachInformationSchemaTables.h>
#include <Storages/Cache/ExternalDataSourceCache.h>
#include <Storages/Cache/registerRemoteFileMetadatas.h>
#include <Common/NamedCollections/NamedCollectionUtils.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Functions/UserDefined/IUserDefinedSQLObjectsStorage.h>
#include <Functions/registerFunctions.h>
@ -773,7 +773,27 @@ try
LOG_INFO(log, "Available CPU instruction sets: {}", cpu_info);
#endif
bool will_have_trace_collector = hasPHDRCache() && config().has("trace_log");
bool has_trace_collector = false;
/// Disable it if we collect test coverage information, because it will work extremely slow.
#if !WITH_COVERAGE
/// Profilers cannot work reliably with any other libunwind or without PHDR cache.
has_trace_collector = hasPHDRCache() && config().has("trace_log");
#endif
/// Describe multiple reasons when query profiler cannot work.
#if WITH_COVERAGE
LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they work extremely slow with test coverage.");
#endif
#if defined(SANITIZER)
LOG_INFO(log, "Query Profiler disabled because they cannot work under sanitizers"
" when two different stack unwinding methods will interfere with each other.");
#endif
if (!hasPHDRCache())
LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they require PHDR cache to be created"
" (otherwise the function 'dl_iterate_phdr' is not lock free and not async-signal safe).");
// Initialize global thread pool. Do it before we fetch configs from zookeeper
// nodes (`from_zk`), because ZooKeeper interface uses the pool. We will
@ -782,8 +802,27 @@ try
server_settings.max_thread_pool_size,
server_settings.max_thread_pool_free_size,
server_settings.thread_pool_queue_size,
will_have_trace_collector ? server_settings.global_profiler_real_time_period_ns : 0,
will_have_trace_collector ? server_settings.global_profiler_cpu_time_period_ns : 0);
has_trace_collector ? server_settings.global_profiler_real_time_period_ns : 0,
has_trace_collector ? server_settings.global_profiler_cpu_time_period_ns : 0);
if (has_trace_collector)
{
global_context->createTraceCollector();
/// Set up server-wide memory profiler (for total memory tracker).
if (server_settings.total_memory_profiler_step)
total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step);
if (server_settings.total_memory_tracker_sample_probability > 0.0)
total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability);
if (server_settings.total_memory_profiler_sample_min_allocation_size)
total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size);
if (server_settings.total_memory_profiler_sample_max_allocation_size)
total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size);
}
/// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed).
SCOPE_EXIT({
Stopwatch watch;
@ -1339,7 +1378,7 @@ try
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
#endif
NamedCollectionUtils::loadIfNot();
NamedCollectionFactory::instance().loadIfNot();
/// Initialize main config reloader.
std::string include_from_path = config().getString("include_from", "/etc/metrika.xml");
@ -1608,7 +1647,7 @@ try
#if USE_SSL
CertificateReloader::instance().tryLoad(*config);
#endif
NamedCollectionUtils::reloadFromConfig(*config);
NamedCollectionFactory::instance().reloadFromConfig(*config);
FileCacheFactory::instance().updateSettingsFromConfig(*config);
@ -1950,52 +1989,9 @@ try
LOG_DEBUG(log, "Loaded metadata.");
/// Init trace collector only after trace_log system table was created
/// Disable it if we collect test coverage information, because it will work extremely slow.
#if !WITH_COVERAGE
/// Profilers cannot work reliably with any other libunwind or without PHDR cache.
if (hasPHDRCache())
{
if (has_trace_collector)
global_context->initializeTraceCollector();
/// Set up server-wide memory profiler (for total memory tracker).
if (server_settings.total_memory_profiler_step)
{
total_memory_tracker.setProfilerStep(server_settings.total_memory_profiler_step);
}
if (server_settings.total_memory_tracker_sample_probability > 0.0)
{
total_memory_tracker.setSampleProbability(server_settings.total_memory_tracker_sample_probability);
}
if (server_settings.total_memory_profiler_sample_min_allocation_size)
{
total_memory_tracker.setSampleMinAllocationSize(server_settings.total_memory_profiler_sample_min_allocation_size);
}
if (server_settings.total_memory_profiler_sample_max_allocation_size)
{
total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size);
}
}
#endif
/// Describe multiple reasons when query profiler cannot work.
#if WITH_COVERAGE
LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they work extremely slow with test coverage.");
#endif
#if defined(SANITIZER)
LOG_INFO(log, "Query Profiler disabled because they cannot work under sanitizers"
" when two different stack unwinding methods will interfere with each other.");
#endif
if (!hasPHDRCache())
LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they require PHDR cache to be created"
" (otherwise the function 'dl_iterate_phdr' is not lock free and not async-signal safe).");
#if defined(OS_LINUX)
auto tasks_stats_provider = TasksStatsCounters::findBestAvailableProvider();
if (tasks_stats_provider == TasksStatsCounters::MetricsProvider::None)

View File

@ -1561,6 +1561,7 @@
<rocksdb>
<options>
<max_background_jobs>8</max_background_jobs>
<info_log_level>DEBUG_LEVEL</info_log_level>
</options>
<column_family_options>
<num_levels>2</num_levels>

View File

@ -1,5 +1,6 @@
#include <Analyzer/ArrayJoinNode.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/Utils.h>
#include <IO/Operators.h>
#include <IO/WriteBuffer.h>
@ -64,7 +65,12 @@ ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const
auto * column_node = array_join_expression->as<ColumnNode>();
if (column_node && column_node->getExpression())
array_join_expression_ast = column_node->getExpression()->toAST(options);
{
if (const auto * function_node = column_node->getExpression()->as<FunctionNode>(); function_node && function_node->getFunctionName() == "nested")
array_join_expression_ast = array_join_expression->toAST(options);
else
array_join_expression_ast = column_node->getExpression()->toAST(options);
}
else
array_join_expression_ast = array_join_expression->toAST(options);

View File

@ -1,5 +1,7 @@
#include <Analyzer/FunctionNode.h>
#include <Columns/ColumnConst.h>
#include <Common/SipHash.h>
#include <Common/FieldVisitorToString.h>
@ -58,12 +60,20 @@ ColumnsWithTypeAndName FunctionNode::getArgumentColumns() const
ColumnWithTypeAndName argument_column;
auto * constant = argument->as<ConstantNode>();
if (isNameOfInFunction(function_name) && i == 1)
{
argument_column.type = std::make_shared<DataTypeSet>();
if (constant)
{
/// Created but not filled for the analysis during function resolution.
FutureSetPtr empty_set;
argument_column.column = ColumnConst::create(ColumnSet::create(1, empty_set), 1);
}
}
else
argument_column.type = argument->getResultType();
auto * constant = argument->as<ConstantNode>();
if (constant && !isNotCreatable(argument_column.type))
argument_column.column = argument_column.type->createColumnConst(1, constant->getValue());

View File

@ -51,7 +51,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<AggregateFunctionsArithmericOperationsVisitor>;
using Base::Base;
void leaveImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_arithmetic_operations_in_aggregate_functions)
return;

View File

@ -551,14 +551,25 @@ private:
in_function->getArguments().getNodes() = std::move(in_arguments);
in_function->resolveAsFunction(in_function_resolver);
DataTypePtr result_type = in_function->getResultType();
const auto * type_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(result_type.get());
if (type_low_cardinality)
result_type = type_low_cardinality->getDictionaryType();
/** For `k :: UInt8`, expression `k = 1 OR k = NULL` with result type Nullable(UInt8)
* is replaced with `k IN (1, NULL)` with result type UInt8.
* Convert it back to Nullable(UInt8).
* And for `k :: LowCardinality(UInt8)`, the transformation of `k IN (1, NULL)` results in type LowCardinality(UInt8).
* Convert it to LowCardinality(Nullable(UInt8)).
*/
if (is_any_nullable && !in_function->getResultType()->isNullable())
if (is_any_nullable && !result_type->isNullable())
{
auto nullable_result_type = std::make_shared<DataTypeNullable>(in_function->getResultType());
auto in_function_nullable = createCastFunction(std::move(in_function), std::move(nullable_result_type), getContext());
DataTypePtr new_result_type = std::make_shared<DataTypeNullable>(result_type);
if (type_low_cardinality)
{
new_result_type = std::make_shared<DataTypeLowCardinality>(new_result_type);
}
auto in_function_nullable = createCastFunction(std::move(in_function), std::move(new_result_type), getContext());
or_operands.push_back(std::move(in_function_nullable));
}
else

View File

@ -22,6 +22,7 @@ public:
if (query_node->hasOrderBy())
{
QueryTreeNodeConstRawPtrWithHashSet unique_expressions_nodes_set;
QueryTreeNodes result_nodes;
auto & query_order_by_nodes = query_node->getOrderBy().getNodes();
@ -45,10 +46,9 @@ public:
query_order_by_nodes = std::move(result_nodes);
}
unique_expressions_nodes_set.clear();
if (query_node->hasLimitBy())
{
QueryTreeNodeConstRawPtrWithHashSet unique_expressions_nodes_set;
QueryTreeNodes result_nodes;
auto & query_limit_by_nodes = query_node->getLimitBy().getNodes();
@ -63,9 +63,6 @@ public:
query_limit_by_nodes = std::move(result_nodes);
}
}
private:
QueryTreeNodeConstRawPtrWithHashSet unique_expressions_nodes_set;
};
}

View File

@ -75,7 +75,12 @@ struct ScopeAliases
if (jt == transitive_aliases.end())
return {};
key = &(getKey(jt->second, find_option));
const auto & new_key = getKey(jt->second, find_option);
/// Ignore potential cyclic aliases.
if (new_key == *key)
return {};
key = &new_key;
it = alias_map.find(*key);
}

View File

@ -289,10 +289,14 @@ void executeColumnIfNeeded(ColumnWithTypeAndName & column, bool empty)
if (!column_function)
return;
size_t original_size = column.column->size();
if (!empty)
column = column_function->reduce();
else
column.column = column_function->getResultType()->createColumn();
column.column = column_function->getResultType()->createColumnConstWithDefaultValue(original_size)->convertToFullColumnIfConst();
chassert(column.column->size() == original_size);
}
int checkShortCircuitArguments(const ColumnsWithTypeAndName & arguments)

View File

@ -45,14 +45,17 @@ struct ProtocolServerMetrics
};
/** Periodically (by default, each second)
* calculates and updates some metrics,
* that are not updated automatically (so, need to be asynchronously calculated).
* calculates and updates some metrics,
* that are not updated automatically (so, need to be asynchronously calculated).
*
* This includes both ClickHouse-related metrics (like memory usage of ClickHouse process)
* and common OS-related metrics (like total memory usage on the server).
* This includes both general process metrics (like memory usage)
* and common OS-related metrics (like total memory usage on the server).
*
* All the values are either gauge type (like the total number of tables, the current memory usage).
* Or delta-counters representing some accumulation during the interval of time.
*
* Server and Keeper specific metrics are contained inside
* ServerAsynchronousMetrics and KeeperAsynchronousMetrics respectively.
*/
class AsynchronousMetrics
{

View File

@ -15,6 +15,7 @@ struct MemoryTrackerSwitcher
return;
auto * thread_tracker = CurrentThread::getMemoryTracker();
prev_untracked_memory = current_thread->untracked_memory;
prev_memory_tracker_parent = thread_tracker->getParent();
@ -31,8 +32,10 @@ struct MemoryTrackerSwitcher
CurrentThread::flushUntrackedMemory();
auto * thread_tracker = CurrentThread::getMemoryTracker();
current_thread->untracked_memory = prev_untracked_memory;
/// It is important to set untracked memory after the call of
/// 'setParent' because it may flush untracked memory to the wrong parent.
thread_tracker->setParent(prev_memory_tracker_parent);
current_thread->untracked_memory = prev_untracked_memory;
}
private:

View File

@ -1,484 +0,0 @@
#include <Common/NamedCollections/NamedCollectionUtils.h>
#include <Common/escapeForFileName.h>
#include <Common/FieldVisitorToString.h>
#include <Common/logger_useful.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <Parsers/formatAST.h>
#include <Parsers/ASTCreateNamedCollectionQuery.h>
#include <Parsers/ASTAlterNamedCollectionQuery.h>
#include <Parsers/ASTDropNamedCollectionQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Interpreters/Context.h>
#include <Common/NamedCollections/NamedCollections.h>
#include <Common/NamedCollections/NamedCollectionConfiguration.h>
#include <Common/NamedCollections/NamedCollectionsFactory.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int NAMED_COLLECTION_ALREADY_EXISTS;
extern const int NAMED_COLLECTION_DOESNT_EXIST;
extern const int BAD_ARGUMENTS;
}
namespace NamedCollectionUtils
{
static std::atomic<bool> is_loaded_from_config = false;
static std::atomic<bool> is_loaded_from_sql = false;
class LoadFromConfig
{
private:
const Poco::Util::AbstractConfiguration & config;
public:
explicit LoadFromConfig(const Poco::Util::AbstractConfiguration & config_)
: config(config_) {}
std::vector<std::string> listCollections() const
{
Poco::Util::AbstractConfiguration::Keys collections_names;
config.keys(NAMED_COLLECTIONS_CONFIG_PREFIX, collections_names);
return collections_names;
}
NamedCollectionsMap getAll() const
{
NamedCollectionsMap result;
for (const auto & collection_name : listCollections())
{
if (result.contains(collection_name))
{
throw Exception(
ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
"Found duplicate named collection `{}`",
collection_name);
}
result.emplace(collection_name, get(collection_name));
}
return result;
}
MutableNamedCollectionPtr get(const std::string & collection_name) const
{
const auto collection_prefix = getCollectionPrefix(collection_name);
std::queue<std::string> enumerate_input;
std::set<std::string, std::less<>> enumerate_result;
enumerate_input.push(collection_prefix);
NamedCollectionConfiguration::listKeys(config, std::move(enumerate_input), enumerate_result, -1);
/// Collection does not have any keys.
/// (`enumerate_result` == <collection_path>).
const bool collection_is_empty = enumerate_result.size() == 1
&& *enumerate_result.begin() == collection_prefix;
std::set<std::string, std::less<>> keys;
if (!collection_is_empty)
{
/// Skip collection prefix and add +1 to avoid '.' in the beginning.
for (const auto & path : enumerate_result)
keys.emplace(path.substr(collection_prefix.size() + 1));
}
return NamedCollection::create(
config, collection_name, collection_prefix, keys, SourceId::CONFIG, /* is_mutable */false);
}
private:
static constexpr auto NAMED_COLLECTIONS_CONFIG_PREFIX = "named_collections";
static std::string getCollectionPrefix(const std::string & collection_name)
{
return fmt::format("{}.{}", NAMED_COLLECTIONS_CONFIG_PREFIX, collection_name);
}
};
class LoadFromSQL : private WithContext
{
private:
const std::string metadata_path;
public:
/// Resolves the metadata directory under the context path and, if that
/// directory already exists, removes leftover temporary files from it.
explicit LoadFromSQL(ContextPtr context_)
    : WithContext(context_)
    , metadata_path(fs::weakly_canonical(context_->getPath()) / NAMED_COLLECTIONS_METADATA_DIRECTORY)
{
    const bool has_metadata_directory = fs::exists(metadata_path);
    if (has_metadata_directory)
        cleanup();
}
std::vector<std::string> listCollections() const
{
if (!fs::exists(metadata_path))
return {};
std::vector<std::string> collection_names;
fs::directory_iterator it{metadata_path};
for (; it != fs::directory_iterator{}; ++it)
{
const auto & current_path = it->path();
if (current_path.extension() == ".sql")
{
collection_names.push_back(it->path().stem());
}
else
{
LOG_WARNING(
getLogger("NamedCollectionsLoadFromSQL"),
"Unexpected file {} in named collections directory",
current_path.filename().string());
}
}
return collection_names;
}
/// Loads every collection that has a metadata file.
/// Throws NAMED_COLLECTION_ALREADY_EXISTS if the same name is listed twice.
NamedCollectionsMap getAll() const
{
    NamedCollectionsMap collections;
    for (const auto & name : listCollections())
    {
        if (collections.contains(name))
            throw Exception(
                ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
                "Found duplicate named collection `{}`",
                name);

        collections.emplace(name, get(name));
    }
    return collections;
}
/// Reads the stored CREATE query of `collection_name` and builds the collection from it.
MutableNamedCollectionPtr get(const std::string & collection_name) const
{
    const auto create_query = readCreateQueryFromMetadata(
        getMetadataPath(collection_name),
        getContext()->getSettingsRef());

    return createNamedCollectionFromAST(create_query);
}
/// Stores the CREATE query in the metadata file of the collection,
/// then builds the in-memory collection from the same query.
MutableNamedCollectionPtr create(const ASTCreateNamedCollectionQuery & query)
{
    const auto metadata_file = getMetadataPath(query.collection_name);
    writeCreateQueryToMetadata(query, metadata_file, getContext()->getSettingsRef());

    return createNamedCollectionFromAST(query);
}
/// Applies an ALTER query to the stored definition of a collection.
///
/// The stored CREATE query is read back, merged with the changes of `query`
/// (values from `query` take precedence over stored ones), keys listed in
/// `query.delete_keys` are dropped, and the result replaces the metadata file.
///
/// Throws BAD_ARGUMENTS if
///  - the same key is set twice in `query`,
///  - a key to delete is not present after merging,
///  - the collection would become empty.
void update(const ASTAlterNamedCollectionQuery & query)
{
    const auto path = getMetadataPath(query.collection_name);
    /// Take settings by reference, like the other methods do, instead of copying them.
    const auto & settings = getContext()->getSettingsRef();
    auto create_query = readCreateQueryFromMetadata(path, settings);

    std::unordered_map<std::string, Field> result_changes_map;
    for (const auto & [name, value] : query.changes)
    {
        const bool inserted = result_changes_map.emplace(name, value).second;
        if (!inserted)
        {
            throw Exception(
                ErrorCodes::BAD_ARGUMENTS,
                "Value with key `{}` is used twice in the SET query (collection name: {})",
                name, query.collection_name);
        }
    }

    /// `emplace` keeps existing entries, so stored values only fill in keys
    /// that the ALTER query did not set.
    for (const auto & [name, value] : create_query.changes)
        result_changes_map.emplace(name, value);

    std::unordered_map<std::string, bool> result_overridability_map;
    for (const auto & [name, value] : query.overridability)
        result_overridability_map.emplace(name, value);
    for (const auto & [name, value] : create_query.overridability)
        result_overridability_map.emplace(name, value);

    for (const auto & delete_key : query.delete_keys)
    {
        auto it = result_changes_map.find(delete_key);
        if (it == result_changes_map.end())
        {
            throw Exception(
                ErrorCodes::BAD_ARGUMENTS,
                "Cannot delete key `{}` because it does not exist in collection",
                delete_key);
        }

        result_changes_map.erase(it);
        /// Erasing by key is a no-op when the key has no overridability entry.
        result_overridability_map.erase(delete_key);
    }

    create_query.changes.clear();
    for (const auto & [name, value] : result_changes_map)
        create_query.changes.emplace_back(name, value);
    create_query.overridability = std::move(result_overridability_map);

    if (create_query.changes.empty())
        throw Exception(
            ErrorCodes::BAD_ARGUMENTS,
            "Named collection cannot be empty (collection name: {})",
            query.collection_name);

    writeCreateQueryToMetadata(create_query, path, settings, /* replace */ true);
}
/// Deletes the metadata file of `collection_name`.
/// Throws NAMED_COLLECTION_DOESNT_EXIST if there is no such file.
void remove(const std::string & collection_name)
{
    const auto metadata_file = getMetadataPath(collection_name);
    if (fs::exists(metadata_file))
    {
        (void)fs::remove(metadata_file);
        return;
    }

    throw Exception(
        ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
        "Cannot remove collection `{}`, because it doesn't exist",
        collection_name);
}
private:
static constexpr auto NAMED_COLLECTIONS_METADATA_DIRECTORY = "named_collections";
/// Builds a mutable, SQL-sourced collection from a CREATE query:
/// the query changes become the configuration, their names become the key set.
static MutableNamedCollectionPtr createNamedCollectionFromAST(
    const ASTCreateNamedCollectionQuery & query)
{
    std::set<std::string, std::less<>> keys;
    for (const auto & [key, _] : query.changes)
        keys.insert(key);

    const auto & name = query.collection_name;
    const auto configuration = NamedCollectionConfiguration::createConfiguration(name, query.changes, query.overridability);

    return NamedCollection::create(
        *configuration, name, "", keys, SourceId::SQL, /* is_mutable */true);
}
/// Path of the metadata file: "<metadata_path>/<escaped collection name>.sql".
std::string getMetadataPath(const std::string & collection_name) const
{
    const std::string file_name = escapeForFileName(collection_name) + ".sql";
    return fs::path(metadata_path) / file_name;
}
/// Delete .tmp files. They could be left undeleted in case of
/// some exception or abrupt server restart.
void cleanup()
{
fs::directory_iterator it{metadata_path};
std::vector<std::string> files_to_remove;
for (; it != fs::directory_iterator{}; ++it)
{
const auto & current_path = it->path();
if (current_path.extension() == ".tmp")
files_to_remove.push_back(current_path);
}
for (const auto & file : files_to_remove)
(void)fs::remove(file);
}
/// Reads the whole file at `path` and parses it as a CREATE NAMED COLLECTION query.
/// Parser limits are taken from `settings`. Returns a copy of the parsed query.
static ASTCreateNamedCollectionQuery readCreateQueryFromMetadata(
    const std::string & path,
    const Settings & settings)
{
    ReadBufferFromFile file_buffer(path);

    std::string query_text;
    readStringUntilEOF(query_text, file_buffer);

    ParserCreateNamedCollectionQuery parser;
    auto ast = parseQuery(
        parser, query_text, "in file " + path, 0, settings.max_parser_depth, settings.max_parser_backtracks);

    return ast->as<const ASTCreateNamedCollectionQuery &>();
}
void writeCreateQueryToMetadata(
const ASTCreateNamedCollectionQuery & query,
const std::string & path,
const Settings & settings,
bool replace = false) const
{
if (!replace && fs::exists(path))
{
throw Exception(
ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
"Metadata file {} for named collection already exists",
path);
}
fs::create_directories(metadata_path);
auto tmp_path = path + ".tmp";
String formatted_query = serializeAST(query);
WriteBufferFromFile out(tmp_path, formatted_query.size(), O_WRONLY | O_CREAT | O_EXCL);
writeString(formatted_query, out);
out.next();
if (settings.fsync_metadata)
out.sync();
out.close();
fs::rename(tmp_path, path);
}
};
/// Acquires the single process-wide mutex that serializes named collection
/// operations and returns the lock that owns it.
std::unique_lock<std::mutex> lockNamedCollectionsTransaction()
{
    static std::mutex transaction_mutex;
    return std::unique_lock<std::mutex>(transaction_mutex);
}
/// Loads all collections from `config` into the factory and marks the config
/// source as loaded. The lock argument is unused; it only signals that the
/// caller holds the transaction lock.
void loadFromConfigUnlocked(const Poco::Util::AbstractConfiguration & config, std::unique_lock<std::mutex> &)
{
    auto config_collections = LoadFromConfig(config).getAll();

    LOG_TRACE(
        getLogger("NamedCollectionsUtils"),
        "Loaded {} collections from config", config_collections.size());

    NamedCollectionFactory::instance().add(std::move(config_collections));
    is_loaded_from_config.store(true);
}
/// Takes the transaction lock and loads the collections defined in `config`.
void loadFromConfig(const Poco::Util::AbstractConfiguration & config)
{
    auto transaction = lockNamedCollectionsTransaction();
    loadFromConfigUnlocked(config, transaction);
}
/// Replaces all config-sourced collections in the factory with the ones
/// currently defined in `config`.
///
/// The new set is built before anything is removed, so a failure while reading
/// the config leaves the factory untouched.
void reloadFromConfig(const Poco::Util::AbstractConfiguration & config)
{
    auto lock = lockNamedCollectionsTransaction();
    auto collections = LoadFromConfig(config).getAll();
    auto & instance = NamedCollectionFactory::instance();
    instance.removeById(SourceId::CONFIG);
    /// `add` takes the map by value: move it instead of copying every entry.
    instance.add(std::move(collections));
    is_loaded_from_config = true;
}
/// Loads all SQL-defined collections into the factory and marks the SQL source
/// as loaded. The lock argument is unused; it only signals that the caller
/// holds the transaction lock.
void loadFromSQLUnlocked(ContextPtr context, std::unique_lock<std::mutex> &)
{
    auto sql_collections = LoadFromSQL(context).getAll();

    LOG_TRACE(
        getLogger("NamedCollectionsUtils"),
        "Loaded {} collections from SQL", sql_collections.size());

    NamedCollectionFactory::instance().add(std::move(sql_collections));
    is_loaded_from_sql.store(true);
}
/// Takes the transaction lock and loads the SQL-defined collections.
void loadFromSQL(ContextPtr context)
{
    auto transaction = lockNamedCollectionsTransaction();
    loadFromSQLUnlocked(context, transaction);
}
/// Loads whichever of the two sources (config, SQL) has not been loaded yet,
/// using the global context. The caller must hold the transaction lock.
void loadIfNotUnlocked(std::unique_lock<std::mutex> & lock)
{
    auto context = Context::getGlobalContextInstance();

    if (!is_loaded_from_config.load())
        loadFromConfigUnlocked(context->getConfigRef(), lock);

    if (!is_loaded_from_sql.load())
        loadFromSQLUnlocked(context, lock);
}
/// Loads named collections from config and SQL unless both are already loaded.
/// The flags are checked before locking so that the common case takes no lock.
void loadIfNot()
{
    const bool everything_loaded = is_loaded_from_sql && is_loaded_from_config;
    if (!everything_loaded)
    {
        auto transaction = lockNamedCollectionsTransaction();
        loadIfNotUnlocked(transaction);
    }
}
/// Executes DROP NAMED COLLECTION: removes the metadata file and then the
/// in-memory collection. A missing collection is an error unless the query
/// has IF EXISTS.
void removeFromSQL(const ASTDropNamedCollectionQuery & query, ContextPtr context)
{
    auto transaction = lockNamedCollectionsTransaction();
    loadIfNotUnlocked(transaction);

    auto & factory = NamedCollectionFactory::instance();
    const auto & name = query.collection_name;

    if (factory.exists(name))
    {
        LoadFromSQL(context).remove(name);
        factory.remove(name);
        return;
    }

    if (query.if_exists)
        return;

    throw Exception(
        ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
        "Cannot remove collection `{}`, because it doesn't exist",
        name);
}
/// Executes CREATE NAMED COLLECTION: writes the metadata file and registers
/// the new collection in the factory. An already existing collection is an
/// error unless the query has IF NOT EXISTS.
void createFromSQL(const ASTCreateNamedCollectionQuery & query, ContextPtr context)
{
    auto transaction = lockNamedCollectionsTransaction();
    loadIfNotUnlocked(transaction);

    auto & factory = NamedCollectionFactory::instance();
    const auto & name = query.collection_name;

    if (!factory.exists(name))
    {
        factory.add(name, LoadFromSQL(context).create(query));
        return;
    }

    if (query.if_not_exists)
        return;

    throw Exception(
        ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
        "A named collection `{}` already exists",
        name);
}
/// Executes ALTER NAMED COLLECTION.
///
/// The stored definition is updated first, then the same changes are applied
/// to the in-memory collection: every key from `query.changes` is set or
/// updated (with its overridability flag, if the query specifies one) and
/// every key from `query.delete_keys` is removed.
///
/// Throws NAMED_COLLECTION_DOESNT_EXIST if the collection is unknown and the
/// query has no IF EXISTS; with IF EXISTS the call is a no-op.
void updateFromSQL(const ASTAlterNamedCollectionQuery & query, ContextPtr context)
{
    auto lock = lockNamedCollectionsTransaction();
    loadIfNotUnlocked(lock);

    auto & instance = NamedCollectionFactory::instance();
    if (!instance.exists(query.collection_name))
    {
        if (!query.if_exists)
        {
            /// This is the update path; the message used to say "remove".
            throw Exception(
                ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
                "Cannot update collection `{}`, because it doesn't exist",
                query.collection_name);
        }
        return;
    }

    /// Persist first: if this throws, the in-memory collection is not modified.
    LoadFromSQL(context).update(query);

    auto collection = instance.getMutable(query.collection_name);
    auto collection_lock = collection->lock();

    for (const auto & [name, value] : query.changes)
    {
        auto it_override = query.overridability.find(name);
        if (it_override != query.overridability.end())
            collection->setOrUpdate<String, true>(name, convertFieldToString(value), it_override->second);
        else
            collection->setOrUpdate<String, true>(name, convertFieldToString(value), {});
    }

    for (const auto & key : query.delete_keys)
        collection->remove<true>(key);
}
}
}

View File

@ -1,42 +0,0 @@
#pragma once
#include <Interpreters/Context_fwd.h>
namespace Poco { namespace Util { class AbstractConfiguration; } }
namespace DB
{
class ASTCreateNamedCollectionQuery;
class ASTAlterNamedCollectionQuery;
class ASTDropNamedCollectionQuery;
/// Free functions for loading named collections and for applying
/// CREATE / ALTER / DROP NAMED COLLECTION queries.
namespace NamedCollectionUtils
{
/// Where a named collection was defined.
enum class SourceId : uint8_t
{
/// No particular source.
NONE = 0,
/// Defined in the server configuration.
CONFIG = 1,
/// Created by an SQL query.
SQL = 2,
};
/// Load named collections defined in `config`.
void loadFromConfig(const Poco::Util::AbstractConfiguration & config);
/// Replace the collections that were previously loaded from config with the ones defined in `config`.
void reloadFromConfig(const Poco::Util::AbstractConfiguration & config);
/// Load named collections from `context->getPath() / named_collections /`.
void loadFromSQL(ContextPtr context);
/// Remove collection as well as its metadata from `context->getPath() / named_collections /`.
void removeFromSQL(const ASTDropNamedCollectionQuery & query, ContextPtr context);
/// Create a new collection from AST and put it to `context->getPath() / named_collections /`.
void createFromSQL(const ASTCreateNamedCollectionQuery & query, ContextPtr context);
/// Update definition of already existing collection from AST and update result in `context->getPath() / named_collections /`.
void updateFromSQL(const ASTAlterNamedCollectionQuery & query, ContextPtr context);
/// Load named collections from config and from SQL metadata, unless both have been loaded already.
void loadIfNot();
}
}

View File

@ -4,7 +4,6 @@
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Common/NamedCollections/NamedCollectionConfiguration.h>
#include <Common/NamedCollections/NamedCollectionUtils.h>
#include <Poco/Util/AbstractConfiguration.h>
@ -297,7 +296,7 @@ MutableNamedCollectionPtr NamedCollection::duplicate() const
auto impl = pimpl->createCopy(collection_name);
return std::unique_ptr<NamedCollection>(
new NamedCollection(
std::move(impl), collection_name, NamedCollectionUtils::SourceId::NONE, true));
std::move(impl), collection_name, SourceId::NONE, true));
}
NamedCollection::Keys NamedCollection::getKeys(ssize_t depth, const std::string & prefix) const

View File

@ -1,7 +1,6 @@
#pragma once
#include <Interpreters/Context.h>
#include <Common/NamedCollections/NamedCollections_fwd.h>
#include <Common/NamedCollections/NamedCollectionUtils.h>
namespace Poco { namespace Util { class AbstractConfiguration; } }
@ -23,7 +22,12 @@ class NamedCollection
public:
using Key = std::string;
using Keys = std::set<Key, std::less<>>;
using SourceId = NamedCollectionUtils::SourceId;
enum class SourceId : uint8_t
{
NONE = 0,
CONFIG = 1,
SQL = 2,
};
static MutableNamedCollectionPtr create(
const Poco::Util::AbstractConfiguration & config,

View File

@ -1,5 +1,7 @@
#include <Common/NamedCollections/NamedCollectionsFactory.h>
#include <Common/NamedCollections/NamedCollectionUtils.h>
#include <Common/NamedCollections/NamedCollectionConfiguration.h>
#include <Common/NamedCollections/NamedCollectionsMetadataStorage.h>
#include <base/sleep.h>
namespace DB
{
@ -17,23 +19,29 @@ NamedCollectionFactory & NamedCollectionFactory::instance()
return instance;
}
/// Delegates to shutdown() when the factory is destroyed.
NamedCollectionFactory::~NamedCollectionFactory()
{
shutdown();
}
/// Stops background activity of the factory: raises the shutdown flag,
/// deactivates the update task (if one was created) and releases the
/// metadata storage.
void NamedCollectionFactory::shutdown()
{
    /// The flag is raised first so that the update loop can observe it.
    shutdown_called.store(true);

    if (update_task)
        update_task->deactivate();

    metadata_storage.reset();
}
bool NamedCollectionFactory::exists(const std::string & collection_name) const
{
std::lock_guard lock(mutex);
return existsUnlocked(collection_name, lock);
}
bool NamedCollectionFactory::existsUnlocked(
const std::string & collection_name,
std::lock_guard<std::mutex> & /* lock */) const
{
return loaded_named_collections.contains(collection_name);
return exists(collection_name, lock);
}
NamedCollectionPtr NamedCollectionFactory::get(const std::string & collection_name) const
{
std::lock_guard lock(mutex);
auto collection = tryGetUnlocked(collection_name, lock);
auto collection = tryGet(collection_name, lock);
if (!collection)
{
throw Exception(
@ -47,14 +55,35 @@ NamedCollectionPtr NamedCollectionFactory::get(const std::string & collection_na
NamedCollectionPtr NamedCollectionFactory::tryGet(const std::string & collection_name) const
{
std::lock_guard lock(mutex);
return tryGetUnlocked(collection_name, lock);
return tryGet(collection_name, lock);
}
/// Returns a copy of the map of all loaded collections, taken under the factory mutex.
NamedCollectionsMap NamedCollectionFactory::getAll() const
{
    std::lock_guard<std::mutex> guard(mutex);
    return loaded_named_collections;
}
/// Whether a collection with this name is loaded. The caller must hold the factory mutex.
bool NamedCollectionFactory::exists(const std::string & collection_name, std::lock_guard<std::mutex> &) const
{
    return loaded_named_collections.find(collection_name) != loaded_named_collections.end();
}
/// Looks up a loaded collection by name; returns nullptr if there is none.
/// The caller must hold the factory mutex.
MutableNamedCollectionPtr NamedCollectionFactory::tryGet(
    const std::string & collection_name,
    std::lock_guard<std::mutex> &) const
{
    if (const auto found = loaded_named_collections.find(collection_name);
        found != loaded_named_collections.end())
    {
        return found->second;
    }
    return nullptr;
}
MutableNamedCollectionPtr NamedCollectionFactory::getMutable(
const std::string & collection_name) const
const std::string & collection_name,
std::lock_guard<std::mutex> & lock) const
{
std::lock_guard lock(mutex);
auto collection = tryGetUnlocked(collection_name, lock);
auto collection = tryGet(collection_name, lock);
if (!collection)
{
throw Exception(
@ -73,35 +102,10 @@ MutableNamedCollectionPtr NamedCollectionFactory::getMutable(
return collection;
}
/// Looks up a loaded collection by name; returns nullptr if there is none.
/// The caller must hold the factory mutex.
MutableNamedCollectionPtr NamedCollectionFactory::tryGetUnlocked(
    const std::string & collection_name,
    std::lock_guard<std::mutex> & /* lock */) const
{
    if (const auto found = loaded_named_collections.find(collection_name);
        found != loaded_named_collections.end())
    {
        return found->second;
    }
    return nullptr;
}
void NamedCollectionFactory::add(
const std::string & collection_name,
MutableNamedCollectionPtr collection)
{
std::lock_guard lock(mutex);
addUnlocked(collection_name, collection, lock);
}
void NamedCollectionFactory::add(NamedCollectionsMap collections)
{
std::lock_guard lock(mutex);
for (const auto & [collection_name, collection] : collections)
addUnlocked(collection_name, collection, lock);
}
void NamedCollectionFactory::addUnlocked(
const std::string & collection_name,
MutableNamedCollectionPtr collection,
std::lock_guard<std::mutex> & /* lock */)
std::lock_guard<std::mutex> &)
{
auto [it, inserted] = loaded_named_collections.emplace(collection_name, collection);
if (!inserted)
@ -113,10 +117,15 @@ void NamedCollectionFactory::addUnlocked(
}
}
void NamedCollectionFactory::remove(const std::string & collection_name)
void NamedCollectionFactory::add(NamedCollectionsMap collections, std::lock_guard<std::mutex> & lock)
{
std::lock_guard lock(mutex);
bool removed = removeIfExistsUnlocked(collection_name, lock);
for (const auto & [collection_name, collection] : collections)
add(collection_name, collection, lock);
}
void NamedCollectionFactory::remove(const std::string & collection_name, std::lock_guard<std::mutex> & lock)
{
bool removed = removeIfExists(collection_name, lock);
if (!removed)
{
throw Exception(
@ -126,17 +135,11 @@ void NamedCollectionFactory::remove(const std::string & collection_name)
}
}
void NamedCollectionFactory::removeIfExists(const std::string & collection_name)
{
std::lock_guard lock(mutex);
removeIfExistsUnlocked(collection_name, lock); // NOLINT
}
bool NamedCollectionFactory::removeIfExistsUnlocked(
bool NamedCollectionFactory::removeIfExists(
const std::string & collection_name,
std::lock_guard<std::mutex> & lock)
{
auto collection = tryGetUnlocked(collection_name, lock);
auto collection = tryGet(collection_name, lock);
if (!collection)
return false;
@ -152,18 +155,246 @@ bool NamedCollectionFactory::removeIfExistsUnlocked(
return true;
}
void NamedCollectionFactory::removeById(NamedCollectionUtils::SourceId id)
void NamedCollectionFactory::removeById(NamedCollection::SourceId id, std::lock_guard<std::mutex> &)
{
std::lock_guard lock(mutex);
std::erase_if(
loaded_named_collections,
[&](const auto & value) { return value.second->getSourceId() == id; });
}
NamedCollectionsMap NamedCollectionFactory::getAll() const
namespace
{
constexpr auto NAMED_COLLECTIONS_CONFIG_PREFIX = "named_collections";
/// Names of the direct children of the `named_collections` section of `config`.
std::vector<std::string> listCollections(const Poco::Util::AbstractConfiguration & config)
{
    Poco::Util::AbstractConfiguration::Keys names;
    config.keys(NAMED_COLLECTIONS_CONFIG_PREFIX, names);
    return names;
}
/// Creates one immutable, config-sourced collection from the subtree
/// "named_collections.<collection_name>" of `config`.
MutableNamedCollectionPtr getCollection(
    const Poco::Util::AbstractConfiguration & config,
    const std::string & collection_name)
{
    const auto prefix = fmt::format("{}.{}", NAMED_COLLECTIONS_CONFIG_PREFIX, collection_name);

    std::queue<std::string> pending;
    std::set<std::string, std::less<>> found_paths;
    pending.push(prefix);
    NamedCollectionConfiguration::listKeys(config, std::move(pending), found_paths, -1);

    /// When the only enumerated path is the prefix itself, the collection has no keys.
    const bool has_keys = !(found_paths.size() == 1 && *found_paths.begin() == prefix);

    std::set<std::string, std::less<>> keys;
    if (has_keys)
    {
        /// Drop "<prefix>." from every path; the +1 accounts for the dot.
        const auto skip = prefix.size() + 1;
        for (const auto & full_path : found_paths)
            keys.emplace(full_path.substr(skip));
    }

    return NamedCollection::create(
        config, collection_name, prefix, keys, NamedCollection::SourceId::CONFIG, /* is_mutable */false);
}
/// Creates every collection listed in `config`.
/// Throws NAMED_COLLECTION_ALREADY_EXISTS if a name is listed twice.
NamedCollectionsMap getNamedCollections(const Poco::Util::AbstractConfiguration & config)
{
    NamedCollectionsMap collections;
    for (const auto & name : listCollections(config))
    {
        if (collections.contains(name))
            throw Exception(
                ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
                "Found duplicate named collection `{}`",
                name);

        collections.emplace(name, getCollection(config, name));
    }
    return collections;
}
}
void NamedCollectionFactory::loadIfNot()
{
std::lock_guard lock(mutex);
return loaded_named_collections;
loadIfNot(lock);
}
/// Performs the initial load once: creates the metadata storage, loads the
/// collections from config and from the storage and, if the storage supports
/// periodic updates, starts the background update task.
///
/// Returns true if this call did the loading, false if it was already done.
/// The caller must hold the factory mutex.
bool NamedCollectionFactory::loadIfNot(std::lock_guard<std::mutex> & lock)
{
    if (!loaded)
    {
        auto global_context = Context::getGlobalContextInstance();
        metadata_storage = NamedCollectionsMetadataStorage::create(global_context);

        loadFromConfig(global_context->getConfigRef(), lock);
        loadFromSQL(lock);

        if (metadata_storage->supportsPeriodicUpdate())
        {
            update_task = global_context->getSchedulePool().createTask("NamedCollectionsMetadataStorage", [this]{ updateFunc(); });
            update_task->activate();
            update_task->schedule();
        }

        loaded = true;
        return true;
    }
    return false;
}
/// Adds all collections defined in `config` to the factory.
/// The caller must hold the factory mutex.
void NamedCollectionFactory::loadFromConfig(const Poco::Util::AbstractConfiguration & config, std::lock_guard<std::mutex> & lock)
{
    auto config_collections = getNamedCollections(config);
    LOG_TEST(log, "Loaded {} collections from config", config_collections.size());

    add(std::move(config_collections), lock);
}
/// Replaces the config-sourced collections with the ones defined in `config`.
/// If nothing had been loaded before, the initial load is done instead.
void NamedCollectionFactory::reloadFromConfig(const Poco::Util::AbstractConfiguration & config)
{
    std::lock_guard<std::mutex> guard(mutex);

    const bool loaded_just_now = loadIfNot(guard);
    if (loaded_just_now)
        return;

    auto config_collections = getNamedCollections(config);
    LOG_TEST(log, "Loaded {} collections from config", config_collections.size());

    removeById(NamedCollection::SourceId::CONFIG, guard);
    add(std::move(config_collections), guard);
}
/// Adds all collections known to the metadata storage to the factory.
/// The caller must hold the factory mutex.
void NamedCollectionFactory::loadFromSQL(std::lock_guard<std::mutex> & lock)
{
    auto stored_collections = metadata_storage->getAll();
    LOG_TEST(log, "Loaded {} collections from sql", stored_collections.size());

    add(std::move(stored_collections), lock);
}
/// Executes CREATE NAMED COLLECTION: stores the definition in the metadata
/// storage and registers the new collection. An already existing collection
/// is an error unless the query has IF NOT EXISTS.
void NamedCollectionFactory::createFromSQL(const ASTCreateNamedCollectionQuery & query)
{
    std::lock_guard<std::mutex> guard(mutex);
    loadIfNot(guard);

    const auto & name = query.collection_name;
    if (!exists(name, guard))
    {
        add(name, metadata_storage->create(query), guard);
        return;
    }

    if (query.if_not_exists)
        return;

    throw Exception(
        ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
        "A named collection `{}` already exists",
        name);
}
/// Executes DROP NAMED COLLECTION: removes the definition from the metadata
/// storage and then the in-memory collection. A missing collection is an
/// error unless the query has IF EXISTS.
void NamedCollectionFactory::removeFromSQL(const ASTDropNamedCollectionQuery & query)
{
    std::lock_guard<std::mutex> guard(mutex);
    loadIfNot(guard);

    const auto & name = query.collection_name;
    if (exists(name, guard))
    {
        metadata_storage->remove(name);
        remove(name, guard);
        return;
    }

    if (query.if_exists)
        return;

    throw Exception(
        ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
        "Cannot remove collection `{}`, because it doesn't exist",
        name);
}
/// Executes ALTER NAMED COLLECTION.
///
/// The definition in the metadata storage is updated first, then the same
/// changes are applied to the in-memory collection: every key from
/// `query.changes` is set or updated (with its overridability flag, if the
/// query specifies one) and every key from `query.delete_keys` is removed.
///
/// Throws NAMED_COLLECTION_DOESNT_EXIST if the collection is unknown and the
/// query has no IF EXISTS; with IF EXISTS the call is a no-op.
void NamedCollectionFactory::updateFromSQL(const ASTAlterNamedCollectionQuery & query)
{
    std::lock_guard lock(mutex);
    loadIfNot(lock);

    if (!exists(query.collection_name, lock))
    {
        if (query.if_exists)
            return;

        /// This is the update path; the message used to say "remove".
        throw Exception(
            ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
            "Cannot update collection `{}`, because it doesn't exist",
            query.collection_name);
    }

    /// Persist first: if this throws, the in-memory collection is not modified.
    metadata_storage->update(query);

    auto collection = getMutable(query.collection_name, lock);
    auto collection_lock = collection->lock();

    for (const auto & [name, value] : query.changes)
    {
        auto it_override = query.overridability.find(name);
        if (it_override != query.overridability.end())
            collection->setOrUpdate<String, true>(name, convertFieldToString(value), it_override->second);
        else
            collection->setOrUpdate<String, true>(name, convertFieldToString(value), {});
    }

    for (const auto & key : query.delete_keys)
        collection->remove<true>(key);
}
/// Replaces the SQL-sourced collections with the current content of the
/// metadata storage. If nothing had been loaded before, the initial load is
/// done instead.
void NamedCollectionFactory::reloadFromSQL()
{
    std::lock_guard<std::mutex> guard(mutex);

    const bool loaded_just_now = loadIfNot(guard);
    if (loaded_just_now)
        return;

    /// Read the new set before removing the old one.
    auto stored_collections = metadata_storage->getAll();
    removeById(NamedCollection::SourceId::SQL, guard);
    add(std::move(stored_collections), guard);
}
/// Body of the background update task created in loadIfNot().
///
/// Loops until `shutdown_called` is set. Each iteration asks the metadata
/// storage whether an update is available (`waitUpdate()`) and, if so,
/// reloads the SQL-sourced collections. Exceptions never leave the loop:
/// they are logged and the loop continues.
void NamedCollectionFactory::updateFunc()
{
LOG_TRACE(log, "Named collections background updating thread started");
while (!shutdown_called.load())
{
/// NOTE(review): presumably blocks until the storage reports a change or a timeout passes - confirm in NamedCollectionsMetadataStorage.
if (metadata_storage->waitUpdate())
{
try
{
reloadFromSQL();
}
catch (const Coordination::Exception & e)
{
/// Hardware errors (the log message treats them as a lost ZooKeeper connection) are expected: wait a second and retry.
if (Coordination::isHardwareError(e.code))
{
LOG_INFO(log, "Lost ZooKeeper connection, will try to connect again: {}",
DB::getCurrentExceptionMessage(true));
sleepForSeconds(1);
}
else
{
/// Any other coordination error is unexpected: log it and assert.
tryLogCurrentException(__PRETTY_FUNCTION__);
chassert(false);
}
continue;
}
catch (...)
{
/// Unexpected failure: log it and assert, then keep the loop running.
DB::tryLogCurrentException(__PRETTY_FUNCTION__);
chassert(false);
continue;
}
}
}
LOG_TRACE(log, "Named collections background updating thread finished");
}
}

View File

@ -1,58 +1,83 @@
#pragma once
#include <Common/NamedCollections/NamedCollections.h>
#include <Common/NamedCollections/NamedCollectionsMetadataStorage.h>
#include <Common/logger_useful.h>
namespace DB
{
class ASTCreateNamedCollectionQuery;
class ASTDropNamedCollectionQuery;
class ASTAlterNamedCollectionQuery;
class NamedCollectionFactory : boost::noncopyable
{
public:
static NamedCollectionFactory & instance();
~NamedCollectionFactory();
bool exists(const std::string & collection_name) const;
NamedCollectionPtr get(const std::string & collection_name) const;
NamedCollectionPtr tryGet(const std::string & collection_name) const;
MutableNamedCollectionPtr getMutable(const std::string & collection_name) const;
void add(const std::string & collection_name, MutableNamedCollectionPtr collection);
void add(NamedCollectionsMap collections);
void update(NamedCollectionsMap collections);
void remove(const std::string & collection_name);
void removeIfExists(const std::string & collection_name);
void removeById(NamedCollectionUtils::SourceId id);
NamedCollectionsMap getAll() const;
private:
bool existsUnlocked(
const std::string & collection_name,
std::lock_guard<std::mutex> & lock) const;
void reloadFromConfig(const Poco::Util::AbstractConfiguration & config);
MutableNamedCollectionPtr tryGetUnlocked(
const std::string & collection_name,
std::lock_guard<std::mutex> & lock) const;
void reloadFromSQL();
void addUnlocked(
const std::string & collection_name,
MutableNamedCollectionPtr collection,
std::lock_guard<std::mutex> & lock);
void createFromSQL(const ASTCreateNamedCollectionQuery & query);
bool removeIfExistsUnlocked(
const std::string & collection_name,
std::lock_guard<std::mutex> & lock);
void removeFromSQL(const ASTDropNamedCollectionQuery & query);
void updateFromSQL(const ASTAlterNamedCollectionQuery & query);
void loadIfNot();
void shutdown();
protected:
mutable NamedCollectionsMap loaded_named_collections;
mutable std::mutex mutex;
bool is_initialized = false;
const LoggerPtr log = getLogger("NamedCollectionFactory");
bool loaded = false;
std::atomic<bool> shutdown_called = false;
std::unique_ptr<NamedCollectionsMetadataStorage> metadata_storage;
BackgroundSchedulePool::TaskHolder update_task;
bool loadIfNot(std::lock_guard<std::mutex> & lock);
bool exists(
const std::string & collection_name,
std::lock_guard<std::mutex> & lock) const;
MutableNamedCollectionPtr getMutable(const std::string & collection_name, std::lock_guard<std::mutex> & lock) const;
void add(const std::string & collection_name, MutableNamedCollectionPtr collection, std::lock_guard<std::mutex> & lock);
void add(NamedCollectionsMap collections, std::lock_guard<std::mutex> & lock);
void update(NamedCollectionsMap collections, std::lock_guard<std::mutex> & lock);
void remove(const std::string & collection_name, std::lock_guard<std::mutex> & lock);
bool removeIfExists(const std::string & collection_name, std::lock_guard<std::mutex> & lock);
MutableNamedCollectionPtr tryGet(const std::string & collection_name, std::lock_guard<std::mutex> & lock) const;
void removeById(NamedCollection::SourceId id, std::lock_guard<std::mutex> & lock);
void loadFromConfig(
const Poco::Util::AbstractConfiguration & config,
std::lock_guard<std::mutex> & lock);
void loadFromSQL(std::lock_guard<std::mutex> & lock);
void updateFunc();
};
}

View File

@ -0,0 +1,519 @@
#include <Common/NamedCollections/NamedCollectionsMetadataStorage.h>
#include <Common/NamedCollections/NamedCollectionConfiguration.h>
#include <Common/escapeForFileName.h>
#include <Common/logger_useful.h>
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/formatAST.h>
#include <Interpreters/Context.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int NAMED_COLLECTION_ALREADY_EXISTS;
extern const int NAMED_COLLECTION_DOESNT_EXIST;
extern const int INVALID_CONFIG_PARAMETER;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
static const std::string named_collections_storage_config_path = "named_collections_storage";
namespace
{
/// Builds a mutable, SQL-sourced named collection from a parsed CREATE NAMED COLLECTION query.
/// The key set of the collection is exactly the set of names listed in `query.changes`.
MutableNamedCollectionPtr createNamedCollectionFromAST(const ASTCreateNamedCollectionQuery & query)
{
    const auto & name = query.collection_name;
    const auto configuration = NamedCollectionConfiguration::createConfiguration(name, query.changes, query.overridability);

    std::set<std::string, std::less<>> collection_keys;
    for (const auto & [key, _] : query.changes)
        collection_keys.insert(key);

    return NamedCollection::create(
        *configuration,
        name,
        "",
        collection_keys,
        NamedCollection::SourceId::SQL,
        /* is_mutable */true);
}
/// Name of the metadata entry for a collection: the escaped collection name with a `.sql` suffix.
std::string getFileName(const std::string & collection_name)
{
    std::string file_name = escapeForFileName(collection_name);
    file_name += ".sql";
    return file_name;
}
}
/// Backend interface for persisting named collection metadata.
/// `path` arguments are entry names (as produced by getFileName()); each backend resolves them
/// against its own root.
class NamedCollectionsMetadataStorage::INamedCollectionsStorage
{
public:
virtual ~INamedCollectionsStorage() = default;
/// Whether an entry with the given name is present in the storage.
virtual bool exists(const std::string & path) const = 0;
/// Enumerate stored entries. Callers take the stem of each returned element to get the entry name.
virtual std::vector<std::string> list() const = 0;
/// Return the full contents of the entry.
virtual std::string read(const std::string & path) const = 0;
/// Store `data` under `path`. With `replace == false` the visible implementations throw
/// NAMED_COLLECTION_ALREADY_EXISTS when they detect that the entry is already present.
virtual void write(const std::string & path, const std::string & data, bool replace) = 0;
/// Remove the entry; the visible implementations throw if it cannot be removed.
virtual void remove(const std::string & path) = 0;
/// Remove the entry if present. Returns true if something was removed.
virtual bool removeIfExists(const std::string & path) = 0;
/// True if the backend can change behind our back and should be polled via waitUpdate().
virtual bool supportsPeriodicUpdate() const = 0;
/// Wait up to `timeout` for a change in the set of entries; the default implementation reports no change.
virtual bool waitUpdate(size_t /* timeout */) { return false; }
};
/// Keeps named collection metadata as `.sql` files in a directory on the local filesystem.
/// Every `path` argument of the public methods is a file name relative to `root_path`;
/// it is resolved to a real filesystem path with getPath() exactly once.
class NamedCollectionsMetadataStorage::LocalStorage : public INamedCollectionsStorage, private WithContext
{
private:
    std::string root_path;

public:
    /// Remembers the metadata directory and, if it already exists, removes stale `.tmp` files from it.
    LocalStorage(ContextPtr context_, const std::string & path_)
        : WithContext(context_)
        , root_path(path_)
    {
        if (fs::exists(root_path))
            cleanup();
    }

    ~LocalStorage() override = default;

    bool supportsPeriodicUpdate() const override { return false; }

    /// Returns full paths of all `.sql` files in the metadata directory (empty if the directory is missing).
    /// Files with any other extension are reported with a warning and skipped.
    std::vector<std::string> list() const override
    {
        if (!fs::exists(root_path))
            return {};

        std::vector<std::string> elements;
        for (fs::directory_iterator it{root_path}; it != fs::directory_iterator{}; ++it)
        {
            const auto & current_path = it->path();
            if (current_path.extension() == ".sql")
            {
                elements.push_back(it->path());
            }
            else
            {
                LOG_WARNING(
                    getLogger("LocalStorage"),
                    "Unexpected file {} in named collections directory",
                    current_path.filename().string());
            }
        }
        return elements;
    }

    bool exists(const std::string & path) const override
    {
        return fs::exists(getPath(path));
    }

    /// Reads the whole metadata file into a string.
    std::string read(const std::string & path) const override
    {
        ReadBufferFromFile in(getPath(path));
        std::string data;
        readStringUntilEOF(data, in);
        return data;
    }

    /// Writes `data` to a temporary file next to the target and renames it over the target.
    /// Throws NAMED_COLLECTION_ALREADY_EXISTS if `replace` is false and the target file already exists.
    void write(const std::string & path, const std::string & data, bool replace) override
    {
        /// The existence check must look at the file inside root_path, not at `path`
        /// interpreted relative to the process working directory.
        const auto target_path = getPath(path);
        if (!replace && fs::exists(target_path))
        {
            throw Exception(
                ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
                "Metadata file {} for named collection already exists",
                path);
        }

        fs::create_directories(root_path);

        auto tmp_path = getPath(path + ".tmp");
        /// O_EXCL: fail instead of silently reusing a leftover temporary file.
        WriteBufferFromFile out(tmp_path, data.size(), O_WRONLY | O_CREAT | O_EXCL);
        writeString(data, out);

        out.next();
        if (getContext()->getSettingsRef().fsync_metadata)
            out.sync();
        out.close();

        fs::rename(tmp_path, target_path);
    }

    /// Removes the metadata file; throws NAMED_COLLECTION_DOESNT_EXIST if there was nothing to remove.
    void remove(const std::string & path) override
    {
        /// removeIfExists() resolves the name against root_path itself, so pass the bare name:
        /// resolving twice would duplicate the prefix whenever root_path is a relative path.
        if (!removeIfExists(path))
        {
            throw Exception(
                ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
                "Cannot remove `{}`, because it doesn't exist", path);
        }
    }

    /// Returns true if the file existed and was removed.
    bool removeIfExists(const std::string & path) override
    {
        return fs::remove(getPath(path));
    }

private:
    /// Resolves a file name against the metadata directory.
    std::string getPath(const std::string & path) const
    {
        return fs::path(root_path) / path;
    }

    /// Delete .tmp files. They could be left undeleted in case of
    /// some exception or abrupt server restart.
    void cleanup()
    {
        /// Collect first, delete afterwards, so the directory is not modified while it is being iterated.
        std::vector<std::string> files_to_remove;
        for (fs::directory_iterator it{root_path}; it != fs::directory_iterator{}; ++it)
        {
            const auto & current_path = it->path();
            if (current_path.extension() == ".tmp")
                files_to_remove.push_back(current_path);
        }
        for (const auto & file : files_to_remove)
            fs::remove(file);
    }
};
/// Keeps named collection metadata as child nodes of `root_path` in ZooKeeper / Keeper.
/// Every `path` argument of the public methods is a node name relative to `root_path`.
class NamedCollectionsMetadataStorage::ZooKeeperStorage : public INamedCollectionsStorage, private WithContext
{
private:
    std::string root_path;
    mutable zkutil::ZooKeeperPtr zookeeper_client{nullptr};
    /// Passed to getChildren() in list(); waitUpdate() waits on it.
    mutable zkutil::EventPtr wait_event;
    /// cversion of the root node as recorded by the last list() call.
    mutable Int32 collections_node_cversion = 0;

public:
    /// Normalizes the root path (leading slash, no trailing slash) and creates the root node if it is missing.
    /// Throws INVALID_CONFIG_PARAMETER for an empty path.
    ZooKeeperStorage(ContextPtr context_, const std::string & path_)
        : WithContext(context_)
        , root_path(path_)
    {
        if (root_path.empty())
            throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Collections path cannot be empty");

        if (root_path != "/" && root_path.back() == '/')
            root_path.resize(root_path.size() - 1);
        if (root_path.front() != '/')
            root_path = "/" + root_path;

        auto client = getClient();
        if (root_path != "/" && !client->exists(root_path))
        {
            client->createAncestors(root_path);
            client->createIfNotExists(root_path, "");
        }
    }

    ~ZooKeeperStorage() override = default;

    bool supportsPeriodicUpdate() const override { return true; }

    /// Return true if children changed.
    bool waitUpdate(size_t timeout) override
    {
        if (!wait_event)
        {
            /// We have not made any list() attempt yet, so ask the caller to do that.
            return true;
        }

        if (wait_event->tryWait(timeout))
        {
            /// Children changed before timeout.
            return true;
        }

        /// The wait timed out: compare the root node's current cversion with the one seen by the last list().
        std::string res;
        Coordination::Stat stat;

        if (!getClient()->tryGet(root_path, res, &stat))
        {
            /// We do create root_path in constructor of this class,
            /// so this case is not really possible.
            chassert(false);
            return false;
        }

        return stat.cversion != collections_node_cversion;
    }

    /// Returns the names of the children of the root node, passes `wait_event` to getChildren()
    /// and remembers the root node's cversion for waitUpdate().
    std::vector<std::string> list() const override
    {
        if (!wait_event)
            wait_event = std::make_shared<Poco::Event>();

        Coordination::Stat stat;
        auto children = getClient()->getChildren(root_path, &stat, wait_event);
        collections_node_cversion = stat.cversion;
        return children;
    }

    bool exists(const std::string & path) const override
    {
        return getClient()->exists(getPath(path));
    }

    std::string read(const std::string & path) const override
    {
        return getClient()->get(getPath(path));
    }

    /// Stores `data` in the node for `path`.
    /// With `replace == false` throws NAMED_COLLECTION_ALREADY_EXISTS if the node exists,
    /// and a Coordination::Exception for any other non-OK result of the create request.
    void write(const std::string & path, const std::string & data, bool replace) override
    {
        if (replace)
        {
            getClient()->createOrUpdate(getPath(path), data, zkutil::CreateMode::Persistent);
            return;
        }

        const auto node_path = getPath(path);
        const auto code = getClient()->tryCreate(node_path, data, zkutil::CreateMode::Persistent);

        if (code == Coordination::Error::ZNODEEXISTS)
        {
            throw Exception(
                ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
                "Metadata file {} for named collection already exists",
                path);
        }

        /// Any other error code returned by tryCreate() means the node was not created;
        /// report it instead of pretending the write succeeded.
        if (code != Coordination::Error::ZOK)
            throw Coordination::Exception::fromPath(code, node_path);
    }

    void remove(const std::string & path) override
    {
        getClient()->remove(getPath(path));
    }

    /// Returns true if the node was removed, false if it did not exist; throws for any other result.
    bool removeIfExists(const std::string & path) override
    {
        auto code = getClient()->tryRemove(getPath(path));
        if (code == Coordination::Error::ZOK)
            return true;
        if (code == Coordination::Error::ZNONODE)
            return false;
        throw Coordination::Exception::fromPath(code, getPath(path));
    }

private:
    /// Returns the cached client; on first use or after session expiration obtains a new one
    /// from the context and calls sync() on the root path.
    zkutil::ZooKeeperPtr getClient() const
    {
        if (!zookeeper_client || zookeeper_client->expired())
        {
            zookeeper_client = getContext()->getZooKeeper();
            zookeeper_client->sync(root_path);
        }
        return zookeeper_client;
    }

    /// Resolves a node name against the root path.
    std::string getPath(const std::string & path) const
    {
        return fs::path(root_path) / path;
    }
};
/// Wraps an already constructed storage backend; instances are produced by create(const ContextPtr &).
NamedCollectionsMetadataStorage::NamedCollectionsMetadataStorage(
std::shared_ptr<INamedCollectionsStorage> storage_,
ContextPtr context_)
: WithContext(context_)
, storage(std::move(storage_))
{
}
/// Loads the stored CREATE query of the collection and builds the collection object from it.
MutableNamedCollectionPtr NamedCollectionsMetadataStorage::get(const std::string & collection_name) const
{
    return createNamedCollectionFromAST(readCreateQuery(collection_name));
}
/// Loads every collection present in the storage, keyed by the name reported by listCollections().
/// Throws NAMED_COLLECTION_ALREADY_EXISTS if the same name is listed twice.
NamedCollectionsMap NamedCollectionsMetadataStorage::getAll() const
{
    NamedCollectionsMap loaded;
    const auto names = listCollections();
    for (const auto & name : names)
    {
        if (!loaded.contains(name))
        {
            loaded.emplace(name, get(name));
            continue;
        }

        throw Exception(
            ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
            "Found duplicate named collection `{}`",
            name);
    }
    return loaded;
}
/// Persists the CREATE query (without replacing an existing entry) and then returns
/// the collection object built from the same query.
MutableNamedCollectionPtr NamedCollectionsMetadataStorage::create(const ASTCreateNamedCollectionQuery & query)
{
    writeCreateQuery(query, /* replace */ false);
    MutableNamedCollectionPtr collection = createNamedCollectionFromAST(query);
    return collection;
}
/// Removes the stored metadata of the collection; the backend throws if it cannot be removed.
void NamedCollectionsMetadataStorage::remove(const std::string & collection_name)
{
    const auto file_name = getFileName(collection_name);
    storage->remove(file_name);
}
/// Removes the stored metadata of the collection if present; returns whether anything was removed.
bool NamedCollectionsMetadataStorage::removeIfExists(const std::string & collection_name)
{
    const auto file_name = getFileName(collection_name);
    return storage->removeIfExists(file_name);
}
void NamedCollectionsMetadataStorage::update(const ASTAlterNamedCollectionQuery & query)
{
auto create_query = readCreateQuery(query.collection_name);
std::unordered_map<std::string, Field> result_changes_map;
for (const auto & [name, value] : query.changes)
{
auto [it, inserted] = result_changes_map.emplace(name, value);
if (!inserted)
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Value with key `{}` is used twice in the SET query (collection name: {})",
name, query.collection_name);
}
}
for (const auto & [name, value] : create_query.changes)
result_changes_map.emplace(name, value);
std::unordered_map<std::string, bool> result_overridability_map;
for (const auto & [name, value] : query.overridability)
result_overridability_map.emplace(name, value);
for (const auto & [name, value] : create_query.overridability)
result_overridability_map.emplace(name, value);
for (const auto & delete_key : query.delete_keys)
{
auto it = result_changes_map.find(delete_key);
if (it == result_changes_map.end())
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Cannot delete key `{}` because it does not exist in collection",
delete_key);
}
else
{
result_changes_map.erase(it);
auto it_override = result_overridability_map.find(delete_key);
if (it_override != result_overridability_map.end())
result_overridability_map.erase(it_override);
}
}
create_query.changes.clear();
for (const auto & [name, value] : result_changes_map)
create_query.changes.emplace_back(name, value);
create_query.overridability = std::move(result_overridability_map);
if (create_query.changes.empty())
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Named collection cannot be empty (collection name: {})",
query.collection_name);
chassert(create_query.collection_name == query.collection_name);
writeCreateQuery(create_query, true);
}
std::vector<std::string> NamedCollectionsMetadataStorage::listCollections() const
{
auto paths = storage->list();
std::vector<std::string> collections;
collections.reserve(paths.size());
for (const auto & path : paths)
collections.push_back(std::filesystem::path(path).stem());
return collections;
}
/// Reads the stored metadata entry of the collection and parses it as a CREATE NAMED COLLECTION query.
/// Parser limits are taken from the settings of the context. Returns a copy of the parsed query.
ASTCreateNamedCollectionQuery NamedCollectionsMetadataStorage::readCreateQuery(const std::string & collection_name) const
{
    const auto file_name = getFileName(collection_name);
    auto query_text = storage->read(file_name);

    const auto & settings = getContext()->getSettingsRef();
    ParserCreateNamedCollectionQuery parser;
    auto ast = parseQuery(
        parser,
        query_text,
        "in file " + file_name,
        0,
        settings.max_parser_depth,
        settings.max_parser_backtracks);

    return ast->as<const ASTCreateNamedCollectionQuery &>();
}
/// Serializes the query and stores it under the collection's metadata entry.
/// The key/value list is sorted by key on a clone of the query first; the passed query is left untouched.
void NamedCollectionsMetadataStorage::writeCreateQuery(const ASTCreateNamedCollectionQuery & query, bool replace)
{
    auto query_copy = query.clone();
    auto * create_query = typeid_cast<ASTCreateNamedCollectionQuery *>(query_copy.get());
    auto & sorted_changes = create_query->changes;

    const auto by_name = [](const SettingChange & lhs, const SettingChange & rhs) { return lhs.name < rhs.name; };
    ::sort(sorted_changes.begin(), sorted_changes.end(), by_name);

    storage->write(getFileName(query.collection_name), serializeAST(*query_copy), replace);
}
/// Forwards to the backend: true if the storage should be polled for changes via waitUpdate().
bool NamedCollectionsMetadataStorage::supportsPeriodicUpdate() const
{
    const bool is_supported = storage->supportsPeriodicUpdate();
    return is_supported;
}
/// Waits for a change in the backend for at most `named_collections_storage.update_timeout_ms`
/// (read from the global context's config, 5000 if not set) and returns the backend's answer.
/// Throws LOGICAL_ERROR if the backend does not support periodic updates.
bool NamedCollectionsMetadataStorage::waitUpdate()
{
    if (storage->supportsPeriodicUpdate())
    {
        const auto & config = Context::getGlobalContextInstance()->getConfigRef();
        const size_t timeout = config.getUInt(named_collections_storage_config_path + ".update_timeout_ms", 5000);
        return storage->waitUpdate(timeout);
    }

    throw Exception(ErrorCodes::LOGICAL_ERROR, "Periodic updates are not supported");
}
/// Factory: picks the storage backend according to `named_collections_storage.type`
/// ("local" by default; "zookeeper" and "keeper" are synonyms).
/// For local storage the path defaults to `<context path>/named_collections`;
/// for ZooKeeper the path is read without a default.
/// Throws INVALID_CONFIG_PARAMETER for any other storage type.
std::unique_ptr<NamedCollectionsMetadataStorage> NamedCollectionsMetadataStorage::create(const ContextPtr & context_)
{
    const auto & config = context_->getConfigRef();
    const auto storage_type = config.getString(named_collections_storage_config_path + ".type", "local");

    const bool is_local = storage_type == "local";
    const bool is_keeper = storage_type == "zookeeper" || storage_type == "keeper";

    if (!is_local && !is_keeper)
    {
        throw Exception(
            ErrorCodes::INVALID_CONFIG_PARAMETER,
            "Unknown storage for named collections: {}", storage_type);
    }

    if (is_local)
    {
        const auto path = config.getString(
            named_collections_storage_config_path + ".path",
            std::filesystem::path(context_->getPath()) / "named_collections");

        LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"),
                  "Using local storage for named collections at path: {}", path);

        auto backend = std::make_unique<NamedCollectionsMetadataStorage::LocalStorage>(context_, path);
        /// The constructor is private, so std::make_unique cannot be used here.
        return std::unique_ptr<NamedCollectionsMetadataStorage>(
            new NamedCollectionsMetadataStorage(std::move(backend), context_));
    }

    const auto path = config.getString(named_collections_storage_config_path + ".path");
    auto backend = std::make_unique<NamedCollectionsMetadataStorage::ZooKeeperStorage>(context_, path);

    LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"),
              "Using zookeeper storage for named collections at path: {}", path);

    return std::unique_ptr<NamedCollectionsMetadataStorage>(
        new NamedCollectionsMetadataStorage(std::move(backend), context_));
}
}

View File

@ -0,0 +1,52 @@
#pragma once
#include <Parsers/ASTCreateNamedCollectionQuery.h>
#include <Parsers/ASTAlterNamedCollectionQuery.h>
#include <Parsers/ASTDropNamedCollectionQuery.h>
#include <Common/NamedCollections/NamedCollections.h>
#include <Core/BackgroundSchedulePool.h>
namespace DB
{
/// Persists named collections created via SQL as serialized CREATE NAMED COLLECTION queries.
/// The actual backend (local files or ZooKeeper) is hidden behind INamedCollectionsStorage
/// and chosen by the static create() factory.
class NamedCollectionsMetadataStorage : private WithContext
{
public:
/// Create the storage with the backend selected in the server config.
static std::unique_ptr<NamedCollectionsMetadataStorage> create(const ContextPtr & context);
/// Load all stored collections.
NamedCollectionsMap getAll() const;
/// Load a single stored collection by name.
MutableNamedCollectionPtr get(const std::string & collection_name) const;
/// Store the CREATE query and return the collection built from it.
MutableNamedCollectionPtr create(const ASTCreateNamedCollectionQuery & query);
/// Remove stored metadata of the collection.
void remove(const std::string & collection_name);
/// Remove stored metadata of the collection if present; returns true if something was removed.
bool removeIfExists(const std::string & collection_name);
/// Apply an ALTER query to the stored CREATE query and write the result back.
void update(const ASTAlterNamedCollectionQuery & query);
/// NOTE(review): no definition of shutdown() is visible in the accompanying .cpp - confirm it is defined or unused.
void shutdown();
/// Return true if update was made
bool waitUpdate();
/// Whether the backend should be polled for changes with waitUpdate().
bool supportsPeriodicUpdate() const;
private:
class INamedCollectionsStorage;
class LocalStorage;
class ZooKeeperStorage;
std::shared_ptr<INamedCollectionsStorage> storage;
/// Private: instances are produced by the static create() factory.
NamedCollectionsMetadataStorage(std::shared_ptr<INamedCollectionsStorage> storage_, ContextPtr context_);
/// Names of all collections present in the storage.
std::vector<std::string> listCollections() const;
/// Read and parse the stored CREATE query of a collection.
ASTCreateNamedCollectionQuery readCreateQuery(const std::string & collection_name) const;
/// Serialize and store a CREATE query; `replace` allows overwriting an existing entry.
void writeCreateQuery(const ASTCreateNamedCollectionQuery & query, bool replace = false);
};
}

View File

@ -228,9 +228,9 @@ void Timer::cleanup()
#endif
template <typename ProfilerImpl>
QueryProfilerBase<ProfilerImpl>::QueryProfilerBase([[maybe_unused]] UInt64 thread_id, [[maybe_unused]] int clock_type, [[maybe_unused]] UInt32 period, [[maybe_unused]] int pause_signal_)
: log(getLogger("QueryProfiler"))
, pause_signal(pause_signal_)
QueryProfilerBase<ProfilerImpl>::QueryProfilerBase(
[[maybe_unused]] UInt64 thread_id, [[maybe_unused]] int clock_type, [[maybe_unused]] UInt32 period, [[maybe_unused]] int pause_signal_)
: log(getLogger("QueryProfiler")), pause_signal(pause_signal_)
{
#if defined(SANITIZER)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler disabled because they cannot work under sanitizers");

View File

@ -140,6 +140,18 @@ inline bool isPrintableASCII(char c)
return uc >= 32 && uc <= 126; /// 127 is ASCII DEL.
}
/// True for bytes in the range 0x30..0x3F, i.e. ASCII 0-9 and :;<=>?
inline bool isCSIParameterByte(char c)
{
    const auto code = static_cast<uint8_t>(c);
    return 0x30 <= code && code <= 0x3F;
}
/// True for bytes in the range 0x20..0x2F, i.e. ASCII space and !"#$%&'()*+,-./
inline bool isCSIIntermediateByte(char c)
{
    const auto code = static_cast<uint8_t>(c);
    return 0x20 <= code && code <= 0x2F;
}
inline bool isCSIFinalByte(char c)
{
uint8_t uc = c;

View File

@ -103,7 +103,7 @@ template <ComputeWidthMode mode>
size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t limit) noexcept
{
UTF8Decoder decoder;
int isEscapeSequence = false;
bool is_escape_sequence = false;
size_t width = 0;
size_t rollback = 0;
for (size_t i = 0; i < size; ++i)
@ -116,6 +116,9 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
while (i + 15 < size)
{
if (is_escape_sequence)
break;
__m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&data[i]));
const uint16_t non_regular_width_mask = _mm_movemask_epi8(
@ -132,25 +135,28 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
}
else
{
if (isEscapeSequence)
{
break;
}
else
{
i += 16;
width += 16;
}
i += 16;
width += 16;
}
}
#endif
while (i < size && isPrintableASCII(data[i]))
{
if (!isEscapeSequence)
bool ignore_width = is_escape_sequence && (isCSIParameterByte(data[i]) || isCSIIntermediateByte(data[i]));
if (ignore_width || (data[i] == '[' && is_escape_sequence))
{
/// don't count the width
}
else if (is_escape_sequence && isCSIFinalByte(data[i]))
{
is_escape_sequence = false;
}
else
{
++width;
else if (isCSIFinalByte(data[i]) && data[i - 1] != '\x1b')
isEscapeSequence = false; /// end of CSI escape sequence reached
}
++i;
}
@ -178,7 +184,7 @@ size_t computeWidthImpl(const UInt8 * data, size_t size, size_t prefix, size_t l
// special treatment for '\t' and for ESC
size_t next_width = width;
if (decoder.codepoint == '\x1b')
isEscapeSequence = true;
is_escape_sequence = true;
else if (decoder.codepoint == '\t')
next_width += 8 - (prefix + width) % 8;
else

View File

@ -1,12 +1,40 @@
#include <Common/tests/gtest_global_context.h>
#include <Common/NamedCollections/NamedCollectionsFactory.h>
#include <Common/NamedCollections/NamedCollectionUtils.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/DOM/DOMParser.h>
#include <gtest/gtest.h>
using namespace DB;
/// Test helper that exposes protected methods of NamedCollectionFactory.
/// Each wrapper takes the factory mutex itself and passes the lock to the protected overload.
class NamedCollectionFactoryFriend : public NamedCollectionFactory
{
public:
/// Separate singleton of the derived type, used by the tests below.
static NamedCollectionFactoryFriend & instance()
{
static NamedCollectionFactoryFriend instance;
return instance;
}
/// Load collections from the given configuration under the factory mutex.
void loadFromConfig(const Poco::Util::AbstractConfiguration & config)
{
std::lock_guard lock(mutex);
NamedCollectionFactory::loadFromConfig(config, lock);
}
/// Register a collection under the factory mutex.
void add(const std::string & collection_name, MutableNamedCollectionPtr collection)
{
std::lock_guard lock(mutex);
NamedCollectionFactory::add(collection_name, collection, lock);
}
/// Unregister a collection under the factory mutex.
void remove(const std::string & collection_name)
{
std::lock_guard lock(mutex);
NamedCollectionFactory::remove(collection_name, lock);
}
};
TEST(NamedCollections, SimpleConfig)
{
std::string xml(R"CONFIG(<clickhouse>
@ -29,13 +57,13 @@ TEST(NamedCollections, SimpleConfig)
Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml);
Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document);
NamedCollectionUtils::loadFromConfig(*config);
NamedCollectionFactoryFriend::instance().loadFromConfig(*config);
ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection1"));
ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection2"));
ASSERT_TRUE(NamedCollectionFactory::instance().tryGet("collection3") == nullptr);
ASSERT_TRUE(NamedCollectionFactoryFriend::instance().exists("collection1"));
ASSERT_TRUE(NamedCollectionFactoryFriend::instance().exists("collection2"));
ASSERT_TRUE(NamedCollectionFactoryFriend::instance().tryGet("collection3") == nullptr);
auto collections = NamedCollectionFactory::instance().getAll();
auto collections = NamedCollectionFactoryFriend::instance().getAll();
ASSERT_EQ(collections.size(), 2);
ASSERT_TRUE(collections.contains("collection1"));
ASSERT_TRUE(collections.contains("collection2"));
@ -47,7 +75,7 @@ key3: 3.3
key4: -4
)CONFIG");
auto collection1 = NamedCollectionFactory::instance().get("collection1");
auto collection1 = NamedCollectionFactoryFriend::instance().get("collection1");
ASSERT_TRUE(collection1 != nullptr);
ASSERT_TRUE(collection1->get<String>("key1") == "value1");
@ -61,7 +89,7 @@ key5: 5
key6: 6.6
)CONFIG");
auto collection2 = NamedCollectionFactory::instance().get("collection2");
auto collection2 = NamedCollectionFactoryFriend::instance().get("collection2");
ASSERT_TRUE(collection2 != nullptr);
ASSERT_TRUE(collection2->get<String>("key4") == "value4");
@ -69,9 +97,9 @@ key6: 6.6
ASSERT_TRUE(collection2->get<Float64>("key6") == 6.6);
auto collection2_copy = collections["collection2"]->duplicate();
NamedCollectionFactory::instance().add("collection2_copy", collection2_copy);
ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection2_copy"));
ASSERT_EQ(NamedCollectionFactory::instance().get("collection2_copy")->dumpStructure(),
NamedCollectionFactoryFriend::instance().add("collection2_copy", collection2_copy);
ASSERT_TRUE(NamedCollectionFactoryFriend::instance().exists("collection2_copy"));
ASSERT_EQ(NamedCollectionFactoryFriend::instance().get("collection2_copy")->dumpStructure(),
R"CONFIG(key4: value4
key5: 5
key6: 6.6
@ -88,8 +116,8 @@ key6: 6.6
collection2_copy->setOrUpdate<String>("key4", "value45", {});
ASSERT_EQ(collection2_copy->getOrDefault<String>("key4", "N"), "value45");
NamedCollectionFactory::instance().remove("collection2_copy");
ASSERT_FALSE(NamedCollectionFactory::instance().exists("collection2_copy"));
NamedCollectionFactoryFriend::instance().remove("collection2_copy");
ASSERT_FALSE(NamedCollectionFactoryFriend::instance().exists("collection2_copy"));
config.reset();
}
@ -119,11 +147,11 @@ TEST(NamedCollections, NestedConfig)
Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml);
Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document);
NamedCollectionUtils::loadFromConfig(*config);
NamedCollectionFactoryFriend::instance().loadFromConfig(*config);
ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection3"));
ASSERT_TRUE(NamedCollectionFactoryFriend::instance().exists("collection3"));
auto collection = NamedCollectionFactory::instance().get("collection3");
auto collection = NamedCollectionFactoryFriend::instance().get("collection3");
ASSERT_TRUE(collection != nullptr);
ASSERT_EQ(collection->dumpStructure(),
@ -171,8 +199,8 @@ TEST(NamedCollections, NestedConfigDuplicateKeys)
Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml);
Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document);
NamedCollectionUtils::loadFromConfig(*config);
auto collection = NamedCollectionFactory::instance().get("collection");
NamedCollectionFactoryFriend::instance().loadFromConfig(*config);
auto collection = NamedCollectionFactoryFriend::instance().get("collection");
auto keys = collection->getKeys();
ASSERT_EQ(keys.size(), 6);

View File

@ -188,6 +188,18 @@ NamesAndTypesList NamesAndTypesList::filter(const Names & names) const
return filter(NameSet(names.begin(), names.end()));
}
/// Returns a copy of the list without the columns whose names are in `names`; the order of the rest is kept.
NamesAndTypesList NamesAndTypesList::eraseNames(const NameSet & names) const
{
    NamesAndTypesList remaining;
    for (const auto & name_and_type : *this)
    {
        if (names.contains(name_and_type.name))
            continue;
        remaining.push_back(name_and_type);
    }
    return remaining;
}
NamesAndTypesList NamesAndTypesList::addTypes(const Names & names) const
{
/// NOTE: It's better to make a map in `IStorage` than to create it here every time again.

View File

@ -111,6 +111,9 @@ public:
/// Leave only the columns whose names are in the `names`. In `names` there can be superfluous columns.
NamesAndTypesList filter(const Names & names) const;
/// Leave only the columns whose names are not in the `names`.
NamesAndTypesList eraseNames(const NameSet & names) const;
/// Unlike `filter`, returns columns in the order in which they go in `names`.
NamesAndTypesList addTypes(const Names & names) const;

View File

@ -160,8 +160,8 @@ class IColumn;
M(Bool, enable_multiple_prewhere_read_steps, true, "Move more conditions from WHERE to PREWHERE and do reads from disk and filtering in multiple steps if there are multiple conditions combined with AND", 0) \
M(Bool, move_primary_key_columns_to_end_of_prewhere, true, "Move PREWHERE conditions containing primary key columns to the end of AND chain. It is likely that these conditions are taken into account during primary key analysis and thus will not contribute a lot to PREWHERE filtering.", 0) \
\
M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) \
M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) \
M(Bool, allow_statistics_optimize, false, "Allows using statistics to optimize queries", 0) ALIAS(allow_statistic_optimize) \
M(Bool, allow_experimental_statistics, false, "Allows using statistics", 0) ALIAS(allow_experimental_statistic) \
\
M(UInt64, alter_sync, 1, "Wait for actions to manipulate the partitions. 0 - do not wait, 1 - wait for execution only of itself, 2 - wait for everyone.", 0) ALIAS(replication_alter_partitions_sync) \
M(Int64, replication_wait_for_inactive_replica_timeout, 120, "Wait for inactive replica to execute ALTER/OPTIMIZE. Time in seconds, 0 - do not wait, negative - wait for unlimited time.", 0) \
@ -202,6 +202,8 @@ class IColumn;
M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \
M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \
M(ParallelReplicasCustomKeyFilterType, parallel_replicas_custom_key_filter_type, ParallelReplicasCustomKeyFilterType::DEFAULT, "Type of filter to use with custom key for parallel replicas. default - use modulo operation on the custom key, range - use range filter on custom key using all possible values for the value type of custom key.", 0) \
M(UInt64, parallel_replicas_custom_key_range_lower, 0, "Lower bound for the universe that the parallel replicas custom range filter is calculated over", 0) \
M(UInt64, parallel_replicas_custom_key_range_upper, 0, "Upper bound for the universe that the parallel replicas custom range filter is calculated over. A value of 0 disables the upper bound, setting it to the max value of the custom key expression", 0) \
\
M(String, cluster_for_parallel_replicas, "", "Cluster for a shard in which current server is located", 0) \
M(UInt64, allow_experimental_parallel_reading_from_replicas, 0, "Use all the replicas from a shard for SELECT query execution. Reading is parallelized and coordinated dynamically. 0 - disabled, 1 - enabled, silently disable them in case of failure, 2 - enabled, throw an exception in case of failure", 0) \
@ -332,7 +334,7 @@ class IColumn;
M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \
\
M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \
M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", IMPORTANT) \
M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \
\
M(JoinStrictness, join_default_strictness, JoinStrictness::All, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. If empty, query without strictness will throw exception.", 0) \
M(Bool, any_join_distinct_right_table_keys, false, "Enable old ANY JOIN logic with many-to-one left-to-right table keys mapping for all ANY JOINs. It leads to confusing not equal results for 't1 ANY LEFT JOIN t2' and 't2 ANY RIGHT JOIN t1'. ANY RIGHT JOIN needs one-to-many keys mapping to be consistent with LEFT one.", IMPORTANT) \
@ -891,6 +893,7 @@ class IColumn;
M(Bool, geo_distance_returns_float64_on_float64_arguments, true, "If all four arguments to `geoDistance`, `greatCircleDistance`, `greatCircleAngle` functions are Float64, return Float64 and use double precision for internal calculations. In previous ClickHouse versions, the functions always returned Float32.", 0) \
M(Bool, allow_get_client_http_header, false, "Allow to use the function `getClientHTTPHeader` which lets to obtain a value of an the current HTTP request's header. It is not enabled by default for security reasons, because some headers, such as `Cookie`, could contain sensitive info. Note that the `X-ClickHouse-*` and `Authentication` headers are always restricted and cannot be obtained with this function.", 0) \
M(Bool, cast_string_to_dynamic_use_inference, false, "Use types inference during String to Dynamic conversion", 0) \
M(Bool, enable_blob_storage_log, true, "Write information about blob storage operations to system.blob_storage_log table", 0) \
\
/** Experimental functions */ \
M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \
@ -1056,7 +1059,8 @@ class IColumn;
M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \
M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \
M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \
M(UInt64, input_format_parquet_max_block_size, 8192, "Max block size for parquet reader.", 0) \
M(UInt64, input_format_parquet_max_block_size, DEFAULT_BLOCK_SIZE, "Max block size for parquet reader.", 0) \
M(UInt64, input_format_parquet_prefer_block_bytes, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader", 0) \
M(Bool, input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format Protobuf", 0) \
M(Bool, input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format CapnProto", 0) \
M(Bool, input_format_orc_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format ORC", 0) \

View File

@ -83,7 +83,7 @@ namespace SettingsChangesHistory
/// For newly added setting choose the most appropriate previous_value (for example, if new setting
/// controls new feature and it's 'true' by default, use 'false' as previous_value).
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
static const std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"},
{"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"},
@ -96,6 +96,15 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"},
{"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"},
{"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"},
{"input_format_parquet_max_block_size", 8192, DEFAULT_BLOCK_SIZE, "Increase block size for parquet reader."},
{"input_format_parquet_prefer_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader."},
{"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"},
{"allow_statistic_optimize", false, false, "Old setting which popped up here being renamed."},
{"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."},
{"allow_statistics_optimize", false, false, "The setting was renamed. The previous name is `allow_statistic_optimize`."},
{"allow_experimental_statistics", false, false, "The setting was renamed. The previous name is `allow_experimental_statistic`."},
{"parallel_replicas_custom_key_range_lower", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards"},
{"parallel_replicas_custom_key_range_upper", 0, 0, "Add settings to control the range filter when using parallel replicas with dynamic shards. A value of 0 disables the upper limit"},
}},
{"24.5", {{"allow_deprecated_error_prone_window_functions", true, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)"},
{"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."},

View File

@ -17,6 +17,13 @@ void registerDataTypeDomainGeo(DataTypeFactory & factory)
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypePointName>()));
});
// Custom type for a simple line that consists of several segments, stored as Array(Point)
factory.registerSimpleDataTypeCustom("LineString", []
{
return std::make_pair(DataTypeFactory::instance().get("Array(Point)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeLineStringName>()));
});
// Custom type for simple polygon without holes stored as Array(Point)
factory.registerSimpleDataTypeCustom("Ring", []
{

View File

@ -11,6 +11,12 @@ public:
DataTypePointName() : DataTypeCustomFixedName("Point") {}
};
/// Custom-name descriptor that always reports the fixed type name "LineString".
/// It is attached to the Array(Point)-backed type when "LineString" is registered in the data type factory.
class DataTypeLineStringName : public DataTypeCustomFixedName
{
public:
DataTypeLineStringName() : DataTypeCustomFixedName("LineString") {}
};
class DataTypeRingName : public DataTypeCustomFixedName
{
public:

View File

@ -3,6 +3,7 @@
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/Serializations/SerializationNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeVariant.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnConst.h>
#include <Core/Field.h>
@ -174,4 +175,9 @@ DataTypePtr removeNullableOrLowCardinalityNullable(const DataTypePtr & type)
}
/// Reports whether the given type is one of the kinds checked below:
/// Nullable, LowCardinality(Nullable), Dynamic or Variant.
bool canContainNull(const IDataType & type)
{
    /// Explicitly nullable wrappers are checked first (same evaluation order as a single `||` chain).
    if (type.isNullable() || type.isLowCardinalityNullable())
        return true;

    return isDynamic(type) || isVariant(type);
}
}

View File

@ -62,4 +62,6 @@ DataTypePtr makeNullableOrLowCardinalityNullableSafe(const DataTypePtr & type);
/// Nullable(T) -> T, LowCardinality(Nullable(T)) -> T
DataTypePtr removeNullableOrLowCardinalityNullable(const DataTypePtr & type);
/// Returns true if the type is Nullable, LowCardinality(Nullable), Dynamic or Variant.
bool canContainNull(const IDataType & type);
}

View File

@ -543,6 +543,7 @@ template <typename DataType> constexpr bool IsDataTypeNumber = false;
template <typename DataType> constexpr bool IsDataTypeDateOrDateTime = false;
template <typename DataType> constexpr bool IsDataTypeDate = false;
template <typename DataType> constexpr bool IsDataTypeEnum = false;
template <typename DataType> constexpr bool IsDataTypeStringOrFixedString = false;
template <typename DataType> constexpr bool IsDataTypeDecimalOrNumber = IsDataTypeDecimal<DataType> || IsDataTypeNumber<DataType>;
@ -556,6 +557,8 @@ class DataTypeDate;
class DataTypeDate32;
class DataTypeDateTime;
class DataTypeDateTime64;
class DataTypeString;
class DataTypeFixedString;
template <is_decimal T> constexpr bool IsDataTypeDecimal<DataTypeDecimal<T>> = true;
@ -572,6 +575,9 @@ template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDate32> = tru
template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDateTime> = true;
template <> inline constexpr bool IsDataTypeDateOrDateTime<DataTypeDateTime64> = true;
template <> inline constexpr bool IsDataTypeStringOrFixedString<DataTypeString> = true;
template <> inline constexpr bool IsDataTypeStringOrFixedString<DataTypeFixedString> = true;
template <typename T>
class DataTypeEnum;

View File

@ -1,20 +1,21 @@
#include <filesystem>
#include <Databases/DatabaseAtomic.h>
#include <Databases/DatabaseFactory.h>
#include <Databases/DatabaseOnDisk.h>
#include <Databases/DatabaseReplicated.h>
#include <Databases/DatabaseFactory.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromFile.h>
#include <Interpreters/Context.h>
#include <Interpreters/DDLTask.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <Parsers/formatAST.h>
#include <Storages/StorageMaterializedView.h>
#include "Common/logger_useful.h"
#include <Common/PoolId.h>
#include <Common/atomicRename.h>
#include <Common/filesystemHelpers.h>
#include <Storages/StorageMaterializedView.h>
#include <Interpreters/Context.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/ExternalDictionariesLoader.h>
#include <filesystem>
#include <Interpreters/DDLTask.h>
namespace fs = std::filesystem;
@ -393,6 +394,7 @@ DatabaseAtomic::DetachedTables DatabaseAtomic::cleanupDetachedTables()
{
DetachedTables not_in_use;
auto it = detached_tables.begin();
LOG_DEBUG(log, "There are {} detached tables. Start searching non used tables.", detached_tables.size());
while (it != detached_tables.end())
{
if (it->second.unique())
@ -403,6 +405,7 @@ DatabaseAtomic::DetachedTables DatabaseAtomic::cleanupDetachedTables()
else
++it;
}
LOG_DEBUG(log, "Found {} non used tables in detached tables.", not_in_use.size());
/// It should be destroyed in caller with released database mutex
return not_in_use;
}

View File

@ -670,7 +670,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat
for (auto it = metadata_files.begin(); it < metadata_files.end(); std::advance(it, batch_size))
{
std::span batch{it, std::min(std::next(it, batch_size), metadata_files.end())};
pool.scheduleOrThrowOnError(
pool.scheduleOrThrow(
[batch, &process_metadata_file, &process_tmp_drop_metadata_file]() mutable
{
setThreadName("DatabaseOnDisk");
@ -679,7 +679,7 @@ void DatabaseOnDisk::iterateMetadataFiles(ContextPtr local_context, const Iterat
process_metadata_file(file.first);
else
process_tmp_drop_metadata_file(file.first);
});
}, Priority{}, getContext()->getSettingsRef().lock_acquire_timeout.totalMicroseconds());
}
pool.wait();
}
@ -794,7 +794,7 @@ ASTPtr DatabaseOnDisk::getCreateQueryFromStorage(const String & table_name, cons
throw_on_error);
create_table_query->set(create_table_query->as<ASTCreateQuery>()->comment,
std::make_shared<ASTLiteral>("SYSTEM TABLE is built on the fly."));
std::make_shared<ASTLiteral>(storage->getInMemoryMetadata().comment));
return create_table_query;
}

View File

@ -44,6 +44,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int UNKNOWN_DATABASE_ENGINE;
extern const int NOT_IMPLEMENTED;
extern const int UNEXPECTED_NODE_IN_ZOOKEEPER;
}
static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768;
@ -76,6 +77,20 @@ static void setReplicatedEngine(ASTCreateQuery * create_query, ContextPtr contex
String replica_path = server_settings.default_replica_path;
String replica_name = server_settings.default_replica_name;
/// Check that replica path doesn't exist
Macros::MacroExpansionInfo info;
StorageID table_id = StorageID(create_query->getDatabase(), create_query->getTable(), create_query->uuid);
info.table_id = table_id;
info.expand_special_macros_only = false;
String zookeeper_path = context->getMacros()->expand(replica_path, info);
if (context->getZooKeeper()->exists(zookeeper_path))
throw Exception(
ErrorCodes::UNEXPECTED_NODE_IN_ZOOKEEPER,
"Found existing ZooKeeper path {} while trying to convert table {} to replicated. Table will not be converted.",
zookeeper_path, backQuote(table_id.getFullTableName())
);
auto args = std::make_shared<ASTExpressionList>();
args->children.push_back(std::make_shared<ASTLiteral>(replica_path));
args->children.push_back(std::make_shared<ASTLiteral>(replica_name));

View File

@ -122,6 +122,13 @@ DatabaseReplicated::DatabaseReplicated(
fillClusterAuthInfo(db_settings.collection_name.value, context_->getConfigRef());
replica_group_name = context_->getConfigRef().getString("replica_group_name", "");
if (!replica_group_name.empty() && database_name.starts_with(DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX))
{
context_->addWarningMessage(fmt::format("There's a Replicated database with a name starting from '{}', "
"and replica_group_name is configured. It may cause collisions in cluster names.",
ALL_GROUPS_CLUSTER_PREFIX));
}
}
String DatabaseReplicated::getFullReplicaName(const String & shard, const String & replica)
@ -173,13 +180,40 @@ ClusterPtr DatabaseReplicated::tryGetCluster() const
return cluster;
}
void DatabaseReplicated::setCluster(ClusterPtr && new_cluster)
ClusterPtr DatabaseReplicated::tryGetAllGroupsCluster() const
{
std::lock_guard lock{mutex};
cluster = std::move(new_cluster);
if (replica_group_name.empty())
return nullptr;
if (cluster_all_groups)
return cluster_all_groups;
/// Database is probably not created or not initialized yet, it's ok to return nullptr
if (is_readonly)
return cluster_all_groups;
try
{
cluster_all_groups = getClusterImpl(/*all_groups*/ true);
}
catch (...)
{
tryLogCurrentException(log);
}
return cluster_all_groups;
}
ClusterPtr DatabaseReplicated::getClusterImpl() const
/// Replaces one of the two cached clusters while holding `mutex`.
/// When `all_groups` is true the all-groups cache (`cluster_all_groups`) is overwritten,
/// otherwise the regular per-group cache (`cluster`) is.
void DatabaseReplicated::setCluster(ClusterPtr && new_cluster, bool all_groups)
{
    std::lock_guard lock{mutex};

    /// Select the destination slot first, then move the new value into it.
    ClusterPtr & cached_slot = all_groups ? cluster_all_groups : cluster;
    cached_slot = std::move(new_cluster);
}
ClusterPtr DatabaseReplicated::getClusterImpl(bool all_groups) const
{
Strings unfiltered_hosts;
Strings hosts;
@ -199,17 +233,24 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
"It's possible if the first replica is not fully created yet "
"or if the last replica was just dropped or due to logical error", zookeeper_path);
hosts.clear();
std::vector<String> paths;
for (const auto & host : unfiltered_hosts)
paths.push_back(zookeeper_path + "/replicas/" + host + "/replica_group");
auto replica_groups = zookeeper->tryGet(paths);
for (size_t i = 0; i < paths.size(); ++i)
if (all_groups)
{
if (replica_groups[i].data == replica_group_name)
hosts.push_back(unfiltered_hosts[i]);
hosts = unfiltered_hosts;
}
else
{
hosts.clear();
std::vector<String> paths;
for (const auto & host : unfiltered_hosts)
paths.push_back(zookeeper_path + "/replicas/" + host + "/replica_group");
auto replica_groups = zookeeper->tryGet(paths);
for (size_t i = 0; i < paths.size(); ++i)
{
if (replica_groups[i].data == replica_group_name)
hosts.push_back(unfiltered_hosts[i]);
}
}
Int32 cversion = stat.cversion;
@ -274,6 +315,11 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
bool treat_local_as_remote = false;
bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL;
String cluster_name = TSA_SUPPRESS_WARNING_FOR_READ(database_name); /// FIXME
if (all_groups)
cluster_name = ALL_GROUPS_CLUSTER_PREFIX + cluster_name;
ClusterConnectionParameters params{
cluster_auth_info.cluster_username,
cluster_auth_info.cluster_password,
@ -282,7 +328,7 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
treat_local_port_as_remote,
cluster_auth_info.cluster_secure_connection,
Priority{1},
TSA_SUPPRESS_WARNING_FOR_READ(database_name), /// FIXME
cluster_name,
cluster_auth_info.cluster_secret};
return std::make_shared<Cluster>(getContext()->getSettingsRef(), shards, params);

View File

@ -20,6 +20,8 @@ using ClusterPtr = std::shared_ptr<Cluster>;
class DatabaseReplicated : public DatabaseAtomic
{
public:
static constexpr auto ALL_GROUPS_CLUSTER_PREFIX = "all_groups.";
DatabaseReplicated(const String & name_, const String & metadata_path_, UUID uuid,
const String & zookeeper_path_, const String & shard_name_, const String & replica_name_,
DatabaseReplicatedSettings db_settings_,
@ -65,6 +67,7 @@ public:
/// Returns cluster consisting of database replicas
ClusterPtr tryGetCluster() const;
ClusterPtr tryGetAllGroupsCluster() const;
void drop(ContextPtr /*context*/) override;
@ -113,8 +116,8 @@ private:
ASTPtr parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query);
String readMetadataFile(const String & table_name) const;
ClusterPtr getClusterImpl() const;
void setCluster(ClusterPtr && new_cluster);
ClusterPtr getClusterImpl(bool all_groups = false) const;
void setCluster(ClusterPtr && new_cluster, bool all_groups = false);
void createEmptyLogEntry(const ZooKeeperPtr & current_zookeeper);
@ -155,6 +158,7 @@ private:
UInt64 tables_metadata_digest TSA_GUARDED_BY(metadata_mutex);
mutable ClusterPtr cluster;
mutable ClusterPtr cluster_all_groups;
LoadTaskPtr startup_replicated_database_task TSA_GUARDED_BY(mutex);
};

View File

@ -421,6 +421,8 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
{
/// Some replica is added or removed, let's update cached cluster
database->setCluster(database->getClusterImpl());
if (!database->replica_group_name.empty())
database->setCluster(database->getClusterImpl(/*all_groups*/ true), /*all_groups*/ true);
out_reason = fmt::format("Entry {} is a dummy task", entry_name);
return {};
}

View File

@ -41,11 +41,11 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot alter table {} because it was created AS table function"
" and doesn't have structure in metadata", backQuote(ast_create_query.getTable()));
if (!has_structure && !ast_create_query.is_dictionary)
if (!has_structure && !ast_create_query.is_dictionary && !ast_create_query.isParameterizedView())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot alter table {} metadata doesn't have structure",
backQuote(ast_create_query.getTable()));
if (!ast_create_query.is_dictionary)
if (!ast_create_query.is_dictionary && !ast_create_query.isParameterizedView())
{
ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns);
ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices);

View File

@ -19,11 +19,15 @@ namespace ProfileEvents
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
struct WriteBufferFromAzureBlobStorage::PartData
{
Memory<> memory;
size_t data_size = 0;
std::string block_id;
};
BufferAllocationPolicyPtr createBufferAllocationPolicy(const AzureObjectStorageSettings & settings)
@ -119,22 +123,30 @@ void WriteBufferFromAzureBlobStorage::preFinalize()
// This function should not be run again
is_prefinalized = true;
hidePartialData();
if (hidden_size > 0)
detachBuffer();
setFakeBufferWhenPreFinalized();
/// If there is only one block and size is less than or equal to max_single_part_upload_size
/// then we use single part upload instead of multi part upload
if (buffer_allocation_policy->getBufferNumber() == 1)
if (block_ids.empty() && detached_part_data.size() == 1 && detached_part_data.front().data_size <= max_single_part_upload_size)
{
size_t data_size = size_t(position() - memory.data());
if (data_size <= max_single_part_upload_size)
{
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast<const uint8_t *>(memory.data()), data_size);
execWithRetry([&](){ block_blob_client.Upload(memory_stream); }, max_unexpected_write_error_retries, data_size);
LOG_TRACE(log, "Committed single block for blob `{}`", blob_path);
return;
}
}
auto part_data = std::move(detached_part_data.front());
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast<const uint8_t *>(part_data.memory.data()), part_data.data_size);
execWithRetry([&](){ block_blob_client.Upload(memory_stream); }, max_unexpected_write_error_retries, part_data.data_size);
LOG_TRACE(log, "Committed single block for blob `{}`", blob_path);
writePart();
detached_part_data.pop_front();
return;
}
else
{
writeMultipartUpload();
}
}
void WriteBufferFromAzureBlobStorage::finalizeImpl()
@ -144,9 +156,13 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl()
if (!is_prefinalized)
preFinalize();
chassert(offset() == 0);
chassert(hidden_size == 0);
task_tracker->waitAll();
if (!block_ids.empty())
{
task_tracker->waitAll();
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries);
LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path);
@ -155,14 +171,66 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl()
void WriteBufferFromAzureBlobStorage::nextImpl()
{
if (is_prefinalized)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Cannot write to prefinalized buffer for Azure Blob Storage, the file could have been created");
task_tracker->waitIfAny();
writePart();
hidePartialData();
reallocateFirstBuffer();
if (available() > 0)
return;
detachBuffer();
if (detached_part_data.size() > 1)
writeMultipartUpload();
allocateBuffer();
}
/// Marks the bytes written into the working buffer since the previous call as "hidden":
/// they are reported to the remote write throttler (if one is configured), added to `hidden_size`,
/// and the working buffer is re-pointed at the remaining tail of `memory`, so that offset() becomes 0 again.
void WriteBufferFromAzureBlobStorage::hidePartialData()
{
    /// Number of bytes sitting in the working buffer right now; it stays the same until WriteBuffer::set() below.
    const size_t pending_bytes = offset();

    if (write_settings.remote_throttler)
        write_settings.remote_throttler->add(pending_bytes, ProfileEvents::RemoteWriteThrottlerBytes, ProfileEvents::RemoteWriteThrottlerSleepMicroseconds);

    chassert(memory.size() >= hidden_size + pending_bytes);

    hidden_size += pending_bytes;

    /// After the update, the hidden prefix must end exactly at the current write position.
    chassert(memory.data() + hidden_size == working_buffer.begin() + pending_bytes);
    chassert(memory.data() + hidden_size == position());

    WriteBuffer::set(memory.data() + hidden_size, memory.size() - hidden_size);
    chassert(offset() == 0);
}
/// Grows `memory` when the first buffer has no free space left: its size is doubled,
/// capped at the buffer size reported by the allocation policy.
/// Does nothing if the policy is already past buffer number 1, if there is still room to write,
/// or if `memory` has already reached the cap.
void WriteBufferFromAzureBlobStorage::reallocateFirstBuffer()
{
    chassert(offset() == 0);

    const bool past_first_buffer = buffer_allocation_policy->getBufferNumber() > 1;
    if (past_first_buffer || available() > 0)
        return;

    const size_t size_cap = buffer_allocation_policy->getBufferSize();
    const size_t current_size = memory.size();
    if (current_size == size_cap)
        return;

    /// Double the size, but never beyond what the allocation policy allows.
    memory.resize(std::min(current_size * 2, size_cap));

    /// Re-point the working buffer at the part of the resized memory that follows the hidden prefix.
    WriteBuffer::set(memory.data() + hidden_size, memory.size() - hidden_size);

    chassert(offset() == 0);
}
void WriteBufferFromAzureBlobStorage::allocateBuffer()
{
buffer_allocation_policy->nextBuffer();
chassert(0 == hidden_size);
auto size = buffer_allocation_policy->getBufferSize();
if (buffer_allocation_policy->getBufferNumber() == 1)
@ -172,30 +240,56 @@ void WriteBufferFromAzureBlobStorage::allocateBuffer()
WriteBuffer::set(memory.data(), memory.size());
}
void WriteBufferFromAzureBlobStorage::writePart()
void WriteBufferFromAzureBlobStorage::detachBuffer()
{
auto data_size = size_t(position() - memory.data());
size_t data_size = size_t(position() - memory.data());
if (data_size == 0)
return;
const std::string & block_id = block_ids.emplace_back(getRandomASCIIString(64));
std::shared_ptr<PartData> part_data = std::make_shared<PartData>(std::move(memory), data_size, block_id);
WriteBuffer::set(nullptr, 0);
chassert(data_size == hidden_size);
auto upload_worker = [this, part_data] ()
auto buf = std::move(memory);
WriteBuffer::set(nullptr, 0);
total_size += hidden_size;
hidden_size = 0;
detached_part_data.push_back({std::move(buf), data_size});
WriteBuffer::set(nullptr, 0);
}
void WriteBufferFromAzureBlobStorage::writePart(WriteBufferFromAzureBlobStorage::PartData && part_data)
{
const std::string & block_id = block_ids.emplace_back(getRandomASCIIString(64));
auto worker_data = std::make_shared<std::tuple<std::string, WriteBufferFromAzureBlobStorage::PartData>>(block_id, std::move(part_data));
auto upload_worker = [this, worker_data] ()
{
auto & data_size = std::get<1>(*worker_data).data_size;
auto & data_block_id = std::get<0>(*worker_data);
auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path);
Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast<const uint8_t *>(part_data->memory.data()), part_data->data_size);
execWithRetry([&](){ block_blob_client.StageBlock(part_data->block_id, memory_stream); }, max_unexpected_write_error_retries, part_data->data_size);
if (write_settings.remote_throttler)
write_settings.remote_throttler->add(part_data->data_size, ProfileEvents::RemoteWriteThrottlerBytes, ProfileEvents::RemoteWriteThrottlerSleepMicroseconds);
Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast<const uint8_t *>(std::get<1>(*worker_data).memory.data()), data_size);
execWithRetry([&](){ block_blob_client.StageBlock(data_block_id, memory_stream); }, max_unexpected_write_error_retries, data_size);
};
task_tracker->add(std::move(upload_worker));
}
/// Points the working buffer at the `fake_buffer_when_prefinalized` member array,
/// sized by sizeof() of that array, instead of at `memory`.
/// Called from preFinalize() after the accumulated data has been hidden and detached.
void WriteBufferFromAzureBlobStorage::setFakeBufferWhenPreFinalized()
{
WriteBuffer::set(fake_buffer_when_prefinalized, sizeof(fake_buffer_when_prefinalized));
}
/// Drains `detached_part_data` front to back, handing every detached part to writePart().
/// A part is removed from the queue only after writePart() has returned for it.
void WriteBufferFromAzureBlobStorage::writeMultipartUpload()
{
    for (; !detached_part_data.empty(); detached_part_data.pop_front())
        writePart(std::move(detached_part_data.front()));
}
}
#endif

View File

@ -48,8 +48,13 @@ public:
private:
struct PartData;
void writePart();
void writeMultipartUpload();
void writePart(PartData && part_data);
void detachBuffer();
void reallocateFirstBuffer();
void allocateBuffer();
void hidePartialData();
void setFakeBufferWhenPreFinalized();
void finalizeImpl() override;
void execWithRetry(std::function<void()> func, size_t num_tries, size_t cost = 0);
@ -77,9 +82,16 @@ private:
MemoryBufferPtr allocateBuffer() const;
char fake_buffer_when_prefinalized[1] = {};
bool first_buffer=true;
size_t total_size = 0;
size_t hidden_size = 0;
std::unique_ptr<TaskTracker> task_tracker;
std::deque<PartData> detached_part_data;
};
}

View File

@ -166,6 +166,8 @@ public:
return client.get();
}
bool supportParallelWrite() const override { return true; }
private:
using SharedAzureClientPtr = std::shared_ptr<const Azure::Storage::Blobs::BlobContainerClient>;
void removeObjectImpl(const StoredObject & object, const SharedAzureClientPtr & client_ptr, bool if_exists);

View File

@ -36,30 +36,24 @@ void IObjectStorageIteratorAsync::deactivate()
void IObjectStorageIteratorAsync::nextBatch()
{
std::lock_guard lock(mutex);
if (is_finished)
{
current_batch.clear();
current_batch_iterator = current_batch.begin();
return;
}
else
{
if (!is_initialized)
{
outcome_future = scheduleBatch();
is_initialized = true;
}
if (!is_initialized)
{
outcome_future = scheduleBatch();
is_initialized = true;
}
try
{
chassert(outcome_future.valid());
BatchAndHasNext result;
try
{
result = outcome_future.get();
}
catch (...)
{
is_finished = true;
throw;
}
BatchAndHasNext result = outcome_future.get();
current_batch = std::move(result.batch);
current_batch_iterator = current_batch.begin();
@ -71,6 +65,11 @@ void IObjectStorageIteratorAsync::nextBatch()
else
is_finished = true;
}
catch (...)
{
is_finished = true;
throw;
}
}
void IObjectStorageIteratorAsync::next()
@ -95,35 +94,39 @@ std::future<IObjectStorageIteratorAsync::BatchAndHasNext> IObjectStorageIterator
bool IObjectStorageIteratorAsync::isValid()
{
std::lock_guard lock(mutex);
if (!is_initialized)
nextBatch();
std::lock_guard lock(mutex);
return current_batch_iterator != current_batch.end();
}
RelativePathWithMetadataPtr IObjectStorageIteratorAsync::current()
{
std::lock_guard lock(mutex);
if (!isValid())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator");
std::lock_guard lock(mutex);
return *current_batch_iterator;
}
RelativePathsWithMetadata IObjectStorageIteratorAsync::currentBatch()
{
std::lock_guard lock(mutex);
if (!isValid())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to access invalid iterator");
std::lock_guard lock(mutex);
return current_batch;
}
std::optional<RelativePathsWithMetadata> IObjectStorageIteratorAsync::getCurrentBatchAndScheduleNext()
{
std::lock_guard lock(mutex);
if (!is_initialized)
nextBatch();

View File

@ -382,6 +382,7 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e
{
std::vector<Aws::S3::Model::ObjectIdentifier> current_chunk;
String keys;
size_t first_position = current_position;
for (; current_position < objects.size() && current_chunk.size() < chunk_size_limit; ++current_position)
{
Aws::S3::Model::ObjectIdentifier obj;
@ -407,9 +408,9 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e
{
const auto * outcome_error = outcome.IsSuccess() ? nullptr : &outcome.GetError();
auto time_now = std::chrono::system_clock::now();
for (const auto & object : objects)
for (size_t i = first_position; i < current_position; ++i)
blob_storage_log->addEvent(BlobStorageLogElement::EventType::Delete,
uri.bucket, object.remote_path, object.local_path, object.bytes_size,
uri.bucket, objects[i].remote_path, objects[i].local_path, objects[i].bytes_size,
outcome_error, time_now);
}

View File

@ -161,6 +161,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string;
format_settings.parquet.output_fixed_string_as_fixed_byte_array = settings.output_format_parquet_fixed_string_as_fixed_byte_array;
format_settings.parquet.max_block_size = settings.input_format_parquet_max_block_size;
format_settings.parquet.prefer_block_bytes = settings.input_format_parquet_prefer_block_bytes;
format_settings.parquet.output_compression_method = settings.output_format_parquet_compression_method;
format_settings.parquet.output_compliant_nested_types = settings.output_format_parquet_compliant_nested_types;
format_settings.parquet.use_custom_encoder = settings.output_format_parquet_use_custom_encoder;

View File

@ -265,7 +265,8 @@ struct FormatSettings
bool preserve_order = false;
bool use_custom_encoder = true;
bool parallel_encoding = true;
UInt64 max_block_size = 8192;
UInt64 max_block_size = DEFAULT_BLOCK_SIZE;
size_t prefer_block_bytes = DEFAULT_BLOCK_SIZE * 256;
ParquetVersion output_version;
ParquetCompression output_compression_method = ParquetCompression::SNAPPY;
bool output_compliant_nested_types = true;

View File

@ -879,11 +879,11 @@ namespace
}
template <bool is_json>
bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings)
bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings, bool & has_fractional)
{
if (is_json || settings.try_infer_exponent_floats)
return tryReadFloatText(value, buf);
return tryReadFloatTextNoExponent(value, buf);
return tryReadFloatTextExt(value, buf, has_fractional);
return tryReadFloatTextExtNoExponent(value, buf, has_fractional);
}
template <bool is_json>
@ -893,46 +893,31 @@ namespace
return nullptr;
Float64 tmp_float;
bool has_fractional;
if (settings.try_infer_integers)
{
/// If we read from String, we can do it in a more efficient way.
if (auto * string_buf = dynamic_cast<ReadBufferFromString *>(&buf))
{
/// Remember the pointer to the start of the number to rollback to it.
char * number_start = buf.position();
Int64 tmp_int;
bool read_int = tryReadIntText(tmp_int, buf);
/// If we reached eof, it cannot be float (it requires no less data than integer)
if (buf.eof())
return read_int ? std::make_shared<DataTypeInt64>() : nullptr;
char * int_end = buf.position();
/// We can safely get back to the start of the number, because we read from a string and we didn't reach eof.
buf.position() = number_start;
char * number_start = buf.position();
bool read_uint = false;
char * uint_end = nullptr;
/// In case of Int64 overflow we can try to infer UInt64.
if (!read_int)
{
UInt64 tmp_uint;
read_uint = tryReadIntText(tmp_uint, buf);
/// If we reached eof, it cannot be float (it requires no less data than integer)
if (buf.eof())
return read_uint ? std::make_shared<DataTypeUInt64>() : nullptr;
uint_end = buf.position();
buf.position() = number_start;
}
if (tryReadFloat<is_json>(tmp_float, buf, settings))
{
if (read_int && buf.position() == int_end)
return std::make_shared<DataTypeInt64>();
if (read_uint && buf.position() == uint_end)
return std::make_shared<DataTypeUInt64>();
/// NOTE: it may break parsing of tryReadFloat() != tryReadIntText() + parsing of '.'/'e'
/// But, for now it is true
if (tryReadFloat<is_json>(tmp_float, buf, settings, has_fractional) && has_fractional)
return std::make_shared<DataTypeFloat64>();
}
Int64 tmp_int;
buf.position() = number_start;
if (tryReadIntText(tmp_int, buf))
return std::make_shared<DataTypeInt64>();
/// In case of Int64 overflow we can try to infer UInt64.
UInt64 tmp_uint;
buf.position() = number_start;
if (tryReadIntText(tmp_uint, buf))
return std::make_shared<DataTypeUInt64>();
return nullptr;
}
@ -942,36 +927,22 @@ namespace
/// and then as float.
PeekableReadBuffer peekable_buf(buf);
PeekableReadBufferCheckpoint checkpoint(peekable_buf);
Int64 tmp_int;
bool read_int = tryReadIntText(tmp_int, peekable_buf);
auto * int_end = peekable_buf.position();
peekable_buf.rollbackToCheckpoint(true);
bool read_uint = false;
char * uint_end = nullptr;
/// In case of Int64 overflow we can try to infer UInt64.
if (!read_int)
{
PeekableReadBufferCheckpoint new_checkpoint(peekable_buf);
UInt64 tmp_uint;
read_uint = tryReadIntText(tmp_uint, peekable_buf);
uint_end = peekable_buf.position();
peekable_buf.rollbackToCheckpoint(true);
}
if (tryReadFloat<is_json>(tmp_float, peekable_buf, settings))
{
/// Float parsing reads no fewer bytes than integer parsing,
/// so position of the buffer is either the same, or further.
/// If it's the same, then it's integer.
if (read_int && peekable_buf.position() == int_end)
return std::make_shared<DataTypeInt64>();
if (read_uint && peekable_buf.position() == uint_end)
return std::make_shared<DataTypeUInt64>();
if (tryReadFloat<is_json>(tmp_float, peekable_buf, settings, has_fractional) && has_fractional)
return std::make_shared<DataTypeFloat64>();
}
peekable_buf.rollbackToCheckpoint(/* drop= */ false);
Int64 tmp_int;
if (tryReadIntText(tmp_int, peekable_buf))
return std::make_shared<DataTypeInt64>();
peekable_buf.rollbackToCheckpoint(/* drop= */ true);
/// In case of Int64 overflow we can try to infer UInt64.
UInt64 tmp_uint;
if (tryReadIntText(tmp_uint, peekable_buf))
return std::make_shared<DataTypeUInt64>();
}
else if (tryReadFloat<is_json>(tmp_float, buf, settings))
else if (tryReadFloat<is_json>(tmp_float, buf, settings, has_fractional))
{
return std::make_shared<DataTypeFloat64>();
}
@ -1004,7 +975,8 @@ namespace
buf.position() = buf.buffer().begin();
Float64 tmp;
if (tryReadFloat<is_json>(tmp, buf, settings) && buf.eof())
bool has_fractional;
if (tryReadFloat<is_json>(tmp, buf, settings, has_fractional) && buf.eof())
return std::make_shared<DataTypeFloat64>();
return nullptr;

View File

@ -314,7 +314,7 @@ void checkFunctionArgumentSizes(const ColumnsWithTypeAndName & arguments, size_t
if (current_size != input_rows_count)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Expected the argument nº#{} ('{}' of type {}) to have {} rows, but it has {}",
"Expected the argument {} ('{}' of type {}) to have {} rows, but it has {}",
i + 1,
arguments[i].name,
arguments[i].type->getName(),

View File

@ -709,7 +709,7 @@ bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateL
else
return tryReadFloatTextFast(x, rb);
}
else /*if constexpr (is_integer_v<typename DataType::FieldType>)*/
else /*if constexpr (is_integral_v<typename DataType::FieldType>)*/
return tryReadIntText(x, rb);
}
@ -814,6 +814,16 @@ enum class ConvertFromStringParsingMode : uint8_t
BestEffortUS
};
/// Passed as the `Additions` argument of a conversion to request the accurate strategy.
/// When ConvertThroughParsing receives this type it parses with tryParseImpl and throws
/// CANNOT_PARSE_TEXT if the string cannot be parsed.
struct AccurateConvertStrategyAdditions
{
    /// NOTE(review): presumably the scale of a decimal target type - confirm against the users of this struct.
    UInt32 scale = 0;
};
/// Counterpart of AccurateConvertStrategyAdditions with the same layout.
/// NOTE(review): presumably selects the accurateOrNull strategy - its users are not visible here, confirm.
struct AccurateOrNullConvertStrategyAdditions
{
    /// NOTE(review): presumably the scale of a decimal target type - confirm against the users of this struct.
    UInt32 scale = 0;
};
template <typename FromDataType, typename ToDataType, typename Name,
ConvertFromStringExceptionMode exception_mode, ConvertFromStringParsingMode parsing_mode>
struct ConvertThroughParsing
@ -1020,7 +1030,13 @@ struct ConvertThroughParsing
break;
}
}
parseImpl<ToDataType>(vec_to[i], read_buffer, local_time_zone, precise_float_parsing);
if constexpr (std::is_same_v<Additions, AccurateConvertStrategyAdditions>)
{
if (!tryParseImpl<ToDataType>(vec_to[i], read_buffer, local_time_zone, precise_float_parsing))
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse string to type {}", TypeName<typename ToDataType::FieldType>);
}
else
parseImpl<ToDataType>(vec_to[i], read_buffer, local_time_zone, precise_float_parsing);
} while (false);
}
}
@ -1120,16 +1136,6 @@ struct ConvertThroughParsing
/// Function toUnixTimestamp has exactly the same implementation as toDateTime of String type.
struct NameToUnixTimestamp { static constexpr auto name = "toUnixTimestamp"; };
struct AccurateConvertStrategyAdditions
{
UInt32 scale { 0 };
};
struct AccurateOrNullConvertStrategyAdditions
{
UInt32 scale { 0 };
};
enum class BehaviourOnErrorFromString : uint8_t
{
ConvertDefaultBehaviorTag,
@ -3174,8 +3180,11 @@ private:
{
TypeIndex from_type_index = from_type->getTypeId();
WhichDataType which(from_type_index);
TypeIndex to_type_index = to_type->getTypeId();
WhichDataType to(to_type_index);
bool can_apply_accurate_cast = (cast_type == CastType::accurate || cast_type == CastType::accurateOrNull)
&& (which.isInt() || which.isUInt() || which.isFloat());
can_apply_accurate_cast |= cast_type == CastType::accurate && which.isStringOrFixedString() && to.isNativeInteger();
FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = default_date_time_overflow_behavior;
if (context)
@ -3260,6 +3269,20 @@ private:
return true;
}
}
else if constexpr (IsDataTypeStringOrFixedString<LeftDataType>)
{
if constexpr (IsDataTypeNumber<RightDataType>)
{
chassert(wrapper_cast_type == CastType::accurate);
result_column = ConvertImpl<LeftDataType, RightDataType, FunctionCastName>::execute(
arguments,
result_type,
input_rows_count,
BehaviourOnErrorFromString::ConvertDefaultBehaviorTag,
AccurateConvertStrategyAdditions());
}
return true;
}
return false;
});

View File

@ -31,7 +31,6 @@ namespace DB
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int ILLEGAL_COLUMN;
@ -40,26 +39,22 @@ namespace ErrorCodes
}
/** Rounding Functions:
* round(x, N) - rounding to nearest (N = 0 by default). Use banker's rounding for floating point numbers.
* roundBankers(x, N) - rounding to nearest (N = 0 by default). Use banker's rounding for all numbers.
* floor(x, N) is the largest number <= x (N = 0 by default).
* ceil(x, N) is the smallest number >= x (N = 0 by default).
* trunc(x, N) - is the largest by absolute value number that is not greater than x by absolute value (N = 0 by default).
*
* The value of the parameter N (scale):
* - N > 0: round to the number with N decimal places after the decimal point
* - N < 0: round to an integer with N zero characters
* - N = 0: round to an integer
*
* Type of the result is the type of argument.
* For integer arguments, when passing negative scale, overflow can occur.
* In that case, the behavior is implementation specific.
*/
/// Rounding Functions:
/// - round(x, N) - rounding to nearest (N = 0 by default). Use banker's rounding for floating point numbers.
/// - roundBankers(x, N) - rounding to nearest (N = 0 by default). Use banker's rounding for all numbers.
/// - floor(x, N) is the largest number <= x (N = 0 by default).
/// - ceil(x, N) is the smallest number >= x (N = 0 by default).
/// - trunc(x, N) is the number with the largest absolute value whose absolute value does not exceed that of x (N = 0 by default).
/// The value of the parameter N (scale):
/// - N > 0: round to the number with N decimal places after the decimal point
/// - N < 0: round to an integer with |N| trailing zeros
/// - N = 0: round to an integer
/** This parameter controls the behavior of the rounding functions.
*/
/// Type of the result is the type of argument.
/// For integer arguments, when passing negative scale, overflow can occur. In that case, the behavior is undefined.
/// Controls the behavior of the rounding functions.
enum class ScaleMode : uint8_t
{
Positive, // round to a number with N decimal places after the decimal point
@ -75,7 +70,7 @@ enum class RoundingMode : uint8_t
Ceil = _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC,
Trunc = _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
#else
Round = 8, /// Values are correspond to above just in case.
Round = 8, /// Values correspond to above values, just in case.
Floor = 9,
Ceil = 10,
Trunc = 11,
@ -84,16 +79,21 @@ enum class RoundingMode : uint8_t
enum class TieBreakingMode : uint8_t
{
Auto, // use banker's rounding for floating point numbers, round up otherwise
Bankers, // use banker's rounding
Auto, /// banker's rounding for floating point numbers, round up otherwise
Bankers, /// banker's rounding
};
/// Selects which FloatRoundingComputationBase specialization is used.
enum class Vectorize : uint8_t
{
/// Sequential implementation based on roundWithMode(); also used for single (scalar) values.
No,
/// Vectorized implementation, only defined when __SSE4_1__ is available.
Yes
};
/// For N, no more than the number of digits in the largest type.
using Scale = Int16;
/** Rounding functions for integer values.
*/
/// Rounding functions for integer values.
template <typename T, RoundingMode rounding_mode, ScaleMode scale_mode, TieBreakingMode tie_breaking_mode>
struct IntegerRoundingComputation
{
@ -149,6 +149,8 @@ struct IntegerRoundingComputation
return x;
}
}
std::unreachable();
}
static ALWAYS_INLINE T compute(T x, T scale)
@ -161,9 +163,12 @@ struct IntegerRoundingComputation
case ScaleMode::Negative:
return computeImpl(x, scale);
}
std::unreachable();
}
static ALWAYS_INLINE void compute(const T * __restrict in, size_t scale, T * __restrict out) requires std::integral<T>
static ALWAYS_INLINE void compute(const T * __restrict in, size_t scale, T * __restrict out)
requires std::integral<T>
{
if constexpr (sizeof(T) <= sizeof(scale) && scale_mode == ScaleMode::Negative)
{
@ -176,20 +181,23 @@ struct IntegerRoundingComputation
*out = compute(*in, static_cast<T>(scale));
}
static ALWAYS_INLINE void compute(const T * __restrict in, T scale, T * __restrict out) requires(!std::integral<T>)
static ALWAYS_INLINE void compute(const T * __restrict in, T scale, T * __restrict out)
requires(!std::integral<T>)
{
*out = compute(*in, scale);
}
};
template <typename T, Vectorize vectorize>
class FloatRoundingComputationBase;
#ifdef __SSE4_1__
template <typename T>
class BaseFloatRoundingComputation;
/// Vectorized implementation for x86.
template <>
class BaseFloatRoundingComputation<Float32>
class FloatRoundingComputationBase<Float32, Vectorize::Yes>
{
public:
using ScalarType = Float32;
@ -210,7 +218,7 @@ public:
};
template <>
class BaseFloatRoundingComputation<Float64>
class FloatRoundingComputationBase<Float64, Vectorize::Yes>
{
public:
using ScalarType = Float64;
@ -230,9 +238,9 @@ public:
}
};
#else
#endif
/// Implementation for ARM. Not vectorized.
/// Sequential implementation for ARM. Also used for scalar arguments.
inline float roundWithMode(float x, RoundingMode mode)
{
@ -243,6 +251,8 @@ inline float roundWithMode(float x, RoundingMode mode)
case RoundingMode::Ceil: return ceilf(x);
case RoundingMode::Trunc: return truncf(x);
}
std::unreachable();
}
inline double roundWithMode(double x, RoundingMode mode)
@ -254,10 +264,12 @@ inline double roundWithMode(double x, RoundingMode mode)
case RoundingMode::Ceil: return ceil(x);
case RoundingMode::Trunc: return trunc(x);
}
std::unreachable();
}
template <typename T>
class BaseFloatRoundingComputation
class FloatRoundingComputationBase<T, Vectorize::No>
{
public:
using ScalarType = T;
@ -277,15 +289,13 @@ public:
}
};
#endif
/** Implementation of low-level round-off functions for floating-point values.
*/
template <typename T, RoundingMode rounding_mode, ScaleMode scale_mode>
class FloatRoundingComputation : public BaseFloatRoundingComputation<T>
template <typename T, RoundingMode rounding_mode, ScaleMode scale_mode, Vectorize vectorize>
class FloatRoundingComputation : public FloatRoundingComputationBase<T, vectorize>
{
using Base = BaseFloatRoundingComputation<T>;
using Base = FloatRoundingComputationBase<T, vectorize>;
public:
static void compute(const T * __restrict in, const typename Base::VectorType & scale, T * __restrict out)
@ -317,15 +327,22 @@ struct FloatRoundingImpl
private:
static_assert(!is_decimal<T>);
using Op = FloatRoundingComputation<T, rounding_mode, scale_mode>;
using Data = std::array<T, Op::data_count>;
template <Vectorize vectorize =
#ifdef __SSE4_1__
Vectorize::Yes
#else
Vectorize::No
#endif
>
using Op = FloatRoundingComputation<T, rounding_mode, scale_mode, vectorize>;
using Data = std::array<T, Op<>::data_count>;
using ColumnType = ColumnVector<T>;
using Container = typename ColumnType::Container;
public:
static NO_INLINE void apply(const Container & in, size_t scale, Container & out)
{
auto mm_scale = Op::prepare(scale);
auto mm_scale = Op<>::prepare(scale);
const size_t data_count = std::tuple_size<Data>();
@ -337,7 +354,7 @@ public:
while (p_in < limit)
{
Op::compute(p_in, mm_scale, p_out);
Op<>::compute(p_in, mm_scale, p_out);
p_in += data_count;
p_out += data_count;
}
@ -350,10 +367,17 @@ public:
size_t tail_size_bytes = (end_in - p_in) * sizeof(*p_in);
memcpy(&tmp_src, p_in, tail_size_bytes);
Op::compute(reinterpret_cast<T *>(&tmp_src), mm_scale, reinterpret_cast<T *>(&tmp_dst));
Op<>::compute(reinterpret_cast<T *>(&tmp_src), mm_scale, reinterpret_cast<T *>(&tmp_dst));
memcpy(p_out, &tmp_dst, tail_size_bytes);
}
}
/// Rounds the single value `in` and stores the result in `out`.
/// Always goes through the non-vectorized computation (Vectorize::No), regardless of what apply() uses.
static void applyOne(T in, size_t scale, T & out)
{
    const auto prepared_scale = Op<Vectorize::No>::prepare(scale);
    Op<Vectorize::No>::compute(&in, prepared_scale, &out);
}
};
template <typename T, RoundingMode rounding_mode, ScaleMode scale_mode, TieBreakingMode tie_breaking_mode>
@ -409,6 +433,11 @@ public:
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected 'scale' parameter passed to function");
}
}
/// Rounds the single value `in` with the given `scale` and stores the result in `out`.
/// Simply forwards to Op::compute on the addresses of the two values.
static void applyOne(T in, size_t scale, T& out)
{
Op::compute(&in, scale, &out);
}
};
@ -444,11 +473,40 @@ public:
memcpy(out.data(), in.data(), in.size() * sizeof(T));
}
}
/// Rounds a single decimal value.
///  - in        the value to round, given as NativeType
///  - in_scale  scale of the input (callers pass the decimal column's getScale())
///  - out       receives the rounded value
///  - scale_arg requested scale of the rounding function
static void applyOne(NativeType in, UInt32 in_scale, NativeType & out, Scale scale_arg)
{
    /// Number of low decimal digits to round away. The result is deliberately stored back
    /// into the narrow Scale type, exactly as before.
    scale_arg = in_scale - scale_arg;
    if (scale_arg > 0)
    {
        auto scale = intExp10OfSize<NativeType>(scale_arg);
        Op::compute(&in, scale, &out);
    }
    else
    {
        /// Nothing to round away: pass the value through unchanged.
        /// Plain assignment instead of memcpy(..., sizeof(T)): both operands are NativeType,
        /// so the copy no longer relies on sizeof(T) == sizeof(NativeType).
        out = in;
    }
}
};
/// Select the appropriate processing algorithm depending on the scale.
/// Checks that `scale64` is representable as Scale.
/// Throws ARGUMENT_OUT_OF_BOUND otherwise (for values both above the maximum and below the minimum).
inline void validateScale(Int64 scale64)
{
    using ScaleLimits = std::numeric_limits<Scale>;

    const bool fits_in_scale = ScaleLimits::min() <= scale64 && scale64 <= ScaleLimits::max();
    if (!fits_in_scale)
        throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale argument for rounding function is too large");
}
/** Select the appropriate processing algorithm depending on the scale.
*/
/// Reads the scale from a constant column: takes its field as Int64, checks the range
/// with validateScale() (which throws if it does not fit) and narrows the value to Scale.
inline Scale getScaleArg(const ColumnConst * scale_col)
{
    const auto & field = scale_col->getField();
    const Int64 requested_scale = field.get<Int64>();

    validateScale(requested_scale);
    return static_cast<Scale>(requested_scale);
}
/// Generic dispatcher
template <typename T, RoundingMode rounding_mode, TieBreakingMode tie_breaking_mode>
struct Dispatcher
{
@ -457,30 +515,65 @@ struct Dispatcher
FloatRoundingImpl<T, rounding_mode, scale_mode>,
IntegerRoundingImpl<T, rounding_mode, scale_mode, tie_breaking_mode>>;
static ColumnPtr apply(const IColumn * col_general, Scale scale_arg)
template <typename ScaleType>
static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr)
{
const auto & col = checkAndGetColumn<ColumnVector<T>>(*col_general);
const auto & value_col_typed = checkAndGetColumn<ColumnVector<T>>(*value_col);
auto col_res = ColumnVector<T>::create();
typename ColumnVector<T>::Container & vec_res = col_res->getData();
vec_res.resize(col.getData().size());
vec_res.resize(value_col_typed.getData().size());
if (!vec_res.empty())
{
if (scale_arg == 0)
if (scale_col == nullptr || isColumnConst(*scale_col))
{
size_t scale = 1;
FunctionRoundingImpl<ScaleMode::Zero>::apply(col.getData(), scale, vec_res);
auto scale_arg = (scale_col == nullptr) ? 0 : getScaleArg(checkAndGetColumnConst<ColumnVector<ScaleType>>(scale_col));
if (scale_arg == 0)
{
size_t scale = 1;
FunctionRoundingImpl<ScaleMode::Zero>::apply(value_col_typed.getData(), scale, vec_res);
}
else if (scale_arg > 0)
{
size_t scale = intExp10(scale_arg);
FunctionRoundingImpl<ScaleMode::Positive>::apply(value_col_typed.getData(), scale, vec_res);
}
else
{
size_t scale = intExp10(-scale_arg);
FunctionRoundingImpl<ScaleMode::Negative>::apply(value_col_typed.getData(), scale, vec_res);
}
}
else if (scale_arg > 0)
/// Non-const scale argument:
else if (const auto * scale_col_typed = checkAndGetColumn<ColumnVector<ScaleType>>(scale_col))
{
size_t scale = intExp10(scale_arg);
FunctionRoundingImpl<ScaleMode::Positive>::apply(col.getData(), scale, vec_res);
}
else
{
size_t scale = intExp10(-scale_arg);
FunctionRoundingImpl<ScaleMode::Negative>::apply(col.getData(), scale, vec_res);
const auto & value_data = value_col_typed.getData();
const auto & scale_data = scale_col_typed->getData();
const size_t rows = value_data.size();
for (size_t i = 0; i < rows; ++i)
{
Int64 scale64 = scale_data[i];
validateScale(scale64);
Scale raw_scale = scale64;
if (raw_scale == 0)
{
size_t scale = 1;
FunctionRoundingImpl<ScaleMode::Zero>::applyOne(value_data[i], scale, vec_res[i]);
}
else if (raw_scale > 0)
{
size_t scale = intExp10(raw_scale);
FunctionRoundingImpl<ScaleMode::Positive>::applyOne(value_data[i], scale, vec_res[i]);
}
else
{
size_t scale = intExp10(-raw_scale);
FunctionRoundingImpl<ScaleMode::Negative>::applyOne(value_data[i], scale, vec_res[i]);
}
}
}
}
@ -488,28 +581,51 @@ struct Dispatcher
}
};
/// Dispatcher for Decimal inputs
template <is_decimal T, RoundingMode rounding_mode, TieBreakingMode tie_breaking_mode>
struct Dispatcher<T, rounding_mode, tie_breaking_mode>
{
public:
static ColumnPtr apply(const IColumn * col_general, Scale scale_arg)
template <typename ScaleType>
static ColumnPtr apply(const IColumn * value_col, const IColumn * scale_col = nullptr)
{
const auto & col = checkAndGetColumn<ColumnDecimal<T>>(*col_general);
const typename ColumnDecimal<T>::Container & vec_src = col.getData();
const auto & value_col_typed = checkAndGetColumn<ColumnDecimal<T>>(*value_col);
const typename ColumnDecimal<T>::Container & vec_src = value_col_typed.getData();
auto col_res = ColumnDecimal<T>::create(vec_src.size(), col.getScale());
auto col_res = ColumnDecimal<T>::create(vec_src.size(), value_col_typed.getScale());
auto & vec_res = col_res->getData();
if (!vec_res.empty())
DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::apply(col.getData(), col.getScale(), vec_res, scale_arg);
{
if (scale_col == nullptr || isColumnConst(*scale_col))
{
auto scale_arg = scale_col == nullptr ? 0 : getScaleArg(checkAndGetColumnConst<ColumnVector<ScaleType>>(scale_col));
DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::apply(value_col_typed.getData(), value_col_typed.getScale(), vec_res, scale_arg);
}
/// Non-const scale argument
else if (const auto * scale_col_typed = checkAndGetColumn<ColumnVector<ScaleType>>(scale_col))
{
const auto & scale = scale_col_typed->getData();
const size_t rows = vec_src.size();
for (size_t i = 0; i < rows; ++i)
{
Int64 scale64 = scale[i];
validateScale(scale64);
Scale raw_scale = scale64;
DecimalRoundingImpl<T, rounding_mode, tie_breaking_mode>::applyOne(value_col_typed.getElement(i), value_col_typed.getScale(),
reinterpret_cast<ColumnDecimal<T>::NativeT&>(col_res->getElement(i)), raw_scale);
}
}
}
return col_res;
}
};
/** A template for functions that round the value of an input parameter of type
* (U)Int8/16/32/64, Float32/64 or Decimal32/64/128, and accept an additional optional parameter (default is 0).
*/
/// Functions that round the value of an input parameter of type (U)Int8/16/32/64, Float32/64 or Decimal32/64/128.
/// Accept an additional optional parameter of type (U)Int8/16/32/64 (0 by default).
template <typename Name, RoundingMode rounding_mode, TieBreakingMode tie_breaking_mode>
class FunctionRounding : public IFunction
{
@ -517,75 +633,58 @@ public:
static constexpr auto name = Name::name;
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionRounding>(); }
String getName() const override
{
return name;
}
String getName() const override { return name; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
/// Get result types by argument types. If the function does not apply to these arguments, throw an exception.
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if ((arguments.empty()) || (arguments.size() > 2))
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: passed {}, should be 1 or 2.",
getName(), arguments.size());
for (const auto & type : arguments)
if (!isNumber(type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}",
arguments[0]->getName(), getName());
return arguments[0];
}
static Scale getScaleArg(const ColumnsWithTypeAndName & arguments)
{
if (arguments.size() == 2)
{
const IColumn & scale_column = *arguments[1].column;
if (!isColumnConst(scale_column))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument for rounding functions must be constant");
Field scale_field = assert_cast<const ColumnConst &>(scale_column).getField();
if (scale_field.getType() != Field::Types::UInt64
&& scale_field.getType() != Field::Types::Int64)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument for rounding functions must have integer type");
Int64 scale64 = scale_field.get<Int64>();
if (scale64 > std::numeric_limits<Scale>::max()
|| scale64 < std::numeric_limits<Scale>::min())
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Scale argument for rounding function is too large");
return scale64;
}
return 0;
}
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors mandatory_args{
{"x", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNumber), nullptr, "A number to round"},
};
FunctionArgumentDescriptors optional_args{
{"N", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), nullptr, "The number of decimal places to round to"},
};
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
return arguments[0].type;
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
const ColumnWithTypeAndName & column = arguments[0];
Scale scale_arg = getScaleArg(arguments);
const ColumnWithTypeAndName & value_arg = arguments[0];
ColumnPtr res;
auto call = [&](const auto & types) -> bool
auto call_data = [&](const auto & types) -> bool
{
using Types = std::decay_t<decltype(types)>;
using DataType = typename Types::LeftType;
using DataType = typename Types::RightType;
if constexpr (IsDataTypeNumber<DataType> || IsDataTypeDecimal<DataType>)
if (arguments.size() > 1)
{
using FieldType = typename DataType::FieldType;
res = Dispatcher<FieldType, rounding_mode, tie_breaking_mode>::apply(column.column.get(), scale_arg);
const ColumnWithTypeAndName & scale_column = arguments[1];
auto call_scale = [&](const auto & scaleTypes) -> bool
{
using ScaleTypes = std::decay_t<decltype(scaleTypes)>;
using ScaleType = typename ScaleTypes::RightType;
if (isColumnConst(*value_arg.column) && !isColumnConst(*scale_column.column))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale column must be const for const data column");
res = Dispatcher<DataType, rounding_mode, tie_breaking_mode>::template apply<ScaleType>(value_arg.column.get(), scale_column.column.get());
return true;
};
TypeIndex right_index = scale_column.type->getTypeId();
if (!callOnBasicType<void, true, false, false, false>(right_index, call_scale))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Scale argument for rounding functions must have integer type");
return true;
}
return false;
res = Dispatcher<DataType, rounding_mode, tie_breaking_mode>::template apply<int>(value_arg.column.get());
return true;
};
#if !defined(__SSE4_1__)
@ -597,10 +696,9 @@ public:
throw Exception(ErrorCodes::CANNOT_SET_ROUNDING_MODE, "Cannot set floating point rounding mode");
#endif
if (!callOnIndexAndDataType<void>(column.type->getTypeId(), call))
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", column.name, getName());
}
TypeIndex left_index = value_arg.type->getTypeId();
if (!callOnBasicType<void, true, true, true, false>(left_index, call_data))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", value_arg.name, getName());
return res;
}
@ -617,9 +715,8 @@ public:
};
/** Rounds down to a number within explicitly specified array.
* If the value is less than the minimal bound - returns the minimal bound.
*/
/// Rounds down to a number within explicitly specified array.
/// If the value is less than the minimal bound - returns the minimal bound.
class FunctionRoundDown : public IFunction
{
public:
@ -627,7 +724,6 @@ public:
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionRoundDown>(); }
String getName() const override { return name; }
bool isVariadic() const override { return false; }
size_t getNumberOfArguments() const override { return 2; }
bool useDefaultImplementationForConstants() const override { return true; }

View File

@ -111,7 +111,7 @@ public:
argument_types.push_back(argument.type);
/// More efficient specialization for two numeric arguments.
if (arguments.size() == 2 && isNumber(arguments[0].type) && isNumber(arguments[1].type))
if (arguments.size() == 2 && isNumber(removeNullable(arguments[0].type)) && isNumber(removeNullable(arguments[1].type)))
return std::make_unique<FunctionToFunctionBaseAdaptor>(SpecializedFunction::create(context), argument_types, return_type);
return std::make_unique<FunctionToFunctionBaseAdaptor>(
@ -123,7 +123,7 @@ public:
if (types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} cannot be called without arguments", getName());
if (types.size() == 2 && isNumber(types[0]) && isNumber(types[1]))
if (types.size() == 2 && isNumber(removeNullable(types[0])) && isNumber(removeNullable(types[1])))
return SpecializedFunction::create(context)->getReturnTypeImpl(types);
return getLeastSupertype(types);

View File

@ -28,6 +28,9 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
template <typename Point>
using LineString = boost::geometry::model::linestring<Point>;
template <typename Point>
using Ring = boost::geometry::model::ring<Point>;
@ -38,11 +41,13 @@ template <typename Point>
using MultiPolygon = boost::geometry::model::multi_polygon<Polygon<Point>>;
using CartesianPoint = boost::geometry::model::d2::point_xy<Float64>;
using CartesianLineString = LineString<CartesianPoint>;
using CartesianRing = Ring<CartesianPoint>;
using CartesianPolygon = Polygon<CartesianPoint>;
using CartesianMultiPolygon = MultiPolygon<CartesianPoint>;
using SphericalPoint = boost::geometry::model::point<Float64, 2, boost::geometry::cs::spherical_equatorial<boost::geometry::degree>>;
using SphericalLineString = LineString<SphericalPoint>;
using SphericalRing = Ring<SphericalPoint>;
using SphericalPolygon = Polygon<SphericalPoint>;
using SphericalMultiPolygon = MultiPolygon<SphericalPoint>;
@ -85,6 +90,29 @@ struct ColumnToPointsConverter
}
};
/**
  * Converts a Column of type Array(Tuple(Float64, Float64)) to a vector of boost linestrings,
  * one linestring per array row.
  */
template <typename Point>
struct ColumnToLineStringsConverter
{
    static std::vector<LineString<Point>> convert(ColumnPtr col)
    {
        const auto & array_column = typeid_cast<const ColumnArray &>(*col);
        const IColumn::Offsets & row_ends = array_column.getOffsets();

        /// All points of all rows in one flat vector; row i owns the slice [row_ends[i - 1], row_ends[i]).
        auto points = ColumnToPointsConverter<Point>::convert(array_column.getDataPtr());

        std::vector<LineString<Point>> line_strings;
        line_strings.reserve(row_ends.size());

        size_t row_begin = 0;
        for (size_t row_end : row_ends)
        {
            line_strings.emplace_back(points.begin() + row_begin, points.begin() + row_end);
            row_begin = row_end;
        }

        return line_strings;
    }
};
/**
* Class which converts Column with type Array(Tuple(Float64, Float64)) to a vector of boost ring type.
*/
@ -208,6 +236,39 @@ private:
ColumnFloat64::Container & second_container;
};
/// Serializes LineStrings into an array column of points.
/// Points go to a nested PointSerializer; the offsets column stores the running total of points after each LineString.
template <typename Point>
class LineStringSerializer
{
public:
    LineStringSerializer()
        : offsets(ColumnUInt64::create())
    {
    }

    /// NOTE(review): the offsets column is created via ColumnUInt64::create(n); add() then appends to it - confirm this is intended.
    explicit LineStringSerializer(size_t n)
        : offsets(ColumnUInt64::create(n))
    {
    }

    /// Appends every point of the LineString and records where it ends.
    void add(const LineString<Point> & ring)
    {
        for (const auto & point : ring)
            point_serializer.add(point);

        total_points += ring.size();
        offsets->insertValue(total_points);
    }

    /// Builds the resulting array column; the offsets are moved out, so call this once.
    ColumnPtr finalize()
    {
        auto points_column = point_serializer.finalize();
        return ColumnArray::create(std::move(points_column), std::move(offsets));
    }

private:
    size_t total_points = 0;
    PointSerializer<Point> point_serializer;
    ColumnUInt64::MutablePtr offsets;
};
/// Almost the same as LineStringSerializer
/// Serialize Point, Ring as Ring
template <typename Point>
class RingSerializer
@ -344,8 +405,16 @@ static void callOnGeometryDataType(DataTypePtr type, F && f)
/// There is no Point type, because for most of geometry functions it is useless.
if (factory.get("Point")->equals(*type))
return f(ConverterType<ColumnToPointsConverter<Point>>());
/// We should take the name into consideration to avoid ambiguity.
/// Because for example both Ring and LineString are resolved to Array(Tuple(Point)).
else if (factory.get("LineString")->equals(*type) && type->getCustomName() && type->getCustomName()->getName() == "LineString")
return f(ConverterType<ColumnToLineStringsConverter<Point>>());
/// For backward compatibility if we call this function not on a custom type, we will consider Array(Tuple(Point)) as type Ring.
else if (factory.get("Ring")->equals(*type))
return f(ConverterType<ColumnToRingsConverter<Point>>());
else if (factory.get("Polygon")->equals(*type))
return f(ConverterType<ColumnToPolygonsConverter<Point>>());
else if (factory.get("MultiPolygon")->equals(*type))

Some files were not shown because too many files have changed in this diff Show More