Merge branch 'master' into unite-storages3-and-disks3-settings

This commit is contained in:
Kseniia Sumarokova 2024-06-13 14:24:04 +02:00 committed by GitHub
commit ef8810a60d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
209 changed files with 4342 additions and 2274 deletions

View File

@ -37,7 +37,6 @@ Checks: [
'-cert-oop54-cpp', '-cert-oop54-cpp',
'-cert-oop57-cpp', '-cert-oop57-cpp',
'-clang-analyzer-optin.core.EnumCastOutOfRange', # https://github.com/abseil/abseil-cpp/issues/1667
'-clang-analyzer-optin.performance.Padding', '-clang-analyzer-optin.performance.Padding',
'-clang-analyzer-unix.Malloc', '-clang-analyzer-unix.Malloc',

8
.gitmodules vendored
View File

@ -91,13 +91,13 @@
[submodule "contrib/aws"] [submodule "contrib/aws"]
path = contrib/aws path = contrib/aws
url = https://github.com/ClickHouse/aws-sdk-cpp url = https://github.com/ClickHouse/aws-sdk-cpp
[submodule "aws-c-event-stream"] [submodule "contrib/aws-c-event-stream"]
path = contrib/aws-c-event-stream path = contrib/aws-c-event-stream
url = https://github.com/awslabs/aws-c-event-stream url = https://github.com/awslabs/aws-c-event-stream
[submodule "aws-c-common"] [submodule "contrib/aws-c-common"]
path = contrib/aws-c-common path = contrib/aws-c-common
url = https://github.com/awslabs/aws-c-common.git url = https://github.com/awslabs/aws-c-common.git
[submodule "aws-checksums"] [submodule "contrib/aws-checksums"]
path = contrib/aws-checksums path = contrib/aws-checksums
url = https://github.com/awslabs/aws-checksums url = https://github.com/awslabs/aws-checksums
[submodule "contrib/curl"] [submodule "contrib/curl"]
@ -163,7 +163,7 @@
url = https://github.com/xz-mirror/xz url = https://github.com/xz-mirror/xz
[submodule "contrib/abseil-cpp"] [submodule "contrib/abseil-cpp"]
path = contrib/abseil-cpp path = contrib/abseil-cpp
url = https://github.com/abseil/abseil-cpp url = https://github.com/ClickHouse/abseil-cpp.git
[submodule "contrib/dragonbox"] [submodule "contrib/dragonbox"]
path = contrib/dragonbox path = contrib/dragonbox
url = https://github.com/ClickHouse/dragonbox url = https://github.com/ClickHouse/dragonbox

2
contrib/abseil-cpp vendored

@ -1 +1 @@
Subproject commit 3bd86026c93da5a40006fd53403dff9d5f5e30e3 Subproject commit a3c4dd3e77f28b526efbb0eb394b72e29c633936

View File

@ -1,6 +1,8 @@
set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
set(ABSL_COMMON_INCLUDE_DIRS "${ABSL_ROOT_DIR}") set(ABSL_COMMON_INCLUDE_DIRS "${ABSL_ROOT_DIR}")
# This is a minimized version of the function definition in CMake/AbseilHelpers.cmake
# #
# Copyright 2017 The Abseil Authors. # Copyright 2017 The Abseil Authors.
# #
@ -16,7 +18,6 @@ set(ABSL_COMMON_INCLUDE_DIRS "${ABSL_ROOT_DIR}")
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# #
function(absl_cc_library) function(absl_cc_library)
cmake_parse_arguments(ABSL_CC_LIB cmake_parse_arguments(ABSL_CC_LIB
"DISABLE_INSTALL;PUBLIC;TESTONLY" "DISABLE_INSTALL;PUBLIC;TESTONLY"
@ -76,6 +77,12 @@ function(absl_cc_library)
add_library(absl::${ABSL_CC_LIB_NAME} ALIAS ${_NAME}) add_library(absl::${ABSL_CC_LIB_NAME} ALIAS ${_NAME})
endfunction() endfunction()
# The following definitions are an amalgamation of the CMakeLists.txt files in absl/*/
# To refresh them when upgrading to a new version:
# - copy them over from upstream
# - remove calls of 'absl_cc_test'
# - remove calls of `absl_cc_library` that contain `TESTONLY`
# - prepend '${DIR}/' to the file definitions
set(DIR ${ABSL_ROOT_DIR}/absl/algorithm) set(DIR ${ABSL_ROOT_DIR}/absl/algorithm)
@ -102,12 +109,12 @@ absl_cc_library(
absl::algorithm absl::algorithm
absl::core_headers absl::core_headers
absl::meta absl::meta
absl::nullability
PUBLIC PUBLIC
) )
set(DIR ${ABSL_ROOT_DIR}/absl/base) set(DIR ${ABSL_ROOT_DIR}/absl/base)
# Internal-only target, do not depend on directly.
absl_cc_library( absl_cc_library(
NAME NAME
atomic_hook atomic_hook
@ -146,6 +153,18 @@ absl_cc_library(
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
) )
absl_cc_library(
NAME
no_destructor
HDRS
"${DIR}/no_destructor.h"
DEPS
absl::config
absl::nullability
COPTS
${ABSL_DEFAULT_COPTS}
)
absl_cc_library( absl_cc_library(
NAME NAME
nullability nullability
@ -305,6 +324,8 @@ absl_cc_library(
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
LINKOPTS LINKOPTS
${ABSL_DEFAULT_LINKOPTS} ${ABSL_DEFAULT_LINKOPTS}
$<$<BOOL:${LIBRT}>:-lrt>
$<$<BOOL:${MINGW}>:-ladvapi32>
DEPS DEPS
absl::atomic_hook absl::atomic_hook
absl::base_internal absl::base_internal
@ -312,6 +333,7 @@ absl_cc_library(
absl::core_headers absl::core_headers
absl::dynamic_annotations absl::dynamic_annotations
absl::log_severity absl::log_severity
absl::nullability
absl::raw_logging_internal absl::raw_logging_internal
absl::spinlock_wait absl::spinlock_wait
absl::type_traits absl::type_traits
@ -357,6 +379,7 @@ absl_cc_library(
absl::base absl::base
absl::config absl::config
absl::core_headers absl::core_headers
absl::nullability
PUBLIC PUBLIC
) )
@ -467,10 +490,11 @@ absl_cc_library(
LINKOPTS LINKOPTS
${ABSL_DEFAULT_LINKOPTS} ${ABSL_DEFAULT_LINKOPTS}
DEPS DEPS
absl::container_common
absl::common_policy_traits absl::common_policy_traits
absl::compare absl::compare
absl::compressed_tuple absl::compressed_tuple
absl::config
absl::container_common
absl::container_memory absl::container_memory
absl::cord absl::cord
absl::core_headers absl::core_headers
@ -480,7 +504,6 @@ absl_cc_library(
absl::strings absl::strings
absl::throw_delegate absl::throw_delegate
absl::type_traits absl::type_traits
absl::utility
) )
# Internal-only target, do not depend on directly. # Internal-only target, do not depend on directly.
@ -523,7 +546,9 @@ absl_cc_library(
COPTS COPTS
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
DEPS DEPS
absl::base_internal
absl::compressed_tuple absl::compressed_tuple
absl::config
absl::core_headers absl::core_headers
absl::memory absl::memory
absl::span absl::span
@ -548,18 +573,6 @@ absl_cc_library(
PUBLIC PUBLIC
) )
# Internal-only target, do not depend on directly.
absl_cc_library(
NAME
counting_allocator
HDRS
"${DIR}/internal/counting_allocator.h"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::config
)
absl_cc_library( absl_cc_library(
NAME NAME
flat_hash_map flat_hash_map
@ -570,7 +583,7 @@ absl_cc_library(
DEPS DEPS
absl::container_memory absl::container_memory
absl::core_headers absl::core_headers
absl::hash_function_defaults absl::hash_container_defaults
absl::raw_hash_map absl::raw_hash_map
absl::algorithm_container absl::algorithm_container
absl::memory absl::memory
@ -586,7 +599,7 @@ absl_cc_library(
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
DEPS DEPS
absl::container_memory absl::container_memory
absl::hash_function_defaults absl::hash_container_defaults
absl::raw_hash_set absl::raw_hash_set
absl::algorithm_container absl::algorithm_container
absl::core_headers absl::core_headers
@ -604,7 +617,7 @@ absl_cc_library(
DEPS DEPS
absl::container_memory absl::container_memory
absl::core_headers absl::core_headers
absl::hash_function_defaults absl::hash_container_defaults
absl::node_slot_policy absl::node_slot_policy
absl::raw_hash_map absl::raw_hash_map
absl::algorithm_container absl::algorithm_container
@ -620,8 +633,9 @@ absl_cc_library(
COPTS COPTS
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
DEPS DEPS
absl::container_memory
absl::core_headers absl::core_headers
absl::hash_function_defaults absl::hash_container_defaults
absl::node_slot_policy absl::node_slot_policy
absl::raw_hash_set absl::raw_hash_set
absl::algorithm_container absl::algorithm_container
@ -629,6 +643,19 @@ absl_cc_library(
PUBLIC PUBLIC
) )
absl_cc_library(
NAME
hash_container_defaults
HDRS
"${DIR}/hash_container_defaults.h"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::config
absl::hash_function_defaults
PUBLIC
)
# Internal-only target, do not depend on directly. # Internal-only target, do not depend on directly.
absl_cc_library( absl_cc_library(
NAME NAME
@ -655,9 +682,11 @@ absl_cc_library(
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
DEPS DEPS
absl::config absl::config
absl::container_common
absl::cord absl::cord
absl::hash absl::hash
absl::strings absl::strings
absl::type_traits
PUBLIC PUBLIC
) )
@ -703,6 +732,7 @@ absl_cc_library(
absl::base absl::base
absl::config absl::config
absl::exponential_biased absl::exponential_biased
absl::no_destructor
absl::raw_logging_internal absl::raw_logging_internal
absl::sample_recorder absl::sample_recorder
absl::synchronization absl::synchronization
@ -756,7 +786,9 @@ absl_cc_library(
COPTS COPTS
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
DEPS DEPS
absl::config
absl::container_memory absl::container_memory
absl::core_headers
absl::raw_hash_set absl::raw_hash_set
absl::throw_delegate absl::throw_delegate
PUBLIC PUBLIC
@ -817,6 +849,7 @@ absl_cc_library(
DEPS DEPS
absl::config absl::config
absl::core_headers absl::core_headers
absl::debugging_internal
absl::meta absl::meta
absl::strings absl::strings
absl::span absl::span
@ -931,6 +964,7 @@ absl_cc_library(
absl::crc32c absl::crc32c
absl::config absl::config
absl::strings absl::strings
absl::no_destructor
) )
set(DIR ${ABSL_ROOT_DIR}/absl/debugging) set(DIR ${ABSL_ROOT_DIR}/absl/debugging)
@ -954,6 +988,8 @@ absl_cc_library(
"${DIR}/stacktrace.cc" "${DIR}/stacktrace.cc"
COPTS COPTS
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
LINKOPTS
$<$<BOOL:${EXECINFO_LIBRARY}>:${EXECINFO_LIBRARY}>
DEPS DEPS
absl::debugging_internal absl::debugging_internal
absl::config absl::config
@ -980,6 +1016,7 @@ absl_cc_library(
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
LINKOPTS LINKOPTS
${ABSL_DEFAULT_LINKOPTS} ${ABSL_DEFAULT_LINKOPTS}
$<$<BOOL:${MINGW}>:-ldbghelp>
DEPS DEPS
absl::debugging_internal absl::debugging_internal
absl::demangle_internal absl::demangle_internal
@ -1058,8 +1095,10 @@ absl_cc_library(
demangle_internal demangle_internal
HDRS HDRS
"${DIR}/internal/demangle.h" "${DIR}/internal/demangle.h"
"${DIR}/internal/demangle_rust.h"
SRCS SRCS
"${DIR}/internal/demangle.cc" "${DIR}/internal/demangle.cc"
"${DIR}/internal/demangle_rust.cc"
COPTS COPTS
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
DEPS DEPS
@ -1252,6 +1291,7 @@ absl_cc_library(
absl::strings absl::strings
absl::synchronization absl::synchronization
absl::flat_hash_map absl::flat_hash_map
absl::no_destructor
) )
# Internal-only target, do not depend on directly. # Internal-only target, do not depend on directly.
@ -1283,12 +1323,9 @@ absl_cc_library(
absl_cc_library( absl_cc_library(
NAME NAME
flags flags
SRCS
"${DIR}/flag.cc"
HDRS HDRS
"${DIR}/declare.h" "${DIR}/declare.h"
"${DIR}/flag.h" "${DIR}/flag.h"
"${DIR}/internal/flag_msvc.inc"
COPTS COPTS
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
LINKOPTS LINKOPTS
@ -1299,7 +1336,6 @@ absl_cc_library(
absl::flags_config absl::flags_config
absl::flags_internal absl::flags_internal
absl::flags_reflection absl::flags_reflection
absl::base
absl::core_headers absl::core_headers
absl::strings absl::strings
) )
@ -1379,6 +1415,9 @@ absl_cc_library(
absl::synchronization absl::synchronization
) )
############################################################################
# Unit tests in alphabetical order.
set(DIR ${ABSL_ROOT_DIR}/absl/functional) set(DIR ${ABSL_ROOT_DIR}/absl/functional)
absl_cc_library( absl_cc_library(
@ -1431,6 +1470,18 @@ absl_cc_library(
PUBLIC PUBLIC
) )
absl_cc_library(
NAME
overload
HDRS
"${DIR}/overload.h"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::meta
PUBLIC
)
set(DIR ${ABSL_ROOT_DIR}/absl/hash) set(DIR ${ABSL_ROOT_DIR}/absl/hash)
absl_cc_library( absl_cc_library(
@ -1640,6 +1691,7 @@ absl_cc_library(
absl::log_internal_conditions absl::log_internal_conditions
absl::log_internal_message absl::log_internal_message
absl::log_internal_strip absl::log_internal_strip
absl::absl_vlog_is_on
) )
absl_cc_library( absl_cc_library(
@ -1721,6 +1773,7 @@ absl_cc_library(
absl::log_entry absl::log_entry
absl::log_severity absl::log_severity
absl::log_sink absl::log_sink
absl::no_destructor
absl::raw_logging_internal absl::raw_logging_internal
absl::synchronization absl::synchronization
absl::span absl::span
@ -1771,6 +1824,7 @@ absl_cc_library(
LINKOPTS LINKOPTS
${ABSL_DEFAULT_LINKOPTS} ${ABSL_DEFAULT_LINKOPTS}
DEPS DEPS
absl::core_headers
absl::log_internal_message absl::log_internal_message
absl::log_internal_nullstream absl::log_internal_nullstream
absl::log_severity absl::log_severity
@ -1876,6 +1930,11 @@ absl_cc_library(
PUBLIC PUBLIC
) )
# Warning: Many linkers will strip the contents of this library because its
# symbols are only used in a global constructor. A workaround is for clients
# to link this using $<LINK_LIBRARY:WHOLE_ARCHIVE,absl::log_flags> instead of
# the plain absl::log_flags.
# TODO(b/320467376): Implement the equivalent of Bazel's alwayslink=True.
absl_cc_library( absl_cc_library(
NAME NAME
log_flags log_flags
@ -1897,6 +1956,7 @@ absl_cc_library(
absl::flags absl::flags
absl::flags_marshalling absl::flags_marshalling
absl::strings absl::strings
absl::vlog_config_internal
PUBLIC PUBLIC
) )
@ -1919,6 +1979,7 @@ absl_cc_library(
absl::log_severity absl::log_severity
absl::raw_logging_internal absl::raw_logging_internal
absl::strings absl::strings
absl::vlog_config_internal
) )
absl_cc_library( absl_cc_library(
@ -1952,6 +2013,7 @@ absl_cc_library(
${ABSL_DEFAULT_LINKOPTS} ${ABSL_DEFAULT_LINKOPTS}
DEPS DEPS
absl::log_internal_log_impl absl::log_internal_log_impl
absl::vlog_is_on
PUBLIC PUBLIC
) )
@ -2064,21 +2126,75 @@ absl_cc_library(
) )
absl_cc_library( absl_cc_library(
NAME NAME
log_internal_fnmatch vlog_config_internal
SRCS SRCS
"${DIR}/internal/fnmatch.cc" "${DIR}/internal/vlog_config.cc"
HDRS HDRS
"${DIR}/internal/fnmatch.h" "${DIR}/internal/vlog_config.h"
COPTS COPTS
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
LINKOPTS LINKOPTS
${ABSL_DEFAULT_LINKOPTS} ${ABSL_DEFAULT_LINKOPTS}
DEPS DEPS
absl::config absl::base
absl::strings absl::config
absl::core_headers
absl::log_internal_fnmatch
absl::memory
absl::no_destructor
absl::strings
absl::synchronization
absl::optional
) )
absl_cc_library(
NAME
absl_vlog_is_on
COPTS
${ABSL_DEFAULT_COPTS}
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
HDRS
"${DIR}/absl_vlog_is_on.h"
DEPS
absl::vlog_config_internal
absl::config
absl::core_headers
absl::strings
)
absl_cc_library(
NAME
vlog_is_on
COPTS
${ABSL_DEFAULT_COPTS}
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
HDRS
"${DIR}/vlog_is_on.h"
DEPS
absl::absl_vlog_is_on
)
absl_cc_library(
NAME
log_internal_fnmatch
SRCS
"${DIR}/internal/fnmatch.cc"
HDRS
"${DIR}/internal/fnmatch.h"
COPTS
${ABSL_DEFAULT_COPTS}
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
DEPS
absl::config
absl::strings
)
# Test targets
set(DIR ${ABSL_ROOT_DIR}/absl/memory) set(DIR ${ABSL_ROOT_DIR}/absl/memory)
absl_cc_library( absl_cc_library(
@ -2147,6 +2263,7 @@ absl_cc_library(
COPTS COPTS
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
DEPS DEPS
absl::compare
absl::config absl::config
absl::core_headers absl::core_headers
absl::bits absl::bits
@ -2176,6 +2293,8 @@ absl_cc_library(
PUBLIC PUBLIC
) )
set(DIR ${ABSL_ROOT_DIR}/absl/profiling)
absl_cc_library( absl_cc_library(
NAME NAME
sample_recorder sample_recorder
@ -2188,8 +2307,6 @@ absl_cc_library(
absl::synchronization absl::synchronization
) )
set(DIR ${ABSL_ROOT_DIR}/absl/profiling)
absl_cc_library( absl_cc_library(
NAME NAME
exponential_biased exponential_biased
@ -2265,6 +2382,7 @@ absl_cc_library(
LINKOPTS LINKOPTS
${ABSL_DEFAULT_LINKOPTS} ${ABSL_DEFAULT_LINKOPTS}
DEPS DEPS
absl::config
absl::fast_type_id absl::fast_type_id
absl::optional absl::optional
) )
@ -2336,11 +2454,13 @@ absl_cc_library(
DEPS DEPS
absl::config absl::config
absl::inlined_vector absl::inlined_vector
absl::nullability
absl::random_internal_pool_urbg absl::random_internal_pool_urbg
absl::random_internal_salted_seed_seq absl::random_internal_salted_seed_seq
absl::random_internal_seed_material absl::random_internal_seed_material
absl::random_seed_gen_exception absl::random_seed_gen_exception
absl::span absl::span
absl::string_view
) )
# Internal-only target, do not depend on directly. # Internal-only target, do not depend on directly.
@ -2399,6 +2519,7 @@ absl_cc_library(
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
LINKOPTS LINKOPTS
${ABSL_DEFAULT_LINKOPTS} ${ABSL_DEFAULT_LINKOPTS}
$<$<BOOL:${MINGW}>:-lbcrypt>
DEPS DEPS
absl::core_headers absl::core_headers
absl::optional absl::optional
@ -2658,6 +2779,29 @@ absl_cc_library(
absl::config absl::config
) )
# Internal-only target, do not depend on directly.
absl_cc_library(
NAME
random_internal_distribution_test_util
SRCS
"${DIR}/internal/chi_square.cc"
"${DIR}/internal/distribution_test_util.cc"
HDRS
"${DIR}/internal/chi_square.h"
"${DIR}/internal/distribution_test_util.h"
COPTS
${ABSL_DEFAULT_COPTS}
LINKOPTS
${ABSL_DEFAULT_LINKOPTS}
DEPS
absl::config
absl::core_headers
absl::raw_logging_internal
absl::strings
absl::str_format
absl::span
)
# Internal-only target, do not depend on directly. # Internal-only target, do not depend on directly.
absl_cc_library( absl_cc_library(
NAME NAME
@ -2699,6 +2843,8 @@ absl_cc_library(
absl::function_ref absl::function_ref
absl::inlined_vector absl::inlined_vector
absl::memory absl::memory
absl::no_destructor
absl::nullability
absl::optional absl::optional
absl::raw_logging_internal absl::raw_logging_internal
absl::span absl::span
@ -2724,8 +2870,11 @@ absl_cc_library(
absl::base absl::base
absl::config absl::config
absl::core_headers absl::core_headers
absl::has_ostream_operator
absl::nullability
absl::raw_logging_internal absl::raw_logging_internal
absl::status absl::status
absl::str_format
absl::strings absl::strings
absl::type_traits absl::type_traits
absl::utility absl::utility
@ -2748,6 +2897,7 @@ absl_cc_library(
absl::base absl::base
absl::config absl::config
absl::core_headers absl::core_headers
absl::nullability
absl::throw_delegate absl::throw_delegate
PUBLIC PUBLIC
) )
@ -2762,6 +2912,7 @@ absl_cc_library(
"${DIR}/has_absl_stringify.h" "${DIR}/has_absl_stringify.h"
"${DIR}/internal/damerau_levenshtein_distance.h" "${DIR}/internal/damerau_levenshtein_distance.h"
"${DIR}/internal/string_constant.h" "${DIR}/internal/string_constant.h"
"${DIR}/internal/has_absl_stringify.h"
"${DIR}/match.h" "${DIR}/match.h"
"${DIR}/numbers.h" "${DIR}/numbers.h"
"${DIR}/str_cat.h" "${DIR}/str_cat.h"
@ -2805,6 +2956,7 @@ absl_cc_library(
absl::endian absl::endian
absl::int128 absl::int128
absl::memory absl::memory
absl::nullability
absl::raw_logging_internal absl::raw_logging_internal
absl::throw_delegate absl::throw_delegate
absl::type_traits absl::type_traits
@ -2824,6 +2976,18 @@ absl_cc_library(
PUBLIC PUBLIC
) )
absl_cc_library(
NAME
has_ostream_operator
HDRS
"${DIR}/has_ostream_operator.h"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::config
PUBLIC
)
# Internal-only target, do not depend on directly. # Internal-only target, do not depend on directly.
absl_cc_library( absl_cc_library(
NAME NAME
@ -2855,7 +3019,12 @@ absl_cc_library(
COPTS COPTS
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
DEPS DEPS
absl::config
absl::core_headers
absl::nullability
absl::span
absl::str_format_internal absl::str_format_internal
absl::string_view
PUBLIC PUBLIC
) )
@ -2886,6 +3055,7 @@ absl_cc_library(
absl::strings absl::strings
absl::config absl::config
absl::core_headers absl::core_headers
absl::fixed_array
absl::inlined_vector absl::inlined_vector
absl::numeric_representation absl::numeric_representation
absl::type_traits absl::type_traits
@ -2989,6 +3159,7 @@ absl_cc_library(
DEPS DEPS
absl::base absl::base
absl::config absl::config
absl::no_destructor
absl::raw_logging_internal absl::raw_logging_internal
absl::synchronization absl::synchronization
) )
@ -3079,6 +3250,7 @@ absl_cc_library(
absl::endian absl::endian
absl::function_ref absl::function_ref
absl::inlined_vector absl::inlined_vector
absl::nullability
absl::optional absl::optional
absl::raw_logging_internal absl::raw_logging_internal
absl::span absl::span
@ -3246,6 +3418,8 @@ absl_cc_library(
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
DEPS DEPS
Threads::Threads Threads::Threads
# TODO(#1495): Use $<LINK_LIBRARY:FRAMEWORK,CoreFoundation> once our
# minimum CMake version >= 3.24
$<$<PLATFORM_ID:Darwin>:-Wl,-framework,CoreFoundation> $<$<PLATFORM_ID:Darwin>:-Wl,-framework,CoreFoundation>
) )
@ -3286,8 +3460,8 @@ absl_cc_library(
NAME NAME
bad_any_cast_impl bad_any_cast_impl
SRCS SRCS
"${DIR}/bad_any_cast.h" "${DIR}/bad_any_cast.h"
"${DIR}/bad_any_cast.cc" "${DIR}/bad_any_cast.cc"
COPTS COPTS
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
DEPS DEPS
@ -3307,6 +3481,7 @@ absl_cc_library(
DEPS DEPS
absl::algorithm absl::algorithm
absl::core_headers absl::core_headers
absl::nullability
absl::throw_delegate absl::throw_delegate
absl::type_traits absl::type_traits
PUBLIC PUBLIC
@ -3327,6 +3502,7 @@ absl_cc_library(
absl::config absl::config
absl::core_headers absl::core_headers
absl::memory absl::memory
absl::nullability
absl::type_traits absl::type_traits
absl::utility absl::utility
PUBLIC PUBLIC
@ -3389,6 +3565,7 @@ absl_cc_library(
COPTS COPTS
${ABSL_DEFAULT_COPTS} ${ABSL_DEFAULT_COPTS}
DEPS DEPS
absl::config
absl::core_headers absl::core_headers
absl::type_traits absl::type_traits
PUBLIC PUBLIC

2
contrib/googletest vendored

@ -1 +1 @@
Subproject commit e47544ad31cb3ceecd04cc13e8fe556f8df9fe0b Subproject commit a7f443b80b105f940225332ed3c31f2790092f47

2
contrib/openssl vendored

@ -1 +1 @@
Subproject commit f7b8721dfc66abb147f24ca07b9c9d1d64f40f71 Subproject commit 67c0b63e578e4c751ac9edf490f5a96124fff8dc

View File

@ -41,8 +41,7 @@
"docker/test/stateless": { "docker/test/stateless": {
"name": "clickhouse/stateless-test", "name": "clickhouse/stateless-test",
"dependent": [ "dependent": [
"docker/test/stateful", "docker/test/stateful"
"docker/test/unit"
] ]
}, },
"docker/test/stateful": { "docker/test/stateful": {
@ -122,15 +121,16 @@
"docker/test/base": { "docker/test/base": {
"name": "clickhouse/test-base", "name": "clickhouse/test-base",
"dependent": [ "dependent": [
"docker/test/clickbench",
"docker/test/fuzzer", "docker/test/fuzzer",
"docker/test/libfuzzer",
"docker/test/integration/base", "docker/test/integration/base",
"docker/test/keeper-jepsen", "docker/test/keeper-jepsen",
"docker/test/libfuzzer",
"docker/test/server-jepsen", "docker/test/server-jepsen",
"docker/test/sqllogic", "docker/test/sqllogic",
"docker/test/sqltest", "docker/test/sqltest",
"docker/test/clickbench", "docker/test/stateless",
"docker/test/stateless" "docker/test/unit"
] ]
}, },
"docker/test/integration/kerberized_hadoop": { "docker/test/integration/kerberized_hadoop": {

View File

@ -33,6 +33,7 @@ RUN pip3 install \
flake8==4.0.1 \ flake8==4.0.1 \
requests \ requests \
thefuzz \ thefuzz \
tqdm==4.66.4 \
types-requests \ types-requests \
unidiff \ unidiff \
&& rm -rf /root/.cache/pip && rm -rf /root/.cache/pip

View File

@ -1,9 +1,7 @@
# rebuild in #33610 # rebuild in #33610
# docker build -t clickhouse/unit-test . # docker build -t clickhouse/unit-test .
ARG FROM_TAG=latest ARG FROM_TAG=latest
FROM clickhouse/stateless-test:$FROM_TAG FROM clickhouse/test-base:$FROM_TAG
RUN apt-get install gdb
COPY run.sh / COPY run.sh /
CMD ["/bin/bash", "/run.sh"] CMD ["/bin/bash", "/run.sh"]

View File

@ -0,0 +1,45 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.1.6.52-stable (fa09f677bc9) FIXME as compared to v24.1.5.6-stable (7f67181ff31)
#### Improvement
* Backported in [#60292](https://github.com/ClickHouse/ClickHouse/issues/60292): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#60832](https://github.com/ClickHouse/ClickHouse/issues/60832): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#60413](https://github.com/ClickHouse/ClickHouse/issues/60413): Fix segmentation fault in KQL parser when the input query exceeds the `max_query_size`. Also re-enable the KQL dialect. Fixes [#59036](https://github.com/ClickHouse/ClickHouse/issues/59036) and [#59037](https://github.com/ClickHouse/ClickHouse/issues/59037). [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)).
* Backported in [#60074](https://github.com/ClickHouse/ClickHouse/issues/60074): Fix error `Read beyond last offset` for `AsynchronousBoundedReadBuffer`. [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)).
* Backported in [#60299](https://github.com/ClickHouse/ClickHouse/issues/60299): Fix having neither acked nor nacked messages. If an exception happens during the read-write phase, messages will be nacked. [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#60066](https://github.com/ClickHouse/ClickHouse/issues/60066): Fix optimize_uniq_to_count removing the column alias. [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#60638](https://github.com/ClickHouse/ClickHouse/issues/60638): Fixed a bug in parallel optimization for queries with `FINAL`, which could give an incorrect result in rare cases. [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#60177](https://github.com/ClickHouse/ClickHouse/issues/60177): Fix cosineDistance crash with Nullable. [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#60279](https://github.com/ClickHouse/ClickHouse/issues/60279): Hide sensitive info for `S3Queue` table engine. [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#61000](https://github.com/ClickHouse/ClickHouse/issues/61000): Reduce the number of read rows from `system.numbers`. Fixes [#59418](https://github.com/ClickHouse/ClickHouse/issues/59418). [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
* Backported in [#60791](https://github.com/ClickHouse/ClickHouse/issues/60791): Fix buffer overflow that can happen if the attacker asks the HTTP server to decompress data with a composition of codecs and size triggering numeric overflow. Fix buffer overflow that can happen inside codec NONE on wrong input data. This was submitted by TIANGONG research team through our [Bug Bounty program](https://github.com/ClickHouse/ClickHouse/issues/38986). [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#60783](https://github.com/ClickHouse/ClickHouse/issues/60783): Functions for SQL/JSON were able to read uninitialized memory. This closes [#60017](https://github.com/ClickHouse/ClickHouse/issues/60017). Found by Fuzzer. [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#60803](https://github.com/ClickHouse/ClickHouse/issues/60803): Do not set aws custom metadata `x-amz-meta-*` headers on UploadPart & CompleteMultipartUpload calls. [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
* Backported in [#60820](https://github.com/ClickHouse/ClickHouse/issues/60820): Fix crash in arrayEnumerateRanked. [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#60841](https://github.com/ClickHouse/ClickHouse/issues/60841): Fix crash when using input() in INSERT SELECT JOIN. Closes [#60035](https://github.com/ClickHouse/ClickHouse/issues/60035). [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#60904](https://github.com/ClickHouse/ClickHouse/issues/60904): Avoid segfault if too many keys are skipped when reading from S3. [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
#### NO CL CATEGORY
* Backported in [#60186](https://github.com/ClickHouse/ClickHouse/issues/60186):. [#60181](https://github.com/ClickHouse/ClickHouse/pull/60181) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#60333](https://github.com/ClickHouse/ClickHouse/issues/60333): CI: Fix job failures due to jepsen artifacts. [#59890](https://github.com/ClickHouse/ClickHouse/pull/59890) ([Max K.](https://github.com/maxknv)).
* Backported in [#60034](https://github.com/ClickHouse/ClickHouse/issues/60034): Fix mark release ready. [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#60326](https://github.com/ClickHouse/ClickHouse/issues/60326): Ability to detect undead ZooKeeper sessions. [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#60363](https://github.com/ClickHouse/ClickHouse/issues/60363): CI: hot fix for gh statuses. [#60201](https://github.com/ClickHouse/ClickHouse/pull/60201) ([Max K.](https://github.com/maxknv)).
* Backported in [#60648](https://github.com/ClickHouse/ClickHouse/issues/60648): Detect io_uring in tests. [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#60569](https://github.com/ClickHouse/ClickHouse/issues/60569): Remove broken test while we fix it. [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#60756](https://github.com/ClickHouse/ClickHouse/issues/60756): Update shellcheck. [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#60584](https://github.com/ClickHouse/ClickHouse/issues/60584): CI: fix docker build job name. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)).

View File

@ -0,0 +1,101 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.4.2.141-stable (9e23d27bd11) FIXME as compared to v24.4.1.2088-stable (6d4b31322d1)
#### Improvement
* Backported in [#63467](https://github.com/ClickHouse/ClickHouse/issues/63467): Make rabbitmq nack broken messages. Closes [#45350](https://github.com/ClickHouse/ClickHouse/issues/45350). [#60312](https://github.com/ClickHouse/ClickHouse/pull/60312) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### Build/Testing/Packaging Improvement
* Backported in [#63612](https://github.com/ClickHouse/ClickHouse/issues/63612): The Dockerfile is reviewed by the docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#64279](https://github.com/ClickHouse/ClickHouse/issues/64279): Fix queries with FINAL giving wrong results when the table does not use adaptive granularity. [#62432](https://github.com/ClickHouse/ClickHouse/pull/62432) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#63295](https://github.com/ClickHouse/ClickHouse/issues/63295): Fix crash with untuple and unresolved lambda. [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#63978](https://github.com/ClickHouse/ClickHouse/issues/63978): Fix intersect parts when restart after drop range. [#63202](https://github.com/ClickHouse/ClickHouse/pull/63202) ([Han Fei](https://github.com/hanfei1991)).
* Backported in [#63413](https://github.com/ClickHouse/ClickHouse/issues/63413): Fix a misbehavior when SQL security defaults don't load for old tables during server startup. [#63209](https://github.com/ClickHouse/ClickHouse/pull/63209) ([pufit](https://github.com/pufit)).
* Backported in [#63388](https://github.com/ClickHouse/ClickHouse/issues/63388): JOIN filter push down filled join fix. Closes [#63228](https://github.com/ClickHouse/ClickHouse/issues/63228). [#63234](https://github.com/ClickHouse/ClickHouse/pull/63234) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#63618](https://github.com/ClickHouse/ClickHouse/issues/63618): Fix bug which could potentially lead to rare LOGICAL_ERROR during SELECT query with message: `Unexpected return type from materialize. Expected type_XXX. Got type_YYY.` Introduced in [#59379](https://github.com/ClickHouse/ClickHouse/issues/59379). [#63353](https://github.com/ClickHouse/ClickHouse/pull/63353) ([alesapin](https://github.com/alesapin)).
* Backported in [#63451](https://github.com/ClickHouse/ClickHouse/issues/63451): Fix `X-ClickHouse-Timezone` header returning wrong timezone when using `session_timezone` as query level setting. [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)).
* Backported in [#63605](https://github.com/ClickHouse/ClickHouse/issues/63605): Fix backup of projection part in case projection was removed from table metadata, but part still has projection. [#63426](https://github.com/ClickHouse/ClickHouse/pull/63426) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#63510](https://github.com/ClickHouse/ClickHouse/issues/63510): Fix 'Every derived table must have its own alias' error for MYSQL dictionary source, close [#63341](https://github.com/ClickHouse/ClickHouse/issues/63341). [#63481](https://github.com/ClickHouse/ClickHouse/pull/63481) ([vdimir](https://github.com/vdimir)).
* Backported in [#63592](https://github.com/ClickHouse/ClickHouse/issues/63592): Avoid segfault in `MergeTreePrefetchedReadPool` while fetching projection parts. [#63513](https://github.com/ClickHouse/ClickHouse/pull/63513) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#63750](https://github.com/ClickHouse/ClickHouse/issues/63750): Read only the necessary columns from VIEW (new analyzer). Closes [#62594](https://github.com/ClickHouse/ClickHouse/issues/62594). [#63688](https://github.com/ClickHouse/ClickHouse/pull/63688) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#63772](https://github.com/ClickHouse/ClickHouse/issues/63772): Fix [#63539](https://github.com/ClickHouse/ClickHouse/issues/63539). Forbid WINDOW redefinition in new analyzer. [#63694](https://github.com/ClickHouse/ClickHouse/pull/63694) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#63872](https://github.com/ClickHouse/ClickHouse/issues/63872): Flatten_nested is broken with replicated database. [#63695](https://github.com/ClickHouse/ClickHouse/pull/63695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#63854](https://github.com/ClickHouse/ClickHouse/issues/63854): Fix `Not found column` and `CAST AS Map from array requires nested tuple of 2 elements` exceptions for distributed queries which use `Map(Nothing, Nothing)` type. Fixes [#63637](https://github.com/ClickHouse/ClickHouse/issues/63637). [#63753](https://github.com/ClickHouse/ClickHouse/pull/63753) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#63847](https://github.com/ClickHouse/ClickHouse/issues/63847): Fix possible `ILLEGAL_COLUMN` error in `partial_merge` join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#63755](https://github.com/ClickHouse/ClickHouse/pull/63755) ([vdimir](https://github.com/vdimir)).
* Backported in [#63908](https://github.com/ClickHouse/ClickHouse/issues/63908): `query_plan_remove_redundant_distinct` can break queries with WINDOW FUNCTIONS (when `allow_experimental_analyzer` is on). Fixes [#62820](https://github.com/ClickHouse/ClickHouse/issues/62820). [#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)).
* Backported in [#63955](https://github.com/ClickHouse/ClickHouse/issues/63955): Fix possible crash with SYSTEM UNLOAD PRIMARY KEY. [#63778](https://github.com/ClickHouse/ClickHouse/pull/63778) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#63938](https://github.com/ClickHouse/ClickHouse/issues/63938): Allow JOIN filter push down to both streams if only single equivalent column is used in query. Closes [#63799](https://github.com/ClickHouse/ClickHouse/issues/63799). [#63819](https://github.com/ClickHouse/ClickHouse/pull/63819) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#63991](https://github.com/ClickHouse/ClickHouse/issues/63991): Fix incorrect select query result when parallel replicas were used to read from a Materialized View. [#63861](https://github.com/ClickHouse/ClickHouse/pull/63861) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#64033](https://github.com/ClickHouse/ClickHouse/issues/64033): Fix an error `Database name is empty` for remote queries with lambdas over the cluster with modified default database. Fixes [#63471](https://github.com/ClickHouse/ClickHouse/issues/63471). [#63864](https://github.com/ClickHouse/ClickHouse/pull/63864) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#64561](https://github.com/ClickHouse/ClickHouse/issues/64561): Fix SIGSEGV due to CPU/Real (`query_profiler_real_time_period_ns`/`query_profiler_cpu_time_period_ns`) profiler (has been an issue since 2022, that leads to periodic server crashes, especially if you were using distributed engine). [#63865](https://github.com/ClickHouse/ClickHouse/pull/63865) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#64011](https://github.com/ClickHouse/ClickHouse/issues/64011): Fix analyzer - IN function with arbitrarily deep sub-selects in materialized view to use insertion block. [#63930](https://github.com/ClickHouse/ClickHouse/pull/63930) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#64238](https://github.com/ClickHouse/ClickHouse/issues/64238): Fix resolve of unqualified COLUMNS matcher. Preserve the input columns order and forbid usage of unknown identifiers. [#63962](https://github.com/ClickHouse/ClickHouse/pull/63962) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#64103](https://github.com/ClickHouse/ClickHouse/issues/64103): Deserialize untrusted binary inputs in a safer way. [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#64170](https://github.com/ClickHouse/ClickHouse/issues/64170): Add missing settings to recoverLostReplica. [#64040](https://github.com/ClickHouse/ClickHouse/pull/64040) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#64322](https://github.com/ClickHouse/ClickHouse/issues/64322): This fix will use a proper redefined context with the correct definer for each individual view in the query pipeline. Closes [#63777](https://github.com/ClickHouse/ClickHouse/issues/63777). [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)).
* Backported in [#64382](https://github.com/ClickHouse/ClickHouse/issues/64382): Fix analyzer: "Not found column" error is fixed when using INTERPOLATE. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#64568](https://github.com/ClickHouse/ClickHouse/issues/64568): Fix creating backups to S3 buckets with different credentials from the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#64272](https://github.com/ClickHouse/ClickHouse/issues/64272): Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView. [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#64330](https://github.com/ClickHouse/ClickHouse/issues/64330): The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#64254](https://github.com/ClickHouse/ClickHouse/issues/64254): Ignore `text_log` config when using Keeper. [#64218](https://github.com/ClickHouse/ClickHouse/pull/64218) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#64690](https://github.com/ClickHouse/ClickHouse/issues/64690): Fix Query Tree size validation. Closes [#63701](https://github.com/ClickHouse/ClickHouse/issues/63701). [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#64409](https://github.com/ClickHouse/ClickHouse/issues/64409): Fix `Logical error: Bad cast` for `Buffer` table with `PREWHERE`. Fixes [#64172](https://github.com/ClickHouse/ClickHouse/issues/64172). [#64388](https://github.com/ClickHouse/ClickHouse/pull/64388) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#64727](https://github.com/ClickHouse/ClickHouse/issues/64727): Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#64623](https://github.com/ClickHouse/ClickHouse/issues/64623): Fix an error `Cannot find column` in distributed queries with constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#64680](https://github.com/ClickHouse/ClickHouse/issues/64680): Fix [#64612](https://github.com/ClickHouse/ClickHouse/issues/64612). Do not rewrite aggregation if `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#64942](https://github.com/ClickHouse/ClickHouse/issues/64942): Fix OrderByLimitByDuplicateEliminationVisitor across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#64871](https://github.com/ClickHouse/ClickHouse/issues/64871): Fixed possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via http protocol, asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)).
#### CI Fix or Improvement (changelog entry is not required)
* Backported in [#63364](https://github.com/ClickHouse/ClickHouse/issues/63364): Implement cumulative A Sync status. [#61464](https://github.com/ClickHouse/ClickHouse/pull/61464) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#63338](https://github.com/ClickHouse/ClickHouse/issues/63338): Use `/commit/` to have the URLs in [reports](https://play.clickhouse.com/play?user=play#c2VsZWN0IGRpc3RpbmN0IGNvbW1pdF91cmwgZnJvbSBjaGVja3Mgd2hlcmUgY2hlY2tfc3RhcnRfdGltZSA+PSBub3coKSAtIGludGVydmFsIDEgbW9udGggYW5kIHB1bGxfcmVxdWVzdF9udW1iZXI9NjA1MzI=) like https://github.com/ClickHouse/ClickHouse/commit/44f8bc5308b53797bec8cccc3bd29fab8a00235d and not like https://github.com/ClickHouse/ClickHouse/commits/44f8bc5308b53797bec8cccc3bd29fab8a00235d. [#63331](https://github.com/ClickHouse/ClickHouse/pull/63331) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#63376](https://github.com/ClickHouse/ClickHouse/issues/63376):. [#63366](https://github.com/ClickHouse/ClickHouse/pull/63366) ([Aleksei Filatov](https://github.com/aalexfvk)).
* Backported in [#63571](https://github.com/ClickHouse/ClickHouse/issues/63571):. [#63551](https://github.com/ClickHouse/ClickHouse/pull/63551) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Backported in [#63651](https://github.com/ClickHouse/ClickHouse/issues/63651): Fix 02362_part_log_merge_algorithm flaky test. [#63635](https://github.com/ClickHouse/ClickHouse/pull/63635) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Backported in [#63828](https://github.com/ClickHouse/ClickHouse/issues/63828): Fix test_odbc_interaction from aarch64 [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63787](https://github.com/ClickHouse/ClickHouse/pull/63787) ([alesapin](https://github.com/alesapin)).
* Backported in [#63897](https://github.com/ClickHouse/ClickHouse/issues/63897): Fix test `test_catboost_evaluate` for aarch64. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63789](https://github.com/ClickHouse/ClickHouse/pull/63789) ([alesapin](https://github.com/alesapin)).
* Backported in [#63889](https://github.com/ClickHouse/ClickHouse/issues/63889): Remove HDFS from disks config for one integration test for arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63832](https://github.com/ClickHouse/ClickHouse/pull/63832) ([alesapin](https://github.com/alesapin)).
* Backported in [#63881](https://github.com/ClickHouse/ClickHouse/issues/63881): Bump version for old image in test_short_strings_aggregation to make it work on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63836](https://github.com/ClickHouse/ClickHouse/pull/63836) ([alesapin](https://github.com/alesapin)).
* Backported in [#63919](https://github.com/ClickHouse/ClickHouse/issues/63919): Disable test `test_non_default_compression/test.py::test_preconfigured_deflateqpl_codec` on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63839](https://github.com/ClickHouse/ClickHouse/pull/63839) ([alesapin](https://github.com/alesapin)).
* Backported in [#63971](https://github.com/ClickHouse/ClickHouse/issues/63971): Fix 02124_insert_deduplication_token_multiple_blocks. [#63950](https://github.com/ClickHouse/ClickHouse/pull/63950) ([Han Fei](https://github.com/hanfei1991)).
* Backported in [#64049](https://github.com/ClickHouse/ClickHouse/issues/64049): Add `ClickHouseVersion.copy` method. Create a branch release in advance without spinning out the release to increase the stability. [#64039](https://github.com/ClickHouse/ClickHouse/pull/64039) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#64078](https://github.com/ClickHouse/ClickHouse/issues/64078): The mime type is not 100% reliable for Python and shell scripts without shebangs; add a check for file extension. [#64062](https://github.com/ClickHouse/ClickHouse/pull/64062) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#64161](https://github.com/ClickHouse/ClickHouse/issues/64161): Add retries in git submodule update. [#64125](https://github.com/ClickHouse/ClickHouse/pull/64125) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
* Backported in [#64589](https://github.com/ClickHouse/ClickHouse/issues/64589): Disabled `enable_vertical_final` setting by default. This feature should not be used because it has a bug: [#64543](https://github.com/ClickHouse/ClickHouse/issues/64543). [#64544](https://github.com/ClickHouse/ClickHouse/pull/64544) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#64880](https://github.com/ClickHouse/ClickHouse/issues/64880): This PR fixes an error when a user in a specific situation can escalate their privileges on the default database without necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)).
#### NO CL CATEGORY
* Backported in [#63306](https://github.com/ClickHouse/ClickHouse/issues/63306):. [#63297](https://github.com/ClickHouse/ClickHouse/pull/63297) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#63710](https://github.com/ClickHouse/ClickHouse/issues/63710):. [#63415](https://github.com/ClickHouse/ClickHouse/pull/63415) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Backport [#64363](https://github.com/ClickHouse/ClickHouse/issues/64363) to 24.4: Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts"'. [#64905](https://github.com/ClickHouse/ClickHouse/pull/64905) ([Raúl Marín](https://github.com/Algunenano)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* group_by_use_nulls strikes back [#62922](https://github.com/ClickHouse/ClickHouse/pull/62922) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Add `FROM` keyword to `TRUNCATE ALL TABLES` [#63241](https://github.com/ClickHouse/ClickHouse/pull/63241) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* More checks for concurrently deleted files and dirs in system.remote_data_paths [#63274](https://github.com/ClickHouse/ClickHouse/pull/63274) ([Alexander Gololobov](https://github.com/davenger)).
* Try fix segfault in `MergeTreeReadPoolBase::createTask` [#63323](https://github.com/ClickHouse/ClickHouse/pull/63323) ([Antonio Andelic](https://github.com/antonio2368)).
* Skip unaccessible table dirs in system.remote_data_paths [#63330](https://github.com/ClickHouse/ClickHouse/pull/63330) ([Alexander Gololobov](https://github.com/davenger)).
* Workaround for `oklch()` inside canvas bug for firefox [#63404](https://github.com/ClickHouse/ClickHouse/pull/63404) ([Sergei Trifonov](https://github.com/serxa)).
* Cancel S3 reads properly when parallel reads are used [#63687](https://github.com/ClickHouse/ClickHouse/pull/63687) ([Antonio Andelic](https://github.com/antonio2368)).
* Userspace page cache: don't collect stats if cache is unused [#63730](https://github.com/ClickHouse/ClickHouse/pull/63730) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix sanitizers [#64090](https://github.com/ClickHouse/ClickHouse/pull/64090) ([Azat Khuzhin](https://github.com/azat)).
* Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts [#64363](https://github.com/ClickHouse/ClickHouse/pull/64363) ([Kruglov Pavel](https://github.com/Avogar)).
* CI: Critical bugfix category in PR template [#64480](https://github.com/ClickHouse/ClickHouse/pull/64480) ([Max K.](https://github.com/maxknv)).

View File

@ -480,7 +480,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
- [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`. - [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`.
- [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. - [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`. - [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`.
- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`. - [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`.
- [input_format_csv_try_infer_numbers_from_strings](/docs/en/operations/settings/settings-formats.md/#input_format_csv_try_infer_numbers_from_strings) - Try to infer numbers from string fields while schema inference. Default value - `false`. - [input_format_csv_try_infer_numbers_from_strings](/docs/en/operations/settings/settings-formats.md/#input_format_csv_try_infer_numbers_from_strings) - Try to infer numbers from string fields while schema inference. Default value - `false`.
@ -2165,6 +2165,8 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`. - [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`. - [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `lz4`. - [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `lz4`.
- [input_format_parquet_max_block_size](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_max_block_size) - Max block row size for parquet reader. Default value - `65409`.
- [input_format_parquet_prefer_block_bytes](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_prefer_block_bytes) - Average block bytes output by parquet reader. Default value - `16744704`.
## ParquetMetadata {data-format-parquet-metadata} ## ParquetMetadata {data-format-parquet-metadata}

View File

@ -67,6 +67,23 @@ To manage named collections with DDL a user must have the `named_control_collect
In the above example the `password_sha256_hex` value is the hexadecimal representation of the SHA256 hash of the password. This configuration for the user `default` has the attribute `replace=true` as in the default configuration has a plain text `password` set, and it is not possible to have both plain text and sha256 hex passwords set for a user. In the above example the `password_sha256_hex` value is the hexadecimal representation of the SHA256 hash of the password. This configuration for the user `default` has the attribute `replace=true` as in the default configuration has a plain text `password` set, and it is not possible to have both plain text and sha256 hex passwords set for a user.
::: :::
### Storage for named collections
Named collections can either be stored on local disk or in zookeeper/keeper. By default local storage is used.
To configure named collections storage in keeper, add a `type` (equal to either `keeper` or `zookeeper`) and a `path` (the path in keeper where named collections will be stored) to the `named_collections_storage` section of the configuration file:
```
<clickhouse>
<named_collections_storage>
<type>zookeeper</type>
<path>/named_collections_path/</path>
<update_timeout_ms>1000</update_timeout_ms>
</named_collections_storage>
</clickhouse>
```
The optional configuration parameter `update_timeout_ms` defaults to `5000`.
## Storing named collections in configuration files ## Storing named collections in configuration files
### XML example ### XML example

View File

@ -974,10 +974,12 @@ Default value: false
- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting - [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting
### allow_experimental_optimized_row_order ### optimize_row_order
Controls if the row order should be optimized during inserts to improve the compressibility of the newly inserted table part. Controls if the row order should be optimized during inserts to improve the compressibility of the newly inserted table part.
Only has an effect for ordinary MergeTree-engine tables. Does nothing for specialized MergeTree engine tables (e.g. CollapsingMergeTree).
MergeTree tables are (optionally) compressed using [compression codecs](../../sql-reference/statements/create/table.md#column_compression_codec). MergeTree tables are (optionally) compressed using [compression codecs](../../sql-reference/statements/create/table.md#column_compression_codec).
Generic compression codecs such as LZ4 and ZSTD achieve maximum compression rates if the data exposes patterns. Generic compression codecs such as LZ4 and ZSTD achieve maximum compression rates if the data exposes patterns.
Long runs of the same value typically compress very well. Long runs of the same value typically compress very well.

View File

@ -1417,6 +1417,17 @@ Compression method used in output Parquet format. Supported codecs: `snappy`, `l
Default value: `lz4`. Default value: `lz4`.
### input_format_parquet_max_block_size {#input_format_parquet_max_block_size}
Max block row size for parquet reader. By controlling the number of rows in each block, you can control the memory usage,
and in some operators that cache blocks, you can improve the accuracy of the operator's memory control.
Default value: `65409`.
### input_format_parquet_prefer_block_bytes {#input_format_parquet_prefer_block_bytes}
Average block bytes output by parquet reader. Lowering this value when reading highly compressed Parquet files relieves memory pressure.
Default value: `65409 * 256 = 16744704`
## Hive format settings {#hive-format-settings} ## Hive format settings {#hive-format-settings}
### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter} ### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}

View File

@ -18,7 +18,7 @@ This tool works via HTTP, not via pipes, shared memory, or TCP because:
However it can be used as standalone tool from command line with the following However it can be used as standalone tool from command line with the following
parameters in POST-request URL: parameters in POST-request URL:
- `connection_string` -- ODBC connection string. - `connection_string` -- ODBC connection string.
- `columns` -- columns in ClickHouse NamesAndTypesList format, name in backticks, - `sample_block` -- columns description in ClickHouse NamesAndTypesList format, name in backticks,
type as string. Name and type are space separated, rows separated with type as string. Name and type are space separated, rows separated with
newline. newline.
- `max_block_size` -- optional parameter, sets maximum size of single block. - `max_block_size` -- optional parameter, sets maximum size of single block.

View File

@ -106,8 +106,8 @@ To work with these states, use:
- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engine. - [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engine.
- [finalizeAggregation](../../sql-reference/functions/other-functions.md#function-finalizeaggregation) function. - [finalizeAggregation](../../sql-reference/functions/other-functions.md#function-finalizeaggregation) function.
- [runningAccumulate](../../sql-reference/functions/other-functions.md#runningaccumulate) function. - [runningAccumulate](../../sql-reference/functions/other-functions.md#runningaccumulate) function.
- [-Merge](#aggregate_functions_combinators-merge) combinator. - [-Merge](#-merge) combinator.
- [-MergeState](#aggregate_functions_combinators-mergestate) combinator. - [-MergeState](#-mergestate) combinator.
## -Merge ## -Merge

View File

@ -82,10 +82,12 @@ FROM
In this case, you should remember that you do not know the histogram bin borders. In this case, you should remember that you do not know the histogram bin borders.
## sequenceMatch(pattern)(timestamp, cond1, cond2, ...) ## sequenceMatch
Checks whether the sequence contains an event chain that matches the pattern. Checks whether the sequence contains an event chain that matches the pattern.
**Syntax**
``` sql ``` sql
sequenceMatch(pattern)(timestamp, cond1, cond2, ...) sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
``` ```
@ -102,7 +104,7 @@ Events that occur at the same second may lay in the sequence in an undefined ord
**Parameters** **Parameters**
- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). - `pattern` — Pattern string. See [Pattern syntax](#sequencematch).
**Returned values** **Returned values**
@ -170,9 +172,9 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM
**See Also** **See Also**
- [sequenceCount](#function-sequencecount) - [sequenceCount](#sequencecount)
## sequenceCount(pattern)(time, cond1, cond2, ...) ## sequenceCount
Counts the number of event chains that matched the pattern. The function searches event chains that do not overlap. It starts to search for the next chain after the current chain is matched. Counts the number of event chains that matched the pattern. The function searches event chains that do not overlap. It starts to search for the next chain after the current chain is matched.
@ -180,6 +182,8 @@ Counts the number of event chains that matched the pattern. The function searche
Events that occur at the same second may lay in the sequence in an undefined order affecting the result. Events that occur at the same second may lay in the sequence in an undefined order affecting the result.
::: :::
**Syntax**
``` sql ``` sql
sequenceCount(pattern)(timestamp, cond1, cond2, ...) sequenceCount(pattern)(timestamp, cond1, cond2, ...)
``` ```
@ -192,7 +196,7 @@ sequenceCount(pattern)(timestamp, cond1, cond2, ...)
**Parameters** **Parameters**
- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). - `pattern` — Pattern string. See [Pattern syntax](#sequencematch).
**Returned values** **Returned values**
@ -229,7 +233,7 @@ SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t
**See Also** **See Also**
- [sequenceMatch](#function-sequencematch) - [sequenceMatch](#sequencematch)
## windowFunnel ## windowFunnel

View File

@ -0,0 +1,95 @@
---
slug: /en/sql-reference/aggregate-functions/reference/flamegraph
sidebar_position: 110
---
# flameGraph
Aggregate function which builds a [flamegraph](https://www.brendangregg.com/flamegraphs.html) using the list of stacktraces. Outputs an array of strings which can be used by [flamegraph.pl utility](https://github.com/brendangregg/FlameGraph) to render an SVG of the flamegraph.
## Syntax
```sql
flameGraph(traces, [size], [ptr])
```
## Parameters
- `traces` — a stacktrace. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)).
- `size` — an allocation size for memory profiling. (optional - default `1`). [UInt64](../../data-types/int-uint.md).
- `ptr` — an allocation address. (optional - default `0`). [UInt64](../../data-types/int-uint.md).
:::note
In the case where `ptr != 0`, a flameGraph will map allocations (size > 0) and deallocations (size < 0) with the same size and ptr.
Only allocations which were not freed are shown. Non-mapped deallocations are ignored.
:::
## Returned value
- An array of strings for use with [flamegraph.pl utility](https://github.com/brendangregg/FlameGraph). [Array](../../data-types/array.md)([String](../../data-types/string.md)).
## Examples
### Building a flamegraph based on a CPU query profiler
```sql
SET query_profiler_cpu_time_period_ns=10000000;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
```
```text
clickhouse client --allow_introspection_functions=1 -q "select arrayJoin(flameGraph(arrayReverse(trace))) from system.trace_log where trace_type = 'CPU' and query_id = 'xxx'" | ~/dev/FlameGraph/flamegraph.pl > flame_cpu.svg
```
### Building a flamegraph based on a memory query profiler, showing all allocations
```sql
SET memory_profiler_sample_probability=1, max_untracked_memory=1;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
```
```text
clickhouse client --allow_introspection_functions=1 -q "select arrayJoin(flameGraph(trace, size)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem.svg
```
### Building a flamegraph based on a memory query profiler, showing allocations which were not deallocated in query context
```sql
SET memory_profiler_sample_probability=1, max_untracked_memory=1, use_uncompressed_cache=1, merge_tree_max_rows_to_use_cache=100000000000, merge_tree_max_bytes_to_use_cache=1000000000000;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
```
```text
clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, size, ptr)) FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx'" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_untracked.svg
```
### Building a flamegraph based on a memory query profiler, showing active allocations at a fixed point in time
```sql
SET memory_profiler_sample_probability=1, max_untracked_memory=1;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
```
- 1 - Memory usage per second
```sql
SELECT event_time, m, formatReadableSize(max(s) as m) FROM (SELECT event_time, sum(size) OVER (ORDER BY event_time) AS s FROM system.trace_log WHERE query_id = 'xxx' AND trace_type = 'MemorySample') GROUP BY event_time ORDER BY event_time;
```
- 2 - Find a time point with maximal memory usage
```sql
SELECT argMax(event_time, s), max(s) FROM (SELECT event_time, sum(size) OVER (ORDER BY event_time) AS s FROM system.trace_log WHERE query_id = 'xxx' AND trace_type = 'MemorySample');
```
- 3 - Fix active allocations at fixed point of time
```text
clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, size, ptr)) FROM (SELECT * FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx' AND event_time <= 'yyy' ORDER BY event_time)" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_pos.svg
```
- 4 - Find deallocations at fixed point of time
```text
clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, -size, ptr)) FROM (SELECT * FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx' AND event_time > 'yyy' ORDER BY event_time desc)" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_neg.svg
```

View File

@ -58,6 +58,7 @@ ClickHouse-specific aggregate functions:
- [topKWeighted](../reference/topkweighted.md) - [topKWeighted](../reference/topkweighted.md)
- [deltaSum](../reference/deltasum.md) - [deltaSum](../reference/deltasum.md)
- [deltaSumTimestamp](../reference/deltasumtimestamp.md) - [deltaSumTimestamp](../reference/deltasumtimestamp.md)
- [flameGraph](../reference/flame_graph.md)
- [groupArray](../reference/grouparray.md) - [groupArray](../reference/grouparray.md)
- [groupArrayLast](../reference/grouparraylast.md) - [groupArrayLast](../reference/grouparraylast.md)
- [groupUniqArray](../reference/groupuniqarray.md) - [groupUniqArray](../reference/groupuniqarray.md)

View File

@ -3,7 +3,7 @@ slug: /en/sql-reference/aggregate-functions/reference/stochasticlinearregression
sidebar_position: 221 sidebar_position: 221
--- ---
# stochasticLinearRegression # stochasticLinearRegression {#agg_functions_stochasticlinearregression_parameters}
This function implements stochastic linear regression. It supports custom parameters for learning rate, L2 regularization coefficient, mini-batch size, and has a few methods for updating weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), and [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)). This function implements stochastic linear regression. It supports custom parameters for learning rate, L2 regularization coefficient, mini-batch size, and has a few methods for updating weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), and [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)).
@ -72,5 +72,5 @@ The query will return a column of predicted values. Note that first argument of
**See Also** **See Also**
- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) - [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#stochasticlogisticregression)
- [Difference between linear and logistic regressions](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) - [Difference between linear and logistic regressions](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression)

View File

@ -11,7 +11,7 @@ This function implements stochastic logistic regression. It can be used for bina
Parameters are exactly the same as in stochasticLinearRegression: Parameters are exactly the same as in stochasticLinearRegression:
`learning rate`, `l2 regularization coefficient`, `mini-batch size`, `method for updating weights`. `learning rate`, `l2 regularization coefficient`, `mini-batch size`, `method for updating weights`.
For more information see [parameters](#agg_functions-stochasticlinearregression-parameters). For more information see [parameters](../reference/stochasticlinearregression.md/#parameters).
``` text ``` text
stochasticLogisticRegression(1.0, 1.0, 10, 'SGD') stochasticLogisticRegression(1.0, 1.0, 10, 'SGD')

View File

@ -27,7 +27,7 @@ Returns an integer of type `Float64`.
**Implementation details** **Implementation details**
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable` function](#varPopStable). This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable`](#varpopstable) function.
**Example** **Example**
@ -76,7 +76,7 @@ Returns an integer of type `Float64`.
**Implementation details** **Implementation details**
Unlike [`varPop()`](#varPop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations. Unlike [`varPop`](#varpop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations.
**Example** **Example**

View File

@ -40,7 +40,7 @@ Where:
The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead. The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead.
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable` function](#varSampStable). This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable`](#varsampstable) function.
**Example** **Example**
@ -82,11 +82,11 @@ varSampStable(expr)
**Returned value** **Returned value**
The `varSampStable()` function returns a Float64 value representing the sample variance of the input data set. The `varSampStable` function returns a Float64 value representing the sample variance of the input data set.
**Implementation details** **Implementation details**
The `varSampStable()` function calculates the sample variance using the same formula as the [`varSamp()`](#varSamp function): The `varSampStable` function calculates the sample variance using the same formula as the [`varSamp`](#varsamp) function:
```plaintext ```plaintext
∑(x - mean(x))^2 / (n - 1) ∑(x - mean(x))^2 / (n - 1)
@ -97,9 +97,9 @@ Where:
- `mean(x)` is the arithmetic mean of the data set. - `mean(x)` is the arithmetic mean of the data set.
- `n` is the number of data points in the data set. - `n` is the number of data points in the data set.
The difference between `varSampStable()` and `varSamp()` is that `varSampStable()` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values. The difference between `varSampStable` and `varSamp` is that `varSampStable` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values.
Like `varSamp()`, the `varSampStable()` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable()` function](./varpop#varpopstable) instead. Like `varSamp`, the `varSampStable` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable`](./varpop#varpopstable) function instead.
**Example** **Example**
@ -125,4 +125,4 @@ Response:
0.865 0.865
``` ```
This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable()` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp()` due to the more precise handling of floating-point arithmetic. This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp` due to the more precise handling of floating-point arithmetic.

View File

@ -33,7 +33,7 @@ Result:
## Ring ## Ring
`Ring` is a simple polygon without holes stored as an array of points: [Array](array.md)([Point](#point-data-type)). `Ring` is a simple polygon without holes stored as an array of points: [Array](array.md)([Point](#point)).
**Example** **Example**
@ -54,7 +54,7 @@ Result:
## Polygon ## Polygon
`Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring-data-type)). First element of outer array is the outer shape of polygon and all the following elements are holes. `Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring)). First element of outer array is the outer shape of polygon and all the following elements are holes.
**Example** **Example**
@ -76,7 +76,7 @@ Result:
## MultiPolygon ## MultiPolygon
`MultiPolygon` consists of multiple polygons and is stored as an array of polygons: [Array](array.md)([Polygon](#polygon-data-type)). `MultiPolygon` consists of multiple polygons and is stored as an array of polygons: [Array](array.md)([Polygon](#polygon)).
**Example** **Example**

View File

@ -16,7 +16,7 @@ ClickHouse supports special functions for working with dictionaries that can be
ClickHouse supports: ClickHouse supports:
- Dictionaries with a [set of functions](../../sql-reference/functions/ext-dict-functions.md). - Dictionaries with a [set of functions](../../sql-reference/functions/ext-dict-functions.md).
- [Embedded dictionaries](#embedded_dictionaries) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md). - [Embedded dictionaries](#embedded-dictionaries) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md).
:::tip Tutorial :::tip Tutorial
@ -82,7 +82,7 @@ You can [configure](#configuring-a-dictionary) any number of dictionaries in the
You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../../sql-reference/functions/other-functions.md) function). This functionality is not related to dictionaries. You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../../sql-reference/functions/other-functions.md) function). This functionality is not related to dictionaries.
::: :::
## Configuring a Dictionary {#configuring-a-dictionary} ## Configuring a Dictionary
<CloudDetails /> <CloudDetails />
@ -123,7 +123,7 @@ LAYOUT(...) -- Memory layout configuration
LIFETIME(...) -- Lifetime of dictionary in memory LIFETIME(...) -- Lifetime of dictionary in memory
``` ```
## Storing Dictionaries in Memory {#storing-dictionaries-in-memory} ## Storing Dictionaries in Memory
There are a variety of ways to store dictionaries in memory. There are a variety of ways to store dictionaries in memory.
@ -415,7 +415,7 @@ or
LAYOUT(COMPLEX_KEY_HASHED_ARRAY([SHARDS 1])) LAYOUT(COMPLEX_KEY_HASHED_ARRAY([SHARDS 1]))
``` ```
### range_hashed {#range_hashed} ### range_hashed
The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values. The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values.
@ -679,7 +679,7 @@ When searching for a dictionary, the cache is searched first. For each block of
If keys are not found in dictionary, then update cache task is created and added into update queue. Update queue properties can be controlled with settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`. If keys are not found in dictionary, then update cache task is created and added into update queue. Update queue properties can be controlled with settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`.
For cache dictionaries, the expiration [lifetime](#dictionary-updates) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell's value is not used and the key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with setting `allow_read_expired_keys`. For cache dictionaries, the expiration [lifetime](#refreshing-dictionary-data-using-lifetime) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell's value is not used and the key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with setting `allow_read_expired_keys`.
This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the [system.dictionaries](../../operations/system-tables/dictionaries.md) table. This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the [system.dictionaries](../../operations/system-tables/dictionaries.md) table.
@ -899,7 +899,7 @@ Other types are not supported yet. The function returns the attribute for the pr
Data must completely fit into RAM. Data must completely fit into RAM.
## Refreshing dictionary data using LIFETIME {#lifetime} ## Refreshing dictionary data using LIFETIME
ClickHouse periodically updates dictionaries based on the `LIFETIME` tag (defined in seconds). `LIFETIME` is the update interval for fully downloaded dictionaries and the invalidation interval for cached dictionaries. ClickHouse periodically updates dictionaries based on the `LIFETIME` tag (defined in seconds). `LIFETIME` is the update interval for fully downloaded dictionaries and the invalidation interval for cached dictionaries.
@ -1031,7 +1031,7 @@ SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15))
... ...
``` ```
## Dictionary Sources {#dictionary-sources} ## Dictionary Sources
<CloudDetails /> <CloudDetails />
@ -1065,7 +1065,7 @@ SOURCE(SOURCE_TYPE(param1 val1 ... paramN valN)) -- Source configuration
The source is configured in the `source` section. The source is configured in the `source` section.
For source types [Local file](#local_file), [Executable file](#executable), [HTTP(s)](#https), [ClickHouse](#clickhouse) For source types [Local file](#local-file), [Executable file](#executable-file), [HTTP(s)](#https), [ClickHouse](#clickhouse)
optional settings are available: optional settings are available:
``` xml ``` xml
@ -1089,10 +1089,10 @@ SETTINGS(format_csv_allow_single_quotes = 0)
Types of sources (`source_type`): Types of sources (`source_type`):
- [Local file](#local_file) - [Local file](#local-file)
- [Executable File](#executable) - [Executable File](#executable-file)
- [Executable Pool](#executable_pool) - [Executable Pool](#executable-pool)
- [HTTP(S)](#http) - [HTTP(S)](#https)
- DBMS - DBMS
- [ODBC](#odbc) - [ODBC](#odbc)
- [MySQL](#mysql) - [MySQL](#mysql)
@ -1102,7 +1102,7 @@ Types of sources (`source_type`):
- [Cassandra](#cassandra) - [Cassandra](#cassandra)
- [PostgreSQL](#postgresql) - [PostgreSQL](#postgresql)
### Local File {#local_file} ### Local File
Example of settings: Example of settings:
@ -1132,9 +1132,9 @@ When a dictionary with source `FILE` is created via DDL command (`CREATE DICTION
- [Dictionary function](../../sql-reference/table-functions/dictionary.md#dictionary-function) - [Dictionary function](../../sql-reference/table-functions/dictionary.md#dictionary-function)
### Executable File {#executable} ### Executable File
Working with executable files depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file's STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data. Working with executable files depends on [how the dictionary is stored in memory](#storing-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file's STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data.
Example of settings: Example of settings:
@ -1161,7 +1161,7 @@ Setting fields:
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node. That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
### Executable Pool {#executable_pool} ### Executable Pool
Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary [is stored](#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts. Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary [is stored](#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts.
@ -1196,9 +1196,9 @@ Setting fields:
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node. That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node.
### HTTP(S) {#https} ### HTTP(S)
Working with an HTTP(S) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. Working with an HTTP(S) server depends on [how the dictionary is stored in memory](#storing-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method.
Example of settings: Example of settings:
@ -1285,7 +1285,7 @@ Setting fields:
- `db` Name of the database. Omit it if the database name is set in the `<connection_string>` parameters. - `db` Name of the database. Omit it if the database name is set in the `<connection_string>` parameters.
- `table` Name of the table and schema if exists. - `table` Name of the table and schema if exists.
- `connection_string` Connection string. - `connection_string` Connection string.
- `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). - `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime).
- `query` The custom query. Optional parameter. - `query` The custom query. Optional parameter.
:::note :::note
@ -1575,7 +1575,7 @@ Setting fields:
- `where` The selection criteria. The syntax for conditions is the same as for `WHERE` clause in MySQL, for example, `id > 10 AND id < 20`. Optional parameter. - `where` The selection criteria. The syntax for conditions is the same as for `WHERE` clause in MySQL, for example, `id > 10 AND id < 20`. Optional parameter.
- `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). - `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime).
- `fail_on_connection_loss` The configuration parameter that controls behavior of the server on connection loss. If `true`, an exception is thrown immediately if the connection between client and server was lost. If `false`, the ClickHouse server retries to execute the query three times before throwing an exception. Note that retrying leads to increased response times. Default value: `false`. - `fail_on_connection_loss` The configuration parameter that controls behavior of the server on connection loss. If `true`, an exception is thrown immediately if the connection between client and server was lost. If `false`, the ClickHouse server retries to execute the query three times before throwing an exception. Note that retrying leads to increased response times. Default value: `false`.
@ -1672,7 +1672,7 @@ Setting fields:
- `db` Name of the database. - `db` Name of the database.
- `table` Name of the table. - `table` Name of the table.
- `where` The selection criteria. May be omitted. - `where` The selection criteria. May be omitted.
- `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). - `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime).
- `secure` - Use ssl for connection. - `secure` - Use ssl for connection.
- `query` The custom query. Optional parameter. - `query` The custom query. Optional parameter.
@ -1849,7 +1849,7 @@ Setting fields:
- `db` Name of the database. - `db` Name of the database.
- `table` Name of the table. - `table` Name of the table.
- `where` The selection criteria. The syntax for conditions is the same as for `WHERE` clause in PostgreSQL. For example, `id > 10 AND id < 20`. Optional parameter. - `where` The selection criteria. The syntax for conditions is the same as for `WHERE` clause in PostgreSQL. For example, `id > 10 AND id < 20`. Optional parameter.
- `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). - `invalidate_query` Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime).
- `query` The custom query. Optional parameter. - `query` The custom query. Optional parameter.
:::note :::note
@ -1873,7 +1873,7 @@ LAYOUT(FLAT())
LIFETIME(0); LIFETIME(0);
``` ```
## Dictionary Key and Fields {#dictionary-key-and-fields} ## Dictionary Key and Fields
<CloudDetails /> <CloudDetails />
@ -1963,7 +1963,7 @@ PRIMARY KEY Id
### Composite Key ### Composite Key
The key can be a `tuple` from any types of fields. The [layout](#storig-dictionaries-in-memory) in this case must be `complex_key_hashed` or `complex_key_cache`. The key can be a `tuple` from any types of fields. The [layout](#storing-dictionaries-in-memory) in this case must be `complex_key_hashed` or `complex_key_cache`.
:::tip :::tip
A composite key can consist of a single element. This makes it possible to use a string as the key, for instance. A composite key can consist of a single element. This makes it possible to use a string as the key, for instance.
@ -2030,17 +2030,17 @@ CREATE DICTIONARY somename (
Configuration fields: Configuration fields:
| Tag | Description | Required | | Tag | Description | Required |
|------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------| |------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
| `name` | Column name. | Yes | | `name` | Column name. | Yes |
| `type` | ClickHouse data type: [UInt8](../../sql-reference/data-types/int-uint.md), [UInt16](../../sql-reference/data-types/int-uint.md), [UInt32](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md), [Int8](../../sql-reference/data-types/int-uint.md), [Int16](../../sql-reference/data-types/int-uint.md), [Int32](../../sql-reference/data-types/int-uint.md), [Int64](../../sql-reference/data-types/int-uint.md), [Float32](../../sql-reference/data-types/float.md), [Float64](../../sql-reference/data-types/float.md), [UUID](../../sql-reference/data-types/uuid.md), [Decimal32](../../sql-reference/data-types/decimal.md), [Decimal64](../../sql-reference/data-types/decimal.md), [Decimal128](../../sql-reference/data-types/decimal.md), [Decimal256](../../sql-reference/data-types/decimal.md),[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), [String](../../sql-reference/data-types/string.md), [Array](../../sql-reference/data-types/array.md).<br/>ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.<br/>[Nullable](../../sql-reference/data-types/nullable.md) is currently supported for [Flat](#flat), [Hashed](#hashed), [ComplexKeyHashed](#complex_key_hashed), [Direct](#direct), [ComplexKeyDirect](#complex_key_direct), [RangeHashed](#range_hashed), Polygon, [Cache](#cache), [ComplexKeyCache](#complex_key_cache), [SSDCache](#ssd_cache), [SSDComplexKeyCache](#complex_key_ssd_cache) dictionaries. In [IPTrie](#ip_trie) dictionaries `Nullable` types are not supported. 
| Yes | | `type` | ClickHouse data type: [UInt8](../../sql-reference/data-types/int-uint.md), [UInt16](../../sql-reference/data-types/int-uint.md), [UInt32](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md), [Int8](../../sql-reference/data-types/int-uint.md), [Int16](../../sql-reference/data-types/int-uint.md), [Int32](../../sql-reference/data-types/int-uint.md), [Int64](../../sql-reference/data-types/int-uint.md), [Float32](../../sql-reference/data-types/float.md), [Float64](../../sql-reference/data-types/float.md), [UUID](../../sql-reference/data-types/uuid.md), [Decimal32](../../sql-reference/data-types/decimal.md), [Decimal64](../../sql-reference/data-types/decimal.md), [Decimal128](../../sql-reference/data-types/decimal.md), [Decimal256](../../sql-reference/data-types/decimal.md),[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), [String](../../sql-reference/data-types/string.md), [Array](../../sql-reference/data-types/array.md).<br/>ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.<br/>[Nullable](../../sql-reference/data-types/nullable.md) is currently supported for [Flat](#flat), [Hashed](#hashed), [ComplexKeyHashed](#complex_key_hashed), [Direct](#direct), [ComplexKeyDirect](#complex_key_direct), [RangeHashed](#range_hashed), Polygon, [Cache](#cache), [ComplexKeyCache](#complex_key_cache), [SSDCache](#ssd_cache), [SSDComplexKeyCache](#complex_key_ssd_cache) dictionaries. In [IPTrie](#ip_trie) dictionaries `Nullable` types are not supported. | Yes |
| `null_value` | Default value for a non-existing element.<br/>In the example, it is an empty string. [NULL](../syntax.md#null) value can be used only for the `Nullable` types (see the previous line with types description). | Yes | | `null_value` | Default value for a non-existing element.<br/>In the example, it is an empty string. [NULL](../syntax.md#null) value can be used only for the `Nullable` types (see the previous line with types description). | Yes |
| `expression` | [Expression](../../sql-reference/syntax.md#expressions) that ClickHouse executes on the value.<br/>The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.<br/><br/>Default value: no expression. | No | | `expression` | [Expression](../../sql-reference/syntax.md#expressions) that ClickHouse executes on the value.<br/>The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.<br/><br/>Default value: no expression. | No |
| <a name="hierarchical-dict-attr"></a> `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](#hierarchical-dictionaries).<br/><br/>Default value: `false`. | No | | <a name="hierarchical-dict-attr"></a> `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](#hierarchical-dictionaries).<br/><br/>Default value: `false`. | No |
| `injective` | Flag that shows whether the `id -> attribute` image is [injective](https://en.wikipedia.org/wiki/Injective_function).<br/>If `true`, ClickHouse can automatically place after the `GROUP BY` clause the requests to dictionaries with injection. Usually it significantly reduces the amount of such requests.<br/><br/>Default value: `false`. | No | | `injective` | Flag that shows whether the `id -> attribute` image is [injective](https://en.wikipedia.org/wiki/Injective_function).<br/>If `true`, ClickHouse can automatically place after the `GROUP BY` clause the requests to dictionaries with injection. Usually it significantly reduces the amount of such requests.<br/><br/>Default value: `false`. | No |
| `is_object_id` | Flag that shows whether the query is executed for a MongoDB document by `ObjectID`.<br/><br/>Default value: `false`. | No | | `is_object_id` | Flag that shows whether the query is executed for a MongoDB document by `ObjectID`.<br/><br/>Default value: `false`. | No |
## Hierarchical Dictionaries {#hierarchical-dictionaries} ## Hierarchical Dictionaries
ClickHouse supports hierarchical dictionaries with a [numeric key](#numeric-key). ClickHouse supports hierarchical dictionaries with a [numeric key](#numeric-key).
@ -2165,7 +2165,7 @@ Points can be specified as an array or a tuple of their coordinates. In the curr
The user can upload their own data in all formats supported by ClickHouse. The user can upload their own data in all formats supported by ClickHouse.
There are 3 types of [in-memory storage](#storig-dictionaries-in-memory) available: There are 3 types of [in-memory storage](#storing-dictionaries-in-memory) available:
- `POLYGON_SIMPLE`. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes. - `POLYGON_SIMPLE`. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes.
@ -2435,7 +2435,7 @@ LIFETIME(0)
LAYOUT(regexp_tree); LAYOUT(regexp_tree);
``` ```
## Embedded Dictionaries {#embedded-dictionaries} ## Embedded Dictionaries
<SelfManaged /> <SelfManaged />

View File

@ -1261,7 +1261,7 @@ SELECT arraySort((x) -> -x, [1, 2, 3]) as res;
└─────────┘ └─────────┘
``` ```
For each element of the source array, the lambda function returns the sorting key, that is, \[1 -\> -1, 2 -\> -2, 3 -\> -3\]. Since the `arraySort` function sorts the keys in ascending order, the result is \[3, 2, 1\]. Thus, the `(x) -> -x` lambda function sets the [descending order](#reverse-sort) in a sorting. For each element of the source array, the lambda function returns the sorting key, that is, \[1 -\> -1, 2 -\> -2, 3 -\> -3\]. Since the `arraySort` function sorts the keys in ascending order, the result is \[3, 2, 1\]. Thus, the `(x) -> -x` lambda function sets the [descending order](#arrayreversesort) in a sorting.
The lambda function can accept multiple arguments. In this case, you need to pass the `arraySort` function several arrays of identical length that the arguments of lambda function will correspond to. The resulting array will consist of elements from the first input array; elements from the next input array(s) specify the sorting keys. For example: The lambda function can accept multiple arguments. In this case, you need to pass the `arraySort` function several arrays of identical length that the arguments of lambda function will correspond to. The resulting array will consist of elements from the first input array; elements from the next input array(s) specify the sorting keys. For example:
@ -1307,10 +1307,15 @@ To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.
Same as `arraySort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in ascending order. Remaining elements `(limit..N]` shall contain elements in unspecified order. Same as `arraySort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in ascending order. Remaining elements `(limit..N]` shall contain elements in unspecified order.
## arrayReverseSort(\[func,\] arr, ...) {#reverse-sort} ## arrayReverseSort
Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of the `arrayReverseSort` description. Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of the `arrayReverseSort` description.
**Syntax**
```sql
arrayReverseSort([func,] arr, ...)
```
Example of integer values sorting: Example of integer values sorting:
``` sql ``` sql
@ -1907,10 +1912,16 @@ FROM numbers(1,10);
- [arrayReduce](#arrayreduce) - [arrayReduce](#arrayreduce)
## arrayReverse(arr) ## arrayReverse
Returns an array of the same size as the original array containing the elements in reverse order. Returns an array of the same size as the original array containing the elements in reverse order.
**Syntax**
```sql
arrayReverse(arr)
```
Example: Example:
``` sql ``` sql

View File

@ -74,7 +74,7 @@ bitmapSubsetInRange(bitmap, range_start, range_end)
**Arguments** **Arguments**
- `bitmap` [Bitmap object](#bitmap_functions-bitmapbuild). - `bitmap` [Bitmap object](#bitmapbuild).
- `range_start` Start of the range (inclusive). [UInt32](../data-types/int-uint.md). - `range_start` Start of the range (inclusive). [UInt32](../data-types/int-uint.md).
- `range_end` End of the range (exclusive). [UInt32](../data-types/int-uint.md). - `range_end` End of the range (exclusive). [UInt32](../data-types/int-uint.md).
@ -104,7 +104,7 @@ bitmapSubsetLimit(bitmap, range_start, cardinality_limit)
**Arguments** **Arguments**
- `bitmap` [Bitmap object](#bitmap_functions-bitmapbuild). - `bitmap` [Bitmap object](#bitmapbuild).
- `range_start` Start of the range (inclusive). [UInt32](../data-types/int-uint.md). - `range_start` Start of the range (inclusive). [UInt32](../data-types/int-uint.md).
- `cardinality_limit` Maximum cardinality of the subset. [UInt32](../data-types/int-uint.md). - `cardinality_limit` Maximum cardinality of the subset. [UInt32](../data-types/int-uint.md).
@ -134,7 +134,7 @@ subBitmap(bitmap, offset, cardinality_limit)
**Arguments** **Arguments**
- `bitmap` The bitmap. [Bitmap object](#bitmap_functions-bitmapbuild). - `bitmap` The bitmap. [Bitmap object](#bitmapbuild).
- `offset` The position of the first element of the subset. [UInt32](../data-types/int-uint.md). - `offset` The position of the first element of the subset. [UInt32](../data-types/int-uint.md).
- `cardinality_limit` The maximum number of elements in the subset. [UInt32](../data-types/int-uint.md). - `cardinality_limit` The maximum number of elements in the subset. [UInt32](../data-types/int-uint.md).
@ -162,7 +162,7 @@ bitmapContains(bitmap, needle)
**Arguments** **Arguments**
- `bitmap` [Bitmap object](#bitmap_functions-bitmapbuild). - `bitmap` [Bitmap object](#bitmapbuild).
- `needle` Searched bit value. [UInt32](../data-types/int-uint.md). - `needle` Searched bit value. [UInt32](../data-types/int-uint.md).
**Returned values** **Returned values**
@ -188,7 +188,7 @@ Result:
Checks whether two bitmaps intersect. Checks whether two bitmaps intersect.
If `bitmap2` contains exactly one element, consider using [bitmapContains](#bitmap_functions-bitmapcontains) instead as it works more efficiently. If `bitmap2` contains exactly one element, consider using [bitmapContains](#bitmapcontains) instead as it works more efficiently.
**Syntax** **Syntax**

View File

@ -83,7 +83,7 @@ Result:
``` ```
## makeDate32 ## makeDate32
Like [makeDate](#makeDate) but produces a [Date32](../data-types/date32.md). Like [makeDate](#makedate) but produces a [Date32](../data-types/date32.md).
## makeDateTime ## makeDateTime
@ -214,7 +214,7 @@ Result:
**See also** **See also**
- [serverTimeZone](#serverTimeZone) - [serverTimeZone](#servertimezone)
## serverTimeZone ## serverTimeZone
@ -249,7 +249,7 @@ Result:
**See also** **See also**
- [timeZone](#timeZone) - [timeZone](#timezone)
## toTimeZone ## toTimeZone
@ -305,7 +305,7 @@ int32samoa: 1546300800
**See Also** **See Also**
- [formatDateTime](#formatDateTime) - supports non-constant timezone. - [formatDateTime](#formatdatetime) - supports non-constant timezone.
- [toString](type-conversion-functions.md#tostring) - supports non-constant timezone. - [toString](type-conversion-functions.md#tostring) - supports non-constant timezone.
## timeZoneOf ## timeZoneOf
@ -1006,7 +1006,7 @@ toStartOfWeek(t[, mode[, timezone]])
**Arguments** **Arguments**
- `t` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) - `t` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
- `mode` - determines the first day of the week as described in the [toWeek()](date-time-functions#toweek) function - `mode` - determines the first day of the week as described in the [toWeek](#toweek) function
- `timezone` - Optional parameter, it behaves like any other conversion function - `timezone` - Optional parameter, it behaves like any other conversion function
**Returned value** **Returned value**
@ -1049,7 +1049,7 @@ toLastDayOfWeek(t[, mode[, timezone]])
**Arguments** **Arguments**
- `t` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md) - `t` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
- `mode` - determines the last day of the week as described in the [toWeek()](date-time-functions#toweek) function - `mode` - determines the last day of the week as described in the [toWeek](#toweek) function
- `timezone` - Optional parameter, it behaves like any other conversion function - `timezone` - Optional parameter, it behaves like any other conversion function
**Returned value** **Returned value**
@ -1719,7 +1719,7 @@ Result:
**See Also** **See Also**
- [fromDaysSinceYearZero](#fromDaysSinceYearZero) - [fromDaysSinceYearZero](#fromdayssinceyearzero)
## fromDaysSinceYearZero ## fromDaysSinceYearZero
@ -1759,11 +1759,11 @@ Result:
**See Also** **See Also**
- [toDaysSinceYearZero](#toDaysSinceYearZero) - [toDaysSinceYearZero](#todayssinceyearzero)
## fromDaysSinceYearZero32 ## fromDaysSinceYearZero32
Like [fromDaysSinceYearZero](#fromDaysSinceYearZero) but returns a [Date32](../data-types/date32.md). Like [fromDaysSinceYearZero](#fromdayssinceyearzero) but returns a [Date32](../data-types/date32.md).
## age ## age
@ -1982,7 +1982,7 @@ Result:
**See Also** **See Also**
- [toStartOfInterval](#tostartofintervaldate_or_date_with_time-interval-x-unit--time_zone) - [toStartOfInterval](#tostartofinterval)
## date\_add ## date\_add
@ -2055,7 +2055,7 @@ Result:
**See Also** **See Also**
- [addDate](#addDate) - [addDate](#adddate)
## date\_sub ## date\_sub
@ -2129,7 +2129,7 @@ Result:
**See Also** **See Also**
- [subDate](#subDate) - [subDate](#subdate)
## timestamp\_add ## timestamp\_add
@ -2310,7 +2310,7 @@ Alias: `SUBDATE`
- [date_sub](#date_sub) - [date_sub](#date_sub)
## now {#now} ## now
Returns the current date and time at the moment of query analysis. The function is a constant expression. Returns the current date and time at the moment of query analysis. The function is a constant expression.
@ -3609,7 +3609,7 @@ SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64
└───────────────────────────────────────────────────────────────────────────────────────────────────────────┘ └───────────────────────────────────────────────────────────────────────────────────────────────────────────┘
``` ```
## formatDateTime {#formatDateTime} ## formatDateTime
Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column. Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column.
@ -3734,10 +3734,9 @@ LIMIT 10
**See Also** **See Also**
- [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax) - [formatDateTimeInJodaSyntax](#formatdatetimeinjodasyntax)
## formatDateTimeInJodaSyntax
## formatDateTimeInJodaSyntax {#formatDateTimeInJodaSyntax}
Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
@ -3902,11 +3901,11 @@ Result:
**See Also** **See Also**
- [fromUnixTimestampInJodaSyntax](##fromUnixTimestampInJodaSyntax) - [fromUnixTimestampInJodaSyntax](#fromunixtimestampinjodasyntax)
## fromUnixTimestampInJodaSyntax ## fromUnixTimestampInJodaSyntax
Same as [fromUnixTimestamp](#fromUnixTimestamp) but when called in the second way (two or three arguments), the formatting is performed using [Joda style](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL style. Same as [fromUnixTimestamp](#fromunixtimestamp) but when called in the second way (two or three arguments), the formatting is performed using [Joda style](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL style.
**Example:** **Example:**
@ -4121,7 +4120,7 @@ Result:
Returns the current date and time at the moment of query analysis. The function is a constant expression. Returns the current date and time at the moment of query analysis. The function is a constant expression.
:::note :::note
This function gives the same result that `now('UTC')` would. It was added only for MySQL support and [`now`](#now-now) is the preferred usage. This function gives the same result that `now('UTC')` would. It was added only for MySQL support and [`now`](#now) is the preferred usage.
::: :::
**Syntax** **Syntax**

View File

@ -12,7 +12,7 @@ For dictionaries created with [DDL queries](../../sql-reference/statements/creat
For information on connecting and configuring dictionaries, see [Dictionaries](../../sql-reference/dictionaries/index.md). For information on connecting and configuring dictionaries, see [Dictionaries](../../sql-reference/dictionaries/index.md).
## dictGet, dictGetOrDefault, dictGetOrNull {#dictGet} ## dictGet, dictGetOrDefault, dictGetOrNull
Retrieves values from a dictionary. Retrieves values from a dictionary.

View File

@ -4,6 +4,8 @@ sidebar_label: Geohash
title: "Functions for Working with Geohash" title: "Functions for Working with Geohash"
--- ---
## Geohash
[Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides Earth's surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer the geohash string, the more precise the geographic location. [Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides Earth's surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer the geohash string, the more precise the geographic location.
If you need to manually convert geographic coordinates to geohash strings, you can use [geohash.org](http://geohash.org/). If you need to manually convert geographic coordinates to geohash strings, you can use [geohash.org](http://geohash.org/).

View File

@ -4,6 +4,8 @@ sidebar_label: H3 Indexes
title: "Functions for Working with H3 Indexes" title: "Functions for Working with H3 Indexes"
--- ---
## H3 Index
[H3](https://eng.uber.com/h3/) is a geographical indexing system where the Earth's surface is divided into a grid of even hexagonal cells. This system is hierarchical, i.e. each hexagon on the top level ("parent") can be split into seven even but smaller ones ("children"), and so on. [H3](https://eng.uber.com/h3/) is a geographical indexing system where the Earth's surface is divided into a grid of even hexagonal cells. This system is hierarchical, i.e. each hexagon on the top level ("parent") can be split into seven even but smaller ones ("children"), and so on.
The level of the hierarchy is called `resolution` and can receive a value from `0` to `15`, where `0` is the `base` level with the largest and coarsest cells. The level of the hierarchy is called `resolution` and can receive a value from `0` to `15`, where `0` is the `base` level with the largest and coarsest cells.
@ -16,7 +18,7 @@ The full description of the H3 system is available at [the Uber Engineering site
## h3IsValid ## h3IsValid
Verifies whether the number is a valid [H3](#h3index) index. Verifies whether the number is a valid [H3](#h3-index) index.
**Syntax** **Syntax**
@ -51,7 +53,7 @@ Result:
## h3GetResolution ## h3GetResolution
Defines the resolution of the given [H3](#h3index) index. Defines the resolution of the given [H3](#h3-index) index.
**Syntax** **Syntax**
@ -86,7 +88,7 @@ Result:
## h3EdgeAngle ## h3EdgeAngle
Calculates the average length of the [H3](#h3index) hexagon edge in degrees. Calculates the average length of the [H3](#h3-index) hexagon edge in degrees.
**Syntax** **Syntax**
@ -100,7 +102,7 @@ h3EdgeAngle(resolution)
**Returned values** **Returned values**
- The average length of the [H3](#h3index) hexagon edge in degrees. [Float64](../../data-types/float.md). - The average length of the [H3](#h3-index) hexagon edge in degrees. [Float64](../../data-types/float.md).
**Example** **Example**
@ -120,7 +122,7 @@ Result:
## h3EdgeLengthM ## h3EdgeLengthM
Calculates the average length of the [H3](#h3index) hexagon edge in meters. Calculates the average length of the [H3](#h3-index) hexagon edge in meters.
**Syntax** **Syntax**
@ -134,7 +136,7 @@ h3EdgeLengthM(resolution)
**Returned values** **Returned values**
- The average length of the [H3](#h3index) hexagon edge in meters. [Float64](../../data-types/float.md). - The average length of the [H3](#h3-index) hexagon edge in meters. [Float64](../../data-types/float.md).
**Example** **Example**
@ -154,7 +156,7 @@ Result:
## h3EdgeLengthKm ## h3EdgeLengthKm
Calculates the average length of the [H3](#h3index) hexagon edge in kilometers. Calculates the average length of the [H3](#h3-index) hexagon edge in kilometers.
**Syntax** **Syntax**
@ -168,7 +170,7 @@ h3EdgeLengthKm(resolution)
**Returned values** **Returned values**
- The average length of the [H3](#h3index) hexagon edge in kilometers. [Float64](../../data-types/float.md). - The average length of the [H3](#h3-index) hexagon edge in kilometers. [Float64](../../data-types/float.md).
**Example** **Example**
@ -188,7 +190,7 @@ Result:
## geoToH3 ## geoToH3
Returns [H3](#h3index) point index `(lon, lat)` with specified resolution. Returns [H3](#h3-index) point index `(lon, lat)` with specified resolution.
**Syntax** **Syntax**
@ -225,7 +227,7 @@ Result:
## h3ToGeo ## h3ToGeo
Returns the centroid longitude and latitude corresponding to the provided [H3](#h3index) index. Returns the centroid longitude and latitude corresponding to the provided [H3](#h3-index) index.
**Syntax** **Syntax**
@ -294,7 +296,7 @@ Result:
## h3kRing ## h3kRing
Lists all the [H3](#h3index) hexagons in the radius of `k` from the given hexagon in random order. Lists all the [H3](#h3-index) hexagons in the radius of `k` from the given hexagon in random order.
**Syntax** **Syntax**
@ -335,7 +337,7 @@ Result:
## h3GetBaseCell ## h3GetBaseCell
Returns the base cell number of the [H3](#h3index) index. Returns the base cell number of the [H3](#h3-index) index.
**Syntax** **Syntax**
@ -437,7 +439,7 @@ Result:
## h3IndexesAreNeighbors ## h3IndexesAreNeighbors
Returns whether or not the provided [H3](#h3index) indexes are neighbors. Returns whether or not the provided [H3](#h3-index) indexes are neighbors.
**Syntax** **Syntax**
@ -473,7 +475,7 @@ Result:
## h3ToChildren ## h3ToChildren
Returns an array of child indexes for the given [H3](#h3index) index. Returns an array of child indexes for the given [H3](#h3-index) index.
**Syntax** **Syntax**
@ -508,7 +510,7 @@ Result:
## h3ToParent ## h3ToParent
Returns the parent (coarser) index containing the given [H3](#h3index) index. Returns the parent (coarser) index containing the given [H3](#h3-index) index.
**Syntax** **Syntax**
@ -609,7 +611,7 @@ Result:
## h3GetResolution ## h3GetResolution
Returns the resolution of the [H3](#h3index) index. Returns the resolution of the [H3](#h3-index) index.
**Syntax** **Syntax**
@ -643,7 +645,7 @@ Result:
## h3IsResClassIII ## h3IsResClassIII
Returns whether [H3](#h3index) index has a resolution with Class III orientation. Returns whether [H3](#h3-index) index has a resolution with Class III orientation.
**Syntax** **Syntax**
@ -678,7 +680,7 @@ Result:
## h3IsPentagon ## h3IsPentagon
Returns whether this [H3](#h3index) index represents a pentagonal cell. Returns whether this [H3](#h3-index) index represents a pentagonal cell.
**Syntax** **Syntax**
@ -713,7 +715,7 @@ Result:
## h3GetFaces ## h3GetFaces
Returns icosahedron faces intersected by a given [H3](#h3index) index. Returns icosahedron faces intersected by a given [H3](#h3-index) index.
**Syntax** **Syntax**
@ -815,7 +817,7 @@ Result:
## h3ToCenterChild ## h3ToCenterChild
Returns the center child (finer) [H3](#h3index) index contained by given [H3](#h3index) at the given resolution. Returns the center child (finer) [H3](#h3-index) index contained by given [H3](#h3-index) at the given resolution.
**Syntax** **Syntax**
@ -830,7 +832,7 @@ h3ToCenterChild(index, resolution)
**Returned values** **Returned values**
- [H3](#h3index) index of the center child contained by given [H3](#h3index) at the given resolution. [UInt64](../../data-types/int-uint.md). - [H3](#h3-index) index of the center child contained by given [H3](#h3-index) at the given resolution. [UInt64](../../data-types/int-uint.md).
**Example** **Example**

View File

@ -5,6 +5,8 @@ sidebar_label: S2 Geometry
# Functions for Working with S2 Index # Functions for Working with S2 Index
## S2Index
[S2](https://s2geometry.io/) is a geographical indexing system where all geographical data is represented on a three-dimensional sphere (similar to a globe). [S2](https://s2geometry.io/) is a geographical indexing system where all geographical data is represented on a three-dimensional sphere (similar to a globe).
In the S2 library points are represented as the S2 Index - a specific number which encodes internally a point on the surface of a unit sphere, unlike traditional (latitude, longitude) pairs. To get the S2 point index for a given point specified in the format (latitude, longitude) use the [geoToS2](#geotos2) function. Also, you can use the [s2ToGeo](#s2togeo) function for getting geographical coordinates corresponding to the specified S2 point index. In the S2 library points are represented as the S2 Index - a specific number which encodes internally a point on the surface of a unit sphere, unlike traditional (latitude, longitude) pairs. To get the S2 point index for a given point specified in the format (latitude, longitude) use the [geoToS2](#geotos2) function. Also, you can use the [s2ToGeo](#s2togeo) function for getting geographical coordinates corresponding to the specified S2 point index.

View File

@ -45,13 +45,13 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')
Calculates the MD4 from a string and returns the resulting set of bytes as FixedString(16). Calculates the MD4 from a string and returns the resulting set of bytes as FixedString(16).
## MD5 {#md5} ## MD5
Calculates the MD5 from a string and returns the resulting set of bytes as FixedString(16). Calculates the MD5 from a string and returns the resulting set of bytes as FixedString(16).
If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the sipHash128 function instead. If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the sipHash128 function instead.
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))). If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).
## sipHash64 {#siphash64} ## sipHash64
Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.

View File

@ -295,7 +295,7 @@ Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null
## toIPv6 ## toIPv6
Converts a string form of IPv6 address to [IPv6](../data-types/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value. Converts a string form of IPv6 address to [IPv6](../data-types/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value.
Similar to [IPv6StringToNum](#ipv6stringtonums) function, which converts IPv6 address to binary format. Similar to [IPv6StringToNum](#ipv6stringtonum) function, which converts IPv6 address to binary format.
If the input string contains a valid IPv4 address, then the IPv6 equivalent of the IPv4 address is returned. If the input string contains a valid IPv4 address, then the IPv6 equivalent of the IPv4 address is returned.

View File

@ -5,10 +5,10 @@ sidebar_label: JSON
--- ---
There are two sets of functions to parse JSON: There are two sets of functions to parse JSON:
- [`simpleJSON*` (`visitParam*`)](#simplejson--visitparam-functions) which is made for parsing a limited subset of JSON extremely fast. - [`simpleJSON*` (`visitParam*`)](#simplejson-visitparam-functions) which is made for parsing a limited subset of JSON extremely fast.
- [`JSONExtract*`](#jsonextract-functions) which is made for parsing ordinary JSON. - [`JSONExtract*`](#jsonextract-functions) which is made for parsing ordinary JSON.
## simpleJSON / visitParam functions ## simpleJSON (visitParam) functions
ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be. They try to do as little as possible to get the job done as quickly as possible. ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be. They try to do as little as possible to get the job done as quickly as possible.

View File

@ -762,7 +762,7 @@ LIMIT 10
Given a size (number of bytes), this function returns a readable, rounded size with suffix (KB, MB, etc.) as string. Given a size (number of bytes), this function returns a readable, rounded size with suffix (KB, MB, etc.) as string.
The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull). The opposite operations of this function are [parseReadableSize](#parsereadablesize), [parseReadableSizeOrZero](#parsereadablesizeorzero), and [parseReadableSizeOrNull](#parsereadablesizeornull).
**Syntax** **Syntax**
@ -795,7 +795,7 @@ Result:
Given a size (number of bytes), this function returns a readable, rounded size with suffix (KiB, MiB, etc.) as string. Given a size (number of bytes), this function returns a readable, rounded size with suffix (KiB, MiB, etc.) as string.
The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull). The opposite operations of this function are [parseReadableSize](#parsereadablesize), [parseReadableSizeOrZero](#parsereadablesizeorzero), and [parseReadableSizeOrNull](#parsereadablesizeornull).
**Syntax** **Syntax**
@ -926,7 +926,7 @@ SELECT
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes.
If the function is unable to parse the input value, it throws an exception. If the function is unable to parse the input value, it throws an exception.
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize). The inverse operations of this function are [formatReadableSize](#formatreadablesize) and [formatReadableDecimalSize](#formatreadabledecimalsize).
**Syntax** **Syntax**
@ -964,7 +964,7 @@ SELECT
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes.
If the function is unable to parse the input value, it returns `NULL`. If the function is unable to parse the input value, it returns `NULL`.
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize). The inverse operations of this function are [formatReadableSize](#formatreadablesize) and [formatReadableDecimalSize](#formatreadabledecimalsize).
**Syntax** **Syntax**
@ -1002,7 +1002,7 @@ SELECT
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. If the function is unable to parse the input value, it returns `0`. Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. If the function is unable to parse the input value, it returns `0`.
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize). The inverse operations of this function are [formatReadableSize](#formatreadablesize) and [formatReadableDecimalSize](#formatreadabledecimalsize).
**Syntax** **Syntax**
@ -2711,7 +2711,7 @@ countDigits(x)
- Number of digits. [UInt8](../data-types/int-uint.md#uint-ranges). - Number of digits. [UInt8](../data-types/int-uint.md#uint-ranges).
:::note :::note
For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDigits(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow). For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDigits(x) > 18`. It's a slow variant of [isDecimalOverflow](#isdecimaloverflow).
::: :::
**Example** **Example**
@ -2803,7 +2803,7 @@ currentProfiles()
## enabledProfiles ## enabledProfiles
Returns settings profiles, assigned to the current user both explicitly and implicitly. Explicitly assigned profiles are the same as returned by the [currentProfiles](#current-profiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file). Returns settings profiles, assigned to the current user both explicitly and implicitly. Explicitly assigned profiles are the same as returned by the [currentProfiles](#currentprofiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file).
**Syntax** **Syntax**
@ -2916,11 +2916,11 @@ Result:
└───────────────────────────┘ └───────────────────────────┘
``` ```
## queryID {#queryID} ## queryID
Returns the ID of the current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `query_id`. Returns the ID of the current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `query_id`.
In contrast to [initialQueryID](#initial-query-id) function, `queryID` can return different results on different shards (see the example). In contrast to [initialQueryID](#initialqueryid) function, `queryID` can return different results on different shards (see the example).
**Syntax** **Syntax**
@ -2954,7 +2954,7 @@ Result:
Returns the ID of the initial current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `initial_query_id`. Returns the ID of the initial current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `initial_query_id`.
In contrast to [queryID](#query-id) function, `initialQueryID` returns the same results on different shards (see example). In contrast to [queryID](#queryid) function, `initialQueryID` returns the same results on different shards (see example).
**Syntax** **Syntax**
@ -3041,7 +3041,7 @@ shardCount()
**See Also** **See Also**
- [shardNum()](#shard-num) function example also contains `shardCount()` function call. - [shardNum()](#shardnum) function example also contains `shardCount()` function call.
## getOSKernelVersion ## getOSKernelVersion

View File

@ -200,7 +200,7 @@ Banker's rounding is a method of rounding fractional numbers
When the rounding number is halfway between two numbers, it's rounded to the nearest even digit at the specified decimal position. When the rounding number is halfway between two numbers, it's rounded to the nearest even digit at the specified decimal position.
For example: 3.5 rounds up to 4, 2.5 rounds down to 2. For example: 3.5 rounds up to 4, 2.5 rounds down to 2.
It's the default rounding method for floating point numbers defined in [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest). It's the default rounding method for floating point numbers defined in [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest).
The [round](#rounding_functions-round) function performs the same rounding for floating point numbers. The [round](#round) function performs the same rounding for floating point numbers.
The `roundBankers` function also rounds integers the same way, for example, `roundBankers(45, -1) = 40`. The `roundBankers` function also rounds integers the same way, for example, `roundBankers(45, -1) = 40`.
In other cases, the function rounds numbers to the nearest integer. In other cases, the function rounds numbers to the nearest integer.
@ -274,7 +274,7 @@ roundBankers(10.755, 2) = 10.76
**See Also** **See Also**
- [round](#rounding_functions-round) - [round](#round)
## roundToExp2 ## roundToExp2

View File

@ -1994,7 +1994,7 @@ Result:
## stringJaccardIndexUTF8 ## stringJaccardIndexUTF8
Like [stringJaccardIndex](#stringJaccardIndex) but for UTF8-encoded strings. Like [stringJaccardIndex](#stringjaccardindex) but for UTF8-encoded strings.
## editDistance ## editDistance

View File

@ -262,7 +262,7 @@ Result:
## multiSearchAllPositionsUTF8 ## multiSearchAllPositionsUTF8
Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings. Like [multiSearchAllPositions](#multisearchallpositions) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings.
**Syntax** **Syntax**
@ -336,7 +336,7 @@ Result:
Like [`position`](#position) but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings. Like [`position`](#position) but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
Functions [`multiSearchFirstPositionCaseInsensitive`](#multiSearchFirstPositionCaseInsensitive), [`multiSearchFirstPositionUTF8`](#multiSearchFirstPositionUTF8) and [`multiSearchFirstPositionCaseInsensitiveUTF8`](#multiSearchFirstPositionCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function. Functions [`multiSearchFirstPositionCaseInsensitive`](#multisearchfirstpositioncaseinsensitive), [`multiSearchFirstPositionUTF8`](#multisearchfirstpositionutf8) and [`multiSearchFirstPositionCaseInsensitiveUTF8`](#multisearchfirstpositioncaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.
**Syntax** **Syntax**
@ -370,7 +370,7 @@ Result:
## multiSearchFirstPositionCaseInsensitive ## multiSearchFirstPositionCaseInsensitive
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but ignores case. Like [`multiSearchFirstPosition`](#multisearchfirstposition) but ignores case.
**Syntax** **Syntax**
@ -404,7 +404,7 @@ Result:
## multiSearchFirstPositionUTF8 ## multiSearchFirstPositionUTF8
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings. Like [`multiSearchFirstPosition`](#multisearchfirstposition) but assumes `haystack` and `needle` to be UTF-8 strings.
**Syntax** **Syntax**
@ -440,7 +440,7 @@ Result:
## multiSearchFirstPositionCaseInsensitiveUTF8 ## multiSearchFirstPositionCaseInsensitiveUTF8
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings and ignores case. Like [`multiSearchFirstPosition`](#multisearchfirstposition) but assumes `haystack` and `needle` to be UTF-8 strings and ignores case.
**Syntax** **Syntax**
@ -478,7 +478,7 @@ Result:
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise. Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise.
Functions [`multiSearchFirstIndexCaseInsensitive`](#multiSearchFirstIndexCaseInsensitive), [`multiSearchFirstIndexUTF8`](#multiSearchFirstIndexUTF8) and [`multiSearchFirstIndexCaseInsensitiveUTF8`](#multiSearchFirstIndexCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function. Functions [`multiSearchFirstIndexCaseInsensitive`](#multisearchfirstindexcaseinsensitive), [`multiSearchFirstIndexUTF8`](#multisearchfirstindexutf8) and [`multiSearchFirstIndexCaseInsensitiveUTF8`](#multisearchfirstindexcaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.
**Syntax** **Syntax**
@ -615,7 +615,7 @@ Result:
Returns 1, if at least one string needle<sub>i</sub> matches the string `haystack` and 0 otherwise. Returns 1, if at least one string needle<sub>i</sub> matches the string `haystack` and 0 otherwise.
Functions [`multiSearchAnyCaseInsensitive`](#multiSearchAnyCaseInsensitive), [`multiSearchAnyUTF8`](#multiSearchAnyUTF8) and []`multiSearchAnyCaseInsensitiveUTF8`](#multiSearchAnyCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function. Functions [`multiSearchAnyCaseInsensitive`](#multisearchanycaseinsensitive), [`multiSearchAnyUTF8`](#multisearchanyutf8) and [`multiSearchAnyCaseInsensitiveUTF8`](#multisearchanycaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.
**Syntax** **Syntax**
@ -719,7 +719,7 @@ Result:
## multiSearchAnyCaseInsensitiveUTF8 ## multiSearchAnyCaseInsensitiveUTF8
Like [multiSearchAnyUTF8](#multiSearchAnyUTF8) but ignores case. Like [multiSearchAnyUTF8](#multisearchanyutf8) but ignores case.
**Syntax** **Syntax**
@ -880,7 +880,7 @@ extractAll(haystack, pattern)
Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where the first array includes all fragments matching the first group, the second array - matching the second group, etc. Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where the first array includes all fragments matching the first group, the second array - matching the second group, etc.
This function is slower than [extractAllGroupsVertical](#extractallgroups-vertical). This function is slower than [extractAllGroupsVertical](#extractallgroupsvertical).
**Syntax** **Syntax**
@ -952,7 +952,7 @@ Result:
└────────────────────────────────────────────────────────────────────────────────────────┘ └────────────────────────────────────────────────────────────────────────────────────────┘
``` ```
## like {#like} ## like
Returns whether string `haystack` matches the LIKE expression `pattern`. Returns whether string `haystack` matches the LIKE expression `pattern`.
@ -1215,7 +1215,7 @@ Result:
## ngramSearchCaseInsensitive ## ngramSearchCaseInsensitive
Provides a case-insensitive variant of [ngramSearch](#ngramSearch). Provides a case-insensitive variant of [ngramSearch](#ngramsearch).
**Syntax** **Syntax**
@ -1630,7 +1630,7 @@ Result:
## hasSubsequenceCaseInsensitive ## hasSubsequenceCaseInsensitive
Like [hasSubsequence](#hasSubsequence) but searches case-insensitively. Like [hasSubsequence](#hassubsequence) but searches case-insensitively.
**Syntax** **Syntax**
@ -1665,7 +1665,7 @@ Result:
## hasSubsequenceUTF8 ## hasSubsequenceUTF8
Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings. Like [hasSubsequence](#hassubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings.
**Syntax** **Syntax**
@ -1700,7 +1700,7 @@ Result:
## hasSubsequenceCaseInsensitiveUTF8 ## hasSubsequenceCaseInsensitiveUTF8
Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively. Like [hasSubsequenceUTF8](#hassubsequenceutf8) but searches case-insensitively.
**Syntax** **Syntax**

View File

@ -10,7 +10,7 @@ sidebar_label: Type Conversion
ClickHouse generally uses the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion). ClickHouse generally uses the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion).
`to<type>` functions and [cast](#castx-t) behave differently in some cases, for example in case of [LowCardinality](../data-types/lowcardinality.md): [cast](#castx-t) removes the [LowCardinality](../data-types/lowcardinality.md) trait, while `to<type>` functions don't. The same with [Nullable](../data-types/nullable.md), this behaviour is not compatible with SQL standard, and it can be changed using [cast_keep_nullable](../../operations/settings/settings.md/#cast_keep_nullable) setting. `to<type>` functions and [cast](#cast) behave differently in some cases, for example in case of [LowCardinality](../data-types/lowcardinality.md): [cast](#cast) removes the [LowCardinality](../data-types/lowcardinality.md) trait, while `to<type>` functions don't. The same with [Nullable](../data-types/nullable.md), this behaviour is not compatible with SQL standard, and it can be changed using [cast_keep_nullable](../../operations/settings/settings.md/#cast_keep_nullable) setting.
:::note :::note
Be aware of potential data loss if values of a datatype are converted to a smaller datatype (for example from `Int64` to `Int32`) or between Be aware of potential data loss if values of a datatype are converted to a smaller datatype (for example from `Int64` to `Int32`) or between
@ -70,7 +70,7 @@ Integer value in the `Int8`, `Int16`, `Int32`, `Int64`, `Int128` or `Int256` dat
Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.
The behavior of functions for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. The behavior of functions for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#common-issues-with-data-conversion), when using the functions.
**Example** **Example**
@ -169,7 +169,7 @@ Converts an input value to the [UInt](../data-types/int-uint.md) data type. This
Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.
The behavior of functions for negative arguments and for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions. The behavior of functions for negative arguments and for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#common-issues-with-data-conversion), when using the functions.
**Example** **Example**
@ -996,7 +996,7 @@ Result:
## reinterpretAsUInt8 ## reinterpretAsUInt8
Performs byte reinterpretation by treating the input value as a value of type UInt8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. Performs byte reinterpretation by treating the input value as a value of type UInt8. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax** **Syntax**
@ -1034,7 +1034,7 @@ Result:
## reinterpretAsUInt16 ## reinterpretAsUInt16
Performs byte reinterpretation by treating the input value as a value of type UInt16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. Performs byte reinterpretation by treating the input value as a value of type UInt16. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax** **Syntax**
@ -1072,7 +1072,7 @@ Result:
## reinterpretAsUInt32 ## reinterpretAsUInt32
Performs byte reinterpretation by treating the input value as a value of type UInt32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. Performs byte reinterpretation by treating the input value as a value of type UInt32. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax** **Syntax**
@ -1110,7 +1110,7 @@ Result:
## reinterpretAsUInt64 ## reinterpretAsUInt64
Performs byte reinterpretation by treating the input value as a value of type UInt64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. Performs byte reinterpretation by treating the input value as a value of type UInt64. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax** **Syntax**
@ -1148,7 +1148,7 @@ Result:
## reinterpretAsUInt128 ## reinterpretAsUInt128
Performs byte reinterpretation by treating the input value as a value of type UInt128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. Performs byte reinterpretation by treating the input value as a value of type UInt128. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax** **Syntax**
@ -1186,7 +1186,7 @@ Result:
## reinterpretAsUInt256 ## reinterpretAsUInt256
Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax** **Syntax**
@ -1224,7 +1224,7 @@ Result:
## reinterpretAsInt8 ## reinterpretAsInt8
Performs byte reinterpretation by treating the input value as a value of type Int8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. Performs byte reinterpretation by treating the input value as a value of type Int8. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax** **Syntax**
@ -1262,7 +1262,7 @@ Result:
## reinterpretAsInt16 ## reinterpretAsInt16
Performs byte reinterpretation by treating the input value as a value of type Int16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. Performs byte reinterpretation by treating the input value as a value of type Int16. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax** **Syntax**
@ -1300,7 +1300,7 @@ Result:
## reinterpretAsInt32 ## reinterpretAsInt32
Performs byte reinterpretation by treating the input value as a value of type Int32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. Performs byte reinterpretation by treating the input value as a value of type Int32. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax** **Syntax**
@ -1338,7 +1338,7 @@ Result:
## reinterpretAsInt64 ## reinterpretAsInt64
Performs byte reinterpretation by treating the input value as a value of type Int64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. Performs byte reinterpretation by treating the input value as a value of type Int64. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax** **Syntax**
@ -1376,7 +1376,7 @@ Result:
## reinterpretAsInt128 ## reinterpretAsInt128
Performs byte reinterpretation by treating the input value as a value of type Int128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. Performs byte reinterpretation by treating the input value as a value of type Int128. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax** **Syntax**
@ -1414,7 +1414,7 @@ Result:
## reinterpretAsInt256 ## reinterpretAsInt256
Performs byte reinterpretation by treating the input value as a value of type Int256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. Performs byte reinterpretation by treating the input value as a value of type Int256. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax** **Syntax**
@ -1452,7 +1452,7 @@ Result:
## reinterpretAsFloat32 ## reinterpretAsFloat32
Performs byte reinterpretation by treating the input value as a value of type Float32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. Performs byte reinterpretation by treating the input value as a value of type Float32. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax** **Syntax**
@ -1486,7 +1486,7 @@ Result:
## reinterpretAsFloat64 ## reinterpretAsFloat64
Performs byte reinterpretation by treating the input value as a value of type Float64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless. Performs byte reinterpretation by treating the input value as a value of type Float64. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
**Syntax** **Syntax**
@ -1730,7 +1730,7 @@ Result:
└─────────────────────┘ └─────────────────────┘
``` ```
## reinterpret(x, T) ## reinterpret
Uses the same source in-memory bytes sequence for `x` value and reinterprets it to destination type. Uses the same source in-memory bytes sequence for `x` value and reinterprets it to destination type.
@ -1766,9 +1766,9 @@ Result:
└─────────────┴──────────────┴───────────────┘ └─────────────┴──────────────┴───────────────┘
``` ```
## CAST(x, T) ## CAST
Converts an input value to the specified data type. Unlike the [reinterpret](#type_conversion_function-reinterpret) function, `CAST` tries to present the same value using the new data type. If the conversion can not be done then an exception is raised. Converts an input value to the specified data type. Unlike the [reinterpret](#reinterpret) function, `CAST` tries to present the same value using the new data type. If the conversion can not be done then an exception is raised.
Several syntax variants are supported. Several syntax variants are supported.
**Syntax** **Syntax**
@ -1875,7 +1875,7 @@ Result:
Converts `x` to the `T` data type. Converts `x` to the `T` data type.
The difference from [cast(x, T)](#type_conversion_function-cast) is that `accurateCast` does not allow overflow of numeric types during cast if type value `x` does not fit the bounds of type `T`. For example, `accurateCast(-1, 'UInt8')` throws an exception. The difference from [cast](#cast) is that `accurateCast` does not allow overflow of numeric types during cast if type value `x` does not fit the bounds of type `T`. For example, `accurateCast(-1, 'UInt8')` throws an exception.
**Example** **Example**
@ -2061,7 +2061,7 @@ Result:
└───────────────────────────┴──────────────────────────────┘ └───────────────────────────┴──────────────────────────────┘
``` ```
## parseDateTime {#type_conversion_functions-parseDateTime} ## parseDateTime
Converts a [String](../data-types/string.md) to [DateTime](../data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format). Converts a [String](../data-types/string.md) to [DateTime](../data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format).
@ -2102,15 +2102,15 @@ Alias: `TO_TIMESTAMP`.
## parseDateTimeOrZero ## parseDateTimeOrZero
Same as for [parseDateTime](#type_conversion_functions-parseDateTime) except that it returns zero date when it encounters a date format that cannot be processed. Same as for [parseDateTime](#parsedatetime) except that it returns zero date when it encounters a date format that cannot be processed.
## parseDateTimeOrNull ## parseDateTimeOrNull
Same as for [parseDateTime](#type_conversion_functions-parseDateTime) except that it returns `NULL` when it encounters a date format that cannot be processed. Same as for [parseDateTime](#parsedatetime) except that it returns `NULL` when it encounters a date format that cannot be processed.
Alias: `str_to_date`. Alias: `str_to_date`.
## parseDateTimeInJodaSyntax {#type_conversion_functions-parseDateTimeInJodaSyntax} ## parseDateTimeInJodaSyntax
Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax. Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax.
@ -2151,11 +2151,11 @@ SELECT parseDateTimeInJodaSyntax('2023-02-24 14:53:31', 'yyyy-MM-dd HH:mm:ss', '
## parseDateTimeInJodaSyntaxOrZero ## parseDateTimeInJodaSyntaxOrZero
Same as for [parseDateTimeInJodaSyntax](#type_conversion_functions-parseDateTimeInJodaSyntax) except that it returns zero date when it encounters a date format that cannot be processed. Same as for [parseDateTimeInJodaSyntax](#parsedatetimeinjodasyntax) except that it returns zero date when it encounters a date format that cannot be processed.
## parseDateTimeInJodaSyntaxOrNull ## parseDateTimeInJodaSyntaxOrNull
Same as for [parseDateTimeInJodaSyntax](#type_conversion_functions-parseDateTimeInJodaSyntax) except that it returns `NULL` when it encounters a date format that cannot be processed. Same as for [parseDateTimeInJodaSyntax](#parsedatetimeinjodasyntax) except that it returns `NULL` when it encounters a date format that cannot be processed.
## parseDateTimeBestEffort ## parseDateTimeBestEffort
## parseDateTime32BestEffort ## parseDateTime32BestEffort
@ -2313,11 +2313,11 @@ Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it r
## parseDateTimeBestEffortUSOrNull ## parseDateTimeBestEffortUSOrNull
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns `NULL` when it encounters a date format that cannot be processed. Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortus) function except that it returns `NULL` when it encounters a date format that cannot be processed.
## parseDateTimeBestEffortUSOrZero ## parseDateTimeBestEffortUSOrZero
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns zero date (`1970-01-01`) or zero date with time (`1970-01-01 00:00:00`) when it encounters a date format that cannot be processed. Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortus) function except that it returns zero date (`1970-01-01`) or zero date with time (`1970-01-01 00:00:00`) when it encounters a date format that cannot be processed.
## parseDateTime64BestEffort ## parseDateTime64BestEffort
@ -2389,7 +2389,7 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that
Converts input parameter to the [LowCardinality](../data-types/lowcardinality.md) version of same data type. Converts input parameter to the [LowCardinality](../data-types/lowcardinality.md) version of same data type.
To convert data from the `LowCardinality` data type use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x as String)`. To convert data from the `LowCardinality` data type use the [CAST](#cast) function. For example, `CAST(x as String)`.
**Syntax** **Syntax**

View File

@ -150,7 +150,7 @@ The function also works for [Arrays](array-functions.md#function-empty) and [Str
**Example** **Example**
To generate the UUID value, ClickHouse provides the [generateUUIDv4](#uuid-function-generate) function. To generate the UUID value, ClickHouse provides the [generateUUIDv4](#generateuuidv4) function.
Query: Query:
@ -190,7 +190,7 @@ The function also works for [Arrays](array-functions.md#function-notempty) or [S
**Example** **Example**
To generate the UUID value, ClickHouse provides the [generateUUIDv4](#uuid-function-generate) function. To generate the UUID value, ClickHouse provides the [generateUUIDv4](#generateuuidv4) function.
Query: Query:

View File

@ -235,7 +235,7 @@ If `some_predicate` is not selective enough, it will return a large amount of da
### Distributed Subqueries and max_parallel_replicas ### Distributed Subqueries and max_parallel_replicas
When [max_parallel_replicas](#settings-max_parallel_replicas) is greater than 1, distributed queries are further transformed. When [max_parallel_replicas](#distributed-subqueries-and-max_parallel_replicas) is greater than 1, distributed queries are further transformed.
For example, the following: For example, the following:
@ -255,7 +255,7 @@ where `M` is between `1` and `3` depending on which replica the local query is e
These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table. These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table.
Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if `local_table_2` does not have a sampling key, incorrect results will be produced. The same rule applies to `JOIN`. Therefore adding the [max_parallel_replicas](#distributed-subqueries-and-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if `local_table_2` does not have a sampling key, incorrect results will be produced. The same rule applies to `JOIN`.
One workaround if `local_table_2` does not meet the requirements, is to use `GLOBAL IN` or `GLOBAL JOIN`. One workaround if `local_table_2` does not meet the requirements, is to use `GLOBAL IN` or `GLOBAL JOIN`.

View File

@ -108,7 +108,7 @@ ALTER TABLE visits RENAME COLUMN webBrowser TO browser
CLEAR COLUMN [IF EXISTS] name IN PARTITION partition_name CLEAR COLUMN [IF EXISTS] name IN PARTITION partition_name
``` ```
Resets all data in a column for a specified partition. Read more about setting the partition name in the section [How to set the partition expression](partition.md/#how-to-set-partition-expression). Resets all data in a column for a specified partition. Read more about setting the partition name in the section [How to set the partition expression](../alter/partition.md/#how-to-set-partition-expression).
If the `IF EXISTS` clause is specified, the query won't return an error if the column does not exist. If the `IF EXISTS` clause is specified, the query won't return an error if the column does not exist.
@ -173,7 +173,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
Changing the column type is the only complex action — it changes the contents of files with data. For large tables, this may take a long time. Changing the column type is the only complex action — it changes the contents of files with data. For large tables, this may take a long time.
The query also can change the order of the columns using `FIRST | AFTER` clause, see [ADD COLUMN](#alter_add-column) description, but column type is mandatory in this case. The query also can change the order of the columns using `FIRST | AFTER` clause, see [ADD COLUMN](#add-column) description, but column type is mandatory in this case.
Example: Example:

View File

@ -31,7 +31,7 @@ The following operations with [partitions](/docs/en/engines/table-engines/merget
ALTER TABLE table_name [ON CLUSTER cluster] DETACH PARTITION|PART partition_expr ALTER TABLE table_name [ON CLUSTER cluster] DETACH PARTITION|PART partition_expr
``` ```
Moves all data for the specified partition to the `detached` directory. The server forgets about the detached data partition as if it does not exist. The server will not know about this data until you make the [ATTACH](#alter_attach-partition) query. Moves all data for the specified partition to the `detached` directory. The server forgets about the detached data partition as if it does not exist. The server will not know about this data until you make the [ATTACH](#attach-partitionpart) query.
Example: Example:
@ -252,7 +252,7 @@ Downloads a partition from another server. This query only works for the replica
The query does the following: The query does the following:
1. Downloads the partition|part from the specified shard. In path-in-zookeeper you must specify a path to the shard in ZooKeeper. 1. Downloads the partition|part from the specified shard. In path-in-zookeeper you must specify a path to the shard in ZooKeeper.
2. Then the query puts the downloaded data to the `detached` directory of the `table_name` table. Use the [ATTACH PARTITION\|PART](#alter_attach-partition) query to add the data to the table. 2. Then the query puts the downloaded data to the `detached` directory of the `table_name` table. Use the [ATTACH PARTITION\|PART](#attach-partitionpart) query to add the data to the table.
For example: For example:
@ -353,7 +353,7 @@ You can specify the partition expression in `ALTER ... PARTITION` queries in dif
- Using the keyword `ALL`. It can be used only with DROP/DETACH/ATTACH. For example, `ALTER TABLE visits ATTACH PARTITION ALL`. - Using the keyword `ALL`. It can be used only with DROP/DETACH/ATTACH. For example, `ALTER TABLE visits ATTACH PARTITION ALL`.
- As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`. - As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
- Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`. - Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. - In the [ALTER ATTACH PART](#attach-partitionpart) and [DROP DETACHED PART](#drop-detached-partitionpart) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.
Usage of quotes when specifying the partition depends on the type of partition expression. For example, for the `String` type, you have to specify its name in quotes (`'`). For the `Date` and `Int*` types no quotes are needed. Usage of quotes when specifying the partition depends on the type of partition expression. For example, for the `String` type, you have to specify its name in quotes (`'`). For the `Date` and `Int*` types no quotes are needed.

View File

@ -17,8 +17,8 @@ By default, tables are created only on the current server. Distributed DDL queri
``` sql ``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
( (
name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1] [COMMENT 'comment for column'], name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [COMMENT 'comment for column'] [compression_codec] [TTL expr1],
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2] [COMMENT 'comment for column'], name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [COMMENT 'comment for column'] [compression_codec] [TTL expr2],
... ...
) ENGINE = engine ) ENGINE = engine
COMMENT 'comment for table' COMMENT 'comment for table'

View File

@ -6,7 +6,7 @@ sidebar_label: VIEW
# CREATE VIEW # CREATE VIEW
Creates a new view. Views can be [normal](#normal-view), [materialized](#materialized-view), [live](#live-view-experimental), and [window](#window-view-experimental) (live view and window view are experimental features). Creates a new view. Views can be [normal](#normal-view), [materialized](#materialized-view), [live](#live-view-deprecated), and [window](#window-view-experimental) (live view and window view are experimental features).
## Normal View ## Normal View

View File

@ -33,7 +33,7 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US
- `role` — ClickHouse user role. - `role` — ClickHouse user role.
- `user` — ClickHouse user account. - `user` — ClickHouse user account.
The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`. The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option) privilege to `user` or `role`.
The `WITH REPLACE OPTION` clause replaces old roles with the new role for the `user` or `role`; if it is not specified, the new roles are appended. The `WITH REPLACE OPTION` clause replaces old roles with the new role for the `user` or `role`; if it is not specified, the new roles are appended.
## Grant Current Grants Syntax ## Grant Current Grants Syntax
@ -201,7 +201,7 @@ Hierarchy of privileges:
- `HDFS` - `HDFS`
- `S3` - `S3`
- [dictGet](#dictget) - [dictGet](#dictget)
- [displaySecretsInShowAndSelect](#display-secrets) - [displaySecretsInShowAndSelect](#displaysecretsinshowandselect)
- [NAMED COLLECTION ADMIN](#named-collection-admin) - [NAMED COLLECTION ADMIN](#named-collection-admin)
- `CREATE NAMED COLLECTION` - `CREATE NAMED COLLECTION`
- `DROP NAMED COLLECTION` - `DROP NAMED COLLECTION`
@ -498,7 +498,7 @@ Privilege level: `DICTIONARY`.
- `GRANT dictGet ON mydictionary TO john` - `GRANT dictGet ON mydictionary TO john`
### displaySecretsInShowAndSelect {#display-secrets} ### displaySecretsInShowAndSelect
Allows a user to view secrets in `SHOW` and `SELECT` queries if both Allows a user to view secrets in `SHOW` and `SELECT` queries if both
[`display_secrets_in_show_and_select` server setting](../../operations/server-configuration-parameters/settings#display_secrets_in_show_and_select) [`display_secrets_in_show_and_select` server setting](../../operations/server-configuration-parameters/settings#display_secrets_in_show_and_select)

View File

@ -27,14 +27,14 @@ The features of data sampling are listed below:
For the `SAMPLE` clause the following syntax is supported: For the `SAMPLE` clause the following syntax is supported:
| SAMPLE Clause Syntax | Description | | SAMPLE Clause Syntax | Description |
|----------------------|------------------------------| |----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `SAMPLE k` | Here `k` is the number from 0 to 1. The query is executed on `k` fraction of data. For example, `SAMPLE 0.1` runs the query on 10% of data. [Read more](#select-sample-k) | | `SAMPLE k` | Here `k` is the number from 0 to 1. The query is executed on `k` fraction of data. For example, `SAMPLE 0.1` runs the query on 10% of data. [Read more](#sample-k) |
| `SAMPLE n` | Here `n` is a sufficiently large integer. The query is executed on a sample of at least `n` rows (but not significantly more than this). For example, `SAMPLE 10000000` runs the query on a minimum of 10,000,000 rows. [Read more](#select-sample-n) | | `SAMPLE n` | Here `n` is a sufficiently large integer. The query is executed on a sample of at least `n` rows (but not significantly more than this). For example, `SAMPLE 10000000` runs the query on a minimum of 10,000,000 rows. [Read more](#sample-n) |
| `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1. The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#select-sample-offset) | | `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1. The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#sample-k-offset-m) |
## SAMPLE K {#select-sample-k} ## SAMPLE K
Here `k` is the number from 0 to 1 (both fractional and decimal notations are supported). For example, `SAMPLE 1/2` or `SAMPLE 0.5`. Here `k` is the number from 0 to 1 (both fractional and decimal notations are supported). For example, `SAMPLE 1/2` or `SAMPLE 0.5`.
@ -54,7 +54,7 @@ ORDER BY PageViews DESC LIMIT 1000
In this example, the query is executed on a sample from 0.1 (10%) of data. Values of aggregate functions are not corrected automatically, so to get an approximate result, the value `count()` is manually multiplied by 10. In this example, the query is executed on a sample from 0.1 (10%) of data. Values of aggregate functions are not corrected automatically, so to get an approximate result, the value `count()` is manually multiplied by 10.
## SAMPLE N {#select-sample-n} ## SAMPLE N
Here `n` is a sufficiently large integer. For example, `SAMPLE 10000000`. Here `n` is a sufficiently large integer. For example, `SAMPLE 10000000`.
@ -90,7 +90,7 @@ FROM visits
SAMPLE 10000000 SAMPLE 10000000
``` ```
## SAMPLE K OFFSET M {#select-sample-offset} ## SAMPLE K OFFSET M
Here `k` and `m` are numbers from 0 to 1. Examples are shown below. Here `k` and `m` are numbers from 0 to 1. Examples are shown below.

View File

@ -174,7 +174,7 @@ Aborts ClickHouse process (like `kill -9 {$ pid_clickhouse-server}`)
## Managing Distributed Tables ## Managing Distributed Tables
ClickHouse can manage [distributed](../../engines/table-engines/special/distributed.md) tables. When a user inserts data into these tables, ClickHouse first creates a queue of the data that should be sent to cluster nodes, then asynchronously sends it. You can manage queue processing with the [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [FLUSH DISTRIBUTED](#query_language-system-flush-distributed), and [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) queries. You can also synchronously insert distributed data with the [distributed_foreground_insert](../../operations/settings/settings.md#distributed_foreground_insert) setting. ClickHouse can manage [distributed](../../engines/table-engines/special/distributed.md) tables. When a user inserts data into these tables, ClickHouse first creates a queue of the data that should be sent to cluster nodes, then asynchronously sends it. You can manage queue processing with the [STOP DISTRIBUTED SENDS](#stop-distributed-sends), [FLUSH DISTRIBUTED](#flush-distributed), and [START DISTRIBUTED SENDS](#start-distributed-sends) queries. You can also synchronously insert distributed data with the [distributed_foreground_insert](../../operations/settings/settings.md#distributed_foreground_insert) setting.
### STOP DISTRIBUTED SENDS ### STOP DISTRIBUTED SENDS

View File

@ -54,11 +54,11 @@ Identifiers are:
- Cluster, database, table, partition, and column names. - Cluster, database, table, partition, and column names.
- Functions. - Functions.
- Data types. - Data types.
- [Expression aliases](#expression_aliases). - [Expression aliases](#expression-aliases).
Identifiers can be quoted or non-quoted. The latter is preferred. Identifiers can be quoted or non-quoted. The latter is preferred.
Non-quoted identifiers must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and can not be equal to [keywords](#syntax-keywords). Examples: `x`, `_1`, `X_y__Z123_`. Non-quoted identifiers must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and can not be equal to [keywords](#keywords). Examples: `x`, `_1`, `X_y__Z123_`.
If you want to use identifiers the same as keywords or you want to use other symbols in identifiers, quote them using double quotes or backticks, for example, `"id"`, `` `id` ``. If you want to use identifiers the same as keywords or you want to use other symbols in identifiers, quote them using double quotes or backticks, for example, `"id"`, `` `id` ``.

View File

@ -18,7 +18,7 @@ file([path_to_archive ::] path [,format] [,structure] [,compression])
**Parameters** **Parameters**
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports in read-only mode the following [globs](#globs_in_path): `*`, `?`, `{abc,def}` (with `'abc'` and `'def'` being strings) and `{N..M}` (with `N` and `M` being numbers). - `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports in read-only mode the following [globs](#globs-in-path): `*`, `?`, `{abc,def}` (with `'abc'` and `'def'` being strings) and `{N..M}` (with `N` and `M` being numbers).
- `path_to_archive` - The relative path to a zip/tar/7z archive. Supports the same globs as `path`. - `path_to_archive` - The relative path to a zip/tar/7z archive. Supports the same globs as `path`.
- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file. - `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`. - `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
@ -128,7 +128,7 @@ Reading data from `table.csv`, located in `archive1.zip` or/and `archive2.zip`:
SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv'); SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv');
``` ```
## Globs in path {#globs_in_path} ## Globs in path
Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix. Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix.

View File

@ -22,7 +22,7 @@ fileCluster(cluster_name, path[, format, structure, compression_method])
**Arguments** **Arguments**
- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file also supports [globs](#globs_in_path). - `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file also supports [globs](#globs-in-path).
- `format` — [Format](../../interfaces/formats.md#formats) of the files. Type: [String](../../sql-reference/data-types/string.md). - `format` — [Format](../../interfaces/formats.md#formats) of the files. Type: [String](../../sql-reference/data-types/string.md).
- `structure` — Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md). - `structure` — Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md).
- `compression_method` — Compression method. Supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`. - `compression_method` — Compression method. Supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
@ -74,7 +74,7 @@ SELECT * FROM fileCluster('my_cluster', 'file{1,2}.csv', 'CSV', 'i UInt32, s Str
``` ```
## Globs in Path {#globs_in_path} ## Globs in Path
All patterns supported by [File](../../sql-reference/table-functions/file.md#globs-in-path) table function are supported by FileCluster. All patterns supported by [File](../../sql-reference/table-functions/file.md#globs-in-path) table function are supported by FileCluster.

View File

@ -48,6 +48,7 @@
#include <Common/FailPoint.h> #include <Common/FailPoint.h>
#include <Common/CPUID.h> #include <Common/CPUID.h>
#include <Common/HTTPConnectionPool.h> #include <Common/HTTPConnectionPool.h>
#include <Common/NamedCollections/NamedCollectionsFactory.h>
#include <Server/waitServersToFinish.h> #include <Server/waitServersToFinish.h>
#include <Interpreters/Cache/FileCacheFactory.h> #include <Interpreters/Cache/FileCacheFactory.h>
#include <Core/ServerUUID.h> #include <Core/ServerUUID.h>
@ -70,7 +71,6 @@
#include <Storages/System/attachInformationSchemaTables.h> #include <Storages/System/attachInformationSchemaTables.h>
#include <Storages/Cache/ExternalDataSourceCache.h> #include <Storages/Cache/ExternalDataSourceCache.h>
#include <Storages/Cache/registerRemoteFileMetadatas.h> #include <Storages/Cache/registerRemoteFileMetadatas.h>
#include <Common/NamedCollections/NamedCollectionUtils.h>
#include <AggregateFunctions/registerAggregateFunctions.h> #include <AggregateFunctions/registerAggregateFunctions.h>
#include <Functions/UserDefined/IUserDefinedSQLObjectsStorage.h> #include <Functions/UserDefined/IUserDefinedSQLObjectsStorage.h>
#include <Functions/registerFunctions.h> #include <Functions/registerFunctions.h>
@ -1378,7 +1378,7 @@ try
CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements); CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
#endif #endif
NamedCollectionUtils::loadIfNot(); NamedCollectionFactory::instance().loadIfNot();
/// Initialize main config reloader. /// Initialize main config reloader.
std::string include_from_path = config().getString("include_from", "/etc/metrika.xml"); std::string include_from_path = config().getString("include_from", "/etc/metrika.xml");
@ -1647,7 +1647,7 @@ try
#if USE_SSL #if USE_SSL
CertificateReloader::instance().tryLoad(*config); CertificateReloader::instance().tryLoad(*config);
#endif #endif
NamedCollectionUtils::reloadFromConfig(*config); NamedCollectionFactory::instance().reloadFromConfig(*config);
FileCacheFactory::instance().updateSettingsFromConfig(*config); FileCacheFactory::instance().updateSettingsFromConfig(*config);

View File

@ -1,5 +1,7 @@
#include <Analyzer/FunctionNode.h> #include <Analyzer/FunctionNode.h>
#include <Columns/ColumnConst.h>
#include <Common/SipHash.h> #include <Common/SipHash.h>
#include <Common/FieldVisitorToString.h> #include <Common/FieldVisitorToString.h>
@ -58,12 +60,20 @@ ColumnsWithTypeAndName FunctionNode::getArgumentColumns() const
ColumnWithTypeAndName argument_column; ColumnWithTypeAndName argument_column;
auto * constant = argument->as<ConstantNode>();
if (isNameOfInFunction(function_name) && i == 1) if (isNameOfInFunction(function_name) && i == 1)
{
argument_column.type = std::make_shared<DataTypeSet>(); argument_column.type = std::make_shared<DataTypeSet>();
if (constant)
{
/// Created but not filled for the analysis during function resolution.
FutureSetPtr empty_set;
argument_column.column = ColumnConst::create(ColumnSet::create(1, empty_set), 1);
}
}
else else
argument_column.type = argument->getResultType(); argument_column.type = argument->getResultType();
auto * constant = argument->as<ConstantNode>();
if (constant && !isNotCreatable(argument_column.type)) if (constant && !isNotCreatable(argument_column.type))
argument_column.column = argument_column.type->createColumnConst(1, constant->getValue()); argument_column.column = argument_column.type->createColumnConst(1, constant->getValue());

View File

@ -10,9 +10,12 @@
namespace DB namespace DB
{ {
InterpolateNode::InterpolateNode(QueryTreeNodePtr expression_, QueryTreeNodePtr interpolate_expression_) InterpolateNode::InterpolateNode(std::shared_ptr<IdentifierNode> expression_, QueryTreeNodePtr interpolate_expression_)
: IQueryTreeNode(children_size) : IQueryTreeNode(children_size)
{ {
if (expression_)
expression_name = expression_->getIdentifier().getFullName();
children[expression_child_index] = std::move(expression_); children[expression_child_index] = std::move(expression_);
children[interpolate_expression_child_index] = std::move(interpolate_expression_); children[interpolate_expression_child_index] = std::move(interpolate_expression_);
} }
@ -41,13 +44,23 @@ void InterpolateNode::updateTreeHashImpl(HashState &, CompareOptions) const
QueryTreeNodePtr InterpolateNode::cloneImpl() const QueryTreeNodePtr InterpolateNode::cloneImpl() const
{ {
return std::make_shared<InterpolateNode>(nullptr /*expression*/, nullptr /*interpolate_expression*/); auto cloned = std::make_shared<InterpolateNode>(nullptr /*expression*/, nullptr /*interpolate_expression*/);
cloned->expression_name = expression_name;
return cloned;
} }
ASTPtr InterpolateNode::toASTImpl(const ConvertToASTOptions & options) const ASTPtr InterpolateNode::toASTImpl(const ConvertToASTOptions & options) const
{ {
auto result = std::make_shared<ASTInterpolateElement>(); auto result = std::make_shared<ASTInterpolateElement>();
result->column = getExpression()->toAST(options)->getColumnName();
/// Interpolate parser supports only identifier node.
/// In case of alias, identifier is replaced to expression, which can't be parsed.
/// In this case, keep original alias name.
if (const auto * identifier = getExpression()->as<IdentifierNode>())
result->column = identifier->toAST(options)->getColumnName();
else
result->column = expression_name;
result->children.push_back(getInterpolateExpression()->toAST(options)); result->children.push_back(getInterpolateExpression()->toAST(options));
result->expr = result->children.back(); result->expr = result->children.back();

View File

@ -1,6 +1,6 @@
#pragma once #pragma once
#include <Analyzer/IQueryTreeNode.h> #include <Analyzer/IdentifierNode.h>
#include <Analyzer/ListNode.h> #include <Analyzer/ListNode.h>
namespace DB namespace DB
@ -19,7 +19,7 @@ class InterpolateNode final : public IQueryTreeNode
{ {
public: public:
/// Initialize interpolate node with expression and interpolate expression /// Initialize interpolate node with expression and interpolate expression
explicit InterpolateNode(QueryTreeNodePtr expression_, QueryTreeNodePtr interpolate_expression_); explicit InterpolateNode(std::shared_ptr<IdentifierNode> expression_, QueryTreeNodePtr interpolate_expression_);
/// Get expression to interpolate /// Get expression to interpolate
const QueryTreeNodePtr & getExpression() const const QueryTreeNodePtr & getExpression() const
@ -61,6 +61,9 @@ protected:
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override; ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
/// Initial name from column identifier.
std::string expression_name;
private: private:
static constexpr size_t expression_child_index = 0; static constexpr size_t expression_child_index = 0;
static constexpr size_t interpolate_expression_child_index = 1; static constexpr size_t interpolate_expression_child_index = 1;

View File

@ -51,7 +51,7 @@ public:
using Base = InDepthQueryTreeVisitorWithContext<AggregateFunctionsArithmericOperationsVisitor>; using Base = InDepthQueryTreeVisitorWithContext<AggregateFunctionsArithmericOperationsVisitor>;
using Base::Base; using Base::Base;
void leaveImpl(QueryTreeNodePtr & node) void enterImpl(QueryTreeNodePtr & node)
{ {
if (!getSettings().optimize_arithmetic_operations_in_aggregate_functions) if (!getSettings().optimize_arithmetic_operations_in_aggregate_functions)
return; return;

View File

@ -551,14 +551,25 @@ private:
in_function->getArguments().getNodes() = std::move(in_arguments); in_function->getArguments().getNodes() = std::move(in_arguments);
in_function->resolveAsFunction(in_function_resolver); in_function->resolveAsFunction(in_function_resolver);
DataTypePtr result_type = in_function->getResultType();
const auto * type_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(result_type.get());
if (type_low_cardinality)
result_type = type_low_cardinality->getDictionaryType();
/** For `k :: UInt8`, expression `k = 1 OR k = NULL` with result type Nullable(UInt8) /** For `k :: UInt8`, expression `k = 1 OR k = NULL` with result type Nullable(UInt8)
* is replaced with `k IN (1, NULL)` with result type UInt8. * is replaced with `k IN (1, NULL)` with result type UInt8.
* Convert it back to Nullable(UInt8). * Convert it back to Nullable(UInt8).
* And for `k :: LowCardinality(UInt8)`, the transformation of `k IN (1, NULL)` results in type LowCardinality(UInt8).
* Convert it to LowCardinality(Nullable(UInt8)).
*/ */
if (is_any_nullable && !in_function->getResultType()->isNullable()) if (is_any_nullable && !result_type->isNullable())
{ {
auto nullable_result_type = std::make_shared<DataTypeNullable>(in_function->getResultType()); DataTypePtr new_result_type = std::make_shared<DataTypeNullable>(result_type);
auto in_function_nullable = createCastFunction(std::move(in_function), std::move(nullable_result_type), getContext()); if (type_low_cardinality)
{
new_result_type = std::make_shared<DataTypeLowCardinality>(new_result_type);
}
auto in_function_nullable = createCastFunction(std::move(in_function), std::move(new_result_type), getContext());
or_operands.push_back(std::move(in_function_nullable)); or_operands.push_back(std::move(in_function_nullable));
} }
else else

View File

@ -75,7 +75,12 @@ struct ScopeAliases
if (jt == transitive_aliases.end()) if (jt == transitive_aliases.end())
return {}; return {};
key = &(getKey(jt->second, find_option)); const auto & new_key = getKey(jt->second, find_option);
/// Ignore potential cyclic aliases.
if (new_key == *key)
return {};
key = &new_key;
it = alias_map.find(*key); it = alias_map.find(*key);
} }

View File

@ -45,14 +45,17 @@ struct ProtocolServerMetrics
}; };
/** Periodically (by default, each second) /** Periodically (by default, each second)
* calculates and updates some metrics, * calculates and updates some metrics,
* that are not updated automatically (so, need to be asynchronously calculated). * that are not updated automatically (so, need to be asynchronously calculated).
* *
* This includes both ClickHouse-related metrics (like memory usage of ClickHouse process) * This includes both general process metrics (like memory usage)
* and common OS-related metrics (like total memory usage on the server). * and common OS-related metrics (like total memory usage on the server).
* *
* All the values are either gauge type (like the total number of tables, the current memory usage). * All the values are either gauge type (like the total number of tables, the current memory usage).
* Or delta-counters representing some accumulation during the interval of time. * Or delta-counters representing some accumulation during the interval of time.
*
* Server and Keeper specific metrics are contained inside
* ServerAsynchronousMetrics and KeeperAsynchronousMetrics respectively.
*/ */
class AsynchronousMetrics class AsynchronousMetrics
{ {

View File

@ -1,484 +0,0 @@
#include <Common/NamedCollections/NamedCollectionUtils.h>
#include <Common/escapeForFileName.h>
#include <Common/FieldVisitorToString.h>
#include <Common/logger_useful.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <Parsers/formatAST.h>
#include <Parsers/ASTCreateNamedCollectionQuery.h>
#include <Parsers/ASTAlterNamedCollectionQuery.h>
#include <Parsers/ASTDropNamedCollectionQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Interpreters/Context.h>
#include <Common/NamedCollections/NamedCollections.h>
#include <Common/NamedCollections/NamedCollectionConfiguration.h>
#include <Common/NamedCollections/NamedCollectionsFactory.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int NAMED_COLLECTION_ALREADY_EXISTS;
extern const int NAMED_COLLECTION_DOESNT_EXIST;
extern const int BAD_ARGUMENTS;
}
namespace NamedCollectionUtils
{
/// Set to true once collections from the server config have been added to the factory
/// (by loadFromConfigUnlocked() or reloadFromConfig()).
static std::atomic<bool> is_loaded_from_config = false;
/// Set to true once collections stored as SQL metadata files have been added to the factory
/// (by loadFromSQLUnlocked()).
static std::atomic<bool> is_loaded_from_sql = false;
class LoadFromConfig
{
private:
const Poco::Util::AbstractConfiguration & config;
public:
explicit LoadFromConfig(const Poco::Util::AbstractConfiguration & config_)
: config(config_) {}
std::vector<std::string> listCollections() const
{
Poco::Util::AbstractConfiguration::Keys collections_names;
config.keys(NAMED_COLLECTIONS_CONFIG_PREFIX, collections_names);
return collections_names;
}
NamedCollectionsMap getAll() const
{
NamedCollectionsMap result;
for (const auto & collection_name : listCollections())
{
if (result.contains(collection_name))
{
throw Exception(
ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
"Found duplicate named collection `{}`",
collection_name);
}
result.emplace(collection_name, get(collection_name));
}
return result;
}
MutableNamedCollectionPtr get(const std::string & collection_name) const
{
const auto collection_prefix = getCollectionPrefix(collection_name);
std::queue<std::string> enumerate_input;
std::set<std::string, std::less<>> enumerate_result;
enumerate_input.push(collection_prefix);
NamedCollectionConfiguration::listKeys(config, std::move(enumerate_input), enumerate_result, -1);
/// Collection does not have any keys.
/// (`enumerate_result` == <collection_path>).
const bool collection_is_empty = enumerate_result.size() == 1
&& *enumerate_result.begin() == collection_prefix;
std::set<std::string, std::less<>> keys;
if (!collection_is_empty)
{
/// Skip collection prefix and add +1 to avoid '.' in the beginning.
for (const auto & path : enumerate_result)
keys.emplace(path.substr(collection_prefix.size() + 1));
}
return NamedCollection::create(
config, collection_name, collection_prefix, keys, SourceId::CONFIG, /* is_mutable */false);
}
private:
static constexpr auto NAMED_COLLECTIONS_CONFIG_PREFIX = "named_collections";
static std::string getCollectionPrefix(const std::string & collection_name)
{
return fmt::format("{}.{}", NAMED_COLLECTIONS_CONFIG_PREFIX, collection_name);
}
};
class LoadFromSQL : private WithContext
{
private:
const std::string metadata_path;
public:
/// Resolves the metadata directory as `<weakly canonical context path>/named_collections`
/// and, if that directory already exists, runs cleanup() to delete leftover `.tmp` files.
explicit LoadFromSQL(ContextPtr context_)
: WithContext(context_)
, metadata_path(fs::weakly_canonical(context_->getPath()) / NAMED_COLLECTIONS_METADATA_DIRECTORY)
{
if (fs::exists(metadata_path))
cleanup();
}
std::vector<std::string> listCollections() const
{
if (!fs::exists(metadata_path))
return {};
std::vector<std::string> collection_names;
fs::directory_iterator it{metadata_path};
for (; it != fs::directory_iterator{}; ++it)
{
const auto & current_path = it->path();
if (current_path.extension() == ".sql")
{
collection_names.push_back(it->path().stem());
}
else
{
LOG_WARNING(
getLogger("NamedCollectionsLoadFromSQL"),
"Unexpected file {} in named collections directory",
current_path.filename().string());
}
}
return collection_names;
}
/// Loads every collection found in the metadata directory, keyed by the listed name.
/// Throws NAMED_COLLECTION_ALREADY_EXISTS if the same name is listed twice.
NamedCollectionsMap getAll() const
{
    NamedCollectionsMap collections;
    for (const auto & name : listCollections())
    {
        if (collections.contains(name))
            throw Exception(
                ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
                "Found duplicate named collection `{}`",
                name);

        collections.emplace(name, get(name));
    }
    return collections;
}
/// Parses the stored CREATE query of `collection_name` and builds a collection from it.
MutableNamedCollectionPtr get(const std::string & collection_name) const
{
    const auto create_query = readCreateQueryFromMetadata(
        getMetadataPath(collection_name), getContext()->getSettingsRef());

    return createNamedCollectionFromAST(create_query);
}
/// Persists the CREATE query as a new metadata file (without replacing an existing one)
/// and returns the collection built from that query.
MutableNamedCollectionPtr create(const ASTCreateNamedCollectionQuery & query)
{
    writeCreateQueryToMetadata(
        query, getMetadataPath(query.collection_name), getContext()->getSettingsRef());

    return createNamedCollectionFromAST(query);
}
void update(const ASTAlterNamedCollectionQuery & query)
{
const auto path = getMetadataPath(query.collection_name);
auto create_query = readCreateQueryFromMetadata(path, getContext()->getSettings());
std::unordered_map<std::string, Field> result_changes_map;
for (const auto & [name, value] : query.changes)
{
auto [it, inserted] = result_changes_map.emplace(name, value);
if (!inserted)
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Value with key `{}` is used twice in the SET query (collection name: {})",
name, query.collection_name);
}
}
for (const auto & [name, value] : create_query.changes)
result_changes_map.emplace(name, value);
std::unordered_map<std::string, bool> result_overridability_map;
for (const auto & [name, value] : query.overridability)
result_overridability_map.emplace(name, value);
for (const auto & [name, value] : create_query.overridability)
result_overridability_map.emplace(name, value);
for (const auto & delete_key : query.delete_keys)
{
auto it = result_changes_map.find(delete_key);
if (it == result_changes_map.end())
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Cannot delete key `{}` because it does not exist in collection",
delete_key);
}
else
{
result_changes_map.erase(it);
auto it_override = result_overridability_map.find(delete_key);
if (it_override != result_overridability_map.end())
result_overridability_map.erase(it_override);
}
}
create_query.changes.clear();
for (const auto & [name, value] : result_changes_map)
create_query.changes.emplace_back(name, value);
create_query.overridability = std::move(result_overridability_map);
if (create_query.changes.empty())
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Named collection cannot be empty (collection name: {})",
query.collection_name);
writeCreateQueryToMetadata(
create_query,
getMetadataPath(query.collection_name),
getContext()->getSettingsRef(),
true);
}
/// Deletes the metadata file of `collection_name`.
/// Throws NAMED_COLLECTION_DOESNT_EXIST if there is no such file.
void remove(const std::string & collection_name)
{
    const auto metadata_file = getMetadataPath(collection_name);
    if (fs::exists(metadata_file))
    {
        (void)fs::remove(metadata_file);
        return;
    }

    throw Exception(
        ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
        "Cannot remove collection `{}`, because it doesn't exist",
        collection_name);
}
private:
static constexpr auto NAMED_COLLECTIONS_METADATA_DIRECTORY = "named_collections";
/// Builds a mutable collection (source id SQL) from a CREATE query:
/// the query's changes become the configuration, their names become the key set.
static MutableNamedCollectionPtr createNamedCollectionFromAST(
    const ASTCreateNamedCollectionQuery & query)
{
    const auto & name = query.collection_name;
    const auto configuration
        = NamedCollectionConfiguration::createConfiguration(name, query.changes, query.overridability);

    std::set<std::string, std::less<>> keys;
    for (const auto & change : query.changes)
        keys.insert(change.name);

    return NamedCollection::create(
        *configuration, name, "", keys, SourceId::SQL, /* is_mutable */true);
}
/// Path of the metadata file of a collection: `<metadata_path>/<escaped collection name>.sql`.
std::string getMetadataPath(const std::string & collection_name) const
{
    const auto file_name = escapeForFileName(collection_name) + ".sql";
    return fs::path(metadata_path) / file_name;
}
/// Delete .tmp files. They could be left undeleted in case of
/// some exception or abrupt server restart.
void cleanup()
{
fs::directory_iterator it{metadata_path};
std::vector<std::string> files_to_remove;
for (; it != fs::directory_iterator{}; ++it)
{
const auto & current_path = it->path();
if (current_path.extension() == ".tmp")
files_to_remove.push_back(current_path);
}
for (const auto & file : files_to_remove)
(void)fs::remove(file);
}
/// Reads the whole file at `path` and parses it as a CREATE NAMED COLLECTION query,
/// using the parser depth and backtrack limits from `settings`.
/// Returns a copy of the parsed query node.
static ASTCreateNamedCollectionQuery readCreateQueryFromMetadata(
    const std::string & path,
    const Settings & settings)
{
    std::string query_text;
    ReadBufferFromFile file(path);
    readStringUntilEOF(query_text, file);

    ParserCreateNamedCollectionQuery parser;
    const auto description = "in file " + path;
    auto ast = parseQuery(
        parser, query_text, description, 0, settings.max_parser_depth, settings.max_parser_backtracks);

    return ast->as<const ASTCreateNamedCollectionQuery &>();
}
/// Serializes `query` and stores it at `path`.
///
/// The text is first written to `<path>.tmp` and then renamed onto `path`,
/// so `path` itself is never opened for writing.
///
/// Throws NAMED_COLLECTION_ALREADY_EXISTS if `path` exists and `replace` is false.
void writeCreateQueryToMetadata(
const ASTCreateNamedCollectionQuery & query,
const std::string & path,
const Settings & settings,
bool replace = false) const
{
if (!replace && fs::exists(path))
{
throw Exception(
ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
"Metadata file {} for named collection already exists",
path);
}
/// The metadata directory may not exist yet.
fs::create_directories(metadata_path);
auto tmp_path = path + ".tmp";
String formatted_query = serializeAST(query);
/// O_CREAT | O_EXCL: opening fails if the temporary file already exists.
WriteBufferFromFile out(tmp_path, formatted_query.size(), O_WRONLY | O_CREAT | O_EXCL);
writeString(formatted_query, out);
out.next();
/// fsync is optional and controlled by the `fsync_metadata` setting.
if (settings.fsync_metadata)
out.sync();
out.close();
/// Publish the finished file under its final name.
fs::rename(tmp_path, path);
}
};
/// Acquires the single mutex shared by all callers of this function
/// and returns the lock that owns it.
std::unique_lock<std::mutex> lockNamedCollectionsTransaction()
{
    static std::mutex transaction_mutex;
    std::unique_lock<std::mutex> guard(transaction_mutex);
    return guard;
}
/// Reads all collections from `config`, adds them to the factory and marks
/// the config source as loaded. The lock argument shows that the caller
/// already holds the transaction lock.
void loadFromConfigUnlocked(const Poco::Util::AbstractConfiguration & config, std::unique_lock<std::mutex> &)
{
    LoadFromConfig loader(config);
    auto collections = loader.getAll();

    LOG_TRACE(
        getLogger("NamedCollectionsUtils"),
        "Loaded {} collections from config", collections.size());

    NamedCollectionFactory::instance().add(std::move(collections));
    is_loaded_from_config = true;
}
/// Takes the transaction lock and loads all collections from `config` into the factory.
void loadFromConfig(const Poco::Util::AbstractConfiguration & config)
{
    auto transaction = lockNamedCollectionsTransaction();
    loadFromConfigUnlocked(config, transaction);
}
/// Replaces all config-defined collections in the factory with the ones currently in `config`.
///
/// The new set is read before anything is removed, so a failure while reading
/// leaves the factory untouched. Collections from other sources are not affected.
void reloadFromConfig(const Poco::Util::AbstractConfiguration & config)
{
    auto lock = lockNamedCollectionsTransaction();
    auto collections = LoadFromConfig(config).getAll();
    auto & instance = NamedCollectionFactory::instance();
    instance.removeById(SourceId::CONFIG);
    /// Hand the map over instead of copying it, as the other loaders do.
    instance.add(std::move(collections));
    is_loaded_from_config = true;
}
/// Reads all collections stored as SQL metadata, adds them to the factory and marks
/// the SQL source as loaded. The lock argument shows that the caller
/// already holds the transaction lock.
void loadFromSQLUnlocked(ContextPtr context, std::unique_lock<std::mutex> &)
{
    LoadFromSQL loader(context);
    auto collections = loader.getAll();

    LOG_TRACE(
        getLogger("NamedCollectionsUtils"),
        "Loaded {} collections from SQL", collections.size());

    NamedCollectionFactory::instance().add(std::move(collections));
    is_loaded_from_sql = true;
}
/// Takes the transaction lock and loads all SQL-defined collections into the factory.
void loadFromSQL(ContextPtr context)
{
    auto transaction = lockNamedCollectionsTransaction();
    loadFromSQLUnlocked(context, transaction);
}
/// Loads whichever of the two sources (config first, then SQL) has not been loaded yet,
/// using the global context. The caller must already hold the transaction lock.
void loadIfNotUnlocked(std::unique_lock<std::mutex> & lock)
{
    auto context = Context::getGlobalContextInstance();

    const bool config_pending = !is_loaded_from_config;
    if (config_pending)
        loadFromConfigUnlocked(context->getConfigRef(), lock);

    const bool sql_pending = !is_loaded_from_sql;
    if (sql_pending)
        loadFromSQLUnlocked(context, lock);
}
/// Loads collections from config and SQL unless both have been loaded already.
/// The flags are checked before taking the lock, so the loaded case does not lock.
void loadIfNot()
{
    if (!is_loaded_from_sql || !is_loaded_from_config)
    {
        auto transaction = lockNamedCollectionsTransaction();
        loadIfNotUnlocked(transaction);
    }
}
/// Executes DROP NAMED COLLECTION: deletes the metadata file, then removes the collection
/// from the factory. A missing collection is an error unless IF EXISTS was given.
void removeFromSQL(const ASTDropNamedCollectionQuery & query, ContextPtr context)
{
    auto transaction = lockNamedCollectionsTransaction();
    loadIfNotUnlocked(transaction);

    auto & factory = NamedCollectionFactory::instance();
    const auto & name = query.collection_name;

    if (factory.exists(name))
    {
        LoadFromSQL(context).remove(name);
        factory.remove(name);
        return;
    }

    if (query.if_exists)
        return;

    throw Exception(
        ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
        "Cannot remove collection `{}`, because it doesn't exist",
        name);
}
/// Executes CREATE NAMED COLLECTION: writes the metadata file and registers the new
/// collection in the factory. An existing collection is an error unless IF NOT EXISTS was given.
void createFromSQL(const ASTCreateNamedCollectionQuery & query, ContextPtr context)
{
    auto transaction = lockNamedCollectionsTransaction();
    loadIfNotUnlocked(transaction);

    auto & factory = NamedCollectionFactory::instance();
    const auto & name = query.collection_name;

    if (!factory.exists(name))
    {
        factory.add(name, LoadFromSQL(context).create(query));
        return;
    }

    if (query.if_not_exists)
        return;

    throw Exception(
        ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
        "A named collection `{}` already exists",
        name);
}
/// Executes ALTER NAMED COLLECTION.
///
/// The stored definition is rewritten on disk first; only then is the in-memory collection
/// updated under its own lock, so a failure while writing leaves the loaded collection unchanged.
/// A missing collection is an error unless IF EXISTS was given.
void updateFromSQL(const ASTAlterNamedCollectionQuery & query, ContextPtr context)
{
    auto lock = lockNamedCollectionsTransaction();
    loadIfNotUnlocked(lock);

    auto & instance = NamedCollectionFactory::instance();
    if (!instance.exists(query.collection_name))
    {
        if (!query.if_exists)
        {
            /// The message names the operation that actually failed (update, not remove).
            throw Exception(
                ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
                "Cannot update collection `{}`, because it doesn't exist",
                query.collection_name);
        }
        return;
    }

    LoadFromSQL(context).update(query);

    auto collection = instance.getMutable(query.collection_name);
    auto collection_lock = collection->lock();

    for (const auto & [name, value] : query.changes)
    {
        /// Pass the overridability flag only for keys that specify one in the query.
        auto it_override = query.overridability.find(name);
        if (it_override != query.overridability.end())
            collection->setOrUpdate<String, true>(name, convertFieldToString(value), it_override->second);
        else
            collection->setOrUpdate<String, true>(name, convertFieldToString(value), {});
    }

    for (const auto & key : query.delete_keys)
        collection->remove<true>(key);
}
}
}

View File

@ -1,42 +0,0 @@
#pragma once
#include <Interpreters/Context_fwd.h>
namespace Poco { namespace Util { class AbstractConfiguration; } }
namespace DB
{
class ASTCreateNamedCollectionQuery;
class ASTAlterNamedCollectionQuery;
class ASTDropNamedCollectionQuery;
/// Free functions for loading named collections into NamedCollectionFactory
/// and for applying CREATE / ALTER / DROP NAMED COLLECTION queries.
namespace NamedCollectionUtils
{
/// Where a collection was defined.
enum class SourceId : uint8_t
{
/// No source recorded.
NONE = 0,
/// Defined in the server configuration.
CONFIG = 1,
/// Defined by an SQL query.
SQL = 2,
};
/// Load named collections from the `named_collections` section of `config`.
void loadFromConfig(const Poco::Util::AbstractConfiguration & config);
/// Replace all previously loaded config-defined collections with the ones currently in `config`.
void reloadFromConfig(const Poco::Util::AbstractConfiguration & config);
/// Load named collections from `context->getPath() / named_collections /`.
void loadFromSQL(ContextPtr context);
/// Remove collection as well as its metadata from `context->getPath() / named_collections /`.
void removeFromSQL(const ASTDropNamedCollectionQuery & query, ContextPtr context);
/// Create a new collection from AST and put it to `context->getPath() / named_collections /`.
void createFromSQL(const ASTCreateNamedCollectionQuery & query, ContextPtr context);
/// Update definition of already existing collection from AST and update result in `context->getPath() / named_collections /`.
void updateFromSQL(const ASTAlterNamedCollectionQuery & query, ContextPtr context);
/// Load collections from config and from SQL metadata, skipping any source that is already loaded.
void loadIfNot();
}
}

View File

@ -4,7 +4,6 @@
#include <IO/WriteBufferFromString.h> #include <IO/WriteBufferFromString.h>
#include <IO/Operators.h> #include <IO/Operators.h>
#include <Common/NamedCollections/NamedCollectionConfiguration.h> #include <Common/NamedCollections/NamedCollectionConfiguration.h>
#include <Common/NamedCollections/NamedCollectionUtils.h>
#include <Poco/Util/AbstractConfiguration.h> #include <Poco/Util/AbstractConfiguration.h>
@ -297,7 +296,7 @@ MutableNamedCollectionPtr NamedCollection::duplicate() const
auto impl = pimpl->createCopy(collection_name); auto impl = pimpl->createCopy(collection_name);
return std::unique_ptr<NamedCollection>( return std::unique_ptr<NamedCollection>(
new NamedCollection( new NamedCollection(
std::move(impl), collection_name, NamedCollectionUtils::SourceId::NONE, true)); std::move(impl), collection_name, SourceId::NONE, true));
} }
NamedCollection::Keys NamedCollection::getKeys(ssize_t depth, const std::string & prefix) const NamedCollection::Keys NamedCollection::getKeys(ssize_t depth, const std::string & prefix) const

View File

@ -1,7 +1,6 @@
#pragma once #pragma once
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <Common/NamedCollections/NamedCollections_fwd.h> #include <Common/NamedCollections/NamedCollections_fwd.h>
#include <Common/NamedCollections/NamedCollectionUtils.h>
namespace Poco { namespace Util { class AbstractConfiguration; } } namespace Poco { namespace Util { class AbstractConfiguration; } }
@ -23,7 +22,12 @@ class NamedCollection
public: public:
using Key = std::string; using Key = std::string;
using Keys = std::set<Key, std::less<>>; using Keys = std::set<Key, std::less<>>;
using SourceId = NamedCollectionUtils::SourceId; enum class SourceId : uint8_t
{
NONE = 0,
CONFIG = 1,
SQL = 2,
};
static MutableNamedCollectionPtr create( static MutableNamedCollectionPtr create(
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,

View File

@ -1,5 +1,7 @@
#include <Common/NamedCollections/NamedCollectionsFactory.h> #include <Common/NamedCollections/NamedCollectionsFactory.h>
#include <Common/NamedCollections/NamedCollectionUtils.h> #include <Common/NamedCollections/NamedCollectionConfiguration.h>
#include <Common/NamedCollections/NamedCollectionsMetadataStorage.h>
#include <base/sleep.h>
namespace DB namespace DB
{ {
@ -17,23 +19,29 @@ NamedCollectionFactory & NamedCollectionFactory::instance()
return instance; return instance;
} }
/// Runs shutdown() when the factory is destroyed.
NamedCollectionFactory::~NamedCollectionFactory()
{
shutdown();
}
/// Marks the factory as shut down, deactivates the periodic update task if one was created,
/// and releases the metadata storage.
void NamedCollectionFactory::shutdown()
{
shutdown_called = true;
/// The task only exists if the storage supports periodic updates (see loadIfNot()).
if (update_task)
update_task->deactivate();
metadata_storage.reset();
}
bool NamedCollectionFactory::exists(const std::string & collection_name) const bool NamedCollectionFactory::exists(const std::string & collection_name) const
{ {
std::lock_guard lock(mutex); std::lock_guard lock(mutex);
return existsUnlocked(collection_name, lock); return exists(collection_name, lock);
}
bool NamedCollectionFactory::existsUnlocked(
const std::string & collection_name,
std::lock_guard<std::mutex> & /* lock */) const
{
return loaded_named_collections.contains(collection_name);
} }
NamedCollectionPtr NamedCollectionFactory::get(const std::string & collection_name) const NamedCollectionPtr NamedCollectionFactory::get(const std::string & collection_name) const
{ {
std::lock_guard lock(mutex); std::lock_guard lock(mutex);
auto collection = tryGetUnlocked(collection_name, lock); auto collection = tryGet(collection_name, lock);
if (!collection) if (!collection)
{ {
throw Exception( throw Exception(
@ -47,14 +55,35 @@ NamedCollectionPtr NamedCollectionFactory::get(const std::string & collection_na
NamedCollectionPtr NamedCollectionFactory::tryGet(const std::string & collection_name) const NamedCollectionPtr NamedCollectionFactory::tryGet(const std::string & collection_name) const
{ {
std::lock_guard lock(mutex); std::lock_guard lock(mutex);
return tryGetUnlocked(collection_name, lock); return tryGet(collection_name, lock);
}
/// Returns a copy of the map of loaded collections, taken under the factory mutex.
NamedCollectionsMap NamedCollectionFactory::getAll() const
{
std::lock_guard lock(mutex);
return loaded_named_collections;
}
/// Whether a collection with this name is loaded.
/// The lock argument shows that the caller already holds the factory mutex.
bool NamedCollectionFactory::exists(const std::string & collection_name, std::lock_guard<std::mutex> &) const
{
return loaded_named_collections.contains(collection_name);
}
MutableNamedCollectionPtr NamedCollectionFactory::tryGet(
const std::string & collection_name,
std::lock_guard<std::mutex> &) const
{
auto it = loaded_named_collections.find(collection_name);
if (it == loaded_named_collections.end())
return nullptr;
return it->second;
} }
MutableNamedCollectionPtr NamedCollectionFactory::getMutable( MutableNamedCollectionPtr NamedCollectionFactory::getMutable(
const std::string & collection_name) const const std::string & collection_name,
std::lock_guard<std::mutex> & lock) const
{ {
std::lock_guard lock(mutex); auto collection = tryGet(collection_name, lock);
auto collection = tryGetUnlocked(collection_name, lock);
if (!collection) if (!collection)
{ {
throw Exception( throw Exception(
@ -73,35 +102,10 @@ MutableNamedCollectionPtr NamedCollectionFactory::getMutable(
return collection; return collection;
} }
MutableNamedCollectionPtr NamedCollectionFactory::tryGetUnlocked(
const std::string & collection_name,
std::lock_guard<std::mutex> & /* lock */) const
{
auto it = loaded_named_collections.find(collection_name);
if (it == loaded_named_collections.end())
return nullptr;
return it->second;
}
void NamedCollectionFactory::add( void NamedCollectionFactory::add(
const std::string & collection_name,
MutableNamedCollectionPtr collection)
{
std::lock_guard lock(mutex);
addUnlocked(collection_name, collection, lock);
}
void NamedCollectionFactory::add(NamedCollectionsMap collections)
{
std::lock_guard lock(mutex);
for (const auto & [collection_name, collection] : collections)
addUnlocked(collection_name, collection, lock);
}
void NamedCollectionFactory::addUnlocked(
const std::string & collection_name, const std::string & collection_name,
MutableNamedCollectionPtr collection, MutableNamedCollectionPtr collection,
std::lock_guard<std::mutex> & /* lock */) std::lock_guard<std::mutex> &)
{ {
auto [it, inserted] = loaded_named_collections.emplace(collection_name, collection); auto [it, inserted] = loaded_named_collections.emplace(collection_name, collection);
if (!inserted) if (!inserted)
@ -113,10 +117,15 @@ void NamedCollectionFactory::addUnlocked(
} }
} }
void NamedCollectionFactory::remove(const std::string & collection_name) void NamedCollectionFactory::add(NamedCollectionsMap collections, std::lock_guard<std::mutex> & lock)
{ {
std::lock_guard lock(mutex); for (const auto & [collection_name, collection] : collections)
bool removed = removeIfExistsUnlocked(collection_name, lock); add(collection_name, collection, lock);
}
void NamedCollectionFactory::remove(const std::string & collection_name, std::lock_guard<std::mutex> & lock)
{
bool removed = removeIfExists(collection_name, lock);
if (!removed) if (!removed)
{ {
throw Exception( throw Exception(
@ -126,17 +135,11 @@ void NamedCollectionFactory::remove(const std::string & collection_name)
} }
} }
void NamedCollectionFactory::removeIfExists(const std::string & collection_name) bool NamedCollectionFactory::removeIfExists(
{
std::lock_guard lock(mutex);
removeIfExistsUnlocked(collection_name, lock); // NOLINT
}
bool NamedCollectionFactory::removeIfExistsUnlocked(
const std::string & collection_name, const std::string & collection_name,
std::lock_guard<std::mutex> & lock) std::lock_guard<std::mutex> & lock)
{ {
auto collection = tryGetUnlocked(collection_name, lock); auto collection = tryGet(collection_name, lock);
if (!collection) if (!collection)
return false; return false;
@ -152,18 +155,246 @@ bool NamedCollectionFactory::removeIfExistsUnlocked(
return true; return true;
} }
void NamedCollectionFactory::removeById(NamedCollectionUtils::SourceId id) void NamedCollectionFactory::removeById(NamedCollection::SourceId id, std::lock_guard<std::mutex> &)
{ {
std::lock_guard lock(mutex);
std::erase_if( std::erase_if(
loaded_named_collections, loaded_named_collections,
[&](const auto & value) { return value.second->getSourceId() == id; }); [&](const auto & value) { return value.second->getSourceId() == id; });
} }
NamedCollectionsMap NamedCollectionFactory::getAll() const namespace
{
constexpr auto NAMED_COLLECTIONS_CONFIG_PREFIX = "named_collections";
/// Names of all collections in `config`: the direct child keys of the `named_collections` section.
std::vector<std::string> listCollections(const Poco::Util::AbstractConfiguration & config)
{
    Poco::Util::AbstractConfiguration::Keys names;
    config.keys(NAMED_COLLECTIONS_CONFIG_PREFIX, names);
    return names;
}
/// Builds one immutable collection (source id CONFIG) from the keys found
/// under `named_collections.<collection_name>` in `config`.
MutableNamedCollectionPtr getCollection(
    const Poco::Util::AbstractConfiguration & config,
    const std::string & collection_name)
{
    const auto prefix = fmt::format("{}.{}", NAMED_COLLECTIONS_CONFIG_PREFIX, collection_name);

    std::queue<std::string> pending;
    pending.push(prefix);
    std::set<std::string, std::less<>> full_paths;
    NamedCollectionConfiguration::listKeys(config, std::move(pending), full_paths, -1);

    /// A collection without any keys is reported as just the collection path itself.
    const bool only_prefix_found = full_paths.size() == 1 && *full_paths.begin() == prefix;

    std::set<std::string, std::less<>> keys;
    if (!only_prefix_found)
    {
        /// Strip the collection prefix plus the '.' that follows it.
        const auto skip = prefix.size() + 1;
        for (const auto & full_path : full_paths)
            keys.emplace(full_path.substr(skip));
    }

    return NamedCollection::create(
        config, collection_name, prefix, keys, NamedCollection::SourceId::CONFIG, /* is_mutable */false);
}
/// Builds every collection listed in `config`, keyed by collection name.
/// Throws NAMED_COLLECTION_ALREADY_EXISTS if the same name is listed twice.
NamedCollectionsMap getNamedCollections(const Poco::Util::AbstractConfiguration & config)
{
    NamedCollectionsMap collections;
    for (const auto & name : listCollections(config))
    {
        if (collections.contains(name))
            throw Exception(
                ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
                "Found duplicate named collection `{}`",
                name);

        collections.emplace(name, getCollection(config, name));
    }
    return collections;
}
}
void NamedCollectionFactory::loadIfNot()
{ {
std::lock_guard lock(mutex); std::lock_guard lock(mutex);
return loaded_named_collections; loadIfNot(lock);
}
/// Performs the one-time initial load of named collections.
/// Returns false if the load already happened, true if this call performed it.
/// The caller must hold the factory mutex (`lock`).
bool NamedCollectionFactory::loadIfNot(std::lock_guard<std::mutex> & lock)
{
if (loaded)
return false;
auto context = Context::getGlobalContextInstance();
/// The storage must exist before loadFromSQL(), which reads from it.
metadata_storage = NamedCollectionsMetadataStorage::create(context);
loadFromConfig(context->getConfigRef(), lock);
loadFromSQL(lock);
/// A background task is created only if the storage asks for periodic updates.
if (metadata_storage->supportsPeriodicUpdate())
{
update_task = context->getSchedulePool().createTask("NamedCollectionsMetadataStorage", [this]{ updateFunc(); });
update_task->activate();
update_task->schedule();
}
/// Set last: if anything above throws, the flag stays false.
loaded = true;
return true;
}
/// Read all named collections from `config` and add them to the factory.
/// The caller must hold `mutex`.
void NamedCollectionFactory::loadFromConfig(const Poco::Util::AbstractConfiguration & config, std::lock_guard<std::mutex> & lock)
{
    auto config_collections = getNamedCollections(config);
    LOG_TEST(log, "Loaded {} collections from config", config_collections.size());

    add(std::move(config_collections), lock);
}
/// Replace the config-sourced collections with the ones found in `config`.
/// If the factory had not been loaded yet, the initial load done by
/// loadIfNot() is all that happens.
void NamedCollectionFactory::reloadFromConfig(const Poco::Util::AbstractConfiguration & config)
{
    std::lock_guard lock(mutex);

    const bool just_loaded = loadIfNot(lock);
    if (!just_loaded)
    {
        auto fresh_collections = getNamedCollections(config);
        LOG_TEST(log, "Loaded {} collections from config", fresh_collections.size());

        /// Drop the previous config-sourced collections before adding the new set.
        removeById(NamedCollection::SourceId::CONFIG, lock);
        add(std::move(fresh_collections), lock);
    }
}
/// Fetch every collection from the metadata storage and add it to the factory.
/// The caller must hold `mutex`.
void NamedCollectionFactory::loadFromSQL(std::lock_guard<std::mutex> & lock)
{
    auto stored_collections = metadata_storage->getAll();
    LOG_TEST(log, "Loaded {} collections from sql", stored_collections.size());

    add(std::move(stored_collections), lock);
}
/// Handle a CREATE query: persist the collection through the metadata storage
/// and register it in the factory.
/// If the name is already taken: no-op when `query.if_not_exists` is set,
/// otherwise throws NAMED_COLLECTION_ALREADY_EXISTS.
void NamedCollectionFactory::createFromSQL(const ASTCreateNamedCollectionQuery & query)
{
    std::lock_guard lock(mutex);
    loadIfNot(lock);

    const auto & name = query.collection_name;
    if (!exists(name, lock))
    {
        add(name, metadata_storage->create(query), lock);
        return;
    }

    if (query.if_not_exists)
        return;

    throw Exception(
        ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
        "A named collection `{}` already exists",
        name);
}
/// Handle a DROP query: remove the collection from the metadata storage and
/// then from the factory.
/// If the collection is unknown: no-op when `query.if_exists` is set,
/// otherwise throws NAMED_COLLECTION_DOESNT_EXIST.
void NamedCollectionFactory::removeFromSQL(const ASTDropNamedCollectionQuery & query)
{
    std::lock_guard lock(mutex);
    loadIfNot(lock);

    const auto & name = query.collection_name;
    if (exists(name, lock))
    {
        metadata_storage->remove(name);
        remove(name, lock);
        return;
    }

    if (query.if_exists)
        return;

    throw Exception(
        ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
        "Cannot remove collection `{}`, because it doesn't exist",
        name);
}
/// Handle an ALTER query: update the stored metadata first, then apply the same
/// changes and key deletions to the in-memory collection.
/// If the collection is unknown: no-op when `query.if_exists` is set,
/// otherwise throws NAMED_COLLECTION_DOESNT_EXIST.
void NamedCollectionFactory::updateFromSQL(const ASTAlterNamedCollectionQuery & query)
{
    std::lock_guard lock(mutex);
    loadIfNot(lock);

    if (!exists(query.collection_name, lock))
    {
        if (query.if_exists)
            return;

        /// The message names the operation that actually failed (update, not remove).
        throw Exception(
            ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
            "Cannot update collection `{}`, because it doesn't exist",
            query.collection_name);
    }

    /// Persist first: if the storage rejects the change, the in-memory
    /// collection is left untouched.
    metadata_storage->update(query);

    auto collection = getMutable(query.collection_name, lock);
    auto collection_lock = collection->lock();

    for (const auto & [name, value] : query.changes)
    {
        /// Pass an explicit overridability flag only if the query specified one for this key.
        auto it_override = query.overridability.find(name);
        if (it_override != query.overridability.end())
            collection->setOrUpdate<String, true>(name, convertFieldToString(value), it_override->second);
        else
            collection->setOrUpdate<String, true>(name, convertFieldToString(value), {});
    }

    for (const auto & key : query.delete_keys)
        collection->remove<true>(key);
}
/// Replace the SQL-sourced collections with the current contents of the
/// metadata storage. If the factory had not been loaded yet, the initial load
/// done by loadIfNot() is all that happens.
void NamedCollectionFactory::reloadFromSQL()
{
    std::lock_guard lock(mutex);

    const bool just_loaded = loadIfNot(lock);
    if (!just_loaded)
    {
        /// Read the new set before dropping the old one.
        auto stored_collections = metadata_storage->getAll();
        removeById(NamedCollection::SourceId::SQL, lock);
        add(std::move(stored_collections), lock);
    }
}
/// Body of the background update task: until shutdown is requested, wait for
/// the metadata storage to report a change and reload the SQL-sourced
/// collections when it does.
///
/// Both the wait and the reload are inside the try block, so an exception
/// thrown while polling the storage cannot escape the loop and end the task.
/// Every failure path backs off for a second so that a persistent error
/// cannot turn the loop into a busy spin.
void NamedCollectionFactory::updateFunc()
{
    LOG_TRACE(log, "Named collections background updating thread started");

    while (!shutdown_called.load())
    {
        try
        {
            if (metadata_storage->waitUpdate())
                reloadFromSQL();
        }
        catch (const Coordination::Exception & e)
        {
            if (Coordination::isHardwareError(e.code))
            {
                LOG_INFO(log, "Lost ZooKeeper connection, will try to connect again: {}",
                    DB::getCurrentExceptionMessage(true));
            }
            else
            {
                tryLogCurrentException(__PRETTY_FUNCTION__);
                chassert(false);
            }
            sleepForSeconds(1);
        }
        catch (...)
        {
            DB::tryLogCurrentException(__PRETTY_FUNCTION__);
            chassert(false);
            sleepForSeconds(1);
        }
    }

    LOG_TRACE(log, "Named collections background updating thread finished");
}
} }

View File

@ -1,58 +1,83 @@
#pragma once #pragma once
#include <Common/NamedCollections/NamedCollections.h> #include <Common/NamedCollections/NamedCollections.h>
#include <Common/NamedCollections/NamedCollectionsMetadataStorage.h>
#include <Common/logger_useful.h>
namespace DB namespace DB
{ {
class ASTCreateNamedCollectionQuery;
class ASTDropNamedCollectionQuery;
class ASTAlterNamedCollectionQuery;
class NamedCollectionFactory : boost::noncopyable class NamedCollectionFactory : boost::noncopyable
{ {
public: public:
static NamedCollectionFactory & instance(); static NamedCollectionFactory & instance();
~NamedCollectionFactory();
bool exists(const std::string & collection_name) const; bool exists(const std::string & collection_name) const;
NamedCollectionPtr get(const std::string & collection_name) const; NamedCollectionPtr get(const std::string & collection_name) const;
NamedCollectionPtr tryGet(const std::string & collection_name) const; NamedCollectionPtr tryGet(const std::string & collection_name) const;
MutableNamedCollectionPtr getMutable(const std::string & collection_name) const;
void add(const std::string & collection_name, MutableNamedCollectionPtr collection);
void add(NamedCollectionsMap collections);
void update(NamedCollectionsMap collections);
void remove(const std::string & collection_name);
void removeIfExists(const std::string & collection_name);
void removeById(NamedCollectionUtils::SourceId id);
NamedCollectionsMap getAll() const; NamedCollectionsMap getAll() const;
private: void reloadFromConfig(const Poco::Util::AbstractConfiguration & config);
bool existsUnlocked(
const std::string & collection_name,
std::lock_guard<std::mutex> & lock) const;
MutableNamedCollectionPtr tryGetUnlocked( void reloadFromSQL();
const std::string & collection_name,
std::lock_guard<std::mutex> & lock) const;
void addUnlocked( void createFromSQL(const ASTCreateNamedCollectionQuery & query);
const std::string & collection_name,
MutableNamedCollectionPtr collection,
std::lock_guard<std::mutex> & lock);
bool removeIfExistsUnlocked( void removeFromSQL(const ASTDropNamedCollectionQuery & query);
const std::string & collection_name,
std::lock_guard<std::mutex> & lock);
void updateFromSQL(const ASTAlterNamedCollectionQuery & query);
void loadIfNot();
void shutdown();
protected:
mutable NamedCollectionsMap loaded_named_collections; mutable NamedCollectionsMap loaded_named_collections;
mutable std::mutex mutex; mutable std::mutex mutex;
bool is_initialized = false;
const LoggerPtr log = getLogger("NamedCollectionFactory");
bool loaded = false;
std::atomic<bool> shutdown_called = false;
std::unique_ptr<NamedCollectionsMetadataStorage> metadata_storage;
BackgroundSchedulePool::TaskHolder update_task;
bool loadIfNot(std::lock_guard<std::mutex> & lock);
bool exists(
const std::string & collection_name,
std::lock_guard<std::mutex> & lock) const;
MutableNamedCollectionPtr getMutable(const std::string & collection_name, std::lock_guard<std::mutex> & lock) const;
void add(const std::string & collection_name, MutableNamedCollectionPtr collection, std::lock_guard<std::mutex> & lock);
void add(NamedCollectionsMap collections, std::lock_guard<std::mutex> & lock);
void update(NamedCollectionsMap collections, std::lock_guard<std::mutex> & lock);
void remove(const std::string & collection_name, std::lock_guard<std::mutex> & lock);
bool removeIfExists(const std::string & collection_name, std::lock_guard<std::mutex> & lock);
MutableNamedCollectionPtr tryGet(const std::string & collection_name, std::lock_guard<std::mutex> & lock) const;
void removeById(NamedCollection::SourceId id, std::lock_guard<std::mutex> & lock);
void loadFromConfig(
const Poco::Util::AbstractConfiguration & config,
std::lock_guard<std::mutex> & lock);
void loadFromSQL(std::lock_guard<std::mutex> & lock);
void updateFunc();
}; };
} }

View File

@ -0,0 +1,519 @@
#include <Common/NamedCollections/NamedCollectionsMetadataStorage.h>
#include <Common/NamedCollections/NamedCollectionConfiguration.h>
#include <Common/escapeForFileName.h>
#include <Common/logger_useful.h>
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <Parsers/parseQuery.h>
#include <Parsers/ParserCreateQuery.h>
#include <Parsers/formatAST.h>
#include <Interpreters/Context.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
namespace ErrorCodes
{
extern const int NAMED_COLLECTION_ALREADY_EXISTS;
extern const int NAMED_COLLECTION_DOESNT_EXIST;
extern const int INVALID_CONFIG_PARAMETER;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
static const std::string named_collections_storage_config_path = "named_collections_storage";
namespace
{
/// Build a mutable, SQL-sourced named collection from a CREATE query:
/// the key set is the set of names in `query.changes`, and the backing
/// configuration is created from the query's changes and overridability flags.
MutableNamedCollectionPtr createNamedCollectionFromAST(const ASTCreateNamedCollectionQuery & query)
{
    std::set<std::string, std::less<>> key_names;
    for (const auto & change : query.changes)
        key_names.insert(change.name);

    const auto & name = query.collection_name;
    const auto configuration = NamedCollectionConfiguration::createConfiguration(name, query.changes, query.overridability);

    return NamedCollection::create(
        *configuration, name, "", key_names, NamedCollection::SourceId::SQL, /* is_mutable */true);
}
/// Metadata file name for a collection: the escaped collection name plus ".sql".
std::string getFileName(const std::string & collection_name)
{
    std::string file_name = escapeForFileName(collection_name);
    file_name += ".sql";
    return file_name;
}
}
/// Interface of a backend that keeps named collection metadata entries.
/// In the implementations in this file, `path` is an entry name that the
/// backend resolves against its own root path.
class NamedCollectionsMetadataStorage::INamedCollectionsStorage
{
public:
virtual ~INamedCollectionsStorage() = default;
/// Whether an entry with this name exists.
virtual bool exists(const std::string & path) const = 0;
/// List the stored entries.
virtual std::vector<std::string> list() const = 0;
/// Return the full contents of an entry.
virtual std::string read(const std::string & path) const = 0;
/// Store `data` under `path`; `replace` tells whether an existing entry may be overwritten.
virtual void write(const std::string & path, const std::string & data, bool replace) = 0;
/// Remove an entry.
virtual void remove(const std::string & path) = 0;
/// Remove an entry if present; the bool result reports whether something was removed.
virtual bool removeIfExists(const std::string & path) = 0;
/// Whether the backend should be polled for changes via waitUpdate().
virtual bool supportsPeriodicUpdate() const = 0;
/// Wait for a change; the default implementation reports "no change" immediately.
virtual bool waitUpdate(size_t /* timeout */) { return false; }
};
/// Keeps named collection metadata as files inside a directory (`root_path`)
/// on the local filesystem. Entry names passed to the methods are resolved
/// against `root_path` by getPath().
class NamedCollectionsMetadataStorage::LocalStorage : public INamedCollectionsStorage, private WithContext
{
private:
    std::string root_path;

public:
    LocalStorage(ContextPtr context_, const std::string & path_)
        : WithContext(context_)
        , root_path(path_)
    {
        /// Remove leftover temporary files from a previous run.
        if (fs::exists(root_path))
            cleanup();
    }

    ~LocalStorage() override = default;

    bool supportsPeriodicUpdate() const override { return false; }

    /// Return the paths of all `.sql` files in the directory;
    /// any other file is reported with a warning and skipped.
    std::vector<std::string> list() const override
    {
        if (!fs::exists(root_path))
            return {};

        std::vector<std::string> elements;
        for (fs::directory_iterator it{root_path}; it != fs::directory_iterator{}; ++it)
        {
            const auto & current_path = it->path();
            if (current_path.extension() == ".sql")
            {
                elements.push_back(it->path());
            }
            else
            {
                LOG_WARNING(
                    getLogger("LocalStorage"),
                    "Unexpected file {} in named collections directory",
                    current_path.filename().string());
            }
        }
        return elements;
    }

    bool exists(const std::string & path) const override
    {
        return fs::exists(getPath(path));
    }

    std::string read(const std::string & path) const override
    {
        ReadBufferFromFile in(getPath(path));
        std::string data;
        readStringUntilEOF(data, in);
        return data;
    }

    /// Write `data` to the entry `path`: the data goes to a `.tmp` file first,
    /// which is then renamed over the final name.
    /// Throws NAMED_COLLECTION_ALREADY_EXISTS if `replace` is false and the
    /// entry is already present.
    void write(const std::string & path, const std::string & data, bool replace) override
    {
        /// The existence check must look at the resolved location inside
        /// `root_path`, not at the bare entry name.
        if (!replace && fs::exists(getPath(path)))
        {
            throw Exception(
                ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
                "Metadata file {} for named collection already exists",
                path);
        }

        fs::create_directories(root_path);

        auto tmp_path = getPath(path + ".tmp");
        WriteBufferFromFile out(tmp_path, data.size(), O_WRONLY | O_CREAT | O_EXCL);
        writeString(data, out);

        out.next();
        if (getContext()->getSettingsRef().fsync_metadata)
            out.sync();
        out.close();

        fs::rename(tmp_path, getPath(path));
    }

    /// Remove the entry; throws NAMED_COLLECTION_DOESNT_EXIST if it is absent.
    void remove(const std::string & path) override
    {
        /// removeIfExists() resolves the name itself, so pass it unresolved.
        if (!removeIfExists(path))
        {
            throw Exception(
                ErrorCodes::NAMED_COLLECTION_DOESNT_EXIST,
                "Cannot remove `{}`, because it doesn't exist", path);
        }
    }

    bool removeIfExists(const std::string & path) override
    {
        return fs::remove(getPath(path));
    }

private:
    /// Resolve an entry name against the storage directory.
    std::string getPath(const std::string & path) const
    {
        return fs::path(root_path) / path;
    }

    /// Delete .tmp files. They could be left undeleted in case of
    /// some exception or abrupt server restart.
    void cleanup()
    {
        std::vector<std::string> files_to_remove;
        for (fs::directory_iterator it{root_path}; it != fs::directory_iterator{}; ++it)
        {
            const auto & current_path = it->path();
            if (current_path.extension() == ".tmp")
                files_to_remove.push_back(current_path);
        }
        for (const auto & file : files_to_remove)
            fs::remove(file);
    }
};
/// Keeps named collection metadata as child nodes of `root_path` in ZooKeeper.
/// Entry names passed to the methods are resolved against `root_path` by getPath().
class NamedCollectionsMetadataStorage::ZooKeeperStorage : public INamedCollectionsStorage, private WithContext
{
private:
    std::string root_path;
    mutable zkutil::ZooKeeperPtr zookeeper_client{nullptr};
    /// Event passed to getChildren() in list(); waitUpdate() waits on it.
    mutable zkutil::EventPtr wait_event;
    /// cversion of the root node as seen by the last list() call.
    mutable Int32 collections_node_cversion = 0;

public:
    ZooKeeperStorage(ContextPtr context_, const std::string & path_)
        : WithContext(context_)
        , root_path(path_)
    {
        if (root_path.empty())
            throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Collections path cannot be empty");

        /// Normalize: no trailing slash (except for the root itself), always a leading slash.
        if (root_path != "/" && root_path.back() == '/')
            root_path.resize(root_path.size() - 1);
        if (root_path.front() != '/')
            root_path = "/" + root_path;

        auto client = getClient();
        if (root_path != "/" && !client->exists(root_path))
        {
            client->createAncestors(root_path);
            client->createIfNotExists(root_path, "");
        }
    }

    ~ZooKeeperStorage() override = default;

    bool supportsPeriodicUpdate() const override { return true; }

    /// Return true if children changed.
    bool waitUpdate(size_t timeout) override
    {
        if (!wait_event)
        {
            /// We did not yet made any list() attempt, so do that.
            return true;
        }

        if (wait_event->tryWait(timeout))
        {
            /// Children changed before timeout.
            return true;
        }

        /// Timed out: compare the root node's cversion with the one
        /// recorded by the last list() call.
        std::string res;
        Coordination::Stat stat;

        if (!getClient()->tryGet(root_path, res, &stat))
        {
            /// We do create root_path in constructor of this class,
            /// so this case is not really possible.
            chassert(false);
            return false;
        }

        return stat.cversion != collections_node_cversion;
    }

    /// Return the names of the child nodes, passing `wait_event` along and
    /// recording the root node's cversion for waitUpdate().
    std::vector<std::string> list() const override
    {
        if (!wait_event)
            wait_event = std::make_shared<Poco::Event>();

        Coordination::Stat stat;
        auto children = getClient()->getChildren(root_path, &stat, wait_event);
        collections_node_cversion = stat.cversion;
        return children;
    }

    bool exists(const std::string & path) const override
    {
        return getClient()->exists(getPath(path));
    }

    std::string read(const std::string & path) const override
    {
        return getClient()->get(getPath(path));
    }

    /// Store `data` in the node for `path`.
    /// With `replace` the node is created or updated. Without it the node must
    /// not exist yet: NAMED_COLLECTION_ALREADY_EXISTS is thrown if it does, and
    /// any other non-OK result of the create is rethrown as a Coordination
    /// exception so that a failed write is never reported as success.
    void write(const std::string & path, const std::string & data, bool replace) override
    {
        if (replace)
        {
            getClient()->createOrUpdate(getPath(path), data, zkutil::CreateMode::Persistent);
        }
        else
        {
            auto code = getClient()->tryCreate(getPath(path), data, zkutil::CreateMode::Persistent);

            if (code == Coordination::Error::ZNODEEXISTS)
            {
                throw Exception(
                    ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
                    "Metadata file {} for named collection already exists",
                    path);
            }

            if (code != Coordination::Error::ZOK)
                throw Coordination::Exception::fromPath(code, getPath(path));
        }
    }

    void remove(const std::string & path) override
    {
        getClient()->remove(getPath(path));
    }

    /// Returns true if the node was removed, false if it did not exist;
    /// any other result code is thrown as a Coordination exception.
    bool removeIfExists(const std::string & path) override
    {
        auto code = getClient()->tryRemove(getPath(path));
        if (code == Coordination::Error::ZOK)
            return true;
        if (code == Coordination::Error::ZNONODE)
            return false;
        throw Coordination::Exception::fromPath(code, getPath(path));
    }

private:
    /// Return the cached client, fetching a new one from the context
    /// (and syncing on `root_path`) if there is none or it has expired.
    zkutil::ZooKeeperPtr getClient() const
    {
        if (!zookeeper_client || zookeeper_client->expired())
        {
            zookeeper_client = getContext()->getZooKeeper();
            zookeeper_client->sync(root_path);
        }
        return zookeeper_client;
    }

    /// Resolve an entry name against the root node path.
    std::string getPath(const std::string & path) const
    {
        return fs::path(root_path) / path;
    }
};
/// Wrap an already constructed storage backend; `context_` is kept via WithContext.
NamedCollectionsMetadataStorage::NamedCollectionsMetadataStorage(
std::shared_ptr<INamedCollectionsStorage> storage_,
ContextPtr context_)
: WithContext(context_)
, storage(std::move(storage_))
{
}
/// Read the stored CREATE query of `collection_name` and build a collection from it.
MutableNamedCollectionPtr NamedCollectionsMetadataStorage::get(const std::string & collection_name) const
{
    return createNamedCollectionFromAST(readCreateQuery(collection_name));
}
/// Load every collection reported by listCollections() into a name -> collection map.
/// Throws NAMED_COLLECTION_ALREADY_EXISTS if the same name is listed twice.
NamedCollectionsMap NamedCollectionsMetadataStorage::getAll() const
{
    NamedCollectionsMap collections_by_name;

    for (const auto & name : listCollections())
    {
        const bool already_seen = collections_by_name.contains(name);
        if (already_seen)
            throw Exception(
                ErrorCodes::NAMED_COLLECTION_ALREADY_EXISTS,
                "Found duplicate named collection `{}`",
                name);

        auto collection = get(name);
        collections_by_name.emplace(name, std::move(collection));
    }

    return collections_by_name;
}
/// Persist the CREATE query (without replacing an existing entry) and return
/// the collection built from it.
MutableNamedCollectionPtr NamedCollectionsMetadataStorage::create(const ASTCreateNamedCollectionQuery & query)
{
    writeCreateQuery(query, /* replace */ false);
    auto collection = createNamedCollectionFromAST(query);
    return collection;
}
/// Remove the stored metadata entry of `collection_name`.
void NamedCollectionsMetadataStorage::remove(const std::string & collection_name)
{
    const auto file_name = getFileName(collection_name);
    storage->remove(file_name);
}
/// Remove the stored metadata entry of `collection_name` if it is present;
/// forwards the backend's bool result.
bool NamedCollectionsMetadataStorage::removeIfExists(const std::string & collection_name)
{
    const auto file_name = getFileName(collection_name);
    return storage->removeIfExists(file_name);
}
void NamedCollectionsMetadataStorage::update(const ASTAlterNamedCollectionQuery & query)
{
auto create_query = readCreateQuery(query.collection_name);
std::unordered_map<std::string, Field> result_changes_map;
for (const auto & [name, value] : query.changes)
{
auto [it, inserted] = result_changes_map.emplace(name, value);
if (!inserted)
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Value with key `{}` is used twice in the SET query (collection name: {})",
name, query.collection_name);
}
}
for (const auto & [name, value] : create_query.changes)
result_changes_map.emplace(name, value);
std::unordered_map<std::string, bool> result_overridability_map;
for (const auto & [name, value] : query.overridability)
result_overridability_map.emplace(name, value);
for (const auto & [name, value] : create_query.overridability)
result_overridability_map.emplace(name, value);
for (const auto & delete_key : query.delete_keys)
{
auto it = result_changes_map.find(delete_key);
if (it == result_changes_map.end())
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Cannot delete key `{}` because it does not exist in collection",
delete_key);
}
else
{
result_changes_map.erase(it);
auto it_override = result_overridability_map.find(delete_key);
if (it_override != result_overridability_map.end())
result_overridability_map.erase(it_override);
}
}
create_query.changes.clear();
for (const auto & [name, value] : result_changes_map)
create_query.changes.emplace_back(name, value);
create_query.overridability = std::move(result_overridability_map);
if (create_query.changes.empty())
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Named collection cannot be empty (collection name: {})",
query.collection_name);
chassert(create_query.collection_name == query.collection_name);
writeCreateQuery(create_query, true);
}
std::vector<std::string> NamedCollectionsMetadataStorage::listCollections() const
{
auto paths = storage->list();
std::vector<std::string> collections;
collections.reserve(paths.size());
for (const auto & path : paths)
collections.push_back(std::filesystem::path(path).stem());
return collections;
}
/// Read the stored entry of `collection_name`, parse it as a
/// CREATE NAMED COLLECTION query and return a copy of the parsed query.
ASTCreateNamedCollectionQuery NamedCollectionsMetadataStorage::readCreateQuery(const std::string & collection_name) const
{
    const auto file_name = getFileName(collection_name);
    auto query_text = storage->read(file_name);

    const auto & settings = getContext()->getSettingsRef();
    ParserCreateNamedCollectionQuery parser;
    auto ast = parseQuery(parser, query_text, "in file " + file_name, 0, settings.max_parser_depth, settings.max_parser_backtracks);

    return ast->as<const ASTCreateNamedCollectionQuery &>();
}
/// Serialize the CREATE query and write it to the storage.
/// The query is cloned and its changes are sorted by key name before
/// serialization; `replace` is forwarded to the backend.
void NamedCollectionsMetadataStorage::writeCreateQuery(const ASTCreateNamedCollectionQuery & query, bool replace)
{
    auto normalized = query.clone();
    auto * normalized_create = typeid_cast<ASTCreateNamedCollectionQuery *>(normalized.get());

    auto & changes = normalized_create->changes;
    ::sort(
        changes.begin(), changes.end(),
        [](const SettingChange & a, const SettingChange & b) { return a.name < b.name; });

    storage->write(getFileName(query.collection_name), serializeAST(*normalized), replace);
}
/// Forwards to the backend: whether it should be polled for changes via waitUpdate().
bool NamedCollectionsMetadataStorage::supportsPeriodicUpdate() const
{
return storage->supportsPeriodicUpdate();
}
/// Ask the backend to wait for a change, using the timeout from
/// `named_collections_storage.update_timeout_ms` (default 5000).
/// Throws LOGICAL_ERROR if the backend does not support periodic updates.
bool NamedCollectionsMetadataStorage::waitUpdate()
{
    if (storage->supportsPeriodicUpdate())
    {
        const auto & config = Context::getGlobalContextInstance()->getConfigRef();
        const size_t timeout = config.getUInt(named_collections_storage_config_path + ".update_timeout_ms", 5000);
        return storage->waitUpdate(timeout);
    }

    throw Exception(ErrorCodes::LOGICAL_ERROR, "Periodic updates are not supported");
}
/// Factory: choose the storage backend from `named_collections_storage.type`
/// ("local" by default, or "zookeeper"/"keeper") and wrap it.
/// For the local backend the path defaults to `<context path>/named_collections`;
/// for the keeper backend `named_collections_storage.path` is read without a default.
/// Throws INVALID_CONFIG_PARAMETER for any other type.
std::unique_ptr<NamedCollectionsMetadataStorage> NamedCollectionsMetadataStorage::create(const ContextPtr & context_)
{
    const auto & config = context_->getConfigRef();
    const auto storage_type = config.getString(named_collections_storage_config_path + ".type", "local");

    const bool is_local = storage_type == "local";
    const bool is_keeper = storage_type == "zookeeper" || storage_type == "keeper";

    if (!is_local && !is_keeper)
        throw Exception(
            ErrorCodes::INVALID_CONFIG_PARAMETER,
            "Unknown storage for named collections: {}", storage_type);

    if (is_local)
    {
        const auto path = config.getString(
            named_collections_storage_config_path + ".path",
            std::filesystem::path(context_->getPath()) / "named_collections");

        LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"),
            "Using local storage for named collections at path: {}", path);

        auto local_storage = std::make_unique<NamedCollectionsMetadataStorage::LocalStorage>(context_, path);
        return std::unique_ptr<NamedCollectionsMetadataStorage>(
            new NamedCollectionsMetadataStorage(std::move(local_storage), context_));
    }

    const auto path = config.getString(named_collections_storage_config_path + ".path");
    auto zk_storage = std::make_unique<NamedCollectionsMetadataStorage::ZooKeeperStorage>(context_, path);

    LOG_TRACE(getLogger("NamedCollectionsMetadataStorage"),
        "Using zookeeper storage for named collections at path: {}", path);

    return std::unique_ptr<NamedCollectionsMetadataStorage>(
        new NamedCollectionsMetadataStorage(std::move(zk_storage), context_));
}
}

View File

@ -0,0 +1,52 @@
#pragma once
#include <Parsers/ASTCreateNamedCollectionQuery.h>
#include <Parsers/ASTAlterNamedCollectionQuery.h>
#include <Parsers/ASTDropNamedCollectionQuery.h>
#include <Common/NamedCollections/NamedCollections.h>
#include <Core/BackgroundSchedulePool.h>
namespace DB
{
/// Reads and writes named collection definitions through a pluggable storage
/// backend (the nested LocalStorage / ZooKeeperStorage classes).
class NamedCollectionsMetadataStorage : private WithContext
{
public:
/// Factory: the only way to obtain an instance, since the constructor is private.
static std::unique_ptr<NamedCollectionsMetadataStorage> create(const ContextPtr & context);
/// Load every stored collection, keyed by name.
NamedCollectionsMap getAll() const;
/// Load a single stored collection.
MutableNamedCollectionPtr get(const std::string & collection_name) const;
/// Persist a new collection described by a CREATE query and return it.
MutableNamedCollectionPtr create(const ASTCreateNamedCollectionQuery & query);
/// Remove a stored collection.
void remove(const std::string & collection_name);
/// Remove a stored collection if present; the bool result comes from the backend.
bool removeIfExists(const std::string & collection_name);
/// Apply an ALTER query to a stored collection.
void update(const ASTAlterNamedCollectionQuery & query);
/// NOTE(review): no definition of shutdown() is visible alongside the other
/// member definitions in this change — confirm it is defined elsewhere.
void shutdown();
/// Return true if update was made
bool waitUpdate();
/// Whether the backend should be polled for changes via waitUpdate().
bool supportsPeriodicUpdate() const;
private:
class INamedCollectionsStorage;
class LocalStorage;
class ZooKeeperStorage;
/// The backend that actually stores the metadata entries.
std::shared_ptr<INamedCollectionsStorage> storage;
NamedCollectionsMetadataStorage(std::shared_ptr<INamedCollectionsStorage> storage_, ContextPtr context_);
/// Names of all stored collections.
std::vector<std::string> listCollections() const;
/// Read and parse the stored CREATE query of a collection.
ASTCreateNamedCollectionQuery readCreateQuery(const std::string & collection_name) const;
/// Serialize and store a CREATE query; `replace` is forwarded to the backend's write().
void writeCreateQuery(const ASTCreateNamedCollectionQuery & query, bool replace = false);
};
}

View File

@ -643,7 +643,8 @@ The server successfully detected this situation and will download merged part fr
\ \
M(ServerStartupMilliseconds, "Time elapsed from starting server to listening to sockets in milliseconds")\ M(ServerStartupMilliseconds, "Time elapsed from starting server to listening to sockets in milliseconds")\
M(IOUringSQEsSubmitted, "Total number of io_uring SQEs submitted") \ M(IOUringSQEsSubmitted, "Total number of io_uring SQEs submitted") \
M(IOUringSQEsResubmits, "Total number of io_uring SQE resubmits performed") \ M(IOUringSQEsResubmitsAsync, "Total number of asynchronous io_uring SQE resubmits performed") \
M(IOUringSQEsResubmitsSync, "Total number of synchronous io_uring SQE resubmits performed") \
M(IOUringCQEsCompleted, "Total number of successfully completed io_uring CQEs") \ M(IOUringCQEsCompleted, "Total number of successfully completed io_uring CQEs") \
M(IOUringCQEsFailed, "Total number of completed io_uring CQEs with failures") \ M(IOUringCQEsFailed, "Total number of completed io_uring CQEs with failures") \
\ \

View File

@ -1,12 +1,40 @@
#include <Common/tests/gtest_global_context.h> #include <Common/tests/gtest_global_context.h>
#include <Common/NamedCollections/NamedCollectionsFactory.h> #include <Common/NamedCollections/NamedCollectionsFactory.h>
#include <Common/NamedCollections/NamedCollectionUtils.h>
#include <Poco/Util/XMLConfiguration.h> #include <Poco/Util/XMLConfiguration.h>
#include <Poco/DOM/DOMParser.h> #include <Poco/DOM/DOMParser.h>
#include <gtest/gtest.h> #include <gtest/gtest.h>
using namespace DB; using namespace DB;
/// Test-only subclass of NamedCollectionFactory: each wrapper takes the
/// factory mutex and forwards to the corresponding lock-taking method of the
/// base class, so tests can call them directly.
class NamedCollectionFactoryFriend : public NamedCollectionFactory
{
public:
    /// Separate singleton used by the tests.
    static NamedCollectionFactoryFriend & instance()
    {
        static NamedCollectionFactoryFriend singleton;
        return singleton;
    }

    void loadFromConfig(const Poco::Util::AbstractConfiguration & config)
    {
        std::lock_guard guard(mutex);
        NamedCollectionFactory::loadFromConfig(config, guard);
    }

    void add(const std::string & collection_name, MutableNamedCollectionPtr collection)
    {
        std::lock_guard guard(mutex);
        NamedCollectionFactory::add(collection_name, collection, guard);
    }

    void remove(const std::string & collection_name)
    {
        std::lock_guard guard(mutex);
        NamedCollectionFactory::remove(collection_name, guard);
    }
};
TEST(NamedCollections, SimpleConfig) TEST(NamedCollections, SimpleConfig)
{ {
std::string xml(R"CONFIG(<clickhouse> std::string xml(R"CONFIG(<clickhouse>
@ -29,13 +57,13 @@ TEST(NamedCollections, SimpleConfig)
Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml); Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml);
Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document); Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document);
NamedCollectionUtils::loadFromConfig(*config); NamedCollectionFactoryFriend::instance().loadFromConfig(*config);
ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection1")); ASSERT_TRUE(NamedCollectionFactoryFriend::instance().exists("collection1"));
ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection2")); ASSERT_TRUE(NamedCollectionFactoryFriend::instance().exists("collection2"));
ASSERT_TRUE(NamedCollectionFactory::instance().tryGet("collection3") == nullptr); ASSERT_TRUE(NamedCollectionFactoryFriend::instance().tryGet("collection3") == nullptr);
auto collections = NamedCollectionFactory::instance().getAll(); auto collections = NamedCollectionFactoryFriend::instance().getAll();
ASSERT_EQ(collections.size(), 2); ASSERT_EQ(collections.size(), 2);
ASSERT_TRUE(collections.contains("collection1")); ASSERT_TRUE(collections.contains("collection1"));
ASSERT_TRUE(collections.contains("collection2")); ASSERT_TRUE(collections.contains("collection2"));
@ -47,7 +75,7 @@ key3: 3.3
key4: -4 key4: -4
)CONFIG"); )CONFIG");
auto collection1 = NamedCollectionFactory::instance().get("collection1"); auto collection1 = NamedCollectionFactoryFriend::instance().get("collection1");
ASSERT_TRUE(collection1 != nullptr); ASSERT_TRUE(collection1 != nullptr);
ASSERT_TRUE(collection1->get<String>("key1") == "value1"); ASSERT_TRUE(collection1->get<String>("key1") == "value1");
@ -61,7 +89,7 @@ key5: 5
key6: 6.6 key6: 6.6
)CONFIG"); )CONFIG");
auto collection2 = NamedCollectionFactory::instance().get("collection2"); auto collection2 = NamedCollectionFactoryFriend::instance().get("collection2");
ASSERT_TRUE(collection2 != nullptr); ASSERT_TRUE(collection2 != nullptr);
ASSERT_TRUE(collection2->get<String>("key4") == "value4"); ASSERT_TRUE(collection2->get<String>("key4") == "value4");
@ -69,9 +97,9 @@ key6: 6.6
ASSERT_TRUE(collection2->get<Float64>("key6") == 6.6); ASSERT_TRUE(collection2->get<Float64>("key6") == 6.6);
auto collection2_copy = collections["collection2"]->duplicate(); auto collection2_copy = collections["collection2"]->duplicate();
NamedCollectionFactory::instance().add("collection2_copy", collection2_copy); NamedCollectionFactoryFriend::instance().add("collection2_copy", collection2_copy);
ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection2_copy")); ASSERT_TRUE(NamedCollectionFactoryFriend::instance().exists("collection2_copy"));
ASSERT_EQ(NamedCollectionFactory::instance().get("collection2_copy")->dumpStructure(), ASSERT_EQ(NamedCollectionFactoryFriend::instance().get("collection2_copy")->dumpStructure(),
R"CONFIG(key4: value4 R"CONFIG(key4: value4
key5: 5 key5: 5
key6: 6.6 key6: 6.6
@ -88,8 +116,8 @@ key6: 6.6
collection2_copy->setOrUpdate<String>("key4", "value45", {}); collection2_copy->setOrUpdate<String>("key4", "value45", {});
ASSERT_EQ(collection2_copy->getOrDefault<String>("key4", "N"), "value45"); ASSERT_EQ(collection2_copy->getOrDefault<String>("key4", "N"), "value45");
NamedCollectionFactory::instance().remove("collection2_copy"); NamedCollectionFactoryFriend::instance().remove("collection2_copy");
ASSERT_FALSE(NamedCollectionFactory::instance().exists("collection2_copy")); ASSERT_FALSE(NamedCollectionFactoryFriend::instance().exists("collection2_copy"));
config.reset(); config.reset();
} }
@ -119,11 +147,11 @@ TEST(NamedCollections, NestedConfig)
Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml); Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml);
Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document); Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document);
NamedCollectionUtils::loadFromConfig(*config); NamedCollectionFactoryFriend::instance().loadFromConfig(*config);
ASSERT_TRUE(NamedCollectionFactory::instance().exists("collection3")); ASSERT_TRUE(NamedCollectionFactoryFriend::instance().exists("collection3"));
auto collection = NamedCollectionFactory::instance().get("collection3"); auto collection = NamedCollectionFactoryFriend::instance().get("collection3");
ASSERT_TRUE(collection != nullptr); ASSERT_TRUE(collection != nullptr);
ASSERT_EQ(collection->dumpStructure(), ASSERT_EQ(collection->dumpStructure(),
@ -171,8 +199,8 @@ TEST(NamedCollections, NestedConfigDuplicateKeys)
Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml); Poco::AutoPtr<Poco::XML::Document> document = dom_parser.parseString(xml);
Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document); Poco::AutoPtr<Poco::Util::XMLConfiguration> config = new Poco::Util::XMLConfiguration(document);
NamedCollectionUtils::loadFromConfig(*config); NamedCollectionFactoryFriend::instance().loadFromConfig(*config);
auto collection = NamedCollectionFactory::instance().get("collection"); auto collection = NamedCollectionFactoryFriend::instance().get("collection");
auto keys = collection->getKeys(); auto keys = collection->getKeys();
ASSERT_EQ(keys.size(), 6); ASSERT_EQ(keys.size(), 6);

View File

@ -1,12 +1,39 @@
#include <IO/WriteBufferFromFile.h> #include <IO/WriteBufferFromFile.h>
#include <base/sleep.h>
#include <Common/CurrentThread.h> #include <Common/CurrentThread.h>
#include <Common/HostResolvePool.h> #include <Common/HostResolvePool.h>
#include "base/defines.h" #include <base/defines.h>
#include <gtest/gtest.h>
#include <optional> #include <optional>
#include <chrono>
#include <thread> #include <thread>
#include <gtest/gtest.h>
using namespace std::literals::chrono_literals;
/// Current time point of the steady clock.
auto now()
{
    using Clock = std::chrono::steady_clock;
    return Clock::now();
}
/// Block the calling thread until `time_point` is reached.
template <typename TimePoint>
void sleep_until(TimePoint time_point)
{
    std::this_thread::sleep_until(time_point);
}
/// Block the calling thread for at least `duration`.
template <typename Duration>
void sleep_for(Duration duration)
{
    std::this_thread::sleep_for(duration);
}
/// Convert a chrono duration to a whole number of milliseconds
/// (duration_cast truncates any fractional part).
template <typename Duration>
size_t toMilliseconds(Duration duration)
{
    const auto as_ms = std::chrono::duration_cast<std::chrono::milliseconds>(duration);
    return as_ms.count();
}
/// Small extra margin (500 microseconds) that tests add on top of a waited
/// interval, e.g. `sleep_for(history + epsilon)`.
const auto epsilon = 500us;
class ResolvePoolMock : public DB::HostResolver class ResolvePoolMock : public DB::HostResolver
{ {
@ -267,13 +294,14 @@ TEST_F(ResolvePoolTest, CanFailAndHeal)
TEST_F(ResolvePoolTest, CanExpire) TEST_F(ResolvePoolTest, CanExpire)
{ {
auto resolver = make_resolver(); auto history = 5ms;
auto resolver = make_resolver(toMilliseconds(history));
auto expired_addr = resolver->resolve(); auto expired_addr = resolver->resolve();
ASSERT_TRUE(addresses.contains(*expired_addr)); ASSERT_TRUE(addresses.contains(*expired_addr));
addresses.erase(*expired_addr); addresses.erase(*expired_addr);
sleepForSeconds(1);
sleep_for(history + epsilon);
for (size_t i = 0; i < 1000; ++i) for (size_t i = 0; i < 1000; ++i)
{ {
@ -310,12 +338,19 @@ TEST_F(ResolvePoolTest, DuplicatesInAddresses)
ASSERT_EQ(3, DB::CurrentThread::getProfileEvents()[metrics.discovered]); ASSERT_EQ(3, DB::CurrentThread::getProfileEvents()[metrics.discovered]);
} }
void check_no_failed_address(size_t iteration, auto & resolver, auto & addresses, auto & failed_addr, auto & metrics) void check_no_failed_address(size_t iteration, auto & resolver, auto & addresses, auto & failed_addr, auto & metrics, auto deadline)
{ {
ASSERT_EQ(iteration, DB::CurrentThread::getProfileEvents()[metrics.failed]); ASSERT_EQ(iteration, DB::CurrentThread::getProfileEvents()[metrics.failed]);
for (size_t i = 0; i < 100; ++i) for (size_t i = 0; i < 100; ++i)
{ {
auto next_addr = resolver->resolve(); auto next_addr = resolver->resolve();
if (now() > deadline)
{
ASSERT_NE(i, 0);
break;
}
ASSERT_TRUE(addresses.contains(*next_addr)); ASSERT_TRUE(addresses.contains(*next_addr));
ASSERT_NE(*next_addr, *failed_addr); ASSERT_NE(*next_addr, *failed_addr);
} }
@ -323,52 +358,60 @@ void check_no_failed_address(size_t iteration, auto & resolver, auto & addresses
TEST_F(ResolvePoolTest, BannedForConsiquenceFail) TEST_F(ResolvePoolTest, BannedForConsiquenceFail)
{ {
size_t history_ms = 5; auto history = 5ms;
auto resolver = make_resolver(history_ms); auto resolver = make_resolver(toMilliseconds(history));
auto failed_addr = resolver->resolve(); auto failed_addr = resolver->resolve();
ASSERT_TRUE(addresses.contains(*failed_addr)); ASSERT_TRUE(addresses.contains(*failed_addr));
auto start_at = now();
failed_addr.setFail(); failed_addr.setFail();
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
check_no_failed_address(1, resolver, addresses, failed_addr, metrics); check_no_failed_address(1, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
sleep_until(start_at + history + epsilon);
start_at = now();
sleepForMilliseconds(history_ms + 1);
resolver->update(); resolver->update();
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count)); ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count));
failed_addr.setFail(); failed_addr.setFail();
check_no_failed_address(2, resolver, addresses, failed_addr, metrics); check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
sleep_until(start_at + history + epsilon);
start_at = now();
sleepForMilliseconds(history_ms + 1);
resolver->update(); resolver->update();
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
// ip still banned adter history_ms + update, because it was his second consiquent fail // ip still banned adter history_ms + update, because it was his second consiquent fail
check_no_failed_address(2, resolver, addresses, failed_addr, metrics); check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
} }
TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail) TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail)
{ {
size_t history_ms = 5; auto history = 5ms;
auto resolver = make_resolver(history_ms); auto resolver = make_resolver(toMilliseconds(history));
auto failed_addr = resolver->resolve(); auto failed_addr = resolver->resolve();
ASSERT_TRUE(addresses.contains(*failed_addr)); ASSERT_TRUE(addresses.contains(*failed_addr));
auto start_at = now();
failed_addr.setFail(); failed_addr.setFail();
failed_addr.setFail(); failed_addr.setFail();
failed_addr.setFail(); failed_addr.setFail();
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
check_no_failed_address(3, resolver, addresses, failed_addr, metrics); check_no_failed_address(3, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
sleep_until(start_at + history + epsilon);
sleepForMilliseconds(history_ms + 1);
resolver->update(); resolver->update();
// ip is cleared after just 1 history_ms interval. // ip is cleared after just 1 history_ms interval.
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
@ -377,8 +420,8 @@ TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail)
TEST_F(ResolvePoolTest, StillBannedAfterSuccess) TEST_F(ResolvePoolTest, StillBannedAfterSuccess)
{ {
size_t history_ms = 5; auto history = 5ms;
auto resolver = make_resolver(history_ms); auto resolver = make_resolver(toMilliseconds(history));
auto failed_addr = resolver->resolve(); auto failed_addr = resolver->resolve();
ASSERT_TRUE(addresses.contains(*failed_addr)); ASSERT_TRUE(addresses.contains(*failed_addr));
@ -395,11 +438,12 @@ TEST_F(ResolvePoolTest, StillBannedAfterSuccess)
} }
chassert(again_addr); chassert(again_addr);
auto start_at = now();
failed_addr.setFail(); failed_addr.setFail();
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
check_no_failed_address(1, resolver, addresses, failed_addr, metrics); check_no_failed_address(1, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
again_addr = std::nullopt; // success; again_addr = std::nullopt; // success;

View File

@ -258,7 +258,8 @@
M(KeeperExistsRequest) \ M(KeeperExistsRequest) \
\ \
M(IOUringSQEsSubmitted) \ M(IOUringSQEsSubmitted) \
M(IOUringSQEsResubmits) \ M(IOUringSQEsResubmitsAsync) \
M(IOUringSQEsResubmitsSync) \
M(IOUringCQEsCompleted) \ M(IOUringCQEsCompleted) \
M(IOUringCQEsFailed) \ M(IOUringCQEsFailed) \
\ \

View File

@ -188,6 +188,18 @@ NamesAndTypesList NamesAndTypesList::filter(const Names & names) const
return filter(NameSet(names.begin(), names.end())); return filter(NameSet(names.begin(), names.end()));
} }
/// Builds a copy of this list without the columns whose names appear in `names`.
/// The relative order of the remaining columns is preserved; `*this` is not modified.
NamesAndTypesList NamesAndTypesList::eraseNames(const NameSet & names) const
{
    NamesAndTypesList kept;
    for (const auto & name_and_type : *this)
    {
        /// Skip every column that was asked to be erased.
        if (names.contains(name_and_type.name))
            continue;

        kept.push_back(name_and_type);
    }
    return kept;
}
NamesAndTypesList NamesAndTypesList::addTypes(const Names & names) const NamesAndTypesList NamesAndTypesList::addTypes(const Names & names) const
{ {
/// NOTE: It's better to make a map in `IStorage` than to create it here every time again. /// NOTE: It's better to make a map in `IStorage` than to create it here every time again.

View File

@ -111,6 +111,9 @@ public:
/// Leave only the columns whose names are in the `names`. In `names` there can be superfluous columns. /// Leave only the columns whose names are in the `names`. In `names` there can be superfluous columns.
NamesAndTypesList filter(const Names & names) const; NamesAndTypesList filter(const Names & names) const;
/// Leave only the columns whose names are not in the `names`.
NamesAndTypesList eraseNames(const NameSet & names) const;
/// Unlike `filter`, returns columns in the order in which they go in `names`. /// Unlike `filter`, returns columns in the order in which they go in `names`.
NamesAndTypesList addTypes(const Names & names) const; NamesAndTypesList addTypes(const Names & names) const;

View File

@ -337,7 +337,7 @@ class IColumn;
M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \ M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \
\ \
M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \ M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \
M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", IMPORTANT) \ M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \
\ \
M(JoinStrictness, join_default_strictness, JoinStrictness::All, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. If empty, query without strictness will throw exception.", 0) \ M(JoinStrictness, join_default_strictness, JoinStrictness::All, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. If empty, query without strictness will throw exception.", 0) \
M(Bool, any_join_distinct_right_table_keys, false, "Enable old ANY JOIN logic with many-to-one left-to-right table keys mapping for all ANY JOINs. It leads to confusing not equal results for 't1 ANY LEFT JOIN t2' and 't2 ANY RIGHT JOIN t1'. ANY RIGHT JOIN needs one-to-many keys mapping to be consistent with LEFT one.", IMPORTANT) \ M(Bool, any_join_distinct_right_table_keys, false, "Enable old ANY JOIN logic with many-to-one left-to-right table keys mapping for all ANY JOINs. It leads to confusing not equal results for 't1 ANY LEFT JOIN t2' and 't2 ANY RIGHT JOIN t1'. ANY RIGHT JOIN needs one-to-many keys mapping to be consistent with LEFT one.", IMPORTANT) \
@ -1062,7 +1062,8 @@ class IColumn;
M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \ M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \
M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \ M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \
M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \ M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \
M(UInt64, input_format_parquet_max_block_size, 8192, "Max block size for parquet reader.", 0) \ M(UInt64, input_format_parquet_max_block_size, DEFAULT_BLOCK_SIZE, "Max block size for parquet reader.", 0) \
M(UInt64, input_format_parquet_prefer_block_bytes, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader", 0) \
M(Bool, input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format Protobuf", 0) \ M(Bool, input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format Protobuf", 0) \
M(Bool, input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format CapnProto", 0) \ M(Bool, input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format CapnProto", 0) \
M(Bool, input_format_orc_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format ORC", 0) \ M(Bool, input_format_orc_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format ORC", 0) \

View File

@ -98,6 +98,8 @@ static const std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges
{"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"}, {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"},
{"s3_max_part_number", 10000, 10000, "Maximum part number number for s3 upload part"}, {"s3_max_part_number", 10000, 10000, "Maximum part number number for s3 upload part"},
{"s3_max_single_operation_copy_size", 32 * 1024 * 1024, 32 * 1024 * 1024, "Maximum size for a single copy operation in s3"}, {"s3_max_single_operation_copy_size", 32 * 1024 * 1024, 32 * 1024 * 1024, "Maximum size for a single copy operation in s3"},
{"input_format_parquet_max_block_size", 8192, DEFAULT_BLOCK_SIZE, "Increase block size for parquet reader."},
{"input_format_parquet_prefer_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader."},
{"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"}, {"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"},
{"allow_statistic_optimize", false, false, "Old setting which popped up here being renamed."}, {"allow_statistic_optimize", false, false, "Old setting which popped up here being renamed."},
{"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."}, {"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."},

View File

@ -3,6 +3,7 @@
#include <DataTypes/DataTypeFactory.h> #include <DataTypes/DataTypeFactory.h>
#include <DataTypes/Serializations/SerializationNullable.h> #include <DataTypes/Serializations/SerializationNullable.h>
#include <DataTypes/DataTypeLowCardinality.h> #include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeVariant.h>
#include <Columns/ColumnNullable.h> #include <Columns/ColumnNullable.h>
#include <Columns/ColumnConst.h> #include <Columns/ColumnConst.h>
#include <Core/Field.h> #include <Core/Field.h>
@ -174,4 +175,9 @@ DataTypePtr removeNullableOrLowCardinalityNullable(const DataTypePtr & type)
} }
/// Returns true when `type` is Nullable, LowCardinality(Nullable), Dynamic or Variant.
/// The checks run in the same order as listed and stop at the first one that matches.
bool canContainNull(const IDataType & type)
{
    if (type.isNullable() || type.isLowCardinalityNullable())
        return true;

    return isDynamic(type) || isVariant(type);
}
} }

View File

@ -62,4 +62,6 @@ DataTypePtr makeNullableOrLowCardinalityNullableSafe(const DataTypePtr & type);
/// Nullable(T) -> T, LowCardinality(Nullable(T)) -> T /// Nullable(T) -> T, LowCardinality(Nullable(T)) -> T
DataTypePtr removeNullableOrLowCardinalityNullable(const DataTypePtr & type); DataTypePtr removeNullableOrLowCardinalityNullable(const DataTypePtr & type);
bool canContainNull(const IDataType & type);
} }

View File

@ -122,6 +122,13 @@ DatabaseReplicated::DatabaseReplicated(
fillClusterAuthInfo(db_settings.collection_name.value, context_->getConfigRef()); fillClusterAuthInfo(db_settings.collection_name.value, context_->getConfigRef());
replica_group_name = context_->getConfigRef().getString("replica_group_name", ""); replica_group_name = context_->getConfigRef().getString("replica_group_name", "");
if (!replica_group_name.empty() && database_name.starts_with(DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX))
{
context_->addWarningMessage(fmt::format("There's a Replicated database with a name starting from '{}', "
"and replica_group_name is configured. It may cause collisions in cluster names.",
ALL_GROUPS_CLUSTER_PREFIX));
}
} }
String DatabaseReplicated::getFullReplicaName(const String & shard, const String & replica) String DatabaseReplicated::getFullReplicaName(const String & shard, const String & replica)
@ -173,13 +180,40 @@ ClusterPtr DatabaseReplicated::tryGetCluster() const
return cluster; return cluster;
} }
void DatabaseReplicated::setCluster(ClusterPtr && new_cluster) ClusterPtr DatabaseReplicated::tryGetAllGroupsCluster() const
{ {
std::lock_guard lock{mutex}; std::lock_guard lock{mutex};
cluster = std::move(new_cluster); if (replica_group_name.empty())
return nullptr;
if (cluster_all_groups)
return cluster_all_groups;
/// Database is probably not created or not initialized yet, it's ok to return nullptr
if (is_readonly)
return cluster_all_groups;
try
{
cluster_all_groups = getClusterImpl(/*all_groups*/ true);
}
catch (...)
{
tryLogCurrentException(log);
}
return cluster_all_groups;
} }
ClusterPtr DatabaseReplicated::getClusterImpl() const void DatabaseReplicated::setCluster(ClusterPtr && new_cluster, bool all_groups)
{
std::lock_guard lock{mutex};
if (all_groups)
cluster_all_groups = std::move(new_cluster);
else
cluster = std::move(new_cluster);
}
ClusterPtr DatabaseReplicated::getClusterImpl(bool all_groups) const
{ {
Strings unfiltered_hosts; Strings unfiltered_hosts;
Strings hosts; Strings hosts;
@ -199,17 +233,24 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
"It's possible if the first replica is not fully created yet " "It's possible if the first replica is not fully created yet "
"or if the last replica was just dropped or due to logical error", zookeeper_path); "or if the last replica was just dropped or due to logical error", zookeeper_path);
hosts.clear(); if (all_groups)
std::vector<String> paths;
for (const auto & host : unfiltered_hosts)
paths.push_back(zookeeper_path + "/replicas/" + host + "/replica_group");
auto replica_groups = zookeeper->tryGet(paths);
for (size_t i = 0; i < paths.size(); ++i)
{ {
if (replica_groups[i].data == replica_group_name) hosts = unfiltered_hosts;
hosts.push_back(unfiltered_hosts[i]); }
else
{
hosts.clear();
std::vector<String> paths;
for (const auto & host : unfiltered_hosts)
paths.push_back(zookeeper_path + "/replicas/" + host + "/replica_group");
auto replica_groups = zookeeper->tryGet(paths);
for (size_t i = 0; i < paths.size(); ++i)
{
if (replica_groups[i].data == replica_group_name)
hosts.push_back(unfiltered_hosts[i]);
}
} }
Int32 cversion = stat.cversion; Int32 cversion = stat.cversion;
@ -274,6 +315,11 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
bool treat_local_as_remote = false; bool treat_local_as_remote = false;
bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL; bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL;
String cluster_name = TSA_SUPPRESS_WARNING_FOR_READ(database_name); /// FIXME
if (all_groups)
cluster_name = ALL_GROUPS_CLUSTER_PREFIX + cluster_name;
ClusterConnectionParameters params{ ClusterConnectionParameters params{
cluster_auth_info.cluster_username, cluster_auth_info.cluster_username,
cluster_auth_info.cluster_password, cluster_auth_info.cluster_password,
@ -282,7 +328,7 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
treat_local_port_as_remote, treat_local_port_as_remote,
cluster_auth_info.cluster_secure_connection, cluster_auth_info.cluster_secure_connection,
Priority{1}, Priority{1},
TSA_SUPPRESS_WARNING_FOR_READ(database_name), /// FIXME cluster_name,
cluster_auth_info.cluster_secret}; cluster_auth_info.cluster_secret};
return std::make_shared<Cluster>(getContext()->getSettingsRef(), shards, params); return std::make_shared<Cluster>(getContext()->getSettingsRef(), shards, params);

View File

@ -20,6 +20,8 @@ using ClusterPtr = std::shared_ptr<Cluster>;
class DatabaseReplicated : public DatabaseAtomic class DatabaseReplicated : public DatabaseAtomic
{ {
public: public:
static constexpr auto ALL_GROUPS_CLUSTER_PREFIX = "all_groups.";
DatabaseReplicated(const String & name_, const String & metadata_path_, UUID uuid, DatabaseReplicated(const String & name_, const String & metadata_path_, UUID uuid,
const String & zookeeper_path_, const String & shard_name_, const String & replica_name_, const String & zookeeper_path_, const String & shard_name_, const String & replica_name_,
DatabaseReplicatedSettings db_settings_, DatabaseReplicatedSettings db_settings_,
@ -65,6 +67,7 @@ public:
/// Returns cluster consisting of database replicas /// Returns cluster consisting of database replicas
ClusterPtr tryGetCluster() const; ClusterPtr tryGetCluster() const;
ClusterPtr tryGetAllGroupsCluster() const;
void drop(ContextPtr /*context*/) override; void drop(ContextPtr /*context*/) override;
@ -113,8 +116,8 @@ private:
ASTPtr parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query); ASTPtr parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query);
String readMetadataFile(const String & table_name) const; String readMetadataFile(const String & table_name) const;
ClusterPtr getClusterImpl() const; ClusterPtr getClusterImpl(bool all_groups = false) const;
void setCluster(ClusterPtr && new_cluster); void setCluster(ClusterPtr && new_cluster, bool all_groups = false);
void createEmptyLogEntry(const ZooKeeperPtr & current_zookeeper); void createEmptyLogEntry(const ZooKeeperPtr & current_zookeeper);
@ -155,6 +158,7 @@ private:
UInt64 tables_metadata_digest TSA_GUARDED_BY(metadata_mutex); UInt64 tables_metadata_digest TSA_GUARDED_BY(metadata_mutex);
mutable ClusterPtr cluster; mutable ClusterPtr cluster;
mutable ClusterPtr cluster_all_groups;
LoadTaskPtr startup_replicated_database_task TSA_GUARDED_BY(mutex); LoadTaskPtr startup_replicated_database_task TSA_GUARDED_BY(mutex);
}; };

View File

@ -421,6 +421,8 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
{ {
/// Some replica is added or removed, let's update cached cluster /// Some replica is added or removed, let's update cached cluster
database->setCluster(database->getClusterImpl()); database->setCluster(database->getClusterImpl());
if (!database->replica_group_name.empty())
database->setCluster(database->getClusterImpl(/*all_groups*/ true), /*all_groups*/ true);
out_reason = fmt::format("Entry {} is a dummy task", entry_name); out_reason = fmt::format("Entry {} is a dummy task", entry_name);
return {}; return {};
} }

View File

@ -41,11 +41,11 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot alter table {} because it was created AS table function" throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot alter table {} because it was created AS table function"
" and doesn't have structure in metadata", backQuote(ast_create_query.getTable())); " and doesn't have structure in metadata", backQuote(ast_create_query.getTable()));
if (!has_structure && !ast_create_query.is_dictionary) if (!has_structure && !ast_create_query.is_dictionary && !ast_create_query.isParameterizedView())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot alter table {} metadata doesn't have structure", throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot alter table {} metadata doesn't have structure",
backQuote(ast_create_query.getTable())); backQuote(ast_create_query.getTable()));
if (!ast_create_query.is_dictionary) if (!ast_create_query.is_dictionary && !ast_create_query.isParameterizedView())
{ {
ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns); ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns);
ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices); ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices);

View File

@ -511,7 +511,10 @@ MutableColumns CacheDictionary<dictionary_key_type>::aggregateColumns(
if (default_mask) if (default_mask)
{ {
if (key_state_from_storage.isDefault()) if (key_state_from_storage.isDefault())
{
(*default_mask)[key_index] = 1; (*default_mask)[key_index] = 1;
aggregated_column->insertDefault();
}
else else
{ {
(*default_mask)[key_index] = 0; (*default_mask)[key_index] = 0;

View File

@ -22,7 +22,8 @@ namespace ProfileEvents
extern const Event AsynchronousReaderIgnoredBytes; extern const Event AsynchronousReaderIgnoredBytes;
extern const Event IOUringSQEsSubmitted; extern const Event IOUringSQEsSubmitted;
extern const Event IOUringSQEsResubmits; extern const Event IOUringSQEsResubmitsAsync;
extern const Event IOUringSQEsResubmitsSync;
extern const Event IOUringCQEsCompleted; extern const Event IOUringCQEsCompleted;
extern const Event IOUringCQEsFailed; extern const Event IOUringCQEsFailed;
} }
@ -149,10 +150,12 @@ int IOUringReader::submitToRing(EnqueuedRequest & enqueued)
io_uring_prep_read(sqe, fd, request.buf, static_cast<unsigned>(request.size - enqueued.bytes_read), request.offset + enqueued.bytes_read); io_uring_prep_read(sqe, fd, request.buf, static_cast<unsigned>(request.size - enqueued.bytes_read), request.offset + enqueued.bytes_read);
int ret = 0; int ret = 0;
do ret = io_uring_submit(&ring);
while (ret == -EINTR || ret == -EAGAIN)
{ {
ProfileEvents::increment(ProfileEvents::IOUringSQEsResubmitsSync);
ret = io_uring_submit(&ring); ret = io_uring_submit(&ring);
} while (ret == -EINTR || ret == -EAGAIN); }
if (ret > 0 && !enqueued.resubmitting) if (ret > 0 && !enqueued.resubmitting)
{ {
@ -266,7 +269,7 @@ void IOUringReader::monitorRing()
if (cqe->res == -EAGAIN || cqe->res == -EINTR) if (cqe->res == -EAGAIN || cqe->res == -EINTR)
{ {
enqueued.resubmitting = true; enqueued.resubmitting = true;
ProfileEvents::increment(ProfileEvents::IOUringSQEsResubmits); ProfileEvents::increment(ProfileEvents::IOUringSQEsResubmitsAsync);
ret = submitToRing(enqueued); ret = submitToRing(enqueued);
if (ret <= 0) if (ret <= 0)
@ -310,6 +313,7 @@ void IOUringReader::monitorRing()
// potential short read, re-submit // potential short read, re-submit
enqueued.resubmitting = true; enqueued.resubmitting = true;
enqueued.bytes_read += bytes_read; enqueued.bytes_read += bytes_read;
ProfileEvents::increment(ProfileEvents::IOUringSQEsResubmitsAsync);
ret = submitToRing(enqueued); ret = submitToRing(enqueued);
if (ret <= 0) if (ret <= 0)

View File

@ -384,6 +384,7 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e
{ {
std::vector<Aws::S3::Model::ObjectIdentifier> current_chunk; std::vector<Aws::S3::Model::ObjectIdentifier> current_chunk;
String keys; String keys;
size_t first_position = current_position;
for (; current_position < objects.size() && current_chunk.size() < chunk_size_limit; ++current_position) for (; current_position < objects.size() && current_chunk.size() < chunk_size_limit; ++current_position)
{ {
Aws::S3::Model::ObjectIdentifier obj; Aws::S3::Model::ObjectIdentifier obj;
@ -409,9 +410,9 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e
{ {
const auto * outcome_error = outcome.IsSuccess() ? nullptr : &outcome.GetError(); const auto * outcome_error = outcome.IsSuccess() ? nullptr : &outcome.GetError();
auto time_now = std::chrono::system_clock::now(); auto time_now = std::chrono::system_clock::now();
for (const auto & object : objects) for (size_t i = first_position; i < current_position; ++i)
blob_storage_log->addEvent(BlobStorageLogElement::EventType::Delete, blob_storage_log->addEvent(BlobStorageLogElement::EventType::Delete,
uri.bucket, object.remote_path, object.local_path, object.bytes_size, uri.bucket, objects[i].remote_path, objects[i].local_path, objects[i].bytes_size,
outcome_error, time_now); outcome_error, time_now);
} }

View File

@ -161,6 +161,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string; format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string;
format_settings.parquet.output_fixed_string_as_fixed_byte_array = settings.output_format_parquet_fixed_string_as_fixed_byte_array; format_settings.parquet.output_fixed_string_as_fixed_byte_array = settings.output_format_parquet_fixed_string_as_fixed_byte_array;
format_settings.parquet.max_block_size = settings.input_format_parquet_max_block_size; format_settings.parquet.max_block_size = settings.input_format_parquet_max_block_size;
format_settings.parquet.prefer_block_bytes = settings.input_format_parquet_prefer_block_bytes;
format_settings.parquet.output_compression_method = settings.output_format_parquet_compression_method; format_settings.parquet.output_compression_method = settings.output_format_parquet_compression_method;
format_settings.parquet.output_compliant_nested_types = settings.output_format_parquet_compliant_nested_types; format_settings.parquet.output_compliant_nested_types = settings.output_format_parquet_compliant_nested_types;
format_settings.parquet.use_custom_encoder = settings.output_format_parquet_use_custom_encoder; format_settings.parquet.use_custom_encoder = settings.output_format_parquet_use_custom_encoder;

View File

@ -265,7 +265,8 @@ struct FormatSettings
bool preserve_order = false; bool preserve_order = false;
bool use_custom_encoder = true; bool use_custom_encoder = true;
bool parallel_encoding = true; bool parallel_encoding = true;
UInt64 max_block_size = 8192; UInt64 max_block_size = DEFAULT_BLOCK_SIZE;
size_t prefer_block_bytes = DEFAULT_BLOCK_SIZE * 256;
ParquetVersion output_version; ParquetVersion output_version;
ParquetCompression output_compression_method = ParquetCompression::SNAPPY; ParquetCompression output_compression_method = ParquetCompression::SNAPPY;
bool output_compliant_nested_types = true; bool output_compliant_nested_types = true;

View File

@ -879,11 +879,11 @@ namespace
} }
template <bool is_json> template <bool is_json>
bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings) bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings, bool & has_fractional)
{ {
if (is_json || settings.try_infer_exponent_floats) if (is_json || settings.try_infer_exponent_floats)
return tryReadFloatText(value, buf); return tryReadFloatTextExt(value, buf, has_fractional);
return tryReadFloatTextNoExponent(value, buf); return tryReadFloatTextExtNoExponent(value, buf, has_fractional);
} }
template <bool is_json> template <bool is_json>
@ -893,46 +893,31 @@ namespace
return nullptr; return nullptr;
Float64 tmp_float; Float64 tmp_float;
bool has_fractional;
if (settings.try_infer_integers) if (settings.try_infer_integers)
{ {
/// If we read from String, we can do it in a more efficient way. /// If we read from String, we can do it in a more efficient way.
if (auto * string_buf = dynamic_cast<ReadBufferFromString *>(&buf)) if (auto * string_buf = dynamic_cast<ReadBufferFromString *>(&buf))
{ {
/// Remember the pointer to the start of the number to rollback to it. /// Remember the pointer to the start of the number to rollback to it.
char * number_start = buf.position();
Int64 tmp_int;
bool read_int = tryReadIntText(tmp_int, buf);
/// If we reached eof, it cannot be float (it requires no less data than integer)
if (buf.eof())
return read_int ? std::make_shared<DataTypeInt64>() : nullptr;
char * int_end = buf.position();
/// We can safely get back to the start of the number, because we read from a string and we didn't reach eof. /// We can safely get back to the start of the number, because we read from a string and we didn't reach eof.
buf.position() = number_start; char * number_start = buf.position();
bool read_uint = false; /// NOTE: it may break parsing of tryReadFloat() != tryReadIntText() + parsing of '.'/'e'
char * uint_end = nullptr; /// But, for now it is true
/// In case of Int64 overflow we can try to infer UInt64. if (tryReadFloat<is_json>(tmp_float, buf, settings, has_fractional) && has_fractional)
if (!read_int)
{
UInt64 tmp_uint;
read_uint = tryReadIntText(tmp_uint, buf);
/// If we reached eof, it cannot be float (it requires no less data than integer)
if (buf.eof())
return read_uint ? std::make_shared<DataTypeUInt64>() : nullptr;
uint_end = buf.position();
buf.position() = number_start;
}
if (tryReadFloat<is_json>(tmp_float, buf, settings))
{
if (read_int && buf.position() == int_end)
return std::make_shared<DataTypeInt64>();
if (read_uint && buf.position() == uint_end)
return std::make_shared<DataTypeUInt64>();
return std::make_shared<DataTypeFloat64>(); return std::make_shared<DataTypeFloat64>();
}
Int64 tmp_int;
buf.position() = number_start;
if (tryReadIntText(tmp_int, buf))
return std::make_shared<DataTypeInt64>();
/// In case of Int64 overflow we can try to infer UInt64.
UInt64 tmp_uint;
buf.position() = number_start;
if (tryReadIntText(tmp_uint, buf))
return std::make_shared<DataTypeUInt64>();
return nullptr; return nullptr;
} }
@ -942,36 +927,22 @@ namespace
/// and then as float. /// and then as float.
PeekableReadBuffer peekable_buf(buf); PeekableReadBuffer peekable_buf(buf);
PeekableReadBufferCheckpoint checkpoint(peekable_buf); PeekableReadBufferCheckpoint checkpoint(peekable_buf);
Int64 tmp_int;
bool read_int = tryReadIntText(tmp_int, peekable_buf);
auto * int_end = peekable_buf.position();
peekable_buf.rollbackToCheckpoint(true);
bool read_uint = false; if (tryReadFloat<is_json>(tmp_float, peekable_buf, settings, has_fractional) && has_fractional)
char * uint_end = nullptr;
/// In case of Int64 overflow we can try to infer UInt64.
if (!read_int)
{
PeekableReadBufferCheckpoint new_checkpoint(peekable_buf);
UInt64 tmp_uint;
read_uint = tryReadIntText(tmp_uint, peekable_buf);
uint_end = peekable_buf.position();
peekable_buf.rollbackToCheckpoint(true);
}
if (tryReadFloat<is_json>(tmp_float, peekable_buf, settings))
{
/// Float parsing reads no fewer bytes than integer parsing,
/// so position of the buffer is either the same, or further.
/// If it's the same, then it's integer.
if (read_int && peekable_buf.position() == int_end)
return std::make_shared<DataTypeInt64>();
if (read_uint && peekable_buf.position() == uint_end)
return std::make_shared<DataTypeUInt64>();
return std::make_shared<DataTypeFloat64>(); return std::make_shared<DataTypeFloat64>();
} peekable_buf.rollbackToCheckpoint(/* drop= */ false);
Int64 tmp_int;
if (tryReadIntText(tmp_int, peekable_buf))
return std::make_shared<DataTypeInt64>();
peekable_buf.rollbackToCheckpoint(/* drop= */ true);
/// In case of Int64 overflow we can try to infer UInt64.
UInt64 tmp_uint;
if (tryReadIntText(tmp_uint, peekable_buf))
return std::make_shared<DataTypeUInt64>();
} }
else if (tryReadFloat<is_json>(tmp_float, buf, settings)) else if (tryReadFloat<is_json>(tmp_float, buf, settings, has_fractional))
{ {
return std::make_shared<DataTypeFloat64>(); return std::make_shared<DataTypeFloat64>();
} }
@ -1004,7 +975,8 @@ namespace
buf.position() = buf.buffer().begin(); buf.position() = buf.buffer().begin();
Float64 tmp; Float64 tmp;
if (tryReadFloat<is_json>(tmp, buf, settings) && buf.eof()) bool has_fractional;
if (tryReadFloat<is_json>(tmp, buf, settings, has_fractional) && buf.eof())
return std::make_shared<DataTypeFloat64>(); return std::make_shared<DataTypeFloat64>();
return nullptr; return nullptr;

View File

@ -111,7 +111,7 @@ public:
argument_types.push_back(argument.type); argument_types.push_back(argument.type);
/// More efficient specialization for two numeric arguments. /// More efficient specialization for two numeric arguments.
if (arguments.size() == 2 && isNumber(arguments[0].type) && isNumber(arguments[1].type)) if (arguments.size() == 2 && isNumber(removeNullable(arguments[0].type)) && isNumber(removeNullable(arguments[1].type)))
return std::make_unique<FunctionToFunctionBaseAdaptor>(SpecializedFunction::create(context), argument_types, return_type); return std::make_unique<FunctionToFunctionBaseAdaptor>(SpecializedFunction::create(context), argument_types, return_type);
return std::make_unique<FunctionToFunctionBaseAdaptor>( return std::make_unique<FunctionToFunctionBaseAdaptor>(
@ -123,7 +123,7 @@ public:
if (types.empty()) if (types.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} cannot be called without arguments", getName()); throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} cannot be called without arguments", getName());
if (types.size() == 2 && isNumber(types[0]) && isNumber(types[1])) if (types.size() == 2 && isNumber(removeNullable(types[0])) && isNumber(removeNullable(types[1])))
return SpecializedFunction::create(context)->getReturnTypeImpl(types); return SpecializedFunction::create(context)->getReturnTypeImpl(types);
return getLeastSupertype(types); return getLeastSupertype(types);

View File

@ -29,6 +29,18 @@ public:
return name; return name;
} }
/// Produces a constant result when the argument's type alone decides the answer:
///  - the type can hold nothing but NULL  -> constant 0;
///  - the type cannot contain NULL at all -> constant 1;
///  - otherwise nullptr, i.e. no constant is produced from the type.
ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override
{
    const auto & argument_type = arguments[0].type;

    if (argument_type->onlyNull())
        return result_type->createColumnConst(1, UInt8(0));

    /// The type rules out NULL, so the result does not depend on the data.
    if (!canContainNull(*argument_type))
        return result_type->createColumnConst(1, UInt8(1));

    /// The type admits both NULL and non-NULL values: nothing to fold here.
    return nullptr;
}
size_t getNumberOfArguments() const override { return 1; } size_t getNumberOfArguments() const override { return 1; }
bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForConstants() const override { return true; }

View File

@ -31,6 +31,18 @@ public:
return name; return name;
} }
/// Produces a constant result when the argument's type alone decides the answer:
///  - the type can hold nothing but NULL  -> constant 1;
///  - the type cannot contain NULL at all -> constant 0;
///  - otherwise nullptr, i.e. no constant is produced from the type.
ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override
{
    const auto & argument_type = arguments[0].type;

    if (argument_type->onlyNull())
        return result_type->createColumnConst(1, UInt8(1));

    /// The type rules out NULL, so the result does not depend on the data.
    if (!canContainNull(*argument_type))
        return result_type->createColumnConst(1, UInt8(0));

    /// The type admits both NULL and non-NULL values: nothing to fold here.
    return nullptr;
}
size_t getNumberOfArguments() const override { return 1; } size_t getNumberOfArguments() const override { return 1; }
bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }

View File

@ -2,6 +2,7 @@
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypesNumber.h> #include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h> #include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeNullable.h>
namespace DB namespace DB
{ {
@ -23,6 +24,15 @@ public:
return name; return name;
} }
/// The result depends only on the argument's type, so a constant is always returned:
/// 1 when the type is NULL-only or can contain NULL, 0 otherwise.
ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override
{
    const auto & argument_type = arguments[0].type;

    /// onlyNull() is checked first, exactly as before; canContainNull() is consulted only if it fails.
    const bool type_admits_null = argument_type->onlyNull() || canContainNull(*argument_type);

    return result_type->createColumnConst(1, type_admits_null ? UInt8(1) : UInt8(0));
}
bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; } bool useDefaultImplementationForNothing() const override { return false; }

View File

@ -320,11 +320,13 @@ static inline void readUIntTextUpToNSignificantDigits(T & x, ReadBuffer & buf)
template <typename T, typename ReturnType, bool allow_exponent = true> template <typename T, typename ReturnType, bool allow_exponent = true>
ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in, bool & has_fractional)
{ {
static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextImpl must be float or double"); static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextImpl must be float or double");
static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII"); static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII");
has_fractional = false;
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>; static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
bool negative = false; bool negative = false;
@ -377,6 +379,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
if (checkChar('.', in)) if (checkChar('.', in))
{ {
has_fractional = true;
auto after_point_count = in.count(); auto after_point_count = in.count();
while (!in.eof() && *in.position() == '0') while (!in.eof() && *in.position() == '0')
@ -394,6 +397,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
{ {
if (checkChar('e', in) || checkChar('E', in)) if (checkChar('e', in) || checkChar('E', in))
{ {
has_fractional = true;
if (in.eof()) if (in.eof())
{ {
if constexpr (throw_exception) if constexpr (throw_exception)
@ -420,10 +424,14 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
} }
if (after_point) if (after_point)
{
x += static_cast<T>(shift10(after_point, after_point_exponent)); x += static_cast<T>(shift10(after_point, after_point_exponent));
}
if (exponent) if (exponent)
{
x = static_cast<T>(shift10(x, exponent)); x = static_cast<T>(shift10(x, exponent));
}
if (negative) if (negative)
x = -x; x = -x;
@ -590,8 +598,16 @@ ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf)
template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextPreciseImpl<T, void>(x, in); } template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextPreciseImpl<T, void>(x, in); }
template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return readFloatTextPreciseImpl<T, bool>(x, in); } template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return readFloatTextPreciseImpl<T, bool>(x, in); }
template <typename T> void readFloatTextFast(T & x, ReadBuffer & in) { readFloatTextFastImpl<T, void>(x, in); } template <typename T> void readFloatTextFast(T & x, ReadBuffer & in)
template <typename T> bool tryReadFloatTextFast(T & x, ReadBuffer & in) { return readFloatTextFastImpl<T, bool>(x, in); } {
bool has_fractional;
readFloatTextFastImpl<T, void>(x, in, has_fractional);
}
template <typename T> bool tryReadFloatTextFast(T & x, ReadBuffer & in)
{
bool has_fractional;
return readFloatTextFastImpl<T, bool>(x, in, has_fractional);
}
template <typename T> void readFloatTextSimple(T & x, ReadBuffer & in) { readFloatTextSimpleImpl<T, void>(x, in); } template <typename T> void readFloatTextSimple(T & x, ReadBuffer & in) { readFloatTextSimpleImpl<T, void>(x, in); }
template <typename T> bool tryReadFloatTextSimple(T & x, ReadBuffer & in) { return readFloatTextSimpleImpl<T, bool>(x, in); } template <typename T> bool tryReadFloatTextSimple(T & x, ReadBuffer & in) { return readFloatTextSimpleImpl<T, bool>(x, in); }
@ -603,6 +619,21 @@ template <typename T> void readFloatText(T & x, ReadBuffer & in) { readFloatText
template <typename T> bool tryReadFloatText(T & x, ReadBuffer & in) { return tryReadFloatTextFast(x, in); } template <typename T> bool tryReadFloatText(T & x, ReadBuffer & in) { return tryReadFloatTextFast(x, in); }
/// Don't read exponent part of the number. /// Don't read exponent part of the number.
template <typename T> bool tryReadFloatTextNoExponent(T & x, ReadBuffer & in) { return readFloatTextFastImpl<T, bool, false>(x, in); } template <typename T> bool tryReadFloatTextNoExponent(T & x, ReadBuffer & in)
{
bool has_fractional;
return readFloatTextFastImpl<T, bool, false>(x, in, has_fractional);
}
/// Same parsing as the bool-returning fast float reader, but with a @has_fractional flag.
/// The flag is reset to false by readFloatTextFastImpl on entry and set to true once a
/// decimal point or an exponent marker ('e' / 'E') has been consumed, so despite its name
/// it also covers exponent notation without a '.'.
/// Used for input_format_try_infer_integers: a successful parse that leaves the flag false
/// lets the caller retry the same text as an integer.
/// ReturnType is bool, which makes throw_exception false inside readFloatTextFastImpl.
/// @param x receives the parsed value.
/// @param in buffer the number is read from.
/// @param has_fractional output flag described above.
/// @return whatever readFloatTextFastImpl<T, bool> returns (presumably true on success — confirm).
template <typename T> bool tryReadFloatTextExt(T & x, ReadBuffer & in, bool & has_fractional)
{
return readFloatTextFastImpl<T, bool>(x, in, has_fractional);
}
/// Variant of the flag-reporting float reader that instantiates readFloatTextFastImpl with
/// allow_exponent = false (third template argument), i.e. the exponent part is not read.
/// @param x receives the parsed value.
/// @param in buffer the number is read from.
/// @param has_fractional output flag; reset to false on entry by readFloatTextFastImpl and
///        set to true when a '.' is consumed.
/// @return whatever readFloatTextFastImpl<T, bool, false> returns (presumably true on success — confirm).
template <typename T> bool tryReadFloatTextExtNoExponent(T & x, ReadBuffer & in, bool & has_fractional)
{
return readFloatTextFastImpl<T, bool, false>(x, in, has_fractional);
}
} }

View File

@ -22,7 +22,9 @@
#include <Storages/SelectQueryInfo.h> #include <Storages/SelectQueryInfo.h>
#include <Storages/StorageReplicatedMergeTree.h> #include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/Distributed/DistributedSettings.h> #include <Storages/Distributed/DistributedSettings.h>
#include <Storages/buildQueryTreeForShard.h>
#include <Planner/Utils.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
namespace DB namespace DB
{ {
@ -505,6 +507,41 @@ void executeQueryWithParallelReplicas(
query_plan.addStep(std::move(read_from_remote)); query_plan.addStep(std::move(read_from_remote));
} }
/// Query-tree entry point: derives the header and the query AST from a query tree and then
/// delegates to the overload that takes an explicit header.
/// The caller's tree is not modified; all rewriting happens on a clone.
void executeQueryWithParallelReplicas(
QueryPlan & query_plan,
const StorageID & storage_id,
QueryProcessingStage::Enum processed_stage,
const QueryTreeNodePtr & query_tree,
const PlannerContextPtr & planner_context,
ContextPtr context,
std::shared_ptr<const StorageLimitsList> storage_limits)
{
/// Work on a copy so that the rewrites below do not leak into the caller's tree.
QueryTreeNodePtr modified_query_tree = query_tree->clone();
/// Order matters: joins are rewritten first, then the per-shard tree is built from the result.
rewriteJoinToGlobalJoin(modified_query_tree, context);
modified_query_tree = buildQueryTreeForShard(planner_context, modified_query_tree);
/// The header is taken from the rewritten tree, analyzed up to processed_stage.
auto header
= InterpreterSelectQueryAnalyzer::getSampleBlock(modified_query_tree, context, SelectQueryOptions(processed_stage).analyze());
auto modified_query_ast = queryNodeToDistributedSelectQuery(modified_query_tree);
/// Resolves to the header-taking overload (the extra `header` argument selects it).
executeQueryWithParallelReplicas(query_plan, storage_id, header, processed_stage, modified_query_ast, context, storage_limits);
}
/// AST entry point: rewrites the SELECT for the given storage, derives the header from the
/// rewritten query and then delegates to the overload that takes an explicit header.
void executeQueryWithParallelReplicas(
QueryPlan & query_plan,
const StorageID & storage_id,
QueryProcessingStage::Enum processed_stage,
const ASTPtr & query_ast,
ContextPtr context,
std::shared_ptr<const StorageLimitsList> storage_limits)
{
/// Rewrite against storage_id's database and table; no remote table function is supplied.
auto modified_query_ast = ClusterProxy::rewriteSelectQuery(
context, query_ast, storage_id.database_name, storage_id.table_name, /*remote_table_function_ptr*/ nullptr);
/// The header comes from the rewritten query, not the original one, analyzed up to processed_stage.
auto header = InterpreterSelectQuery(modified_query_ast, context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
/// Resolves to the header-taking overload (the extra `header` argument selects it).
executeQueryWithParallelReplicas(query_plan, storage_id, header, processed_stage, modified_query_ast, context, storage_limits);
}
} }
} }

View File

@ -24,6 +24,12 @@ struct StorageID;
struct StorageLimits; struct StorageLimits;
using StorageLimitsList = std::list<StorageLimits>; using StorageLimitsList = std::list<StorageLimits>;
class IQueryTreeNode;
using QueryTreeNodePtr = std::shared_ptr<IQueryTreeNode>;
class PlannerContext;
using PlannerContextPtr = std::shared_ptr<PlannerContext>;
namespace ClusterProxy namespace ClusterProxy
{ {
@ -60,7 +66,6 @@ void executeQuery(
AdditionalShardFilterGenerator shard_filter_generator, AdditionalShardFilterGenerator shard_filter_generator,
bool is_remote_function); bool is_remote_function);
void executeQueryWithParallelReplicas( void executeQueryWithParallelReplicas(
QueryPlan & query_plan, QueryPlan & query_plan,
const StorageID & storage_id, const StorageID & storage_id,
@ -69,6 +74,23 @@ void executeQueryWithParallelReplicas(
const ASTPtr & query_ast, const ASTPtr & query_ast,
ContextPtr context, ContextPtr context,
std::shared_ptr<const StorageLimitsList> storage_limits); std::shared_ptr<const StorageLimitsList> storage_limits);
/// Overload without an explicit header, for callers that hold a query AST.
/// The definition rewrites the SELECT for storage_id, computes the header from the rewritten
/// query and forwards to the header-taking overload.
void executeQueryWithParallelReplicas(
QueryPlan & query_plan,
const StorageID & storage_id,
QueryProcessingStage::Enum processed_stage,
const ASTPtr & query_ast,
ContextPtr context,
std::shared_ptr<const StorageLimitsList> storage_limits);
/// Overload without an explicit header, for callers that hold a query tree.
/// The definition clones query_tree, rewrites the clone, derives both the header and the
/// query AST from it and forwards to the header-taking overload.
void executeQueryWithParallelReplicas(
QueryPlan & query_plan,
const StorageID & storage_id,
QueryProcessingStage::Enum processed_stage,
const QueryTreeNodePtr & query_tree,
const PlannerContextPtr & planner_context,
ContextPtr context,
std::shared_ptr<const StorageLimitsList> storage_limits);
} }
} }

View File

@ -18,6 +18,7 @@
#include <Common/callOnce.h> #include <Common/callOnce.h>
#include <Common/SharedLockGuard.h> #include <Common/SharedLockGuard.h>
#include <Common/PageCache.h> #include <Common/PageCache.h>
#include <Common/NamedCollections/NamedCollectionsFactory.h>
#include <Coordination/KeeperDispatcher.h> #include <Coordination/KeeperDispatcher.h>
#include <Core/BackgroundSchedulePool.h> #include <Core/BackgroundSchedulePool.h>
#include <Formats/FormatFactory.h> #include <Formats/FormatFactory.h>
@ -610,6 +611,8 @@ struct ContextSharedPart : boost::noncopyable
LOG_TRACE(log, "Shutting down database catalog"); LOG_TRACE(log, "Shutting down database catalog");
DatabaseCatalog::shutdown(); DatabaseCatalog::shutdown();
NamedCollectionFactory::instance().shutdown();
delete_async_insert_queue.reset(); delete_async_insert_queue.reset();
SHUTDOWN(log, "merges executor", merge_mutate_executor, wait()); SHUTDOWN(log, "merges executor", merge_mutate_executor, wait());

View File

@ -568,8 +568,21 @@ void ZooKeeperMetadataTransaction::commit()
ClusterPtr tryGetReplicatedDatabaseCluster(const String & cluster_name) ClusterPtr tryGetReplicatedDatabaseCluster(const String & cluster_name)
{ {
if (const auto * replicated_db = dynamic_cast<const DatabaseReplicated *>(DatabaseCatalog::instance().tryGetDatabase(cluster_name).get())) String name = cluster_name;
return replicated_db->tryGetCluster(); bool all_groups = false;
if (name.starts_with(DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX))
{
name = name.substr(strlen(DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX));
all_groups = true;
}
if (const auto * replicated_db = dynamic_cast<const DatabaseReplicated *>(DatabaseCatalog::instance().tryGetDatabase(name).get()))
{
if (all_groups)
return replicated_db->tryGetAllGroupsCluster();
else
return replicated_db->tryGetCluster();
}
return {}; return {};
} }

View File

@ -4,7 +4,7 @@
#include <Access/ContextAccess.h> #include <Access/ContextAccess.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h> #include <Interpreters/executeDDLQueryOnCluster.h>
#include <Common/NamedCollections/NamedCollectionUtils.h> #include <Common/NamedCollections/NamedCollectionsFactory.h>
namespace DB namespace DB
@ -23,7 +23,7 @@ BlockIO InterpreterAlterNamedCollectionQuery::execute()
return executeDDLQueryOnCluster(query_ptr, current_context, params); return executeDDLQueryOnCluster(query_ptr, current_context, params);
} }
NamedCollectionUtils::updateFromSQL(query, current_context); NamedCollectionFactory::instance().updateFromSQL(query);
return {}; return {};
} }

View File

@ -4,7 +4,7 @@
#include <Access/ContextAccess.h> #include <Access/ContextAccess.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h> #include <Interpreters/executeDDLQueryOnCluster.h>
#include <Common/NamedCollections/NamedCollectionUtils.h> #include <Common/NamedCollections/NamedCollectionsFactory.h>
namespace DB namespace DB
@ -23,7 +23,7 @@ BlockIO InterpreterCreateNamedCollectionQuery::execute()
return executeDDLQueryOnCluster(query_ptr, current_context, params); return executeDDLQueryOnCluster(query_ptr, current_context, params);
} }
NamedCollectionUtils::createFromSQL(query, current_context); NamedCollectionFactory::instance().createFromSQL(query);
return {}; return {};
} }

Some files were not shown because too many files have changed in this diff Show More