mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 09:32:01 +00:00
Merge branch 'master' into Fix_parameterized_view_date
This commit is contained in:
commit
b426924235
@ -37,7 +37,6 @@ Checks: [
|
||||
'-cert-oop54-cpp',
|
||||
'-cert-oop57-cpp',
|
||||
|
||||
'-clang-analyzer-optin.core.EnumCastOutOfRange', # https://github.com/abseil/abseil-cpp/issues/1667
|
||||
'-clang-analyzer-optin.performance.Padding',
|
||||
|
||||
'-clang-analyzer-unix.Malloc',
|
||||
|
8
.gitmodules
vendored
8
.gitmodules
vendored
@ -91,13 +91,13 @@
|
||||
[submodule "contrib/aws"]
|
||||
path = contrib/aws
|
||||
url = https://github.com/ClickHouse/aws-sdk-cpp
|
||||
[submodule "aws-c-event-stream"]
|
||||
[submodule "contrib/aws-c-event-stream"]
|
||||
path = contrib/aws-c-event-stream
|
||||
url = https://github.com/awslabs/aws-c-event-stream
|
||||
[submodule "aws-c-common"]
|
||||
[submodule "contrib/aws-c-common"]
|
||||
path = contrib/aws-c-common
|
||||
url = https://github.com/awslabs/aws-c-common.git
|
||||
[submodule "aws-checksums"]
|
||||
[submodule "contrib/aws-checksums"]
|
||||
path = contrib/aws-checksums
|
||||
url = https://github.com/awslabs/aws-checksums
|
||||
[submodule "contrib/curl"]
|
||||
@ -163,7 +163,7 @@
|
||||
url = https://github.com/xz-mirror/xz
|
||||
[submodule "contrib/abseil-cpp"]
|
||||
path = contrib/abseil-cpp
|
||||
url = https://github.com/abseil/abseil-cpp
|
||||
url = https://github.com/ClickHouse/abseil-cpp.git
|
||||
[submodule "contrib/dragonbox"]
|
||||
path = contrib/dragonbox
|
||||
url = https://github.com/ClickHouse/dragonbox
|
||||
|
2
contrib/abseil-cpp
vendored
2
contrib/abseil-cpp
vendored
@ -1 +1 @@
|
||||
Subproject commit 3bd86026c93da5a40006fd53403dff9d5f5e30e3
|
||||
Subproject commit a3c4dd3e77f28b526efbb0eb394b72e29c633936
|
@ -1,6 +1,8 @@
|
||||
set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
|
||||
set(ABSL_COMMON_INCLUDE_DIRS "${ABSL_ROOT_DIR}")
|
||||
|
||||
# This is a minimized version of the function definition in CMake/AbseilHelpers.cmake
|
||||
|
||||
#
|
||||
# Copyright 2017 The Abseil Authors.
|
||||
#
|
||||
@ -16,7 +18,6 @@ set(ABSL_COMMON_INCLUDE_DIRS "${ABSL_ROOT_DIR}")
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
function(absl_cc_library)
|
||||
cmake_parse_arguments(ABSL_CC_LIB
|
||||
"DISABLE_INSTALL;PUBLIC;TESTONLY"
|
||||
@ -76,6 +77,12 @@ function(absl_cc_library)
|
||||
add_library(absl::${ABSL_CC_LIB_NAME} ALIAS ${_NAME})
|
||||
endfunction()
|
||||
|
||||
# The following definitions are an amalgamation of the CMakeLists.txt files in absl/*/
|
||||
# To refresh them when upgrading to a new version:
|
||||
# - copy them over from upstream
|
||||
# - remove calls of 'absl_cc_test'
|
||||
# - remove calls of `absl_cc_library` that contain `TESTONLY`
|
||||
# - prepend '${DIR}/' to the file definitions
|
||||
|
||||
set(DIR ${ABSL_ROOT_DIR}/absl/algorithm)
|
||||
|
||||
@ -102,12 +109,12 @@ absl_cc_library(
|
||||
absl::algorithm
|
||||
absl::core_headers
|
||||
absl::meta
|
||||
absl::nullability
|
||||
PUBLIC
|
||||
)
|
||||
|
||||
set(DIR ${ABSL_ROOT_DIR}/absl/base)
|
||||
|
||||
# Internal-only target, do not depend on directly.
|
||||
absl_cc_library(
|
||||
NAME
|
||||
atomic_hook
|
||||
@ -146,6 +153,18 @@ absl_cc_library(
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
)
|
||||
|
||||
absl_cc_library(
|
||||
NAME
|
||||
no_destructor
|
||||
HDRS
|
||||
"${DIR}/no_destructor.h"
|
||||
DEPS
|
||||
absl::config
|
||||
absl::nullability
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
)
|
||||
|
||||
absl_cc_library(
|
||||
NAME
|
||||
nullability
|
||||
@ -305,6 +324,8 @@ absl_cc_library(
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
LINKOPTS
|
||||
${ABSL_DEFAULT_LINKOPTS}
|
||||
$<$<BOOL:${LIBRT}>:-lrt>
|
||||
$<$<BOOL:${MINGW}>:-ladvapi32>
|
||||
DEPS
|
||||
absl::atomic_hook
|
||||
absl::base_internal
|
||||
@ -312,6 +333,7 @@ absl_cc_library(
|
||||
absl::core_headers
|
||||
absl::dynamic_annotations
|
||||
absl::log_severity
|
||||
absl::nullability
|
||||
absl::raw_logging_internal
|
||||
absl::spinlock_wait
|
||||
absl::type_traits
|
||||
@ -357,6 +379,7 @@ absl_cc_library(
|
||||
absl::base
|
||||
absl::config
|
||||
absl::core_headers
|
||||
absl::nullability
|
||||
PUBLIC
|
||||
)
|
||||
|
||||
@ -467,10 +490,11 @@ absl_cc_library(
|
||||
LINKOPTS
|
||||
${ABSL_DEFAULT_LINKOPTS}
|
||||
DEPS
|
||||
absl::container_common
|
||||
absl::common_policy_traits
|
||||
absl::compare
|
||||
absl::compressed_tuple
|
||||
absl::config
|
||||
absl::container_common
|
||||
absl::container_memory
|
||||
absl::cord
|
||||
absl::core_headers
|
||||
@ -480,7 +504,6 @@ absl_cc_library(
|
||||
absl::strings
|
||||
absl::throw_delegate
|
||||
absl::type_traits
|
||||
absl::utility
|
||||
)
|
||||
|
||||
# Internal-only target, do not depend on directly.
|
||||
@ -523,7 +546,9 @@ absl_cc_library(
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
DEPS
|
||||
absl::base_internal
|
||||
absl::compressed_tuple
|
||||
absl::config
|
||||
absl::core_headers
|
||||
absl::memory
|
||||
absl::span
|
||||
@ -548,18 +573,6 @@ absl_cc_library(
|
||||
PUBLIC
|
||||
)
|
||||
|
||||
# Internal-only target, do not depend on directly.
|
||||
absl_cc_library(
|
||||
NAME
|
||||
counting_allocator
|
||||
HDRS
|
||||
"${DIR}/internal/counting_allocator.h"
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
DEPS
|
||||
absl::config
|
||||
)
|
||||
|
||||
absl_cc_library(
|
||||
NAME
|
||||
flat_hash_map
|
||||
@ -570,7 +583,7 @@ absl_cc_library(
|
||||
DEPS
|
||||
absl::container_memory
|
||||
absl::core_headers
|
||||
absl::hash_function_defaults
|
||||
absl::hash_container_defaults
|
||||
absl::raw_hash_map
|
||||
absl::algorithm_container
|
||||
absl::memory
|
||||
@ -586,7 +599,7 @@ absl_cc_library(
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
DEPS
|
||||
absl::container_memory
|
||||
absl::hash_function_defaults
|
||||
absl::hash_container_defaults
|
||||
absl::raw_hash_set
|
||||
absl::algorithm_container
|
||||
absl::core_headers
|
||||
@ -604,7 +617,7 @@ absl_cc_library(
|
||||
DEPS
|
||||
absl::container_memory
|
||||
absl::core_headers
|
||||
absl::hash_function_defaults
|
||||
absl::hash_container_defaults
|
||||
absl::node_slot_policy
|
||||
absl::raw_hash_map
|
||||
absl::algorithm_container
|
||||
@ -620,8 +633,9 @@ absl_cc_library(
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
DEPS
|
||||
absl::container_memory
|
||||
absl::core_headers
|
||||
absl::hash_function_defaults
|
||||
absl::hash_container_defaults
|
||||
absl::node_slot_policy
|
||||
absl::raw_hash_set
|
||||
absl::algorithm_container
|
||||
@ -629,6 +643,19 @@ absl_cc_library(
|
||||
PUBLIC
|
||||
)
|
||||
|
||||
absl_cc_library(
|
||||
NAME
|
||||
hash_container_defaults
|
||||
HDRS
|
||||
"${DIR}/hash_container_defaults.h"
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
DEPS
|
||||
absl::config
|
||||
absl::hash_function_defaults
|
||||
PUBLIC
|
||||
)
|
||||
|
||||
# Internal-only target, do not depend on directly.
|
||||
absl_cc_library(
|
||||
NAME
|
||||
@ -655,9 +682,11 @@ absl_cc_library(
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
DEPS
|
||||
absl::config
|
||||
absl::container_common
|
||||
absl::cord
|
||||
absl::hash
|
||||
absl::strings
|
||||
absl::type_traits
|
||||
PUBLIC
|
||||
)
|
||||
|
||||
@ -703,6 +732,7 @@ absl_cc_library(
|
||||
absl::base
|
||||
absl::config
|
||||
absl::exponential_biased
|
||||
absl::no_destructor
|
||||
absl::raw_logging_internal
|
||||
absl::sample_recorder
|
||||
absl::synchronization
|
||||
@ -756,7 +786,9 @@ absl_cc_library(
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
DEPS
|
||||
absl::config
|
||||
absl::container_memory
|
||||
absl::core_headers
|
||||
absl::raw_hash_set
|
||||
absl::throw_delegate
|
||||
PUBLIC
|
||||
@ -817,6 +849,7 @@ absl_cc_library(
|
||||
DEPS
|
||||
absl::config
|
||||
absl::core_headers
|
||||
absl::debugging_internal
|
||||
absl::meta
|
||||
absl::strings
|
||||
absl::span
|
||||
@ -931,6 +964,7 @@ absl_cc_library(
|
||||
absl::crc32c
|
||||
absl::config
|
||||
absl::strings
|
||||
absl::no_destructor
|
||||
)
|
||||
|
||||
set(DIR ${ABSL_ROOT_DIR}/absl/debugging)
|
||||
@ -954,6 +988,8 @@ absl_cc_library(
|
||||
"${DIR}/stacktrace.cc"
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
LINKOPTS
|
||||
$<$<BOOL:${EXECINFO_LIBRARY}>:${EXECINFO_LIBRARY}>
|
||||
DEPS
|
||||
absl::debugging_internal
|
||||
absl::config
|
||||
@ -980,6 +1016,7 @@ absl_cc_library(
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
LINKOPTS
|
||||
${ABSL_DEFAULT_LINKOPTS}
|
||||
$<$<BOOL:${MINGW}>:-ldbghelp>
|
||||
DEPS
|
||||
absl::debugging_internal
|
||||
absl::demangle_internal
|
||||
@ -1058,8 +1095,10 @@ absl_cc_library(
|
||||
demangle_internal
|
||||
HDRS
|
||||
"${DIR}/internal/demangle.h"
|
||||
"${DIR}/internal/demangle_rust.h"
|
||||
SRCS
|
||||
"${DIR}/internal/demangle.cc"
|
||||
"${DIR}/internal/demangle_rust.cc"
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
DEPS
|
||||
@ -1252,6 +1291,7 @@ absl_cc_library(
|
||||
absl::strings
|
||||
absl::synchronization
|
||||
absl::flat_hash_map
|
||||
absl::no_destructor
|
||||
)
|
||||
|
||||
# Internal-only target, do not depend on directly.
|
||||
@ -1283,12 +1323,9 @@ absl_cc_library(
|
||||
absl_cc_library(
|
||||
NAME
|
||||
flags
|
||||
SRCS
|
||||
"${DIR}/flag.cc"
|
||||
HDRS
|
||||
"${DIR}/declare.h"
|
||||
"${DIR}/flag.h"
|
||||
"${DIR}/internal/flag_msvc.inc"
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
LINKOPTS
|
||||
@ -1299,7 +1336,6 @@ absl_cc_library(
|
||||
absl::flags_config
|
||||
absl::flags_internal
|
||||
absl::flags_reflection
|
||||
absl::base
|
||||
absl::core_headers
|
||||
absl::strings
|
||||
)
|
||||
@ -1379,6 +1415,9 @@ absl_cc_library(
|
||||
absl::synchronization
|
||||
)
|
||||
|
||||
############################################################################
|
||||
# Unit tests in alphabetical order.
|
||||
|
||||
set(DIR ${ABSL_ROOT_DIR}/absl/functional)
|
||||
|
||||
absl_cc_library(
|
||||
@ -1431,6 +1470,18 @@ absl_cc_library(
|
||||
PUBLIC
|
||||
)
|
||||
|
||||
absl_cc_library(
|
||||
NAME
|
||||
overload
|
||||
HDRS
|
||||
"${DIR}/overload.h"
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
DEPS
|
||||
absl::meta
|
||||
PUBLIC
|
||||
)
|
||||
|
||||
set(DIR ${ABSL_ROOT_DIR}/absl/hash)
|
||||
|
||||
absl_cc_library(
|
||||
@ -1640,6 +1691,7 @@ absl_cc_library(
|
||||
absl::log_internal_conditions
|
||||
absl::log_internal_message
|
||||
absl::log_internal_strip
|
||||
absl::absl_vlog_is_on
|
||||
)
|
||||
|
||||
absl_cc_library(
|
||||
@ -1721,6 +1773,7 @@ absl_cc_library(
|
||||
absl::log_entry
|
||||
absl::log_severity
|
||||
absl::log_sink
|
||||
absl::no_destructor
|
||||
absl::raw_logging_internal
|
||||
absl::synchronization
|
||||
absl::span
|
||||
@ -1771,6 +1824,7 @@ absl_cc_library(
|
||||
LINKOPTS
|
||||
${ABSL_DEFAULT_LINKOPTS}
|
||||
DEPS
|
||||
absl::core_headers
|
||||
absl::log_internal_message
|
||||
absl::log_internal_nullstream
|
||||
absl::log_severity
|
||||
@ -1876,6 +1930,11 @@ absl_cc_library(
|
||||
PUBLIC
|
||||
)
|
||||
|
||||
# Warning: Many linkers will strip the contents of this library because its
|
||||
# symbols are only used in a global constructor. A workaround is for clients
|
||||
# to link this using $<LINK_LIBRARY:WHOLE_ARCHIVE,absl::log_flags> instead of
|
||||
# the plain absl::log_flags.
|
||||
# TODO(b/320467376): Implement the equivalent of Bazel's alwayslink=True.
|
||||
absl_cc_library(
|
||||
NAME
|
||||
log_flags
|
||||
@ -1897,6 +1956,7 @@ absl_cc_library(
|
||||
absl::flags
|
||||
absl::flags_marshalling
|
||||
absl::strings
|
||||
absl::vlog_config_internal
|
||||
PUBLIC
|
||||
)
|
||||
|
||||
@ -1919,6 +1979,7 @@ absl_cc_library(
|
||||
absl::log_severity
|
||||
absl::raw_logging_internal
|
||||
absl::strings
|
||||
absl::vlog_config_internal
|
||||
)
|
||||
|
||||
absl_cc_library(
|
||||
@ -1952,6 +2013,7 @@ absl_cc_library(
|
||||
${ABSL_DEFAULT_LINKOPTS}
|
||||
DEPS
|
||||
absl::log_internal_log_impl
|
||||
absl::vlog_is_on
|
||||
PUBLIC
|
||||
)
|
||||
|
||||
@ -2064,21 +2126,75 @@ absl_cc_library(
|
||||
)
|
||||
|
||||
absl_cc_library(
|
||||
NAME
|
||||
log_internal_fnmatch
|
||||
SRCS
|
||||
"${DIR}/internal/fnmatch.cc"
|
||||
HDRS
|
||||
"${DIR}/internal/fnmatch.h"
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
LINKOPTS
|
||||
${ABSL_DEFAULT_LINKOPTS}
|
||||
DEPS
|
||||
absl::config
|
||||
absl::strings
|
||||
NAME
|
||||
vlog_config_internal
|
||||
SRCS
|
||||
"${DIR}/internal/vlog_config.cc"
|
||||
HDRS
|
||||
"${DIR}/internal/vlog_config.h"
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
LINKOPTS
|
||||
${ABSL_DEFAULT_LINKOPTS}
|
||||
DEPS
|
||||
absl::base
|
||||
absl::config
|
||||
absl::core_headers
|
||||
absl::log_internal_fnmatch
|
||||
absl::memory
|
||||
absl::no_destructor
|
||||
absl::strings
|
||||
absl::synchronization
|
||||
absl::optional
|
||||
)
|
||||
|
||||
absl_cc_library(
|
||||
NAME
|
||||
absl_vlog_is_on
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
LINKOPTS
|
||||
${ABSL_DEFAULT_LINKOPTS}
|
||||
HDRS
|
||||
"${DIR}/absl_vlog_is_on.h"
|
||||
DEPS
|
||||
absl::vlog_config_internal
|
||||
absl::config
|
||||
absl::core_headers
|
||||
absl::strings
|
||||
)
|
||||
|
||||
absl_cc_library(
|
||||
NAME
|
||||
vlog_is_on
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
LINKOPTS
|
||||
${ABSL_DEFAULT_LINKOPTS}
|
||||
HDRS
|
||||
"${DIR}/vlog_is_on.h"
|
||||
DEPS
|
||||
absl::absl_vlog_is_on
|
||||
)
|
||||
|
||||
absl_cc_library(
|
||||
NAME
|
||||
log_internal_fnmatch
|
||||
SRCS
|
||||
"${DIR}/internal/fnmatch.cc"
|
||||
HDRS
|
||||
"${DIR}/internal/fnmatch.h"
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
LINKOPTS
|
||||
${ABSL_DEFAULT_LINKOPTS}
|
||||
DEPS
|
||||
absl::config
|
||||
absl::strings
|
||||
)
|
||||
|
||||
# Test targets
|
||||
|
||||
set(DIR ${ABSL_ROOT_DIR}/absl/memory)
|
||||
|
||||
absl_cc_library(
|
||||
@ -2147,6 +2263,7 @@ absl_cc_library(
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
DEPS
|
||||
absl::compare
|
||||
absl::config
|
||||
absl::core_headers
|
||||
absl::bits
|
||||
@ -2176,6 +2293,8 @@ absl_cc_library(
|
||||
PUBLIC
|
||||
)
|
||||
|
||||
set(DIR ${ABSL_ROOT_DIR}/absl/profiling)
|
||||
|
||||
absl_cc_library(
|
||||
NAME
|
||||
sample_recorder
|
||||
@ -2188,8 +2307,6 @@ absl_cc_library(
|
||||
absl::synchronization
|
||||
)
|
||||
|
||||
set(DIR ${ABSL_ROOT_DIR}/absl/profiling)
|
||||
|
||||
absl_cc_library(
|
||||
NAME
|
||||
exponential_biased
|
||||
@ -2265,6 +2382,7 @@ absl_cc_library(
|
||||
LINKOPTS
|
||||
${ABSL_DEFAULT_LINKOPTS}
|
||||
DEPS
|
||||
absl::config
|
||||
absl::fast_type_id
|
||||
absl::optional
|
||||
)
|
||||
@ -2336,11 +2454,13 @@ absl_cc_library(
|
||||
DEPS
|
||||
absl::config
|
||||
absl::inlined_vector
|
||||
absl::nullability
|
||||
absl::random_internal_pool_urbg
|
||||
absl::random_internal_salted_seed_seq
|
||||
absl::random_internal_seed_material
|
||||
absl::random_seed_gen_exception
|
||||
absl::span
|
||||
absl::string_view
|
||||
)
|
||||
|
||||
# Internal-only target, do not depend on directly.
|
||||
@ -2399,6 +2519,7 @@ absl_cc_library(
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
LINKOPTS
|
||||
${ABSL_DEFAULT_LINKOPTS}
|
||||
$<$<BOOL:${MINGW}>:-lbcrypt>
|
||||
DEPS
|
||||
absl::core_headers
|
||||
absl::optional
|
||||
@ -2658,6 +2779,29 @@ absl_cc_library(
|
||||
absl::config
|
||||
)
|
||||
|
||||
# Internal-only target, do not depend on directly.
|
||||
absl_cc_library(
|
||||
NAME
|
||||
random_internal_distribution_test_util
|
||||
SRCS
|
||||
"${DIR}/internal/chi_square.cc"
|
||||
"${DIR}/internal/distribution_test_util.cc"
|
||||
HDRS
|
||||
"${DIR}/internal/chi_square.h"
|
||||
"${DIR}/internal/distribution_test_util.h"
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
LINKOPTS
|
||||
${ABSL_DEFAULT_LINKOPTS}
|
||||
DEPS
|
||||
absl::config
|
||||
absl::core_headers
|
||||
absl::raw_logging_internal
|
||||
absl::strings
|
||||
absl::str_format
|
||||
absl::span
|
||||
)
|
||||
|
||||
# Internal-only target, do not depend on directly.
|
||||
absl_cc_library(
|
||||
NAME
|
||||
@ -2699,6 +2843,8 @@ absl_cc_library(
|
||||
absl::function_ref
|
||||
absl::inlined_vector
|
||||
absl::memory
|
||||
absl::no_destructor
|
||||
absl::nullability
|
||||
absl::optional
|
||||
absl::raw_logging_internal
|
||||
absl::span
|
||||
@ -2724,8 +2870,11 @@ absl_cc_library(
|
||||
absl::base
|
||||
absl::config
|
||||
absl::core_headers
|
||||
absl::has_ostream_operator
|
||||
absl::nullability
|
||||
absl::raw_logging_internal
|
||||
absl::status
|
||||
absl::str_format
|
||||
absl::strings
|
||||
absl::type_traits
|
||||
absl::utility
|
||||
@ -2748,6 +2897,7 @@ absl_cc_library(
|
||||
absl::base
|
||||
absl::config
|
||||
absl::core_headers
|
||||
absl::nullability
|
||||
absl::throw_delegate
|
||||
PUBLIC
|
||||
)
|
||||
@ -2762,6 +2912,7 @@ absl_cc_library(
|
||||
"${DIR}/has_absl_stringify.h"
|
||||
"${DIR}/internal/damerau_levenshtein_distance.h"
|
||||
"${DIR}/internal/string_constant.h"
|
||||
"${DIR}/internal/has_absl_stringify.h"
|
||||
"${DIR}/match.h"
|
||||
"${DIR}/numbers.h"
|
||||
"${DIR}/str_cat.h"
|
||||
@ -2805,6 +2956,7 @@ absl_cc_library(
|
||||
absl::endian
|
||||
absl::int128
|
||||
absl::memory
|
||||
absl::nullability
|
||||
absl::raw_logging_internal
|
||||
absl::throw_delegate
|
||||
absl::type_traits
|
||||
@ -2824,6 +2976,18 @@ absl_cc_library(
|
||||
PUBLIC
|
||||
)
|
||||
|
||||
absl_cc_library(
|
||||
NAME
|
||||
has_ostream_operator
|
||||
HDRS
|
||||
"${DIR}/has_ostream_operator.h"
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
DEPS
|
||||
absl::config
|
||||
PUBLIC
|
||||
)
|
||||
|
||||
# Internal-only target, do not depend on directly.
|
||||
absl_cc_library(
|
||||
NAME
|
||||
@ -2855,7 +3019,12 @@ absl_cc_library(
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
DEPS
|
||||
absl::config
|
||||
absl::core_headers
|
||||
absl::nullability
|
||||
absl::span
|
||||
absl::str_format_internal
|
||||
absl::string_view
|
||||
PUBLIC
|
||||
)
|
||||
|
||||
@ -2886,6 +3055,7 @@ absl_cc_library(
|
||||
absl::strings
|
||||
absl::config
|
||||
absl::core_headers
|
||||
absl::fixed_array
|
||||
absl::inlined_vector
|
||||
absl::numeric_representation
|
||||
absl::type_traits
|
||||
@ -2989,6 +3159,7 @@ absl_cc_library(
|
||||
DEPS
|
||||
absl::base
|
||||
absl::config
|
||||
absl::no_destructor
|
||||
absl::raw_logging_internal
|
||||
absl::synchronization
|
||||
)
|
||||
@ -3079,6 +3250,7 @@ absl_cc_library(
|
||||
absl::endian
|
||||
absl::function_ref
|
||||
absl::inlined_vector
|
||||
absl::nullability
|
||||
absl::optional
|
||||
absl::raw_logging_internal
|
||||
absl::span
|
||||
@ -3246,6 +3418,8 @@ absl_cc_library(
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
DEPS
|
||||
Threads::Threads
|
||||
# TODO(#1495): Use $<LINK_LIBRARY:FRAMEWORK,CoreFoundation> once our
|
||||
# minimum CMake version >= 3.24
|
||||
$<$<PLATFORM_ID:Darwin>:-Wl,-framework,CoreFoundation>
|
||||
)
|
||||
|
||||
@ -3286,8 +3460,8 @@ absl_cc_library(
|
||||
NAME
|
||||
bad_any_cast_impl
|
||||
SRCS
|
||||
"${DIR}/bad_any_cast.h"
|
||||
"${DIR}/bad_any_cast.cc"
|
||||
"${DIR}/bad_any_cast.h"
|
||||
"${DIR}/bad_any_cast.cc"
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
DEPS
|
||||
@ -3307,6 +3481,7 @@ absl_cc_library(
|
||||
DEPS
|
||||
absl::algorithm
|
||||
absl::core_headers
|
||||
absl::nullability
|
||||
absl::throw_delegate
|
||||
absl::type_traits
|
||||
PUBLIC
|
||||
@ -3327,6 +3502,7 @@ absl_cc_library(
|
||||
absl::config
|
||||
absl::core_headers
|
||||
absl::memory
|
||||
absl::nullability
|
||||
absl::type_traits
|
||||
absl::utility
|
||||
PUBLIC
|
||||
@ -3389,6 +3565,7 @@ absl_cc_library(
|
||||
COPTS
|
||||
${ABSL_DEFAULT_COPTS}
|
||||
DEPS
|
||||
absl::config
|
||||
absl::core_headers
|
||||
absl::type_traits
|
||||
PUBLIC
|
||||
|
2
contrib/googletest
vendored
2
contrib/googletest
vendored
@ -1 +1 @@
|
||||
Subproject commit e47544ad31cb3ceecd04cc13e8fe556f8df9fe0b
|
||||
Subproject commit a7f443b80b105f940225332ed3c31f2790092f47
|
2
contrib/openssl
vendored
2
contrib/openssl
vendored
@ -1 +1 @@
|
||||
Subproject commit f7b8721dfc66abb147f24ca07b9c9d1d64f40f71
|
||||
Subproject commit 67c0b63e578e4c751ac9edf490f5a96124fff8dc
|
@ -41,8 +41,7 @@
|
||||
"docker/test/stateless": {
|
||||
"name": "clickhouse/stateless-test",
|
||||
"dependent": [
|
||||
"docker/test/stateful",
|
||||
"docker/test/unit"
|
||||
"docker/test/stateful"
|
||||
]
|
||||
},
|
||||
"docker/test/stateful": {
|
||||
@ -122,15 +121,16 @@
|
||||
"docker/test/base": {
|
||||
"name": "clickhouse/test-base",
|
||||
"dependent": [
|
||||
"docker/test/clickbench",
|
||||
"docker/test/fuzzer",
|
||||
"docker/test/libfuzzer",
|
||||
"docker/test/integration/base",
|
||||
"docker/test/keeper-jepsen",
|
||||
"docker/test/libfuzzer",
|
||||
"docker/test/server-jepsen",
|
||||
"docker/test/sqllogic",
|
||||
"docker/test/sqltest",
|
||||
"docker/test/clickbench",
|
||||
"docker/test/stateless"
|
||||
"docker/test/stateless",
|
||||
"docker/test/unit"
|
||||
]
|
||||
},
|
||||
"docker/test/integration/kerberized_hadoop": {
|
||||
|
@ -33,6 +33,7 @@ RUN pip3 install \
|
||||
flake8==4.0.1 \
|
||||
requests \
|
||||
thefuzz \
|
||||
tqdm==4.66.4 \
|
||||
types-requests \
|
||||
unidiff \
|
||||
&& rm -rf /root/.cache/pip
|
||||
|
@ -1,9 +1,7 @@
|
||||
# rebuild in #33610
|
||||
# docker build -t clickhouse/unit-test .
|
||||
ARG FROM_TAG=latest
|
||||
FROM clickhouse/stateless-test:$FROM_TAG
|
||||
|
||||
RUN apt-get install gdb
|
||||
FROM clickhouse/test-base:$FROM_TAG
|
||||
|
||||
COPY run.sh /
|
||||
CMD ["/bin/bash", "/run.sh"]
|
||||
|
45
docs/changelogs/v24.1.6.52-stable.md
Normal file
45
docs/changelogs/v24.1.6.52-stable.md
Normal file
@ -0,0 +1,45 @@
|
||||
---
|
||||
sidebar_position: 1
|
||||
sidebar_label: 2024
|
||||
---
|
||||
|
||||
# 2024 Changelog
|
||||
|
||||
### ClickHouse release v24.1.6.52-stable (fa09f677bc9) FIXME as compared to v24.1.5.6-stable (7f67181ff31)
|
||||
|
||||
#### Improvement
|
||||
* Backported in [#60292](https://github.com/ClickHouse/ClickHouse/issues/60292): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Backported in [#60832](https://github.com/ClickHouse/ClickHouse/issues/60832): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
|
||||
#### Bug Fix (user-visible misbehavior in an official stable release)
|
||||
|
||||
* Backported in [#60413](https://github.com/ClickHouse/ClickHouse/issues/60413): Fix segmentation fault in KQL parser when the input query exceeds the `max_query_size`. Also re-enable the KQL dialect. Fixes [#59036](https://github.com/ClickHouse/ClickHouse/issues/59036) and [#59037](https://github.com/ClickHouse/ClickHouse/issues/59037). [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)).
|
||||
* Backported in [#60074](https://github.com/ClickHouse/ClickHouse/issues/60074): Fix error `Read beyond last offset` for `AsynchronousBoundedReadBuffer`. [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)).
|
||||
* Backported in [#60299](https://github.com/ClickHouse/ClickHouse/issues/60299): Fix having neither acked nor nacked messages. If an exception happens during the read-write phase, messages will be nacked. [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Backported in [#60066](https://github.com/ClickHouse/ClickHouse/issues/60066): Fix optimize_uniq_to_count removing the column alias. [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#60638](https://github.com/ClickHouse/ClickHouse/issues/60638): Fixed a bug in parallel optimization for queries with `FINAL`, which could give an incorrect result in rare cases. [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
* Backported in [#60177](https://github.com/ClickHouse/ClickHouse/issues/60177): Fix cosineDistance crash with Nullable. [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#60279](https://github.com/ClickHouse/ClickHouse/issues/60279): Hide sensitive info for `S3Queue` table engine. [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)).
|
||||
* Backported in [#61000](https://github.com/ClickHouse/ClickHouse/issues/61000): Reduce the number of read rows from `system.numbers`. Fixes [#59418](https://github.com/ClickHouse/ClickHouse/issues/59418). [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
|
||||
* Backported in [#60791](https://github.com/ClickHouse/ClickHouse/issues/60791): Fix buffer overflow that can happen if the attacker asks the HTTP server to decompress data with a composition of codecs and size triggering numeric overflow. Fix buffer overflow that can happen inside codec NONE on wrong input data. This was submitted by TIANGONG research team through our [Bug Bounty program](https://github.com/ClickHouse/ClickHouse/issues/38986). [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#60783](https://github.com/ClickHouse/ClickHouse/issues/60783): Functions for SQL/JSON were able to read uninitialized memory. This closes [#60017](https://github.com/ClickHouse/ClickHouse/issues/60017). Found by Fuzzer. [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
* Backported in [#60803](https://github.com/ClickHouse/ClickHouse/issues/60803): Do not set aws custom metadata `x-amz-meta-*` headers on UploadPart & CompleteMultipartUpload calls. [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
|
||||
* Backported in [#60820](https://github.com/ClickHouse/ClickHouse/issues/60820): Fix crash in arrayEnumerateRanked. [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#60841](https://github.com/ClickHouse/ClickHouse/issues/60841): Fix crash when using input() in INSERT SELECT JOIN. Closes [#60035](https://github.com/ClickHouse/ClickHouse/issues/60035). [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
|
||||
* Backported in [#60904](https://github.com/ClickHouse/ClickHouse/issues/60904): Avoid segfault if too many keys are skipped when reading from S3. [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
|
||||
|
||||
#### NO CL CATEGORY
|
||||
|
||||
* Backported in [#60186](https://github.com/ClickHouse/ClickHouse/issues/60186): [#60181](https://github.com/ClickHouse/ClickHouse/pull/60181) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
|
||||
|
||||
#### NOT FOR CHANGELOG / INSIGNIFICANT
|
||||
|
||||
* Backported in [#60333](https://github.com/ClickHouse/ClickHouse/issues/60333): CI: Fix job failures due to jepsen artifacts. [#59890](https://github.com/ClickHouse/ClickHouse/pull/59890) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60034](https://github.com/ClickHouse/ClickHouse/issues/60034): Fix mark release ready. [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#60326](https://github.com/ClickHouse/ClickHouse/issues/60326): Ability to detect undead ZooKeeper sessions. [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)).
|
||||
* Backported in [#60363](https://github.com/ClickHouse/ClickHouse/issues/60363): CI: hot fix for gh statuses. [#60201](https://github.com/ClickHouse/ClickHouse/pull/60201) ([Max K.](https://github.com/maxknv)).
|
||||
* Backported in [#60648](https://github.com/ClickHouse/ClickHouse/issues/60648): Detect io_uring in tests. [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
|
||||
* Backported in [#60569](https://github.com/ClickHouse/ClickHouse/issues/60569): Remove broken test while we fix it. [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)).
|
||||
* Backported in [#60756](https://github.com/ClickHouse/ClickHouse/issues/60756): Update shellcheck. [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
|
||||
* Backported in [#60584](https://github.com/ClickHouse/ClickHouse/issues/60584): CI: fix docker build job name. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)).
|
||||
|
@ -480,7 +480,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
|
||||
- [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`.
|
||||
- [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
|
||||
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
|
||||
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
|
||||
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
|
||||
- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`.
|
||||
- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`.
|
||||
- [input_format_csv_try_infer_numbers_from_strings](/docs/en/operations/settings/settings-formats.md/#input_format_csv_try_infer_numbers_from_strings) - Try to infer numbers from string fields during schema inference. Default value - `false`.
|
||||
@ -2165,6 +2165,8 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
|
||||
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`.
|
||||
- [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
|
||||
- [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `lz4`.
|
||||
- [input_format_parquet_max_block_size](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_max_block_size) - Max block row size for parquet reader. Default value - `65409`.
|
||||
- [input_format_parquet_prefer_block_bytes](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_prefer_block_bytes) - Average block bytes output by parquet reader. Default value - `16744704`.
|
||||
|
||||
## ParquetMetadata {data-format-parquet-metadata}
|
||||
|
||||
|
@ -974,10 +974,12 @@ Default value: false
|
||||
|
||||
- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting
|
||||
|
||||
### allow_experimental_optimized_row_order
|
||||
### optimize_row_order
|
||||
|
||||
Controls if the row order should be optimized during inserts to improve the compressibility of the newly inserted table part.
|
||||
|
||||
Only has an effect for ordinary MergeTree-engine tables. Does nothing for specialized MergeTree engine tables (e.g. CollapsingMergeTree).
|
||||
|
||||
MergeTree tables are (optionally) compressed using [compression codecs](../../sql-reference/statements/create/table.md#column_compression_codec).
|
||||
Generic compression codecs such as LZ4 and ZSTD achieve maximum compression rates if the data exposes patterns.
|
||||
Long runs of the same value typically compress very well.
|
||||
|
@ -1417,6 +1417,17 @@ Compression method used in output Parquet format. Supported codecs: `snappy`, `l
|
||||
|
||||
Default value: `lz4`.
|
||||
|
||||
### input_format_parquet_max_block_size {#input_format_parquet_max_block_size}
|
||||
Max block row size for parquet reader. By controlling the number of rows in each block, you can control the memory usage,
|
||||
and in some operators that cache blocks, you can improve the accuracy of the operator's memory control.
|
||||
|
||||
Default value: `65409`.
|
||||
|
||||
### input_format_parquet_prefer_block_bytes {#input_format_parquet_prefer_block_bytes}
|
||||
Average block bytes output by parquet reader. Lowering this value when reading highly compressed Parquet files relieves memory pressure.
|
||||
|
||||
Default value: `65409 * 256 = 16744704`
|
||||
|
||||
## Hive format settings {#hive-format-settings}
|
||||
|
||||
### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter}
|
||||
|
@ -18,7 +18,7 @@ This tool works via HTTP, not via pipes, shared memory, or TCP because:
|
||||
However it can be used as standalone tool from command line with the following
|
||||
parameters in POST-request URL:
|
||||
- `connection_string` -- ODBC connection string.
|
||||
- `columns` -- columns in ClickHouse NamesAndTypesList format, name in backticks,
|
||||
- `sample_block` -- columns description in ClickHouse NamesAndTypesList format, name in backticks,
|
||||
type as string. Name and type are space separated, rows separated with
|
||||
newline.
|
||||
- `max_block_size` -- optional parameter, sets maximum size of single block.
|
||||
|
@ -106,8 +106,8 @@ To work with these states, use:
|
||||
- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engine.
|
||||
- [finalizeAggregation](../../sql-reference/functions/other-functions.md#function-finalizeaggregation) function.
|
||||
- [runningAccumulate](../../sql-reference/functions/other-functions.md#runningaccumulate) function.
|
||||
- [-Merge](#aggregate_functions_combinators-merge) combinator.
|
||||
- [-MergeState](#aggregate_functions_combinators-mergestate) combinator.
|
||||
- [-Merge](#-merge) combinator.
|
||||
- [-MergeState](#-mergestate) combinator.
|
||||
|
||||
## -Merge
|
||||
|
||||
|
@ -82,10 +82,12 @@ FROM
|
||||
|
||||
In this case, you should remember that you do not know the histogram bin borders.
|
||||
|
||||
## sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
|
||||
## sequenceMatch
|
||||
|
||||
Checks whether the sequence contains an event chain that matches the pattern.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
|
||||
```
|
||||
@ -102,7 +104,7 @@ Events that occur at the same second may lay in the sequence in an undefined ord
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).
|
||||
- `pattern` — Pattern string. See [Pattern syntax](#sequencematch).
|
||||
|
||||
**Returned values**
|
||||
|
||||
@ -170,9 +172,9 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM
|
||||
|
||||
**See Also**
|
||||
|
||||
- [sequenceCount](#function-sequencecount)
|
||||
- [sequenceCount](#sequencecount)
|
||||
|
||||
## sequenceCount(pattern)(time, cond1, cond2, ...)
|
||||
## sequenceCount
|
||||
|
||||
Counts the number of event chains that matched the pattern. The function searches event chains that do not overlap. It starts to search for the next chain after the current chain is matched.
|
||||
|
||||
@ -180,6 +182,8 @@ Counts the number of event chains that matched the pattern. The function searche
|
||||
Events that occur at the same second may lie in the sequence in an undefined order affecting the result.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
sequenceCount(pattern)(timestamp, cond1, cond2, ...)
|
||||
```
|
||||
@ -192,7 +196,7 @@ sequenceCount(pattern)(timestamp, cond1, cond2, ...)
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).
|
||||
- `pattern` — Pattern string. See [Pattern syntax](#sequencematch).
|
||||
|
||||
**Returned values**
|
||||
|
||||
@ -229,7 +233,7 @@ SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t
|
||||
|
||||
**See Also**
|
||||
|
||||
- [sequenceMatch](#function-sequencematch)
|
||||
- [sequenceMatch](#sequencematch)
|
||||
|
||||
## windowFunnel
|
||||
|
||||
|
@ -0,0 +1,95 @@
|
||||
---
|
||||
slug: /en/sql-reference/aggregate-functions/reference/flamegraph
|
||||
sidebar_position: 110
|
||||
---
|
||||
|
||||
# flameGraph
|
||||
|
||||
Aggregate function which builds a [flamegraph](https://www.brendangregg.com/flamegraphs.html) using the list of stacktraces. Outputs an array of strings which can be used by [flamegraph.pl utility](https://github.com/brendangregg/FlameGraph) to render an SVG of the flamegraph.
|
||||
|
||||
## Syntax
|
||||
|
||||
```sql
|
||||
flameGraph(traces, [size], [ptr])
|
||||
```
|
||||
|
||||
## Parameters
|
||||
|
||||
- `traces` — a stacktrace. [Array](../../data-types/array.md)([UInt64](../../data-types/int-uint.md)).
|
||||
- `size` — an allocation size for memory profiling. (optional - default `1`). [UInt64](../../data-types/int-uint.md).
|
||||
- `ptr` — an allocation address. (optional - default `0`). [UInt64](../../data-types/int-uint.md).
|
||||
|
||||
:::note
|
||||
In the case where `ptr != 0`, a flameGraph will map allocations (size > 0) and deallocations (size < 0) with the same size and ptr.
|
||||
Only allocations which were not freed are shown. Non mapped deallocations are ignored.
|
||||
:::
|
||||
|
||||
## Returned value
|
||||
|
||||
- An array of strings for use with [flamegraph.pl utility](https://github.com/brendangregg/FlameGraph). [Array](../../data-types/array.md)([String](../../data-types/string.md)).
|
||||
|
||||
## Examples
|
||||
|
||||
### Building a flamegraph based on a CPU query profiler
|
||||
|
||||
```sql
|
||||
SET query_profiler_cpu_time_period_ns=10000000;
|
||||
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
```
|
||||
|
||||
```text
|
||||
clickhouse client --allow_introspection_functions=1 -q "select arrayJoin(flameGraph(arrayReverse(trace))) from system.trace_log where trace_type = 'CPU' and query_id = 'xxx'" | ~/dev/FlameGraph/flamegraph.pl > flame_cpu.svg
|
||||
```
|
||||
|
||||
### Building a flamegraph based on a memory query profiler, showing all allocations
|
||||
|
||||
```sql
|
||||
SET memory_profiler_sample_probability=1, max_untracked_memory=1;
|
||||
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
```
|
||||
|
||||
```text
|
||||
clickhouse client --allow_introspection_functions=1 -q "select arrayJoin(flameGraph(trace, size)) from system.trace_log where trace_type = 'MemorySample' and query_id = 'xxx'" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem.svg
|
||||
```
|
||||
|
||||
### Building a flamegraph based on a memory query profiler, showing allocations which were not deallocated in query context
|
||||
|
||||
```sql
|
||||
SET memory_profiler_sample_probability=1, max_untracked_memory=1, use_uncompressed_cache=1, merge_tree_max_rows_to_use_cache=100000000000, merge_tree_max_bytes_to_use_cache=1000000000000;
|
||||
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
```
|
||||
|
||||
```text
|
||||
clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, size, ptr)) FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx'" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_untracked.svg
|
||||
```
|
||||
|
||||
### Building a flamegraph based on a memory query profiler, showing active allocations at a fixed point in time
|
||||
|
||||
```sql
|
||||
SET memory_profiler_sample_probability=1, max_untracked_memory=1;
|
||||
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
|
||||
```
|
||||
|
||||
- 1 - Memory usage per second
|
||||
|
||||
```sql
|
||||
SELECT event_time, m, formatReadableSize(max(s) as m) FROM (SELECT event_time, sum(size) OVER (ORDER BY event_time) AS s FROM system.trace_log WHERE query_id = 'xxx' AND trace_type = 'MemorySample') GROUP BY event_time ORDER BY event_time;
|
||||
```
|
||||
|
||||
- 2 - Find a time point with maximal memory usage
|
||||
|
||||
```sql
|
||||
SELECT argMax(event_time, s), max(s) FROM (SELECT event_time, sum(size) OVER (ORDER BY event_time) AS s FROM system.trace_log WHERE query_id = 'xxx' AND trace_type = 'MemorySample');
|
||||
```
|
||||
|
||||
- 3 - Fix active allocations at fixed point of time
|
||||
|
||||
```text
|
||||
clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, size, ptr)) FROM (SELECT * FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx' AND event_time <= 'yyy' ORDER BY event_time)" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_pos.svg
|
||||
```
|
||||
|
||||
- 4 - Find deallocations at fixed point of time
|
||||
|
||||
```text
|
||||
clickhouse client --allow_introspection_functions=1 -q "SELECT arrayJoin(flameGraph(trace, -size, ptr)) FROM (SELECT * FROM system.trace_log WHERE trace_type = 'MemorySample' AND query_id = 'xxx' AND event_time > 'yyy' ORDER BY event_time desc)" | ~/dev/FlameGraph/flamegraph.pl --countname=bytes --color=mem > flame_mem_time_point_neg.svg
|
||||
```
|
@ -58,6 +58,7 @@ ClickHouse-specific aggregate functions:
|
||||
- [topKWeighted](../reference/topkweighted.md)
|
||||
- [deltaSum](../reference/deltasum.md)
|
||||
- [deltaSumTimestamp](../reference/deltasumtimestamp.md)
|
||||
- [flameGraph](../reference/flame_graph.md)
|
||||
- [groupArray](../reference/grouparray.md)
|
||||
- [groupArrayLast](../reference/grouparraylast.md)
|
||||
- [groupUniqArray](../reference/groupuniqarray.md)
|
||||
|
@ -3,7 +3,7 @@ slug: /en/sql-reference/aggregate-functions/reference/stochasticlinearregression
|
||||
sidebar_position: 221
|
||||
---
|
||||
|
||||
# stochasticLinearRegression
|
||||
# stochasticLinearRegression {#agg_functions_stochasticlinearregression_parameters}
|
||||
|
||||
This function implements stochastic linear regression. It supports custom parameters for learning rate, L2 regularization coefficient, mini-batch size, and has a few methods for updating weights ([Adam](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Adam) (used by default), [simple SGD](https://en.wikipedia.org/wiki/Stochastic_gradient_descent), [Momentum](https://en.wikipedia.org/wiki/Stochastic_gradient_descent#Momentum), and [Nesterov](https://mipt.ru/upload/medialibrary/d7e/41-91.pdf)).
|
||||
|
||||
@ -72,5 +72,5 @@ The query will return a column of predicted values. Note that first argument of
|
||||
|
||||
**See Also**
|
||||
|
||||
- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression)
|
||||
- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#stochasticlogisticregression)
|
||||
- [Difference between linear and logistic regressions](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression)
|
||||
|
@ -11,7 +11,7 @@ This function implements stochastic logistic regression. It can be used for bina
|
||||
|
||||
Parameters are exactly the same as in stochasticLinearRegression:
|
||||
`learning rate`, `l2 regularization coefficient`, `mini-batch size`, `method for updating weights`.
|
||||
For more information see [parameters](#agg_functions-stochasticlinearregression-parameters).
|
||||
For more information see [parameters](../reference/stochasticlinearregression.md/#parameters).
|
||||
|
||||
``` text
|
||||
stochasticLogisticRegression(1.0, 1.0, 10, 'SGD')
|
||||
|
@ -27,7 +27,7 @@ Returns an integer of type `Float64`.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable` function](#varPopStable).
|
||||
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable`](#varpopstable) function.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -76,7 +76,7 @@ Returns an integer of type `Float64`.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
Unlike [`varPop()`](#varPop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations.
|
||||
Unlike [`varPop`](#varpop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -40,7 +40,7 @@ Where:
|
||||
|
||||
The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead.
|
||||
|
||||
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable` function](#varSampStable).
|
||||
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable`](#varsampstable) function.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -82,11 +82,11 @@ varSampStable(expr)
|
||||
|
||||
**Returned value**
|
||||
|
||||
The `varSampStable()` function returns a Float64 value representing the sample variance of the input data set.
|
||||
The `varSampStable` function returns a Float64 value representing the sample variance of the input data set.
|
||||
|
||||
**Implementation details**
|
||||
|
||||
The `varSampStable()` function calculates the sample variance using the same formula as the [`varSamp()`](#varSamp function):
|
||||
The `varSampStable` function calculates the sample variance using the same formula as the [`varSamp`](#varsamp) function:
|
||||
|
||||
```plaintext
|
||||
∑(x - mean(x))^2 / (n - 1)
|
||||
@ -97,9 +97,9 @@ Where:
|
||||
- `mean(x)` is the arithmetic mean of the data set.
|
||||
- `n` is the number of data points in the data set.
|
||||
|
||||
The difference between `varSampStable()` and `varSamp()` is that `varSampStable()` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values.
|
||||
The difference between `varSampStable` and `varSamp` is that `varSampStable` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values.
|
||||
|
||||
Like `varSamp()`, the `varSampStable()` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable()` function](./varpop#varpopstable) instead.
|
||||
Like `varSamp`, the `varSampStable` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable`](./varpop#varpopstable) function instead.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -125,4 +125,4 @@ Response:
|
||||
0.865
|
||||
```
|
||||
|
||||
This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable()` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp()` due to the more precise handling of floating-point arithmetic.
|
||||
This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp` due to the more precise handling of floating-point arithmetic.
|
||||
|
@ -33,7 +33,7 @@ Result:
|
||||
|
||||
## Ring
|
||||
|
||||
`Ring` is a simple polygon without holes stored as an array of points: [Array](array.md)([Point](#point-data-type)).
|
||||
`Ring` is a simple polygon without holes stored as an array of points: [Array](array.md)([Point](#point)).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -54,7 +54,7 @@ Result:
|
||||
|
||||
## Polygon
|
||||
|
||||
`Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring-data-type)). First element of outer array is the outer shape of polygon and all the following elements are holes.
|
||||
`Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring)). First element of outer array is the outer shape of polygon and all the following elements are holes.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -76,7 +76,7 @@ Result:
|
||||
|
||||
## MultiPolygon
|
||||
|
||||
`MultiPolygon` consists of multiple polygons and is stored as an array of polygons: [Array](array.md)([Polygon](#polygon-data-type)).
|
||||
`MultiPolygon` consists of multiple polygons and is stored as an array of polygons: [Array](array.md)([Polygon](#polygon)).
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -16,7 +16,7 @@ ClickHouse supports special functions for working with dictionaries that can be
|
||||
ClickHouse supports:
|
||||
|
||||
- Dictionaries with a [set of functions](../../sql-reference/functions/ext-dict-functions.md).
|
||||
- [Embedded dictionaries](#embedded_dictionaries) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md).
|
||||
- [Embedded dictionaries](#embedded-dictionaries) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md).
|
||||
|
||||
|
||||
:::tip Tutorial
|
||||
@ -82,7 +82,7 @@ You can [configure](#configuring-a-dictionary) any number of dictionaries in the
|
||||
You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../../sql-reference/functions/other-functions.md) function). This functionality is not related to dictionaries.
|
||||
:::
|
||||
|
||||
## Configuring a Dictionary {#configuring-a-dictionary}
|
||||
## Configuring a Dictionary
|
||||
|
||||
<CloudDetails />
|
||||
|
||||
@ -123,7 +123,7 @@ LAYOUT(...) -- Memory layout configuration
|
||||
LIFETIME(...) -- Lifetime of dictionary in memory
|
||||
```
|
||||
|
||||
## Storing Dictionaries in Memory {#storing-dictionaries-in-memory}
|
||||
## Storing Dictionaries in Memory
|
||||
|
||||
There are a variety of ways to store dictionaries in memory.
|
||||
|
||||
@ -415,7 +415,7 @@ or
|
||||
LAYOUT(COMPLEX_KEY_HASHED_ARRAY([SHARDS 1]))
|
||||
```
|
||||
|
||||
### range_hashed {#range_hashed}
|
||||
### range_hashed
|
||||
|
||||
The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values.
|
||||
|
||||
@ -679,7 +679,7 @@ When searching for a dictionary, the cache is searched first. For each block of
|
||||
|
||||
If keys are not found in the dictionary, an update cache task is created and added to the update queue. Update queue properties can be controlled with settings `max_update_queue_size`, `update_queue_push_timeout_milliseconds`, `query_wait_timeout_milliseconds`, `max_threads_for_updates`.
|
||||
|
||||
For cache dictionaries, the expiration [lifetime](#dictionary-updates) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with setting `allow_read_expired_keys`.
|
||||
For cache dictionaries, the expiration [lifetime](#refreshing-dictionary-data-using-lifetime) of data in the cache can be set. If more time than `lifetime` has passed since loading the data in a cell, the cell’s value is not used and key becomes expired. The key is re-requested the next time it needs to be used. This behaviour can be configured with setting `allow_read_expired_keys`.
|
||||
|
||||
This is the least effective of all the ways to store dictionaries. The speed of the cache depends strongly on correct settings and the usage scenario. A cache type dictionary performs well only when the hit rates are high enough (recommended 99% and higher). You can view the average hit rate in the [system.dictionaries](../../operations/system-tables/dictionaries.md) table.
|
||||
|
||||
@ -899,7 +899,7 @@ Other types are not supported yet. The function returns the attribute for the pr
|
||||
|
||||
Data must completely fit into RAM.
|
||||
|
||||
## Refreshing dictionary data using LIFETIME {#lifetime}
|
||||
## Refreshing dictionary data using LIFETIME
|
||||
|
||||
ClickHouse periodically updates dictionaries based on the `LIFETIME` tag (defined in seconds). `LIFETIME` is the update interval for fully downloaded dictionaries and the invalidation interval for cached dictionaries.
|
||||
|
||||
@ -1031,7 +1031,7 @@ SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15))
|
||||
...
|
||||
```
|
||||
|
||||
## Dictionary Sources {#dictionary-sources}
|
||||
## Dictionary Sources
|
||||
|
||||
<CloudDetails />
|
||||
|
||||
@ -1065,7 +1065,7 @@ SOURCE(SOURCE_TYPE(param1 val1 ... paramN valN)) -- Source configuration
|
||||
|
||||
The source is configured in the `source` section.
|
||||
|
||||
For source types [Local file](#local_file), [Executable file](#executable), [HTTP(s)](#https), [ClickHouse](#clickhouse)
|
||||
For source types [Local file](#local-file), [Executable file](#executable-file), [HTTP(s)](#https), [ClickHouse](#clickhouse)
|
||||
optional settings are available:
|
||||
|
||||
``` xml
|
||||
@ -1089,10 +1089,10 @@ SETTINGS(format_csv_allow_single_quotes = 0)
|
||||
|
||||
Types of sources (`source_type`):
|
||||
|
||||
- [Local file](#local_file)
|
||||
- [Executable File](#executable)
|
||||
- [Executable Pool](#executable_pool)
|
||||
- [HTTP(S)](#http)
|
||||
- [Local file](#local-file)
|
||||
- [Executable File](#executable-file)
|
||||
- [Executable Pool](#executable-pool)
|
||||
- [HTTP(S)](#https)
|
||||
- DBMS
|
||||
- [ODBC](#odbc)
|
||||
- [MySQL](#mysql)
|
||||
@ -1102,7 +1102,7 @@ Types of sources (`source_type`):
|
||||
- [Cassandra](#cassandra)
|
||||
- [PostgreSQL](#postgresql)
|
||||
|
||||
### Local File {#local_file}
|
||||
### Local File
|
||||
|
||||
Example of settings:
|
||||
|
||||
@ -1132,9 +1132,9 @@ When a dictionary with source `FILE` is created via DDL command (`CREATE DICTION
|
||||
|
||||
- [Dictionary function](../../sql-reference/table-functions/dictionary.md#dictionary-function)
|
||||
|
||||
### Executable File {#executable}
|
||||
### Executable File
|
||||
|
||||
Working with executable files depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data.
|
||||
Working with executable files depends on [how the dictionary is stored in memory](#storing-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data.
|
||||
|
||||
Example of settings:
|
||||
|
||||
@ -1161,7 +1161,7 @@ Setting fields:
|
||||
|
||||
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node.
|
||||
|
||||
### Executable Pool {#executable_pool}
|
||||
### Executable Pool
|
||||
|
||||
Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary [is stored](#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts.
|
||||
|
||||
@ -1196,9 +1196,9 @@ Setting fields:
|
||||
|
||||
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node.
|
||||
|
||||
### HTTP(S) {#https}
|
||||
### HTTP(S)
|
||||
|
||||
Working with an HTTP(S) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method.
|
||||
Working with an HTTP(S) server depends on [how the dictionary is stored in memory](#storing-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method.
|
||||
|
||||
Example of settings:
|
||||
|
||||
@ -1285,7 +1285,7 @@ Setting fields:
|
||||
- `db` – Name of the database. Omit it if the database name is set in the `<connection_string>` parameters.
|
||||
- `table` – Name of the table and schema if exists.
|
||||
- `connection_string` – Connection string.
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime).
|
||||
- `query` – The custom query. Optional parameter.
|
||||
|
||||
:::note
|
||||
@ -1575,7 +1575,7 @@ Setting fields:
|
||||
|
||||
- `where` – The selection criteria. The syntax for conditions is the same as for `WHERE` clause in MySQL, for example, `id > 10 AND id < 20`. Optional parameter.
|
||||
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime).
|
||||
|
||||
- `fail_on_connection_loss` – The configuration parameter that controls behavior of the server on connection loss. If `true`, an exception is thrown immediately if the connection between client and server was lost. If `false`, the ClickHouse server retries to execute the query three times before throwing an exception. Note that retrying leads to increased response times. Default value: `false`.
|
||||
|
||||
@ -1672,7 +1672,7 @@ Setting fields:
|
||||
- `db` – Name of the database.
|
||||
- `table` – Name of the table.
|
||||
- `where` – The selection criteria. May be omitted.
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime).
|
||||
- `secure` - Use ssl for connection.
|
||||
- `query` – The custom query. Optional parameter.
|
||||
|
||||
@ -1849,7 +1849,7 @@ Setting fields:
|
||||
- `db` – Name of the database.
|
||||
- `table` – Name of the table.
|
||||
- `where` – The selection criteria. The syntax for conditions is the same as for `WHERE` clause in PostgreSQL. For example, `id > 10 AND id < 20`. Optional parameter.
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates).
|
||||
- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Refreshing dictionary data using LIFETIME](#refreshing-dictionary-data-using-lifetime).
|
||||
- `query` – The custom query. Optional parameter.
|
||||
|
||||
:::note
|
||||
@ -1873,7 +1873,7 @@ LAYOUT(FLAT())
|
||||
LIFETIME(0);
|
||||
```
|
||||
|
||||
## Dictionary Key and Fields {#dictionary-key-and-fields}
|
||||
## Dictionary Key and Fields
|
||||
|
||||
<CloudDetails />
|
||||
|
||||
@ -1963,7 +1963,7 @@ PRIMARY KEY Id
|
||||
|
||||
### Composite Key
|
||||
|
||||
The key can be a `tuple` from any types of fields. The [layout](#storig-dictionaries-in-memory) in this case must be `complex_key_hashed` or `complex_key_cache`.
|
||||
The key can be a `tuple` of fields of any type. The [layout](#storing-dictionaries-in-memory) in this case must be `complex_key_hashed` or `complex_key_cache`.
|
||||
|
||||
:::tip
|
||||
A composite key can consist of a single element. This makes it possible to use a string as the key, for instance.
|
||||
@ -2030,17 +2030,17 @@ CREATE DICTIONARY somename (
|
||||
|
||||
Configuration fields:
|
||||
|
||||
| Tag | Description | Required |
|
||||
|------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------|
|
||||
| `name` | Column name. | Yes |
|
||||
| `type` | ClickHouse data type: [UInt8](../../sql-reference/data-types/int-uint.md), [UInt16](../../sql-reference/data-types/int-uint.md), [UInt32](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md), [Int8](../../sql-reference/data-types/int-uint.md), [Int16](../../sql-reference/data-types/int-uint.md), [Int32](../../sql-reference/data-types/int-uint.md), [Int64](../../sql-reference/data-types/int-uint.md), [Float32](../../sql-reference/data-types/float.md), [Float64](../../sql-reference/data-types/float.md), [UUID](../../sql-reference/data-types/uuid.md), [Decimal32](../../sql-reference/data-types/decimal.md), [Decimal64](../../sql-reference/data-types/decimal.md), [Decimal128](../../sql-reference/data-types/decimal.md), [Decimal256](../../sql-reference/data-types/decimal.md),[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), [String](../../sql-reference/data-types/string.md), [Array](../../sql-reference/data-types/array.md).<br/>ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.<br/>[Nullable](../../sql-reference/data-types/nullable.md) is currently supported for [Flat](#flat), [Hashed](#hashed), [ComplexKeyHashed](#complex_key_hashed), [Direct](#direct), [ComplexKeyDirect](#complex_key_direct), [RangeHashed](#range_hashed), Polygon, [Cache](#cache), [ComplexKeyCache](#complex_key_cache), [SSDCache](#ssd_cache), [SSDComplexKeyCache](#complex_key_ssd_cache) dictionaries. In [IPTrie](#ip_trie) dictionaries `Nullable` types are not supported. | Yes |
|
||||
| `null_value` | Default value for a non-existing element.<br/>In the example, it is an empty string. [NULL](../syntax.md#null) value can be used only for the `Nullable` types (see the previous line with types description). | Yes |
|
||||
| `expression` | [Expression](../../sql-reference/syntax.md#expressions) that ClickHouse executes on the value.<br/>The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.<br/><br/>Default value: no expression. | No |
|
||||
| <a name="hierarchical-dict-attr"></a> `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](#hierarchical-dictionaries).<br/><br/>Default value: `false`. | No |
|
||||
| `injective` | Flag that shows whether the `id -> attribute` image is [injective](https://en.wikipedia.org/wiki/Injective_function).<br/>If `true`, ClickHouse can automatically place after the `GROUP BY` clause the requests to dictionaries with injection. Usually it significantly reduces the amount of such requests.<br/><br/>Default value: `false`. | No |
|
||||
| `is_object_id` | Flag that shows whether the query is executed for a MongoDB document by `ObjectID`.<br/><br/>Default value: `false`.
|
||||
| Tag | Description | Required |
|
||||
|------------------------------------------------------|-------------|----------|
|
||||
| `name` | Column name. | Yes |
|
||||
| `type` | ClickHouse data type: [UInt8](../../sql-reference/data-types/int-uint.md), [UInt16](../../sql-reference/data-types/int-uint.md), [UInt32](../../sql-reference/data-types/int-uint.md), [UInt64](../../sql-reference/data-types/int-uint.md), [Int8](../../sql-reference/data-types/int-uint.md), [Int16](../../sql-reference/data-types/int-uint.md), [Int32](../../sql-reference/data-types/int-uint.md), [Int64](../../sql-reference/data-types/int-uint.md), [Float32](../../sql-reference/data-types/float.md), [Float64](../../sql-reference/data-types/float.md), [UUID](../../sql-reference/data-types/uuid.md), [Decimal32](../../sql-reference/data-types/decimal.md), [Decimal64](../../sql-reference/data-types/decimal.md), [Decimal128](../../sql-reference/data-types/decimal.md), [Decimal256](../../sql-reference/data-types/decimal.md),[Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md), [String](../../sql-reference/data-types/string.md), [Array](../../sql-reference/data-types/array.md).<br/>ClickHouse tries to cast value from dictionary to the specified data type. For example, for MySQL, the field might be `TEXT`, `VARCHAR`, or `BLOB` in the MySQL source table, but it can be uploaded as `String` in ClickHouse.<br/>[Nullable](../../sql-reference/data-types/nullable.md) is currently supported for [Flat](#flat), [Hashed](#hashed), [ComplexKeyHashed](#complex_key_hashed), [Direct](#direct), [ComplexKeyDirect](#complex_key_direct), [RangeHashed](#range_hashed), Polygon, [Cache](#cache), [ComplexKeyCache](#complex_key_cache), [SSDCache](#ssd_cache), [SSDComplexKeyCache](#complex_key_ssd_cache) dictionaries. In [IPTrie](#ip_trie) dictionaries `Nullable` types are not supported. | Yes |
|
||||
| `null_value` | Default value for a non-existing element.<br/>In the example, it is an empty string. [NULL](../syntax.md#null) value can be used only for the `Nullable` types (see the previous line with types description). | Yes |
|
||||
| `expression` | [Expression](../../sql-reference/syntax.md#expressions) that ClickHouse executes on the value.<br/>The expression can be a column name in the remote SQL database. Thus, you can use it to create an alias for the remote column.<br/><br/>Default value: no expression. | No |
|
||||
| <a name="hierarchical-dict-attr"></a> `hierarchical` | If `true`, the attribute contains the value of a parent key for the current key. See [Hierarchical Dictionaries](#hierarchical-dictionaries).<br/><br/>Default value: `false`. | No |
|
||||
| `injective` | Flag that shows whether the `id -> attribute` image is [injective](https://en.wikipedia.org/wiki/Injective_function).<br/>If `true`, ClickHouse can automatically place after the `GROUP BY` clause the requests to dictionaries with injection. Usually it significantly reduces the amount of such requests.<br/><br/>Default value: `false`. | No |
|
||||
| `is_object_id` | Flag that shows whether the query is executed for a MongoDB document by `ObjectID`.<br/><br/>Default value: `false`. | No |
|
||||
|
||||
## Hierarchical Dictionaries {#hierarchical-dictionaries}
|
||||
## Hierarchical Dictionaries
|
||||
|
||||
ClickHouse supports hierarchical dictionaries with a [numeric key](#numeric-key).
|
||||
|
||||
@ -2165,7 +2165,7 @@ Points can be specified as an array or a tuple of their coordinates. In the curr
|
||||
|
||||
The user can upload their own data in all formats supported by ClickHouse.
|
||||
|
||||
There are 3 types of [in-memory storage](#storig-dictionaries-in-memory) available:
|
||||
There are 3 types of [in-memory storage](#storing-dictionaries-in-memory) available:
|
||||
|
||||
- `POLYGON_SIMPLE`. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes.
|
||||
|
||||
@ -2435,7 +2435,7 @@ LIFETIME(0)
|
||||
LAYOUT(regexp_tree);
|
||||
```
|
||||
|
||||
## Embedded Dictionaries {#embedded-dictionaries}
|
||||
## Embedded Dictionaries
|
||||
|
||||
<SelfManaged />
|
||||
|
||||
|
@ -1261,7 +1261,7 @@ SELECT arraySort((x) -> -x, [1, 2, 3]) as res;
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
For each element of the source array, the lambda function returns the sorting key, that is, \[1 –\> -1, 2 –\> -2, 3 –\> -3\]. Since the `arraySort` function sorts the keys in ascending order, the result is \[3, 2, 1\]. Thus, the `(x) –> -x` lambda function sets the [descending order](#reverse-sort) in a sorting.
|
||||
For each element of the source array, the lambda function returns the sorting key, that is, \[1 –\> -1, 2 –\> -2, 3 –\> -3\]. Since the `arraySort` function sorts the keys in ascending order, the result is \[3, 2, 1\]. Thus, the `(x) -> -x` lambda function produces a [descending order](#arrayreversesort) in the sorting.
|
||||
|
||||
The lambda function can accept multiple arguments. In this case, you need to pass the `arraySort` function several arrays of identical length that the arguments of lambda function will correspond to. The resulting array will consist of elements from the first input array; elements from the next input array(s) specify the sorting keys. For example:
|
||||
|
||||
@ -1307,10 +1307,15 @@ To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.
|
||||
|
||||
Same as `arraySort` with an additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in ascending order. The remaining elements `(limit..N]` are in unspecified order.
|
||||
|
||||
## arrayReverseSort(\[func,\] arr, ...) {#reverse-sort}
|
||||
## arrayReverseSort
|
||||
|
||||
Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
arrayReverseSort([func,] arr, ...)
|
||||
```
|
||||
Example of integer values sorting:
|
||||
|
||||
``` sql
|
||||
@ -1907,10 +1912,16 @@ FROM numbers(1,10);
|
||||
|
||||
- [arrayReduce](#arrayreduce)
|
||||
|
||||
## arrayReverse(arr)
|
||||
## arrayReverse
|
||||
|
||||
Returns an array of the same size as the original array containing the elements in reverse order.
|
||||
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
arrayReverse(arr)
|
||||
```
|
||||
|
||||
Example:
|
||||
|
||||
``` sql
|
||||
|
@ -74,7 +74,7 @@ bitmapSubsetInRange(bitmap, range_start, range_end)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild).
|
||||
- `bitmap` – [Bitmap object](#bitmapbuild).
|
||||
- `range_start` – Start of the range (inclusive). [UInt32](../data-types/int-uint.md).
|
||||
- `range_end` – End of the range (exclusive). [UInt32](../data-types/int-uint.md).
|
||||
|
||||
@ -104,7 +104,7 @@ bitmapSubsetLimit(bitmap, range_start, cardinality_limit)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild).
|
||||
- `bitmap` – [Bitmap object](#bitmapbuild).
|
||||
- `range_start` – Start of the range (inclusive). [UInt32](../data-types/int-uint.md).
|
||||
- `cardinality_limit` – Maximum cardinality of the subset. [UInt32](../data-types/int-uint.md).
|
||||
|
||||
@ -134,7 +134,7 @@ subBitmap(bitmap, offset, cardinality_limit)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `bitmap` – The bitmap. [Bitmap object](#bitmap_functions-bitmapbuild).
|
||||
- `bitmap` – The bitmap. [Bitmap object](#bitmapbuild).
|
||||
- `offset` – The position of the first element of the subset. [UInt32](../data-types/int-uint.md).
|
||||
- `cardinality_limit` – The maximum number of elements in the subset. [UInt32](../data-types/int-uint.md).
|
||||
|
||||
@ -162,7 +162,7 @@ bitmapContains(bitmap, needle)
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild).
|
||||
- `bitmap` – [Bitmap object](#bitmapbuild).
|
||||
- `needle` – Searched bit value. [UInt32](../data-types/int-uint.md).
|
||||
|
||||
**Returned values**
|
||||
@ -188,7 +188,7 @@ Result:
|
||||
|
||||
Checks whether two bitmaps intersect.
|
||||
|
||||
If `bitmap2` contains exactly one element, consider using [bitmapContains](#bitmap_functions-bitmapcontains) instead as it works more efficiently.
|
||||
If `bitmap2` contains exactly one element, consider using [bitmapContains](#bitmapcontains) instead as it works more efficiently.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -83,7 +83,7 @@ Result:
|
||||
```
|
||||
## makeDate32
|
||||
|
||||
Like [makeDate](#makeDate) but produces a [Date32](../data-types/date32.md).
|
||||
Like [makeDate](#makedate) but produces a [Date32](../data-types/date32.md).
|
||||
|
||||
## makeDateTime
|
||||
|
||||
@ -214,7 +214,7 @@ Result:
|
||||
|
||||
**See also**
|
||||
|
||||
- [serverTimeZone](#serverTimeZone)
|
||||
- [serverTimeZone](#servertimezone)
|
||||
|
||||
## serverTimeZone
|
||||
|
||||
@ -249,7 +249,7 @@ Result:
|
||||
|
||||
**See also**
|
||||
|
||||
- [timeZone](#timeZone)
|
||||
- [timeZone](#timezone)
|
||||
|
||||
## toTimeZone
|
||||
|
||||
@ -305,7 +305,7 @@ int32samoa: 1546300800
|
||||
|
||||
**See Also**
|
||||
|
||||
- [formatDateTime](#formatDateTime) - supports non-constant timezone.
|
||||
- [formatDateTime](#formatdatetime) - supports non-constant timezone.
|
||||
- [toString](type-conversion-functions.md#tostring) - supports non-constant timezone.
|
||||
|
||||
## timeZoneOf
|
||||
@ -1006,7 +1006,7 @@ toStartOfWeek(t[, mode[, timezone]])
|
||||
**Arguments**
|
||||
|
||||
- `t` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
|
||||
- `mode` - determines the first day of the week as described in the [toWeek()](date-time-functions#toweek) function
|
||||
- `mode` - determines the first day of the week as described in the [toWeek()](#toweek) function
|
||||
- `timezone` - Optional parameter, it behaves like any other conversion function
|
||||
|
||||
**Returned value**
|
||||
@ -1049,7 +1049,7 @@ toLastDayOfWeek(t[, mode[, timezone]])
|
||||
**Arguments**
|
||||
|
||||
- `t` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
|
||||
- `mode` - determines the last day of the week as described in the [toWeek()](date-time-functions#toweek) function
|
||||
- `mode` - determines the last day of the week as described in the [toWeek](#toweek) function
|
||||
- `timezone` - Optional parameter, it behaves like any other conversion function
|
||||
|
||||
**Returned value**
|
||||
@ -1719,7 +1719,7 @@ Result:
|
||||
|
||||
**See Also**
|
||||
|
||||
- [fromDaysSinceYearZero](#fromDaysSinceYearZero)
|
||||
- [fromDaysSinceYearZero](#fromdayssinceyearzero)
|
||||
|
||||
## fromDaysSinceYearZero
|
||||
|
||||
@ -1759,11 +1759,11 @@ Result:
|
||||
|
||||
**See Also**
|
||||
|
||||
- [toDaysSinceYearZero](#toDaysSinceYearZero)
|
||||
- [toDaysSinceYearZero](#todayssinceyearzero)
|
||||
|
||||
## fromDaysSinceYearZero32
|
||||
|
||||
Like [fromDaysSinceYearZero](#fromDaysSinceYearZero) but returns a [Date32](../data-types/date32.md).
|
||||
Like [fromDaysSinceYearZero](#fromdayssinceyearzero) but returns a [Date32](../data-types/date32.md).
|
||||
|
||||
## age
|
||||
|
||||
@ -1982,7 +1982,7 @@ Result:
|
||||
|
||||
**See Also**
|
||||
|
||||
- [toStartOfInterval](#tostartofintervaldate_or_date_with_time-interval-x-unit--time_zone)
|
||||
- [toStartOfInterval](#tostartofinterval)
|
||||
|
||||
## date\_add
|
||||
|
||||
@ -2055,7 +2055,7 @@ Result:
|
||||
|
||||
**See Also**
|
||||
|
||||
- [addDate](#addDate)
|
||||
- [addDate](#adddate)
|
||||
|
||||
## date\_sub
|
||||
|
||||
@ -2129,7 +2129,7 @@ Result:
|
||||
|
||||
**See Also**
|
||||
|
||||
- [subDate](#subDate)
|
||||
- [subDate](#subdate)
|
||||
|
||||
## timestamp\_add
|
||||
|
||||
@ -2310,7 +2310,7 @@ Alias: `SUBDATE`
|
||||
|
||||
- [date_sub](#date_sub)
|
||||
|
||||
## now {#now}
|
||||
## now
|
||||
|
||||
Returns the current date and time at the moment of query analysis. The function is a constant expression.
|
||||
|
||||
@ -3609,7 +3609,7 @@ SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64
|
||||
└───────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## formatDateTime {#formatDateTime}
|
||||
## formatDateTime
|
||||
|
||||
Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column.
|
||||
|
||||
@ -3734,10 +3734,9 @@ LIMIT 10
|
||||
|
||||
**See Also**
|
||||
|
||||
- [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax)
|
||||
- [formatDateTimeInJodaSyntax](#formatdatetimeinjodasyntax)
|
||||
|
||||
|
||||
## formatDateTimeInJodaSyntax {#formatDateTimeInJodaSyntax}
|
||||
## formatDateTimeInJodaSyntax
|
||||
|
||||
Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
|
||||
|
||||
@ -3902,11 +3901,11 @@ Result:
|
||||
|
||||
**See Also**
|
||||
|
||||
- [fromUnixTimestampInJodaSyntax](##fromUnixTimestampInJodaSyntax)
|
||||
- [fromUnixTimestampInJodaSyntax](#fromunixtimestampinjodasyntax)
|
||||
|
||||
## fromUnixTimestampInJodaSyntax
|
||||
|
||||
Same as [fromUnixTimestamp](#fromUnixTimestamp) but when called in the second way (two or three arguments), the formatting is performed using [Joda style](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL style.
|
||||
Same as [fromUnixTimestamp](#fromunixtimestamp) but when called in the second way (two or three arguments), the formatting is performed using [Joda style](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL style.
|
||||
|
||||
**Example:**
|
||||
|
||||
@ -4121,7 +4120,7 @@ Result:
|
||||
Returns the current date and time at the moment of query analysis. The function is a constant expression.
|
||||
|
||||
:::note
|
||||
This function gives the same result that `now('UTC')` would. It was added only for MySQL support and [`now`](#now-now) is the preferred usage.
|
||||
This function gives the same result that `now('UTC')` would. It was added only for MySQL support and [`now`](#now) is the preferred usage.
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
|
@ -12,7 +12,7 @@ For dictionaries created with [DDL queries](../../sql-reference/statements/creat
|
||||
|
||||
For information on connecting and configuring dictionaries, see [Dictionaries](../../sql-reference/dictionaries/index.md).
|
||||
|
||||
## dictGet, dictGetOrDefault, dictGetOrNull {#dictGet}
|
||||
## dictGet, dictGetOrDefault, dictGetOrNull
|
||||
|
||||
Retrieves values from a dictionary.
|
||||
|
||||
|
@ -4,6 +4,8 @@ sidebar_label: Geohash
|
||||
title: "Functions for Working with Geohash"
|
||||
---
|
||||
|
||||
## Geohash
|
||||
|
||||
[Geohash](https://en.wikipedia.org/wiki/Geohash) is a geocode system that subdivides Earth’s surface into grid-shaped buckets and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer the geohash string, the more precise the geographic location.
|
||||
|
||||
If you need to manually convert geographic coordinates to geohash strings, you can use [geohash.org](http://geohash.org/).
|
||||
|
@ -4,6 +4,8 @@ sidebar_label: H3 Indexes
|
||||
title: "Functions for Working with H3 Indexes"
|
||||
---
|
||||
|
||||
## H3 Index
|
||||
|
||||
[H3](https://eng.uber.com/h3/) is a geographical indexing system where Earth’s surface is divided into a grid of even hexagonal cells. This system is hierarchical, i.e. each hexagon on the top level ("parent") can be split into seven even but smaller ones ("children"), and so on.
|
||||
|
||||
The level of the hierarchy is called `resolution` and can take a value from `0` to `15`, where `0` is the `base` level with the largest and coarsest cells.
|
||||
@ -16,7 +18,7 @@ The full description of the H3 system is available at [the Uber Engineering site
|
||||
|
||||
## h3IsValid
|
||||
|
||||
Verifies whether the number is a valid [H3](#h3index) index.
|
||||
Verifies whether the number is a valid [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -51,7 +53,7 @@ Result:
|
||||
|
||||
## h3GetResolution
|
||||
|
||||
Defines the resolution of the given [H3](#h3index) index.
|
||||
Defines the resolution of the given [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -86,7 +88,7 @@ Result:
|
||||
|
||||
## h3EdgeAngle
|
||||
|
||||
Calculates the average length of the [H3](#h3index) hexagon edge in grades.
|
||||
Calculates the average length of the [H3](#h3-index) hexagon edge in degrees.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -100,7 +102,7 @@ h3EdgeAngle(resolution)
|
||||
|
||||
**Returned values**
|
||||
|
||||
- The average length of the [H3](#h3index) hexagon edge in grades. [Float64](../../data-types/float.md).
|
||||
- The average length of the [H3](#h3-index) hexagon edge in degrees. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -120,7 +122,7 @@ Result:
|
||||
|
||||
## h3EdgeLengthM
|
||||
|
||||
Calculates the average length of the [H3](#h3index) hexagon edge in meters.
|
||||
Calculates the average length of the [H3](#h3-index) hexagon edge in meters.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -134,7 +136,7 @@ h3EdgeLengthM(resolution)
|
||||
|
||||
**Returned values**
|
||||
|
||||
- The average length of the [H3](#h3index) hexagon edge in meters. [Float64](../../data-types/float.md).
|
||||
- The average length of the [H3](#h3-index) hexagon edge in meters. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -154,7 +156,7 @@ Result:
|
||||
|
||||
## h3EdgeLengthKm
|
||||
|
||||
Calculates the average length of the [H3](#h3index) hexagon edge in kilometers.
|
||||
Calculates the average length of the [H3](#h3-index) hexagon edge in kilometers.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -168,7 +170,7 @@ h3EdgeLengthKm(resolution)
|
||||
|
||||
**Returned values**
|
||||
|
||||
- The average length of the [H3](#h3index) hexagon edge in kilometers. [Float64](../../data-types/float.md).
|
||||
- The average length of the [H3](#h3-index) hexagon edge in kilometers. [Float64](../../data-types/float.md).
|
||||
|
||||
**Example**
|
||||
|
||||
@ -188,7 +190,7 @@ Result:
|
||||
|
||||
## geoToH3
|
||||
|
||||
Returns [H3](#h3index) point index `(lon, lat)` with specified resolution.
|
||||
Returns [H3](#h3-index) point index `(lon, lat)` with specified resolution.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -225,7 +227,7 @@ Result:
|
||||
|
||||
## h3ToGeo
|
||||
|
||||
Returns the centroid longitude and latitude corresponding to the provided [H3](#h3index) index.
|
||||
Returns the centroid longitude and latitude corresponding to the provided [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -294,7 +296,7 @@ Result:
|
||||
|
||||
## h3kRing
|
||||
|
||||
Lists all the [H3](#h3index) hexagons in the raduis of `k` from the given hexagon in random order.
|
||||
Lists all the [H3](#h3-index) hexagons in the radius of `k` from the given hexagon in random order.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -335,7 +337,7 @@ Result:
|
||||
|
||||
## h3GetBaseCell
|
||||
|
||||
Returns the base cell number of the [H3](#h3index) index.
|
||||
Returns the base cell number of the [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -437,7 +439,7 @@ Result:
|
||||
|
||||
## h3IndexesAreNeighbors
|
||||
|
||||
Returns whether or not the provided [H3](#h3index) indexes are neighbors.
|
||||
Returns whether or not the provided [H3](#h3-index) indexes are neighbors.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -473,7 +475,7 @@ Result:
|
||||
|
||||
## h3ToChildren
|
||||
|
||||
Returns an array of child indexes for the given [H3](#h3index) index.
|
||||
Returns an array of child indexes for the given [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -508,7 +510,7 @@ Result:
|
||||
|
||||
## h3ToParent
|
||||
|
||||
Returns the parent (coarser) index containing the given [H3](#h3index) index.
|
||||
Returns the parent (coarser) index containing the given [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -609,7 +611,7 @@ Result:
|
||||
|
||||
## h3GetResolution
|
||||
|
||||
Returns the resolution of the [H3](#h3index) index.
|
||||
Returns the resolution of the [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -643,7 +645,7 @@ Result:
|
||||
|
||||
## h3IsResClassIII
|
||||
|
||||
Returns whether [H3](#h3index) index has a resolution with Class III orientation.
|
||||
Returns whether [H3](#h3-index) index has a resolution with Class III orientation.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -678,7 +680,7 @@ Result:
|
||||
|
||||
## h3IsPentagon
|
||||
|
||||
Returns whether this [H3](#h3index) index represents a pentagonal cell.
|
||||
Returns whether this [H3](#h3-index) index represents a pentagonal cell.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -713,7 +715,7 @@ Result:
|
||||
|
||||
## h3GetFaces
|
||||
|
||||
Returns icosahedron faces intersected by a given [H3](#h3index) index.
|
||||
Returns icosahedron faces intersected by a given [H3](#h3-index) index.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -815,7 +817,7 @@ Result:
|
||||
|
||||
## h3ToCenterChild
|
||||
|
||||
Returns the center child (finer) [H3](#h3index) index contained by given [H3](#h3index) at the given resolution.
|
||||
Returns the center child (finer) [H3](#h3-index) index contained by given [H3](#h3-index) at the given resolution.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -830,7 +832,7 @@ h3ToCenterChild(index, resolution)
|
||||
|
||||
**Returned values**
|
||||
|
||||
- [H3](#h3index) index of the center child contained by given [H3](#h3index) at the given resolution. [UInt64](../../data-types/int-uint.md).
|
||||
- [H3](#h3-index) index of the center child contained by given [H3](#h3-index) at the given resolution. [UInt64](../../data-types/int-uint.md).
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -5,6 +5,8 @@ sidebar_label: S2 Geometry
|
||||
|
||||
# Functions for Working with S2 Index
|
||||
|
||||
## S2Index
|
||||
|
||||
[S2](https://s2geometry.io/) is a geographical indexing system where all geographical data is represented on a three-dimensional sphere (similar to a globe).
|
||||
|
||||
In the S2 library points are represented as the S2 Index - a specific number which encodes internally a point on the surface of a unit sphere, unlike traditional (latitude, longitude) pairs. To get the S2 point index for a given point specified in the format (latitude, longitude) use the [geoToS2](#geotos2) function. Also, you can use the [s2ToGeo](#s2togeo) function for getting geographical coordinates corresponding to the specified S2 point index.
|
||||
|
@ -45,13 +45,13 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')
|
||||
|
||||
Calculates the MD4 from a string and returns the resulting set of bytes as FixedString(16).
|
||||
|
||||
## MD5 {#md5}
|
||||
## MD5
|
||||
|
||||
Calculates the MD5 from a string and returns the resulting set of bytes as FixedString(16).
|
||||
If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead.
|
||||
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).
|
||||
|
||||
## sipHash64 {#siphash64}
|
||||
## sipHash64
|
||||
|
||||
Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
|
||||
|
||||
|
@ -295,7 +295,7 @@ Same as `toIPv6`, but if the IPv6 address has an invalid format, it returns null
|
||||
## toIPv6
|
||||
|
||||
Converts a string form of IPv6 address to [IPv6](../data-types/ipv6.md) type. If the IPv6 address has an invalid format, returns an empty value.
|
||||
Similar to [IPv6StringToNum](#ipv6stringtonums) function, which converts IPv6 address to binary format.
|
||||
Similar to [IPv6StringToNum](#ipv6stringtonum) function, which converts IPv6 address to binary format.
|
||||
|
||||
If the input string contains a valid IPv4 address, then the IPv6 equivalent of the IPv4 address is returned.
|
||||
|
||||
|
@ -5,10 +5,10 @@ sidebar_label: JSON
|
||||
---
|
||||
|
||||
There are two sets of functions to parse JSON:
|
||||
- [`simpleJSON*` (`visitParam*`)](#simplejson--visitparam-functions) which is made for parsing a limited subset of JSON extremely fast.
|
||||
- [`simpleJSON*` (`visitParam*`)](#simplejson-visitparam-functions) which is made for parsing a limited subset of JSON extremely fast.
|
||||
- [`JSONExtract*`](#jsonextract-functions) which is made for parsing ordinary JSON.
|
||||
|
||||
## simpleJSON / visitParam functions
|
||||
## simpleJSON (visitParam) functions
|
||||
|
||||
ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be. They try to do as little as possible to get the job done as quickly as possible.
|
||||
|
||||
|
@ -762,7 +762,7 @@ LIMIT 10
|
||||
|
||||
Given a size (number of bytes), this function returns a readable, rounded size with suffix (KB, MB, etc.) as string.
|
||||
|
||||
The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull).
|
||||
The opposite operations of this function are [parseReadableSize](#parsereadablesize), [parseReadableSizeOrZero](#parsereadablesizeorzero), and [parseReadableSizeOrNull](#parsereadablesizeornull).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -795,7 +795,7 @@ Result:
|
||||
|
||||
Given a size (number of bytes), this function returns a readable, rounded size with suffix (KiB, MiB, etc.) as string.
|
||||
|
||||
The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull).
|
||||
The opposite operations of this function are [parseReadableSize](#parsereadablesize), [parseReadableSizeOrZero](#parsereadablesizeorzero), and [parseReadableSizeOrNull](#parsereadablesizeornull).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -926,7 +926,7 @@ SELECT
|
||||
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes.
|
||||
If the function is unable to parse the input value, it throws an exception.
|
||||
|
||||
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize).
|
||||
The inverse operations of this function are [formatReadableSize](#formatreadablesize) and [formatReadableDecimalSize](#formatreadabledecimalsize).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -964,7 +964,7 @@ SELECT
|
||||
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes.
|
||||
If the function is unable to parse the input value, it returns `NULL`.
|
||||
|
||||
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize).
|
||||
The inverse operations of this function are [formatReadableSize](#formatreadablesize) and [formatReadableDecimalSize](#formatreadabledecimalsize).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1002,7 +1002,7 @@ SELECT
|
||||
|
||||
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. If the function is unable to parse the input value, it returns `0`.
|
||||
|
||||
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize).
|
||||
The inverse operations of this function are [formatReadableSize](#formatreadablesize) and [formatReadableDecimalSize](#formatreadabledecimalsize).
|
||||
|
||||
|
||||
**Syntax**
|
||||
@ -2711,7 +2711,7 @@ countDigits(x)
|
||||
- Number of digits. [UInt8](../data-types/int-uint.md#uint-ranges).
|
||||
|
||||
:::note
|
||||
For `Decimal` values takes into account their scales: calculates result over underlying integer type which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDecimal(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow).
|
||||
For `Decimal` values, the function takes their scales into account: it calculates the result over the underlying integer type, which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. I.e. you may check decimal overflow for `Decimal64` with `countDigits(x) > 18`. It's a slow variant of [isDecimalOverflow](#isdecimaloverflow).
|
||||
:::
|
||||
|
||||
**Example**
|
||||
@ -2803,7 +2803,7 @@ currentProfiles()
|
||||
|
||||
## enabledProfiles
|
||||
|
||||
Returns settings profiles, assigned to the current user both explicitly and implicitly. Explicitly assigned profiles are the same as returned by the [currentProfiles](#current-profiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file).
|
||||
Returns settings profiles, assigned to the current user both explicitly and implicitly. Explicitly assigned profiles are the same as returned by the [currentProfiles](#currentprofiles) function. Implicitly assigned profiles include parent profiles of other assigned profiles, profiles assigned via granted roles, profiles assigned via their own settings, and the main default profile (see the `default_profile` section in the main server configuration file).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -2916,11 +2916,11 @@ Result:
|
||||
└───────────────────────────┘
|
||||
```
|
||||
|
||||
## queryID {#queryID}
|
||||
## queryID
|
||||
|
||||
Returns the ID of the current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `query_id`.
|
||||
|
||||
In contrast to [initialQueryID](#initial-query-id) function, `queryID` can return different results on different shards (see the example).
|
||||
In contrast to the [initialQueryID](#initialqueryid) function, `queryID` can return different results on different shards (see the example).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -2954,7 +2954,7 @@ Result:
|
||||
|
||||
Returns the ID of the initial current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `initial_query_id`.
|
||||
|
||||
In contrast to [queryID](#query-id) function, `initialQueryID` returns the same results on different shards (see example).
|
||||
In contrast to the [queryID](#queryid) function, `initialQueryID` returns the same results on different shards (see the example).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -3041,7 +3041,7 @@ shardCount()
|
||||
|
||||
**See Also**
|
||||
|
||||
- [shardNum()](#shard-num) function example also contains `shardCount()` function call.
|
||||
- [shardNum()](#shardnum) function example also contains `shardCount()` function call.
|
||||
|
||||
## getOSKernelVersion
|
||||
|
||||
|
@ -200,7 +200,7 @@ Banker's rounding is a method of rounding fractional numbers
|
||||
When the rounding number is halfway between two numbers, it's rounded to the nearest even digit at the specified decimal position.
|
||||
For example: 3.5 rounds up to 4, 2.5 rounds down to 2.
|
||||
It's the default rounding method for floating point numbers defined in [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest).
|
||||
The [round](#rounding_functions-round) function performs the same rounding for floating point numbers.
|
||||
The [round](#round) function performs the same rounding for floating point numbers.
|
||||
The `roundBankers` function also rounds integers the same way, for example, `roundBankers(45, -1) = 40`.
|
||||
|
||||
In other cases, the function rounds numbers to the nearest integer.
|
||||
@ -274,7 +274,7 @@ roundBankers(10.755, 2) = 10.76
|
||||
|
||||
**See Also**
|
||||
|
||||
- [round](#rounding_functions-round)
|
||||
- [round](#round)
|
||||
|
||||
## roundToExp2
|
||||
|
||||
|
@ -1994,7 +1994,7 @@ Result:
|
||||
|
||||
## stringJaccardIndexUTF8
|
||||
|
||||
Like [stringJaccardIndex](#stringJaccardIndex) but for UTF8-encoded strings.
|
||||
Like [stringJaccardIndex](#stringjaccardindex) but for UTF8-encoded strings.
|
||||
|
||||
## editDistance
|
||||
|
||||
|
@ -262,7 +262,7 @@ Result:
|
||||
|
||||
## multiSearchAllPositionsUTF8
|
||||
|
||||
Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings.
|
||||
Like [multiSearchAllPositions](#multisearchallpositions) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -336,7 +336,7 @@ Result:
|
||||
|
||||
Like [`position`](#position) but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
|
||||
|
||||
Functions [`multiSearchFirstPositionCaseInsensitive`](#multiSearchFirstPositionCaseInsensitive), [`multiSearchFirstPositionUTF8`](#multiSearchFirstPositionUTF8) and [`multiSearchFirstPositionCaseInsensitiveUTF8`](#multiSearchFirstPositionCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function.
|
||||
Functions [`multiSearchFirstPositionCaseInsensitive`](#multisearchfirstpositioncaseinsensitive), [`multiSearchFirstPositionUTF8`](#multisearchfirstpositionutf8) and [`multiSearchFirstPositionCaseInsensitiveUTF8`](#multisearchfirstpositioncaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -370,7 +370,7 @@ Result:
|
||||
|
||||
## multiSearchFirstPositionCaseInsensitive
|
||||
|
||||
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but ignores case.
|
||||
Like [`multiSearchFirstPosition`](#multisearchfirstposition) but ignores case.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -404,7 +404,7 @@ Result:
|
||||
|
||||
## multiSearchFirstPositionUTF8
|
||||
|
||||
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings.
|
||||
Like [`multiSearchFirstPosition`](#multisearchfirstposition) but assumes `haystack` and `needle` to be UTF-8 strings.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -440,7 +440,7 @@ Result:
|
||||
|
||||
## multiSearchFirstPositionCaseInsensitiveUTF8
|
||||
|
||||
Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings and ignores case.
|
||||
Like [`multiSearchFirstPosition`](#multisearchfirstposition) but assumes `haystack` and `needle` to be UTF-8 strings and ignores case.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -478,7 +478,7 @@ Result:
|
||||
|
||||
Returns the index `i` (starting from 1) of the leftmost found needle<sub>i</sub> in the string `haystack` and 0 otherwise.
|
||||
|
||||
Functions [`multiSearchFirstIndexCaseInsensitive`](#multiSearchFirstIndexCaseInsensitive), [`multiSearchFirstIndexUTF8`](#multiSearchFirstIndexUTF8) and [`multiSearchFirstIndexCaseInsensitiveUTF8`](#multiSearchFirstIndexCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function.
|
||||
Functions [`multiSearchFirstIndexCaseInsensitive`](#multisearchfirstindexcaseinsensitive), [`multiSearchFirstIndexUTF8`](#multisearchfirstindexutf8) and [`multiSearchFirstIndexCaseInsensitiveUTF8`](#multisearchfirstindexcaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -615,7 +615,7 @@ Result:
|
||||
|
||||
Returns 1, if at least one string needle<sub>i</sub> matches the string `haystack` and 0 otherwise.
|
||||
|
||||
Functions [`multiSearchAnyCaseInsensitive`](#multiSearchAnyCaseInsensitive), [`multiSearchAnyUTF8`](#multiSearchAnyUTF8) and []`multiSearchAnyCaseInsensitiveUTF8`](#multiSearchAnyCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function.
|
||||
Functions [`multiSearchAnyCaseInsensitive`](#multisearchanycaseinsensitive), [`multiSearchAnyUTF8`](#multisearchanyutf8) and [`multiSearchAnyCaseInsensitiveUTF8`](#multisearchanycaseinsensitiveutf8) provide case-insensitive and/or UTF-8 variants of this function.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -719,7 +719,7 @@ Result:
|
||||
|
||||
## multiSearchAnyCaseInsensitiveUTF8
|
||||
|
||||
Like [multiSearchAnyUTF8](#multiSearchAnyUTF8) but ignores case.
|
||||
Like [multiSearchAnyUTF8](#multisearchanyutf8) but ignores case.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -880,7 +880,7 @@ extractAll(haystack, pattern)
|
||||
|
||||
Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where the first array includes all fragments matching the first group, the second array - matching the second group, etc.
|
||||
|
||||
This function is slower than [extractAllGroupsVertical](#extractallgroups-vertical).
|
||||
This function is slower than [extractAllGroupsVertical](#extractallgroupsvertical).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -952,7 +952,7 @@ Result:
|
||||
└────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## like {#like}
|
||||
## like
|
||||
|
||||
Returns whether string `haystack` matches the LIKE expression `pattern`.
|
||||
|
||||
@ -1215,7 +1215,7 @@ Result:
|
||||
|
||||
## ngramSearchCaseInsensitive
|
||||
|
||||
Provides a case-insensitive variant of [ngramSearch](#ngramSearch).
|
||||
Provides a case-insensitive variant of [ngramSearch](#ngramsearch).
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1630,7 +1630,7 @@ Result:
|
||||
|
||||
## hasSubsequenceCaseInsensitive
|
||||
|
||||
Like [hasSubsequence](#hasSubsequence) but searches case-insensitively.
|
||||
Like [hasSubsequence](#hassubsequence) but searches case-insensitively.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1665,7 +1665,7 @@ Result:
|
||||
|
||||
## hasSubsequenceUTF8
|
||||
|
||||
Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings.
|
||||
Like [hasSubsequence](#hassubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1700,7 +1700,7 @@ Result:
|
||||
|
||||
## hasSubsequenceCaseInsensitiveUTF8
|
||||
|
||||
Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively.
|
||||
Like [hasSubsequenceUTF8](#hassubsequenceutf8) but searches case-insensitively.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -10,7 +10,7 @@ sidebar_label: Type Conversion
|
||||
|
||||
ClickHouse generally uses the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion).
|
||||
|
||||
`to<type>` functions and [cast](#castx-t) behave differently in some cases, for example in case of [LowCardinality](../data-types/lowcardinality.md): [cast](#castx-t) removes [LowCardinality](../data-types/lowcardinality.md) trait `to<type>` functions don't. The same with [Nullable](../data-types/nullable.md), this behaviour is not compatible with SQL standard, and it can be changed using [cast_keep_nullable](../../operations/settings/settings.md/#cast_keep_nullable) setting.
|
||||
`to<type>` functions and [cast](#cast) behave differently in some cases, for example in case of [LowCardinality](../data-types/lowcardinality.md): [cast](#cast) removes the [LowCardinality](../data-types/lowcardinality.md) trait, whereas `to<type>` functions don't. The same applies to [Nullable](../data-types/nullable.md). This behaviour is not compatible with the SQL standard, and it can be changed using the [cast_keep_nullable](../../operations/settings/settings.md/#cast_keep_nullable) setting.
|
||||
|
||||
:::note
|
||||
Be aware of potential data loss if values of a datatype are converted to a smaller datatype (for example from `Int64` to `Int32`) or between
|
||||
@ -70,7 +70,7 @@ Integer value in the `Int8`, `Int16`, `Int32`, `Int64`, `Int128` or `Int256` dat
|
||||
|
||||
Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.
|
||||
|
||||
The behavior of functions for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions.
|
||||
The behavior of functions for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. Keep [numeric conversion issues](#common-issues-with-data-conversion) in mind when using the functions.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -169,7 +169,7 @@ Converts an input value to the [UInt](../data-types/int-uint.md) data type. This
|
||||
|
||||
Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers.
|
||||
|
||||
The behavior of functions for negative arguments and for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Remember about [numeric conversions issues](#numeric-conversion-issues), when using the functions.
|
||||
The behavior of functions for negative arguments and for the [NaN and Inf](../data-types/float.md/#data_type-float-nan-inf) arguments is undefined. If you pass a string with a negative number, for example `'-32'`, ClickHouse raises an exception. Keep [numeric conversion issues](#common-issues-with-data-conversion) in mind when using the functions.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -996,7 +996,7 @@ Result:
|
||||
|
||||
## reinterpretAsUInt8
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt8. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1034,7 +1034,7 @@ Result:
|
||||
|
||||
## reinterpretAsUInt16
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt16. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1072,7 +1072,7 @@ Result:
|
||||
|
||||
## reinterpretAsUInt32
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt32. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1110,7 +1110,7 @@ Result:
|
||||
|
||||
## reinterpretAsUInt64
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt64. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1148,7 +1148,7 @@ Result:
|
||||
|
||||
## reinterpretAsUInt128
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt128. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1186,7 +1186,7 @@ Result:
|
||||
|
||||
## reinterpretAsUInt256
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type UInt256. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1224,7 +1224,7 @@ Result:
|
||||
|
||||
## reinterpretAsInt8
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int8. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int8. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1262,7 +1262,7 @@ Result:
|
||||
|
||||
## reinterpretAsInt16
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int16. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int16. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1300,7 +1300,7 @@ Result:
|
||||
|
||||
## reinterpretAsInt32
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int32. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1338,7 +1338,7 @@ Result:
|
||||
|
||||
## reinterpretAsInt64
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int64. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1376,7 +1376,7 @@ Result:
|
||||
|
||||
## reinterpretAsInt128
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int128. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int128. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1414,7 +1414,7 @@ Result:
|
||||
|
||||
## reinterpretAsInt256
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int256. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Int256. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1452,7 +1452,7 @@ Result:
|
||||
|
||||
## reinterpretAsFloat32
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Float32. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Float32. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1486,7 +1486,7 @@ Result:
|
||||
|
||||
## reinterpretAsFloat64
|
||||
|
||||
Performs byte reinterpretation by treating the input value as a value of type Float64. Unlike [`CAST`](#castx-t), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
Performs byte reinterpretation by treating the input value as a value of type Float64. Unlike [`CAST`](#cast), the function does not attempt to preserve the original value - if the target type is not able to represent the input type, the output is meaningless.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -1730,7 +1730,7 @@ Result:
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
## reinterpret(x, T)
|
||||
## reinterpret
|
||||
|
||||
Uses the same source in-memory bytes sequence for `x` value and reinterprets it to destination type.
|
||||
|
||||
@ -1766,9 +1766,9 @@ Result:
|
||||
└─────────────┴──────────────┴───────────────┘
|
||||
```
|
||||
|
||||
## CAST(x, T)
|
||||
## CAST
|
||||
|
||||
Converts an input value to the specified data type. Unlike the [reinterpret](#type_conversion_function-reinterpret) function, `CAST` tries to present the same value using the new data type. If the conversion can not be done then an exception is raised.
|
||||
Converts an input value to the specified data type. Unlike the [reinterpret](#reinterpret) function, `CAST` tries to present the same value using the new data type. If the conversion can not be done then an exception is raised.
|
||||
Several syntax variants are supported.
|
||||
|
||||
**Syntax**
|
||||
@ -1875,7 +1875,7 @@ Result:
|
||||
|
||||
Converts `x` to the `T` data type.
|
||||
|
||||
The difference from [cast(x, T)](#type_conversion_function-cast) is that `accurateCast` does not allow overflow of numeric types during cast if type value `x` does not fit the bounds of type `T`. For example, `accurateCast(-1, 'UInt8')` throws an exception.
|
||||
The difference from [cast](#cast) is that `accurateCast` does not allow overflow of numeric types during cast if the value of `x` does not fit the bounds of type `T`. For example, `accurateCast(-1, 'UInt8')` throws an exception.
|
||||
|
||||
**Example**
|
||||
|
||||
@ -2061,7 +2061,7 @@ Result:
|
||||
└───────────────────────────┴──────────────────────────────┘
|
||||
```
|
||||
|
||||
## parseDateTime {#type_conversion_functions-parseDateTime}
|
||||
## parseDateTime
|
||||
|
||||
Converts a [String](../data-types/string.md) to [DateTime](../data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format).
|
||||
|
||||
@ -2102,15 +2102,15 @@ Alias: `TO_TIMESTAMP`.
|
||||
|
||||
## parseDateTimeOrZero
|
||||
|
||||
Same as for [parseDateTime](#type_conversion_functions-parseDateTime) except that it returns zero date when it encounters a date format that cannot be processed.
|
||||
Same as for [parseDateTime](#parsedatetime) except that it returns zero date when it encounters a date format that cannot be processed.
|
||||
|
||||
## parseDateTimeOrNull
|
||||
|
||||
Same as for [parseDateTime](#type_conversion_functions-parseDateTime) except that it returns `NULL` when it encounters a date format that cannot be processed.
|
||||
Same as for [parseDateTime](#parsedatetime) except that it returns `NULL` when it encounters a date format that cannot be processed.
|
||||
|
||||
Alias: `str_to_date`.
|
||||
|
||||
## parseDateTimeInJodaSyntax {#type_conversion_functions-parseDateTimeInJodaSyntax}
|
||||
## parseDateTimeInJodaSyntax
|
||||
|
||||
Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax.
|
||||
|
||||
@ -2151,11 +2151,11 @@ SELECT parseDateTimeInJodaSyntax('2023-02-24 14:53:31', 'yyyy-MM-dd HH:mm:ss', '
|
||||
|
||||
## parseDateTimeInJodaSyntaxOrZero
|
||||
|
||||
Same as for [parseDateTimeInJodaSyntax](#type_conversion_functions-parseDateTimeInJodaSyntax) except that it returns zero date when it encounters a date format that cannot be processed.
|
||||
Same as for [parseDateTimeInJodaSyntax](#parsedatetimeinjodasyntax) except that it returns zero date when it encounters a date format that cannot be processed.
|
||||
|
||||
## parseDateTimeInJodaSyntaxOrNull
|
||||
|
||||
Same as for [parseDateTimeInJodaSyntax](#type_conversion_functions-parseDateTimeInJodaSyntax) except that it returns `NULL` when it encounters a date format that cannot be processed.
|
||||
Same as for [parseDateTimeInJodaSyntax](#parsedatetimeinjodasyntax) except that it returns `NULL` when it encounters a date format that cannot be processed.
|
||||
|
||||
## parseDateTimeBestEffort
|
||||
## parseDateTime32BestEffort
|
||||
@ -2313,11 +2313,11 @@ Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it r
|
||||
|
||||
## parseDateTimeBestEffortUSOrNull
|
||||
|
||||
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns `NULL` when it encounters a date format that cannot be processed.
|
||||
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortus) function except that it returns `NULL` when it encounters a date format that cannot be processed.
|
||||
|
||||
## parseDateTimeBestEffortUSOrZero
|
||||
|
||||
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns zero date (`1970-01-01`) or zero date with time (`1970-01-01 00:00:00`) when it encounters a date format that cannot be processed.
|
||||
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortus) function except that it returns zero date (`1970-01-01`) or zero date with time (`1970-01-01 00:00:00`) when it encounters a date format that cannot be processed.
|
||||
|
||||
## parseDateTime64BestEffort
|
||||
|
||||
@ -2389,7 +2389,7 @@ Same as for [parseDateTime64BestEffort](#parsedatetime64besteffort), except that
|
||||
|
||||
Converts input parameter to the [LowCardinality](../data-types/lowcardinality.md) version of same data type.
|
||||
|
||||
To convert data from the `LowCardinality` data type use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x as String)`.
|
||||
To convert data from the `LowCardinality` data type use the [CAST](#cast) function. For example, `CAST(x as String)`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -150,7 +150,7 @@ The function also works for [Arrays](array-functions.md#function-empty) and [Str
|
||||
|
||||
**Example**
|
||||
|
||||
To generate the UUID value, ClickHouse provides the [generateUUIDv4](#uuid-function-generate) function.
|
||||
To generate the UUID value, ClickHouse provides the [generateUUIDv4](#generateuuidv4) function.
|
||||
|
||||
Query:
|
||||
|
||||
@ -190,7 +190,7 @@ The function also works for [Arrays](array-functions.md#function-notempty) or [S
|
||||
|
||||
**Example**
|
||||
|
||||
To generate the UUID value, ClickHouse provides the [generateUUIDv4](#uuid-function-generate) function.
|
||||
To generate the UUID value, ClickHouse provides the [generateUUIDv4](#generateuuidv4) function.
|
||||
|
||||
Query:
|
||||
|
||||
|
@ -235,7 +235,7 @@ If `some_predicate` is not selective enough, it will return a large amount of da
|
||||
|
||||
### Distributed Subqueries and max_parallel_replicas
|
||||
|
||||
When [max_parallel_replicas](#settings-max_parallel_replicas) is greater than 1, distributed queries are further transformed.
|
||||
When [max_parallel_replicas](#distributed-subqueries-and-max_parallel_replicas) is greater than 1, distributed queries are further transformed.
|
||||
|
||||
For example, the following:
|
||||
|
||||
@ -255,7 +255,7 @@ where `M` is between `1` and `3` depending on which replica the local query is e
|
||||
|
||||
These settings affect every MergeTree-family table in the query and have the same effect as applying `SAMPLE 1/3 OFFSET (M-1)/3` on each table.
|
||||
|
||||
Therefore adding the [max_parallel_replicas](#settings-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if `local_table_2` does not have a sampling key, incorrect results will be produced. The same rule applies to `JOIN`.
|
||||
Therefore adding the [max_parallel_replicas](#distributed-subqueries-and-max_parallel_replicas) setting will only produce correct results if both tables have the same replication scheme and are sampled by UserID or a subkey of it. In particular, if `local_table_2` does not have a sampling key, incorrect results will be produced. The same rule applies to `JOIN`.
|
||||
|
||||
One workaround, if `local_table_2` does not meet the requirements, is to use `GLOBAL IN` or `GLOBAL JOIN`.
|
||||
|
||||
|
@ -108,7 +108,7 @@ ALTER TABLE visits RENAME COLUMN webBrowser TO browser
|
||||
CLEAR COLUMN [IF EXISTS] name IN PARTITION partition_name
|
||||
```
|
||||
|
||||
Resets all data in a column for a specified partition. Read more about setting the partition name in the section [How to set the partition expression](partition.md/#how-to-set-partition-expression).
|
||||
Resets all data in a column for a specified partition. Read more about setting the partition name in the section [How to set the partition expression](../alter/partition.md/#how-to-set-partition-expression).
|
||||
|
||||
If the `IF EXISTS` clause is specified, the query won’t return an error if the column does not exist.
|
||||
|
||||
@ -173,7 +173,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
|
||||
|
||||
Changing the column type is the only complex action – it changes the contents of files with data. For large tables, this may take a long time.
|
||||
|
||||
The query also can change the order of the columns using `FIRST | AFTER` clause, see [ADD COLUMN](#alter_add-column) description, but column type is mandatory in this case.
|
||||
The query also can change the order of the columns using `FIRST | AFTER` clause, see [ADD COLUMN](#add-column) description, but column type is mandatory in this case.
|
||||
|
||||
Example:
|
||||
|
||||
|
@ -31,7 +31,7 @@ The following operations with [partitions](/docs/en/engines/table-engines/merget
|
||||
ALTER TABLE table_name [ON CLUSTER cluster] DETACH PARTITION|PART partition_expr
|
||||
```
|
||||
|
||||
Moves all data for the specified partition to the `detached` directory. The server forgets about the detached data partition as if it does not exist. The server will not know about this data until you make the [ATTACH](#alter_attach-partition) query.
|
||||
Moves all data for the specified partition to the `detached` directory. The server forgets about the detached data partition as if it does not exist. The server will not know about this data until you make the [ATTACH](#attach-partitionpart) query.
|
||||
|
||||
Example:
|
||||
|
||||
@ -252,7 +252,7 @@ Downloads a partition from another server. This query only works for the replica
|
||||
The query does the following:
|
||||
|
||||
1. Downloads the partition|part from the specified shard. In ‘path-in-zookeeper’ you must specify a path to the shard in ZooKeeper.
|
||||
2. Then the query puts the downloaded data to the `detached` directory of the `table_name` table. Use the [ATTACH PARTITION\|PART](#alter_attach-partition) query to add the data to the table.
|
||||
2. Then the query puts the downloaded data to the `detached` directory of the `table_name` table. Use the [ATTACH PARTITION\|PART](#attach-partitionpart) query to add the data to the table.
|
||||
|
||||
For example:
|
||||
|
||||
@ -353,7 +353,7 @@ You can specify the partition expression in `ALTER ... PARTITION` queries in dif
|
||||
- Using the keyword `ALL`. It can be used only with DROP/DETACH/ATTACH. For example, `ALTER TABLE visits ATTACH PARTITION ALL`.
|
||||
- As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
|
||||
- Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
|
||||
- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.
|
||||
- In the [ALTER ATTACH PART](#attach-partitionpart) and [DROP DETACHED PART](#drop-detached-partitionpart) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.
|
||||
|
||||
Usage of quotes when specifying the partition depends on the type of partition expression. For example, for the `String` type, you have to specify its name in quotes (`'`). For the `Date` and `Int*` types no quotes are needed.
|
||||
|
||||
|
@ -17,8 +17,8 @@ By default, tables are created only on the current server. Distributed DDL queri
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
(
|
||||
name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [compression_codec] [TTL expr1] [COMMENT 'comment for column'],
|
||||
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [compression_codec] [TTL expr2] [COMMENT 'comment for column'],
|
||||
name1 [type1] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr1] [COMMENT 'comment for column'] [compression_codec] [TTL expr1],
|
||||
name2 [type2] [NULL|NOT NULL] [DEFAULT|MATERIALIZED|EPHEMERAL|ALIAS expr2] [COMMENT 'comment for column'] [compression_codec] [TTL expr2],
|
||||
...
|
||||
) ENGINE = engine
|
||||
COMMENT 'comment for table'
|
||||
|
@ -6,7 +6,7 @@ sidebar_label: VIEW
|
||||
|
||||
# CREATE VIEW
|
||||
|
||||
Creates a new view. Views can be [normal](#normal-view), [materialized](#materialized-view), [live](#live-view-experimental), and [window](#window-view-experimental) (live view and window view are experimental features).
|
||||
Creates a new view. Views can be [normal](#normal-view), [materialized](#materialized-view), [live](#live-view-deprecated), and [window](#window-view-experimental) (live view and window view are experimental features).
|
||||
|
||||
## Normal View
|
||||
|
||||
|
@ -33,7 +33,7 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US
|
||||
- `role` — ClickHouse user role.
|
||||
- `user` — ClickHouse user account.
|
||||
|
||||
The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`.
|
||||
The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option) privilege to `user` or `role`.
|
||||
The `WITH REPLACE OPTION` clause replaces the old roles with the new role for the `user` or `role`; if it is not specified, the new roles are appended.
|
||||
|
||||
## Grant Current Grants Syntax
|
||||
@ -201,7 +201,7 @@ Hierarchy of privileges:
|
||||
- `HDFS`
|
||||
- `S3`
|
||||
- [dictGet](#dictget)
|
||||
- [displaySecretsInShowAndSelect](#display-secrets)
|
||||
- [displaySecretsInShowAndSelect](#displaysecretsinshowandselect)
|
||||
- [NAMED COLLECTION ADMIN](#named-collection-admin)
|
||||
- `CREATE NAMED COLLECTION`
|
||||
- `DROP NAMED COLLECTION`
|
||||
@ -498,7 +498,7 @@ Privilege level: `DICTIONARY`.
|
||||
- `GRANT dictGet ON mydictionary TO john`
|
||||
|
||||
|
||||
### displaySecretsInShowAndSelect {#display-secrets}
|
||||
### displaySecretsInShowAndSelect
|
||||
|
||||
Allows a user to view secrets in `SHOW` and `SELECT` queries if both
|
||||
[`display_secrets_in_show_and_select` server setting](../../operations/server-configuration-parameters/settings#display_secrets_in_show_and_select)
|
||||
|
@ -27,14 +27,14 @@ The features of data sampling are listed below:
|
||||
|
||||
For the `SAMPLE` clause the following syntax is supported:
|
||||
|
||||
| SAMPLE Clause Syntax | Description |
|
||||
|----------------------|------------------------------|
|
||||
| `SAMPLE k` | Here `k` is the number from 0 to 1. The query is executed on `k` fraction of data. For example, `SAMPLE 0.1` runs the query on 10% of data. [Read more](#select-sample-k) |
|
||||
| `SAMPLE n` | Here `n` is a sufficiently large integer. The query is executed on a sample of at least `n` rows (but not significantly more than this). For example, `SAMPLE 10000000` runs the query on a minimum of 10,000,000 rows. [Read more](#select-sample-n) |
|
||||
| `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1. The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#select-sample-offset) |
|
||||
| SAMPLE Clause Syntax | Description |
|
||||
|----------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| `SAMPLE k` | Here `k` is the number from 0 to 1. The query is executed on `k` fraction of data. For example, `SAMPLE 0.1` runs the query on 10% of data. [Read more](#sample-k) |
|
||||
| `SAMPLE n` | Here `n` is a sufficiently large integer. The query is executed on a sample of at least `n` rows (but not significantly more than this). For example, `SAMPLE 10000000` runs the query on a minimum of 10,000,000 rows. [Read more](#sample-n) |
|
||||
| `SAMPLE k OFFSET m` | Here `k` and `m` are the numbers from 0 to 1. The query is executed on a sample of `k` fraction of the data. The data used for the sample is offset by `m` fraction. [Read more](#sample-k-offset-m) |
|
||||
|
||||
|
||||
## SAMPLE K {#select-sample-k}
|
||||
## SAMPLE K
|
||||
|
||||
Here `k` is the number from 0 to 1 (both fractional and decimal notations are supported). For example, `SAMPLE 1/2` or `SAMPLE 0.5`.
|
||||
|
||||
@ -54,7 +54,7 @@ ORDER BY PageViews DESC LIMIT 1000
|
||||
|
||||
In this example, the query is executed on a sample from 0.1 (10%) of data. Values of aggregate functions are not corrected automatically, so to get an approximate result, the value `count()` is manually multiplied by 10.
|
||||
|
||||
## SAMPLE N {#select-sample-n}
|
||||
## SAMPLE N
|
||||
|
||||
Here `n` is a sufficiently large integer. For example, `SAMPLE 10000000`.
|
||||
|
||||
@ -90,7 +90,7 @@ FROM visits
|
||||
SAMPLE 10000000
|
||||
```
|
||||
|
||||
## SAMPLE K OFFSET M {#select-sample-offset}
|
||||
## SAMPLE K OFFSET M
|
||||
|
||||
Here `k` and `m` are numbers from 0 to 1. Examples are shown below.
|
||||
|
||||
|
@ -174,7 +174,7 @@ Aborts ClickHouse process (like `kill -9 {$ pid_clickhouse-server}`)
|
||||
|
||||
## Managing Distributed Tables
|
||||
|
||||
ClickHouse can manage [distributed](../../engines/table-engines/special/distributed.md) tables. When a user inserts data into these tables, ClickHouse first creates a queue of the data that should be sent to cluster nodes, then asynchronously sends it. You can manage queue processing with the [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [FLUSH DISTRIBUTED](#query_language-system-flush-distributed), and [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) queries. You can also synchronously insert distributed data with the [distributed_foreground_insert](../../operations/settings/settings.md#distributed_foreground_insert) setting.
|
||||
ClickHouse can manage [distributed](../../engines/table-engines/special/distributed.md) tables. When a user inserts data into these tables, ClickHouse first creates a queue of the data that should be sent to cluster nodes, then asynchronously sends it. You can manage queue processing with the [STOP DISTRIBUTED SENDS](#stop-distributed-sends), [FLUSH DISTRIBUTED](#flush-distributed), and [START DISTRIBUTED SENDS](#start-distributed-sends) queries. You can also synchronously insert distributed data with the [distributed_foreground_insert](../../operations/settings/settings.md#distributed_foreground_insert) setting.
|
||||
|
||||
### STOP DISTRIBUTED SENDS
|
||||
|
||||
|
@ -54,11 +54,11 @@ Identifiers are:
|
||||
- Cluster, database, table, partition, and column names.
|
||||
- Functions.
|
||||
- Data types.
|
||||
- [Expression aliases](#expression_aliases).
|
||||
- [Expression aliases](#expression-aliases).
|
||||
|
||||
Identifiers can be quoted or non-quoted. The latter is preferred.
|
||||
|
||||
Non-quoted identifiers must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and can not be equal to [keywords](#syntax-keywords). Examples: `x`, `_1`, `X_y__Z123_`.
|
||||
Non-quoted identifiers must match the regex `^[a-zA-Z_][0-9a-zA-Z_]*$` and can not be equal to [keywords](#keywords). Examples: `x`, `_1`, `X_y__Z123_`.
|
||||
|
||||
If you want to use identifiers the same as keywords or you want to use other symbols in identifiers, quote it using double quotes or backticks, for example, `"id"`, `` `id` ``.
|
||||
|
||||
|
@ -18,7 +18,7 @@ file([path_to_archive ::] path [,format] [,structure] [,compression])
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports in read-only mode the following [globs](#globs_in_path): `*`, `?`, `{abc,def}` (with `'abc'` and `'def'` being strings) and `{N..M}` (with `N` and `M` being numbers).
|
||||
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports in read-only mode the following [globs](#globs-in-path): `*`, `?`, `{abc,def}` (with `'abc'` and `'def'` being strings) and `{N..M}` (with `N` and `M` being numbers).
|
||||
- `path_to_archive` - The relative path to a zip/tar/7z archive. Supports the same globs as `path`.
|
||||
- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
|
||||
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
|
||||
@ -128,7 +128,7 @@ Reading data from `table.csv`, located in `archive1.zip` or/and `archive2.zip`:
|
||||
SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv');
|
||||
```
|
||||
|
||||
## Globs in path {#globs_in_path}
|
||||
## Globs in path
|
||||
|
||||
Paths may use globbing. Files must match the whole path pattern, not only the suffix or prefix.
|
||||
|
||||
|
@ -22,7 +22,7 @@ fileCluster(cluster_name, path[, format, structure, compression_method])
|
||||
**Arguments**
|
||||
|
||||
- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
|
||||
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file also supports [globs](#globs_in_path).
|
||||
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file also supports [globs](#globs-in-path).
|
||||
- `format` — [Format](../../interfaces/formats.md#formats) of the files. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `structure` — Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md).
|
||||
- `compression_method` — Compression method. Supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
|
||||
@ -74,7 +74,7 @@ SELECT * FROM fileCluster('my_cluster', 'file{1,2}.csv', 'CSV', 'i UInt32, s Str
|
||||
```
|
||||
|
||||
|
||||
## Globs in Path {#globs_in_path}
|
||||
## Globs in Path
|
||||
|
||||
All patterns supported by [File](../../sql-reference/table-functions/file.md#globs-in-path) table function are supported by FileCluster.
|
||||
|
||||
|
@ -1,5 +1,7 @@
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
|
||||
#include <Columns/ColumnConst.h>
|
||||
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/FieldVisitorToString.h>
|
||||
|
||||
@ -58,12 +60,20 @@ ColumnsWithTypeAndName FunctionNode::getArgumentColumns() const
|
||||
|
||||
ColumnWithTypeAndName argument_column;
|
||||
|
||||
auto * constant = argument->as<ConstantNode>();
|
||||
if (isNameOfInFunction(function_name) && i == 1)
|
||||
{
|
||||
argument_column.type = std::make_shared<DataTypeSet>();
|
||||
if (constant)
|
||||
{
|
||||
/// Created but not filled for the analysis during function resolution.
|
||||
FutureSetPtr empty_set;
|
||||
argument_column.column = ColumnConst::create(ColumnSet::create(1, empty_set), 1);
|
||||
}
|
||||
}
|
||||
else
|
||||
argument_column.type = argument->getResultType();
|
||||
|
||||
auto * constant = argument->as<ConstantNode>();
|
||||
if (constant && !isNotCreatable(argument_column.type))
|
||||
argument_column.column = argument_column.type->createColumnConst(1, constant->getValue());
|
||||
|
||||
|
@ -10,9 +10,12 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
InterpolateNode::InterpolateNode(QueryTreeNodePtr expression_, QueryTreeNodePtr interpolate_expression_)
|
||||
InterpolateNode::InterpolateNode(std::shared_ptr<IdentifierNode> expression_, QueryTreeNodePtr interpolate_expression_)
|
||||
: IQueryTreeNode(children_size)
|
||||
{
|
||||
if (expression_)
|
||||
expression_name = expression_->getIdentifier().getFullName();
|
||||
|
||||
children[expression_child_index] = std::move(expression_);
|
||||
children[interpolate_expression_child_index] = std::move(interpolate_expression_);
|
||||
}
|
||||
@ -41,13 +44,23 @@ void InterpolateNode::updateTreeHashImpl(HashState &, CompareOptions) const
|
||||
|
||||
QueryTreeNodePtr InterpolateNode::cloneImpl() const
|
||||
{
|
||||
return std::make_shared<InterpolateNode>(nullptr /*expression*/, nullptr /*interpolate_expression*/);
|
||||
auto cloned = std::make_shared<InterpolateNode>(nullptr /*expression*/, nullptr /*interpolate_expression*/);
|
||||
cloned->expression_name = expression_name;
|
||||
return cloned;
|
||||
}
|
||||
|
||||
ASTPtr InterpolateNode::toASTImpl(const ConvertToASTOptions & options) const
|
||||
{
|
||||
auto result = std::make_shared<ASTInterpolateElement>();
|
||||
result->column = getExpression()->toAST(options)->getColumnName();
|
||||
|
||||
/// Interpolate parser supports only identifier node.
|
||||
/// In case of alias, identifier is replaced to expression, which can't be parsed.
|
||||
/// In this case, keep original alias name.
|
||||
if (const auto * identifier = getExpression()->as<IdentifierNode>())
|
||||
result->column = identifier->toAST(options)->getColumnName();
|
||||
else
|
||||
result->column = expression_name;
|
||||
|
||||
result->children.push_back(getInterpolateExpression()->toAST(options));
|
||||
result->expr = result->children.back();
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <Analyzer/IQueryTreeNode.h>
|
||||
#include <Analyzer/IdentifierNode.h>
|
||||
#include <Analyzer/ListNode.h>
|
||||
|
||||
namespace DB
|
||||
@ -19,7 +19,7 @@ class InterpolateNode final : public IQueryTreeNode
|
||||
{
|
||||
public:
|
||||
/// Initialize interpolate node with expression and interpolate expression
|
||||
explicit InterpolateNode(QueryTreeNodePtr expression_, QueryTreeNodePtr interpolate_expression_);
|
||||
explicit InterpolateNode(std::shared_ptr<IdentifierNode> expression_, QueryTreeNodePtr interpolate_expression_);
|
||||
|
||||
/// Get expression to interpolate
|
||||
const QueryTreeNodePtr & getExpression() const
|
||||
@ -61,6 +61,9 @@ protected:
|
||||
|
||||
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
|
||||
|
||||
/// Initial name from column identifier.
|
||||
std::string expression_name;
|
||||
|
||||
private:
|
||||
static constexpr size_t expression_child_index = 0;
|
||||
static constexpr size_t interpolate_expression_child_index = 1;
|
||||
|
@ -51,7 +51,7 @@ public:
|
||||
using Base = InDepthQueryTreeVisitorWithContext<AggregateFunctionsArithmericOperationsVisitor>;
|
||||
using Base::Base;
|
||||
|
||||
void leaveImpl(QueryTreeNodePtr & node)
|
||||
void enterImpl(QueryTreeNodePtr & node)
|
||||
{
|
||||
if (!getSettings().optimize_arithmetic_operations_in_aggregate_functions)
|
||||
return;
|
||||
|
@ -551,14 +551,25 @@ private:
|
||||
|
||||
in_function->getArguments().getNodes() = std::move(in_arguments);
|
||||
in_function->resolveAsFunction(in_function_resolver);
|
||||
|
||||
DataTypePtr result_type = in_function->getResultType();
|
||||
const auto * type_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(result_type.get());
|
||||
if (type_low_cardinality)
|
||||
result_type = type_low_cardinality->getDictionaryType();
|
||||
/** For `k :: UInt8`, expression `k = 1 OR k = NULL` with result type Nullable(UInt8)
|
||||
* is replaced with `k IN (1, NULL)` with result type UInt8.
|
||||
* Convert it back to Nullable(UInt8).
|
||||
* And for `k :: LowCardinality(UInt8)`, the transformation of `k IN (1, NULL)` results in type LowCardinality(UInt8).
|
||||
* Convert it to LowCardinality(Nullable(UInt8)).
|
||||
*/
|
||||
if (is_any_nullable && !in_function->getResultType()->isNullable())
|
||||
if (is_any_nullable && !result_type->isNullable())
|
||||
{
|
||||
auto nullable_result_type = std::make_shared<DataTypeNullable>(in_function->getResultType());
|
||||
auto in_function_nullable = createCastFunction(std::move(in_function), std::move(nullable_result_type), getContext());
|
||||
DataTypePtr new_result_type = std::make_shared<DataTypeNullable>(result_type);
|
||||
if (type_low_cardinality)
|
||||
{
|
||||
new_result_type = std::make_shared<DataTypeLowCardinality>(new_result_type);
|
||||
}
|
||||
auto in_function_nullable = createCastFunction(std::move(in_function), std::move(new_result_type), getContext());
|
||||
or_operands.push_back(std::move(in_function_nullable));
|
||||
}
|
||||
else
|
||||
|
@ -75,7 +75,12 @@ struct ScopeAliases
|
||||
if (jt == transitive_aliases.end())
|
||||
return {};
|
||||
|
||||
key = &(getKey(jt->second, find_option));
|
||||
const auto & new_key = getKey(jt->second, find_option);
|
||||
/// Ignore potential cyclic aliases.
|
||||
if (new_key == *key)
|
||||
return {};
|
||||
|
||||
key = &new_key;
|
||||
it = alias_map.find(*key);
|
||||
}
|
||||
|
||||
|
@ -54,9 +54,9 @@ namespace
|
||||
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
|
||||
settings.auth_settings.region,
|
||||
context->getRemoteHostFilter(),
|
||||
static_cast<unsigned>(local_settings.s3_max_redirects),
|
||||
static_cast<unsigned>(local_settings.backup_restore_s3_retry_attempts),
|
||||
local_settings.enable_s3_requests_logging,
|
||||
static_cast<unsigned>(global_settings.s3_max_redirects),
|
||||
static_cast<unsigned>(global_settings.s3_retry_attempts),
|
||||
global_settings.enable_s3_requests_logging,
|
||||
/* for_disk_s3 = */ false,
|
||||
request_settings.get_request_throttler,
|
||||
request_settings.put_request_throttler,
|
||||
|
@ -643,7 +643,8 @@ The server successfully detected this situation and will download merged part fr
|
||||
\
|
||||
M(ServerStartupMilliseconds, "Time elapsed from starting server to listening to sockets in milliseconds")\
|
||||
M(IOUringSQEsSubmitted, "Total number of io_uring SQEs submitted") \
|
||||
M(IOUringSQEsResubmits, "Total number of io_uring SQE resubmits performed") \
|
||||
M(IOUringSQEsResubmitsAsync, "Total number of asynchronous io_uring SQE resubmits performed") \
|
||||
M(IOUringSQEsResubmitsSync, "Total number of synchronous io_uring SQE resubmits performed") \
|
||||
M(IOUringCQEsCompleted, "Total number of successfully completed io_uring CQEs") \
|
||||
M(IOUringCQEsFailed, "Total number of completed io_uring CQEs with failures") \
|
||||
\
|
||||
|
@ -1,12 +1,39 @@
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <base/sleep.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/HostResolvePool.h>
|
||||
#include "base/defines.h"
|
||||
#include <base/defines.h>
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <optional>
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
|
||||
using namespace std::literals::chrono_literals;
|
||||
|
||||
|
||||
auto now()
|
||||
{
|
||||
return std::chrono::steady_clock::now();
|
||||
}
|
||||
|
||||
void sleep_until(auto time_point)
|
||||
{
|
||||
std::this_thread::sleep_until(time_point);
|
||||
}
|
||||
|
||||
void sleep_for(auto duration)
|
||||
{
|
||||
std::this_thread::sleep_for(duration);
|
||||
}
|
||||
|
||||
size_t toMilliseconds(auto duration)
|
||||
{
|
||||
return std::chrono::duration_cast<std::chrono::milliseconds>(duration).count();
|
||||
}
|
||||
|
||||
const auto epsilon = 500us;
|
||||
|
||||
class ResolvePoolMock : public DB::HostResolver
|
||||
{
|
||||
@ -267,13 +294,14 @@ TEST_F(ResolvePoolTest, CanFailAndHeal)
|
||||
|
||||
TEST_F(ResolvePoolTest, CanExpire)
|
||||
{
|
||||
auto resolver = make_resolver();
|
||||
auto history = 5ms;
|
||||
auto resolver = make_resolver(toMilliseconds(history));
|
||||
|
||||
auto expired_addr = resolver->resolve();
|
||||
ASSERT_TRUE(addresses.contains(*expired_addr));
|
||||
|
||||
addresses.erase(*expired_addr);
|
||||
sleepForSeconds(1);
|
||||
|
||||
sleep_for(history + epsilon);
|
||||
|
||||
for (size_t i = 0; i < 1000; ++i)
|
||||
{
|
||||
@ -310,12 +338,19 @@ TEST_F(ResolvePoolTest, DuplicatesInAddresses)
|
||||
ASSERT_EQ(3, DB::CurrentThread::getProfileEvents()[metrics.discovered]);
|
||||
}
|
||||
|
||||
void check_no_failed_address(size_t iteration, auto & resolver, auto & addresses, auto & failed_addr, auto & metrics)
|
||||
void check_no_failed_address(size_t iteration, auto & resolver, auto & addresses, auto & failed_addr, auto & metrics, auto deadline)
|
||||
{
|
||||
ASSERT_EQ(iteration, DB::CurrentThread::getProfileEvents()[metrics.failed]);
|
||||
for (size_t i = 0; i < 100; ++i)
|
||||
{
|
||||
auto next_addr = resolver->resolve();
|
||||
|
||||
if (now() > deadline)
|
||||
{
|
||||
ASSERT_NE(i, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
ASSERT_TRUE(addresses.contains(*next_addr));
|
||||
ASSERT_NE(*next_addr, *failed_addr);
|
||||
}
|
||||
@ -323,52 +358,60 @@ void check_no_failed_address(size_t iteration, auto & resolver, auto & addresses
|
||||
|
||||
TEST_F(ResolvePoolTest, BannedForConsiquenceFail)
|
||||
{
|
||||
size_t history_ms = 5;
|
||||
auto resolver = make_resolver(history_ms);
|
||||
|
||||
auto history = 5ms;
|
||||
auto resolver = make_resolver(toMilliseconds(history));
|
||||
|
||||
auto failed_addr = resolver->resolve();
|
||||
ASSERT_TRUE(addresses.contains(*failed_addr));
|
||||
|
||||
auto start_at = now();
|
||||
|
||||
failed_addr.setFail();
|
||||
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
|
||||
ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
|
||||
check_no_failed_address(1, resolver, addresses, failed_addr, metrics);
|
||||
check_no_failed_address(1, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
|
||||
|
||||
sleep_until(start_at + history + epsilon);
|
||||
start_at = now();
|
||||
|
||||
sleepForMilliseconds(history_ms + 1);
|
||||
resolver->update();
|
||||
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
|
||||
ASSERT_EQ(0, CurrentMetrics::get(metrics.banned_count));
|
||||
|
||||
failed_addr.setFail();
|
||||
check_no_failed_address(2, resolver, addresses, failed_addr, metrics);
|
||||
check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
|
||||
|
||||
sleep_until(start_at + history + epsilon);
|
||||
start_at = now();
|
||||
|
||||
sleepForMilliseconds(history_ms + 1);
|
||||
resolver->update();
|
||||
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
|
||||
ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
|
||||
|
||||
// ip is still banned after history_ms + update, because it was its second consecutive fail
|
||||
check_no_failed_address(2, resolver, addresses, failed_addr, metrics);
|
||||
check_no_failed_address(2, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
|
||||
}
|
||||
|
||||
TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail)
|
||||
{
|
||||
size_t history_ms = 5;
|
||||
auto resolver = make_resolver(history_ms);
|
||||
auto history = 5ms;
|
||||
auto resolver = make_resolver(toMilliseconds(history));
|
||||
|
||||
auto failed_addr = resolver->resolve();
|
||||
ASSERT_TRUE(addresses.contains(*failed_addr));
|
||||
|
||||
auto start_at = now();
|
||||
|
||||
failed_addr.setFail();
|
||||
failed_addr.setFail();
|
||||
failed_addr.setFail();
|
||||
|
||||
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
|
||||
ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
|
||||
check_no_failed_address(3, resolver, addresses, failed_addr, metrics);
|
||||
check_no_failed_address(3, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
|
||||
|
||||
sleep_until(start_at + history + epsilon);
|
||||
|
||||
sleepForMilliseconds(history_ms + 1);
|
||||
resolver->update();
|
||||
// ip is cleared after just 1 history_ms interval.
|
||||
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
|
||||
@ -377,8 +420,8 @@ TEST_F(ResolvePoolTest, NoAditionalBannForConcurrentFail)
|
||||
|
||||
TEST_F(ResolvePoolTest, StillBannedAfterSuccess)
|
||||
{
|
||||
size_t history_ms = 5;
|
||||
auto resolver = make_resolver(history_ms);
|
||||
auto history = 5ms;
|
||||
auto resolver = make_resolver(toMilliseconds(history));
|
||||
|
||||
auto failed_addr = resolver->resolve();
|
||||
ASSERT_TRUE(addresses.contains(*failed_addr));
|
||||
@ -395,11 +438,12 @@ TEST_F(ResolvePoolTest, StillBannedAfterSuccess)
|
||||
}
|
||||
chassert(again_addr);
|
||||
|
||||
auto start_at = now();
|
||||
failed_addr.setFail();
|
||||
|
||||
ASSERT_EQ(3, CurrentMetrics::get(metrics.active_count));
|
||||
ASSERT_EQ(1, CurrentMetrics::get(metrics.banned_count));
|
||||
check_no_failed_address(1, resolver, addresses, failed_addr, metrics);
|
||||
check_no_failed_address(1, resolver, addresses, failed_addr, metrics, start_at + history - epsilon);
|
||||
|
||||
again_addr = std::nullopt; // success;
|
||||
|
||||
|
@ -258,7 +258,8 @@
|
||||
M(KeeperExistsRequest) \
|
||||
\
|
||||
M(IOUringSQEsSubmitted) \
|
||||
M(IOUringSQEsResubmits) \
|
||||
M(IOUringSQEsResubmitsAsync) \
|
||||
M(IOUringSQEsResubmitsSync) \
|
||||
M(IOUringCQEsCompleted) \
|
||||
M(IOUringCQEsFailed) \
|
||||
\
|
||||
|
@ -188,6 +188,18 @@ NamesAndTypesList NamesAndTypesList::filter(const Names & names) const
|
||||
return filter(NameSet(names.begin(), names.end()));
|
||||
}
|
||||
|
||||
NamesAndTypesList NamesAndTypesList::eraseNames(const NameSet & names) const
|
||||
{
|
||||
NamesAndTypesList res;
|
||||
for (const auto & column : *this)
|
||||
{
|
||||
if (!names.contains(column.name))
|
||||
res.push_back(column);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
NamesAndTypesList NamesAndTypesList::addTypes(const Names & names) const
|
||||
{
|
||||
/// NOTE: It's better to make a map in `IStorage` than to create it here every time again.
|
||||
|
@ -111,6 +111,9 @@ public:
|
||||
/// Leave only the columns whose names are in the `names`. In `names` there can be superfluous columns.
|
||||
NamesAndTypesList filter(const Names & names) const;
|
||||
|
||||
/// Leave only the columns whose names are not in the `names`.
|
||||
NamesAndTypesList eraseNames(const NameSet & names) const;
|
||||
|
||||
/// Unlike `filter`, returns columns in the order in which they go in `names`.
|
||||
NamesAndTypesList addTypes(const Names & names) const;
|
||||
|
||||
|
@ -334,7 +334,7 @@ class IColumn;
|
||||
M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \
|
||||
\
|
||||
M(Bool, join_use_nulls, false, "Use NULLs for non-joined rows of outer JOINs for types that can be inside Nullable. If false, use default value of corresponding columns data type.", IMPORTANT) \
|
||||
M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", IMPORTANT) \
|
||||
M(Bool, allow_experimental_join_condition, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y.", 0) \
|
||||
\
|
||||
M(JoinStrictness, join_default_strictness, JoinStrictness::All, "Set default strictness in JOIN query. Possible values: empty string, 'ANY', 'ALL'. If empty, query without strictness will throw exception.", 0) \
|
||||
M(Bool, any_join_distinct_right_table_keys, false, "Enable old ANY JOIN logic with many-to-one left-to-right table keys mapping for all ANY JOINs. It leads to confusing not equal results for 't1 ANY LEFT JOIN t2' and 't2 ANY RIGHT JOIN t1'. ANY RIGHT JOIN needs one-to-many keys mapping to be consistent with LEFT one.", IMPORTANT) \
|
||||
@ -517,7 +517,6 @@ class IColumn;
|
||||
M(UInt64, backup_restore_keeper_value_max_size, 1048576, "Maximum size of data of a [Zoo]Keeper's node during backup", 0) \
|
||||
M(UInt64, backup_restore_batch_size_for_keeper_multiread, 10000, "Maximum size of batch for multiread request to [Zoo]Keeper during backup or restore", 0) \
|
||||
M(UInt64, backup_restore_batch_size_for_keeper_multi, 1000, "Maximum size of batch for multi request to [Zoo]Keeper during backup or restore", 0) \
|
||||
M(UInt64, backup_restore_s3_retry_attempts, 1000, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries. It takes place only for backup/restore.", 0) \
|
||||
M(UInt64, max_backup_bandwidth, 0, "The maximum read speed in bytes per second for particular backup on server. Zero means unlimited.", 0) \
|
||||
\
|
||||
M(Bool, log_profile_events, true, "Log query performance statistics into the query_log, query_thread_log and query_views_log.", 0) \
|
||||
@ -1060,7 +1059,8 @@ class IColumn;
|
||||
M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \
|
||||
M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \
|
||||
M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \
|
||||
M(UInt64, input_format_parquet_max_block_size, 8192, "Max block size for parquet reader.", 0) \
|
||||
M(UInt64, input_format_parquet_max_block_size, DEFAULT_BLOCK_SIZE, "Max block size for parquet reader.", 0) \
|
||||
M(UInt64, input_format_parquet_prefer_block_bytes, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader", 0) \
|
||||
M(Bool, input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format Protobuf", 0) \
|
||||
M(Bool, input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format CapnProto", 0) \
|
||||
M(Bool, input_format_orc_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format ORC", 0) \
|
||||
|
@ -96,6 +96,8 @@ static const std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges
|
||||
{"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"},
|
||||
{"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"},
|
||||
{"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"},
|
||||
{"input_format_parquet_max_block_size", 8192, DEFAULT_BLOCK_SIZE, "Increase block size for parquet reader."},
|
||||
{"input_format_parquet_prefer_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Average block bytes output by parquet reader."},
|
||||
{"enable_blob_storage_log", true, true, "Write information about blob storage operations to system.blob_storage_log table"},
|
||||
{"allow_statistic_optimize", false, false, "Old setting which popped up here being renamed."},
|
||||
{"allow_experimental_statistic", false, false, "Old setting which popped up here being renamed."},
|
||||
@ -113,7 +115,6 @@ static const std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges
|
||||
{"http_max_chunk_size", 0, 0, "Internal limitation"},
|
||||
{"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."},
|
||||
{"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"},
|
||||
{"backup_restore_s3_retry_attempts", 0, 1000, "A new setting."},
|
||||
{"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"},
|
||||
{"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"},
|
||||
{"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."},
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/Serializations/SerializationNullable.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeVariant.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Core/Field.h>
|
||||
@ -174,4 +175,9 @@ DataTypePtr removeNullableOrLowCardinalityNullable(const DataTypePtr & type)
|
||||
|
||||
}
|
||||
|
||||
bool canContainNull(const IDataType & type)
|
||||
{
|
||||
return type.isNullable() || type.isLowCardinalityNullable() || isDynamic(type) || isVariant(type);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -62,4 +62,6 @@ DataTypePtr makeNullableOrLowCardinalityNullableSafe(const DataTypePtr & type);
|
||||
/// Nullable(T) -> T, LowCardinality(Nullable(T)) -> T
|
||||
DataTypePtr removeNullableOrLowCardinalityNullable(const DataTypePtr & type);
|
||||
|
||||
bool canContainNull(const IDataType & type);
|
||||
|
||||
}
|
||||
|
@ -22,7 +22,8 @@ namespace ProfileEvents
|
||||
extern const Event AsynchronousReaderIgnoredBytes;
|
||||
|
||||
extern const Event IOUringSQEsSubmitted;
|
||||
extern const Event IOUringSQEsResubmits;
|
||||
extern const Event IOUringSQEsResubmitsAsync;
|
||||
extern const Event IOUringSQEsResubmitsSync;
|
||||
extern const Event IOUringCQEsCompleted;
|
||||
extern const Event IOUringCQEsFailed;
|
||||
}
|
||||
@ -149,10 +150,12 @@ int IOUringReader::submitToRing(EnqueuedRequest & enqueued)
|
||||
io_uring_prep_read(sqe, fd, request.buf, static_cast<unsigned>(request.size - enqueued.bytes_read), request.offset + enqueued.bytes_read);
|
||||
int ret = 0;
|
||||
|
||||
do
|
||||
ret = io_uring_submit(&ring);
|
||||
while (ret == -EINTR || ret == -EAGAIN)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::IOUringSQEsResubmitsSync);
|
||||
ret = io_uring_submit(&ring);
|
||||
} while (ret == -EINTR || ret == -EAGAIN);
|
||||
}
|
||||
|
||||
if (ret > 0 && !enqueued.resubmitting)
|
||||
{
|
||||
@ -266,7 +269,7 @@ void IOUringReader::monitorRing()
|
||||
if (cqe->res == -EAGAIN || cqe->res == -EINTR)
|
||||
{
|
||||
enqueued.resubmitting = true;
|
||||
ProfileEvents::increment(ProfileEvents::IOUringSQEsResubmits);
|
||||
ProfileEvents::increment(ProfileEvents::IOUringSQEsResubmitsAsync);
|
||||
|
||||
ret = submitToRing(enqueued);
|
||||
if (ret <= 0)
|
||||
@ -310,6 +313,7 @@ void IOUringReader::monitorRing()
|
||||
// potential short read, re-submit
|
||||
enqueued.resubmitting = true;
|
||||
enqueued.bytes_read += bytes_read;
|
||||
ProfileEvents::increment(ProfileEvents::IOUringSQEsResubmitsAsync);
|
||||
|
||||
ret = submitToRing(enqueued);
|
||||
if (ret <= 0)
|
||||
|
@ -382,6 +382,7 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e
|
||||
{
|
||||
std::vector<Aws::S3::Model::ObjectIdentifier> current_chunk;
|
||||
String keys;
|
||||
size_t first_position = current_position;
|
||||
for (; current_position < objects.size() && current_chunk.size() < chunk_size_limit; ++current_position)
|
||||
{
|
||||
Aws::S3::Model::ObjectIdentifier obj;
|
||||
@ -407,9 +408,9 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e
|
||||
{
|
||||
const auto * outcome_error = outcome.IsSuccess() ? nullptr : &outcome.GetError();
|
||||
auto time_now = std::chrono::system_clock::now();
|
||||
for (const auto & object : objects)
|
||||
for (size_t i = first_position; i < current_position; ++i)
|
||||
blob_storage_log->addEvent(BlobStorageLogElement::EventType::Delete,
|
||||
uri.bucket, object.remote_path, object.local_path, object.bytes_size,
|
||||
uri.bucket, objects[i].remote_path, objects[i].local_path, objects[i].bytes_size,
|
||||
outcome_error, time_now);
|
||||
}
|
||||
|
||||
|
@ -161,6 +161,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
|
||||
format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string;
|
||||
format_settings.parquet.output_fixed_string_as_fixed_byte_array = settings.output_format_parquet_fixed_string_as_fixed_byte_array;
|
||||
format_settings.parquet.max_block_size = settings.input_format_parquet_max_block_size;
|
||||
format_settings.parquet.prefer_block_bytes = settings.input_format_parquet_prefer_block_bytes;
|
||||
format_settings.parquet.output_compression_method = settings.output_format_parquet_compression_method;
|
||||
format_settings.parquet.output_compliant_nested_types = settings.output_format_parquet_compliant_nested_types;
|
||||
format_settings.parquet.use_custom_encoder = settings.output_format_parquet_use_custom_encoder;
|
||||
|
@ -265,7 +265,8 @@ struct FormatSettings
|
||||
bool preserve_order = false;
|
||||
bool use_custom_encoder = true;
|
||||
bool parallel_encoding = true;
|
||||
UInt64 max_block_size = 8192;
|
||||
UInt64 max_block_size = DEFAULT_BLOCK_SIZE;
|
||||
size_t prefer_block_bytes = DEFAULT_BLOCK_SIZE * 256;
|
||||
ParquetVersion output_version;
|
||||
ParquetCompression output_compression_method = ParquetCompression::SNAPPY;
|
||||
bool output_compliant_nested_types = true;
|
||||
|
@ -879,11 +879,11 @@ namespace
|
||||
}
|
||||
|
||||
template <bool is_json>
|
||||
bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings)
|
||||
bool tryReadFloat(Float64 & value, ReadBuffer & buf, const FormatSettings & settings, bool & has_fractional)
|
||||
{
|
||||
if (is_json || settings.try_infer_exponent_floats)
|
||||
return tryReadFloatText(value, buf);
|
||||
return tryReadFloatTextNoExponent(value, buf);
|
||||
return tryReadFloatTextExt(value, buf, has_fractional);
|
||||
return tryReadFloatTextExtNoExponent(value, buf, has_fractional);
|
||||
}
|
||||
|
||||
template <bool is_json>
|
||||
@ -893,46 +893,31 @@ namespace
|
||||
return nullptr;
|
||||
|
||||
Float64 tmp_float;
|
||||
bool has_fractional;
|
||||
if (settings.try_infer_integers)
|
||||
{
|
||||
/// If we read from String, we can do it in a more efficient way.
|
||||
if (auto * string_buf = dynamic_cast<ReadBufferFromString *>(&buf))
|
||||
{
|
||||
/// Remember the pointer to the start of the number to rollback to it.
|
||||
char * number_start = buf.position();
|
||||
Int64 tmp_int;
|
||||
bool read_int = tryReadIntText(tmp_int, buf);
|
||||
/// If we reached eof, it cannot be float (it requires no less data than integer)
|
||||
if (buf.eof())
|
||||
return read_int ? std::make_shared<DataTypeInt64>() : nullptr;
|
||||
|
||||
char * int_end = buf.position();
|
||||
/// We can safely get back to the start of the number, because we read from a string and we didn't reach eof.
|
||||
buf.position() = number_start;
|
||||
char * number_start = buf.position();
|
||||
|
||||
bool read_uint = false;
|
||||
char * uint_end = nullptr;
|
||||
/// In case of Int64 overflow we can try to infer UInt64.
|
||||
if (!read_int)
|
||||
{
|
||||
UInt64 tmp_uint;
|
||||
read_uint = tryReadIntText(tmp_uint, buf);
|
||||
/// If we reached eof, it cannot be float (it requires no less data than integer)
|
||||
if (buf.eof())
|
||||
return read_uint ? std::make_shared<DataTypeUInt64>() : nullptr;
|
||||
|
||||
uint_end = buf.position();
|
||||
buf.position() = number_start;
|
||||
}
|
||||
|
||||
if (tryReadFloat<is_json>(tmp_float, buf, settings))
|
||||
{
|
||||
if (read_int && buf.position() == int_end)
|
||||
return std::make_shared<DataTypeInt64>();
|
||||
if (read_uint && buf.position() == uint_end)
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
/// NOTE: it may break parsing of tryReadFloat() != tryReadIntText() + parsing of '.'/'e'
|
||||
/// But, for now it is true
|
||||
if (tryReadFloat<is_json>(tmp_float, buf, settings, has_fractional) && has_fractional)
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
}
|
||||
|
||||
Int64 tmp_int;
|
||||
buf.position() = number_start;
|
||||
if (tryReadIntText(tmp_int, buf))
|
||||
return std::make_shared<DataTypeInt64>();
|
||||
|
||||
/// In case of Int64 overflow we can try to infer UInt64.
|
||||
UInt64 tmp_uint;
|
||||
buf.position() = number_start;
|
||||
if (tryReadIntText(tmp_uint, buf))
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
@ -942,36 +927,22 @@ namespace
|
||||
/// and then as float.
|
||||
PeekableReadBuffer peekable_buf(buf);
|
||||
PeekableReadBufferCheckpoint checkpoint(peekable_buf);
|
||||
Int64 tmp_int;
|
||||
bool read_int = tryReadIntText(tmp_int, peekable_buf);
|
||||
auto * int_end = peekable_buf.position();
|
||||
peekable_buf.rollbackToCheckpoint(true);
|
||||
|
||||
bool read_uint = false;
|
||||
char * uint_end = nullptr;
|
||||
/// In case of Int64 overflow we can try to infer UInt64.
|
||||
if (!read_int)
|
||||
{
|
||||
PeekableReadBufferCheckpoint new_checkpoint(peekable_buf);
|
||||
UInt64 tmp_uint;
|
||||
read_uint = tryReadIntText(tmp_uint, peekable_buf);
|
||||
uint_end = peekable_buf.position();
|
||||
peekable_buf.rollbackToCheckpoint(true);
|
||||
}
|
||||
|
||||
if (tryReadFloat<is_json>(tmp_float, peekable_buf, settings))
|
||||
{
|
||||
/// Float parsing reads no fewer bytes than integer parsing,
|
||||
/// so position of the buffer is either the same, or further.
|
||||
/// If it's the same, then it's integer.
|
||||
if (read_int && peekable_buf.position() == int_end)
|
||||
return std::make_shared<DataTypeInt64>();
|
||||
if (read_uint && peekable_buf.position() == uint_end)
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
if (tryReadFloat<is_json>(tmp_float, peekable_buf, settings, has_fractional) && has_fractional)
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
}
|
||||
peekable_buf.rollbackToCheckpoint(/* drop= */ false);
|
||||
|
||||
Int64 tmp_int;
|
||||
if (tryReadIntText(tmp_int, peekable_buf))
|
||||
return std::make_shared<DataTypeInt64>();
|
||||
peekable_buf.rollbackToCheckpoint(/* drop= */ true);
|
||||
|
||||
/// In case of Int64 overflow we can try to infer UInt64.
|
||||
UInt64 tmp_uint;
|
||||
if (tryReadIntText(tmp_uint, peekable_buf))
|
||||
return std::make_shared<DataTypeUInt64>();
|
||||
}
|
||||
else if (tryReadFloat<is_json>(tmp_float, buf, settings))
|
||||
else if (tryReadFloat<is_json>(tmp_float, buf, settings, has_fractional))
|
||||
{
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
}
|
||||
@ -1004,7 +975,8 @@ namespace
|
||||
buf.position() = buf.buffer().begin();
|
||||
|
||||
Float64 tmp;
|
||||
if (tryReadFloat<is_json>(tmp, buf, settings) && buf.eof())
|
||||
bool has_fractional;
|
||||
if (tryReadFloat<is_json>(tmp, buf, settings, has_fractional) && buf.eof())
|
||||
return std::make_shared<DataTypeFloat64>();
|
||||
|
||||
return nullptr;
|
||||
|
@ -111,7 +111,7 @@ public:
|
||||
argument_types.push_back(argument.type);
|
||||
|
||||
/// More efficient specialization for two numeric arguments.
|
||||
if (arguments.size() == 2 && isNumber(arguments[0].type) && isNumber(arguments[1].type))
|
||||
if (arguments.size() == 2 && isNumber(removeNullable(arguments[0].type)) && isNumber(removeNullable(arguments[1].type)))
|
||||
return std::make_unique<FunctionToFunctionBaseAdaptor>(SpecializedFunction::create(context), argument_types, return_type);
|
||||
|
||||
return std::make_unique<FunctionToFunctionBaseAdaptor>(
|
||||
@ -123,7 +123,7 @@ public:
|
||||
if (types.empty())
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} cannot be called without arguments", getName());
|
||||
|
||||
if (types.size() == 2 && isNumber(types[0]) && isNumber(types[1]))
|
||||
if (types.size() == 2 && isNumber(removeNullable(types[0])) && isNumber(removeNullable(types[1])))
|
||||
return SpecializedFunction::create(context)->getReturnTypeImpl(types);
|
||||
|
||||
return getLeastSupertype(types);
|
||||
|
@ -29,6 +29,18 @@ public:
|
||||
return name;
|
||||
}
|
||||
|
||||
ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override
|
||||
{
|
||||
const ColumnWithTypeAndName & elem = arguments[0];
|
||||
if (elem.type->onlyNull())
|
||||
return result_type->createColumnConst(1, UInt8(0));
|
||||
|
||||
if (canContainNull(*elem.type))
|
||||
return nullptr;
|
||||
|
||||
return result_type->createColumnConst(1, UInt8(1));
|
||||
}
|
||||
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
@ -31,6 +31,18 @@ public:
|
||||
return name;
|
||||
}
|
||||
|
||||
ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override
|
||||
{
|
||||
const ColumnWithTypeAndName & elem = arguments[0];
|
||||
if (elem.type->onlyNull())
|
||||
return result_type->createColumnConst(1, UInt8(1));
|
||||
|
||||
if (canContainNull(*elem.type))
|
||||
return nullptr;
|
||||
|
||||
return result_type->createColumnConst(1, UInt8(0));
|
||||
}
|
||||
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
|
||||
|
@ -2,6 +2,7 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -23,6 +24,15 @@ public:
|
||||
return name;
|
||||
}
|
||||
|
||||
ColumnPtr getConstantResultForNonConstArguments(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const override
|
||||
{
|
||||
const ColumnWithTypeAndName & elem = arguments[0];
|
||||
if (elem.type->onlyNull() || canContainNull(*elem.type))
|
||||
return result_type->createColumnConst(1, UInt8(1));
|
||||
|
||||
return result_type->createColumnConst(1, UInt8(0));
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForNulls() const override { return false; }
|
||||
|
||||
bool useDefaultImplementationForNothing() const override { return false; }
|
||||
|
@ -162,7 +162,7 @@ public:
|
||||
class RetryStrategy : public Aws::Client::RetryStrategy
|
||||
{
|
||||
public:
|
||||
explicit RetryStrategy(uint32_t maxRetries_ = 10, uint32_t scaleFactor_ = 25, uint32_t maxDelayMs_ = 5000);
|
||||
explicit RetryStrategy(uint32_t maxRetries_ = 10, uint32_t scaleFactor_ = 25, uint32_t maxDelayMs_ = 90000);
|
||||
|
||||
/// NOLINTNEXTLINE(google-runtime-int)
|
||||
bool ShouldRetry(const Aws::Client::AWSError<Aws::Client::CoreErrors>& error, long attemptedRetries) const override;
|
||||
|
@ -320,11 +320,13 @@ static inline void readUIntTextUpToNSignificantDigits(T & x, ReadBuffer & buf)
|
||||
|
||||
|
||||
template <typename T, typename ReturnType, bool allow_exponent = true>
|
||||
ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
|
||||
ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in, bool & has_fractional)
|
||||
{
|
||||
static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>, "Argument for readFloatTextImpl must be float or double");
|
||||
static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.', "Layout of char is not like ASCII");
|
||||
|
||||
has_fractional = false;
|
||||
|
||||
static constexpr bool throw_exception = std::is_same_v<ReturnType, void>;
|
||||
|
||||
bool negative = false;
|
||||
@ -377,6 +379,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
|
||||
|
||||
if (checkChar('.', in))
|
||||
{
|
||||
has_fractional = true;
|
||||
auto after_point_count = in.count();
|
||||
|
||||
while (!in.eof() && *in.position() == '0')
|
||||
@ -394,6 +397,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
|
||||
{
|
||||
if (checkChar('e', in) || checkChar('E', in))
|
||||
{
|
||||
has_fractional = true;
|
||||
if (in.eof())
|
||||
{
|
||||
if constexpr (throw_exception)
|
||||
@ -420,10 +424,14 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in)
|
||||
}
|
||||
|
||||
if (after_point)
|
||||
{
|
||||
x += static_cast<T>(shift10(after_point, after_point_exponent));
|
||||
}
|
||||
|
||||
if (exponent)
|
||||
{
|
||||
x = static_cast<T>(shift10(x, exponent));
|
||||
}
|
||||
|
||||
if (negative)
|
||||
x = -x;
|
||||
@ -590,8 +598,16 @@ ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf)
|
||||
template <typename T> void readFloatTextPrecise(T & x, ReadBuffer & in) { readFloatTextPreciseImpl<T, void>(x, in); }
|
||||
template <typename T> bool tryReadFloatTextPrecise(T & x, ReadBuffer & in) { return readFloatTextPreciseImpl<T, bool>(x, in); }
|
||||
|
||||
template <typename T> void readFloatTextFast(T & x, ReadBuffer & in) { readFloatTextFastImpl<T, void>(x, in); }
|
||||
template <typename T> bool tryReadFloatTextFast(T & x, ReadBuffer & in) { return readFloatTextFastImpl<T, bool>(x, in); }
|
||||
template <typename T> void readFloatTextFast(T & x, ReadBuffer & in)
|
||||
{
|
||||
bool has_fractional;
|
||||
readFloatTextFastImpl<T, void>(x, in, has_fractional);
|
||||
}
|
||||
template <typename T> bool tryReadFloatTextFast(T & x, ReadBuffer & in)
|
||||
{
|
||||
bool has_fractional;
|
||||
return readFloatTextFastImpl<T, bool>(x, in, has_fractional);
|
||||
}
|
||||
|
||||
template <typename T> void readFloatTextSimple(T & x, ReadBuffer & in) { readFloatTextSimpleImpl<T, void>(x, in); }
|
||||
template <typename T> bool tryReadFloatTextSimple(T & x, ReadBuffer & in) { return readFloatTextSimpleImpl<T, bool>(x, in); }
|
||||
@ -603,6 +619,21 @@ template <typename T> void readFloatText(T & x, ReadBuffer & in) { readFloatText
|
||||
template <typename T> bool tryReadFloatText(T & x, ReadBuffer & in) { return tryReadFloatTextFast(x, in); }
|
||||
|
||||
/// Don't read exponent part of the number.
|
||||
template <typename T> bool tryReadFloatTextNoExponent(T & x, ReadBuffer & in) { return readFloatTextFastImpl<T, bool, false>(x, in); }
|
||||
template <typename T> bool tryReadFloatTextNoExponent(T & x, ReadBuffer & in)
|
||||
{
|
||||
bool has_fractional;
|
||||
return readFloatTextFastImpl<T, bool, false>(x, in, has_fractional);
|
||||
}
|
||||
|
||||
/// With a @has_fractional flag
|
||||
/// Used for input_format_try_infer_integers
|
||||
template <typename T> bool tryReadFloatTextExt(T & x, ReadBuffer & in, bool & has_fractional)
|
||||
{
|
||||
return readFloatTextFastImpl<T, bool>(x, in, has_fractional);
|
||||
}
|
||||
template <typename T> bool tryReadFloatTextExtNoExponent(T & x, ReadBuffer & in, bool & has_fractional)
|
||||
{
|
||||
return readFloatTextFastImpl<T, bool, false>(x, in, has_fractional);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -22,7 +22,9 @@
|
||||
#include <Storages/SelectQueryInfo.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/Distributed/DistributedSettings.h>
|
||||
|
||||
#include <Storages/buildQueryTreeForShard.h>
|
||||
#include <Planner/Utils.h>
|
||||
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -505,6 +507,41 @@ void executeQueryWithParallelReplicas(
|
||||
query_plan.addStep(std::move(read_from_remote));
|
||||
}
|
||||
|
||||
void executeQueryWithParallelReplicas(
|
||||
QueryPlan & query_plan,
|
||||
const StorageID & storage_id,
|
||||
QueryProcessingStage::Enum processed_stage,
|
||||
const QueryTreeNodePtr & query_tree,
|
||||
const PlannerContextPtr & planner_context,
|
||||
ContextPtr context,
|
||||
std::shared_ptr<const StorageLimitsList> storage_limits)
|
||||
{
|
||||
QueryTreeNodePtr modified_query_tree = query_tree->clone();
|
||||
rewriteJoinToGlobalJoin(modified_query_tree, context);
|
||||
modified_query_tree = buildQueryTreeForShard(planner_context, modified_query_tree);
|
||||
|
||||
auto header
|
||||
= InterpreterSelectQueryAnalyzer::getSampleBlock(modified_query_tree, context, SelectQueryOptions(processed_stage).analyze());
|
||||
auto modified_query_ast = queryNodeToDistributedSelectQuery(modified_query_tree);
|
||||
|
||||
executeQueryWithParallelReplicas(query_plan, storage_id, header, processed_stage, modified_query_ast, context, storage_limits);
|
||||
}
|
||||
|
||||
void executeQueryWithParallelReplicas(
|
||||
QueryPlan & query_plan,
|
||||
const StorageID & storage_id,
|
||||
QueryProcessingStage::Enum processed_stage,
|
||||
const ASTPtr & query_ast,
|
||||
ContextPtr context,
|
||||
std::shared_ptr<const StorageLimitsList> storage_limits)
|
||||
{
|
||||
auto modified_query_ast = ClusterProxy::rewriteSelectQuery(
|
||||
context, query_ast, storage_id.database_name, storage_id.table_name, /*remote_table_function_ptr*/ nullptr);
|
||||
auto header = InterpreterSelectQuery(modified_query_ast, context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
|
||||
|
||||
executeQueryWithParallelReplicas(query_plan, storage_id, header, processed_stage, modified_query_ast, context, storage_limits);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -24,6 +24,12 @@ struct StorageID;
|
||||
struct StorageLimits;
|
||||
using StorageLimitsList = std::list<StorageLimits>;
|
||||
|
||||
class IQueryTreeNode;
|
||||
using QueryTreeNodePtr = std::shared_ptr<IQueryTreeNode>;
|
||||
|
||||
class PlannerContext;
|
||||
using PlannerContextPtr = std::shared_ptr<PlannerContext>;
|
||||
|
||||
namespace ClusterProxy
|
||||
{
|
||||
|
||||
@ -60,7 +66,6 @@ void executeQuery(
|
||||
AdditionalShardFilterGenerator shard_filter_generator,
|
||||
bool is_remote_function);
|
||||
|
||||
|
||||
void executeQueryWithParallelReplicas(
|
||||
QueryPlan & query_plan,
|
||||
const StorageID & storage_id,
|
||||
@ -69,6 +74,23 @@ void executeQueryWithParallelReplicas(
|
||||
const ASTPtr & query_ast,
|
||||
ContextPtr context,
|
||||
std::shared_ptr<const StorageLimitsList> storage_limits);
|
||||
|
||||
void executeQueryWithParallelReplicas(
|
||||
QueryPlan & query_plan,
|
||||
const StorageID & storage_id,
|
||||
QueryProcessingStage::Enum processed_stage,
|
||||
const ASTPtr & query_ast,
|
||||
ContextPtr context,
|
||||
std::shared_ptr<const StorageLimitsList> storage_limits);
|
||||
|
||||
void executeQueryWithParallelReplicas(
|
||||
QueryPlan & query_plan,
|
||||
const StorageID & storage_id,
|
||||
QueryProcessingStage::Enum processed_stage,
|
||||
const QueryTreeNodePtr & query_tree,
|
||||
const PlannerContextPtr & planner_context,
|
||||
ContextPtr context,
|
||||
std::shared_ptr<const StorageLimitsList> storage_limits);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -2374,49 +2374,6 @@ UInt64 InterpreterSelectQuery::maxBlockSizeByLimit() const
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** Storages can rely that filters that for storage will be available for analysis before
|
||||
* plan is fully constructed and optimized.
|
||||
*
|
||||
* StorageMerge common header calculation and prewhere push-down relies on this.
|
||||
*
|
||||
* This is similar to Planner::collectFiltersForAnalysis
|
||||
*/
|
||||
void collectFiltersForAnalysis(
|
||||
const ASTPtr & query_ptr,
|
||||
const ContextPtr & query_context,
|
||||
const StorageSnapshotPtr & storage_snapshot,
|
||||
const SelectQueryOptions & options,
|
||||
SelectQueryInfo & query_info)
|
||||
{
|
||||
auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals();
|
||||
|
||||
auto dummy = std::make_shared<StorageDummy>(
|
||||
storage_snapshot->storage.getStorageID(), ColumnsDescription(storage_snapshot->getColumns(get_column_options)), storage_snapshot);
|
||||
|
||||
QueryPlan query_plan;
|
||||
InterpreterSelectQuery(query_ptr, query_context, dummy, dummy->getInMemoryMetadataPtr(), options).buildQueryPlan(query_plan);
|
||||
|
||||
auto optimization_settings = QueryPlanOptimizationSettings::fromContext(query_context);
|
||||
query_plan.optimize(optimization_settings);
|
||||
|
||||
std::vector<QueryPlan::Node *> nodes_to_process;
|
||||
nodes_to_process.push_back(query_plan.getRootNode());
|
||||
|
||||
while (!nodes_to_process.empty())
|
||||
{
|
||||
const auto * node_to_process = nodes_to_process.back();
|
||||
nodes_to_process.pop_back();
|
||||
nodes_to_process.insert(nodes_to_process.end(), node_to_process->children.begin(), node_to_process->children.end());
|
||||
|
||||
auto * read_from_dummy = typeid_cast<ReadFromDummy *>(node_to_process->step.get());
|
||||
if (!read_from_dummy)
|
||||
continue;
|
||||
|
||||
query_info.filter_actions_dag = read_from_dummy->getFilterActionsDAG();
|
||||
query_info.optimized_prewhere_info = read_from_dummy->getPrewhereInfo();
|
||||
}
|
||||
}
|
||||
|
||||
void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan)
|
||||
{
|
||||
auto & query = getSelectQuery();
|
||||
@ -2546,10 +2503,6 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
|
||||
}
|
||||
else if (storage)
|
||||
{
|
||||
if (shouldMoveToPrewhere() && settings.query_plan_optimize_prewhere && settings.query_plan_enable_optimizations
|
||||
&& typeid_cast<const StorageMerge *>(storage.get()))
|
||||
collectFiltersForAnalysis(query_ptr, context, storage_snapshot, options, query_info);
|
||||
|
||||
/// Table.
|
||||
if (max_streams == 0)
|
||||
max_streams = 1;
|
||||
|
@ -504,6 +504,10 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
|
||||
Block block(std::move(log_element_columns));
|
||||
|
||||
MutableColumns columns = block.mutateColumns();
|
||||
|
||||
for (auto & column : columns)
|
||||
column->reserve(to_flush.size());
|
||||
|
||||
for (const auto & elem : to_flush)
|
||||
elem.appendToBlock(columns);
|
||||
|
||||
@ -532,7 +536,8 @@ void SystemLog<LogElement>::flushImpl(const std::vector<LogElement> & to_flush,
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__, fmt::format("Failed to flush system log {} with {} entries up to offset {}",
|
||||
table_id.getNameForLogs(), to_flush.size(), to_flush_end));
|
||||
}
|
||||
|
||||
queue->confirm(to_flush_end);
|
||||
|
@ -166,7 +166,7 @@ FiltersForTableExpressionMap collectFiltersForAnalysis(const QueryTreeNodePtr &
|
||||
continue;
|
||||
|
||||
const auto & storage = table_node ? table_node->getStorage() : table_function_node->getStorage();
|
||||
if (typeid_cast<const StorageDistributed *>(storage.get()) || typeid_cast<const StorageMerge *>(storage.get())
|
||||
if (typeid_cast<const StorageDistributed *>(storage.get())
|
||||
|| (parallel_replicas_estimation_enabled && std::dynamic_pointer_cast<MergeTreeData>(storage)))
|
||||
{
|
||||
collect_filters = true;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Planner/PlannerExpressionAnalysis.h>
|
||||
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/FilterDescription.h>
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
@ -37,7 +38,7 @@ namespace
|
||||
* Actions before filter are added into the actions chain.
|
||||
* It is the client's responsibility to update the filter analysis result if the filter column must be removed after the chain is finalized.
|
||||
*/
|
||||
FilterAnalysisResult analyzeFilter(const QueryTreeNodePtr & filter_expression_node,
|
||||
std::optional<FilterAnalysisResult> analyzeFilter(const QueryTreeNodePtr & filter_expression_node,
|
||||
const ColumnsWithTypeAndName & input_columns,
|
||||
const PlannerContextPtr & planner_context,
|
||||
ActionsChain & actions_chain)
|
||||
@ -45,7 +46,11 @@ FilterAnalysisResult analyzeFilter(const QueryTreeNodePtr & filter_expression_no
|
||||
FilterAnalysisResult result;
|
||||
|
||||
result.filter_actions = buildActionsDAGFromExpressionNode(filter_expression_node, input_columns, planner_context);
|
||||
result.filter_column_name = result.filter_actions->getOutputs().at(0)->result_name;
|
||||
const auto * output = result.filter_actions->getOutputs().at(0);
|
||||
if (output->column && ConstantFilterDescription(*output->column).always_true)
|
||||
return {};
|
||||
|
||||
result.filter_column_name = output->result_name;
|
||||
actions_chain.addStep(std::make_unique<ActionsChainStep>(result.filter_actions));
|
||||
|
||||
return result;
|
||||
@ -534,8 +539,11 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo
|
||||
if (query_node.hasWhere())
|
||||
{
|
||||
where_analysis_result_optional = analyzeFilter(query_node.getWhere(), current_output_columns, planner_context, actions_chain);
|
||||
where_action_step_index_optional = actions_chain.getLastStepIndex();
|
||||
current_output_columns = actions_chain.getLastStepAvailableOutputColumns();
|
||||
if (where_analysis_result_optional)
|
||||
{
|
||||
where_action_step_index_optional = actions_chain.getLastStepIndex();
|
||||
current_output_columns = actions_chain.getLastStepAvailableOutputColumns();
|
||||
}
|
||||
}
|
||||
|
||||
auto aggregation_analysis_result_optional = analyzeAggregation(query_tree, current_output_columns, planner_context, actions_chain);
|
||||
@ -548,8 +556,11 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo
|
||||
if (query_node.hasHaving())
|
||||
{
|
||||
having_analysis_result_optional = analyzeFilter(query_node.getHaving(), current_output_columns, planner_context, actions_chain);
|
||||
having_action_step_index_optional = actions_chain.getLastStepIndex();
|
||||
current_output_columns = actions_chain.getLastStepAvailableOutputColumns();
|
||||
if (having_analysis_result_optional)
|
||||
{
|
||||
having_action_step_index_optional = actions_chain.getLastStepIndex();
|
||||
current_output_columns = actions_chain.getLastStepAvailableOutputColumns();
|
||||
}
|
||||
}
|
||||
|
||||
auto window_analysis_result_optional = analyzeWindow(query_tree, current_output_columns, planner_context, actions_chain);
|
||||
@ -562,8 +573,11 @@ PlannerExpressionsAnalysisResult buildExpressionAnalysisResult(const QueryTreeNo
|
||||
if (query_node.hasQualify())
|
||||
{
|
||||
qualify_analysis_result_optional = analyzeFilter(query_node.getQualify(), current_output_columns, planner_context, actions_chain);
|
||||
qualify_action_step_index_optional = actions_chain.getLastStepIndex();
|
||||
current_output_columns = actions_chain.getLastStepAvailableOutputColumns();
|
||||
if (qualify_analysis_result_optional)
|
||||
{
|
||||
qualify_action_step_index_optional = actions_chain.getLastStepIndex();
|
||||
current_output_columns = actions_chain.getLastStepAvailableOutputColumns();
|
||||
}
|
||||
}
|
||||
|
||||
auto projection_analysis_result = analyzeProjection(query_node, current_output_columns, planner_context, actions_chain);
|
||||
|
@ -45,6 +45,7 @@
|
||||
#include <Processors/QueryPlan/FilterStep.h>
|
||||
#include <Processors/QueryPlan/JoinStep.h>
|
||||
#include <Processors/QueryPlan/ArrayJoinStep.h>
|
||||
#include <Processors/QueryPlan/ReadFromMergeTree.h>
|
||||
#include <Processors/Sources/SourceFromSingleChunk.h>
|
||||
|
||||
#include <Storages/StorageDummy.h>
|
||||
@ -56,6 +57,7 @@
|
||||
#include <Interpreters/IJoin.h>
|
||||
#include <Interpreters/TableJoin.h>
|
||||
#include <Interpreters/getCustomKeyFilterForParallelReplicas.h>
|
||||
#include <Interpreters/ClusterProxy/executeQuery.h>
|
||||
|
||||
#include <Planner/CollectColumnIdentifiers.h>
|
||||
#include <Planner/Planner.h>
|
||||
@ -645,7 +647,6 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
|
||||
auto table_expression_query_info = select_query_info;
|
||||
table_expression_query_info.table_expression = table_expression;
|
||||
table_expression_query_info.filter_actions_dag = table_expression_data.getFilterActions();
|
||||
table_expression_query_info.optimized_prewhere_info = table_expression_data.getPrewhereInfo();
|
||||
table_expression_query_info.analyzer_can_use_parallel_replicas_on_follower = table_node == planner_context->getGlobalPlannerContext()->parallel_replicas_table;
|
||||
|
||||
size_t max_streams = settings.max_threads;
|
||||
@ -769,37 +770,6 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
|
||||
{
|
||||
if (!select_query_options.only_analyze)
|
||||
{
|
||||
auto storage_merge_tree = std::dynamic_pointer_cast<MergeTreeData>(storage);
|
||||
if (storage_merge_tree && query_context->canUseParallelReplicasOnInitiator()
|
||||
&& settings.parallel_replicas_min_number_of_rows_per_replica > 0)
|
||||
{
|
||||
UInt64 rows_to_read
|
||||
= storage_merge_tree->estimateNumberOfRowsToRead(query_context, storage_snapshot, table_expression_query_info);
|
||||
|
||||
if (max_block_size_limited && (max_block_size_limited < rows_to_read))
|
||||
rows_to_read = max_block_size_limited;
|
||||
|
||||
size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica;
|
||||
LOG_TRACE(
|
||||
getLogger("Planner"),
|
||||
"Estimated {} rows to read. It is enough work for {} parallel replicas",
|
||||
rows_to_read,
|
||||
number_of_replicas_to_use);
|
||||
|
||||
if (number_of_replicas_to_use <= 1)
|
||||
{
|
||||
planner_context->getMutableQueryContext()->setSetting(
|
||||
"allow_experimental_parallel_reading_from_replicas", Field(0));
|
||||
planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1});
|
||||
LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read");
|
||||
}
|
||||
else if (number_of_replicas_to_use < settings.max_parallel_replicas)
|
||||
{
|
||||
planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", number_of_replicas_to_use);
|
||||
LOG_DEBUG(getLogger("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use);
|
||||
}
|
||||
}
|
||||
|
||||
auto & prewhere_info = table_expression_query_info.prewhere_info;
|
||||
const auto & prewhere_actions = table_expression_data.getPrewhereFilterActions();
|
||||
|
||||
@ -897,6 +867,96 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres
|
||||
max_block_size,
|
||||
max_streams);
|
||||
|
||||
// Returns true when `table` may be read with parallel replicas under `query_settings`:
// it must be a MergeTree table, and it must either support replication or the setting
// parallel_replicas_for_non_replicated_merge_tree must be enabled.
auto parallel_replicas_enabled_for_storage = [](const StoragePtr & table, const Settings & query_settings)
|
||||
{
|
||||
if (!table->isMergeTree())
|
||||
return false;
|
||||
|
||||
// Non-replicated tables are accepted only when explicitly allowed by the setting.
if (!table->supportsReplication() && !query_settings.parallel_replicas_for_non_replicated_merge_tree)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
/// query_plan can be empty if there is nothing to read
|
||||
if (query_plan.isInitialized() && parallel_replicas_enabled_for_storage(storage, settings) && query_context->canUseParallelReplicasOnInitiator())
|
||||
{
|
||||
// (1) find read step
|
||||
QueryPlan::Node * node = query_plan.getRootNode();
|
||||
ReadFromMergeTree * reading = nullptr;
|
||||
while (node)
|
||||
{
|
||||
reading = typeid_cast<ReadFromMergeTree *>(node->step.get());
|
||||
if (reading)
|
||||
break;
|
||||
|
||||
QueryPlan::Node * prev_node = node;
|
||||
if (!node->children.empty())
|
||||
{
|
||||
chassert(node->children.size() == 1);
|
||||
node = node->children.at(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Step is expected to be ReadFromMergeTree but it's {}",
|
||||
prev_node->step->getName());
|
||||
}
|
||||
}
|
||||
|
||||
chassert(reading);
|
||||
|
||||
// (2) if it's ReadFromMergeTree - run index analysis and check number of rows to read
|
||||
if (settings.parallel_replicas_min_number_of_rows_per_replica > 0)
|
||||
{
|
||||
auto result_ptr = reading->selectRangesToRead();
|
||||
|
||||
UInt64 rows_to_read = result_ptr->selected_rows;
|
||||
if (table_expression_query_info.limit > 0 && table_expression_query_info.limit < rows_to_read)
|
||||
rows_to_read = table_expression_query_info.limit;
|
||||
|
||||
if (max_block_size_limited && (max_block_size_limited < rows_to_read))
|
||||
rows_to_read = max_block_size_limited;
|
||||
|
||||
const size_t number_of_replicas_to_use = rows_to_read / settings.parallel_replicas_min_number_of_rows_per_replica;
|
||||
LOG_TRACE(
|
||||
getLogger("Planner"),
|
||||
"Estimated {} rows to read. It is enough work for {} parallel replicas",
|
||||
rows_to_read,
|
||||
number_of_replicas_to_use);
|
||||
|
||||
if (number_of_replicas_to_use <= 1)
|
||||
{
|
||||
planner_context->getMutableQueryContext()->setSetting(
|
||||
"allow_experimental_parallel_reading_from_replicas", Field(0));
|
||||
planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1});
|
||||
LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read");
|
||||
}
|
||||
else if (number_of_replicas_to_use < settings.max_parallel_replicas)
|
||||
{
|
||||
planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", number_of_replicas_to_use);
|
||||
LOG_DEBUG(getLogger("Planner"), "Reducing the number of replicas to use to {}", number_of_replicas_to_use);
|
||||
}
|
||||
}
|
||||
|
||||
// (3) if parallel replicas still enabled - replace reading step
|
||||
if (planner_context->getQueryContext()->canUseParallelReplicasOnInitiator())
|
||||
{
|
||||
from_stage = QueryProcessingStage::WithMergeableState;
|
||||
QueryPlan query_plan_parallel_replicas;
|
||||
ClusterProxy::executeQueryWithParallelReplicas(
|
||||
query_plan_parallel_replicas,
|
||||
storage->getStorageID(),
|
||||
from_stage,
|
||||
table_expression_query_info.query_tree,
|
||||
table_expression_query_info.planner_context,
|
||||
query_context,
|
||||
table_expression_query_info.storage_limits);
|
||||
query_plan = std::move(query_plan_parallel_replicas);
|
||||
}
|
||||
}
|
||||
|
||||
const auto & alias_column_expressions = table_expression_data.getAliasColumnExpressions();
|
||||
if (!alias_column_expressions.empty() && query_plan.isInitialized() && from_stage == QueryProcessingStage::FetchColumns)
|
||||
{
|
||||
|
@ -420,6 +420,24 @@ void ParquetBlockInputFormat::initializeIfNeeded()
|
||||
int num_row_groups = metadata->num_row_groups();
|
||||
row_group_batches.reserve(num_row_groups);
|
||||
|
||||
// Estimates how many rows a batch read from row group `row_group_idx` should contain so that
// it is roughly format_settings.parquet.prefer_block_bytes in size.
// Returns 0 when no estimate can be made (nothing to size, prefer_block_bytes is 0, or the
// floored average row size is below one byte); otherwise the estimate is clamped to
// [MIN_ROW_NUM, format_settings.parquet.max_block_size].
auto adative_chunk_size = [&](int row_group_idx) -> size_t
|
||||
{
|
||||
size_t total_size = 0;
|
||||
auto row_group_meta = metadata->RowGroup(row_group_idx);
|
||||
// Sum the uncompressed size of every column chunk listed in column_indices.
for (int column_index : column_indices)
|
||||
{
|
||||
total_size += row_group_meta->ColumnChunk(column_index)->total_uncompressed_size();
|
||||
}
|
||||
if (!total_size || !format_settings.parquet.prefer_block_bytes) return 0;
|
||||
auto average_row_bytes = floor(static_cast<double>(total_size) / row_group_meta->num_rows());
|
||||
// Guard the division below: a floored average of 0 would make preferred_num_rows infinite.
|
||||
if (average_row_bytes < 1) return 0;
|
||||
const size_t preferred_num_rows = static_cast<size_t>(floor(format_settings.parquet.prefer_block_bytes/average_row_bytes));
|
||||
const size_t MIN_ROW_NUM = 128;
|
||||
// size_t != UInt64 on Darwin, hence the explicit cast so both std::min arguments have the same type.
|
||||
return std::min(std::max(preferred_num_rows, MIN_ROW_NUM), static_cast<size_t>(format_settings.parquet.max_block_size));
|
||||
};
|
||||
|
||||
for (int row_group = 0; row_group < num_row_groups; ++row_group)
|
||||
{
|
||||
if (skip_row_groups.contains(row_group))
|
||||
@ -439,6 +457,8 @@ void ParquetBlockInputFormat::initializeIfNeeded()
|
||||
row_group_batches.back().row_groups_idxs.push_back(row_group);
|
||||
row_group_batches.back().total_rows += metadata->RowGroup(row_group)->num_rows();
|
||||
row_group_batches.back().total_bytes_compressed += metadata->RowGroup(row_group)->total_compressed_size();
|
||||
auto rows = adative_chunk_size(row_group);
|
||||
row_group_batches.back().adaptive_chunk_size = rows ? rows : format_settings.parquet.max_block_size;
|
||||
}
|
||||
}
|
||||
|
||||
@ -449,7 +469,7 @@ void ParquetBlockInputFormat::initializeRowGroupBatchReader(size_t row_group_bat
|
||||
parquet::ArrowReaderProperties arrow_properties;
|
||||
parquet::ReaderProperties reader_properties(ArrowMemoryPool::instance());
|
||||
arrow_properties.set_use_threads(false);
|
||||
arrow_properties.set_batch_size(format_settings.parquet.max_block_size);
|
||||
arrow_properties.set_batch_size(row_group_batch.adaptive_chunk_size);
|
||||
|
||||
// When reading a row group, arrow will:
|
||||
// 1. Look at `metadata` to get all byte ranges it'll need to read from the file (typically one
|
||||
|
@ -208,6 +208,8 @@ private:
|
||||
size_t total_rows = 0;
|
||||
size_t total_bytes_compressed = 0;
|
||||
|
||||
size_t adaptive_chunk_size = 0;
|
||||
|
||||
std::vector<int> row_groups_idxs;
|
||||
|
||||
// These are only used by the decoding thread, so don't require locking the mutex.
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <Processors/QueryPlan/TotalsHavingStep.h>
|
||||
#include <Processors/QueryPlan/DistinctStep.h>
|
||||
#include <Processors/QueryPlan/UnionStep.h>
|
||||
#include <Storages/StorageMerge.h>
|
||||
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
#include <Interpreters/ArrayJoinAction.h>
|
||||
@ -608,6 +609,14 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes
|
||||
return 3;
|
||||
}
|
||||
|
||||
if (auto * read_from_merge = typeid_cast<ReadFromMerge *>(child.get()))
|
||||
{
|
||||
FilterDAGInfo info{filter->getExpression(), filter->getFilterColumnName(), filter->removesFilterColumn()};
|
||||
read_from_merge->addFilter(std::move(info));
|
||||
std::swap(*parent_node, *child_node);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -4,10 +4,10 @@
|
||||
#include <Processors/QueryPlan/SourceStepWithFilter.h>
|
||||
#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
|
||||
#include <Storages/StorageDummy.h>
|
||||
#include <Storages/StorageMerge.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
#include <Functions/FunctionsLogical.h>
|
||||
#include <Functions/IFunctionAdaptors.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -30,7 +30,7 @@ static void removeFromOutput(ActionsDAG & dag, const std::string name)
|
||||
|
||||
void optimizePrewhere(Stack & stack, QueryPlan::Nodes &)
|
||||
{
|
||||
if (stack.size() < 3)
|
||||
if (stack.size() < 2)
|
||||
return;
|
||||
|
||||
auto & frame = stack.back();
|
||||
@ -45,6 +45,9 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes &)
|
||||
if (!source_step_with_filter)
|
||||
return;
|
||||
|
||||
if (typeid_cast<ReadFromMerge *>(frame.node->step.get()))
|
||||
return;
|
||||
|
||||
const auto & storage_snapshot = source_step_with_filter->getStorageSnapshot();
|
||||
const auto & storage = storage_snapshot->storage;
|
||||
if (!storage.canMoveConditionsToPrewhere())
|
||||
|
@ -421,6 +421,9 @@ struct AggregateProjectionCandidates
|
||||
|
||||
/// This flag means that DAG for projection candidate should be used in FilterStep.
|
||||
bool has_filter = false;
|
||||
|
||||
/// If not empty, try to find exact ranges from parts to speed up trivial count queries.
|
||||
String only_count_column;
|
||||
};
|
||||
|
||||
AggregateProjectionCandidates getAggregateProjectionCandidates(
|
||||
@ -502,6 +505,12 @@ AggregateProjectionCandidates getAggregateProjectionCandidates(
|
||||
candidates.minmax_projection.emplace(std::move(minmax));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Trivial count optimization only applies after @can_use_minmax_projection.
|
||||
if (keys.empty() && aggregates.size() == 1 && typeid_cast<const AggregateFunctionCount *>(aggregates[0].function.get()))
|
||||
candidates.only_count_column = aggregates[0].column_name;
|
||||
}
|
||||
}
|
||||
|
||||
if (!candidates.minmax_projection)
|
||||
@ -584,13 +593,21 @@ std::optional<String> optimizeUseAggregateProjections(QueryPlan::Node & node, Qu
|
||||
ContextPtr context = reading->getContext();
|
||||
MergeTreeDataSelectExecutor reader(reading->getMergeTreeData());
|
||||
AggregateProjectionCandidate * best_candidate = nullptr;
|
||||
|
||||
/// Stores row count from exact ranges of parts.
|
||||
size_t exact_count = 0;
|
||||
|
||||
if (candidates.minmax_projection)
|
||||
{
|
||||
best_candidate = &candidates.minmax_projection->candidate;
|
||||
}
|
||||
else if (!candidates.real.empty())
|
||||
else if (!candidates.real.empty() || !candidates.only_count_column.empty())
|
||||
{
|
||||
auto ordinary_reading_select_result = reading->selectRangesToRead();
|
||||
auto ordinary_reading_select_result = reading->getAnalyzedResult();
|
||||
bool find_exact_ranges = !candidates.only_count_column.empty();
|
||||
if (!ordinary_reading_select_result || (!ordinary_reading_select_result->has_exact_ranges && find_exact_ranges))
|
||||
ordinary_reading_select_result = reading->selectRangesToRead(find_exact_ranges);
|
||||
|
||||
size_t ordinary_reading_marks = ordinary_reading_select_result->selected_marks;
|
||||
|
||||
/// Nothing to read. Ignore projections.
|
||||
@ -600,7 +617,49 @@ std::optional<String> optimizeUseAggregateProjections(QueryPlan::Node & node, Qu
|
||||
return {};
|
||||
}
|
||||
|
||||
const auto & parts_with_ranges = ordinary_reading_select_result->parts_with_ranges;
|
||||
auto & parts_with_ranges = ordinary_reading_select_result->parts_with_ranges;
|
||||
|
||||
if (!candidates.only_count_column.empty())
|
||||
{
|
||||
for (auto & part_with_ranges : parts_with_ranges)
|
||||
{
|
||||
MarkRanges new_ranges;
|
||||
auto & ranges = part_with_ranges.ranges;
|
||||
const auto & exact_ranges = part_with_ranges.exact_ranges;
|
||||
if (exact_ranges.empty())
|
||||
continue;
|
||||
|
||||
size_t i = 0;
|
||||
size_t len = exact_ranges.size();
|
||||
for (auto & range : ranges)
|
||||
{
|
||||
while (i < len && exact_ranges[i].begin < range.end)
|
||||
{
|
||||
chassert(exact_ranges[i].begin >= range.begin);
|
||||
chassert(exact_ranges[i].end <= range.end);
|
||||
|
||||
/// Found some marks which are not exact
|
||||
if (range.begin < exact_ranges[i].begin)
|
||||
new_ranges.emplace_back(range.begin, exact_ranges[i].begin);
|
||||
|
||||
range.begin = exact_ranges[i].end;
|
||||
ordinary_reading_marks -= exact_ranges[i].end - exact_ranges[i].begin;
|
||||
exact_count += part_with_ranges.data_part->index_granularity.getRowsCountInRange(exact_ranges[i]);
|
||||
++i;
|
||||
}
|
||||
|
||||
/// Current range still contains some marks which are not exact
|
||||
if (range.begin < range.end)
|
||||
new_ranges.emplace_back(range);
|
||||
}
|
||||
chassert(i == len);
|
||||
part_with_ranges.ranges = std::move(new_ranges);
|
||||
}
|
||||
|
||||
std::erase_if(parts_with_ranges, [&](const auto & part_with_ranges) { return part_with_ranges.ranges.empty(); });
|
||||
if (parts_with_ranges.empty())
|
||||
chassert(ordinary_reading_marks == 0);
|
||||
}
|
||||
|
||||
/// Selecting best candidate.
|
||||
for (auto & candidate : candidates.real)
|
||||
@ -630,8 +689,20 @@ std::optional<String> optimizeUseAggregateProjections(QueryPlan::Node & node, Qu
|
||||
|
||||
if (!best_candidate)
|
||||
{
|
||||
reading->setAnalyzedResult(std::move(ordinary_reading_select_result));
|
||||
return {};
|
||||
if (exact_count > 0)
|
||||
{
|
||||
if (ordinary_reading_marks > 0)
|
||||
{
|
||||
ordinary_reading_select_result->selected_marks = ordinary_reading_marks;
|
||||
ordinary_reading_select_result->selected_rows -= exact_count;
|
||||
reading->setAnalyzedResult(std::move(ordinary_reading_select_result));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
reading->setAnalyzedResult(std::move(ordinary_reading_select_result));
|
||||
return {};
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -639,10 +710,11 @@ std::optional<String> optimizeUseAggregateProjections(QueryPlan::Node & node, Qu
|
||||
return {};
|
||||
}
|
||||
|
||||
chassert(best_candidate != nullptr);
|
||||
|
||||
QueryPlanStepPtr projection_reading;
|
||||
bool has_ordinary_parts;
|
||||
String selected_projection_name;
|
||||
if (best_candidate)
|
||||
selected_projection_name = best_candidate->projection->name;
|
||||
|
||||
/// Add reading from projection step.
|
||||
if (candidates.minmax_projection)
|
||||
@ -654,6 +726,32 @@ std::optional<String> optimizeUseAggregateProjections(QueryPlan::Node & node, Qu
|
||||
projection_reading = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
|
||||
has_ordinary_parts = false;
|
||||
}
|
||||
else if (best_candidate == nullptr)
|
||||
{
|
||||
chassert(exact_count > 0);
|
||||
|
||||
auto agg_count = std::make_shared<AggregateFunctionCount>(DataTypes{});
|
||||
|
||||
std::vector<char> state(agg_count->sizeOfData());
|
||||
AggregateDataPtr place = state.data();
|
||||
agg_count->create(place);
|
||||
SCOPE_EXIT_MEMORY_SAFE(agg_count->destroy(place));
|
||||
agg_count->set(place, exact_count);
|
||||
|
||||
auto column = ColumnAggregateFunction::create(agg_count);
|
||||
column->insertFrom(place);
|
||||
|
||||
Block block_with_count{
|
||||
{std::move(column),
|
||||
std::make_shared<DataTypeAggregateFunction>(agg_count, DataTypes{}, Array{}),
|
||||
candidates.only_count_column}};
|
||||
|
||||
Pipe pipe(std::make_shared<SourceFromSingleChunk>(std::move(block_with_count)));
|
||||
projection_reading = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
|
||||
|
||||
selected_projection_name = "Optimized trivial count";
|
||||
has_ordinary_parts = reading->getAnalyzedResult() != nullptr;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto storage_snapshot = reading->getStorageSnapshot();
|
||||
@ -694,46 +792,54 @@ std::optional<String> optimizeUseAggregateProjections(QueryPlan::Node & node, Qu
|
||||
context->getQueryContext()->addQueryAccessInfo(Context::QualifiedProjectionName
|
||||
{
|
||||
.storage_id = reading->getMergeTreeData().getStorageID(),
|
||||
.projection_name = best_candidate->projection->name,
|
||||
.projection_name = selected_projection_name,
|
||||
});
|
||||
}
|
||||
|
||||
// LOG_TRACE(getLogger("optimizeUseProjections"), "Projection reading header {}",
|
||||
// projection_reading->getOutputStream().header.dumpStructure());
|
||||
|
||||
projection_reading->setStepDescription(best_candidate->projection->name);
|
||||
|
||||
projection_reading->setStepDescription(selected_projection_name);
|
||||
auto & projection_reading_node = nodes.emplace_back(QueryPlan::Node{.step = std::move(projection_reading)});
|
||||
auto & expr_or_filter_node = nodes.emplace_back();
|
||||
|
||||
if (candidates.has_filter)
|
||||
/// Root node of the optimized child plan that reads using @selected_projection_name
|
||||
QueryPlan::Node * aggregate_projection_node = nullptr;
|
||||
|
||||
if (best_candidate)
|
||||
{
|
||||
expr_or_filter_node.step = std::make_unique<FilterStep>(
|
||||
projection_reading_node.step->getOutputStream(),
|
||||
best_candidate->dag,
|
||||
best_candidate->dag->getOutputs().front()->result_name,
|
||||
true);
|
||||
}
|
||||
else
|
||||
expr_or_filter_node.step = std::make_unique<ExpressionStep>(
|
||||
projection_reading_node.step->getOutputStream(),
|
||||
best_candidate->dag);
|
||||
aggregate_projection_node = &nodes.emplace_back();
|
||||
if (candidates.has_filter)
|
||||
{
|
||||
aggregate_projection_node->step = std::make_unique<FilterStep>(
|
||||
projection_reading_node.step->getOutputStream(),
|
||||
best_candidate->dag,
|
||||
best_candidate->dag->getOutputs().front()->result_name,
|
||||
true);
|
||||
}
|
||||
else
|
||||
aggregate_projection_node->step
|
||||
= std::make_unique<ExpressionStep>(projection_reading_node.step->getOutputStream(), best_candidate->dag);
|
||||
|
||||
expr_or_filter_node.children.push_back(&projection_reading_node);
|
||||
aggregate_projection_node->children.push_back(&projection_reading_node);
|
||||
}
|
||||
else /// trivial count optimization
|
||||
{
|
||||
aggregate_projection_node = &projection_reading_node;
|
||||
}
|
||||
|
||||
if (!has_ordinary_parts)
|
||||
{
|
||||
/// All parts are taken from projection
|
||||
aggregating->requestOnlyMergeForAggregateProjection(expr_or_filter_node.step->getOutputStream());
|
||||
node.children.front() = &expr_or_filter_node;
|
||||
aggregating->requestOnlyMergeForAggregateProjection(aggregate_projection_node->step->getOutputStream());
|
||||
node.children.front() = aggregate_projection_node;
|
||||
}
|
||||
else
|
||||
{
|
||||
node.step = aggregating->convertToAggregatingProjection(expr_or_filter_node.step->getOutputStream());
|
||||
node.children.push_back(&expr_or_filter_node);
|
||||
node.step = aggregating->convertToAggregatingProjection(aggregate_projection_node->step->getOutputStream());
|
||||
node.children.push_back(aggregate_projection_node);
|
||||
}
|
||||
|
||||
return best_candidate->projection->name;
|
||||
return selected_projection_name;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -139,7 +139,9 @@ std::optional<String> optimizeUseNormalProjections(Stack & stack, QueryPlan::Nod
|
||||
const auto & query_info = reading->getQueryInfo();
|
||||
MergeTreeDataSelectExecutor reader(reading->getMergeTreeData());
|
||||
|
||||
auto ordinary_reading_select_result = reading->selectRangesToRead();
|
||||
auto ordinary_reading_select_result = reading->getAnalyzedResult();
|
||||
if (!ordinary_reading_select_result)
|
||||
ordinary_reading_select_result = reading->selectRangesToRead();
|
||||
size_t ordinary_reading_marks = ordinary_reading_select_result->selected_marks;
|
||||
|
||||
/// Nothing to read. Ignore projections.
|
||||
|
@ -25,8 +25,7 @@ namespace QueryPlanOptimizations
|
||||
|
||||
bool canUseProjectionForReadingStep(ReadFromMergeTree * reading)
|
||||
{
|
||||
/// Probably some projection was already applied.
|
||||
if (reading->hasAnalyzedResult())
|
||||
if (reading->getAnalyzedResult() && reading->getAnalyzedResult()->readFromProjection())
|
||||
return false;
|
||||
|
||||
if (reading->isQueryWithFinal())
|
||||
|
@ -1358,9 +1358,9 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
|
||||
return merging_pipes.empty() ? Pipe::unitePipes(std::move(no_merging_pipes)) : Pipe::unitePipes(std::move(merging_pipes));
|
||||
}
|
||||
|
||||
ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead() const
|
||||
ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(bool find_exact_ranges) const
|
||||
{
|
||||
return selectRangesToRead(prepared_parts, alter_conversions_for_parts, false /* find_exact_ranges */);
|
||||
return selectRangesToRead(prepared_parts, alter_conversions_for_parts, find_exact_ranges);
|
||||
}
|
||||
|
||||
ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
|
||||
@ -1664,6 +1664,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
|
||||
result.selected_marks_pk = sum_marks_pk;
|
||||
result.total_marks_pk = total_marks_pk;
|
||||
result.selected_rows = sum_rows;
|
||||
result.has_exact_ranges = result.selected_parts == 0 || find_exact_ranges;
|
||||
|
||||
if (query_info_.input_order_info)
|
||||
result.read_type = (query_info_.input_order_info->direction > 0)
|
||||
|
@ -100,7 +100,9 @@ public:
|
||||
UInt64 selected_marks_pk = 0;
|
||||
UInt64 total_marks_pk = 0;
|
||||
UInt64 selected_rows = 0;
|
||||
bool has_exact_ranges = false;
|
||||
|
||||
bool readFromProjection() const { return !parts_with_ranges.empty() && parts_with_ranges.front().data_part->isProjectionPart(); }
|
||||
void checkLimits(const Settings & settings, const SelectQueryInfo & query_info_) const;
|
||||
};
|
||||
|
||||
@ -167,7 +169,7 @@ public:
|
||||
AnalysisResultPtr selectRangesToRead(
|
||||
MergeTreeData::DataPartsVector parts, std::vector<AlterConversionsPtr> alter_conversions, bool find_exact_ranges = false) const;
|
||||
|
||||
AnalysisResultPtr selectRangesToRead() const;
|
||||
AnalysisResultPtr selectRangesToRead(bool find_exact_ranges = false) const;
|
||||
|
||||
StorageMetadataPtr getStorageMetadata() const { return metadata_for_reading; }
|
||||
|
||||
@ -182,7 +184,7 @@ public:
|
||||
bool requestOutputEachPartitionThroughSeparatePort();
|
||||
bool willOutputEachPartitionThroughSeparatePort() const { return output_each_partition_through_separate_port; }
|
||||
|
||||
bool hasAnalyzedResult() const { return analyzed_result_ptr != nullptr; }
|
||||
AnalysisResultPtr getAnalyzedResult() const { return analyzed_result_ptr; }
|
||||
void setAnalyzedResult(AnalysisResultPtr analyzed_result_ptr_) { analyzed_result_ptr = std::move(analyzed_result_ptr_); }
|
||||
|
||||
const MergeTreeData::DataPartsVector & getParts() const { return prepared_parts; }
|
||||
|
@ -49,11 +49,6 @@ public:
|
||||
filter_dags.push_back(std::move(filter_dag));
|
||||
}
|
||||
|
||||
void addFilterFromParentStep(const ActionsDAG::Node * filter_node)
|
||||
{
|
||||
filter_nodes.nodes.push_back(filter_node);
|
||||
}
|
||||
|
||||
/// Apply filters that can optimize reading from storage.
|
||||
void applyFilters()
|
||||
{
|
||||
|
@ -19,18 +19,18 @@ public:
|
||||
size_t sum_index_columns = 0;
|
||||
size_t sum_ordinary_columns = 0;
|
||||
|
||||
ColumnSizeEstimator(ColumnToSize && map_, const Names & key_columns, const Names & ordinary_columns)
|
||||
ColumnSizeEstimator(ColumnToSize && map_, const NamesAndTypesList & key_columns, const NamesAndTypesList & ordinary_columns)
|
||||
: map(std::move(map_))
|
||||
{
|
||||
for (const auto & name : key_columns)
|
||||
for (const auto & [name, _] : key_columns)
|
||||
if (!map.contains(name)) map[name] = 0;
|
||||
for (const auto & name : ordinary_columns)
|
||||
for (const auto & [name, _] : ordinary_columns)
|
||||
if (!map.contains(name)) map[name] = 0;
|
||||
|
||||
for (const auto & name : key_columns)
|
||||
for (const auto & [name, _] : key_columns)
|
||||
sum_index_columns += map.at(name);
|
||||
|
||||
for (const auto & name : ordinary_columns)
|
||||
for (const auto & [name, _] : ordinary_columns)
|
||||
sum_ordinary_columns += map.at(name);
|
||||
|
||||
sum_total = std::max(static_cast<decltype(sum_index_columns)>(1), sum_index_columns + sum_ordinary_columns);
|
||||
|
@ -48,59 +48,23 @@ namespace ErrorCodes
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
}
|
||||
|
||||
|
||||
/// PK columns are sorted and merged, ordinary columns are gathered using info from merge step
|
||||
static void extractMergingAndGatheringColumns(
|
||||
const NamesAndTypesList & storage_columns,
|
||||
const ExpressionActionsPtr & sorting_key_expr,
|
||||
const IndicesDescription & indexes,
|
||||
const MergeTreeData::MergingParams & merging_params,
|
||||
NamesAndTypesList & gathering_columns, Names & gathering_column_names,
|
||||
NamesAndTypesList & merging_columns, Names & merging_column_names)
|
||||
static ColumnsStatistics getStatisticsForColumns(
|
||||
const NamesAndTypesList & columns_to_read,
|
||||
const StorageMetadataPtr & metadata_snapshot)
|
||||
{
|
||||
Names sort_key_columns_vec = sorting_key_expr->getRequiredColumns();
|
||||
std::set<String> key_columns(sort_key_columns_vec.cbegin(), sort_key_columns_vec.cend());
|
||||
for (const auto & index : indexes)
|
||||
ColumnsStatistics all_statistics;
|
||||
const auto & all_columns = metadata_snapshot->getColumns();
|
||||
|
||||
for (const auto & column : columns_to_read)
|
||||
{
|
||||
Names index_columns_vec = index.expression->getRequiredColumns();
|
||||
std::copy(index_columns_vec.cbegin(), index_columns_vec.cend(),
|
||||
std::inserter(key_columns, key_columns.end()));
|
||||
}
|
||||
|
||||
/// Force sign column for Collapsing mode
|
||||
if (merging_params.mode == MergeTreeData::MergingParams::Collapsing)
|
||||
key_columns.emplace(merging_params.sign_column);
|
||||
|
||||
/// Force version column for Replacing mode
|
||||
if (merging_params.mode == MergeTreeData::MergingParams::Replacing)
|
||||
{
|
||||
key_columns.emplace(merging_params.is_deleted_column);
|
||||
key_columns.emplace(merging_params.version_column);
|
||||
}
|
||||
|
||||
/// Force sign column for VersionedCollapsing mode. Version is already in primary key.
|
||||
if (merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing)
|
||||
key_columns.emplace(merging_params.sign_column);
|
||||
|
||||
/// Force to merge at least one column in case of empty key
|
||||
if (key_columns.empty())
|
||||
key_columns.emplace(storage_columns.front().name);
|
||||
|
||||
/// TODO: also force "summing" and "aggregating" columns to make Horizontal merge only for such columns
|
||||
|
||||
for (const auto & column : storage_columns)
|
||||
{
|
||||
if (key_columns.contains(column.name))
|
||||
const auto * desc = all_columns.tryGet(column.name);
|
||||
if (desc && !desc->statistics.empty())
|
||||
{
|
||||
merging_columns.emplace_back(column);
|
||||
merging_column_names.emplace_back(column.name);
|
||||
}
|
||||
else
|
||||
{
|
||||
gathering_columns.emplace_back(column);
|
||||
gathering_column_names.emplace_back(column.name);
|
||||
auto statistics = MergeTreeStatisticsFactory::instance().get(desc->statistics);
|
||||
all_statistics.push_back(std::move(statistics));
|
||||
}
|
||||
}
|
||||
return all_statistics;
|
||||
}
|
||||
|
||||
static void addMissedColumnsToSerializationInfos(
|
||||
@ -129,6 +93,77 @@ static void addMissedColumnsToSerializationInfos(
|
||||
}
|
||||
}
|
||||
|
||||
/// PK columns are sorted and merged, ordinary columns are gathered using info from merge step
/// Splits global_ctx->storage_columns into global_ctx->merging_columns (columns whose names end up
/// in the local `key_columns` set) and global_ctx->gathering_columns (all remaining columns).
/// It also distributes the secondary (skip) indexes: an index whose expression requires exactly one
/// column is first recorded in global_ctx->skip_indexes_by_column; every other index goes to
/// global_ctx->merging_skip_indexes and its required columns are added to `key_columns`.
/// The method is declared const, but it fills the fields listed above through the global_ctx pointer.
|
||||
void MergeTask::ExecuteAndFinalizeHorizontalPart::extractMergingAndGatheringColumns() const
|
||||
{
|
||||
const auto & sorting_key_expr = global_ctx->metadata_snapshot->getSortingKey().expression;
|
||||
Names sort_key_columns_vec = sorting_key_expr->getRequiredColumns();
|
||||
|
||||
/// Start from the columns required by the sorting key expression.
std::set<String> key_columns(sort_key_columns_vec.cbegin(), sort_key_columns_vec.cend());
|
||||
|
||||
/// Force sign column for Collapsing mode
|
||||
if (ctx->merging_params.mode == MergeTreeData::MergingParams::Collapsing)
|
||||
key_columns.emplace(ctx->merging_params.sign_column);
|
||||
|
||||
/// Force version column for Replacing mode
|
||||
if (ctx->merging_params.mode == MergeTreeData::MergingParams::Replacing)
|
||||
{
|
||||
key_columns.emplace(ctx->merging_params.is_deleted_column);
|
||||
key_columns.emplace(ctx->merging_params.version_column);
|
||||
}
|
||||
|
||||
/// Force sign column for VersionedCollapsing mode. Version is already in primary key.
|
||||
if (ctx->merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing)
|
||||
key_columns.emplace(ctx->merging_params.sign_column);
|
||||
|
||||
/// Force to merge at least one column in case of empty key
|
||||
if (key_columns.empty())
|
||||
key_columns.emplace(global_ctx->storage_columns.front().name);
|
||||
|
||||
const auto & skip_indexes = global_ctx->metadata_snapshot->getSecondaryIndices();
|
||||
|
||||
for (const auto & index : skip_indexes)
|
||||
{
|
||||
auto index_columns = index.expression->getRequiredColumns();
|
||||
|
||||
/// Calculate indexes that depend only on one column on vertical
|
||||
/// stage and other indexes on horizontal stage of merge.
|
||||
if (index_columns.size() == 1)
|
||||
{
|
||||
const auto & column_name = index_columns.front();
|
||||
global_ctx->skip_indexes_by_column[column_name].push_back(index);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Every column required by this index is added to the key set, so the loop below
/// places those columns into merging_columns together with the index itself.
std::ranges::copy(index_columns, std::inserter(key_columns, key_columns.end()));
|
||||
global_ctx->merging_skip_indexes.push_back(index);
|
||||
}
|
||||
}
|
||||
|
||||
/// TODO: also force "summing" and "aggregating" columns to make Horizontal merge only for such columns
|
||||
|
||||
for (const auto & column : global_ctx->storage_columns)
|
||||
{
|
||||
if (key_columns.contains(column.name))
|
||||
{
|
||||
global_ctx->merging_columns.emplace_back(column);
|
||||
|
||||
/// If column is in horizontal stage we need to calculate its indexes on horizontal stage as well
|
||||
auto it = global_ctx->skip_indexes_by_column.find(column.name);
|
||||
if (it != global_ctx->skip_indexes_by_column.end())
|
||||
{
|
||||
/// The descriptions are moved rather than copied: the map entry is erased right after the loop.
for (auto & index : it->second)
|
||||
global_ctx->merging_skip_indexes.push_back(std::move(index));
|
||||
|
||||
global_ctx->skip_indexes_by_column.erase(it);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
global_ctx->gathering_columns.emplace_back(column);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
|
||||
{
|
||||
@ -196,27 +231,18 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
|
||||
if (!global_ctx->parent_part)
|
||||
global_ctx->temporary_directory_lock = global_ctx->data->getTemporaryPartDirectoryHolder(local_tmp_part_basename);
|
||||
|
||||
global_ctx->all_column_names = global_ctx->metadata_snapshot->getColumns().getNamesOfPhysical();
|
||||
global_ctx->storage_columns = global_ctx->metadata_snapshot->getColumns().getAllPhysical();
|
||||
|
||||
auto object_columns = MergeTreeData::getConcreteObjectColumns(global_ctx->future_part->parts, global_ctx->metadata_snapshot->getColumns());
|
||||
|
||||
extendObjectColumns(global_ctx->storage_columns, object_columns, false);
|
||||
global_ctx->storage_snapshot = std::make_shared<StorageSnapshot>(*global_ctx->data, global_ctx->metadata_snapshot, std::move(object_columns));
|
||||
|
||||
extractMergingAndGatheringColumns(
|
||||
global_ctx->storage_columns,
|
||||
global_ctx->metadata_snapshot->getSortingKey().expression,
|
||||
global_ctx->metadata_snapshot->getSecondaryIndices(),
|
||||
ctx->merging_params,
|
||||
global_ctx->gathering_columns,
|
||||
global_ctx->gathering_column_names,
|
||||
global_ctx->merging_columns,
|
||||
global_ctx->merging_column_names);
|
||||
extractMergingAndGatheringColumns();
|
||||
|
||||
global_ctx->new_data_part->uuid = global_ctx->future_part->uuid;
|
||||
global_ctx->new_data_part->partition.assign(global_ctx->future_part->getPartition());
|
||||
global_ctx->new_data_part->is_temp = global_ctx->parent_part == nullptr;
|
||||
|
||||
/// In case of replicated merge tree with zero copy replication
|
||||
/// Here ClickHouse claims that this new part can be deleted in temporary state without unlocking the blobs
|
||||
/// The blobs have to be removed along with the part, this temporary part owns them and does not share them yet.
|
||||
@ -278,6 +304,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
|
||||
|
||||
ctx->sum_input_rows_upper_bound = global_ctx->merge_list_element_ptr->total_rows_count;
|
||||
ctx->sum_compressed_bytes_upper_bound = global_ctx->merge_list_element_ptr->total_size_bytes_compressed;
|
||||
|
||||
global_ctx->chosen_merge_algorithm = chooseMergeAlgorithm();
|
||||
global_ctx->merge_list_element_ptr->merge_algorithm.store(global_ctx->chosen_merge_algorithm, std::memory_order_relaxed);
|
||||
|
||||
@ -298,9 +325,9 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
|
||||
case MergeAlgorithm::Horizontal:
|
||||
{
|
||||
global_ctx->merging_columns = global_ctx->storage_columns;
|
||||
global_ctx->merging_column_names = global_ctx->all_column_names;
|
||||
global_ctx->merging_skip_indexes = global_ctx->metadata_snapshot->getSecondaryIndices();
|
||||
global_ctx->gathering_columns.clear();
|
||||
global_ctx->gathering_column_names.clear();
|
||||
global_ctx->skip_indexes_by_column.clear();
|
||||
break;
|
||||
}
|
||||
case MergeAlgorithm::Vertical:
|
||||
@ -309,13 +336,13 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
|
||||
ctx->rows_sources_write_buf = std::make_unique<CompressedWriteBuffer>(*ctx->rows_sources_uncompressed_write_buf);
|
||||
|
||||
std::map<String, UInt64> local_merged_column_to_size;
|
||||
for (const MergeTreeData::DataPartPtr & part : global_ctx->future_part->parts)
|
||||
for (const auto & part : global_ctx->future_part->parts)
|
||||
part->accumulateColumnSizes(local_merged_column_to_size);
|
||||
|
||||
ctx->column_sizes = ColumnSizeEstimator(
|
||||
std::move(local_merged_column_to_size),
|
||||
global_ctx->merging_column_names,
|
||||
global_ctx->gathering_column_names);
|
||||
global_ctx->merging_columns,
|
||||
global_ctx->gathering_columns);
|
||||
|
||||
break;
|
||||
}
|
||||
@ -323,9 +350,6 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Merge algorithm must be chosen");
|
||||
}
|
||||
|
||||
assert(global_ctx->gathering_columns.size() == global_ctx->gathering_column_names.size());
|
||||
assert(global_ctx->merging_columns.size() == global_ctx->merging_column_names.size());
|
||||
|
||||
/// If merge is vertical we cannot calculate it
|
||||
ctx->blocks_are_granules_size = (global_ctx->chosen_merge_algorithm == MergeAlgorithm::Vertical);
|
||||
|
||||
@ -342,28 +366,25 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
|
||||
/// resources for this).
|
||||
if (!ctx->need_remove_expired_values)
|
||||
{
|
||||
size_t expired_columns = 0;
|
||||
auto part_serialization_infos = global_ctx->new_data_part->getSerializationInfos();
|
||||
|
||||
NameSet columns_to_remove;
|
||||
for (auto & [column_name, ttl] : global_ctx->new_data_part->ttl_infos.columns_ttl)
|
||||
{
|
||||
if (ttl.finished())
|
||||
{
|
||||
global_ctx->new_data_part->expired_columns.insert(column_name);
|
||||
LOG_TRACE(ctx->log, "Adding expired column {} for part {}", column_name, global_ctx->new_data_part->name);
|
||||
std::erase(global_ctx->gathering_column_names, column_name);
|
||||
std::erase(global_ctx->merging_column_names, column_name);
|
||||
std::erase(global_ctx->all_column_names, column_name);
|
||||
columns_to_remove.insert(column_name);
|
||||
part_serialization_infos.erase(column_name);
|
||||
++expired_columns;
|
||||
}
|
||||
}
|
||||
|
||||
if (expired_columns)
|
||||
if (!columns_to_remove.empty())
|
||||
{
|
||||
global_ctx->gathering_columns = global_ctx->gathering_columns.filter(global_ctx->gathering_column_names);
|
||||
global_ctx->merging_columns = global_ctx->merging_columns.filter(global_ctx->merging_column_names);
|
||||
global_ctx->storage_columns = global_ctx->storage_columns.filter(global_ctx->all_column_names);
|
||||
global_ctx->gathering_columns = global_ctx->gathering_columns.eraseNames(columns_to_remove);
|
||||
global_ctx->merging_columns = global_ctx->merging_columns.eraseNames(columns_to_remove);
|
||||
global_ctx->storage_columns = global_ctx->storage_columns.eraseNames(columns_to_remove);
|
||||
|
||||
global_ctx->new_data_part->setColumns(
|
||||
global_ctx->storage_columns,
|
||||
@ -376,8 +397,8 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare()
|
||||
global_ctx->new_data_part,
|
||||
global_ctx->metadata_snapshot,
|
||||
global_ctx->merging_columns,
|
||||
MergeTreeIndexFactory::instance().getMany(global_ctx->metadata_snapshot->getSecondaryIndices()),
|
||||
MergeTreeStatisticsFactory::instance().getMany(global_ctx->metadata_snapshot->getColumns()),
|
||||
MergeTreeIndexFactory::instance().getMany(global_ctx->merging_skip_indexes),
|
||||
getStatisticsForColumns(global_ctx->merging_columns, global_ctx->metadata_snapshot),
|
||||
ctx->compression_codec,
|
||||
global_ctx->txn ? global_ctx->txn->tid : Tx::PrehistoricTID,
|
||||
/*reset_columns=*/ true,
|
||||
@ -407,9 +428,7 @@ void MergeTask::addGatheringColumn(GlobalRuntimeContextPtr global_ctx, const Str
|
||||
return;
|
||||
|
||||
global_ctx->storage_columns.emplace_back(name, type);
|
||||
global_ctx->all_column_names.emplace_back(name);
|
||||
global_ctx->gathering_columns.emplace_back(name, type);
|
||||
global_ctx->gathering_column_names.emplace_back(name);
|
||||
}
|
||||
|
||||
|
||||
@ -423,7 +442,6 @@ MergeTask::StageRuntimeContextPtr MergeTask::ExecuteAndFinalizeHorizontalPart::g
|
||||
new_ctx->compression_codec = std::move(ctx->compression_codec);
|
||||
new_ctx->tmp_disk = std::move(ctx->tmp_disk);
|
||||
new_ctx->it_name_and_type = std::move(ctx->it_name_and_type);
|
||||
new_ctx->column_num_for_vertical_merge = std::move(ctx->column_num_for_vertical_merge);
|
||||
new_ctx->read_with_direct_io = std::move(ctx->read_with_direct_io);
|
||||
new_ctx->need_sync = std::move(ctx->need_sync);
|
||||
|
||||
@ -510,7 +528,7 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const
|
||||
|
||||
size_t sum_input_rows_exact = global_ctx->merge_list_element_ptr->rows_read;
|
||||
size_t input_rows_filtered = *global_ctx->input_rows_filtered;
|
||||
global_ctx->merge_list_element_ptr->columns_written = global_ctx->merging_column_names.size();
|
||||
global_ctx->merge_list_element_ptr->columns_written = global_ctx->merging_columns.size();
|
||||
global_ctx->merge_list_element_ptr->progress.store(ctx->column_sizes->keyColumnsWeight(), std::memory_order_relaxed);
|
||||
|
||||
ctx->rows_sources_write_buf->next();
|
||||
@ -546,14 +564,12 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const
|
||||
/// Move ownership from std::unique_ptr<ReadBuffer> to std::unique_ptr<ReadBufferFromFile> for CompressedReadBufferFromFile.
|
||||
/// First, release ownership from unique_ptr to base type.
|
||||
reread_buf.release(); /// NOLINT(bugprone-unused-return-value,hicpp-ignored-remove-result): we already have the pointer value in `reread_buffer_raw`
|
||||
|
||||
/// Then, move ownership to unique_ptr to concrete type.
|
||||
std::unique_ptr<ReadBufferFromFile> reread_buffer_from_file(reread_buffer_raw);
|
||||
|
||||
/// CompressedReadBufferFromFile expects std::unique_ptr<ReadBufferFromFile> as argument.
|
||||
ctx->rows_sources_read_buf = std::make_unique<CompressedReadBufferFromFile>(std::move(reread_buffer_from_file));
|
||||
|
||||
/// For external cycle
|
||||
global_ctx->gathering_column_names_size = global_ctx->gathering_column_names.size();
|
||||
ctx->column_num_for_vertical_merge = 0;
|
||||
ctx->it_name_and_type = global_ctx->gathering_columns.cbegin();
|
||||
|
||||
const auto & settings = global_ctx->context->getSettingsRef();
|
||||
@ -636,6 +652,21 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const
|
||||
|
||||
pipe.addTransform(std::move(transform));
|
||||
|
||||
MergeTreeIndices indexes_to_recalc;
|
||||
auto indexes_it = global_ctx->skip_indexes_by_column.find(column_name);
|
||||
|
||||
if (indexes_it != global_ctx->skip_indexes_by_column.end())
|
||||
{
|
||||
indexes_to_recalc = MergeTreeIndexFactory::instance().getMany(indexes_it->second);
|
||||
|
||||
pipe.addTransform(std::make_shared<ExpressionTransform>(
|
||||
pipe.getHeader(),
|
||||
indexes_it->second.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(),
|
||||
global_ctx->data->getContext())));
|
||||
|
||||
pipe.addTransform(std::make_shared<MaterializingTransform>(pipe.getHeader()));
|
||||
}
|
||||
|
||||
ctx->column_parts_pipeline = QueryPipeline(std::move(pipe));
|
||||
|
||||
/// Dereference unique_ptr
|
||||
@ -646,19 +677,16 @@ void MergeTask::VerticalMergeStage::prepareVerticalMergeForOneColumn() const
|
||||
|
||||
/// Is calculated inside MergeProgressCallback.
|
||||
ctx->column_parts_pipeline.disableProfileEventUpdate();
|
||||
|
||||
ctx->executor = std::make_unique<PullingPipelineExecutor>(ctx->column_parts_pipeline);
|
||||
NamesAndTypesList columns_list = {*ctx->it_name_and_type};
|
||||
|
||||
ctx->column_to = std::make_unique<MergedColumnOnlyOutputStream>(
|
||||
global_ctx->new_data_part,
|
||||
global_ctx->metadata_snapshot,
|
||||
ctx->executor->getHeader(),
|
||||
columns_list,
|
||||
ctx->compression_codec,
|
||||
/// we don't need to recalc indices here
|
||||
/// because all of them were already recalculated and written
|
||||
/// as key part of vertical merge
|
||||
std::vector<MergeTreeIndexPtr>{},
|
||||
ColumnsStatistics{}, /// TODO(hanfei)
|
||||
indexes_to_recalc,
|
||||
getStatisticsForColumns(columns_list, global_ctx->metadata_snapshot),
|
||||
&global_ctx->written_offset_columns,
|
||||
global_ctx->to->getIndexGranularity());
|
||||
|
||||
@ -716,8 +744,7 @@ void MergeTask::VerticalMergeStage::finalizeVerticalMergeForOneColumn() const
|
||||
global_ctx->merge_list_element_ptr->bytes_written_uncompressed += bytes;
|
||||
global_ctx->merge_list_element_ptr->progress.store(ctx->progress_before + ctx->column_sizes->columnWeight(column_name), std::memory_order_relaxed);
|
||||
|
||||
/// This is the external cycle increment.
|
||||
++ctx->column_num_for_vertical_merge;
|
||||
/// This is the external loop increment.
|
||||
++ctx->it_name_and_type;
|
||||
}
|
||||
|
||||
@ -749,9 +776,9 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c
|
||||
LOG_DEBUG(ctx->log,
|
||||
"Merge sorted {} rows, containing {} columns ({} merged, {} gathered) in {} sec., {} rows/sec., {}/sec.",
|
||||
global_ctx->merge_list_element_ptr->rows_read,
|
||||
global_ctx->all_column_names.size(),
|
||||
global_ctx->merging_column_names.size(),
|
||||
global_ctx->gathering_column_names.size(),
|
||||
global_ctx->storage_columns.size(),
|
||||
global_ctx->merging_columns.size(),
|
||||
global_ctx->gathering_columns.size(),
|
||||
elapsed_seconds,
|
||||
global_ctx->merge_list_element_ptr->rows_read / elapsed_seconds,
|
||||
ReadableSize(global_ctx->merge_list_element_ptr->bytes_read_uncompressed / elapsed_seconds));
|
||||
@ -888,7 +915,7 @@ bool MergeTask::VerticalMergeStage::executeVerticalMergeForAllColumns() const
|
||||
return false;
|
||||
|
||||
/// This is the external cycle condition
|
||||
if (ctx->column_num_for_vertical_merge >= global_ctx->gathering_column_names_size)
|
||||
if (ctx->it_name_and_type == global_ctx->gathering_columns.end())
|
||||
return false;
|
||||
|
||||
switch (ctx->vertical_merge_one_column_state)
|
||||
@ -976,7 +1003,7 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
|
||||
*global_ctx->data,
|
||||
global_ctx->storage_snapshot,
|
||||
part,
|
||||
global_ctx->merging_column_names,
|
||||
global_ctx->merging_columns.getNames(),
|
||||
/*mark_ranges=*/ {},
|
||||
global_ctx->input_rows_filtered,
|
||||
/*apply_deleted_mask=*/ true,
|
||||
@ -1115,12 +1142,12 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
|
||||
/// If deduplicate_by_columns is empty, add all columns except virtuals.
|
||||
if (global_ctx->deduplicate_by_columns.empty())
|
||||
{
|
||||
for (const auto & column_name : global_ctx->merging_column_names)
|
||||
for (const auto & column : global_ctx->merging_columns)
|
||||
{
|
||||
if (virtuals.tryGet(column_name, VirtualsKind::Persistent))
|
||||
if (virtuals.tryGet(column.name, VirtualsKind::Persistent))
|
||||
continue;
|
||||
|
||||
global_ctx->deduplicate_by_columns.emplace_back(column_name);
|
||||
global_ctx->deduplicate_by_columns.emplace_back(column.name);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1141,11 +1168,13 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
|
||||
builder->addTransform(std::move(transform));
|
||||
}
|
||||
|
||||
if (global_ctx->metadata_snapshot->hasSecondaryIndices())
|
||||
if (!global_ctx->merging_skip_indexes.empty())
|
||||
{
|
||||
const auto & indices = global_ctx->metadata_snapshot->getSecondaryIndices();
|
||||
builder->addTransform(std::make_shared<ExpressionTransform>(
|
||||
builder->getHeader(), indices.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(), global_ctx->data->getContext())));
|
||||
builder->getHeader(),
|
||||
global_ctx->merging_skip_indexes.getSingleExpressionForIndices(global_ctx->metadata_snapshot->getColumns(),
|
||||
global_ctx->data->getContext())));
|
||||
|
||||
builder->addTransform(std::make_shared<MaterializingTransform>(builder->getHeader()));
|
||||
}
|
||||
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include <Storages/MergeTree/MergedColumnOnlyOutputStream.h>
|
||||
#include <Storages/MergeTree/MergeProgress.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/MergeTree/MergeTreeIndices.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -164,14 +165,13 @@ private:
|
||||
|
||||
NamesAndTypesList gathering_columns{};
|
||||
NamesAndTypesList merging_columns{};
|
||||
Names gathering_column_names{};
|
||||
Names merging_column_names{};
|
||||
NamesAndTypesList storage_columns{};
|
||||
Names all_column_names{};
|
||||
MergeTreeData::DataPart::Checksums checksums_gathered_columns{};
|
||||
|
||||
IndicesDescription merging_skip_indexes;
|
||||
std::unordered_map<String, IndicesDescription> skip_indexes_by_column;
|
||||
|
||||
MergeAlgorithm chosen_merge_algorithm{MergeAlgorithm::Undecided};
|
||||
size_t gathering_column_names_size{0};
|
||||
|
||||
std::unique_ptr<MergeStageProgress> horizontal_stage_progress{nullptr};
|
||||
std::unique_ptr<MergeStageProgress> column_progress{nullptr};
|
||||
@ -232,7 +232,6 @@ private:
|
||||
|
||||
/// Dependencies for next stages
|
||||
std::list<DB::NameAndTypePair>::const_iterator it_name_and_type;
|
||||
size_t column_num_for_vertical_merge{0};
|
||||
bool need_sync{false};
|
||||
};
|
||||
|
||||
@ -260,12 +259,14 @@ private:
|
||||
|
||||
MergeAlgorithm chooseMergeAlgorithm() const;
|
||||
void createMergedStream();
|
||||
void extractMergingAndGatheringColumns() const;
|
||||
|
||||
void setRuntimeContext(StageRuntimeContextPtr local, StageRuntimeContextPtr global) override
|
||||
{
|
||||
ctx = static_pointer_cast<ExecuteAndFinalizeHorizontalPartRuntimeContext>(local);
|
||||
global_ctx = static_pointer_cast<GlobalRuntimeContext>(global);
|
||||
}
|
||||
|
||||
StageRuntimeContextPtr getContextForNextStage() override;
|
||||
|
||||
ExecuteAndFinalizeHorizontalPartRuntimeContextPtr ctx;
|
||||
@ -284,7 +285,6 @@ private:
|
||||
CompressionCodecPtr compression_codec;
|
||||
TemporaryDataOnDiskPtr tmp_disk{nullptr};
|
||||
std::list<DB::NameAndTypePair>::const_iterator it_name_and_type;
|
||||
size_t column_num_for_vertical_merge{0};
|
||||
bool read_with_direct_io{false};
|
||||
bool need_sync{false};
|
||||
/// End dependencies from previous stages
|
||||
|
@ -1981,6 +1981,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optional<std::un
|
||||
}
|
||||
|
||||
void MergeTreeData::loadUnexpectedDataParts()
|
||||
try
|
||||
{
|
||||
{
|
||||
std::lock_guard lock(unexpected_data_parts_mutex);
|
||||
@ -1996,6 +1997,9 @@ void MergeTreeData::loadUnexpectedDataParts()
|
||||
}
|
||||
|
||||
ThreadFuzzer::maybeInjectSleep();
|
||||
|
||||
auto blocker = CannotAllocateThreadFaultInjector::blockFaultInjections();
|
||||
|
||||
ThreadPoolCallbackRunnerLocal<void> runner(getUnexpectedPartsLoadingThreadPool().get(), "UnexpectedParts");
|
||||
|
||||
for (auto & load_state : unexpected_data_parts)
|
||||
@ -2027,6 +2031,13 @@ void MergeTreeData::loadUnexpectedDataParts()
|
||||
unexpected_data_parts_cv.notify_all();
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
LOG_ERROR(log, "Loading of unexpected parts failed. "
|
||||
"Will terminate to avoid undefined behaviour due to inconsistent set of parts. "
|
||||
"Exception: {}", getCurrentExceptionMessage(true));
|
||||
std::terminate();
|
||||
}
|
||||
|
||||
void MergeTreeData::loadOutdatedDataParts(bool is_async)
|
||||
try
|
||||
@ -7061,19 +7072,23 @@ QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage(
|
||||
const StorageSnapshotPtr &,
|
||||
SelectQueryInfo &) const
|
||||
{
|
||||
if (query_context->getClientInfo().collaborate_with_initiator)
|
||||
return QueryProcessingStage::Enum::FetchColumns;
|
||||
|
||||
/// Parallel replicas
|
||||
if (query_context->canUseParallelReplicasOnInitiator() && to_stage >= QueryProcessingStage::WithMergeableState)
|
||||
/// With the new analyzer, the Planner decides whether parallel replicas are used, and therefore which processing stage is used for reading
|
||||
if (!query_context->getSettingsRef().allow_experimental_analyzer)
|
||||
{
|
||||
/// ReplicatedMergeTree
|
||||
if (supportsReplication())
|
||||
return QueryProcessingStage::Enum::WithMergeableState;
|
||||
if (query_context->getClientInfo().collaborate_with_initiator)
|
||||
return QueryProcessingStage::Enum::FetchColumns;
|
||||
|
||||
/// For non-replicated MergeTree we allow them only if parallel_replicas_for_non_replicated_merge_tree is enabled
|
||||
if (query_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree)
|
||||
return QueryProcessingStage::Enum::WithMergeableState;
|
||||
/// Parallel replicas
|
||||
if (query_context->canUseParallelReplicasOnInitiator() && to_stage >= QueryProcessingStage::WithMergeableState)
|
||||
{
|
||||
/// ReplicatedMergeTree
|
||||
if (supportsReplication())
|
||||
return QueryProcessingStage::Enum::WithMergeableState;
|
||||
|
||||
/// For non-replicated MergeTree we allow them only if parallel_replicas_for_non_replicated_merge_tree is enabled
|
||||
if (query_context->getSettingsRef().parallel_replicas_for_non_replicated_merge_tree)
|
||||
return QueryProcessingStage::Enum::WithMergeableState;
|
||||
}
|
||||
}
|
||||
|
||||
return QueryProcessingStage::Enum::FetchColumns;
|
||||
|
@ -503,7 +503,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
|
||||
ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted);
|
||||
}
|
||||
|
||||
if (data.getSettings()->allow_experimental_optimized_row_order)
|
||||
if (data.getSettings()->optimize_row_order
|
||||
&& data.merging_params.mode == MergeTreeData::MergingParams::Mode::Ordinary) /// Nobody knows if this optimization messes up specialized MergeTree engines.
|
||||
{
|
||||
RowOrderOptimizer::optimize(block, sort_description, perm);
|
||||
perm_ptr = &perm;
|
||||
@ -730,7 +731,8 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl(
|
||||
ProfileEvents::increment(ProfileEvents::MergeTreeDataProjectionWriterBlocksAlreadySorted);
|
||||
}
|
||||
|
||||
if (data.getSettings()->allow_experimental_optimized_row_order)
|
||||
if (data.getSettings()->optimize_row_order
|
||||
&& data.merging_params.mode == MergeTreeData::MergingParams::Mode::Ordinary) /// Nobody knows if this optimization messes up specialized MergeTree engines.
|
||||
{
|
||||
RowOrderOptimizer::optimize(block, sort_description, perm);
|
||||
perm_ptr = &perm;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user