Merge remote-tracking branch 'ck/master' into rocksdb_metacache

This commit is contained in:
taiyang-li 2022-03-28 21:03:22 +08:00
commit 38702be40c
259 changed files with 5284 additions and 2011 deletions

View File

@ -210,3 +210,6 @@ CheckOptions:
value: false
- key: performance-move-const-arg.CheckTriviallyCopyableMove
value: false
# Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
- key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp
value: expr-type

View File

@ -7,6 +7,7 @@ env:
"on":
schedule:
- cron: '13 3 * * *'
workflow_dispatch:
jobs:
DockerHubPushAarch64:

View File

@ -11,6 +11,7 @@
* Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)).
* Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)).
* Make function `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave same as `toIPv4`, `toIPv6` functions. Changed behavior of incorrect IP address passed into functions `toIPv4`,` toIPv6`, now if invalid IP address passes into this functions exception will be raised, before this function return default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringOrNull` `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. Functions `IPv4StringToNumOrDefault `, `toIPv4OrDefault `, `toIPv6OrDefault ` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning default value for invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`, if this setting enabled, then IP address conversion functions will behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)).
#### New Feature

View File

@ -266,7 +266,7 @@ if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
endif ()
# Allows to build stripped binary in a separate directory
if (OBJCOPY_PATH AND READELF_PATH)
if (OBJCOPY_PATH AND STRIP_PATH)
option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF)
if (INSTALL_STRIPPED_BINARIES)
set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")

View File

@ -1,28 +0,0 @@
#!/usr/bin/env bash
BINARY_PATH=$1
BINARY_NAME=$(basename "$BINARY_PATH")
DESTINATION_STRIPPED_DIR=$2
OBJCOPY_PATH=${3:objcopy}
READELF_PATH=${4:readelf}
BUILD_ID=$($READELF_PATH -n "$1" | sed -n '/Build ID/ { s/.*: //p; q; }')
BUILD_ID_PREFIX=${BUILD_ID:0:2}
BUILD_ID_SUFFIX=${BUILD_ID:2}
DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id"
DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin"
mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX"
mkdir -p "$DESTINATION_STRIP_BINARY_DIR"
cp "$BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
$OBJCOPY_PATH --only-keep-debug --compress-debug-sections "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
chmod 0644 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
chown 0:0 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
strip --remove-section=.comment --remove-section=.note "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"

View File

@ -11,16 +11,43 @@ macro(clickhouse_strip_binary)
message(FATAL_ERROR "A binary path name must be provided for stripping binary")
endif()
if (NOT DEFINED STRIP_DESTINATION_DIR)
message(FATAL_ERROR "Destination directory for stripped binary must be provided")
endif()
add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD
COMMAND bash ${ClickHouse_SOURCE_DIR}/cmake/strip.sh ${STRIP_BINARY_PATH} ${STRIP_DESTINATION_DIR} ${OBJCOPY_PATH} ${READELF_PATH}
COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/bin"
COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin"
COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
COMMENT "Stripping clickhouse binary" VERBATIM
)
install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse)
install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}/${STRIP_TARGET}.debug COMPONENT clickhouse)
endmacro()
macro(clickhouse_make_empty_debug_info_for_nfpm)
set(oneValueArgs TARGET DESTINATION_DIR)
cmake_parse_arguments(EMPTY_DEBUG "" "${oneValueArgs}" "" ${ARGN})
if (NOT DEFINED EMPTY_DEBUG_TARGET)
message(FATAL_ERROR "A target name must be provided for stripping binary")
endif()
if (NOT DEFINED EMPTY_DEBUG_DESTINATION_DIR)
message(FATAL_ERROR "Destination directory for empty debug must be provided")
endif()
add_custom_command(TARGET ${EMPTY_DEBUG_TARGET} POST_BUILD
COMMAND mkdir -p "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug"
COMMAND touch "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug"
COMMENT "Addiding empty debug info for NFPM" VERBATIM
)
install(FILES "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" DESTINATION "${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}" COMPONENT clickhouse)
endmacro()

View File

@ -170,32 +170,32 @@ else ()
message (FATAL_ERROR "Cannot find objcopy.")
endif ()
# Readelf (FIXME copypaste)
# Strip (FIXME copypaste)
if (COMPILER_GCC)
find_program (READELF_PATH NAMES "llvm-readelf" "llvm-readelf-13" "llvm-readelf-12" "llvm-readelf-11" "readelf")
find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-13" "llvm-strip-12" "llvm-strip-11" "strip")
else ()
find_program (READELF_PATH NAMES "llvm-readelf-${COMPILER_VERSION_MAJOR}" "llvm-readelf" "readelf")
find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip")
endif ()
if (NOT READELF_PATH AND OS_DARWIN)
if (NOT STRIP_PATH AND OS_DARWIN)
find_program (BREW_PATH NAMES "brew")
if (BREW_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX)
if (LLVM_PREFIX)
find_program (READELF_PATH NAMES "llvm-readelf" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
find_program (STRIP_PATH NAMES "llvm-strip" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
endif ()
if (NOT READELF_PATH)
if (NOT STRIP_PATH)
execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX)
if (BINUTILS_PREFIX)
find_program (READELF_PATH NAMES "readelf" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
find_program (STRIP_PATH NAMES "strip" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
endif ()
endif ()
endif ()
endif ()
if (READELF_PATH)
message (STATUS "Using readelf: ${READELF_PATH}")
if (STRIP_PATH)
message (STATUS "Using strip: ${STRIP_PATH}")
else ()
message (FATAL_ERROR "Cannot find readelf.")
message (FATAL_ERROR "Cannot find strip.")
endif ()

2
contrib/hyperscan vendored

@ -1 +1 @@
Subproject commit e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa
Subproject commit 5edc68c5ac68d2d4f876159e9ee84def6d3dc87c

2
contrib/libcxx vendored

@ -1 +1 @@
Subproject commit 61e60294b1de01483caa9f5d00f437c99b674de6
Subproject commit 172b2ae074f6755145b91c53a95c8540c1468239

View File

@ -18,12 +18,14 @@ set(SRCS
"${LIBCXX_SOURCE_DIR}/src/filesystem/directory_iterator.cpp"
"${LIBCXX_SOURCE_DIR}/src/filesystem/int128_builtins.cpp"
"${LIBCXX_SOURCE_DIR}/src/filesystem/operations.cpp"
"${LIBCXX_SOURCE_DIR}/src/format.cpp"
"${LIBCXX_SOURCE_DIR}/src/functional.cpp"
"${LIBCXX_SOURCE_DIR}/src/future.cpp"
"${LIBCXX_SOURCE_DIR}/src/hash.cpp"
"${LIBCXX_SOURCE_DIR}/src/ios.cpp"
"${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp"
"${LIBCXX_SOURCE_DIR}/src/iostream.cpp"
"${LIBCXX_SOURCE_DIR}/src/legacy_pointer_safety.cpp"
"${LIBCXX_SOURCE_DIR}/src/locale.cpp"
"${LIBCXX_SOURCE_DIR}/src/memory.cpp"
"${LIBCXX_SOURCE_DIR}/src/mutex.cpp"
@ -33,6 +35,9 @@ set(SRCS
"${LIBCXX_SOURCE_DIR}/src/random.cpp"
"${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp"
"${LIBCXX_SOURCE_DIR}/src/regex.cpp"
"${LIBCXX_SOURCE_DIR}/src/ryu/d2fixed.cpp"
"${LIBCXX_SOURCE_DIR}/src/ryu/d2s.cpp"
"${LIBCXX_SOURCE_DIR}/src/ryu/f2s.cpp"
"${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp"
"${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp"
"${LIBCXX_SOURCE_DIR}/src/string.cpp"
@ -49,7 +54,9 @@ set(SRCS
add_library(cxx ${SRCS})
set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake")
target_include_directories(cxx SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>)
target_include_directories(cxx SYSTEM BEFORE PUBLIC
$<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>
$<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}>/src)
target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)
# Enable capturing stack traces for all exceptions.

2
contrib/libcxxabi vendored

@ -1 +1 @@
Subproject commit df8f1e727dbc9e2bedf2282096fa189dc3fe0076
Subproject commit 6eb7cc7a7bdd779e6734d1b9fb451df2274462d7

View File

@ -1,24 +1,24 @@
set(LIBCXXABI_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libcxxabi")
set(SRCS
"${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_vector.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/stdlib_new_delete.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp"
"${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp"
)
add_library(cxxabi ${SRCS})
@ -30,6 +30,7 @@ target_compile_options(cxxabi PRIVATE -w)
target_include_directories(cxxabi SYSTEM BEFORE
PUBLIC $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/include>
PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/include>
PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/src>
)
target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY)
target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast.

2
contrib/libxml2 vendored

@ -1 +1 @@
Subproject commit 18890f471c420411aa3c989e104d090966ec9dbf
Subproject commit a075d256fd9ff15590b86d981b75a50ead124fca

2
contrib/replxx vendored

@ -1 +1 @@
Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d
Subproject commit 6f0b6f151ae2a044625ae93acd19ca365fcea64d

View File

@ -1,4 +1,3 @@
# rebuild in #33610
# docker build -t clickhouse/docs-check .
ARG FROM_TAG=latest
FROM clickhouse/docs-builder:$FROM_TAG

View File

@ -131,9 +131,6 @@ function start()
# use root to match with current uid
clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
sleep 0.5
cat /var/log/clickhouse-server/stdout.log
tail -n200 /var/log/clickhouse-server/stderr.log
tail -n200 /var/log/clickhouse-server/clickhouse-server.log
counter=$((counter + 1))
done
@ -211,14 +208,12 @@ stop
start
clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \
|| echo -e 'Server failed to start\tFAIL' >> /test_output/test_results.tsv
|| (echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
&& grep -Fa "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt)
[ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
[ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL"
# Print Fatal log messages to stdout
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log*
# Grep logs for sanitizer asserts, crashes and other critical errors
# Sanitizer asserts
@ -235,20 +230,26 @@ zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/
|| echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Logical errors
zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
&& echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /test_output/logical_errors.txt \
&& echo -e 'Logical error thrown (see clickhouse-server.log or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv
# Remove file logical_errors.txt if it's empty
[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
# Crash
zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
&& echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog)
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
&& echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /test_output/fatal_messages.txt \
&& echo -e 'Fatal message in clickhouse-server.log (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Remove file fatal_messages.txt if it's empty
[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt
zgrep -Fa "########################################" /test_output/* > /dev/null \
&& echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv
@ -259,12 +260,12 @@ echo -e "Backward compatibility check\n"
echo "Download previous release server"
mkdir previous_release_package_folder
clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \
|| echo -e 'Download script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \
|| echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv
if [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
then
echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/backward_compatibility_check_results.tsv
echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/test_results.tsv
stop
# Uninstall current packages
@ -290,8 +291,8 @@ then
mkdir tmp_stress_output
./stress --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
&& echo -e 'Test script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \
|| echo -e 'Test script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
&& echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv
rm -rf tmp_stress_output
clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables"
@ -301,8 +302,9 @@ then
# Start new server
configure
start 500
clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/backward_compatibility_check_results.tsv \
|| echo -e 'Server failed to start\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \
|| (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \
&& grep -Fa "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt)
clickhouse-client --query="SELECT 'Server version: ', version()"
@ -312,10 +314,12 @@ then
stop
# Error messages (we should ignore some errors)
echo "Check for Error messages in server log:"
zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
-e "Code: 236. DB::Exception: Cancelled mutating parts" \
-e "REPLICA_IS_ALREADY_ACTIVE" \
-e "REPLICA_IS_ALREADY_EXIST" \
-e "ALL_REPLICAS_LOST" \
-e "DDLWorker: Cannot parse DDL task query" \
-e "RaftInstance: failed to accept a rpc connection due to error 125" \
-e "UNKNOWN_DATABASE" \
@ -328,47 +332,53 @@ then
-e "Code: 1000, e.code() = 111, Connection refused" \
-e "UNFINISHED" \
-e "Renaming unexpected part" \
/var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "<Error>" > /dev/null \
&& echo -e 'Error message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|| echo -e 'No Error messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
/var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
&& echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Remove file bc_check_error_messages.txt if it's empty
[ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt
# Sanitizer asserts
zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \
&& echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|| echo -e 'No sanitizer asserts\tOK' >> /test_output/backward_compatibility_check_results.tsv
&& echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv
rm -f /test_output/tmp
# OOM
zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
&& echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|| echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
&& echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Logical errors
zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
&& echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|| echo -e 'No logical errors\tOK' >> /test_output/backward_compatibility_check_results.tsv
echo "Check for Logical errors in server log:"
zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_logical_errors.txt \
&& echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv
# Remove file bc_check_logical_errors.txt if it's empty
[ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt
# Crash
zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
&& echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|| echo -e 'Not crashed\tOK' >> /test_output/backward_compatibility_check_results.tsv
&& echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog)
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
&& echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|| echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
echo "Check for Fatal message in server log:"
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_fatal_messages.txt \
&& echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Remove file bc_check_fatal_messages.txt if it's empty
[ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
else
echo -e "Failed to download previous release packets\tFAIL" >> /test_output/backward_compatibility_check_results.tsv
echo -e "Backward compatibility check: Failed to download previous release packets\tFAIL" >> /test_output/test_results.tsv
fi
zgrep -Fa "FAIL" /test_output/backward_compatibility_check_results.tsv > /dev/null \
&& echo -e 'Backward compatibility check\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check\tOK' >> /test_output/test_results.tsv
# Put logs into /test_output/
for log_file in /var/log/clickhouse-server/clickhouse-server.log*
do

View File

@ -137,7 +137,7 @@ CREATE TABLE test.test_orc
`f_array_array_float` Array(Array(Float32)),
`day` String
)
ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
PARTITION BY day
```

View File

@ -195,5 +195,6 @@ toc_title: Adopters
| <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
| <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
| <a href="https://magenta-technology.ru/sistema-upravleniya-marshrutami-inkassacii-as-strela/" class="favicon">АС "Стрела"</a> | Transportation | — | — | — | [Job posting, Jan 2022](https://vk.com/topic-111905078_35689124?post=3553) |
| <a href="https://piwik.pro/" class="favicon">Piwik PRO</a> | Web Analytics | — | — | — | [Official website, Dec 2018](https://piwik.pro/blog/piwik-pro-clickhouse-faster-efficient-reports/) |
[Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: Caches
# Cache Types {#cache-types}
When performing queries, ClichHouse uses different caches.
When performing queries, ClickHouse uses different caches.
Main cache types:

View File

@ -1616,3 +1616,14 @@ Possible values:
Default value: `10000`.
## global_memory_usage_overcommit_max_wait_microseconds {#global_memory_usage_overcommit_max_wait_microseconds}
Sets maximum waiting time for global overcommit tracker.
Possible values:
- Positive integer.
Default value: `0`.

View File

@ -0,0 +1,31 @@
# Memory overcommit
Memory overcommit is an experimental technique intended to allow to set more flexible memory limits for queries.
The idea of this technique is to introduce settings which can represent guaranteed amount of memory a query can use.
When memory overcommit is enabled and the memory limit is reached ClickHouse will select the most overcommitted query and try to free memory by killing this query.
When memory limit is reached any query will wait some time during atempt to allocate new memory.
If timeout is passed and memory is freed, the query continues execution. Otherwise an exception will be thrown and the query is killed.
Selection of query to stop or kill is performed by either global or user overcommit trackers depending on what memory limit is reached.
## User overcommit tracker
User overcommit tracker finds a query with the biggest overcommit ratio in the user's query list.
Overcommit ratio is computed as number of allocated bytes divided by value of `max_guaranteed_memory_usage` setting.
Waiting timeout is set by `memory_usage_overcommit_max_wait_microseconds` setting.
**Example**
```sql
SELECT number FROM numbers(1000) GROUP BY number SETTINGS max_guaranteed_memory_usage=4000, memory_usage_overcommit_max_wait_microseconds=500
```
## Global overcommit tracker
Global overcommit tracker finds a query with the biggest overcommit ratio in the list of all queries.
In this case overcommit ratio is computed as number of allocated bytes divided by value of `max_guaranteed_memory_usage_for_user` setting.
Waiting timeout is set by `global_memory_usage_overcommit_max_wait_microseconds` parameter in the configuration file.

View File

@ -4220,10 +4220,36 @@ Possible values:
- 0 — Disabled.
- 1 — Enabled. The wait time equal shutdown_wait_unfinished config.
Default value: 0.
Default value: `0`.
## shutdown_wait_unfinished
The waiting time in seconds for currently handled connections when shutdown server.
Default Value: 5.
Default Value: `5`.
## max_guaranteed_memory_usage
Maximum guaranteed memory usage for processing of single query.
It represents soft limit in case when hard limit is reached on user level.
Zero means unlimited.
Read more about [memory overcommit](memory-overcommit.md).
Default value: `0`.
## memory_usage_overcommit_max_wait_microseconds
Maximum time thread will wait for memory to be freed in the case of memory overcommit on a user level.
If the timeout is reached and memory is not freed, an exception is thrown.
Read more about [memory overcommit](memory-overcommit.md).
Default value: `0`.
## max_guaranteed_memory_usage_for_user
Maximum guaranteed memory usage for processing all concurrently running queries for the user.
It represents soft limit in case when hard limit is reached on global level.
Zero means unlimited.
Read more about [memory overcommit](memory-overcommit.md).
Default value: `0`.

View File

@ -10,7 +10,7 @@ cssmin==0.2.0
future==0.18.2
htmlmin==0.1.12
idna==2.10
Jinja2>=3.0.3
Jinja2==3.0.3
jinja2-highlight==0.6.1
jsmin==3.0.0
livereload==2.6.3

View File

@ -140,7 +140,7 @@ CREATE TABLE test.test_orc
`f_array_array_float` Array(Array(Float32)),
`day` String
)
ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
PARTITION BY day
```

View File

@ -15,7 +15,7 @@
```
┌─name─────────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┐
│ sumburConsistentHash │ 0 │ 0 │ │
yandexConsistentHash │ 0 │ 0 │ │
kostikConsistentHash │ 0 │ 0 │ │
│ demangle │ 0 │ 0 │ │
│ addressToLine │ 0 │ 0 │ │
│ JSONExtractRaw │ 0 │ 0 │ │

View File

@ -21,8 +21,12 @@ description: |
This package contains the debugging symbols for clickhouse-common.
contents:
- src: root/usr/lib/debug
dst: /usr/lib/debug
- src: root/usr/lib/debug/usr/bin/clickhouse.debug
dst: /usr/lib/debug/usr/bin/clickhouse.debug
- src: root/usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
dst: /usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
- src: root/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
dst: /usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
# docs
- src: ../AUTHORS
dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS

View File

@ -473,18 +473,11 @@ else ()
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT})
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
endif()
if (NOT INSTALL_STRIPPED_BINARIES)
# Install dunny debug directory
# TODO: move logic to every place where clickhouse_strip_binary is used
add_custom_command(TARGET clickhouse POST_BUILD COMMAND echo > .empty )
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/.empty" DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/.empty)
endif()
if (ENABLE_TESTS)
set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms)
add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS})

View File

@ -137,5 +137,10 @@ if (BUILD_STANDALONE_KEEPER)
add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-keeper)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
endif()

View File

@ -27,5 +27,6 @@ set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECT
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()

View File

@ -42,6 +42,7 @@ endif()
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()

View File

@ -45,6 +45,7 @@
#include <Core/ServerUUID.h>
#include <IO/HTTPCommon.h>
#include <IO/ReadHelpers.h>
#include <IO/IOThreadPool.h>
#include <IO/UseSSL.h>
#include <Interpreters/AsynchronousMetrics.h>
#include <Interpreters/DDLWorker.h>
@ -554,6 +555,10 @@ if (ThreadFuzzer::instance().isEffective())
config().getUInt("thread_pool_queue_size", 10000)
);
IOThreadPool::initialize(
config().getUInt("max_io_thread_pool_size", 100),
config().getUInt("max_io_thread_pool_free_size", 0),
config().getUInt("io_thread_pool_queue_size", 10000));
/// Initialize global local cache for remote filesystem.
if (config().has("local_cache_for_remote_fs"))

View File

@ -29,15 +29,15 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover(
time_t decrease_error_period_,
size_t max_error_cap_)
: Base(std::move(nested_pools_), decrease_error_period_, max_error_cap_, &Poco::Logger::get("ConnectionPoolWithFailover"))
, default_load_balancing(load_balancing)
, get_priority_load_balancing(load_balancing)
{
const std::string & local_hostname = getFQDNOrHostName();
hostname_differences.resize(nested_pools.size());
get_priority_load_balancing.hostname_differences.resize(nested_pools.size());
for (size_t i = 0; i < nested_pools.size(); ++i)
{
ConnectionPool & connection_pool = dynamic_cast<ConnectionPool &>(*nested_pools[i]);
hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost());
get_priority_load_balancing.hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost());
}
}
@ -51,36 +51,15 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts
};
size_t offset = 0;
LoadBalancing load_balancing = get_priority_load_balancing.load_balancing;
if (settings)
offset = settings->load_balancing_first_offset % nested_pools.size();
GetPriorityFunc get_priority;
switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing)
{
case LoadBalancing::NEAREST_HOSTNAME:
get_priority = [&](size_t i) { return hostname_differences[i]; };
break;
case LoadBalancing::IN_ORDER:
get_priority = [](size_t i) { return i; };
break;
case LoadBalancing::RANDOM:
break;
case LoadBalancing::FIRST_OR_RANDOM:
get_priority = [offset](size_t i) -> size_t { return i != offset; };
break;
case LoadBalancing::ROUND_ROBIN:
if (last_used >= nested_pools.size())
last_used = 0;
++last_used;
/* Consider nested_pools.size() equals to 5
* last_used = 1 -> get_priority: 0 1 2 3 4
* last_used = 2 -> get_priority: 4 0 1 2 3
* last_used = 3 -> get_priority: 4 3 0 1 2
* ...
* */
get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; };
break;
offset = settings->load_balancing_first_offset % nested_pools.size();
load_balancing = LoadBalancing(settings->load_balancing);
}
GetPriorityFunc get_priority = get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size());
UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0;
bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true;
@ -173,38 +152,14 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings * settings)
{
size_t offset = 0;
LoadBalancing load_balancing = get_priority_load_balancing.load_balancing;
if (settings)
offset = settings->load_balancing_first_offset % nested_pools.size();
GetPriorityFunc get_priority;
switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing)
{
case LoadBalancing::NEAREST_HOSTNAME:
get_priority = [&](size_t i) { return hostname_differences[i]; };
break;
case LoadBalancing::IN_ORDER:
get_priority = [](size_t i) { return i; };
break;
case LoadBalancing::RANDOM:
break;
case LoadBalancing::FIRST_OR_RANDOM:
get_priority = [offset](size_t i) -> size_t { return i != offset; };
break;
case LoadBalancing::ROUND_ROBIN:
if (last_used >= nested_pools.size())
last_used = 0;
++last_used;
/* Consider nested_pools.size() equals to 5
* last_used = 1 -> get_priority: 0 1 2 3 4
* last_used = 2 -> get_priority: 5 0 1 2 3
* last_used = 3 -> get_priority: 5 4 0 1 2
* ...
* */
get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; };
break;
offset = settings->load_balancing_first_offset % nested_pools.size();
load_balancing = LoadBalancing(settings->load_balancing);
}
return get_priority;
return get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size());
}
std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyImpl(

View File

@ -1,6 +1,7 @@
#pragma once
#include <Common/PoolWithFailoverBase.h>
#include <Common/GetPriorityForLoadBalancing.h>
#include <Client/ConnectionPool.h>
#include <chrono>
@ -109,9 +110,7 @@ private:
GetPriorityFunc makeGetPriorityFunc(const Settings * settings);
std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools.
size_t last_used = 0; /// Last used for round_robin policy.
LoadBalancing default_load_balancing;
GetPriorityForLoadBalancing get_priority_load_balancing;
};
using ConnectionPoolWithFailoverPtr = std::shared_ptr<ConnectionPoolWithFailover>;

View File

@ -83,11 +83,20 @@ size_t extractMaskNumericImpl(
const PaddedPODArray<UInt8> * null_bytemap,
PaddedPODArray<UInt8> * nulls)
{
if constexpr (!column_is_short)
{
if (data.size() != mask.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask");
}
size_t ones_count = 0;
size_t data_index = 0;
size_t mask_size = mask.size();
for (size_t i = 0; i != mask_size; ++i)
size_t mask_size = mask.size();
size_t data_size = data.size();
size_t i = 0;
for (; i != mask_size && data_index != data_size; ++i)
{
// Change mask only where value is 1.
if (!mask[i])
@ -120,6 +129,13 @@ size_t extractMaskNumericImpl(
mask[i] = value;
}
if constexpr (column_is_short)
{
if (data_index != data_size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask");
}
return ones_count;
}

View File

@ -113,5 +113,35 @@ public:
}
};
class SynchronizedArenaWithFreeLists : private ArenaWithFreeLists
{
public:
explicit SynchronizedArenaWithFreeLists(
const size_t initial_size = 4096, const size_t growth_factor = 2,
const size_t linear_growth_threshold = 128 * 1024 * 1024)
: ArenaWithFreeLists{initial_size, growth_factor, linear_growth_threshold}
{}
char * alloc(const size_t size)
{
std::lock_guard lock{mutex};
return ArenaWithFreeLists::alloc(size);
}
void free(char * ptr, const size_t size)
{
std::lock_guard lock{mutex};
return ArenaWithFreeLists::free(ptr, size);
}
/// Size of the allocated pool in bytes
size_t size() const
{
std::lock_guard lock{mutex};
return ArenaWithFreeLists::size();
}
private:
mutable std::mutex mutex;
};
}

View File

@ -31,8 +31,8 @@ public:
/// probably it worth to try to increase stack size for coroutines.
///
/// Current value is just enough for all tests in our CI. It's not selected in some special
/// way. We will have 40 pages with 4KB page size.
static constexpr size_t default_stack_size = 192 * 1024; /// 64KB was not enough for tests
/// way. We will have 80 pages with 4KB page size.
static constexpr size_t default_stack_size = 320 * 1024; /// 64KB was not enough for tests
explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_)
{

View File

@ -0,0 +1,49 @@
#include <Common/GetPriorityForLoadBalancing.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
std::function<size_t(size_t index)> GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const
{
std::function<size_t(size_t index)> get_priority;
switch (load_balance)
{
case LoadBalancing::NEAREST_HOSTNAME:
if (hostname_differences.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "It's a bug: hostname_differences is not initialized");
get_priority = [&](size_t i) { return hostname_differences[i]; };
break;
case LoadBalancing::IN_ORDER:
get_priority = [](size_t i) { return i; };
break;
case LoadBalancing::RANDOM:
break;
case LoadBalancing::FIRST_OR_RANDOM:
get_priority = [offset](size_t i) -> size_t { return i != offset; };
break;
case LoadBalancing::ROUND_ROBIN:
if (last_used >= pool_size)
last_used = 0;
++last_used;
/* Consider pool_size equals to 5
* last_used = 1 -> get_priority: 0 1 2 3 4
* last_used = 2 -> get_priority: 4 0 1 2 3
* last_used = 3 -> get_priority: 4 3 0 1 2
* ...
* */
get_priority = [&](size_t i)
{
++i;
return i < last_used ? pool_size - i : i - last_used;
};
break;
}
return get_priority;
}
}

View File

@ -0,0 +1,34 @@
#pragma once
#include <Core/SettingsEnums.h>
namespace DB
{
class GetPriorityForLoadBalancing
{
public:
GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {}
GetPriorityForLoadBalancing(){}
bool operator == (const GetPriorityForLoadBalancing & other) const
{
return load_balancing == other.load_balancing && hostname_differences == other.hostname_differences;
}
bool operator != (const GetPriorityForLoadBalancing & other) const
{
return !(*this == other);
}
std::function<size_t(size_t index)> getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const;
std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools.
LoadBalancing load_balancing = LoadBalancing::RANDOM;
private:
mutable size_t last_used = 0; /// Last used for round_robin policy.
};
}

View File

@ -13,6 +13,9 @@ Int32 IntervalKind::toAvgSeconds() const
{
switch (kind)
{
case IntervalKind::Nanosecond: return 0; /// fractional parts of seconds have 0 seconds
case IntervalKind::Microsecond: return 0;
case IntervalKind::Millisecond: return 0;
case IntervalKind::Second: return 1;
case IntervalKind::Minute: return 60;
case IntervalKind::Hour: return 3600;
@ -52,6 +55,9 @@ const char * IntervalKind::toKeyword() const
{
switch (kind)
{
case IntervalKind::Nanosecond: return "NANOSECOND";
case IntervalKind::Microsecond: return "MICROSECOND";
case IntervalKind::Millisecond: return "MILLISECOND";
case IntervalKind::Second: return "SECOND";
case IntervalKind::Minute: return "MINUTE";
case IntervalKind::Hour: return "HOUR";
@ -69,6 +75,9 @@ const char * IntervalKind::toLowercasedKeyword() const
{
switch (kind)
{
case IntervalKind::Nanosecond: return "nanosecond";
case IntervalKind::Microsecond: return "microsecond";
case IntervalKind::Millisecond: return "millisecond";
case IntervalKind::Second: return "second";
case IntervalKind::Minute: return "minute";
case IntervalKind::Hour: return "hour";
@ -86,6 +95,12 @@ const char * IntervalKind::toDateDiffUnit() const
{
switch (kind)
{
case IntervalKind::Nanosecond:
return "nanosecond";
case IntervalKind::Microsecond:
return "microsecond";
case IntervalKind::Millisecond:
return "millisecond";
case IntervalKind::Second:
return "second";
case IntervalKind::Minute:
@ -111,6 +126,12 @@ const char * IntervalKind::toNameOfFunctionToIntervalDataType() const
{
switch (kind)
{
case IntervalKind::Nanosecond:
return "toIntervalNanosecond";
case IntervalKind::Microsecond:
return "toIntervalMicrosecond";
case IntervalKind::Millisecond:
return "toIntervalMillisecond";
case IntervalKind::Second:
return "toIntervalSecond";
case IntervalKind::Minute:
@ -136,6 +157,12 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const
{
switch (kind)
{
case IntervalKind::Nanosecond:
return "toNanosecond";
case IntervalKind::Microsecond:
return "toMicrosecond";
case IntervalKind::Millisecond:
return "toMillisecond";
case IntervalKind::Second:
return "toSecond";
case IntervalKind::Minute:
@ -162,6 +189,21 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const
bool IntervalKind::tryParseString(const std::string & kind, IntervalKind::Kind & result)
{
if ("nanosecond" == kind)
{
result = IntervalKind::Nanosecond;
return true;
}
if ("microsecond" == kind)
{
result = IntervalKind::Microsecond;
return true;
}
if ("millisecond" == kind)
{
result = IntervalKind::Millisecond;
return true;
}
if ("second" == kind)
{
result = IntervalKind::Second;

View File

@ -10,6 +10,9 @@ struct IntervalKind
{
enum Kind
{
Nanosecond,
Microsecond,
Millisecond,
Second,
Minute,
Hour,
@ -61,6 +64,9 @@ struct IntervalKind
/// NOLINTNEXTLINE
#define FOR_EACH_INTERVAL_KIND(M) \
M(Nanosecond) \
M(Microsecond) \
M(Millisecond) \
M(Second) \
M(Minute) \
M(Hour) \

View File

@ -23,6 +23,12 @@ void OvercommitTracker::setMaxWaitTime(UInt64 wait_time)
bool OvercommitTracker::needToStopQuery(MemoryTracker * tracker)
{
// NOTE: Do not change the order of locks
//
// global_mutex must be acquired before overcommit_m, because
// method OvercommitTracker::unsubscribe(MemoryTracker *) is
// always called with already acquired global_mutex in
// ProcessListEntry::~ProcessListEntry().
std::unique_lock<std::mutex> global_lock(global_mutex);
std::unique_lock<std::mutex> lk(overcommit_m);
@ -76,7 +82,7 @@ void UserOvercommitTracker::pickQueryToExcludeImpl()
MemoryTracker * query_tracker = nullptr;
OvercommitRatio current_ratio{0, 0};
// At this moment query list must be read only.
// BlockQueryIfMemoryLimit is used in ProcessList to guarantee this.
// This is guaranteed by locking global_mutex in OvercommitTracker::needToStopQuery.
auto & queries = user_process_list->queries;
LOG_DEBUG(logger, "Trying to choose query to stop from {} queries", queries.size());
for (auto const & query : queries)
@ -111,9 +117,9 @@ void GlobalOvercommitTracker::pickQueryToExcludeImpl()
MemoryTracker * query_tracker = nullptr;
OvercommitRatio current_ratio{0, 0};
// At this moment query list must be read only.
// BlockQueryIfMemoryLimit is used in ProcessList to guarantee this.
LOG_DEBUG(logger, "Trying to choose query to stop");
process_list->processEachQueryStatus([&](DB::QueryStatus const & query)
// This is guaranteed by locking global_mutex in OvercommitTracker::needToStopQuery.
LOG_DEBUG(logger, "Trying to choose query to stop from {} queries", process_list->size());
for (auto const & query : process_list->processes)
{
if (query.isKilled())
return;
@ -134,7 +140,7 @@ void GlobalOvercommitTracker::pickQueryToExcludeImpl()
query_tracker = memory_tracker;
current_ratio = ratio;
}
});
}
LOG_DEBUG(logger, "Selected to stop query with overcommit ratio {}/{}",
current_ratio.committed, current_ratio.soft_limit);
picked_tracker = query_tracker;

View File

@ -43,8 +43,6 @@ class MemoryTracker;
// is killed to free memory.
struct OvercommitTracker : boost::noncopyable
{
explicit OvercommitTracker(std::mutex & global_mutex_);
void setMaxWaitTime(UInt64 wait_time);
bool needToStopQuery(MemoryTracker * tracker);
@ -54,8 +52,12 @@ struct OvercommitTracker : boost::noncopyable
virtual ~OvercommitTracker() = default;
protected:
explicit OvercommitTracker(std::mutex & global_mutex_);
virtual void pickQueryToExcludeImpl() = 0;
// This mutex is used to disallow concurrent access
// to picked_tracker and cancelation_state variables.
mutable std::mutex overcommit_m;
mutable std::condition_variable cv;
@ -87,6 +89,11 @@ private:
}
}
// Global mutex which is used in ProcessList to synchronize
// insertion and deletion of queries.
// OvercommitTracker::pickQueryToExcludeImpl() implementations
// require this mutex to be locked, because they read list (or sublist)
// of queries.
std::mutex & global_mutex;
};

View File

@ -515,6 +515,11 @@ public:
radixSortLSDInternal<false>(arr, size, false, nullptr);
}
static void executeLSD(Element * arr, size_t size, bool reverse)
{
radixSortLSDInternal<false>(arr, size, reverse, nullptr);
}
/** This function will start to sort inplace (modify 'arr')
* but on the last step it will write result directly to the destination
* instead of finishing sorting 'arr'.

View File

@ -22,7 +22,6 @@ target_link_libraries (clickhouse_common_zookeeper_no_log
PRIVATE
string_utils
)
if (ENABLE_EXAMPLES)
add_subdirectory(examples)
endif()

View File

@ -5,15 +5,15 @@
#include <functional>
#include <filesystem>
#include <pcg-random/pcg_random.hpp>
#include <base/logger_useful.h>
#include <base/find_symbols.h>
#include <Common/randomSeed.h>
#include <base/getFQDNOrHostName.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
#include <Common/isLocalAddress.h>
#include <Poco/Net/NetException.h>
#include <Poco/Net/DNS.h>
#define ZOOKEEPER_CONNECTION_TIMEOUT_MS 1000
@ -48,7 +48,7 @@ static void check(Coordination::Error code, const std::string & path)
void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_,
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_)
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
{
log = &Poco::Logger::get("ZooKeeper");
hosts = hosts_;
@ -57,6 +57,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
operation_timeout_ms = operation_timeout_ms_;
chroot = chroot_;
implementation = implementation_;
get_priority_load_balancing = get_priority_load_balancing_;
if (implementation == "zookeeper")
{
@ -66,14 +67,13 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
Coordination::ZooKeeper::Nodes nodes;
nodes.reserve(hosts.size());
Strings shuffled_hosts = hosts;
/// Shuffle the hosts to distribute the load among ZooKeeper nodes.
pcg64 generator(randomSeed());
std::shuffle(shuffled_hosts.begin(), shuffled_hosts.end(), generator);
std::vector<ShuffleHost> shuffled_hosts = shuffleHosts();
bool dns_error = false;
for (auto & host_string : shuffled_hosts)
for (auto & host : shuffled_hosts)
{
auto & host_string = host.host;
try
{
bool secure = bool(startsWith(host_string, "secure://"));
@ -81,6 +81,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
if (secure)
host_string.erase(0, strlen("secure://"));
LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, Poco::Net::SocketAddress{host_string}.toString());
nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure});
}
catch (const Poco::Net::HostNotFoundException & e)
@ -154,23 +155,47 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
}
}
std::vector<ShuffleHost> ZooKeeper::shuffleHosts() const
{
std::function<size_t(size_t index)> get_priority = get_priority_load_balancing.getPriorityFunc(get_priority_load_balancing.load_balancing, 0, hosts.size());
std::vector<ShuffleHost> shuffle_hosts;
for (size_t i = 0; i < hosts.size(); ++i)
{
ShuffleHost shuffle_host;
shuffle_host.host = hosts[i];
if (get_priority)
shuffle_host.priority = get_priority(i);
shuffle_host.randomize();
shuffle_hosts.emplace_back(shuffle_host);
}
std::sort(
shuffle_hosts.begin(), shuffle_hosts.end(),
[](const ShuffleHost & lhs, const ShuffleHost & rhs)
{
return ShuffleHost::compare(lhs, rhs);
});
return shuffle_hosts;
}
ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_,
int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_,
std::shared_ptr<DB::ZooKeeperLog> zk_log_)
std::shared_ptr<DB::ZooKeeperLog> zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
{
zk_log = std::move(zk_log_);
Strings hosts_strings;
splitInto<','>(hosts_strings, hosts_string);
init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_);
init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_);
}
ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_,
int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_,
std::shared_ptr<DB::ZooKeeperLog> zk_log_)
std::shared_ptr<DB::ZooKeeperLog> zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
{
zk_log = std::move(zk_log_);
init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_);
init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_);
}
struct ZooKeeperArgs
@ -213,6 +238,15 @@ struct ZooKeeperArgs
{
implementation = config.getString(config_name + "." + key);
}
else if (key == "zookeeper_load_balancing")
{
String load_balancing_str = config.getString(config_name + "." + key);
/// Use magic_enum to avoid dependency from dbms (`SettingFieldLoadBalancingTraits::fromString(...)`)
auto load_balancing = magic_enum::enum_cast<DB::LoadBalancing>(Poco::toUpper(load_balancing_str));
if (!load_balancing)
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str);
get_priority_load_balancing.load_balancing = *load_balancing;
}
else
throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS);
}
@ -224,6 +258,15 @@ struct ZooKeeperArgs
if (chroot.back() == '/')
chroot.pop_back();
}
/// init get_priority_load_balancing
get_priority_load_balancing.hostname_differences.resize(hosts.size());
const String & local_hostname = getFQDNOrHostName();
for (size_t i = 0; i < hosts.size(); ++i)
{
const String & node_host = hosts[i].substr(0, hosts[i].find_last_of(':'));
get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, node_host);
}
}
Strings hosts;
@ -232,13 +275,14 @@ struct ZooKeeperArgs
int operation_timeout_ms;
std::string chroot;
std::string implementation;
GetPriorityForLoadBalancing get_priority_load_balancing;
};
ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_)
: zk_log(std::move(zk_log_))
{
ZooKeeperArgs args(config, config_name);
init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot);
init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing);
}
bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) const
@ -249,8 +293,11 @@ bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config,
if (args.implementation == implementation && implementation == "testkeeper")
return false;
return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot)
!= std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot);
if (args.get_priority_load_balancing != get_priority_load_balancing)
return true;
return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing)
!= std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, args.get_priority_load_balancing);
}
@ -757,7 +804,7 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &
ZooKeeperPtr ZooKeeper::startNewSession() const
{
return std::make_shared<ZooKeeper>(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log);
return std::make_shared<ZooKeeper>(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log, get_priority_load_balancing);
}

View File

@ -13,7 +13,10 @@
#include <Common/Stopwatch.h>
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/ZooKeeper/ZooKeeperConstants.h>
#include <Common/GetPriorityForLoadBalancing.h>
#include <Common/thread_local_rng.h>
#include <unistd.h>
#include <random>
namespace ProfileEvents
@ -37,6 +40,25 @@ namespace zkutil
/// Preferred size of multi() command (in number of ops)
constexpr size_t MULTI_BATCH_SIZE = 100;
struct ShuffleHost
{
String host;
Int64 priority = 0;
UInt32 random = 0;
void randomize()
{
random = thread_local_rng();
}
static bool compare(const ShuffleHost & lhs, const ShuffleHost & rhs)
{
return std::forward_as_tuple(lhs.priority, lhs.random)
< std::forward_as_tuple(rhs.priority, rhs.random);
}
};
using GetPriorityForLoadBalancing = DB::GetPriorityForLoadBalancing;
/// ZooKeeper session. The interface is substantially different from the usual libzookeeper API.
///
@ -58,14 +80,16 @@ public:
int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS,
const std::string & chroot_ = "",
const std::string & implementation_ = "zookeeper",
std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr);
std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr,
const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {});
explicit ZooKeeper(const Strings & hosts_, const std::string & identity_ = "",
int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS,
int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS,
const std::string & chroot_ = "",
const std::string & implementation_ = "zookeeper",
std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr);
std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr,
const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {});
/** Config of the form:
<zookeeper>
@ -91,6 +115,8 @@ public:
*/
ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_);
std::vector<ShuffleHost> shuffleHosts() const;
/// Creates a new session with the same parameters. This method can be used for reconnecting
/// after the session has expired.
/// This object remains unchanged, and the new session is returned.
@ -284,7 +310,7 @@ private:
friend class EphemeralNodeHolder;
void init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_,
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_);
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_);
/// The following methods don't any throw exceptions but return error codes.
Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created);
@ -311,6 +337,8 @@ private:
Poco::Logger * log = nullptr;
std::shared_ptr<DB::ZooKeeperLog> zk_log;
GetPriorityForLoadBalancing get_priority_load_balancing;
AtomicStopwatch session_uptime;
};

View File

@ -451,7 +451,7 @@ void ZooKeeper::connect(
}
else
{
LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}", socket.peerAddress().toString(), session_id);
LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}{}", socket.peerAddress().toString(), session_id, fail_reasons.str());
}
}

View File

@ -11,7 +11,7 @@
constexpr size_t IPV4_BINARY_LENGTH = 4;
constexpr size_t IPV6_BINARY_LENGTH = 16;
constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte.
constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
constexpr size_t IPV6_MAX_TEXT_LENGTH = 45; /// Does not count tail zero byte.
namespace DB
{

View File

@ -124,6 +124,7 @@ bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_
size_t getHostNameDifference(const std::string & local_hostname, const std::string & host)
{
/// FIXME should we replace it with Levenstein distance? (we already have it in NamePrompter)
size_t hostname_difference = 0;
for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i)
if (local_hostname[i] != host[i])

View File

@ -13,6 +13,7 @@
#include <iterator>
#include <base/sort.h>
#include <boost/algorithm/string.hpp>
namespace DB
@ -269,8 +270,18 @@ const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const
}
const ColumnWithTypeAndName * Block::findByName(const std::string & name) const
const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool case_insensitive) const
{
if (case_insensitive)
{
auto found = std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); });
if (found == data.end())
{
return nullptr;
}
return &*found;
}
auto it = index_by_name.find(name);
if (index_by_name.end() == it)
{
@ -280,19 +291,23 @@ const ColumnWithTypeAndName * Block::findByName(const std::string & name) const
}
const ColumnWithTypeAndName & Block::getByName(const std::string & name) const
const ColumnWithTypeAndName & Block::getByName(const std::string & name, bool case_insensitive) const
{
const auto * result = findByName(name);
const auto * result = findByName(name, case_insensitive);
if (!result)
throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
throw Exception(
"Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
return *result;
}
bool Block::has(const std::string & name) const
bool Block::has(const std::string & name, bool case_insensitive) const
{
if (case_insensitive)
return std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); })
!= data.end();
return index_by_name.end() != index_by_name.find(name);
}
@ -301,8 +316,8 @@ size_t Block::getPositionByName(const std::string & name) const
{
auto it = index_by_name.find(name);
if (index_by_name.end() == it)
throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
throw Exception(
"Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
return it->second;
}

View File

@ -60,21 +60,21 @@ public:
ColumnWithTypeAndName & safeGetByPosition(size_t position);
const ColumnWithTypeAndName & safeGetByPosition(size_t position) const;
ColumnWithTypeAndName* findByName(const std::string & name)
ColumnWithTypeAndName* findByName(const std::string & name, bool case_insensitive = false)
{
return const_cast<ColumnWithTypeAndName *>(
const_cast<const Block *>(this)->findByName(name));
const_cast<const Block *>(this)->findByName(name, case_insensitive));
}
const ColumnWithTypeAndName * findByName(const std::string & name) const;
const ColumnWithTypeAndName * findByName(const std::string & name, bool case_insensitive = false) const;
ColumnWithTypeAndName & getByName(const std::string & name)
ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false)
{
return const_cast<ColumnWithTypeAndName &>(
const_cast<const Block *>(this)->getByName(name));
const_cast<const Block *>(this)->getByName(name, case_insensitive));
}
const ColumnWithTypeAndName & getByName(const std::string & name) const;
const ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false) const;
Container::iterator begin() { return data.begin(); }
Container::iterator end() { return data.end(); }
@ -83,7 +83,7 @@ public:
Container::const_iterator cbegin() const { return data.cbegin(); }
Container::const_iterator cend() const { return data.cend(); }
bool has(const std::string & name) const;
bool has(const std::string & name, bool case_insensitive = false) const;
size_t getPositionByName(const std::string & name) const;

View File

@ -47,6 +47,8 @@ class IColumn;
M(UInt64, max_insert_delayed_streams_for_parallel_write, 0, "The maximum number of streams (columns) to delay final part flush. Default - auto (1000 in case of underlying storage supports parallel write, for example S3 and disabled otherwise)", 0) \
M(UInt64, max_final_threads, 16, "The maximum number of threads to read from table with FINAL.", 0) \
M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \
M(MaxThreads, max_download_threads, 4, "The maximum number of threads to download data (e.g. for URL engine).", 0) \
M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. for URL engine) per each thread.", 0) \
M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \
M(UInt64, max_distributed_connections, 1024, "The maximum number of connections for distributed processing of one query (should be greater than max_threads).", 0) \
M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, "Which part of the query can be read into RAM for parsing (the remaining data for INSERT, if any, is read later)", 0) \
@ -614,11 +616,13 @@ class IColumn;
M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \
M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \
M(Bool, input_format_use_lowercase_column_name, false, "Use lowercase column name while reading input formats", 0) \
M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \
M(Bool, input_format_arrow_case_insensitive_column_matching, false, "Ignore case when matching Arrow columns with CH columns.", 0) \
M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \
M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \
M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \
M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \
M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \
M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \
M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \

View File

@ -149,4 +149,5 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS,
{"str", FormatSettings::MsgPackUUIDRepresentation::STR},
{"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}})
}

View File

@ -13,6 +13,9 @@ bool DataTypeInterval::equals(const IDataType & rhs) const
void registerDataTypeInterval(DataTypeFactory & factory)
{
factory.registerSimpleDataType("IntervalNanosecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Nanosecond)); });
factory.registerSimpleDataType("IntervalMicrosecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Microsecond)); });
factory.registerSimpleDataType("IntervalMillisecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Millisecond)); });
factory.registerSimpleDataType("IntervalSecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Second)); });
factory.registerSimpleDataType("IntervalMinute", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Minute)); });
factory.registerSimpleDataType("IntervalHour", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Hour)); });

View File

@ -15,6 +15,8 @@
#include <Parsers/IAST.h>
#include <boost/algorithm/string/case_conv.hpp>
namespace DB
{
@ -227,14 +229,17 @@ void validateArraySizes(const Block & block)
}
std::unordered_set<String> getAllTableNames(const Block & block)
std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case)
{
std::unordered_set<String> nested_table_names;
for (auto & name : block.getNames())
for (const auto & name : block.getNames())
{
auto nested_table_name = Nested::extractTableName(name);
if (to_lower_case)
boost::to_lower(nested_table_name);
if (!nested_table_name.empty())
nested_table_names.insert(nested_table_name);
nested_table_names.insert(std::move(nested_table_name));
}
return nested_table_names;
}

View File

@ -32,7 +32,7 @@ namespace Nested
void validateArraySizes(const Block & block);
/// Get all nested tables names from a block.
std::unordered_set<String> getAllTableNames(const Block & block);
std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case = false);
}
}

View File

@ -88,6 +88,9 @@ DatabaseReplicated::DatabaseReplicated(
/// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
if (zookeeper_path.front() != '/')
zookeeper_path = "/" + zookeeper_path;
if (!db_settings.collection_name.value.empty())
fillClusterAuthInfo(db_settings.collection_name.value, context_->getConfigRef());
}
String DatabaseReplicated::getFullReplicaName() const
@ -191,22 +194,36 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
shards.back().emplace_back(unescapeForFileName(host_port));
}
String username = db_settings.cluster_username;
String password = db_settings.cluster_password;
UInt16 default_port = getContext()->getTCPPort();
bool secure = db_settings.cluster_secure_connection;
bool treat_local_as_remote = false;
bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL;
return std::make_shared<Cluster>(
getContext()->getSettingsRef(),
shards,
username,
password,
cluster_auth_info.cluster_username,
cluster_auth_info.cluster_password,
default_port,
treat_local_as_remote,
treat_local_port_as_remote,
secure);
cluster_auth_info.cluster_secure_connection,
/*priority=*/1,
database_name,
cluster_auth_info.cluster_secret);
}
void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config_ref)
{
const auto & config_prefix = fmt::format("named_collections.{}", collection_name);
if (!config_ref.has(config_prefix))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name);
cluster_auth_info.cluster_username = config_ref.getString(config_prefix + ".cluster_username", "");
cluster_auth_info.cluster_password = config_ref.getString(config_prefix + ".cluster_password", "");
cluster_auth_info.cluster_secret = config_ref.getString(config_prefix + ".cluster_secret", "");
cluster_auth_info.cluster_secure_connection = config_ref.getBool(config_prefix + ".cluster_secure_connection", false);
}
void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(bool force_attach)

View File

@ -75,6 +75,16 @@ private:
bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
struct
{
String cluster_username{"default"};
String cluster_password;
String cluster_secret;
bool cluster_secure_connection{false};
} cluster_auth_info;
void fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config);
void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const;
void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 max_log_ptr);

View File

@ -11,9 +11,8 @@ class ASTStorage;
M(Float, max_broken_tables_ratio, 0.5, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \
M(UInt64, max_replication_lag_to_enqueue, 10, "Replica will throw exception on attempt to execute query if its replication lag greater", 0) \
M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \
M(String, cluster_username, "default", "Username to use when connecting to hosts of cluster", 0) \
M(String, cluster_password, "", "Password to use when connecting to hosts of cluster", 0) \
M(Bool, cluster_secure_connection, false, "Enable TLS when connecting to hosts of cluster", 0) \
M(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \
DECLARE_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS)

View File

@ -20,6 +20,7 @@
#include <Common/getRandomASCIIString.h>
#include <Interpreters/Context.h>
#include <Interpreters/threadPoolCallbackRunner.h>
#include <IO/ReadBufferFromS3.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
@ -264,32 +265,6 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
LOG_TRACE(log, "{} to file by path: {}. S3 path: {}",
mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name);
ScheduleFunc schedule = [pool = &getThreadPoolWriter(), thread_group = CurrentThread::getGroup()](auto callback)
{
pool->scheduleOrThrow([callback = std::move(callback), thread_group]()
{
if (thread_group)
CurrentThread::attachTo(thread_group);
SCOPE_EXIT_SAFE(
if (thread_group)
CurrentThread::detachQueryIfNotDetached();
/// After we detached from the thread_group, parent for memory_tracker inside ThreadStatus will be reset to it's parent.
/// Typically, it may be changes from Process to User.
/// Usually it could be ok, because thread pool task is executed before user-level memory tracker is destroyed.
/// However, thread could stay alive inside the thread pool, and it's ThreadStatus as well.
/// When, finally, we destroy the thread (and the ThreadStatus),
/// it can use memory tracker in the ~ThreadStatus in order to alloc/free untracked_memory,\
/// and by this time user-level memory tracker may be already destroyed.
///
/// As a work-around, reset memory tracker to total, which is always alive.
CurrentThread::get().memory_tracker.setParent(&total_memory_tracker);
);
callback();
});
};
auto s3_buffer = std::make_unique<WriteBufferFromS3>(
settings->client,
bucket,
@ -299,7 +274,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
settings->s3_upload_part_size_multiply_parts_count_threshold,
settings->s3_max_single_part_upload_size,
std::move(object_metadata),
buf_size, std::move(schedule));
buf_size, threadPoolCallbackRunner(getThreadPoolWriter()));
auto create_metadata_callback = [this, path, blob_name, mode] (size_t count)
{

View File

@ -89,10 +89,10 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
format_settings.null_as_default = settings.input_format_null_as_default;
format_settings.use_lowercase_column_name = settings.input_format_use_lowercase_column_name;
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
format_settings.parquet.import_nested = settings.input_format_parquet_import_nested;
format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching;
format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
format_settings.pretty.color = settings.output_format_pretty_color;
@ -123,9 +123,11 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary;
format_settings.arrow.import_nested = settings.input_format_arrow_import_nested;
format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns;
format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching;
format_settings.orc.import_nested = settings.input_format_orc_import_nested;
format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching;
format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
format_settings.seekable_read = settings.input_format_allow_seeks;

View File

@ -32,7 +32,6 @@ struct FormatSettings
bool null_as_default = true;
bool decimal_trailing_zeros = false;
bool defaults_for_omitted_fields = true;
bool use_lowercase_column_name = false;
bool seekable_read = true;
UInt64 max_rows_to_read_for_schema_inference = 100;
@ -75,6 +74,7 @@ struct FormatSettings
bool low_cardinality_as_dictionary = false;
bool import_nested = false;
bool allow_missing_columns = false;
bool case_insensitive_column_matching = false;
} arrow;
struct
@ -137,6 +137,7 @@ struct FormatSettings
UInt64 row_group_size = 1000000;
bool import_nested = false;
bool allow_missing_columns = false;
bool case_insensitive_column_matching = false;
} parquet;
struct Pretty
@ -217,6 +218,7 @@ struct FormatSettings
bool import_nested = false;
bool allow_missing_columns = false;
int64_t row_batch_size = 100'000;
bool case_insensitive_column_matching = false;
} orc;
/// For capnProto format we should determine how to

View File

@ -41,6 +41,11 @@ namespace ErrorCodes
throw Exception("Illegal type Date of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
static inline UInt32 dateTimeIsNotSupported(const char * name)
{
throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
/// This factor transformation will say that the function is monotone everywhere.
struct ZeroTransform
{
@ -311,6 +316,133 @@ struct ToStartOfSecondImpl
using FactorTransform = ZeroTransform;
};
struct ToStartOfMillisecondImpl
{
static constexpr auto name = "toStartOfMillisecond";
static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &)
{
// given that scale is 6, scale_multiplier is 1000000
// for DateTime64 value of 123.456789:
// 123456789 - 789 = 123456000
// for DateTime64 value of -123.456789:
// -123456789 - (1000 + (-789)) = -123457000
if (scale_multiplier == 1000)
{
return datetime64;
}
else if (scale_multiplier <= 1000)
{
return datetime64 * (1000 / scale_multiplier);
}
else
{
auto droppable_part_with_sign = DecimalUtils::getFractionalPartWithScaleMultiplier<DateTime64, true>(datetime64, scale_multiplier / 1000);
if (droppable_part_with_sign < 0)
droppable_part_with_sign += scale_multiplier;
return datetime64 - droppable_part_with_sign;
}
}
static inline UInt32 execute(UInt32, const DateLUTImpl &)
{
throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
static inline UInt32 execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline UInt32 execute(UInt16, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
using FactorTransform = ZeroTransform;
};
struct ToStartOfMicrosecondImpl
{
static constexpr auto name = "toStartOfMicrosecond";
static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &)
{
// @see ToStartOfMillisecondImpl
if (scale_multiplier == 1000000)
{
return datetime64;
}
else if (scale_multiplier <= 1000000)
{
return datetime64 * (1000000 / scale_multiplier);
}
else
{
auto droppable_part_with_sign = DecimalUtils::getFractionalPartWithScaleMultiplier<DateTime64, true>(datetime64, scale_multiplier / 1000000);
if (droppable_part_with_sign < 0)
droppable_part_with_sign += scale_multiplier;
return datetime64 - droppable_part_with_sign;
}
}
static inline UInt32 execute(UInt32, const DateLUTImpl &)
{
throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
static inline UInt32 execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline UInt32 execute(UInt16, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
using FactorTransform = ZeroTransform;
};
struct ToStartOfNanosecondImpl
{
static constexpr auto name = "toStartOfNanosecond";
static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &)
{
// @see ToStartOfMillisecondImpl
if (scale_multiplier == 1000000000)
{
return datetime64;
}
else if (scale_multiplier <= 1000000000)
{
return datetime64 * (1000000000 / scale_multiplier);
}
else
{
throw Exception("Illegal type of argument for function " + std::string(name) + ", DateTime64 expected", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}
static inline UInt32 execute(UInt32, const DateLUTImpl &)
{
throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
static inline UInt32 execute(Int32, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
static inline UInt32 execute(UInt16, const DateLUTImpl &)
{
return dateIsNotSupported(name);
}
using FactorTransform = ZeroTransform;
};
struct ToStartOfFiveMinuteImpl
{
static constexpr auto name = "toStartOfFiveMinute";

View File

@ -40,26 +40,158 @@ namespace ErrorCodes
/// - 'AddSecondsImpl::execute(UInt32, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(DateTime, ...) -> DateTime'
/// - 'AddSecondsImpl::execute(UInt16, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(Date, ...) -> DateTime'
struct AddNanosecondsImpl
{
static constexpr auto name = "addNanoseconds";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(9 - scale);
auto division = std::div(t.fractional * multiplier + delta, static_cast<Int64>(1000000000));
return {t.whole * multiplier + division.quot, t.fractional * multiplier + delta};
}
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(9 - scale);
return t * multiplier + delta;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(9);
return t * multiplier + delta;
}
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
{
throw Exception("addNanoSeconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
{
throw Exception("addNanoSeconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
};
struct AddMicrosecondsImpl
{
static constexpr auto name = "addMicroseconds";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(6 - scale));
if (scale <= 6)
{
auto division = std::div((t.fractional + delta), static_cast<Int64>(10e6));
return {t.whole * multiplier + division.quot, division.rem};
}
else
{
auto division = std::div((t.fractional + delta * multiplier), static_cast<Int64>(10e6 * multiplier));
return {t.whole + division.quot, division.rem};
}
}
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(6 - scale));
return scale <= 6 ? t * multiplier + delta : t + delta * multiplier;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(6);
return t * multiplier + delta;
}
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
{
throw Exception("addMicroSeconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
{
throw Exception("addMicroSeconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
};
struct AddMillisecondsImpl
{
static constexpr auto name = "addMilliseconds";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(3 - scale));
if (scale <= 3)
{
auto division = std::div((t.fractional + delta), static_cast<Int64>(1000));
return {t.whole * multiplier + division.quot, division.rem};
}
else
{
auto division = std::div((t.fractional + delta * multiplier), static_cast<Int64>(1000 * multiplier));
return {t.whole + division.quot,division.rem};
}
}
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(3 - scale));
return scale <= 3 ? t * multiplier + delta : t + delta * multiplier;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(3);
return t * multiplier + delta;
}
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
{
throw Exception("addMilliSeconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
{
throw Exception("addMilliSeconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
};
struct AddSecondsImpl
{
static constexpr auto name = "addSeconds";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
return {t.whole + delta, t.fractional};
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
return t + delta * DecimalUtils::scaleMultiplier<DateTime64>(scale);
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
return t + delta;
}
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
// use default datetime64 scale
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return time_zone.fromDayNum(DayNum(d)) + delta;
}
@ -70,21 +202,29 @@ struct AddMinutesImpl
static constexpr auto name = "addMinutes";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
return {t.whole + delta * 60, t.fractional};
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
return t + 60 * delta * DecimalUtils::scaleMultiplier<DateTime64>(scale);
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
return t + delta * 60;
}
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
// use default datetime64 scale
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return time_zone.fromDayNum(DayNum(d)) + delta * 60;
}
@ -95,20 +235,29 @@ struct AddHoursImpl
static constexpr auto name = "addHours";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
return {t.whole + delta * 3600, t.fractional};
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
{
return t + 3600 * delta * DecimalUtils::scaleMultiplier<DateTime64>(scale);
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
return t + delta * 3600;
}
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
// use default datetime64 scale
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return time_zone.fromDayNum(DayNum(d)) + delta * 3600;
}
@ -119,22 +268,30 @@ struct AddDaysImpl
static constexpr auto name = "addDays";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return {time_zone.addDays(t.whole, delta), t.fractional};
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
auto d = std::div(t, multiplier);
return time_zone.addDays(d.quot, delta) * multiplier + d.rem;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return time_zone.addDays(t, delta);
}
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
return d + delta;
}
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &, UInt16 = 0)
{
return d + delta;
}
@ -145,22 +302,30 @@ struct AddWeeksImpl
static constexpr auto name = "addWeeks";
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone)
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return {time_zone.addWeeks(t.whole, delta), t.fractional};
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
auto d = std::div(t, multiplier);
return time_zone.addDays(d.quot, delta * 7) * multiplier + d.rem;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return time_zone.addWeeks(t, delta);
}
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &, UInt16 = 0)
{
return d + delta * 7;
}
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &)
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &, UInt16 = 0)
{
return d + delta * 7;
}
@ -170,23 +335,31 @@ struct AddMonthsImpl
{
static constexpr auto name = "addMonths";
static inline DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return {time_zone.addMonths(t.whole, delta), t.fractional};
}
static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
auto d = std::div(t, multiplier);
return time_zone.addMonths(d.quot, delta) * multiplier + d.rem;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return time_zone.addMonths(t, delta);
}
static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return time_zone.addMonths(DayNum(d), delta);
}
static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return time_zone.addMonths(ExtendedDayNum(d), delta);
}
@ -197,22 +370,30 @@ struct AddQuartersImpl
static constexpr auto name = "addQuarters";
static inline DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone)
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return {time_zone.addQuarters(t.whole, delta), t.fractional};
}
static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
auto d = std::div(t, multiplier);
return time_zone.addQuarters(d.quot, delta) * multiplier + d.rem;
}
static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return time_zone.addQuarters(t, delta);
}
static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone)
static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return time_zone.addQuarters(DayNum(d), delta);
}
static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone)
static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return time_zone.addQuarters(ExtendedDayNum(d), delta);
}
@ -222,23 +403,31 @@ struct AddYearsImpl
{
static constexpr auto name = "addYears";
static inline DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return {time_zone.addYears(t.whole, delta), t.fractional};
}
static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
auto d = std::div(t, multiplier);
return time_zone.addYears(d.quot, delta) * multiplier + d.rem;
}
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return time_zone.addYears(t, delta);
}
static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return time_zone.addYears(DayNum(d), delta);
}
static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
{
return time_zone.addYears(ExtendedDayNum(d), delta);
}
@ -250,13 +439,16 @@ struct SubtractIntervalImpl : public Transform
using Transform::Transform;
template <typename T>
inline NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone) const
inline NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const
{
/// Signed integer overflow is Ok.
return Transform::execute(t, -delta, time_zone);
return Transform::execute(t, -delta, time_zone, scale);
}
};
struct SubtractNanosecondsImpl : SubtractIntervalImpl<AddNanosecondsImpl> { static constexpr auto name = "subtractNanoseconds"; };
struct SubtractMicrosecondsImpl : SubtractIntervalImpl<AddMicrosecondsImpl> { static constexpr auto name = "subtractMicroseconds"; };
struct SubtractMillisecondsImpl : SubtractIntervalImpl<AddMillisecondsImpl> { static constexpr auto name = "subtractMilliseconds"; };
struct SubtractSecondsImpl : SubtractIntervalImpl<AddSecondsImpl> { static constexpr auto name = "subtractSeconds"; };
struct SubtractMinutesImpl : SubtractIntervalImpl<AddMinutesImpl> { static constexpr auto name = "subtractMinutes"; };
struct SubtractHoursImpl : SubtractIntervalImpl<AddHoursImpl> { static constexpr auto name = "subtractHours"; };
@ -277,17 +469,17 @@ struct Adder
{}
template <typename FromVectorType, typename ToVectorType>
void NO_INLINE vectorConstant(const FromVectorType & vec_from, ToVectorType & vec_to, Int64 delta, const DateLUTImpl & time_zone) const
void NO_INLINE vectorConstant(const FromVectorType & vec_from, ToVectorType & vec_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const
{
size_t size = vec_from.size();
vec_to.resize(size);
for (size_t i = 0; i < size; ++i)
vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone);
vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone, scale);
}
template <typename FromVectorType, typename ToVectorType>
void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone) const
void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const
{
size_t size = vec_from.size();
vec_to.resize(size);
@ -296,11 +488,11 @@ struct Adder
ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64,
ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64,
ColumnFloat32, ColumnFloat64>(
&delta, [&](const auto & column){ vectorVector(vec_from, vec_to, column, time_zone, size); return true; });
&delta, [&](const auto & column){ vectorVector(vec_from, vec_to, column, time_zone, scale, size); return true; });
}
template <typename FromType, typename ToVectorType>
void constantVector(const FromType & from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone) const
void constantVector(const FromType & from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const
{
size_t size = delta.size();
vec_to.resize(size);
@ -309,7 +501,7 @@ struct Adder
ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64,
ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64,
ColumnFloat32, ColumnFloat64>(
&delta, [&](const auto & column){ constantVector(from, vec_to, column, time_zone, size); return true; });
&delta, [&](const auto & column){ constantVector(from, vec_to, column, time_zone, scale, size); return true; });
}
private:
@ -325,18 +517,18 @@ private:
template <typename FromVectorType, typename ToVectorType, typename DeltaColumnType>
NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector(
const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const
const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t size) const
{
for (size_t i = 0; i < size; ++i)
vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone);
vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone, scale);
}
template <typename FromType, typename ToVectorType, typename DeltaColumnType>
NO_INLINE NO_SANITIZE_UNDEFINED void constantVector(
const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const
const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t size) const
{
for (size_t i = 0; i < size; ++i)
vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone);
vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, scale);
}
};
@ -344,7 +536,7 @@ private:
template <typename FromDataType, typename ToDataType, typename Transform>
struct DateTimeAddIntervalImpl
{
static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type)
static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale = 0)
{
using FromValueType = typename FromDataType::FieldType;
using FromColumnType = typename FromDataType::ColumnType;
@ -363,16 +555,15 @@ struct DateTimeAddIntervalImpl
if (const auto * sources = checkAndGetColumn<FromColumnType>(source_col.get()))
{
if (const auto * delta_const_column = typeid_cast<const ColumnConst *>(&delta_column))
op.vectorConstant(sources->getData(), col_to->getData(), delta_const_column->getInt(0), time_zone);
op.vectorConstant(sources->getData(), col_to->getData(), delta_const_column->getInt(0), time_zone, scale);
else
op.vectorVector(sources->getData(), col_to->getData(), delta_column, time_zone);
op.vectorVector(sources->getData(), col_to->getData(), delta_column, time_zone, scale);
}
else if (const auto * sources_const = checkAndGetColumnConst<FromColumnType>(source_col.get()))
{
op.constantVector(
sources_const->template getValue<FromValueType>(),
col_to->getData(),
delta_column, time_zone);
col_to->getData(), delta_column, time_zone, scale);
}
else
{
@ -463,18 +654,10 @@ public:
}
}
// TransformDateTime64 helps choosing correct overload of exec and does some transformations
// on input and output parameters to simplify support of DateTime64 in concrete Transform.
template <typename FieldType>
using TransformType = std::conditional_t<
std::is_same_v<FieldType, DateTime64>,
TransformDateTime64<Transform>,
Transform>;
/// Helper templates to deduce return type based on argument type, since some overloads may promote or denote types,
/// e.g. addSeconds(Date, 1) => DateTime
template <typename FieldType>
using TransformExecuteReturnType = decltype(std::declval<TransformType<FieldType>>().execute(FieldType(), 0, std::declval<DateLUTImpl>()));
using TransformExecuteReturnType = decltype(std::declval<Transform>().execute(FieldType(), 0, std::declval<DateLUTImpl>(), 0));
// Deduces RETURN DataType from INPUT DataType, based on return type of Transform{}.execute(INPUT_TYPE, UInt64, DateLUTImpl).
// e.g. for Transform-type that has execute()-overload with 'UInt16' input and 'UInt32' return,
@ -500,11 +683,33 @@ public:
if (typeid_cast<const DataTypeDateTime64 *>(arguments[0].type.get()))
{
const auto & datetime64_type = assert_cast<const DataTypeDateTime64 &>(*arguments[0].type);
return std::make_shared<DataTypeDateTime64>(datetime64_type.getScale(), extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
auto from_scale = datetime64_type.getScale();
auto scale = from_scale;
if (std::is_same_v<Transform, AddNanosecondsImpl>)
scale = 9;
else if (std::is_same_v<Transform, AddMicrosecondsImpl>)
scale = 6;
else if (std::is_same_v<Transform, AddMillisecondsImpl>)
scale = 3;
scale = std::max(scale, from_scale);
return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
}
else
{
return std::make_shared<DataTypeDateTime64>(DataTypeDateTime64::default_scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
auto scale = DataTypeDateTime64::default_scale;
if (std::is_same_v<Transform, AddNanosecondsImpl>)
scale = 9;
else if (std::is_same_v<Transform, AddMicrosecondsImpl>)
scale = 6;
else if (std::is_same_v<Transform, AddMillisecondsImpl>)
scale = 3;
return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
}
}
else
@ -541,9 +746,9 @@ public:
}
else if (const auto * datetime64_type = assert_cast<const DataTypeDateTime64 *>(from_type))
{
using WrappedTransformType = TransformType<typename DataTypeDateTime64::FieldType>;
return DateTimeAddIntervalImpl<DataTypeDateTime64, TransformResultDataType<DataTypeDateTime64>, WrappedTransformType>::execute(
WrappedTransformType{datetime64_type->getScale()}, arguments, result_type);
auto from_scale = datetime64_type->getScale();
return DateTimeAddIntervalImpl<DataTypeDateTime64, TransformResultDataType<DataTypeDateTime64>, Transform>::execute(
Transform{}, arguments, result_type, from_scale);
}
else
throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName(),

View File

@ -88,6 +88,20 @@ public:
Int64 scale = DataTypeDateTime64::default_scale;
if (const auto * dt64 = checkAndGetDataType<DataTypeDateTime64>(arguments[0].type.get()))
scale = dt64->getScale();
auto source_scale = scale;
if constexpr (std::is_same_v<ToStartOfMillisecondImpl, Transform>)
{
scale = std::max(source_scale, static_cast<Int64>(3));
}
else if constexpr (std::is_same_v<ToStartOfMicrosecondImpl, Transform>)
{
scale = std::max(source_scale, static_cast<Int64>(6));
}
else if constexpr (std::is_same_v<ToStartOfNanosecondImpl, Transform>)
{
scale = std::max(source_scale, static_cast<Int64>(9));
}
return std::make_shared<ToDataType>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 1, 0));
}

View File

@ -112,6 +112,9 @@ void registerFunctionsConversion(FunctionFactory & factory)
factory.registerFunction<FunctionParseDateTime64BestEffortOrZero>();
factory.registerFunction<FunctionParseDateTime64BestEffortOrNull>();
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalNanosecond, PositiveMonotonicity>>();
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalMicrosecond, PositiveMonotonicity>>();
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalMillisecond, PositiveMonotonicity>>();
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalSecond, PositiveMonotonicity>>();
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalMinute, PositiveMonotonicity>>();
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalHour, PositiveMonotonicity>>();

View File

@ -1487,6 +1487,9 @@ struct NameToDecimal256 { static constexpr auto name = "toDecimal256"; };
static constexpr auto kind = IntervalKind::INTERVAL_KIND; \
};
DEFINE_NAME_TO_INTERVAL(Nanosecond)
DEFINE_NAME_TO_INTERVAL(Microsecond)
DEFINE_NAME_TO_INTERVAL(Millisecond)
DEFINE_NAME_TO_INTERVAL(Second)
DEFINE_NAME_TO_INTERVAL(Minute)
DEFINE_NAME_TO_INTERVAL(Hour)
@ -2703,13 +2706,10 @@ private:
return createWrapper<ToDataType>(from_type, to_type, requested_result_is_nullable);
}
WrapperType createUInt8ToUInt8Wrapper(const DataTypePtr from_type, const DataTypePtr to_type) const
WrapperType createUInt8ToBoolWrapper(const DataTypePtr from_type, const DataTypePtr to_type) const
{
return [from_type, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) -> ColumnPtr
{
if (isBool(from_type) || !isBool(to_type))
return arguments.front().column;
/// Special case when we convert UInt8 column to Bool column.
/// both columns have type UInt8, but we shouldn't use identity wrapper,
/// because Bool column can contain only 0 and 1.
@ -3506,15 +3506,19 @@ private:
/// 'requested_result_is_nullable' is true if CAST to Nullable type is requested.
WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const
{
bool convert_to_ipv6 = to_type->getCustomName() && to_type->getCustomName()->getName() == "IPv6";
if (isUInt8(from_type) && isBool(to_type))
return createUInt8ToBoolWrapper(from_type, to_type);
if (from_type->equals(*to_type) && !convert_to_ipv6)
{
if (isUInt8(from_type))
return createUInt8ToUInt8Wrapper(from_type, to_type);
/// We can cast IPv6 into IPv6, IPv4 into IPv4, but we should not allow to cast FixedString(16) into IPv6 as part of identity cast
bool safe_convert_custom_types = true;
if (const auto * to_type_custom_name = to_type->getCustomName())
safe_convert_custom_types = from_type->getCustomName() && from_type->getCustomName()->getName() == to_type_custom_name->getName();
else if (const auto * from_type_custom_name = from_type->getCustomName())
safe_convert_custom_types = to_type->getCustomName() && from_type_custom_name->getName() == to_type->getCustomName()->getName();
if (from_type->equals(*to_type) && safe_convert_custom_types)
return createIdentityWrapper(from_type);
}
else if (WhichDataType(from_type).isNothing())
return createNothingWrapper(to_type.get());

View File

@ -20,6 +20,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int SYNTAX_ERROR;
}
namespace
@ -167,6 +168,13 @@ struct TimeWindowImpl<TUMBLE>
switch (std::get<0>(interval))
{
//TODO: add proper support for fractional seconds
// case IntervalKind::Nanosecond:
// return executeTumble<UInt32, IntervalKind::Nanosecond>(*time_column_vec, std::get<1>(interval), time_zone);
// case IntervalKind::Microsecond:
// return executeTumble<UInt32, IntervalKind::Microsecond>(*time_column_vec, std::get<1>(interval), time_zone);
// case IntervalKind::Millisecond:
// return executeTumble<UInt32, IntervalKind::Millisecond>(*time_column_vec, std::get<1>(interval), time_zone);
case IntervalKind::Second:
return executeTumble<UInt32, IntervalKind::Second>(*time_column_vec, std::get<1>(interval), time_zone);
case IntervalKind::Minute:
@ -183,6 +191,8 @@ struct TimeWindowImpl<TUMBLE>
return executeTumble<UInt16, IntervalKind::Quarter>(*time_column_vec, std::get<1>(interval), time_zone);
case IntervalKind::Year:
return executeTumble<UInt16, IntervalKind::Year>(*time_column_vec, std::get<1>(interval), time_zone);
default:
throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR);
}
__builtin_unreachable();
}
@ -350,6 +360,16 @@ struct TimeWindowImpl<HOP>
switch (std::get<0>(window_interval))
{
//TODO: add proper support for fractional seconds
// case IntervalKind::Nanosecond:
// return executeHop<UInt32, IntervalKind::Nanosecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
// case IntervalKind::Microsecond:
// return executeHop<UInt32, IntervalKind::Microsecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
// case IntervalKind::Millisecond:
// return executeHop<UInt32, IntervalKind::Millisecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Second:
return executeHop<UInt32, IntervalKind::Second>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
@ -374,6 +394,8 @@ struct TimeWindowImpl<HOP>
case IntervalKind::Year:
return executeHop<UInt16, IntervalKind::Year>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
default:
throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR);
}
__builtin_unreachable();
}
@ -487,6 +509,16 @@ struct TimeWindowImpl<WINDOW_ID>
switch (std::get<0>(window_interval))
{
//TODO: add proper support for fractional seconds
// case IntervalKind::Nanosecond:
// return executeHopSlice<UInt32, IntervalKind::Nanosecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
// case IntervalKind::Microsecond:
// return executeHopSlice<UInt32, IntervalKind::Microsecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
// case IntervalKind::Millisecond:
// return executeHopSlice<UInt32, IntervalKind::Millisecond>(
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
case IntervalKind::Second:
return executeHopSlice<UInt32, IntervalKind::Second>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
@ -511,6 +543,8 @@ struct TimeWindowImpl<WINDOW_ID>
case IntervalKind::Year:
return executeHopSlice<UInt16, IntervalKind::Year>(
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
default:
throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR);
}
__builtin_unreachable();
}

View File

@ -80,7 +80,32 @@ struct ToStartOfTransform;
TRANSFORM_TIME(Hour)
TRANSFORM_TIME(Minute)
TRANSFORM_TIME(Second)
#undef TRANSFORM_DATE
#undef TRANSFORM_TIME
#define TRANSFORM_SUBSECONDS(INTERVAL_KIND, DEF_SCALE) \
template<> \
struct ToStartOfTransform<IntervalKind::INTERVAL_KIND> \
{ \
static Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \
{ \
if (scale <= DEF_SCALE) \
{ \
auto val = t * DecimalUtils::scaleMultiplier<DateTime64>(DEF_SCALE - scale); \
if (delta == 1) \
return val; \
else \
return val - (val % delta); \
} \
else \
{ \
return t - (t % (delta * DecimalUtils::scaleMultiplier<DateTime64>(scale - DEF_SCALE))) ; \
} \
} \
};
TRANSFORM_SUBSECONDS(Millisecond, 3)
TRANSFORM_SUBSECONDS(Microsecond, 6)
TRANSFORM_SUBSECONDS(Nanosecond, 9)
#undef TRANSFORM_SUBSECONDS
template <IntervalKind::Kind unit>
struct AddTime;
@ -117,6 +142,25 @@ struct ToStartOfTransform;
ADD_TIME(Second, 1)
#undef ADD_TIME
#define ADD_SUBSECONDS(INTERVAL_KIND, DEF_SCALE) \
template <> \
struct AddTime<IntervalKind::INTERVAL_KIND> \
{ \
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \
{ \
if (scale < DEF_SCALE) \
{ \
return t + delta * DecimalUtils::scaleMultiplier<DateTime64>(DEF_SCALE - scale); \
} \
else \
return t + delta * DecimalUtils::scaleMultiplier<DateTime64>(scale - DEF_SCALE); \
} \
};
ADD_SUBSECONDS(Millisecond, 3)
ADD_SUBSECONDS(Microsecond, 6)
ADD_SUBSECONDS(Nanosecond, 9)
#undef ADD_SUBSECONDS
template <TimeWindowFunctionName type>
struct TimeWindowImpl
{

View File

@ -0,0 +1,28 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionDateOrDateTimeAddInterval.h>
namespace DB
{
using FunctionSubtractNanoseconds = FunctionDateOrDateTimeAddInterval<SubtractNanosecondsImpl>;
void registerFunctionSubtractNanoseconds(FunctionFactory & factory)
{
factory.registerFunction<FunctionSubtractNanoseconds>();
};
using FunctionSubtractMicroseconds = FunctionDateOrDateTimeAddInterval<SubtractMicrosecondsImpl>;
void registerFunctionSubtractMicroseconds(FunctionFactory & factory)
{
factory.registerFunction<FunctionSubtractMicroseconds>();
};
using FunctionSubtractMilliseconds = FunctionDateOrDateTimeAddInterval<SubtractMillisecondsImpl>;
void registerFunctionSubtractMilliseconds(FunctionFactory & factory)
{
factory.registerFunction<FunctionSubtractMilliseconds>();
};
}

View File

@ -13,7 +13,7 @@ namespace DB
* * DateTime64 value and scale factor (2)
* * DateTime64 broken down to components, result of execute is then re-assembled back into DateTime64 value (3)
*
* Suitable Transfotm-types are commonly used in Date/DateTime manipulation functions,
* Suitable Transform-types are commonly used in Date/DateTime manipulation functions,
* and should implement static (or const) function with following signatures:
* 1:
* R execute(Int64 whole_value, ... )

View File

@ -0,0 +1,28 @@
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionDateOrDateTimeAddInterval.h>
namespace DB
{
using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval<AddNanosecondsImpl>;
void registerFunctionAddNanoseconds(FunctionFactory & factory)
{
factory.registerFunction<FunctionAddNanoseconds>();
};
using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval<AddMicrosecondsImpl>;
void registerFunctionAddMicroseconds(FunctionFactory & factory)
{
factory.registerFunction<FunctionAddMicroseconds>();
};
using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval<AddMillisecondsImpl>;
void registerFunctionAddMilliseconds(FunctionFactory & factory)
{
factory.registerFunction<FunctionAddMilliseconds>();
};
}

View File

@ -43,6 +43,9 @@ public:
for (size_t i = 2; i < args.size() - 1; i += 2)
dst_array_types.push_back(args[i]);
// Type of the ELSE branch
dst_array_types.push_back(args.back());
return getLeastSupertype(dst_array_types);
}

View File

@ -7,9 +7,9 @@ namespace DB
{
/// An O(1) time and space consistent hash algorithm by Konstantin Oblakov
struct YandexConsistentHashImpl
struct KostikConsistentHashImpl
{
static constexpr auto name = "yandexConsistentHash";
static constexpr auto name = "kostikConsistentHash";
using HashType = UInt64;
/// Actually it supports UInt64, but it is efficient only if n <= 32768
@ -23,12 +23,12 @@ struct YandexConsistentHashImpl
}
};
using FunctionYandexConsistentHash = FunctionConsistentHashImpl<YandexConsistentHashImpl>;
using FunctionKostikConsistentHash = FunctionConsistentHashImpl<KostikConsistentHashImpl>;
void registerFunctionYandexConsistentHash(FunctionFactory & factory)
void registerFunctionKostikConsistentHash(FunctionFactory & factory)
{
factory.registerFunction<FunctionYandexConsistentHash>();
factory.registerFunction<FunctionKostikConsistentHash>();
factory.registerAlias("yandexConsistentHash", "kostikConsistentHash");
}
}

View File

@ -2,12 +2,12 @@ namespace DB
{
class FunctionFactory;
void registerFunctionYandexConsistentHash(FunctionFactory & factory);
void registerFunctionKostikConsistentHash(FunctionFactory & factory);
void registerFunctionJumpConsistentHash(FunctionFactory & factory);
void registerFunctionsConsistentHashing(FunctionFactory & factory)
{
registerFunctionYandexConsistentHash(factory);
registerFunctionKostikConsistentHash(factory);
registerFunctionJumpConsistentHash(factory);
}

View File

@ -11,6 +11,9 @@ void registerFunctionToDayOfWeek(FunctionFactory &);
void registerFunctionToDayOfYear(FunctionFactory &);
void registerFunctionToHour(FunctionFactory &);
void registerFunctionToMinute(FunctionFactory &);
void registerFunctionToStartOfNanosecond(FunctionFactory &);
void registerFunctionToStartOfMicrosecond(FunctionFactory &);
void registerFunctionToStartOfMillisecond(FunctionFactory &);
void registerFunctionToStartOfSecond(FunctionFactory &);
void registerFunctionToSecond(FunctionFactory &);
void registerFunctionToStartOfDay(FunctionFactory &);
@ -47,6 +50,9 @@ void registerFunctionTimeSlots(FunctionFactory &);
void registerFunctionToYYYYMM(FunctionFactory &);
void registerFunctionToYYYYMMDD(FunctionFactory &);
void registerFunctionToYYYYMMDDhhmmss(FunctionFactory &);
void registerFunctionAddNanoseconds(FunctionFactory &);
void registerFunctionAddMicroseconds(FunctionFactory &);
void registerFunctionAddMilliseconds(FunctionFactory &);
void registerFunctionAddSeconds(FunctionFactory &);
void registerFunctionAddMinutes(FunctionFactory &);
void registerFunctionAddHours(FunctionFactory &);
@ -55,6 +61,9 @@ void registerFunctionAddWeeks(FunctionFactory &);
void registerFunctionAddMonths(FunctionFactory &);
void registerFunctionAddQuarters(FunctionFactory &);
void registerFunctionAddYears(FunctionFactory &);
void registerFunctionSubtractNanoseconds(FunctionFactory &);
void registerFunctionSubtractMicroseconds(FunctionFactory &);
void registerFunctionSubtractMilliseconds(FunctionFactory &);
void registerFunctionSubtractSeconds(FunctionFactory &);
void registerFunctionSubtractMinutes(FunctionFactory &);
void registerFunctionSubtractHours(FunctionFactory &);
@ -93,6 +102,9 @@ void registerFunctionsDateTime(FunctionFactory & factory)
registerFunctionToStartOfMonth(factory);
registerFunctionToStartOfQuarter(factory);
registerFunctionToStartOfYear(factory);
registerFunctionToStartOfNanosecond(factory);
registerFunctionToStartOfMicrosecond(factory);
registerFunctionToStartOfMillisecond(factory);
registerFunctionToStartOfSecond(factory);
registerFunctionToStartOfMinute(factory);
registerFunctionToStartOfFiveMinute(factory);
@ -119,6 +131,9 @@ void registerFunctionsDateTime(FunctionFactory & factory)
registerFunctionToYYYYMM(factory);
registerFunctionToYYYYMMDD(factory);
registerFunctionToYYYYMMDDhhmmss(factory);
registerFunctionAddNanoseconds(factory);
registerFunctionAddMicroseconds(factory);
registerFunctionAddMilliseconds(factory);
registerFunctionAddSeconds(factory);
registerFunctionAddMinutes(factory);
registerFunctionAddHours(factory);
@ -127,6 +142,9 @@ void registerFunctionsDateTime(FunctionFactory & factory)
registerFunctionAddMonths(factory);
registerFunctionAddQuarters(factory);
registerFunctionAddYears(factory);
registerFunctionSubtractNanoseconds(factory);
registerFunctionSubtractMicroseconds(factory);
registerFunctionSubtractMilliseconds(factory);
registerFunctionSubtractSeconds(factory);
registerFunctionSubtractMinutes(factory);
registerFunctionSubtractHours(factory);

View File

@ -33,184 +33,273 @@ namespace
template <>
struct Transform<IntervalKind::Year>
{
static constexpr auto name = function_name;
static UInt16 execute(UInt16 d, UInt64 years, const DateLUTImpl & time_zone)
static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfYearInterval(DayNum(d), years);
}
static UInt16 execute(Int32 d, UInt64 years, const DateLUTImpl & time_zone)
static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years);
}
static UInt16 execute(UInt32 t, UInt64 years, const DateLUTImpl & time_zone)
static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years);
}
static UInt16 execute(Int64 t, UInt64 years, const DateLUTImpl & time_zone)
static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{
return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years);
return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years);
}
};
template <>
struct Transform<IntervalKind::Quarter>
{
static constexpr auto name = function_name;
static UInt16 execute(UInt16 d, UInt64 quarters, const DateLUTImpl & time_zone)
static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfQuarterInterval(DayNum(d), quarters);
}
static UInt16 execute(Int32 d, UInt64 quarters, const DateLUTImpl & time_zone)
static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters);
}
static UInt16 execute(UInt32 t, UInt64 quarters, const DateLUTImpl & time_zone)
static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters);
}
static UInt16 execute(Int64 t, UInt64 quarters, const DateLUTImpl & time_zone)
static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{
return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters);
return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters);
}
};
template <>
struct Transform<IntervalKind::Month>
{
static constexpr auto name = function_name;
static UInt16 execute(UInt16 d, UInt64 months, const DateLUTImpl & time_zone)
static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfMonthInterval(DayNum(d), months);
}
static UInt16 execute(Int32 d, UInt64 months, const DateLUTImpl & time_zone)
static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months);
}
static UInt16 execute(UInt32 t, UInt64 months, const DateLUTImpl & time_zone)
static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months);
}
static UInt16 execute(Int64 t, UInt64 months, const DateLUTImpl & time_zone)
static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{
return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months);
return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months);
}
};
template <>
struct Transform<IntervalKind::Week>
{
static constexpr auto name = function_name;
static UInt16 execute(UInt16 d, UInt64 weeks, const DateLUTImpl & time_zone)
static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfWeekInterval(DayNum(d), weeks);
}
static UInt16 execute(Int32 d, UInt64 weeks, const DateLUTImpl & time_zone)
static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks);
}
static UInt16 execute(UInt32 t, UInt64 weeks, const DateLUTImpl & time_zone)
static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks);
}
static UInt16 execute(Int64 t, UInt64 weeks, const DateLUTImpl & time_zone)
static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{
return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks);
return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks);
}
};
template <>
struct Transform<IntervalKind::Day>
{
static constexpr auto name = function_name;
static UInt32 execute(UInt16 d, UInt64 days, const DateLUTImpl & time_zone)
static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days);
}
static UInt32 execute(Int32 d, UInt64 days, const DateLUTImpl & time_zone)
static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days);
}
static UInt32 execute(UInt32 t, UInt64 days, const DateLUTImpl & time_zone)
static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days);
}
static UInt32 execute(Int64 t, UInt64 days, const DateLUTImpl & time_zone)
static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{
return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days);
return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days);
}
};
template <>
struct Transform<IntervalKind::Hour>
{
static constexpr auto name = function_name;
static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt32 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); }
static UInt32 execute(Int64 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); }
static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfHourInterval(t, hours);
}
static UInt32 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{
return time_zone.toStartOfHourInterval(t / scale_multiplier, hours);
}
};
template <>
struct Transform<IntervalKind::Minute>
{
static constexpr auto name = function_name;
static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt32 t, UInt64 minutes, const DateLUTImpl & time_zone)
static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfMinuteInterval(t, minutes);
}
static UInt32 execute(Int64 t, UInt64 minutes, const DateLUTImpl & time_zone)
static UInt32 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{
return time_zone.toStartOfMinuteInterval(t, minutes);
return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes);
}
};
template <>
struct Transform<IntervalKind::Second>
{
static constexpr auto name = function_name;
static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone)
static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64)
{
return time_zone.toStartOfSecondInterval(t, seconds);
}
static Int64 execute(Int64 t, UInt64 seconds, const DateLUTImpl & time_zone)
static UInt32 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier)
{
return time_zone.toStartOfSecondInterval(t, seconds);
return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds);
}
};
template <>
struct Transform<IntervalKind::Millisecond>
{
static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); }
static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier)
{
if (scale_multiplier < 1000)
{
Int64 t_milliseconds = t * (static_cast<Int64>(1000) / scale_multiplier);
if (likely(t >= 0))
return t_milliseconds / milliseconds * milliseconds;
else
return ((t_milliseconds + 1) / milliseconds - 1) * milliseconds;
}
else if (scale_multiplier > 1000)
{
Int64 scale_diff = scale_multiplier / static_cast<Int64>(1000);
if (likely(t >= 0))
return t / milliseconds / scale_diff * milliseconds;
else
return ((t + 1) / milliseconds / scale_diff - 1) * milliseconds;
}
else
if (likely(t >= 0))
return t / milliseconds * milliseconds;
else
return ((t + 1) / milliseconds - 1) * milliseconds;
}
};
template <>
struct Transform<IntervalKind::Microsecond>
{
static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); }
static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier)
{
if (scale_multiplier < 1000000)
{
Int64 t_microseconds = t * (static_cast<Int64>(1000000) / scale_multiplier);
if (likely(t >= 0))
return t_microseconds / microseconds * microseconds;
else
return ((t_microseconds + 1) / microseconds - 1) * microseconds;
}
else if (scale_multiplier > 1000000)
{
Int64 scale_diff = scale_multiplier / static_cast<Int64>(1000000);
if (likely(t >= 0))
return t / microseconds / scale_diff * microseconds;
else
return ((t + 1) / microseconds / scale_diff - 1) * microseconds;
}
else
if (likely(t >= 0))
return t / microseconds * microseconds;
else
return ((t + 1) / microseconds - 1) * microseconds;
}
};
template <>
struct Transform<IntervalKind::Nanosecond>
{
static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); }
static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier)
{
if (scale_multiplier < 1000000000)
{
Int64 t_nanoseconds = t * (static_cast<Int64>(1000000000) / scale_multiplier);
if (likely(t >= 0))
return t_nanoseconds / nanoseconds * nanoseconds;
else
return ((t_nanoseconds + 1) / nanoseconds - 1) * nanoseconds;
}
else
if (likely(t >= 0))
return t / nanoseconds * nanoseconds;
else
return ((t + 1) / nanoseconds - 1) * nanoseconds;
}
};
class FunctionToStartOfInterval : public IFunction
{
@ -240,6 +329,7 @@ public:
const DataTypeInterval * interval_type = nullptr;
bool result_type_is_date = false;
bool result_type_is_datetime = false;
auto check_interval_argument = [&]
{
interval_type = checkAndGetDataType<DataTypeInterval>(arguments[1].type.get());
@ -251,6 +341,8 @@ public:
result_type_is_date = (interval_type->getKind() == IntervalKind::Year)
|| (interval_type->getKind() == IntervalKind::Quarter) || (interval_type->getKind() == IntervalKind::Month)
|| (interval_type->getKind() == IntervalKind::Week);
result_type_is_datetime = (interval_type->getKind() == IntervalKind::Day) || (interval_type->getKind() == IntervalKind::Hour)
|| (interval_type->getKind() == IntervalKind::Minute) || (interval_type->getKind() == IntervalKind::Second);
};
auto check_timezone_argument = [&]
@ -263,7 +355,7 @@ public:
if (first_argument_is_date && result_type_is_date)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"The timezone argument of function {} with interval type {} is allowed only when the 1st argument "
"has the type DateTime",
"has the type DateTime or DateTime64",
getName(), interval_type->getKind().toString());
};
@ -288,19 +380,33 @@ public:
if (result_type_is_date)
return std::make_shared<DataTypeDate>();
else
else if (result_type_is_datetime)
return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
else
{
auto scale = 0;
if (interval_type->getKind() == IntervalKind::Nanosecond)
scale = 9;
else if (interval_type->getKind() == IntervalKind::Microsecond)
scale = 6;
else if (interval_type->getKind() == IntervalKind::Millisecond)
scale = 3;
return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
}
}
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count */) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override
{
const auto & time_column = arguments[0];
const auto & interval_column = arguments[1];
const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0);
auto result_column = dispatchForColumns(time_column, interval_column, time_zone);
auto result_column = dispatchForColumns(time_column, interval_column, result_type, time_zone);
return result_column;
}
@ -316,33 +422,36 @@ public:
private:
ColumnPtr dispatchForColumns(
const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone) const
const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const
{
const auto & from_datatype = *time_column.type.get();
const auto which_type = WhichDataType(from_datatype);
if (which_type.isDateTime64())
{
const auto * time_column_vec = checkAndGetColumn<DataTypeDateTime64::ColumnType>(time_column.column.get());
auto scale = assert_cast<const DataTypeDateTime64 &>(from_datatype).getScale();
if (time_column_vec)
return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime64&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone, scale);
}
if (which_type.isDateTime())
{
const auto * time_column_vec = checkAndGetColumn<ColumnUInt32>(time_column.column.get());
if (time_column_vec)
return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime&>(from_datatype), *time_column_vec, interval_column, time_zone);
return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone);
}
if (which_type.isDate())
{
const auto * time_column_vec = checkAndGetColumn<ColumnUInt16>(time_column.column.get());
if (time_column_vec)
return dispatchForIntervalColumn(assert_cast<const DataTypeDate&>(from_datatype), *time_column_vec, interval_column, time_zone);
return dispatchForIntervalColumn(assert_cast<const DataTypeDate&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone);
}
if (which_type.isDate32())
{
const auto * time_column_vec = checkAndGetColumn<ColumnInt32>(time_column.column.get());
if (time_column_vec)
return dispatchForIntervalColumn(assert_cast<const DataTypeDate32&>(from_datatype), *time_column_vec, interval_column, time_zone);
}
if (which_type.isDateTime64())
{
const auto * time_column_vec = checkAndGetColumn<DataTypeDateTime64::ColumnType>(time_column.column.get());
if (time_column_vec)
return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime64&>(from_datatype), *time_column_vec, interval_column, time_zone);
return dispatchForIntervalColumn(assert_cast<const DataTypeDate32&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone);
}
throw Exception(
"Illegal column for first argument of function " + getName() + ". Must contain dates or dates with time",
@ -351,7 +460,8 @@ private:
template <typename ColumnType, typename FromDataType>
ColumnPtr dispatchForIntervalColumn(
const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone) const
const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column,
const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const
{
const auto * interval_type = checkAndGetDataType<DataTypeInterval>(interval_column.type.get());
if (!interval_type)
@ -368,49 +478,52 @@ private:
switch (interval_type->getKind())
{
case IntervalKind::Nanosecond:
return execute<FromDataType, DataTypeDateTime64, IntervalKind::Nanosecond>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Microsecond:
return execute<FromDataType, DataTypeDateTime64, IntervalKind::Microsecond>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Millisecond:
return execute<FromDataType, DataTypeDateTime64, IntervalKind::Millisecond>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Second:
return execute<FromDataType, UInt32, IntervalKind::Second>(from, time_column, num_units, time_zone);
return execute<FromDataType, DataTypeDateTime, IntervalKind::Second>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Minute:
return execute<FromDataType, UInt32, IntervalKind::Minute>(from, time_column, num_units, time_zone);
return execute<FromDataType, DataTypeDateTime, IntervalKind::Minute>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Hour:
return execute<FromDataType, UInt32, IntervalKind::Hour>(from, time_column, num_units, time_zone);
return execute<FromDataType, DataTypeDateTime, IntervalKind::Hour>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Day:
return execute<FromDataType, UInt32, IntervalKind::Day>(from, time_column, num_units, time_zone);
return execute<FromDataType, DataTypeDateTime, IntervalKind::Day>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Week:
return execute<FromDataType, UInt16, IntervalKind::Week>(from, time_column, num_units, time_zone);
return execute<FromDataType, DataTypeDate, IntervalKind::Week>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Month:
return execute<FromDataType, UInt16, IntervalKind::Month>(from, time_column, num_units, time_zone);
return execute<FromDataType, DataTypeDate, IntervalKind::Month>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Quarter:
return execute<FromDataType, UInt16, IntervalKind::Quarter>(from, time_column, num_units, time_zone);
return execute<FromDataType, DataTypeDate, IntervalKind::Quarter>(from, time_column, num_units, result_type, time_zone, scale);
case IntervalKind::Year:
return execute<FromDataType, UInt16, IntervalKind::Year>(from, time_column, num_units, time_zone);
return execute<FromDataType, DataTypeDate, IntervalKind::Year>(from, time_column, num_units, result_type, time_zone, scale);
}
__builtin_unreachable();
}
template <typename FromDataType, typename ToType, IntervalKind::Kind unit, typename ColumnType>
ColumnPtr execute(const FromDataType & from_datatype, const ColumnType & time_column, UInt64 num_units, const DateLUTImpl & time_zone) const
template <typename FromDataType, typename ToDataType, IntervalKind::Kind unit, typename ColumnType>
ColumnPtr execute(const FromDataType &, const ColumnType & time_column_type, Int64 num_units, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const
{
const auto & time_data = time_column.getData();
size_t size = time_column.size();
auto result = ColumnVector<ToType>::create();
auto & result_data = result->getData();
using ToColumnType = typename ToDataType::ColumnType;
const auto & time_data = time_column_type.getData();
size_t size = time_data.size();
auto result_col = result_type->createColumn();
auto *col_to = assert_cast<ToColumnType *>(result_col.get());
auto & result_data = col_to->getData();
result_data.resize(size);
if constexpr (std::is_same_v<FromDataType, DataTypeDateTime64>)
{
const auto transform = TransformDateTime64<Transform<unit>>{from_datatype.getScale()};
Int64 scale_multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
for (size_t i = 0; i != size; ++i)
result_data[i] = transform.execute(time_data[i], num_units, time_zone);
}
else
{
for (size_t i = 0; i != size; ++i)
result_data[i] = Transform<unit>::execute(time_data[i], num_units, time_zone);
}
return result;
result_data[i] = Transform<unit>::execute(time_data[i], num_units, time_zone, scale_multiplier);
return result_col;
}
};

View File

@ -0,0 +1,30 @@
#include <Functions/FunctionFactory.h>
#include <Functions/DateTimeTransforms.h>
#include <Functions/FunctionDateOrDateTimeToSomething.h>
namespace DB
{
using FunctionToStartOfMillisecond = FunctionDateOrDateTimeToSomething<DataTypeDateTime64, ToStartOfMillisecondImpl>;
void registerFunctionToStartOfMillisecond(FunctionFactory & factory)
{
factory.registerFunction<FunctionToStartOfMillisecond>();
}
using FunctionToStartOfMicrosecond = FunctionDateOrDateTimeToSomething<DataTypeDateTime64, ToStartOfMicrosecondImpl>;
void registerFunctionToStartOfMicrosecond(FunctionFactory & factory)
{
factory.registerFunction<FunctionToStartOfMicrosecond>();
}
using FunctionToStartOfNanosecond = FunctionDateOrDateTimeToSomething<DataTypeDateTime64, ToStartOfNanosecondImpl>;
void registerFunctionToStartOfNanosecond(FunctionFactory & factory)
{
factory.registerFunction<FunctionToStartOfNanosecond>();
}
}

34
src/IO/IOThreadPool.cpp Normal file
View File

@ -0,0 +1,34 @@
#include <IO/IOThreadPool.h>
#include "Core/Field.h"
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
std::unique_ptr<ThreadPool> IOThreadPool::instance;
void IOThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size)
{
if (instance)
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is initialized twice");
}
instance = std::make_unique<ThreadPool>(max_threads, max_free_threads, queue_size, false /*shutdown_on_exception*/);
}
ThreadPool & IOThreadPool::get()
{
if (!instance)
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is not initialized");
}
return *instance;
}
}

20
src/IO/IOThreadPool.h Normal file
View File

@ -0,0 +1,20 @@
#pragma once
#include <Common/ThreadPool.h>
namespace DB
{
/*
* ThreadPool used for the IO.
*/
class IOThreadPool
{
static std::unique_ptr<ThreadPool> instance;
public:
static void initialize(size_t max_threads, size_t max_free_threads, size_t queue_size);
static ThreadPool & get();
};
}

View File

@ -0,0 +1,290 @@
#include <IO/ParallelReadBuffer.h>
#include <base/logger_useful.h>
#include <Poco/Logger.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int CANNOT_SEEK_THROUGH_FILE;
extern const int SEEK_POSITION_OUT_OF_BOUND;
}
ParallelReadBuffer::ParallelReadBuffer(
std::unique_ptr<ReadBufferFactory> reader_factory_,
ThreadPool * pool_,
size_t max_working_readers_,
WorkerSetup worker_setup_,
WorkerCleanup worker_cleanup_)
: SeekableReadBufferWithSize(nullptr, 0)
, pool(pool_)
, max_working_readers(max_working_readers_)
, reader_factory(std::move(reader_factory_))
, worker_setup(std::move(worker_setup_))
, worker_cleanup(std::move(worker_cleanup_))
{
std::unique_lock<std::mutex> lock{mutex};
addReaders(lock);
}
bool ParallelReadBuffer::addReaderToPool(std::unique_lock<std::mutex> & /*buffer_lock*/)
{
auto reader = reader_factory->getReader();
if (!reader)
{
return false;
}
auto worker = read_workers.emplace_back(std::make_shared<ReadWorker>(std::move(reader)));
pool->scheduleOrThrow(
[&, this, worker = std::move(worker)]() mutable
{
ThreadStatus thread_status;
{
std::lock_guard lock{mutex};
++active_working_reader;
}
SCOPE_EXIT({
worker_cleanup(thread_status);
std::lock_guard lock{mutex};
--active_working_reader;
if (active_working_reader == 0)
{
readers_done.notify_all();
}
});
worker_setup(thread_status);
readerThreadFunction(std::move(worker));
});
return true;
}
void ParallelReadBuffer::addReaders(std::unique_lock<std::mutex> & buffer_lock)
{
while (read_workers.size() < max_working_readers && addReaderToPool(buffer_lock))
;
}
off_t ParallelReadBuffer::seek(off_t offset, int whence)
{
if (whence != SEEK_SET)
throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
if (offset < 0)
throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
if (!working_buffer.empty() && static_cast<size_t>(offset) >= current_position - working_buffer.size() && offset < current_position)
{
pos = working_buffer.end() - (current_position - offset);
assert(pos >= working_buffer.begin());
assert(pos <= working_buffer.end());
return offset;
}
std::unique_lock lock{mutex};
const auto offset_is_in_range
= [&](const auto & range) { return static_cast<size_t>(offset) >= range.left && static_cast<size_t>(offset) <= *range.right; };
while (!read_workers.empty() && (offset < current_position || !offset_is_in_range(read_workers.front()->range)))
{
read_workers.front()->cancel = true;
read_workers.pop_front();
}
if (!read_workers.empty())
{
auto & front_worker = read_workers.front();
auto & segments = front_worker->segments;
current_position = front_worker->range.left;
while (true)
{
next_condvar.wait(lock, [&] { return emergency_stop || !segments.empty(); });
if (emergency_stop)
handleEmergencyStop();
auto next_segment = front_worker->nextSegment();
if (static_cast<size_t>(offset) < current_position + next_segment.size())
{
current_segment = std::move(next_segment);
working_buffer = internal_buffer = Buffer(current_segment.data(), current_segment.data() + current_segment.size());
current_position += current_segment.size();
pos = working_buffer.end() - (current_position - offset);
addReaders(lock);
return offset;
}
current_position += next_segment.size();
}
}
lock.unlock();
finishAndWait();
reader_factory->seek(offset, whence);
all_completed = false;
read_workers.clear();
current_position = offset;
resetWorkingBuffer();
emergency_stop = false;
lock.lock();
addReaders(lock);
return offset;
}
std::optional<size_t> ParallelReadBuffer::getTotalSize()
{
std::lock_guard lock{mutex};
return reader_factory->getTotalSize();
}
off_t ParallelReadBuffer::getPosition()
{
return current_position - available();
}
bool ParallelReadBuffer::currentWorkerReady() const
{
assert(!read_workers.empty());
return read_workers.front()->finished || !read_workers.front()->segments.empty();
}
bool ParallelReadBuffer::currentWorkerCompleted() const
{
assert(!read_workers.empty());
return read_workers.front()->finished && read_workers.front()->segments.empty();
}
void ParallelReadBuffer::handleEmergencyStop()
{
// this can only be called from the main thread when there is an exception
assert(background_exception);
if (background_exception)
std::rethrow_exception(background_exception);
}
bool ParallelReadBuffer::nextImpl()
{
if (all_completed)
return false;
while (true)
{
std::unique_lock lock(mutex);
next_condvar.wait(
lock,
[this]()
{
/// Check if no more readers left or current reader can be processed
return emergency_stop || currentWorkerReady();
});
bool worker_removed = false;
/// Remove completed units
while (!read_workers.empty() && currentWorkerCompleted() && !emergency_stop)
{
read_workers.pop_front();
worker_removed = true;
}
if (emergency_stop)
handleEmergencyStop();
if (worker_removed)
addReaders(lock);
/// All readers processed, stop
if (read_workers.empty())
{
all_completed = true;
return false;
}
auto & front_worker = read_workers.front();
/// Read data from first segment of the first reader
if (!front_worker->segments.empty())
{
current_segment = front_worker->nextSegment();
if (currentWorkerCompleted())
{
read_workers.pop_front();
all_completed = !addReaderToPool(lock) && read_workers.empty();
}
break;
}
}
working_buffer = internal_buffer = Buffer(current_segment.data(), current_segment.data() + current_segment.size());
current_position += working_buffer.size();
return true;
}
void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker)
{
try
{
while (!emergency_stop && !read_worker->cancel)
{
if (!read_worker->reader->next())
throw Exception("Failed to read all the data from the reader", ErrorCodes::LOGICAL_ERROR);
if (emergency_stop || read_worker->cancel)
break;
Buffer buffer = read_worker->reader->buffer();
size_t bytes_to_copy = std::min(buffer.size(), read_worker->bytes_left);
Segment new_segment(bytes_to_copy, &arena);
memcpy(new_segment.data(), buffer.begin(), bytes_to_copy);
read_worker->reader->ignore(bytes_to_copy);
read_worker->bytes_left -= bytes_to_copy;
{
/// New data ready to be read
std::lock_guard lock(mutex);
read_worker->segments.emplace_back(std::move(new_segment));
read_worker->finished = read_worker->bytes_left == 0;
next_condvar.notify_all();
}
if (read_worker->finished)
{
break;
}
}
}
catch (...)
{
onBackgroundException();
}
}
void ParallelReadBuffer::onBackgroundException()
{
std::lock_guard lock(mutex);
if (!background_exception)
{
background_exception = std::current_exception();
}
emergency_stop = true;
next_condvar.notify_all();
}
void ParallelReadBuffer::finishAndWait()
{
emergency_stop = true;
std::unique_lock lock{mutex};
readers_done.wait(lock, [&] { return active_working_reader == 0; });
}
}

174
src/IO/ParallelReadBuffer.h Normal file
View File

@ -0,0 +1,174 @@
#pragma once
#include <IO/BufferWithOwnMemory.h>
#include <IO/ReadBuffer.h>
#include <IO/SeekableReadBuffer.h>
#include <Common/ArenaWithFreeLists.h>
#include <Common/ThreadPool.h>
namespace DB
{
/**
* Reads from multiple ReadBuffers in parallel.
* Preserves order of readers obtained from ReadBufferFactory.
*
* It consumes multiple readers and yields data from them in order as it passed.
* Each working reader save segments of data to internal queue.
*
* ParallelReadBuffer in nextImpl method take first available segment from first reader in deque and fed it to user.
* When first reader finish reading, they will be removed from worker deque and data from next reader consumed.
*
* Number of working readers limited by max_working_readers.
*/
class ParallelReadBuffer : public SeekableReadBufferWithSize
{
private:
/// Blocks until data occurred in the first reader or this reader indicate finishing
/// Finished readers removed from queue and data from next readers processed
bool nextImpl() override;
class Segment : private boost::noncopyable
{
public:
Segment(size_t size_, SynchronizedArenaWithFreeLists * arena_) : arena(arena_), m_data(arena->alloc(size_)), m_size(size_) { }
Segment() = default;
Segment(Segment && other) noexcept : arena(other.arena)
{
std::swap(m_data, other.m_data);
std::swap(m_size, other.m_size);
}
Segment & operator=(Segment && other) noexcept
{
arena = other.arena;
std::swap(m_data, other.m_data);
std::swap(m_size, other.m_size);
return *this;
}
~Segment()
{
if (m_data)
{
arena->free(m_data, m_size);
}
}
auto data() const noexcept { return m_data; }
auto size() const noexcept { return m_size; }
private:
SynchronizedArenaWithFreeLists * arena{nullptr};
char * m_data{nullptr};
size_t m_size{0};
};
public:
class ReadBufferFactory
{
public:
virtual SeekableReadBufferPtr getReader() = 0;
virtual ~ReadBufferFactory() = default;
virtual off_t seek(off_t off, int whence) = 0;
virtual std::optional<size_t> getTotalSize() = 0;
};
using WorkerSetup = std::function<void(ThreadStatus &)>;
using WorkerCleanup = std::function<void(ThreadStatus &)>;
explicit ParallelReadBuffer(
std::unique_ptr<ReadBufferFactory> reader_factory_,
ThreadPool * pool,
size_t max_working_readers,
WorkerSetup worker_setup = {},
WorkerCleanup worker_cleanup = {});
~ParallelReadBuffer() override { finishAndWait(); }
off_t seek(off_t off, int whence) override;
std::optional<size_t> getTotalSize() override;
off_t getPosition() override;
private:
/// Reader in progress with a list of read segments
struct ReadWorker
{
explicit ReadWorker(SeekableReadBufferPtr reader_) : reader(std::move(reader_)), range(reader->getRemainingReadRange())
{
assert(range.right);
bytes_left = *range.right - range.left + 1;
}
Segment nextSegment()
{
assert(!segments.empty());
auto next_segment = std::move(segments.front());
segments.pop_front();
range.left += next_segment.size();
return next_segment;
}
SeekableReadBufferPtr reader;
std::deque<Segment> segments;
bool finished{false};
SeekableReadBuffer::Range range;
size_t bytes_left{0};
std::atomic_bool cancel{false};
};
using ReadWorkerPtr = std::shared_ptr<ReadWorker>;
/// First worker in deque have new data or processed all available amount
bool currentWorkerReady() const;
/// First worker in deque processed and flushed all data
bool currentWorkerCompleted() const;
void handleEmergencyStop();
void addReaders(std::unique_lock<std::mutex> & buffer_lock);
bool addReaderToPool(std::unique_lock<std::mutex> & buffer_lock);
/// Process read_worker, read data and save into internal segments queue
void readerThreadFunction(ReadWorkerPtr read_worker);
void onBackgroundException();
void finishAndWait();
SynchronizedArenaWithFreeLists arena;
Segment current_segment;
ThreadPool * pool;
size_t max_working_readers;
size_t active_working_reader{0};
// Triggered when all reader workers are done
std::condition_variable readers_done;
std::unique_ptr<ReadBufferFactory> reader_factory;
WorkerSetup worker_setup;
WorkerCleanup worker_cleanup;
/**
* FIFO queue of readers.
* Each worker contains reader itself and downloaded segments.
* When reader read all available data it will be removed from
* deque and data from next reader will be consumed to user.
*/
std::deque<ReadWorkerPtr> read_workers;
std::mutex mutex;
/// Triggered when new data available
std::condition_variable next_condvar;
std::exception_ptr background_exception = nullptr;
std::atomic_bool emergency_stop{false};
off_t current_position{0};
bool all_completed{false};
};
}

View File

@ -1,32 +1,33 @@
#pragma once
#include <functional>
#include <base/types.h>
#include <base/sleep.h>
#include <IO/ConnectionTimeouts.h>
#include <IO/HTTPCommon.h>
#include <IO/ParallelReadBuffer.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadBufferFromIStream.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadSettings.h>
#include <base/logger_useful.h>
#include <base/sleep.h>
#include <base/types.h>
#include <Poco/Any.h>
#include <Poco/Net/HTTPBasicCredentials.h>
#include <Poco/Net/HTTPClientSession.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Poco/URI.h>
#include <Poco/URIStreamFactory.h>
#include <Poco/Version.h>
#include <Common/DNSResolver.h>
#include <Common/RemoteHostFilter.h>
#include <Common/config.h>
#include <Common/config_version.h>
#include <base/logger_useful.h>
#include <Poco/URIStreamFactory.h>
namespace ProfileEvents
{
extern const Event ReadBufferSeekCancelConnection;
extern const Event ReadBufferSeekCancelConnection;
}
namespace DB
@ -48,7 +49,7 @@ class UpdatableSessionBase
{
protected:
SessionPtr session;
UInt64 redirects { 0 };
UInt64 redirects{0};
Poco::URI initial_uri;
ConnectionTimeouts timeouts;
UInt64 max_redirects;
@ -56,19 +57,12 @@ protected:
public:
virtual void buildNewSession(const Poco::URI & uri) = 0;
explicit UpdatableSessionBase(const Poco::URI uri,
const ConnectionTimeouts & timeouts_,
UInt64 max_redirects_)
: initial_uri { uri }
, timeouts { timeouts_ }
, max_redirects { max_redirects_ }
explicit UpdatableSessionBase(const Poco::URI uri, const ConnectionTimeouts & timeouts_, UInt64 max_redirects_)
: initial_uri{uri}, timeouts{timeouts_}, max_redirects{max_redirects_}
{
}
SessionPtr getSession()
{
return session;
}
SessionPtr getSession() { return session; }
void updateSession(const Poco::URI & uri)
{
@ -99,7 +93,7 @@ namespace detail
/// HTTP range, including right bound [begin, end].
struct Range
{
size_t begin = 0;
std::optional<size_t> begin;
std::optional<size_t> end;
};
@ -144,10 +138,9 @@ namespace detail
return read_range.begin || read_range.end || retry_with_range_header;
}
size_t getOffset() const
{
return read_range.begin + offset_from_begin_pos;
}
size_t getRangeBegin() const { return read_range.begin.value_or(0); }
size_t getOffset() const { return getRangeBegin() + offset_from_begin_pos; }
std::istream * callImpl(Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_)
{
@ -161,7 +154,7 @@ namespace detail
if (out_stream_callback)
request.setChunkedTransferEncoding(true);
for (auto & http_header_entry: http_header_entries)
for (auto & http_header_entry : http_header_entries)
request.set(std::get<0>(http_header_entry), std::get<1>(http_header_entry));
if (withPartialContent())
@ -207,26 +200,14 @@ namespace detail
std::optional<size_t> getTotalSize() override
{
if (read_range.end)
return *read_range.end - read_range.begin;
return *read_range.end - getRangeBegin();
Poco::Net::HTTPResponse response;
for (size_t i = 0; i < 10; ++i)
{
try
{
call(response, Poco::Net::HTTPRequest::HTTP_HEAD);
while (isRedirect(response.getStatus()))
{
Poco::URI uri_redirect(response.get("Location"));
if (remote_host_filter)
remote_host_filter->checkURL(uri_redirect);
session->updateSession(uri_redirect);
istr = callImpl(uri_redirect, response, method);
}
callWithRedirects(response, Poco::Net::HTTPRequest::HTTP_HEAD);
break;
}
catch (const Poco::Exception & e)
@ -236,7 +217,7 @@ namespace detail
}
if (response.hasContentLength())
read_range.end = read_range.begin + response.getContentLength();
read_range.end = getRangeBegin() + response.getContentLength();
return read_range.end;
}
@ -252,6 +233,21 @@ namespace detail
InitializeError initialization_error = InitializeError::NONE;
private:
void setupExternalBuffer()
{
/**
* use_external_buffer -- means we read into the buffer which
* was passed to us from somewhere else. We do not check whether
* previously returned buffer was read or not (no hasPendingData() check is needed),
* because this branch means we are prefetching data,
* each nextImpl() call we can fill a different buffer.
*/
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
}
public:
using NextCallback = std::function<void(size_t)>;
using OutStreamCallback = std::function<void(std::ostream &)>;
@ -276,7 +272,7 @@ namespace detail
, session {session_}
, out_stream_callback {out_stream_callback_}
, credentials {credentials_}
, http_header_entries {http_header_entries_}
, http_header_entries {std::move(http_header_entries_)}
, remote_host_filter {remote_host_filter_}
, buffer_size {buffer_size_}
, use_external_buffer {use_external_buffer_}
@ -287,18 +283,21 @@ namespace detail
{
if (settings.http_max_tries <= 0 || settings.http_retry_initial_backoff_ms <= 0
|| settings.http_retry_initial_backoff_ms >= settings.http_retry_max_backoff_ms)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Invalid setting for http backoff, "
"must be http_max_tries >= 1 (current is {}) and "
"0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})",
settings.http_max_tries, settings.http_retry_initial_backoff_ms, settings.http_retry_max_backoff_ms);
settings.http_max_tries,
settings.http_retry_initial_backoff_ms,
settings.http_retry_max_backoff_ms);
// Configure User-Agent if it not already set.
const std::string user_agent = "User-Agent";
auto iter = std::find_if(http_header_entries.begin(), http_header_entries.end(), [&user_agent](const HTTPHeaderEntry & entry)
{
return std::get<0>(entry) == user_agent;
});
auto iter = std::find_if(
http_header_entries.begin(),
http_header_entries.end(),
[&user_agent](const HTTPHeaderEntry & entry) { return std::get<0>(entry) == user_agent; });
if (iter == http_header_entries.end())
{
@ -313,7 +312,36 @@ namespace detail
}
}
void call(Poco::Net::HTTPResponse & response, const String & method_)
static bool isRetriableError(const Poco::Net::HTTPResponse::HTTPStatus http_status) noexcept
{
constexpr std::array non_retriable_errors{
Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN,
Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED};
return std::all_of(
non_retriable_errors.begin(), non_retriable_errors.end(), [&](const auto status) { return http_status != status; });
}
void callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false)
{
call(response, method_, throw_on_all_errors);
while (isRedirect(response.getStatus()))
{
Poco::URI uri_redirect(response.get("Location"));
if (remote_host_filter)
remote_host_filter->checkURL(uri_redirect);
session->updateSession(uri_redirect);
istr = callImpl(uri_redirect, response, method);
}
}
void call(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false)
{
try
{
@ -321,18 +349,18 @@ namespace detail
}
catch (...)
{
if (throw_on_all_errors)
{
throw;
}
auto http_status = response.getStatus();
if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND
&& http_skip_not_found_url)
if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND && http_skip_not_found_url)
{
initialization_error = InitializeError::SKIP_NOT_FOUND_URL;
}
else if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN
|| http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED)
else if (!isRetriableError(http_status))
{
initialization_error = InitializeError::NON_RETRIABLE_ERROR;
exception = std::current_exception();
@ -372,12 +400,14 @@ namespace detail
if (withPartialContent() && response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT)
{
/// Having `200 OK` instead of `206 Partial Content` is acceptable in case we retried with range.begin == 0.
if (read_range.begin)
if (read_range.begin && *read_range.begin != 0)
{
if (!exception)
exception = std::make_exception_ptr(
Exception(ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
"Cannot read with range: [{}, {}]", read_range.begin, read_range.end ? *read_range.end : '-'));
exception = std::make_exception_ptr(Exception(
ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
"Cannot read with range: [{}, {}]",
*read_range.begin,
read_range.end ? *read_range.end : '-'));
initialization_error = InitializeError::NON_RETRIABLE_ERROR;
return;
@ -386,12 +416,12 @@ namespace detail
{
/// We could have range.begin == 0 and range.end != 0 in case of DiskWeb and failing to read with partial content
/// will affect only performance, so a warning is enough.
LOG_WARNING(log, "Unable to read with range header: [{}, {}]", read_range.begin, *read_range.end);
LOG_WARNING(log, "Unable to read with range header: [{}, {}]", getRangeBegin(), *read_range.end);
}
}
if (!offset_from_begin_pos && !read_range.end && response.hasContentLength())
read_range.end = read_range.begin + response.getContentLength();
read_range.end = getRangeBegin() + response.getContentLength();
try
{
@ -399,12 +429,7 @@ namespace detail
if (use_external_buffer)
{
/**
* See comment 30 lines below.
*/
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
setupExternalBuffer();
}
}
catch (const Poco::Exception & e)
@ -426,23 +451,17 @@ namespace detail
if (next_callback)
next_callback(count());
if (read_range.end && getOffset() == read_range.end.value())
if (read_range.end && getOffset() > read_range.end.value())
{
assert(getOffset() == read_range.end.value() + 1);
return false;
}
if (impl)
{
if (use_external_buffer)
{
/**
* use_external_buffer -- means we read into the buffer which
* was passed to us from somewhere else. We do not check whether
* previously returned buffer was read or not (no hasPendingData() check is needed),
* because this branch means we are prefetching data,
* each nextImpl() call we can fill a different buffer.
*/
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
setupExternalBuffer();
}
else
{
@ -477,10 +496,7 @@ namespace detail
if (use_external_buffer)
{
/// See comment 40 lines above.
impl->set(internal_buffer.begin(), internal_buffer.size());
assert(working_buffer.begin() != nullptr);
assert(!internal_buffer.empty());
setupExternalBuffer();
}
}
@ -498,13 +514,18 @@ namespace detail
if (!can_retry_request)
throw;
LOG_ERROR(log,
LOG_ERROR(
log,
"HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. "
"Error: {}. (Current backoff wait is {}/{} ms)",
uri.toString(), i + 1, settings.http_max_tries,
getOffset(), read_range.end ? toString(*read_range.end) : "unknown",
uri.toString(),
i + 1,
settings.http_max_tries,
getOffset(),
read_range.end ? toString(*read_range.end) : "unknown",
e.displayText(),
milliseconds_to_wait, settings.http_retry_max_backoff_ms);
milliseconds_to_wait,
settings.http_retry_max_backoff_ms);
retry_with_range_header = true;
exception = std::current_exception();
@ -529,10 +550,7 @@ namespace detail
return true;
}
off_t getPosition() override
{
return getOffset() - available();
}
off_t getPosition() override { return getOffset() - available(); }
off_t seek(off_t offset_, int whence) override
{
@ -540,12 +558,11 @@ namespace detail
throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
if (offset_ < 0)
throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
throw Exception(
"Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
off_t current_offset = getOffset();
if (!working_buffer.empty()
&& size_t(offset_) >= current_offset - working_buffer.size()
&& offset_ < current_offset)
if (!working_buffer.empty() && size_t(offset_) >= current_offset - working_buffer.size() && offset_ < current_offset)
{
pos = working_buffer.end() - (current_offset - offset_);
assert(pos >= working_buffer.begin());
@ -567,7 +584,6 @@ namespace detail
if (impl)
{
ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection);
impl.reset();
}
@ -580,6 +596,8 @@ namespace detail
return offset_;
}
SeekableReadBuffer::Range getRemainingReadRange() const override { return {getOffset(), read_range.end}; }
std::string getResponseCookie(const std::string & name, const std::string & def) const
{
for (const auto & cookie : cookies)
@ -599,10 +617,7 @@ namespace detail
next_callback(count());
}
const std::string & getCompressionMethod() const
{
return content_encoding;
}
const std::string & getCompressionMethod() const { return content_encoding; }
};
}
@ -611,19 +626,50 @@ class UpdatableSession : public UpdatableSessionBase<HTTPSessionPtr>
using Parent = UpdatableSessionBase<HTTPSessionPtr>;
public:
UpdatableSession(
const Poco::URI uri,
const ConnectionTimeouts & timeouts_,
const UInt64 max_redirects_)
UpdatableSession(const Poco::URI uri, const ConnectionTimeouts & timeouts_, const UInt64 max_redirects_)
: Parent(uri, timeouts_, max_redirects_)
{
session = makeHTTPSession(initial_uri, timeouts);
}
void buildNewSession(const Poco::URI & uri) override
void buildNewSession(const Poco::URI & uri) override { session = makeHTTPSession(uri, timeouts); }
};
class RangeGenerator
{
public:
explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
: from(range_start), range_step(range_step_), total_size(total_size_)
{
session = makeHTTPSession(uri, timeouts);
}
size_t totalRanges() const { return static_cast<size_t>(round(static_cast<float>(total_size - from) / range_step)); }
using Range = std::pair<size_t, size_t>;
// return upper exclusive range of values, i.e. [from_range, to_range>
std::optional<Range> nextRange()
{
if (from >= total_size)
{
return std::nullopt;
}
auto to = from + range_step;
if (to >= total_size)
{
to = total_size;
}
Range range{from, to};
from = to;
return std::move(range);
}
private:
size_t from;
size_t range_step;
size_t total_size;
};
class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>
@ -646,14 +692,117 @@ public:
bool delay_initialization_ = true,
bool use_external_buffer_ = false,
bool skip_not_found_url_ = false)
: Parent(std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects),
uri_, credentials_, method_, out_stream_callback_, buffer_size_,
settings_, http_header_entries_, read_range_, remote_host_filter_,
delay_initialization_, use_external_buffer_, skip_not_found_url_)
: Parent(
std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects),
uri_,
credentials_,
method_,
out_stream_callback_,
buffer_size_,
settings_,
http_header_entries_,
read_range_,
remote_host_filter_,
delay_initialization_,
use_external_buffer_,
skip_not_found_url_)
{
}
};
class RangedReadWriteBufferFromHTTPFactory : public ParallelReadBuffer::ReadBufferFactory
{
using OutStreamCallback = ReadWriteBufferFromHTTP::OutStreamCallback;
public:
RangedReadWriteBufferFromHTTPFactory(
size_t total_object_size_,
size_t range_step_,
Poco::URI uri_,
std::string method_,
OutStreamCallback out_stream_callback_,
ConnectionTimeouts timeouts_,
const Poco::Net::HTTPBasicCredentials & credentials_,
UInt64 max_redirects_ = 0,
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
ReadSettings settings_ = {},
ReadWriteBufferFromHTTP::HTTPHeaderEntries http_header_entries_ = {},
const RemoteHostFilter * remote_host_filter_ = nullptr,
bool delay_initialization_ = true,
bool use_external_buffer_ = false,
bool skip_not_found_url_ = false)
: range_generator(total_object_size_, range_step_)
, total_object_size(total_object_size_)
, range_step(range_step_)
, uri(uri_)
, method(std::move(method_))
, out_stream_callback(out_stream_callback_)
, timeouts(std::move(timeouts_))
, credentials(credentials_)
, max_redirects(max_redirects_)
, buffer_size(buffer_size_)
, settings(std::move(settings_))
, http_header_entries(std::move(http_header_entries_))
, remote_host_filter(remote_host_filter_)
, delay_initialization(delay_initialization_)
, use_external_buffer(use_external_buffer_)
, skip_not_found_url(skip_not_found_url_)
{
}
SeekableReadBufferPtr getReader() override
{
const auto next_range = range_generator.nextRange();
if (!next_range)
{
return nullptr;
}
return std::make_shared<ReadWriteBufferFromHTTP>(
uri,
method,
out_stream_callback,
timeouts,
credentials,
max_redirects,
buffer_size,
settings,
http_header_entries,
// HTTP Range has inclusive bounds, i.e. [from, to]
ReadWriteBufferFromHTTP::Range{next_range->first, next_range->second - 1},
remote_host_filter,
delay_initialization,
use_external_buffer,
skip_not_found_url);
}
off_t seek(off_t off, [[maybe_unused]] int whence) override
{
range_generator = RangeGenerator{total_object_size, range_step, static_cast<size_t>(off)};
return off;
}
std::optional<size_t> getTotalSize() override { return total_object_size; }
private:
RangeGenerator range_generator;
size_t total_object_size;
size_t range_step;
Poco::URI uri;
std::string method;
OutStreamCallback out_stream_callback;
ConnectionTimeouts timeouts;
const Poco::Net::HTTPBasicCredentials & credentials;
UInt64 max_redirects;
size_t buffer_size;
ReadSettings settings;
ReadWriteBufferFromHTTP::HTTPHeaderEntries http_header_entries;
const RemoteHostFilter * remote_host_filter;
bool delay_initialization;
bool use_external_buffer;
bool skip_not_found_url;
};
class UpdatablePooledSession : public UpdatableSessionBase<PooledHTTPSessionPtr>
{
using Parent = UpdatableSessionBase<PooledHTTPSessionPtr>;
@ -662,20 +811,14 @@ private:
size_t per_endpoint_pool_size;
public:
explicit UpdatablePooledSession(const Poco::URI uri,
const ConnectionTimeouts & timeouts_,
const UInt64 max_redirects_,
size_t per_endpoint_pool_size_)
: Parent(uri, timeouts_, max_redirects_)
, per_endpoint_pool_size { per_endpoint_pool_size_ }
explicit UpdatablePooledSession(
const Poco::URI uri, const ConnectionTimeouts & timeouts_, const UInt64 max_redirects_, size_t per_endpoint_pool_size_)
: Parent(uri, timeouts_, max_redirects_), per_endpoint_pool_size{per_endpoint_pool_size_}
{
session = makePooledHTTPSession(initial_uri, timeouts, per_endpoint_pool_size);
}
void buildNewSession(const Poco::URI & uri) override
{
session = makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size);
}
void buildNewSession(const Poco::URI & uri) override { session = makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size); }
};
class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatablePooledSession>>
@ -683,7 +826,8 @@ class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase
using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatablePooledSession>>;
public:
explicit PooledReadWriteBufferFromHTTP(Poco::URI uri_,
explicit PooledReadWriteBufferFromHTTP(
Poco::URI uri_,
const std::string & method_ = {},
OutStreamCallback out_stream_callback_ = {},
const ConnectionTimeouts & timeouts_ = {},
@ -691,7 +835,8 @@ public:
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
const UInt64 max_redirects = 0,
size_t max_connections_per_endpoint = DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT)
: Parent(std::make_shared<UpdatablePooledSession>(uri_, timeouts_, max_redirects, max_connections_per_endpoint),
: Parent(
std::make_shared<UpdatablePooledSession>(uri_, timeouts_, max_redirects, max_connections_per_endpoint),
uri_,
credentials_,
method_,

View File

@ -372,8 +372,8 @@ SetPtr makeExplicitSet(
element_type = low_cardinality_type->getDictionaryType();
auto set_key = PreparedSetKey::forLiteral(*right_arg, set_element_types);
if (prepared_sets.count(set_key))
return prepared_sets.at(set_key); /// Already prepared.
if (auto it = prepared_sets.find(set_key); it != prepared_sets.end())
return it->second; /// Already prepared.
Block block;
const auto & right_arg_func = std::dynamic_pointer_cast<ASTFunction>(right_arg);
@ -388,7 +388,7 @@ SetPtr makeExplicitSet(
set->insertFromBlock(block.getColumnsWithTypeAndName());
set->finishInsert();
prepared_sets[set_key] = set;
prepared_sets.emplace(set_key, set);
return set;
}
@ -707,7 +707,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat
if (tid != 0)
tuple_ast = tuple_ast->clone();
auto literal = std::make_shared<ASTLiteral>(UInt64(++tid));
auto literal = std::make_shared<ASTLiteral>(UInt64{++tid});
visit(*literal, literal, data);
auto func = makeASTFunction("tupleElement", tuple_ast, literal);
@ -814,13 +814,12 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (!data.only_consts)
{
/// We are in the part of the tree that we are not going to compute. You just need to define types.
/// Do not subquery and create sets. We replace "in*" function to "in*IgnoreSet".
/// Do not evaluate subquery and create sets. We replace "in*" function to "in*IgnoreSet".
auto argument_name = node.arguments->children.at(0)->getColumnName();
data.addFunction(
FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()),
{ argument_name, argument_name },
{argument_name, argument_name},
column_name);
}
return;
@ -1145,8 +1144,8 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
if (no_subqueries)
return {};
auto set_key = PreparedSetKey::forSubquery(*right_in_operand);
if (data.prepared_sets.count(set_key))
return data.prepared_sets.at(set_key);
if (auto it = data.prepared_sets.find(set_key); it != data.prepared_sets.end())
return it->second;
/// A special case is if the name of the table is specified on the right side of the IN statement,
/// and the table has the type Set (a previously prepared set).
@ -1160,7 +1159,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
StorageSet * storage_set = dynamic_cast<StorageSet *>(table.get());
if (storage_set)
{
data.prepared_sets[set_key] = storage_set->getSet();
data.prepared_sets.emplace(set_key, storage_set->getSet());
return storage_set->getSet();
}
}
@ -1174,7 +1173,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
/// If you already created a Set with the same subquery / table.
if (subquery_for_set.set)
{
data.prepared_sets[set_key] = subquery_for_set.set;
data.prepared_sets.emplace(set_key, subquery_for_set.set);
return subquery_for_set.set;
}
@ -1196,7 +1195,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
}
subquery_for_set.set = set;
data.prepared_sets[set_key] = set;
data.prepared_sets.emplace(set_key, set);
return set;
}
else

View File

@ -10,6 +10,7 @@
namespace DB
{
class ASTExpressionList;
class ASTFunction;
class ExpressionActions;
@ -89,10 +90,7 @@ struct ScopeStack : WithContext
void addColumn(ColumnWithTypeAndName column);
void addAlias(const std::string & name, std::string alias);
void addArrayJoin(const std::string & source_name, std::string result_name);
void addFunction(
const FunctionOverloadResolverPtr & function,
const Names & argument_names,
std::string result_name);
void addFunction(const FunctionOverloadResolverPtr & function, const Names & argument_names, std::string result_name);
ActionsDAGPtr popLevel();

View File

@ -26,10 +26,10 @@ extern const int CANNOT_LOAD_CATBOOST_MODEL;
extern const int CANNOT_APPLY_CATBOOST_MODEL;
}
/// CatBoost wrapper interface functions.
struct CatBoostWrapperAPI
class CatBoostWrapperAPI
{
public:
using ModelCalcerHandle = void;
ModelCalcerHandle * (* ModelCalcerCreate)(); // NOLINT
@ -68,9 +68,6 @@ struct CatBoostWrapperAPI
};
namespace
{
class CatBoostModelHolder
{
private:
@ -84,7 +81,61 @@ public:
};
class CatBoostModelImpl : public ICatBoostModel
/// Holds CatBoost wrapper library and provides wrapper interface.
class CatBoostLibHolder
{
public:
explicit CatBoostLibHolder(std::string lib_path_) : lib_path(std::move(lib_path_)), lib(lib_path) { initAPI(); }
const CatBoostWrapperAPI & getAPI() const { return api; }
const std::string & getCurrentPath() const { return lib_path; }
private:
CatBoostWrapperAPI api;
std::string lib_path;
SharedLibrary lib;
void initAPI()
{
load(api.ModelCalcerCreate, "ModelCalcerCreate");
load(api.ModelCalcerDelete, "ModelCalcerDelete");
load(api.GetErrorString, "GetErrorString");
load(api.LoadFullModelFromFile, "LoadFullModelFromFile");
load(api.CalcModelPredictionFlat, "CalcModelPredictionFlat");
load(api.CalcModelPrediction, "CalcModelPrediction");
load(api.CalcModelPredictionWithHashedCatFeatures, "CalcModelPredictionWithHashedCatFeatures");
load(api.GetStringCatFeatureHash, "GetStringCatFeatureHash");
load(api.GetIntegerCatFeatureHash, "GetIntegerCatFeatureHash");
load(api.GetFloatFeaturesCount, "GetFloatFeaturesCount");
load(api.GetCatFeaturesCount, "GetCatFeaturesCount");
tryLoad(api.CheckModelMetadataHasKey, "CheckModelMetadataHasKey");
tryLoad(api.GetModelInfoValueSize, "GetModelInfoValueSize");
tryLoad(api.GetModelInfoValue, "GetModelInfoValue");
tryLoad(api.GetTreeCount, "GetTreeCount");
tryLoad(api.GetDimensionsCount, "GetDimensionsCount");
}
template <typename T>
void load(T& func, const std::string & name) { func = lib.get<T>(name); }
template <typename T>
void tryLoad(T& func, const std::string & name) { func = lib.tryGet<T>(name); }
};
std::shared_ptr<CatBoostLibHolder> getCatBoostWrapperHolder(const std::string & lib_path)
{
static std::shared_ptr<CatBoostLibHolder> ptr;
static std::mutex mutex;
std::lock_guard lock(mutex);
if (!ptr || ptr->getCurrentPath() != lib_path)
ptr = std::make_shared<CatBoostLibHolder>(lib_path);
return ptr;
}
class CatBoostModelImpl
{
public:
CatBoostModelImpl(const CatBoostWrapperAPI * api_, const std::string & model_path) : api(api_)
@ -92,13 +143,15 @@ public:
handle = std::make_unique<CatBoostModelHolder>(api);
if (!handle)
{
std::string msg = "Cannot create CatBoost model: ";
throw Exception(msg + api->GetErrorString(), ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL);
throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL,
"Cannot create CatBoost model: {}",
api->GetErrorString());
}
if (!api->LoadFullModelFromFile(handle->get(), model_path.c_str()))
{
std::string msg = "Cannot load CatBoost model: ";
throw Exception(msg + api->GetErrorString(), ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL);
throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL,
"Cannot load CatBoost model: {}",
api->GetErrorString());
}
float_features_count = api->GetFloatFeaturesCount(handle->get());
@ -108,32 +161,22 @@ public:
tree_count = api->GetDimensionsCount(handle->get());
}
ColumnPtr evaluate(const ColumnRawPtrs & columns) const override
ColumnPtr evaluate(const ColumnRawPtrs & columns) const
{
if (columns.empty())
throw Exception("Got empty columns list for CatBoost model.", ErrorCodes::BAD_ARGUMENTS);
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got empty columns list for CatBoost model.");
if (columns.size() != float_features_count + cat_features_count)
{
std::string msg;
{
WriteBufferFromString buffer(msg);
buffer << "Number of columns is different with number of features: ";
buffer << columns.size() << " vs " << float_features_count << " + " << cat_features_count;
}
throw Exception(msg, ErrorCodes::BAD_ARGUMENTS);
}
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Number of columns is different with number of features: columns size {} float features size {} + cat features size {}",
float_features_count,
cat_features_count);
for (size_t i = 0; i < float_features_count; ++i)
{
if (!columns[i]->isNumeric())
{
std::string msg;
{
WriteBufferFromString buffer(msg);
buffer << "Column " << i << " should be numeric to make float feature.";
}
throw Exception(msg, ErrorCodes::BAD_ARGUMENTS);
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric to make float feature.", i);
}
}
@ -142,16 +185,13 @@ public:
{
const auto * column = columns[i];
if (column->isNumeric())
{
cat_features_are_strings = false;
}
else if (!(typeid_cast<const ColumnString *>(column)
|| typeid_cast<const ColumnFixedString *>(column)))
{
std::string msg;
{
WriteBufferFromString buffer(msg);
buffer << "Column " << i << " should be numeric or string.";
}
throw Exception(msg, ErrorCodes::BAD_ARGUMENTS);
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric or string.", i);
}
}
@ -187,9 +227,9 @@ public:
return ColumnTuple::create(std::move(mutable_columns));
}
size_t getFloatFeaturesCount() const override { return float_features_count; }
size_t getCatFeaturesCount() const override { return cat_features_count; }
size_t getTreeCount() const override { return tree_count; }
size_t getFloatFeaturesCount() const { return float_features_count; }
size_t getCatFeaturesCount() const { return cat_features_count; }
size_t getTreeCount() const { return tree_count; }
private:
std::unique_ptr<CatBoostModelHolder> handle;
@ -435,66 +475,6 @@ private:
}
};
/// Holds CatBoost wrapper library and provides wrapper interface.
class CatBoostLibHolder: public CatBoostWrapperAPIProvider
{
public:
explicit CatBoostLibHolder(std::string lib_path_) : lib_path(std::move(lib_path_)), lib(lib_path) { initAPI(); }
const CatBoostWrapperAPI & getAPI() const override { return api; }
const std::string & getCurrentPath() const { return lib_path; }
private:
CatBoostWrapperAPI api;
std::string lib_path;
SharedLibrary lib;
void initAPI();
template <typename T>
void load(T& func, const std::string & name) { func = lib.get<T>(name); }
template <typename T>
void tryLoad(T& func, const std::string & name) { func = lib.tryGet<T>(name); }
};
void CatBoostLibHolder::initAPI()
{
load(api.ModelCalcerCreate, "ModelCalcerCreate");
load(api.ModelCalcerDelete, "ModelCalcerDelete");
load(api.GetErrorString, "GetErrorString");
load(api.LoadFullModelFromFile, "LoadFullModelFromFile");
load(api.CalcModelPredictionFlat, "CalcModelPredictionFlat");
load(api.CalcModelPrediction, "CalcModelPrediction");
load(api.CalcModelPredictionWithHashedCatFeatures, "CalcModelPredictionWithHashedCatFeatures");
load(api.GetStringCatFeatureHash, "GetStringCatFeatureHash");
load(api.GetIntegerCatFeatureHash, "GetIntegerCatFeatureHash");
load(api.GetFloatFeaturesCount, "GetFloatFeaturesCount");
load(api.GetCatFeaturesCount, "GetCatFeaturesCount");
tryLoad(api.CheckModelMetadataHasKey, "CheckModelMetadataHasKey");
tryLoad(api.GetModelInfoValueSize, "GetModelInfoValueSize");
tryLoad(api.GetModelInfoValue, "GetModelInfoValue");
tryLoad(api.GetTreeCount, "GetTreeCount");
tryLoad(api.GetDimensionsCount, "GetDimensionsCount");
}
std::shared_ptr<CatBoostLibHolder> getCatBoostWrapperHolder(const std::string & lib_path)
{
static std::shared_ptr<CatBoostLibHolder> ptr;
static std::mutex mutex;
std::lock_guard lock(mutex);
if (!ptr || ptr->getCurrentPath() != lib_path)
ptr = std::make_shared<CatBoostLibHolder>(lib_path);
return ptr;
}
}
CatBoostModel::CatBoostModel(std::string name_, std::string model_path_, std::string lib_path_,
const ExternalLoadableLifetime & lifetime_)
: name(std::move(name_)), model_path(std::move(model_path_)), lib_path(std::move(lib_path_)), lifetime(lifetime_)
@ -502,43 +482,28 @@ CatBoostModel::CatBoostModel(std::string name_, std::string model_path_, std::st
api_provider = getCatBoostWrapperHolder(lib_path);
api = &api_provider->getAPI();
model = std::make_unique<CatBoostModelImpl>(api, model_path);
float_features_count = model->getFloatFeaturesCount();
cat_features_count = model->getCatFeaturesCount();
tree_count = model->getTreeCount();
}
const ExternalLoadableLifetime & CatBoostModel::getLifetime() const
{
return lifetime;
}
bool CatBoostModel::isModified() const
{
return true;
}
std::shared_ptr<const IExternalLoadable> CatBoostModel::clone() const
{
return std::make_shared<CatBoostModel>(name, model_path, lib_path, lifetime);
}
CatBoostModel::~CatBoostModel() = default;
size_t CatBoostModel::getFloatFeaturesCount() const
{
return float_features_count;
return model->getFloatFeaturesCount();
}
size_t CatBoostModel::getCatFeaturesCount() const
{
return cat_features_count;
return model->getCatFeaturesCount();
}
size_t CatBoostModel::getTreeCount() const
{
return tree_count;
return model->getTreeCount();
}
DataTypePtr CatBoostModel::getReturnType() const
{
size_t tree_count = getTreeCount();
auto type = std::make_shared<DataTypeFloat64>();
if (tree_count == 1)
return type;
@ -552,6 +517,7 @@ ColumnPtr CatBoostModel::evaluate(const ColumnRawPtrs & columns) const
{
if (!model)
throw Exception("CatBoost model was not loaded.", ErrorCodes::LOGICAL_ERROR);
return model->evaluate(columns);
}

View File

@ -8,47 +8,32 @@
namespace DB
{
/// CatBoost wrapper interface functions.
struct CatBoostWrapperAPI;
class CatBoostWrapperAPIProvider
{
public:
virtual ~CatBoostWrapperAPIProvider() = default;
virtual const CatBoostWrapperAPI & getAPI() const = 0;
};
/// CatBoost model interface.
class ICatBoostModel
{
public:
virtual ~ICatBoostModel() = default;
/// Evaluate model. Use first `float_features_count` columns as float features,
/// the others `cat_features_count` as categorical features.
virtual ColumnPtr evaluate(const ColumnRawPtrs & columns) const = 0;
virtual size_t getFloatFeaturesCount() const = 0;
virtual size_t getCatFeaturesCount() const = 0;
virtual size_t getTreeCount() const = 0;
};
class CatBoostLibHolder;
class CatBoostWrapperAPI;
class CatBoostModelImpl;
class IDataType;
using DataTypePtr = std::shared_ptr<const IDataType>;
/// General ML model evaluator interface.
class IModel : public IExternalLoadable
class IMLModel : public IExternalLoadable
{
public:
IMLModel() = default;
virtual ColumnPtr evaluate(const ColumnRawPtrs & columns) const = 0;
virtual std::string getTypeName() const = 0;
virtual DataTypePtr getReturnType() const = 0;
virtual ~IMLModel() override = default;
};
class CatBoostModel : public IModel
class CatBoostModel : public IMLModel
{
public:
CatBoostModel(std::string name, std::string model_path,
std::string lib_path, const ExternalLoadableLifetime & lifetime);
~CatBoostModel() override;
ColumnPtr evaluate(const ColumnRawPtrs & columns) const override;
std::string getTypeName() const override { return "catboost"; }
@ -59,29 +44,28 @@ public:
/// IExternalLoadable interface.
const ExternalLoadableLifetime & getLifetime() const override;
const ExternalLoadableLifetime & getLifetime() const override { return lifetime; }
std::string getLoadableName() const override { return name; }
bool supportUpdates() const override { return true; }
bool isModified() const override;
bool isModified() const override { return true; }
std::shared_ptr<const IExternalLoadable> clone() const override;
std::shared_ptr<const IExternalLoadable> clone() const override
{
return std::make_shared<CatBoostModel>(name, model_path, lib_path, lifetime);
}
private:
const std::string name;
std::string model_path;
std::string lib_path;
ExternalLoadableLifetime lifetime;
std::shared_ptr<CatBoostWrapperAPIProvider> api_provider;
std::shared_ptr<CatBoostLibHolder> api_provider;
const CatBoostWrapperAPI * api;
std::unique_ptr<ICatBoostModel> model;
size_t float_features_count;
size_t cat_features_count;
size_t tree_count;
std::unique_ptr<CatBoostModelImpl> model;
void init();
};

View File

@ -132,7 +132,9 @@ Cluster::Address::Address(
bool secure_,
Int64 priority_,
UInt32 shard_index_,
UInt32 replica_index_)
UInt32 replica_index_,
String cluster_name_,
String cluster_secret_)
: user(user_), password(password_)
{
bool can_be_local = true;
@ -164,6 +166,8 @@ Cluster::Address::Address(
is_local = can_be_local && isLocal(clickhouse_port);
shard_index = shard_index_;
replica_index = replica_index_;
cluster = cluster_name_;
cluster_secret = cluster_secret_;
}
@ -537,10 +541,14 @@ Cluster::Cluster(
bool treat_local_as_remote,
bool treat_local_port_as_remote,
bool secure,
Int64 priority)
Int64 priority,
String cluster_name,
String cluster_secret)
{
UInt32 current_shard_num = 1;
secret = cluster_secret;
for (const auto & shard : names)
{
Addresses current;
@ -554,7 +562,9 @@ Cluster::Cluster(
secure,
priority,
current_shard_num,
current.size() + 1);
current.size() + 1,
cluster_name,
cluster_secret);
addresses_with_failover.emplace_back(current);
@ -690,6 +700,9 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti
}
}
secret = from.secret;
name = from.name;
initMisc();
}
@ -704,6 +717,9 @@ Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector
addresses_with_failover.emplace_back(from.addresses_with_failover.at(index));
}
secret = from.secret;
name = from.name;
initMisc();
}

View File

@ -55,7 +55,9 @@ public:
bool treat_local_as_remote,
bool treat_local_port_as_remote,
bool secure = false,
Int64 priority = 1);
Int64 priority = 1,
String cluster_name = "",
String cluster_secret = "");
Cluster(const Cluster &)= delete;
Cluster & operator=(const Cluster &) = delete;
@ -127,7 +129,9 @@ public:
bool secure_ = false,
Int64 priority_ = 1,
UInt32 shard_index_ = 0,
UInt32 replica_index_ = 0);
UInt32 replica_index_ = 0,
String cluster_name = "",
String cluster_secret_ = "");
/// Returns 'escaped_host_name:port'
String toString() const;

View File

@ -350,6 +350,12 @@ void DDLWorker::scheduleTasks(bool reinitialized)
bool maybe_concurrently_deleting = task && !zookeeper->exists(fs::path(task->entry_path) / "active");
return task && !maybe_concurrently_deleting && !maybe_currently_processing;
}
else if (last_skipped_entry_name.has_value() && !queue_fully_loaded_after_initialization_debug_helper)
{
/// If connection was lost during queue loading
/// we may start processing from finished task (because we don't know yet that it's finished) and it's ok.
return false;
}
else
{
/// Return true if entry should not be scheduled.
@ -365,7 +371,11 @@ void DDLWorker::scheduleTasks(bool reinitialized)
String reason;
auto task = initAndCheckTask(entry_name, reason, zookeeper);
if (!task)
if (task)
{
queue_fully_loaded_after_initialization_debug_helper = true;
}
else
{
LOG_DEBUG(log, "Will not execute task {}: {}", entry_name, reason);
updateMaxDDLEntryID(entry_name);

View File

@ -131,6 +131,9 @@ protected:
std::optional<String> first_failed_task_name;
std::list<DDLTaskPtr> current_tasks;
/// This flag is needed for debug assertions only
bool queue_fully_loaded_after_initialization_debug_helper = false;
Coordination::Stat queue_node_stat;
std::shared_ptr<Poco::Event> queue_updated_event = std::make_shared<Poco::Event>();
std::shared_ptr<Poco::Event> cleanup_event = std::make_shared<Poco::Event>();

View File

@ -100,20 +100,9 @@ bool checkPositionalArguments(ASTPtr & argument, const ASTSelectQuery * select_q
{
auto columns = select_query->select()->children;
const auto * group_by_expr_with_alias = dynamic_cast<const ASTWithAlias *>(argument.get());
if (group_by_expr_with_alias && !group_by_expr_with_alias->alias.empty())
{
for (const auto & column : columns)
{
const auto * col_with_alias = dynamic_cast<const ASTWithAlias *>(column.get());
if (col_with_alias)
{
const auto & alias = col_with_alias->alias;
if (!alias.empty() && alias == group_by_expr_with_alias->alias)
const auto * expr_with_alias = dynamic_cast<const ASTWithAlias *>(argument.get());
if (expr_with_alias && !expr_with_alias->alias.empty())
return false;
}
}
}
const auto * ast_literal = typeid_cast<const ASTLiteral *>(argument.get());
if (!ast_literal)
@ -130,7 +119,7 @@ bool checkPositionalArguments(ASTPtr & argument, const ASTSelectQuery * select_q
pos, columns.size());
const auto & column = columns[--pos];
if (typeid_cast<const ASTIdentifier *>(column.get()))
if (typeid_cast<const ASTIdentifier *>(column.get()) || typeid_cast<const ASTLiteral *>(column.get()))
{
argument = column->clone();
}
@ -259,7 +248,7 @@ NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & a
if (!array_join_expression_list)
return src_columns;
getRootActionsNoMakeSet(array_join_expression_list, true, actions, false);
getRootActionsNoMakeSet(array_join_expression_list, actions, false);
auto array_join = addMultipleArrayJoinAction(actions, is_array_join_left);
auto sample_columns = actions->getResultColumns();
@ -294,7 +283,7 @@ NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const
const ASTTablesInSelectQueryElement * join = select_query->join();
if (join)
{
getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), true, actions, false);
getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), actions, false);
auto sample_columns = actions->getNamesAndTypesList();
syntax->analyzed_join->addJoinedColumnsAndCorrectTypes(sample_columns, true);
actions = std::make_shared<ActionsDAG>(sample_columns);
@ -332,14 +321,14 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions)
{
NameSet unique_keys;
ASTs & group_asts = group_by_ast->children;
for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i)
for (ssize_t i = 0; i < static_cast<ssize_t>(group_asts.size()); ++i)
{
ssize_t size = group_asts.size();
if (getContext()->getSettingsRef().enable_positional_arguments)
replaceForPositionalArguments(group_asts[i], select_query, ASTSelectQuery::Expression::GROUP_BY);
getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false);
getRootActionsNoMakeSet(group_asts[i], temp_actions, false);
const auto & column_name = group_asts[i]->getColumnName();
@ -405,8 +394,8 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global)
{
if (do_global)
{
GlobalSubqueriesVisitor::Data subqueries_data(getContext(), subquery_depth, isRemoteStorage(),
external_tables, subqueries_for_sets, has_global_subqueries);
GlobalSubqueriesVisitor::Data subqueries_data(
getContext(), subquery_depth, isRemoteStorage(), external_tables, subqueries_for_sets, has_global_subqueries);
GlobalSubqueriesVisitor(subqueries_data).visit(query);
}
}
@ -416,7 +405,7 @@ void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_
{
auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name);
if (prepared_sets.count(set_key))
if (prepared_sets.contains(set_key))
return; /// Already prepared.
if (auto set_ptr_from_storage_set = isPlainStorageSetInSubquery(subquery_or_table_name))
@ -509,33 +498,62 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
}
void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts)
void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts)
{
LogAST log;
ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth,
sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets,
no_subqueries, false, only_consts, !isRemoteStorage());
ActionsVisitor::Data visitor_data(
getContext(),
settings.size_limits_for_set,
subquery_depth,
sourceColumns(),
std::move(actions),
prepared_sets,
subqueries_for_sets,
no_makeset_for_subqueries,
false /* no_makeset */,
only_consts,
!isRemoteStorage() /* create_source_for_in */);
ActionsVisitor(visitor_data, log.stream()).visit(ast);
actions = visitor_data.getActions();
}
void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts)
void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts)
{
LogAST log;
ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth,
sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets,
no_subqueries, true, only_consts, !isRemoteStorage());
ActionsVisitor::Data visitor_data(
getContext(),
settings.size_limits_for_set,
subquery_depth,
sourceColumns(),
std::move(actions),
prepared_sets,
subqueries_for_sets,
true /* no_makeset_for_subqueries, no_makeset implies no_makeset_for_subqueries */,
true /* no_makeset */,
only_consts,
!isRemoteStorage() /* create_source_for_in */);
ActionsVisitor(visitor_data, log.stream()).visit(ast);
actions = visitor_data.getActions();
}
void ExpressionAnalyzer::getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts)
void ExpressionAnalyzer::getRootActionsForHaving(
const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts)
{
LogAST log;
ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth,
sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets,
no_subqueries, false, only_consts, true);
ActionsVisitor::Data visitor_data(
getContext(),
settings.size_limits_for_set,
subquery_depth,
sourceColumns(),
std::move(actions),
prepared_sets,
subqueries_for_sets,
no_makeset_for_subqueries,
false /* no_makeset */,
only_consts,
true /* create_source_for_in */);
ActionsVisitor(visitor_data, log.stream()).visit(ast);
actions = visitor_data.getActions();
}
@ -547,7 +565,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr
{
AggregateDescription aggregate;
if (node->arguments)
getRootActionsNoMakeSet(node->arguments, true, actions);
getRootActionsNoMakeSet(node->arguments, actions);
aggregate.column_name = node->getColumnName();
@ -746,8 +764,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
// Requiring a constant reference to a shared pointer to non-const AST
// doesn't really look sane, but the visitor does indeed require it.
// Hence we clone the node (not very sane either, I know).
getRootActionsNoMakeSet(window_function.function_node->clone(),
true, actions);
getRootActionsNoMakeSet(window_function.function_node->clone(), actions);
const ASTs & arguments
= window_function.function_node->arguments->children;
@ -867,8 +884,7 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi
auto array_join = addMultipleArrayJoinAction(step.actions(), is_array_join_left);
before_array_join = chain.getLastActions();
chain.steps.push_back(std::make_unique<ExpressionActionsChain::ArrayJoinStep>(
array_join, step.getResultColumns()));
chain.steps.push_back(std::make_unique<ExpressionActionsChain::ArrayJoinStep>(array_join, step.getResultColumns()));
chain.addStep();
@ -1099,8 +1115,8 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
}
}
chain.steps.emplace_back(std::make_unique<ExpressionActionsChain::ExpressionActionsStep>(
std::make_shared<ActionsDAG>(std::move(columns))));
chain.steps.emplace_back(
std::make_unique<ExpressionActionsChain::ExpressionActionsStep>(std::make_shared<ActionsDAG>(std::move(columns))));
chain.steps.back()->additional_input = std::move(unused_source_columns);
chain.getLastActions();
chain.addStep();
@ -1210,8 +1226,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
// recursively together with (1b) as ASTFunction::window_definition.
if (getSelectQuery()->window())
{
getRootActionsNoMakeSet(getSelectQuery()->window(),
true /* no_subqueries */, step.actions());
getRootActionsNoMakeSet(getSelectQuery()->window(), step.actions());
}
for (const auto & [_, w] : window_descriptions)
@ -1222,8 +1237,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
// definitions (1a).
// Requiring a constant reference to a shared pointer to non-const AST
// doesn't really look sane, but the visitor does indeed require it.
getRootActionsNoMakeSet(f.function_node->clone(),
true /* no_subqueries */, step.actions());
getRootActionsNoMakeSet(f.function_node->clone(), step.actions());
// (2b) Required function argument columns.
for (const auto & a : f.function_node->arguments->children)
@ -1299,8 +1313,10 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai
throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
if (getContext()->getSettingsRef().enable_positional_arguments)
{
replaceForPositionalArguments(ast->children.at(0), select_query, ASTSelectQuery::Expression::ORDER_BY);
}
}
getRootActions(select_query->orderBy(), only_types, step.actions());
@ -1456,7 +1472,7 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r
alias = name;
result_columns.emplace_back(name, alias);
result_names.push_back(alias);
getRootActions(ast, false, actions_dag);
getRootActions(ast, false /* no_makeset_for_subqueries */, actions_dag);
}
if (add_aliases)
@ -1496,7 +1512,7 @@ ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const ColumnsWithTypeAn
{
auto actions = std::make_shared<ActionsDAG>(constant_inputs);
getRootActions(query, true, actions, true);
getRootActions(query, true /* no_makeset_for_subqueries */, actions, true /* only_consts */);
return std::make_shared<ExpressionActions>(actions, ExpressionActionsSettings::fromContext(getContext()));
}

View File

@ -172,15 +172,15 @@ protected:
ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const;
void getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
/** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in
* analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the
* prepared sets would not be applicable for MergeTree index optimization.
*/
void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
void getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts = false);
void getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
void getRootActionsForHaving(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
/** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions,
* Create a set of columns aggregated_columns resulting after the aggregation, if any,

View File

@ -15,14 +15,14 @@ namespace DB
class ExternalModelsLoader : public ExternalLoader, WithContext
{
public:
using ModelPtr = std::shared_ptr<const IModel>;
using ModelPtr = std::shared_ptr<const IMLModel>;
/// Models will be loaded immediately and then will be updated in separate thread, each 'reload_period' seconds.
explicit ExternalModelsLoader(ContextPtr context_);
ModelPtr getModel(const std::string & model_name) const
{
return std::static_pointer_cast<const IModel>(load(model_name));
return std::static_pointer_cast<const IMLModel>(load(model_name));
}
void reloadModel(const std::string & model_name) const

View File

@ -10,6 +10,7 @@
#include <Interpreters/interpretSubquery.h>
#include <Interpreters/SubqueryForSet.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
@ -17,7 +18,11 @@
#include <Parsers/IAST.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Processors/Sinks/SinkToStorage.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Common/typeid_cast.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/ConstraintsDescription.h>
#include <Storages/IStorage.h>
namespace DB
{
@ -34,7 +39,6 @@ public:
{
size_t subquery_depth;
bool is_remote;
size_t external_table_id;
TemporaryTablesMapping & external_tables;
SubqueriesForSets & subqueries_for_sets;
bool & has_global_subqueries;
@ -49,7 +53,6 @@ public:
: WithContext(context_)
, subquery_depth(subquery_depth_)
, is_remote(is_remote_)
, external_table_id(1)
, external_tables(tables)
, subqueries_for_sets(subqueries_for_sets_)
, has_global_subqueries(has_global_subqueries_)
@ -92,48 +95,33 @@ public:
{
/// If this is already an external table, you do not need to add anything. Just remember its presence.
auto temporary_table_name = getIdentifierName(subquery_or_table_name);
bool exists_in_local_map = external_tables.end() != external_tables.find(temporary_table_name);
bool exists_in_local_map = external_tables.contains(temporary_table_name);
bool exists_in_context = static_cast<bool>(getContext()->tryResolveStorageID(
StorageID("", temporary_table_name), Context::ResolveExternal));
if (exists_in_local_map || exists_in_context)
return;
}
String external_table_name = subquery_or_table_name->tryGetAlias();
if (external_table_name.empty())
String alias = subquery_or_table_name->tryGetAlias();
String external_table_name;
if (alias.empty())
{
/// Generate the name for the external table.
external_table_name = "_data" + toString(external_table_id);
while (external_tables.count(external_table_name))
{
++external_table_id;
external_table_name = "_data" + toString(external_table_id);
auto hash = subquery_or_table_name->getTreeHash();
external_table_name = fmt::format("_data_{}_{}", hash.first, hash.second);
}
}
auto interpreter = interpretSubquery(subquery_or_table_name, getContext(), subquery_depth, {});
Block sample = interpreter->getSampleBlock();
NamesAndTypesList columns = sample.getNamesAndTypesList();
auto external_storage_holder = std::make_shared<TemporaryTableHolder>(
getContext(),
ColumnsDescription{columns},
ConstraintsDescription{},
nullptr,
/*create_for_global_subquery*/ true);
StoragePtr external_storage = external_storage_holder->getTable();
else
external_table_name = alias;
/** We replace the subquery with the name of the temporary table.
* It is in this form, the request will go to the remote server.
* This temporary table will go to the remote server, and on its side,
* instead of doing a subquery, you just need to read it.
* TODO We can do better than using alias to name external tables
*/
auto database_and_table_name = std::make_shared<ASTTableIdentifier>(external_table_name);
if (set_alias)
{
String alias = subquery_or_table_name->tryGetAlias();
if (auto * table_name = subquery_or_table_name->as<ASTTableIdentifier>())
if (alias.empty())
alias = table_name->shortName();
@ -151,8 +139,27 @@ public:
else
ast = database_and_table_name;
external_tables[external_table_name] = external_storage_holder;
if (external_tables.contains(external_table_name))
return;
auto interpreter = interpretSubquery(subquery_or_table_name, getContext(), subquery_depth, {});
Block sample = interpreter->getSampleBlock();
NamesAndTypesList columns = sample.getNamesAndTypesList();
auto external_storage_holder = std::make_shared<TemporaryTableHolder>(
getContext(),
ColumnsDescription{columns},
ConstraintsDescription{},
nullptr,
/*create_for_global_subquery*/ true);
StoragePtr external_storage = external_storage_holder->getTable();
external_tables.emplace(external_table_name, external_storage_holder);
/// We need to materialize external tables immediately because reading from distributed
/// tables might generate local plans which can refer to external tables during index
/// analysis. It's too late to populate the external table via CreatingSetsTransform.
if (getContext()->getSettingsRef().use_index_for_in_with_subqueries)
{
auto external_table = external_storage_holder->getTable();

View File

@ -962,18 +962,29 @@ public:
/// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin.
for (size_t j = 0, size = right_indexes.size(); j < size; ++j)
{
const auto & column = *block.getByPosition(right_indexes[j]).column;
if (auto * nullable_col = typeid_cast<ColumnNullable *>(columns[j].get()); nullable_col && !column.isNullable())
nullable_col->insertFromNotNullable(column, row_num);
auto column_from_block = block.getByPosition(right_indexes[j]);
if (type_name[j].type->lowCardinality() != column_from_block.type->lowCardinality())
{
JoinCommon::changeLowCardinalityInplace(column_from_block);
}
if (auto * nullable_col = typeid_cast<ColumnNullable *>(columns[j].get());
nullable_col && !column_from_block.column->isNullable())
nullable_col->insertFromNotNullable(*column_from_block.column, row_num);
else
columns[j]->insertFrom(column, row_num);
columns[j]->insertFrom(*column_from_block.column, row_num);
}
}
else
{
for (size_t j = 0, size = right_indexes.size(); j < size; ++j)
{
columns[j]->insertFrom(*block.getByPosition(right_indexes[j]).column, row_num);
auto column_from_block = block.getByPosition(right_indexes[j]);
if (type_name[j].type->lowCardinality() != column_from_block.type->lowCardinality())
{
JoinCommon::changeLowCardinalityInplace(column_from_block);
}
columns[j]->insertFrom(*column_from_block.column, row_num);
}
}
}
@ -1013,6 +1024,7 @@ private:
void addColumn(const ColumnWithTypeAndName & src_column, const std::string & qualified_name)
{
columns.push_back(src_column.column->cloneEmpty());
columns.back()->reserve(src_column.column->size());
type_name.emplace_back(src_column.type, src_column.name, qualified_name);
@ -1237,16 +1249,16 @@ NO_INLINE IColumn::Filter joinRightColumns(
{
const IColumn & left_asof_key = added_columns.leftAsofKey();
auto [block, row_num] = mapped->findAsof(left_asof_key, i);
if (block)
auto row_ref = mapped->findAsof(left_asof_key, i);
if (row_ref.block)
{
setUsed<need_filter>(filter, i);
if constexpr (multiple_disjuncts)
used_flags.template setUsed<jf.need_flags, multiple_disjuncts>(block, row_num, 0);
used_flags.template setUsed<jf.need_flags, multiple_disjuncts>(row_ref.block, row_ref.row_num, 0);
else
used_flags.template setUsed<jf.need_flags, multiple_disjuncts>(find_result);
added_columns.appendFromBlock<jf.add_missing>(*block, row_num);
added_columns.appendFromBlock<jf.add_missing>(*row_ref.block, row_ref.row_num);
}
else
addNotFoundRow<jf.add_missing, jf.need_replication>(added_columns, current_offset);

View File

@ -1180,11 +1180,10 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
/// old instance of the storage. For example, AsynchronousMetrics may cause ATTACH to fail,
/// so we allow waiting here. If database_atomic_wait_for_drop_and_detach_synchronously is disabled
/// and old storage instance still exists it will throw exception.
bool throw_if_table_in_use = getContext()->getSettingsRef().database_atomic_wait_for_drop_and_detach_synchronously;
if (throw_if_table_in_use)
database->checkDetachedTableNotInUse(create.uuid);
else
if (getContext()->getSettingsRef().database_atomic_wait_for_drop_and_detach_synchronously)
database->waitDetachedTableNotInUse(create.uuid);
else
database->checkDetachedTableNotInUse(create.uuid);
}
StoragePtr res;

View File

@ -359,6 +359,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
table_lock.reset();
table_id = StorageID::createEmpty();
metadata_snapshot = nullptr;
storage_snapshot = nullptr;
}
}
@ -1241,10 +1242,6 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
}
preliminary_sort();
// If there is no global subqueries, we can run subqueries only when receive them on server.
if (!query_analyzer->hasGlobalSubqueries() && !subqueries_for_sets.empty())
executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets);
}
if (expressions.second_stage || from_aggregation_stage)
@ -1427,7 +1424,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
}
}
if (!subqueries_for_sets.empty() && (expressions.hasHaving() || query_analyzer->hasGlobalSubqueries()))
if (!subqueries_for_sets.empty())
executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets);
}
@ -1891,7 +1888,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
&& limit_length <= std::numeric_limits<UInt64>::max() - limit_offset
&& limit_length + limit_offset < max_block_size)
{
max_block_size = std::max(UInt64(1), limit_length + limit_offset);
max_block_size = std::max(UInt64{1}, limit_length + limit_offset);
max_threads_execute_query = max_streams = 1;
}
@ -2577,11 +2574,11 @@ void InterpreterSelectQuery::executeExtremes(QueryPlan & query_plan)
void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_plan, SubqueriesForSets & subqueries_for_sets)
{
const auto & input_order_info = query_info.input_order_info
? query_info.input_order_info
: (query_info.projection ? query_info.projection->input_order_info : nullptr);
if (input_order_info)
executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins");
// const auto & input_order_info = query_info.input_order_info
// ? query_info.input_order_info
// : (query_info.projection ? query_info.projection->input_order_info : nullptr);
// if (input_order_info)
// executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins");
const Settings & settings = context->getSettingsRef();

View File

@ -351,15 +351,6 @@ public:
max_size = max_size_;
}
// Before calling this method you should be sure
// that lock is acquired.
template <typename F>
void processEachQueryStatus(F && func) const
{
for (auto && query : processes)
func(query);
}
void setMaxInsertQueriesAmount(size_t max_insert_queries_amount_)
{
std::lock_guard lock(mutex);

View File

@ -1,5 +1,6 @@
#include <Interpreters/RowRefs.h>
#include <Common/RadixSort.h>
#include <AggregateFunctions/Helpers.h>
#include <Columns/IColumn.h>
#include <DataTypes/IDataType.h>
@ -44,38 +45,52 @@ class SortedLookupVector : public SortedLookupVectorBase
{
struct Entry
{
/// We don't store a RowRef and instead keep it's members separately (and return a tuple) to reduce the memory usage.
/// For example, for sizeof(T) == 4 => sizeof(Entry) == 16 (while before it would be 20). Then when you put it into a vector, the effect is even greater
decltype(RowRef::block) block;
decltype(RowRef::row_num) row_num;
TKey asof_value;
TKey value;
uint32_t row_ref_index;
Entry() = delete;
Entry(TKey v, const Block * b, size_t r) : block(b), row_num(r), asof_value(v) { }
Entry(TKey value_, uint32_t row_ref_index_)
: value(value_)
, row_ref_index(row_ref_index_)
{ }
bool operator<(const Entry & other) const { return asof_value < other.asof_value; }
};
struct LessEntryOperator
{
ALWAYS_INLINE bool operator()(const Entry & lhs, const Entry & rhs) const
{
return lhs.value < rhs.value;
}
};
struct GreaterEntryOperator
{
bool operator()(Entry const & a, Entry const & b) const { return a.asof_value > b.asof_value; }
ALWAYS_INLINE bool operator()(const Entry & lhs, const Entry & rhs) const
{
return lhs.value > rhs.value;
}
};
public:
using Base = std::vector<Entry>;
using Keys = std::vector<TKey>;
static constexpr bool isDescending = (inequality == ASOF::Inequality::Greater || inequality == ASOF::Inequality::GreaterOrEquals);
static constexpr bool isStrict = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::Greater);
using Entries = PaddedPODArray<Entry>;
using RowRefs = PaddedPODArray<RowRef>;
static constexpr bool is_descending = (inequality == ASOF::Inequality::Greater || inequality == ASOF::Inequality::GreaterOrEquals);
static constexpr bool is_strict = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::Greater);
void insert(const IColumn & asof_column, const Block * block, size_t row_num) override
{
using ColumnType = ColumnVectorOrDecimal<TKey>;
const auto & column = assert_cast<const ColumnType &>(asof_column);
TKey k = column.getElement(row_num);
TKey key = column.getElement(row_num);
assert(!sorted.load(std::memory_order_acquire));
array.emplace_back(k, block, row_num);
entries.emplace_back(key, row_refs.size());
row_refs.emplace_back(RowRef(block, row_num));
}
/// Unrolled version of upper_bound and lower_bound
@ -84,30 +99,30 @@ public:
/// at https://en.algorithmica.org/hpc/data-structures/s-tree/
size_t boundSearch(TKey value)
{
size_t size = array.size();
size_t size = entries.size();
size_t low = 0;
/// This is a single binary search iteration as a macro to unroll. Takes into account the inequality:
/// isStrict -> Equal values are not requested
/// isDescending -> The vector is sorted in reverse (for greater or greaterOrEquals)
/// is_strict -> Equal values are not requested
/// is_descending -> The vector is sorted in reverse (for greater or greaterOrEquals)
#define BOUND_ITERATION \
{ \
size_t half = size / 2; \
size_t other_half = size - half; \
size_t probe = low + half; \
size_t other_low = low + other_half; \
TKey v = array[probe].asof_value; \
TKey & v = entries[probe].value; \
size = half; \
if constexpr (isDescending) \
if constexpr (is_descending) \
{ \
if constexpr (isStrict) \
if constexpr (is_strict) \
low = value <= v ? other_low : low; \
else \
low = value < v ? other_low : low; \
} \
else \
{ \
if constexpr (isStrict) \
if constexpr (is_strict) \
low = value >= v ? other_low : low; \
else \
low = value > v ? other_low : low; \
@ -130,7 +145,7 @@ public:
return low;
}
std::tuple<decltype(RowRef::block), decltype(RowRef::row_num)> findAsof(const IColumn & asof_column, size_t row_num) override
RowRef findAsof(const IColumn & asof_column, size_t row_num) override
{
sort();
@ -139,8 +154,11 @@ public:
TKey k = column.getElement(row_num);
size_t pos = boundSearch(k);
if (pos != array.size())
return std::make_tuple(array[pos].block, array[pos].row_num);
if (pos != entries.size())
{
size_t row_ref_index = entries[pos].row_ref_index;
return row_refs[row_ref_index];
}
return {nullptr, 0};
}
@ -148,7 +166,8 @@ public:
private:
std::atomic<bool> sorted = false;
mutable std::mutex lock;
Base array;
Entries entries;
RowRefs row_refs;
// Double checked locking with SC atomics works in C++
// https://preshing.com/20130930/double-checked-locking-is-fixed-in-cpp11/
@ -160,12 +179,37 @@ private:
if (!sorted.load(std::memory_order_acquire))
{
std::lock_guard<std::mutex> l(lock);
if (!sorted.load(std::memory_order_relaxed))
{
if constexpr (isDescending)
::sort(array.begin(), array.end(), GreaterEntryOperator());
if constexpr (std::is_arithmetic_v<TKey> && !std::is_floating_point_v<TKey>)
{
if (likely(entries.size() > 256))
{
struct RadixSortTraits : RadixSortNumTraits<TKey>
{
using Element = Entry;
using Result = Element;
static TKey & extractKey(Element & elem) { return elem.value; }
static Element extractResult(Element & elem) { return elem; }
};
if constexpr (is_descending)
RadixSort<RadixSortTraits>::executeLSD(entries.data(), entries.size(), true);
else
::sort(array.begin(), array.end());
RadixSort<RadixSortTraits>::executeLSD(entries.data(), entries.size(), false);
sorted.store(true, std::memory_order_release);
return;
}
}
if constexpr (is_descending)
::sort(entries.begin(), entries.end(), GreaterEntryOperator());
else
::sort(entries.begin(), entries.end(), LessEntryOperator());
sorted.store(true, std::memory_order_release);
}
}

Some files were not shown because too many files have changed in this diff Show More