mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 16:50:48 +00:00
Merge branch 'master' into improve-schema-inference
This commit is contained in:
commit
d45143ffe0
1
.github/workflows/nightly.yml
vendored
1
.github/workflows/nightly.yml
vendored
@ -7,6 +7,7 @@ env:
|
||||
"on":
|
||||
schedule:
|
||||
- cron: '13 3 * * *'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
DockerHubPushAarch64:
|
||||
|
@ -11,6 +11,7 @@
|
||||
|
||||
* Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)).
|
||||
* Change implementation-specific behavior on overflow of function `toDateTime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)).
|
||||
* Make functions `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave the same as `toIPv4`, `toIPv6` functions. Changed behavior of incorrect IP address passed into functions `toIPv4`, `toIPv6`: now if an invalid IP address is passed into these functions, an exception will be raised; previously these functions returned a default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringToNumOrNull`, `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. Functions `IPv4StringToNumOrDefault`, `toIPv4OrDefault`, `toIPv6OrDefault` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning default value for invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`; if this setting is enabled, then IP address conversion functions will behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)).
|
||||
|
||||
#### New Feature
|
||||
|
||||
|
@ -266,7 +266,7 @@ if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
|
||||
endif ()
|
||||
|
||||
# Allows to build stripped binary in a separate directory
|
||||
if (OBJCOPY_PATH AND READELF_PATH)
|
||||
if (OBJCOPY_PATH AND STRIP_PATH)
|
||||
option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF)
|
||||
if (INSTALL_STRIPPED_BINARIES)
|
||||
set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")
|
||||
|
@ -1,28 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
BINARY_PATH=$1
|
||||
BINARY_NAME=$(basename "$BINARY_PATH")
|
||||
DESTINATION_STRIPPED_DIR=$2
|
||||
OBJCOPY_PATH=${3:objcopy}
|
||||
READELF_PATH=${4:readelf}
|
||||
|
||||
BUILD_ID=$($READELF_PATH -n "$1" | sed -n '/Build ID/ { s/.*: //p; q; }')
|
||||
BUILD_ID_PREFIX=${BUILD_ID:0:2}
|
||||
BUILD_ID_SUFFIX=${BUILD_ID:2}
|
||||
|
||||
DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id"
|
||||
DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin"
|
||||
|
||||
mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX"
|
||||
mkdir -p "$DESTINATION_STRIP_BINARY_DIR"
|
||||
|
||||
|
||||
cp "$BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
|
||||
|
||||
$OBJCOPY_PATH --only-keep-debug --compress-debug-sections "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
|
||||
chmod 0644 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
|
||||
chown 0:0 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
|
||||
|
||||
strip --remove-section=.comment --remove-section=.note "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
|
||||
|
||||
$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
|
@ -11,16 +11,43 @@ macro(clickhouse_strip_binary)
|
||||
message(FATAL_ERROR "A binary path name must be provided for stripping binary")
|
||||
endif()
|
||||
|
||||
|
||||
if (NOT DEFINED STRIP_DESTINATION_DIR)
|
||||
message(FATAL_ERROR "Destination directory for stripped binary must be provided")
|
||||
endif()
|
||||
|
||||
add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD
|
||||
COMMAND bash ${ClickHouse_SOURCE_DIR}/cmake/strip.sh ${STRIP_BINARY_PATH} ${STRIP_DESTINATION_DIR} ${OBJCOPY_PATH} ${READELF_PATH}
|
||||
COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/bin"
|
||||
COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin"
|
||||
COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
|
||||
COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
|
||||
COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
|
||||
COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
|
||||
COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
|
||||
COMMENT "Stripping clickhouse binary" VERBATIM
|
||||
)
|
||||
|
||||
install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse)
|
||||
# DESTINATION names a directory, not a file: the debug file is installed as
# <libdir>/debug/<full bindir>/<target>.debug.
install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR} COMPONENT clickhouse)
|
||||
endmacro()
|
||||
|
||||
|
||||
# Creates an empty `<target>.debug` placeholder file and registers it for installation.
#
# Judging by the macro name and the COMMENT below, the placeholder is meant to keep NFPM
# packaging working when binaries are not stripped (NOTE(review): confirm against the
# packaging specs).
#
# Keyword arguments:
#   TARGET          - existing build target; the placeholder is created right after it is built.
#   DESTINATION_DIR - directory under which `lib/debug/<target>.debug` is created.
#
# Example:
#   clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/stripped)
#
# This is a macro, so the parsed EMPTY_DEBUG_* variables are set in the caller's scope.
macro(clickhouse_make_empty_debug_info_for_nfpm)
    set(oneValueArgs TARGET DESTINATION_DIR)
    cmake_parse_arguments(EMPTY_DEBUG "" "${oneValueArgs}" "" ${ARGN})

    if (NOT DEFINED EMPTY_DEBUG_TARGET)
        message(FATAL_ERROR "A target name must be provided for creating empty debug info")
    endif()

    if (NOT DEFINED EMPTY_DEBUG_DESTINATION_DIR)
        message(FATAL_ERROR "Destination directory for empty debug must be provided")
    endif()

    # POST_BUILD: the directory and the empty file are (re)created every time the target is built.
    add_custom_command(TARGET ${EMPTY_DEBUG_TARGET} POST_BUILD
        COMMAND mkdir -p "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug"
        COMMAND touch "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug"
        COMMENT "Adding empty debug info for NFPM" VERBATIM
    )

    # DESTINATION is a directory; the file keeps its `<target>.debug` name inside it.
    install(FILES "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" DESTINATION "${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}" COMPONENT clickhouse)
endmacro()
|
||||
|
@ -170,32 +170,32 @@ else ()
|
||||
message (FATAL_ERROR "Cannot find objcopy.")
|
||||
endif ()
|
||||
|
||||
# Readelf (FIXME copypaste)
|
||||
# Strip (FIXME copypaste)
|
||||
|
||||
if (COMPILER_GCC)
|
||||
find_program (READELF_PATH NAMES "llvm-readelf" "llvm-readelf-13" "llvm-readelf-12" "llvm-readelf-11" "readelf")
|
||||
find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-13" "llvm-strip-12" "llvm-strip-11" "strip")
|
||||
else ()
|
||||
find_program (READELF_PATH NAMES "llvm-readelf-${COMPILER_VERSION_MAJOR}" "llvm-readelf" "readelf")
|
||||
find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip")
|
||||
endif ()
|
||||
|
||||
if (NOT READELF_PATH AND OS_DARWIN)
|
||||
if (NOT STRIP_PATH AND OS_DARWIN)
|
||||
find_program (BREW_PATH NAMES "brew")
|
||||
if (BREW_PATH)
|
||||
execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX)
|
||||
if (LLVM_PREFIX)
|
||||
find_program (READELF_PATH NAMES "llvm-readelf" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
|
||||
find_program (STRIP_PATH NAMES "llvm-strip" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
|
||||
endif ()
|
||||
if (NOT READELF_PATH)
|
||||
if (NOT STRIP_PATH)
|
||||
execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX)
|
||||
if (BINUTILS_PREFIX)
|
||||
find_program (READELF_PATH NAMES "readelf" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
|
||||
find_program (STRIP_PATH NAMES "strip" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (READELF_PATH)
|
||||
message (STATUS "Using readelf: ${READELF_PATH}")
|
||||
if (STRIP_PATH)
|
||||
message (STATUS "Using strip: ${STRIP_PATH}")
|
||||
else ()
|
||||
message (FATAL_ERROR "Cannot find readelf.")
|
||||
message (FATAL_ERROR "Cannot find strip.")
|
||||
endif ()
|
||||
|
2
contrib/libxml2
vendored
2
contrib/libxml2
vendored
@ -1 +1 @@
|
||||
Subproject commit 18890f471c420411aa3c989e104d090966ec9dbf
|
||||
Subproject commit a075d256fd9ff15590b86d981b75a50ead124fca
|
@ -1,4 +1,3 @@
|
||||
# rebuild in #33610
|
||||
# docker build -t clickhouse/docs-check .
|
||||
ARG FROM_TAG=latest
|
||||
FROM clickhouse/docs-builder:$FROM_TAG
|
||||
|
@ -131,9 +131,6 @@ function start()
|
||||
# use root to match with current uid
|
||||
clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
|
||||
sleep 0.5
|
||||
cat /var/log/clickhouse-server/stdout.log
|
||||
tail -n200 /var/log/clickhouse-server/stderr.log
|
||||
tail -n200 /var/log/clickhouse-server/clickhouse-server.log
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
|
||||
@ -211,14 +208,12 @@ stop
|
||||
start
|
||||
|
||||
clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Server failed to start\tFAIL' >> /test_output/test_results.tsv
|
||||
|| (echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
&& grep -a "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt)
|
||||
|
||||
[ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
|
||||
[ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL"
|
||||
|
||||
# Print Fatal log messages to stdout
|
||||
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log*
|
||||
|
||||
# Grep logs for sanitizer asserts, crashes and other critical errors
|
||||
|
||||
# Sanitizer asserts
|
||||
@ -235,20 +230,26 @@ zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/
|
||||
|| echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
|
||||
# Logical errors
|
||||
zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
|
||||
&& echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /test_output/logical_errors.txt \
|
||||
&& echo -e 'Logical error thrown (see clickhouse-server.log or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file logical_errors.txt if it's empty
|
||||
[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
|
||||
|
||||
# Crash
|
||||
zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
|
||||
&& echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv
|
||||
|
||||
# It also checks for crash without stacktrace (printed by watchdog)
|
||||
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
|
||||
&& echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|
||||
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /test_output/fatal_messages.txt \
|
||||
&& echo -e 'Fatal message in clickhouse-server.log (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file fatal_messages.txt if it's empty
|
||||
[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt
|
||||
|
||||
zgrep -Fa "########################################" /test_output/* > /dev/null \
|
||||
&& echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv
|
||||
|
||||
@ -259,12 +260,12 @@ echo -e "Backward compatibility check\n"
|
||||
|
||||
echo "Download previous release server"
|
||||
mkdir previous_release_package_folder
|
||||
clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'Download script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
|
||||
clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv
|
||||
|
||||
if [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
|
||||
then
|
||||
echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/backward_compatibility_check_results.tsv
|
||||
echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/test_results.tsv
|
||||
stop
|
||||
|
||||
# Uninstall current packages
|
||||
@ -290,8 +291,8 @@ then
|
||||
mkdir tmp_stress_output
|
||||
|
||||
./stress --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
|
||||
&& echo -e 'Test script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'Test script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
|
||||
&& echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv
|
||||
rm -rf tmp_stress_output
|
||||
|
||||
clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables"
|
||||
@ -301,8 +302,9 @@ then
|
||||
# Start new server
|
||||
configure
|
||||
start 500
|
||||
clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'Server failed to start\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
|
||||
clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \
|
||||
|| (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \
&& grep -a "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt)
|
||||
|
||||
clickhouse-client --query="SELECT 'Server version: ', version()"
|
||||
|
||||
@ -312,10 +314,12 @@ then
|
||||
stop
|
||||
|
||||
# Error messages (we should ignore some errors)
|
||||
echo "Check for Error messages in server log:"
|
||||
zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
|
||||
-e "Code: 236. DB::Exception: Cancelled mutating parts" \
|
||||
-e "REPLICA_IS_ALREADY_ACTIVE" \
|
||||
-e "REPLICA_IS_ALREADY_EXIST" \
|
||||
-e "ALL_REPLICAS_LOST" \
|
||||
-e "DDLWorker: Cannot parse DDL task query" \
|
||||
-e "RaftInstance: failed to accept a rpc connection due to error 125" \
|
||||
-e "UNKNOWN_DATABASE" \
|
||||
@ -328,47 +332,53 @@ then
|
||||
-e "Code: 1000, e.code() = 111, Connection refused" \
|
||||
-e "UNFINISHED" \
|
||||
-e "Renaming unexpected part" \
|
||||
/var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "<Error>" > /dev/null \
|
||||
&& echo -e 'Error message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'No Error messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
/var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
|
||||
&& echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file bc_check_error_messages.txt if it's empty
|
||||
[ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt
|
||||
|
||||
# Sanitizer asserts
|
||||
zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
|
||||
zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \
|
||||
&& echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'No sanitizer asserts\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
&& echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv
|
||||
rm -f /test_output/tmp
|
||||
|
||||
# OOM
|
||||
zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
|
||||
&& echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
&& echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
|
||||
# Logical errors
|
||||
zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
|
||||
&& echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'No logical errors\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
echo "Check for Logical errors in server log:"
|
||||
zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_logical_errors.txt \
|
||||
&& echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file bc_check_logical_errors.txt if it's empty
|
||||
[ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt
|
||||
|
||||
# Crash
|
||||
zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
|
||||
&& echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'Not crashed\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
&& echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv
|
||||
|
||||
# It also checks for crash without stacktrace (printed by watchdog)
|
||||
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
|
||||
&& echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
|
||||
|| echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
|
||||
echo "Check for Fatal message in server log:"
|
||||
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_fatal_messages.txt \
|
||||
&& echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
|
||||
|
||||
# Remove file bc_check_fatal_messages.txt if it's empty
|
||||
[ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
|
||||
|
||||
else
|
||||
echo -e "Failed to download previous release packets\tFAIL" >> /test_output/backward_compatibility_check_results.tsv
|
||||
echo -e "Backward compatibility check: Failed to download previous release packets\tFAIL" >> /test_output/test_results.tsv
|
||||
fi
|
||||
|
||||
zgrep -Fa "FAIL" /test_output/backward_compatibility_check_results.tsv > /dev/null \
|
||||
&& echo -e 'Backward compatibility check\tFAIL' >> /test_output/test_results.tsv \
|
||||
|| echo -e 'Backward compatibility check\tOK' >> /test_output/test_results.tsv
|
||||
|
||||
|
||||
# Put logs into /test_output/
|
||||
for log_file in /var/log/clickhouse-server/clickhouse-server.log*
|
||||
do
|
||||
|
@ -137,7 +137,7 @@ CREATE TABLE test.test_orc
|
||||
`f_array_array_float` Array(Array(Float32)),
|
||||
`day` String
|
||||
)
|
||||
ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
|
||||
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
|
||||
PARTITION BY day
|
||||
|
||||
```
|
||||
|
@ -195,5 +195,6 @@ toc_title: Adopters
|
||||
| <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
|
||||
| <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
|
||||
| <a href="https://magenta-technology.ru/sistema-upravleniya-marshrutami-inkassacii-as-strela/" class="favicon">АС "Стрела"</a> | Transportation | — | — | — | [Job posting, Jan 2022](https://vk.com/topic-111905078_35689124?post=3553) |
|
||||
| <a href="https://piwik.pro/" class="favicon">Piwik PRO</a> | Web Analytics | — | — | — | [Official website, Dec 2018](https://piwik.pro/blog/piwik-pro-clickhouse-faster-efficient-reports/) |
|
||||
|
||||
[Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->
|
||||
|
@ -5,7 +5,7 @@ toc_title: Caches
|
||||
|
||||
# Cache Types {#cache-types}
|
||||
|
||||
When performing queries, ClichHouse uses different caches.
|
||||
When performing queries, ClickHouse uses different caches.
|
||||
|
||||
Main cache types:
|
||||
|
||||
|
@ -10,7 +10,7 @@ cssmin==0.2.0
|
||||
future==0.18.2
|
||||
htmlmin==0.1.12
|
||||
idna==2.10
|
||||
Jinja2>=3.0.3
|
||||
Jinja2==3.0.3
|
||||
jinja2-highlight==0.6.1
|
||||
jsmin==3.0.0
|
||||
livereload==2.6.3
|
||||
|
@ -140,7 +140,7 @@ CREATE TABLE test.test_orc
|
||||
`f_array_array_float` Array(Array(Float32)),
|
||||
`day` String
|
||||
)
|
||||
ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
|
||||
ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
|
||||
PARTITION BY day
|
||||
|
||||
```
|
||||
|
@ -15,7 +15,7 @@
|
||||
```
|
||||
┌─name─────────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┐
|
||||
│ sumburConsistentHash │ 0 │ 0 │ │
|
||||
│ yandexConsistentHash │ 0 │ 0 │ │
|
||||
│ kostikConsistentHash │ 0 │ 0 │ │
|
||||
│ demangle │ 0 │ 0 │ │
|
||||
│ addressToLine │ 0 │ 0 │ │
|
||||
│ JSONExtractRaw │ 0 │ 0 │ │
|
||||
|
@ -21,8 +21,12 @@ description: |
|
||||
This package contains the debugging symbols for clickhouse-common.
|
||||
|
||||
contents:
|
||||
- src: root/usr/lib/debug
|
||||
dst: /usr/lib/debug
|
||||
- src: root/usr/lib/debug/usr/bin/clickhouse.debug
|
||||
dst: /usr/lib/debug/usr/bin/clickhouse.debug
|
||||
- src: root/usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
|
||||
dst: /usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
|
||||
- src: root/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
|
||||
dst: /usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
|
||||
# docs
|
||||
- src: ../AUTHORS
|
||||
dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS
|
||||
|
@ -473,18 +473,11 @@ else ()
|
||||
if (INSTALL_STRIPPED_BINARIES)
|
||||
clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse)
|
||||
else()
|
||||
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT})
|
||||
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (NOT INSTALL_STRIPPED_BINARIES)
|
||||
# Install dunny debug directory
|
||||
# TODO: move logic to every place where clickhouse_strip_binary is used
|
||||
add_custom_command(TARGET clickhouse POST_BUILD COMMAND echo > .empty )
|
||||
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/.empty" DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/.empty)
|
||||
endif()
|
||||
|
||||
|
||||
if (ENABLE_TESTS)
|
||||
set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms)
|
||||
add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS})
|
||||
|
@ -137,5 +137,10 @@ if (BUILD_STANDALONE_KEEPER)
|
||||
add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
|
||||
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
|
||||
|
||||
if (INSTALL_STRIPPED_BINARIES)
|
||||
clickhouse_strip_binary(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-keeper)
|
||||
else()
|
||||
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
|
||||
install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
endif()
|
||||
endif()
|
||||
|
@ -27,5 +27,6 @@ set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECT
|
||||
if (INSTALL_STRIPPED_BINARIES)
|
||||
clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge)
|
||||
else()
|
||||
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
|
||||
install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
endif()
|
||||
|
@ -42,6 +42,7 @@ endif()
|
||||
if (INSTALL_STRIPPED_BINARIES)
|
||||
clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge)
|
||||
else()
|
||||
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
|
||||
install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
|
||||
endif()
|
||||
|
||||
|
@ -29,15 +29,15 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover(
|
||||
time_t decrease_error_period_,
|
||||
size_t max_error_cap_)
|
||||
: Base(std::move(nested_pools_), decrease_error_period_, max_error_cap_, &Poco::Logger::get("ConnectionPoolWithFailover"))
|
||||
, default_load_balancing(load_balancing)
|
||||
, get_priority_load_balancing(load_balancing)
|
||||
{
|
||||
const std::string & local_hostname = getFQDNOrHostName();
|
||||
|
||||
hostname_differences.resize(nested_pools.size());
|
||||
get_priority_load_balancing.hostname_differences.resize(nested_pools.size());
|
||||
for (size_t i = 0; i < nested_pools.size(); ++i)
|
||||
{
|
||||
ConnectionPool & connection_pool = dynamic_cast<ConnectionPool &>(*nested_pools[i]);
|
||||
hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost());
|
||||
get_priority_load_balancing.hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost());
|
||||
}
|
||||
}
|
||||
|
||||
@ -51,36 +51,15 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts
|
||||
};
|
||||
|
||||
size_t offset = 0;
|
||||
LoadBalancing load_balancing = get_priority_load_balancing.load_balancing;
|
||||
if (settings)
|
||||
offset = settings->load_balancing_first_offset % nested_pools.size();
|
||||
GetPriorityFunc get_priority;
|
||||
switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing)
|
||||
{
|
||||
case LoadBalancing::NEAREST_HOSTNAME:
|
||||
get_priority = [&](size_t i) { return hostname_differences[i]; };
|
||||
break;
|
||||
case LoadBalancing::IN_ORDER:
|
||||
get_priority = [](size_t i) { return i; };
|
||||
break;
|
||||
case LoadBalancing::RANDOM:
|
||||
break;
|
||||
case LoadBalancing::FIRST_OR_RANDOM:
|
||||
get_priority = [offset](size_t i) -> size_t { return i != offset; };
|
||||
break;
|
||||
case LoadBalancing::ROUND_ROBIN:
|
||||
if (last_used >= nested_pools.size())
|
||||
last_used = 0;
|
||||
++last_used;
|
||||
/* Consider nested_pools.size() equals to 5
|
||||
* last_used = 1 -> get_priority: 0 1 2 3 4
|
||||
* last_used = 2 -> get_priority: 4 0 1 2 3
|
||||
* last_used = 3 -> get_priority: 4 3 0 1 2
|
||||
* ...
|
||||
* */
|
||||
get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; };
|
||||
break;
|
||||
offset = settings->load_balancing_first_offset % nested_pools.size();
|
||||
load_balancing = LoadBalancing(settings->load_balancing);
|
||||
}
|
||||
|
||||
GetPriorityFunc get_priority = get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size());
|
||||
|
||||
UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0;
|
||||
bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true;
|
||||
|
||||
@ -173,38 +152,14 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
|
||||
ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings * settings)
|
||||
{
|
||||
size_t offset = 0;
|
||||
LoadBalancing load_balancing = get_priority_load_balancing.load_balancing;
|
||||
if (settings)
|
||||
offset = settings->load_balancing_first_offset % nested_pools.size();
|
||||
|
||||
GetPriorityFunc get_priority;
|
||||
switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing)
|
||||
{
|
||||
case LoadBalancing::NEAREST_HOSTNAME:
|
||||
get_priority = [&](size_t i) { return hostname_differences[i]; };
|
||||
break;
|
||||
case LoadBalancing::IN_ORDER:
|
||||
get_priority = [](size_t i) { return i; };
|
||||
break;
|
||||
case LoadBalancing::RANDOM:
|
||||
break;
|
||||
case LoadBalancing::FIRST_OR_RANDOM:
|
||||
get_priority = [offset](size_t i) -> size_t { return i != offset; };
|
||||
break;
|
||||
case LoadBalancing::ROUND_ROBIN:
|
||||
if (last_used >= nested_pools.size())
|
||||
last_used = 0;
|
||||
++last_used;
|
||||
/* Consider nested_pools.size() equals to 5
|
||||
* last_used = 1 -> get_priority: 0 1 2 3 4
|
||||
* last_used = 2 -> get_priority: 5 0 1 2 3
|
||||
* last_used = 3 -> get_priority: 5 4 0 1 2
|
||||
* ...
|
||||
* */
|
||||
get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; };
|
||||
break;
|
||||
offset = settings->load_balancing_first_offset % nested_pools.size();
|
||||
load_balancing = LoadBalancing(settings->load_balancing);
|
||||
}
|
||||
|
||||
return get_priority;
|
||||
return get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size());
|
||||
}
|
||||
|
||||
std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyImpl(
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/PoolWithFailoverBase.h>
|
||||
#include <Common/GetPriorityForLoadBalancing.h>
|
||||
#include <Client/ConnectionPool.h>
|
||||
|
||||
#include <chrono>
|
||||
@ -109,9 +110,7 @@ private:
|
||||
|
||||
GetPriorityFunc makeGetPriorityFunc(const Settings * settings);
|
||||
|
||||
std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools.
|
||||
size_t last_used = 0; /// Last used for round_robin policy.
|
||||
LoadBalancing default_load_balancing;
|
||||
GetPriorityForLoadBalancing get_priority_load_balancing;
|
||||
};
|
||||
|
||||
using ConnectionPoolWithFailoverPtr = std::shared_ptr<ConnectionPoolWithFailover>;
|
||||
|
@ -83,11 +83,20 @@ size_t extractMaskNumericImpl(
|
||||
const PaddedPODArray<UInt8> * null_bytemap,
|
||||
PaddedPODArray<UInt8> * nulls)
|
||||
{
|
||||
if constexpr (!column_is_short)
|
||||
{
|
||||
if (data.size() != mask.size())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask");
|
||||
}
|
||||
|
||||
size_t ones_count = 0;
|
||||
size_t data_index = 0;
|
||||
size_t mask_size = mask.size();
|
||||
|
||||
for (size_t i = 0; i != mask_size; ++i)
|
||||
size_t mask_size = mask.size();
|
||||
size_t data_size = data.size();
|
||||
|
||||
size_t i = 0;
|
||||
for (; i != mask_size && data_index != data_size; ++i)
|
||||
{
|
||||
// Change mask only where value is 1.
|
||||
if (!mask[i])
|
||||
@ -120,6 +129,13 @@ size_t extractMaskNumericImpl(
|
||||
|
||||
mask[i] = value;
|
||||
}
|
||||
|
||||
if constexpr (column_is_short)
|
||||
{
|
||||
if (data_index != data_size)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask");
|
||||
}
|
||||
|
||||
return ones_count;
|
||||
}
|
||||
|
||||
|
@ -31,8 +31,8 @@ public:
|
||||
/// probably it worth to try to increase stack size for coroutines.
|
||||
///
|
||||
/// Current value is just enough for all tests in our CI. It's not selected in some special
|
||||
/// way. We will have 40 pages with 4KB page size.
|
||||
static constexpr size_t default_stack_size = 192 * 1024; /// 64KB was not enough for tests
|
||||
/// way. We will have 80 pages with 4KB page size.
|
||||
static constexpr size_t default_stack_size = 320 * 1024; /// 64KB was not enough for tests
|
||||
|
||||
explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_)
|
||||
{
|
||||
|
49
src/Common/GetPriorityForLoadBalancing.cpp
Normal file
49
src/Common/GetPriorityForLoadBalancing.cpp
Normal file
@ -0,0 +1,49 @@
|
||||
#include <Common/GetPriorityForLoadBalancing.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
/// Builds the function that maps a pool index to its priority for the given policy
/// (a smaller returned value means the pool is preferred).
///
/// load_balance - policy to build the function for.
/// offset       - index of the preferred pool; only read by FIRST_OR_RANDOM.
/// pool_size    - number of pools; only read by ROUND_ROBIN.
///
/// Returns an empty std::function for RANDOM, so callers must check it before calling.
/// Throws LOGICAL_ERROR for NEAREST_HOSTNAME when hostname_differences is empty.
///
/// The NEAREST_HOSTNAME and ROUND_ROBIN functions read members through `this`,
/// so the returned function must not outlive this object.
std::function<size_t(size_t index)> GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const
{
    std::function<size_t(size_t index)> get_priority;
    switch (load_balance)
    {
        case LoadBalancing::NEAREST_HOSTNAME:
            if (hostname_differences.empty())
                throw Exception(ErrorCodes::LOGICAL_ERROR, "It's a bug: hostname_differences is not initialized");
            get_priority = [this](size_t i) { return hostname_differences[i]; };
            break;
        case LoadBalancing::IN_ORDER:
            get_priority = [](size_t i) { return i; };
            break;
        case LoadBalancing::RANDOM:
            /// Deliberately left empty: no priority function for this policy.
            break;
        case LoadBalancing::FIRST_OR_RANDOM:
            /// 0 for the pool at `offset`, 1 for every other pool.
            get_priority = [offset](size_t i) -> size_t { return i != offset; };
            break;
        case LoadBalancing::ROUND_ROBIN:
            if (last_used >= pool_size)
                last_used = 0;
            ++last_used;
            /* Consider pool_size equals to 5
             * last_used = 1 -> get_priority: 0 1 2 3 4
             * last_used = 2 -> get_priority: 4 0 1 2 3
             * last_used = 3 -> get_priority: 4 3 0 1 2
             * ...
             * */
            /// `pool_size` is a by-value parameter, so it has to be captured by value:
            /// the lambda is returned to the caller and a reference to the parameter
            /// would dangle as soon as this function returns.
            /// `last_used` is still read through `this`, exactly as before.
            get_priority = [this, pool_size](size_t i)
            {
                ++i;
                return i < last_used ? pool_size - i : i - last_used;
            };
            break;
    }
    return get_priority;
}
|
||||
|
||||
}
|
34
src/Common/GetPriorityForLoadBalancing.h
Normal file
34
src/Common/GetPriorityForLoadBalancing.h
Normal file
@ -0,0 +1,34 @@
|
||||
#pragma once
|
||||
|
||||
#include <Core/SettingsEnums.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class GetPriorityForLoadBalancing
|
||||
{
|
||||
public:
|
||||
GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {}
|
||||
GetPriorityForLoadBalancing(){}
|
||||
|
||||
bool operator == (const GetPriorityForLoadBalancing & other) const
|
||||
{
|
||||
return load_balancing == other.load_balancing && hostname_differences == other.hostname_differences;
|
||||
}
|
||||
|
||||
bool operator != (const GetPriorityForLoadBalancing & other) const
|
||||
{
|
||||
return !(*this == other);
|
||||
}
|
||||
|
||||
std::function<size_t(size_t index)> getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const;
|
||||
|
||||
std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools.
|
||||
|
||||
LoadBalancing load_balancing = LoadBalancing::RANDOM;
|
||||
|
||||
private:
|
||||
mutable size_t last_used = 0; /// Last used for round_robin policy.
|
||||
};
|
||||
|
||||
}
|
@ -13,6 +13,9 @@ Int32 IntervalKind::toAvgSeconds() const
|
||||
{
|
||||
switch (kind)
|
||||
{
|
||||
case IntervalKind::Nanosecond: return 0; /// fractional parts of seconds have 0 seconds
|
||||
case IntervalKind::Microsecond: return 0;
|
||||
case IntervalKind::Millisecond: return 0;
|
||||
case IntervalKind::Second: return 1;
|
||||
case IntervalKind::Minute: return 60;
|
||||
case IntervalKind::Hour: return 3600;
|
||||
@ -52,6 +55,9 @@ const char * IntervalKind::toKeyword() const
|
||||
{
|
||||
switch (kind)
|
||||
{
|
||||
case IntervalKind::Nanosecond: return "NANOSECOND";
|
||||
case IntervalKind::Microsecond: return "MICROSECOND";
|
||||
case IntervalKind::Millisecond: return "MILLISECOND";
|
||||
case IntervalKind::Second: return "SECOND";
|
||||
case IntervalKind::Minute: return "MINUTE";
|
||||
case IntervalKind::Hour: return "HOUR";
|
||||
@ -69,6 +75,9 @@ const char * IntervalKind::toLowercasedKeyword() const
|
||||
{
|
||||
switch (kind)
|
||||
{
|
||||
case IntervalKind::Nanosecond: return "nanosecond";
|
||||
case IntervalKind::Microsecond: return "microsecond";
|
||||
case IntervalKind::Millisecond: return "millisecond";
|
||||
case IntervalKind::Second: return "second";
|
||||
case IntervalKind::Minute: return "minute";
|
||||
case IntervalKind::Hour: return "hour";
|
||||
@ -86,6 +95,12 @@ const char * IntervalKind::toDateDiffUnit() const
|
||||
{
|
||||
switch (kind)
|
||||
{
|
||||
case IntervalKind::Nanosecond:
|
||||
return "nanosecond";
|
||||
case IntervalKind::Microsecond:
|
||||
return "microsecond";
|
||||
case IntervalKind::Millisecond:
|
||||
return "millisecond";
|
||||
case IntervalKind::Second:
|
||||
return "second";
|
||||
case IntervalKind::Minute:
|
||||
@ -111,6 +126,12 @@ const char * IntervalKind::toNameOfFunctionToIntervalDataType() const
|
||||
{
|
||||
switch (kind)
|
||||
{
|
||||
case IntervalKind::Nanosecond:
|
||||
return "toIntervalNanosecond";
|
||||
case IntervalKind::Microsecond:
|
||||
return "toIntervalMicrosecond";
|
||||
case IntervalKind::Millisecond:
|
||||
return "toIntervalMillisecond";
|
||||
case IntervalKind::Second:
|
||||
return "toIntervalSecond";
|
||||
case IntervalKind::Minute:
|
||||
@ -136,6 +157,12 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const
|
||||
{
|
||||
switch (kind)
|
||||
{
|
||||
case IntervalKind::Nanosecond:
|
||||
return "toNanosecond";
|
||||
case IntervalKind::Microsecond:
|
||||
return "toMicrosecond";
|
||||
case IntervalKind::Millisecond:
|
||||
return "toMillisecond";
|
||||
case IntervalKind::Second:
|
||||
return "toSecond";
|
||||
case IntervalKind::Minute:
|
||||
@ -162,6 +189,21 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const
|
||||
|
||||
bool IntervalKind::tryParseString(const std::string & kind, IntervalKind::Kind & result)
|
||||
{
|
||||
if ("nanosecond" == kind)
|
||||
{
|
||||
result = IntervalKind::Nanosecond;
|
||||
return true;
|
||||
}
|
||||
if ("microsecond" == kind)
|
||||
{
|
||||
result = IntervalKind::Microsecond;
|
||||
return true;
|
||||
}
|
||||
if ("millisecond" == kind)
|
||||
{
|
||||
result = IntervalKind::Millisecond;
|
||||
return true;
|
||||
}
|
||||
if ("second" == kind)
|
||||
{
|
||||
result = IntervalKind::Second;
|
||||
|
@ -10,6 +10,9 @@ struct IntervalKind
|
||||
{
|
||||
enum Kind
|
||||
{
|
||||
Nanosecond,
|
||||
Microsecond,
|
||||
Millisecond,
|
||||
Second,
|
||||
Minute,
|
||||
Hour,
|
||||
@ -61,6 +64,9 @@ struct IntervalKind
|
||||
|
||||
/// NOLINTNEXTLINE
|
||||
#define FOR_EACH_INTERVAL_KIND(M) \
|
||||
M(Nanosecond) \
|
||||
M(Microsecond) \
|
||||
M(Millisecond) \
|
||||
M(Second) \
|
||||
M(Minute) \
|
||||
M(Hour) \
|
||||
|
@ -515,6 +515,11 @@ public:
|
||||
radixSortLSDInternal<false>(arr, size, false, nullptr);
|
||||
}
|
||||
|
||||
/// Runs the LSD radix sort over `arr` (`size` elements), forwarding the caller-supplied
/// `reverse` flag to radixSortLSDInternal<false>; the last argument is passed as nullptr.
/// NOTE(review): presumably `reverse` selects descending order and nullptr means
/// "no separate destination buffer" - confirm against radixSortLSDInternal.
static void executeLSD(Element * arr, size_t size, bool reverse)
|
||||
{
|
||||
radixSortLSDInternal<false>(arr, size, reverse, nullptr);
|
||||
}
|
||||
|
||||
/** This function will start to sort inplace (modify 'arr')
|
||||
* but on the last step it will write result directly to the destination
|
||||
* instead of finishing sorting 'arr'.
|
||||
|
@ -22,7 +22,6 @@ target_link_libraries (clickhouse_common_zookeeper_no_log
|
||||
PRIVATE
|
||||
string_utils
|
||||
)
|
||||
|
||||
if (ENABLE_EXAMPLES)
|
||||
add_subdirectory(examples)
|
||||
endif()
|
||||
|
@ -5,15 +5,15 @@
|
||||
|
||||
#include <functional>
|
||||
#include <filesystem>
|
||||
#include <pcg-random/pcg_random.hpp>
|
||||
|
||||
#include <base/logger_useful.h>
|
||||
#include <base/find_symbols.h>
|
||||
#include <Common/randomSeed.h>
|
||||
#include <base/getFQDNOrHostName.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/isLocalAddress.h>
|
||||
|
||||
#include <Poco/Net/NetException.h>
|
||||
#include <Poco/Net/DNS.h>
|
||||
|
||||
|
||||
#define ZOOKEEPER_CONNECTION_TIMEOUT_MS 1000
|
||||
@ -48,7 +48,7 @@ static void check(Coordination::Error code, const std::string & path)
|
||||
|
||||
|
||||
void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_,
|
||||
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_)
|
||||
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
|
||||
{
|
||||
log = &Poco::Logger::get("ZooKeeper");
|
||||
hosts = hosts_;
|
||||
@ -57,6 +57,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
|
||||
operation_timeout_ms = operation_timeout_ms_;
|
||||
chroot = chroot_;
|
||||
implementation = implementation_;
|
||||
get_priority_load_balancing = get_priority_load_balancing_;
|
||||
|
||||
if (implementation == "zookeeper")
|
||||
{
|
||||
@ -66,14 +67,13 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
|
||||
Coordination::ZooKeeper::Nodes nodes;
|
||||
nodes.reserve(hosts.size());
|
||||
|
||||
Strings shuffled_hosts = hosts;
|
||||
/// Shuffle the hosts to distribute the load among ZooKeeper nodes.
|
||||
pcg64 generator(randomSeed());
|
||||
std::shuffle(shuffled_hosts.begin(), shuffled_hosts.end(), generator);
|
||||
std::vector<ShuffleHost> shuffled_hosts = shuffleHosts();
|
||||
|
||||
bool dns_error = false;
|
||||
for (auto & host_string : shuffled_hosts)
|
||||
for (auto & host : shuffled_hosts)
|
||||
{
|
||||
auto & host_string = host.host;
|
||||
try
|
||||
{
|
||||
bool secure = bool(startsWith(host_string, "secure://"));
|
||||
@ -81,6 +81,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
|
||||
if (secure)
|
||||
host_string.erase(0, strlen("secure://"));
|
||||
|
||||
LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, Poco::Net::SocketAddress{host_string}.toString());
|
||||
nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure});
|
||||
}
|
||||
catch (const Poco::Net::HostNotFoundException & e)
|
||||
@ -154,23 +155,47 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<ShuffleHost> ZooKeeper::shuffleHosts() const
|
||||
{
|
||||
std::function<size_t(size_t index)> get_priority = get_priority_load_balancing.getPriorityFunc(get_priority_load_balancing.load_balancing, 0, hosts.size());
|
||||
std::vector<ShuffleHost> shuffle_hosts;
|
||||
for (size_t i = 0; i < hosts.size(); ++i)
|
||||
{
|
||||
ShuffleHost shuffle_host;
|
||||
shuffle_host.host = hosts[i];
|
||||
if (get_priority)
|
||||
shuffle_host.priority = get_priority(i);
|
||||
shuffle_host.randomize();
|
||||
shuffle_hosts.emplace_back(shuffle_host);
|
||||
}
|
||||
|
||||
std::sort(
|
||||
shuffle_hosts.begin(), shuffle_hosts.end(),
|
||||
[](const ShuffleHost & lhs, const ShuffleHost & rhs)
|
||||
{
|
||||
return ShuffleHost::compare(lhs, rhs);
|
||||
});
|
||||
|
||||
return shuffle_hosts;
|
||||
}
|
||||
|
||||
ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_,
|
||||
int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_,
|
||||
std::shared_ptr<DB::ZooKeeperLog> zk_log_)
|
||||
std::shared_ptr<DB::ZooKeeperLog> zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
|
||||
{
|
||||
zk_log = std::move(zk_log_);
|
||||
Strings hosts_strings;
|
||||
splitInto<','>(hosts_strings, hosts_string);
|
||||
|
||||
init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_);
|
||||
init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_);
|
||||
}
|
||||
|
||||
ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_,
|
||||
int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_,
|
||||
std::shared_ptr<DB::ZooKeeperLog> zk_log_)
|
||||
std::shared_ptr<DB::ZooKeeperLog> zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
|
||||
{
|
||||
zk_log = std::move(zk_log_);
|
||||
init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_);
|
||||
init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_);
|
||||
}
|
||||
|
||||
struct ZooKeeperArgs
|
||||
@ -213,6 +238,15 @@ struct ZooKeeperArgs
|
||||
{
|
||||
implementation = config.getString(config_name + "." + key);
|
||||
}
|
||||
else if (key == "zookeeper_load_balancing")
|
||||
{
|
||||
String load_balancing_str = config.getString(config_name + "." + key);
|
||||
/// Use magic_enum to avoid dependency from dbms (`SettingFieldLoadBalancingTraits::fromString(...)`)
|
||||
auto load_balancing = magic_enum::enum_cast<DB::LoadBalancing>(Poco::toUpper(load_balancing_str));
|
||||
if (!load_balancing)
|
||||
throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str);
|
||||
get_priority_load_balancing.load_balancing = *load_balancing;
|
||||
}
|
||||
else
|
||||
throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS);
|
||||
}
|
||||
@ -224,6 +258,15 @@ struct ZooKeeperArgs
|
||||
if (chroot.back() == '/')
|
||||
chroot.pop_back();
|
||||
}
|
||||
|
||||
/// init get_priority_load_balancing
|
||||
get_priority_load_balancing.hostname_differences.resize(hosts.size());
|
||||
const String & local_hostname = getFQDNOrHostName();
|
||||
for (size_t i = 0; i < hosts.size(); ++i)
|
||||
{
|
||||
const String & node_host = hosts[i].substr(0, hosts[i].find_last_of(':'));
|
||||
get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, node_host);
|
||||
}
|
||||
}
|
||||
|
||||
Strings hosts;
|
||||
@ -232,13 +275,14 @@ struct ZooKeeperArgs
|
||||
int operation_timeout_ms;
|
||||
std::string chroot;
|
||||
std::string implementation;
|
||||
GetPriorityForLoadBalancing get_priority_load_balancing;
|
||||
};
|
||||
|
||||
ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_)
|
||||
: zk_log(std::move(zk_log_))
|
||||
{
|
||||
ZooKeeperArgs args(config, config_name);
|
||||
init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot);
|
||||
init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing);
|
||||
}
|
||||
|
||||
bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) const
|
||||
@ -249,8 +293,11 @@ bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config,
|
||||
if (args.implementation == implementation && implementation == "testkeeper")
|
||||
return false;
|
||||
|
||||
return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot)
|
||||
!= std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot);
|
||||
if (args.get_priority_load_balancing != get_priority_load_balancing)
|
||||
return true;
|
||||
|
||||
return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing)
|
||||
!= std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, args.get_priority_load_balancing);
|
||||
}
|
||||
|
||||
|
||||
@ -757,7 +804,7 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &
|
||||
|
||||
ZooKeeperPtr ZooKeeper::startNewSession() const
|
||||
{
|
||||
return std::make_shared<ZooKeeper>(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log);
|
||||
return std::make_shared<ZooKeeper>(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log, get_priority_load_balancing);
|
||||
}
|
||||
|
||||
|
||||
|
@ -13,7 +13,10 @@
|
||||
#include <Common/Stopwatch.h>
|
||||
#include <Common/ZooKeeper/IKeeper.h>
|
||||
#include <Common/ZooKeeper/ZooKeeperConstants.h>
|
||||
#include <Common/GetPriorityForLoadBalancing.h>
|
||||
#include <Common/thread_local_rng.h>
|
||||
#include <unistd.h>
|
||||
#include <random>
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
@ -37,6 +40,25 @@ namespace zkutil
|
||||
/// Preferred size of multi() command (in number of ops)
|
||||
constexpr size_t MULTI_BATCH_SIZE = 100;
|
||||
|
||||
struct ShuffleHost
|
||||
{
|
||||
String host;
|
||||
Int64 priority = 0;
|
||||
UInt32 random = 0;
|
||||
|
||||
void randomize()
|
||||
{
|
||||
random = thread_local_rng();
|
||||
}
|
||||
|
||||
static bool compare(const ShuffleHost & lhs, const ShuffleHost & rhs)
|
||||
{
|
||||
return std::forward_as_tuple(lhs.priority, lhs.random)
|
||||
< std::forward_as_tuple(rhs.priority, rhs.random);
|
||||
}
|
||||
};
|
||||
|
||||
using GetPriorityForLoadBalancing = DB::GetPriorityForLoadBalancing;
|
||||
|
||||
/// ZooKeeper session. The interface is substantially different from the usual libzookeeper API.
|
||||
///
|
||||
@ -58,14 +80,16 @@ public:
|
||||
int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS,
|
||||
const std::string & chroot_ = "",
|
||||
const std::string & implementation_ = "zookeeper",
|
||||
std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr);
|
||||
std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr,
|
||||
const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {});
|
||||
|
||||
explicit ZooKeeper(const Strings & hosts_, const std::string & identity_ = "",
|
||||
int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS,
|
||||
int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS,
|
||||
const std::string & chroot_ = "",
|
||||
const std::string & implementation_ = "zookeeper",
|
||||
std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr);
|
||||
std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr,
|
||||
const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {});
|
||||
|
||||
/** Config of the form:
|
||||
<zookeeper>
|
||||
@ -91,6 +115,8 @@ public:
|
||||
*/
|
||||
ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_);
|
||||
|
||||
std::vector<ShuffleHost> shuffleHosts() const;
|
||||
|
||||
/// Creates a new session with the same parameters. This method can be used for reconnecting
|
||||
/// after the session has expired.
|
||||
/// This object remains unchanged, and the new session is returned.
|
||||
@ -284,7 +310,7 @@ private:
|
||||
friend class EphemeralNodeHolder;
|
||||
|
||||
void init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_,
|
||||
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_);
|
||||
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_);
|
||||
|
||||
/// The following methods don't throw any exceptions but return error codes.
|
||||
Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created);
|
||||
@ -311,6 +337,8 @@ private:
|
||||
Poco::Logger * log = nullptr;
|
||||
std::shared_ptr<DB::ZooKeeperLog> zk_log;
|
||||
|
||||
GetPriorityForLoadBalancing get_priority_load_balancing;
|
||||
|
||||
AtomicStopwatch session_uptime;
|
||||
};
|
||||
|
||||
|
@ -451,7 +451,7 @@ void ZooKeeper::connect(
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}", socket.peerAddress().toString(), session_id);
|
||||
LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}{}", socket.peerAddress().toString(), session_id, fail_reasons.str());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
constexpr size_t IPV4_BINARY_LENGTH = 4;
|
||||
constexpr size_t IPV6_BINARY_LENGTH = 16;
|
||||
constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte.
|
||||
constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
|
||||
constexpr size_t IPV6_MAX_TEXT_LENGTH = 45; /// Does not count tail zero byte.
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -124,6 +124,7 @@ bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_
|
||||
|
||||
size_t getHostNameDifference(const std::string & local_hostname, const std::string & host)
|
||||
{
|
||||
/// FIXME should we replace it with Levenshtein distance? (we already have it in NamePrompter)
|
||||
size_t hostname_difference = 0;
|
||||
for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i)
|
||||
if (local_hostname[i] != host[i])
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
#include <iterator>
|
||||
#include <base/sort.h>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -269,8 +270,18 @@ const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const
|
||||
}
|
||||
|
||||
|
||||
const ColumnWithTypeAndName * Block::findByName(const std::string & name) const
|
||||
const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool case_insensitive) const
|
||||
{
|
||||
if (case_insensitive)
|
||||
{
|
||||
auto found = std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); });
|
||||
if (found == data.end())
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
return &*found;
|
||||
}
|
||||
|
||||
auto it = index_by_name.find(name);
|
||||
if (index_by_name.end() == it)
|
||||
{
|
||||
@ -280,19 +291,23 @@ const ColumnWithTypeAndName * Block::findByName(const std::string & name) const
|
||||
}
|
||||
|
||||
|
||||
const ColumnWithTypeAndName & Block::getByName(const std::string & name) const
|
||||
const ColumnWithTypeAndName & Block::getByName(const std::string & name, bool case_insensitive) const
|
||||
{
|
||||
const auto * result = findByName(name);
|
||||
const auto * result = findByName(name, case_insensitive);
|
||||
if (!result)
|
||||
throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
|
||||
, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||
throw Exception(
|
||||
"Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||
|
||||
return *result;
|
||||
}
|
||||
|
||||
|
||||
bool Block::has(const std::string & name) const
|
||||
bool Block::has(const std::string & name, bool case_insensitive) const
|
||||
{
|
||||
if (case_insensitive)
|
||||
return std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); })
|
||||
!= data.end();
|
||||
|
||||
return index_by_name.end() != index_by_name.find(name);
|
||||
}
|
||||
|
||||
@ -301,8 +316,8 @@ size_t Block::getPositionByName(const std::string & name) const
|
||||
{
|
||||
auto it = index_by_name.find(name);
|
||||
if (index_by_name.end() == it)
|
||||
throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
|
||||
, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||
throw Exception(
|
||||
"Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
|
||||
|
||||
return it->second;
|
||||
}
|
||||
|
@ -60,21 +60,21 @@ public:
|
||||
ColumnWithTypeAndName & safeGetByPosition(size_t position);
|
||||
const ColumnWithTypeAndName & safeGetByPosition(size_t position) const;
|
||||
|
||||
ColumnWithTypeAndName* findByName(const std::string & name)
|
||||
ColumnWithTypeAndName* findByName(const std::string & name, bool case_insensitive = false)
|
||||
{
|
||||
return const_cast<ColumnWithTypeAndName *>(
|
||||
const_cast<const Block *>(this)->findByName(name));
|
||||
const_cast<const Block *>(this)->findByName(name, case_insensitive));
|
||||
}
|
||||
|
||||
const ColumnWithTypeAndName * findByName(const std::string & name) const;
|
||||
const ColumnWithTypeAndName * findByName(const std::string & name, bool case_insensitive = false) const;
|
||||
|
||||
ColumnWithTypeAndName & getByName(const std::string & name)
|
||||
ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false)
|
||||
{
|
||||
return const_cast<ColumnWithTypeAndName &>(
|
||||
const_cast<const Block *>(this)->getByName(name));
|
||||
const_cast<const Block *>(this)->getByName(name, case_insensitive));
|
||||
}
|
||||
|
||||
const ColumnWithTypeAndName & getByName(const std::string & name) const;
|
||||
const ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false) const;
|
||||
|
||||
Container::iterator begin() { return data.begin(); }
|
||||
Container::iterator end() { return data.end(); }
|
||||
@ -83,7 +83,7 @@ public:
|
||||
Container::const_iterator cbegin() const { return data.cbegin(); }
|
||||
Container::const_iterator cend() const { return data.cend(); }
|
||||
|
||||
bool has(const std::string & name) const;
|
||||
bool has(const std::string & name, bool case_insensitive = false) const;
|
||||
|
||||
size_t getPositionByName(const std::string & name) const;
|
||||
|
||||
|
@ -616,11 +616,13 @@ class IColumn;
|
||||
M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
|
||||
M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \
|
||||
M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \
|
||||
M(Bool, input_format_use_lowercase_column_name, false, "Use lowercase column name while reading input formats", 0) \
|
||||
M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \
|
||||
M(Bool, input_format_arrow_case_insensitive_column_matching, false, "Ignore case when matching Arrow columns with CH columns.", 0) \
|
||||
M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \
|
||||
M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \
|
||||
M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \
|
||||
M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \
|
||||
M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \
|
||||
M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
|
||||
M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \
|
||||
M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \
|
||||
|
@ -149,4 +149,5 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS,
|
||||
{"str", FormatSettings::MsgPackUUIDRepresentation::STR},
|
||||
{"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}})
|
||||
|
||||
|
||||
}
|
||||
|
@ -13,6 +13,9 @@ bool DataTypeInterval::equals(const IDataType & rhs) const
|
||||
|
||||
void registerDataTypeInterval(DataTypeFactory & factory)
|
||||
{
|
||||
factory.registerSimpleDataType("IntervalNanosecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Nanosecond)); });
|
||||
factory.registerSimpleDataType("IntervalMicrosecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Microsecond)); });
|
||||
factory.registerSimpleDataType("IntervalMillisecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Millisecond)); });
|
||||
factory.registerSimpleDataType("IntervalSecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Second)); });
|
||||
factory.registerSimpleDataType("IntervalMinute", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Minute)); });
|
||||
factory.registerSimpleDataType("IntervalHour", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Hour)); });
|
||||
|
@ -15,6 +15,8 @@
|
||||
|
||||
#include <Parsers/IAST.h>
|
||||
|
||||
#include <boost/algorithm/string/case_conv.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -227,14 +229,17 @@ void validateArraySizes(const Block & block)
|
||||
}
|
||||
|
||||
|
||||
std::unordered_set<String> getAllTableNames(const Block & block)
|
||||
std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case)
|
||||
{
|
||||
std::unordered_set<String> nested_table_names;
|
||||
for (auto & name : block.getNames())
|
||||
for (const auto & name : block.getNames())
|
||||
{
|
||||
auto nested_table_name = Nested::extractTableName(name);
|
||||
if (to_lower_case)
|
||||
boost::to_lower(nested_table_name);
|
||||
|
||||
if (!nested_table_name.empty())
|
||||
nested_table_names.insert(nested_table_name);
|
||||
nested_table_names.insert(std::move(nested_table_name));
|
||||
}
|
||||
return nested_table_names;
|
||||
}
|
||||
|
@ -32,7 +32,7 @@ namespace Nested
|
||||
void validateArraySizes(const Block & block);
|
||||
|
||||
/// Get all nested tables names from a block.
|
||||
std::unordered_set<String> getAllTableNames(const Block & block);
|
||||
std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case = false);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -88,6 +88,9 @@ DatabaseReplicated::DatabaseReplicated(
|
||||
/// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
|
||||
if (zookeeper_path.front() != '/')
|
||||
zookeeper_path = "/" + zookeeper_path;
|
||||
|
||||
if (!db_settings.collection_name.value.empty())
|
||||
fillClusterAuthInfo(db_settings.collection_name.value, context_->getConfigRef());
|
||||
}
|
||||
|
||||
String DatabaseReplicated::getFullReplicaName() const
|
||||
@ -191,22 +194,36 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
|
||||
shards.back().emplace_back(unescapeForFileName(host_port));
|
||||
}
|
||||
|
||||
String username = db_settings.cluster_username;
|
||||
String password = db_settings.cluster_password;
|
||||
UInt16 default_port = getContext()->getTCPPort();
|
||||
bool secure = db_settings.cluster_secure_connection;
|
||||
|
||||
bool treat_local_as_remote = false;
|
||||
bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL;
|
||||
return std::make_shared<Cluster>(
|
||||
getContext()->getSettingsRef(),
|
||||
shards,
|
||||
username,
|
||||
password,
|
||||
cluster_auth_info.cluster_username,
|
||||
cluster_auth_info.cluster_password,
|
||||
default_port,
|
||||
treat_local_as_remote,
|
||||
treat_local_port_as_remote,
|
||||
secure);
|
||||
cluster_auth_info.cluster_secure_connection,
|
||||
/*priority=*/1,
|
||||
database_name,
|
||||
cluster_auth_info.cluster_secret);
|
||||
}
|
||||
|
||||
|
||||
/// Reads the cluster authentication parameters from the `named_collections.<collection_name>`
/// section of the given configuration and stores them in `cluster_auth_info`.
/// Every key is optional: a missing string key yields an empty string and a missing
/// `cluster_secure_connection` yields false.
/// Throws BAD_ARGUMENTS if the configuration has no such collection.
void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config_ref)
{
    const String config_prefix = fmt::format("named_collections.{}", collection_name);

    const bool collection_exists = config_ref.has(config_prefix);
    if (!collection_exists)
        throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name);

    auto & auth = cluster_auth_info;
    auth.cluster_username = config_ref.getString(config_prefix + ".cluster_username", "");
    auth.cluster_password = config_ref.getString(config_prefix + ".cluster_password", "");
    auth.cluster_secret = config_ref.getString(config_prefix + ".cluster_secret", "");
    auth.cluster_secure_connection = config_ref.getBool(config_prefix + ".cluster_secure_connection", false);
}
|
||||
|
||||
void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(bool force_attach)
|
||||
|
@ -75,6 +75,16 @@ private:
|
||||
bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
|
||||
void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
|
||||
|
||||
struct
|
||||
{
|
||||
String cluster_username{"default"};
|
||||
String cluster_password;
|
||||
String cluster_secret;
|
||||
bool cluster_secure_connection{false};
|
||||
} cluster_auth_info;
|
||||
|
||||
void fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config);
|
||||
|
||||
void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const;
|
||||
|
||||
void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 max_log_ptr);
|
||||
|
@ -11,9 +11,8 @@ class ASTStorage;
|
||||
M(Float, max_broken_tables_ratio, 0.5, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \
|
||||
M(UInt64, max_replication_lag_to_enqueue, 10, "Replica will throw exception on attempt to execute query if its replication lag greater", 0) \
|
||||
M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \
|
||||
M(String, cluster_username, "default", "Username to use when connecting to hosts of cluster", 0) \
|
||||
M(String, cluster_password, "", "Password to use when connecting to hosts of cluster", 0) \
|
||||
M(Bool, cluster_secure_connection, false, "Enable TLS when connecting to hosts of cluster", 0) \
|
||||
M(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \
|
||||
|
||||
|
||||
DECLARE_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS)
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include <Common/getRandomASCIIString.h>
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/threadPoolCallbackRunner.h>
|
||||
#include <IO/ReadBufferFromS3.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
@ -264,32 +265,6 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
|
||||
LOG_TRACE(log, "{} to file by path: {}. S3 path: {}",
|
||||
mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name);
|
||||
|
||||
ScheduleFunc schedule = [pool = &getThreadPoolWriter(), thread_group = CurrentThread::getGroup()](auto callback)
|
||||
{
|
||||
pool->scheduleOrThrow([callback = std::move(callback), thread_group]()
|
||||
{
|
||||
if (thread_group)
|
||||
CurrentThread::attachTo(thread_group);
|
||||
|
||||
SCOPE_EXIT_SAFE(
|
||||
if (thread_group)
|
||||
CurrentThread::detachQueryIfNotDetached();
|
||||
|
||||
/// After we detached from the thread_group, parent for memory_tracker inside ThreadStatus will be reset to it's parent.
|
||||
/// Typically, it may be changes from Process to User.
|
||||
/// Usually it could be ok, because thread pool task is executed before user-level memory tracker is destroyed.
|
||||
/// However, thread could stay alive inside the thread pool, and it's ThreadStatus as well.
|
||||
/// When, finally, we destroy the thread (and the ThreadStatus),
|
||||
/// it can use memory tracker in the ~ThreadStatus in order to alloc/free untracked_memory,\
|
||||
/// and by this time user-level memory tracker may be already destroyed.
|
||||
///
|
||||
/// As a work-around, reset memory tracker to total, which is always alive.
|
||||
CurrentThread::get().memory_tracker.setParent(&total_memory_tracker);
|
||||
);
|
||||
callback();
|
||||
});
|
||||
};
|
||||
|
||||
auto s3_buffer = std::make_unique<WriteBufferFromS3>(
|
||||
settings->client,
|
||||
bucket,
|
||||
@ -299,7 +274,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
|
||||
settings->s3_upload_part_size_multiply_parts_count_threshold,
|
||||
settings->s3_max_single_part_upload_size,
|
||||
std::move(object_metadata),
|
||||
buf_size, std::move(schedule));
|
||||
buf_size, threadPoolCallbackRunner(getThreadPoolWriter()));
|
||||
|
||||
auto create_metadata_callback = [this, path, blob_name, mode] (size_t count)
|
||||
{
|
||||
|
@ -90,10 +90,10 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
|
||||
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
|
||||
format_settings.null_as_default = settings.input_format_null_as_default;
|
||||
format_settings.use_lowercase_column_name = settings.input_format_use_lowercase_column_name;
|
||||
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
|
||||
format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
|
||||
format_settings.parquet.import_nested = settings.input_format_parquet_import_nested;
|
||||
format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching;
|
||||
format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
|
||||
format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference;
|
||||
format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
|
||||
@ -131,6 +131,13 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
|
||||
format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
|
||||
format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference;
|
||||
format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference;
|
||||
format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching;
|
||||
format_settings.orc.import_nested = settings.input_format_orc_import_nested;
|
||||
format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
|
||||
format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
|
||||
format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference;
|
||||
format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching;
|
||||
format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
|
||||
format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
|
||||
format_settings.seekable_read = settings.input_format_allow_seeks;
|
||||
|
@ -32,7 +32,6 @@ struct FormatSettings
|
||||
bool null_as_default = true;
|
||||
bool decimal_trailing_zeros = false;
|
||||
bool defaults_for_omitted_fields = true;
|
||||
bool use_lowercase_column_name = false;
|
||||
|
||||
bool seekable_read = true;
|
||||
UInt64 max_rows_to_read_for_schema_inference = 100;
|
||||
@ -78,6 +77,7 @@ struct FormatSettings
|
||||
bool import_nested = false;
|
||||
bool allow_missing_columns = false;
|
||||
bool skip_columns_with_unsupported_types_in_schema_inference = false;
|
||||
bool case_insensitive_column_matching = false;
|
||||
} arrow;
|
||||
|
||||
struct
|
||||
@ -142,6 +142,7 @@ struct FormatSettings
|
||||
bool import_nested = false;
|
||||
bool allow_missing_columns = false;
|
||||
bool skip_columns_with_unsupported_types_in_schema_inference = false;
|
||||
bool case_insensitive_column_matching = false;
|
||||
} parquet;
|
||||
|
||||
struct Pretty
|
||||
@ -224,6 +225,7 @@ struct FormatSettings
|
||||
bool allow_missing_columns = false;
|
||||
int64_t row_batch_size = 100'000;
|
||||
bool skip_columns_with_unsupported_types_in_schema_inference = false;
|
||||
bool case_insensitive_column_matching = false;
|
||||
} orc;
|
||||
|
||||
/// For capnProto format we should determine how to
|
||||
|
@ -41,6 +41,11 @@ namespace ErrorCodes
|
||||
throw Exception("Illegal type Date of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
/// Always throws ILLEGAL_TYPE_OF_ARGUMENT, reporting that function `name` does not accept DateTime.
/// The UInt32 return type only lets callers write `return dateTimeIsNotSupported(name);`.
static inline UInt32 dateTimeIsNotSupported(const char * name)
{
    std::string message = "Illegal type DateTime of argument for function ";
    message += std::string(name);
    throw Exception(message, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
|
||||
|
||||
/// This factor transformation will say that the function is monotone everywhere.
|
||||
struct ZeroTransform
|
||||
{
|
||||
@ -311,6 +316,133 @@ struct ToStartOfSecondImpl
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
struct ToStartOfMillisecondImpl
|
||||
{
|
||||
static constexpr auto name = "toStartOfMillisecond";
|
||||
|
||||
static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &)
|
||||
{
|
||||
// given that scale is 6, scale_multiplier is 1000000
|
||||
// for DateTime64 value of 123.456789:
|
||||
// 123456789 - 789 = 123456000
|
||||
// for DateTime64 value of -123.456789:
|
||||
// -123456789 - (1000 + (-789)) = -123457000
|
||||
|
||||
if (scale_multiplier == 1000)
|
||||
{
|
||||
return datetime64;
|
||||
}
|
||||
else if (scale_multiplier <= 1000)
|
||||
{
|
||||
return datetime64 * (1000 / scale_multiplier);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto droppable_part_with_sign = DecimalUtils::getFractionalPartWithScaleMultiplier<DateTime64, true>(datetime64, scale_multiplier / 1000);
|
||||
|
||||
if (droppable_part_with_sign < 0)
|
||||
droppable_part_with_sign += scale_multiplier;
|
||||
|
||||
return datetime64 - droppable_part_with_sign;
|
||||
}
|
||||
}
|
||||
|
||||
static inline UInt32 execute(UInt32, const DateLUTImpl &)
|
||||
{
|
||||
throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
static inline UInt32 execute(Int32, const DateLUTImpl &)
|
||||
{
|
||||
return dateIsNotSupported(name);
|
||||
}
|
||||
static inline UInt32 execute(UInt16, const DateLUTImpl &)
|
||||
{
|
||||
return dateIsNotSupported(name);
|
||||
}
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
struct ToStartOfMicrosecondImpl
|
||||
{
|
||||
static constexpr auto name = "toStartOfMicrosecond";
|
||||
|
||||
static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &)
|
||||
{
|
||||
// @see ToStartOfMillisecondImpl
|
||||
|
||||
if (scale_multiplier == 1000000)
|
||||
{
|
||||
return datetime64;
|
||||
}
|
||||
else if (scale_multiplier <= 1000000)
|
||||
{
|
||||
return datetime64 * (1000000 / scale_multiplier);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto droppable_part_with_sign = DecimalUtils::getFractionalPartWithScaleMultiplier<DateTime64, true>(datetime64, scale_multiplier / 1000000);
|
||||
|
||||
if (droppable_part_with_sign < 0)
|
||||
droppable_part_with_sign += scale_multiplier;
|
||||
|
||||
return datetime64 - droppable_part_with_sign;
|
||||
}
|
||||
}
|
||||
|
||||
static inline UInt32 execute(UInt32, const DateLUTImpl &)
|
||||
{
|
||||
throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
static inline UInt32 execute(Int32, const DateLUTImpl &)
|
||||
{
|
||||
return dateIsNotSupported(name);
|
||||
}
|
||||
static inline UInt32 execute(UInt16, const DateLUTImpl &)
|
||||
{
|
||||
return dateIsNotSupported(name);
|
||||
}
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
struct ToStartOfNanosecondImpl
|
||||
{
|
||||
static constexpr auto name = "toStartOfNanosecond";
|
||||
|
||||
static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &)
|
||||
{
|
||||
// @see ToStartOfMillisecondImpl
|
||||
if (scale_multiplier == 1000000000)
|
||||
{
|
||||
return datetime64;
|
||||
}
|
||||
else if (scale_multiplier <= 1000000000)
|
||||
{
|
||||
return datetime64 * (1000000000 / scale_multiplier);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception("Illegal type of argument for function " + std::string(name) + ", DateTime64 expected", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
}
|
||||
|
||||
static inline UInt32 execute(UInt32, const DateLUTImpl &)
|
||||
{
|
||||
throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
static inline UInt32 execute(Int32, const DateLUTImpl &)
|
||||
{
|
||||
return dateIsNotSupported(name);
|
||||
}
|
||||
static inline UInt32 execute(UInt16, const DateLUTImpl &)
|
||||
{
|
||||
return dateIsNotSupported(name);
|
||||
}
|
||||
|
||||
using FactorTransform = ZeroTransform;
|
||||
};
|
||||
|
||||
struct ToStartOfFiveMinuteImpl
|
||||
{
|
||||
static constexpr auto name = "toStartOfFiveMinute";
|
||||
|
@ -40,26 +40,158 @@ namespace ErrorCodes
|
||||
/// - 'AddSecondsImpl::execute(UInt32, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(DateTime, ...) -> DateTime'
|
||||
/// - 'AddSecondsImpl::execute(UInt16, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(Date, ...) -> DateTime'
|
||||
|
||||
struct AddNanosecondsImpl
|
||||
{
|
||||
static constexpr auto name = "addNanoseconds";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(9 - scale);
|
||||
auto division = std::div(t.fractional * multiplier + delta, static_cast<Int64>(1000000000));
|
||||
return {t.whole * multiplier + division.quot, t.fractional * multiplier + delta};
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(9 - scale);
|
||||
return t * multiplier + delta;
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(9);
|
||||
return t * multiplier + delta;
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
throw Exception("addNanoSeconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
throw Exception("addNanoSeconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
};
|
||||
|
||||
struct AddMicrosecondsImpl
|
||||
{
|
||||
static constexpr auto name = "addMicroseconds";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(6 - scale));
|
||||
if (scale <= 6)
|
||||
{
|
||||
auto division = std::div((t.fractional + delta), static_cast<Int64>(10e6));
|
||||
return {t.whole * multiplier + division.quot, division.rem};
|
||||
}
|
||||
else
|
||||
{
|
||||
auto division = std::div((t.fractional + delta * multiplier), static_cast<Int64>(10e6 * multiplier));
|
||||
return {t.whole + division.quot, division.rem};
|
||||
}
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(6 - scale));
|
||||
return scale <= 6 ? t * multiplier + delta : t + delta * multiplier;
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(6);
|
||||
return t * multiplier + delta;
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
throw Exception("addMicroSeconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
throw Exception("addMicroSeconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
};
|
||||
|
||||
struct AddMillisecondsImpl
|
||||
{
|
||||
static constexpr auto name = "addMilliseconds";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(3 - scale));
|
||||
if (scale <= 3)
|
||||
{
|
||||
auto division = std::div((t.fractional + delta), static_cast<Int64>(1000));
|
||||
return {t.whole * multiplier + division.quot, division.rem};
|
||||
}
|
||||
else
|
||||
{
|
||||
auto division = std::div((t.fractional + delta * multiplier), static_cast<Int64>(1000 * multiplier));
|
||||
return {t.whole + division.quot,division.rem};
|
||||
}
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(3 - scale));
|
||||
return scale <= 3 ? t * multiplier + delta : t + delta * multiplier;
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(3);
|
||||
return t * multiplier + delta;
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
throw Exception("addMilliSeconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
throw Exception("addMilliSeconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
};
|
||||
|
||||
struct AddSecondsImpl
|
||||
{
|
||||
static constexpr auto name = "addSeconds";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
return {t.whole + delta, t.fractional};
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
|
||||
{
|
||||
return t + delta * DecimalUtils::scaleMultiplier<DateTime64>(scale);
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
return t + delta;
|
||||
}
|
||||
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
// use default datetime64 scale
|
||||
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000;
|
||||
}
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return time_zone.fromDayNum(DayNum(d)) + delta;
|
||||
}
|
||||
@ -70,21 +202,29 @@ struct AddMinutesImpl
|
||||
static constexpr auto name = "addMinutes";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
return {t.whole + delta * 60, t.fractional};
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
|
||||
{
|
||||
return t + 60 * delta * DecimalUtils::scaleMultiplier<DateTime64>(scale);
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
return t + delta * 60;
|
||||
}
|
||||
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
// use default datetime64 scale
|
||||
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000;
|
||||
}
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return time_zone.fromDayNum(DayNum(d)) + delta * 60;
|
||||
}
|
||||
@ -95,20 +235,29 @@ struct AddHoursImpl
|
||||
static constexpr auto name = "addHours";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
return {t.whole + delta * 3600, t.fractional};
|
||||
}
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
|
||||
{
|
||||
return t + 3600 * delta * DecimalUtils::scaleMultiplier<DateTime64>(scale);
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
return t + delta * 3600;
|
||||
}
|
||||
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
// use default datetime64 scale
|
||||
return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000;
|
||||
}
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return time_zone.fromDayNum(DayNum(d)) + delta * 3600;
|
||||
}
|
||||
@ -119,22 +268,30 @@ struct AddDaysImpl
|
||||
static constexpr auto name = "addDays";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return {time_zone.addDays(t.whole, delta), t.fractional};
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone)
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
|
||||
{
|
||||
auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
|
||||
auto d = std::div(t, multiplier);
|
||||
return time_zone.addDays(d.quot, delta) * multiplier + d.rem;
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return time_zone.addDays(t, delta);
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
|
||||
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
return d + delta;
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &)
|
||||
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
return d + delta;
|
||||
}
|
||||
@ -145,22 +302,30 @@ struct AddWeeksImpl
|
||||
static constexpr auto name = "addWeeks";
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone)
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return {time_zone.addWeeks(t.whole, delta), t.fractional};
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone)
|
||||
static inline NO_SANITIZE_UNDEFINED DateTime64
|
||||
execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
|
||||
{
|
||||
auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
|
||||
auto d = std::div(t, multiplier);
|
||||
return time_zone.addDays(d.quot, delta * 7) * multiplier + d.rem;
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return time_zone.addWeeks(t, delta);
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &)
|
||||
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
return d + delta * 7;
|
||||
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &)
|
||||
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &, UInt16 = 0)
|
||||
{
|
||||
return d + delta * 7;
|
||||
}
|
||||
@ -170,23 +335,31 @@ struct AddMonthsImpl
|
||||
{
|
||||
static constexpr auto name = "addMonths";
|
||||
|
||||
static inline DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return {time_zone.addMonths(t.whole, delta), t.fractional};
|
||||
}
|
||||
|
||||
static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone)
|
||||
/// Adds `delta` months to a DateTime64 passed as a raw scaled value.
/// The value is divided by the multiplier obtained from DecimalUtils::scaleMultiplier for `scale`;
/// the quotient is shifted with `time_zone.addMonths`, and the remainder is added back unchanged.
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
    auto scale_factor = DecimalUtils::scaleMultiplier<DateTime64>(scale);
    auto parts = std::div(t, scale_factor);

    /// Only the quotient goes through the calendar arithmetic.
    auto shifted = time_zone.addMonths(parts.quot, delta);
    return shifted * scale_factor + parts.rem;
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return time_zone.addMonths(t, delta);
|
||||
}
|
||||
|
||||
static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return time_zone.addMonths(DayNum(d), delta);
|
||||
}
|
||||
|
||||
static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return time_zone.addMonths(ExtendedDayNum(d), delta);
|
||||
}
|
||||
@ -197,22 +370,30 @@ struct AddQuartersImpl
|
||||
static constexpr auto name = "addQuarters";
|
||||
|
||||
static inline DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone)
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return {time_zone.addQuarters(t.whole, delta), t.fractional};
|
||||
}
|
||||
|
||||
static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone)
|
||||
/// Adds `delta` quarters to a DateTime64 passed as a raw scaled value.
/// The value is divided by the multiplier obtained from DecimalUtils::scaleMultiplier for `scale`;
/// the quotient is shifted with `time_zone.addQuarters`, and the remainder is added back unchanged.
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
    auto scale_factor = DecimalUtils::scaleMultiplier<DateTime64>(scale);
    auto parts = std::div(t, scale_factor);

    /// Only the quotient goes through the calendar arithmetic.
    auto shifted = time_zone.addQuarters(parts.quot, delta);
    return shifted * scale_factor + parts.rem;
}
|
||||
|
||||
static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return time_zone.addQuarters(t, delta);
|
||||
}
|
||||
|
||||
static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone)
|
||||
static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return time_zone.addQuarters(DayNum(d), delta);
|
||||
}
|
||||
|
||||
static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone)
|
||||
static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return time_zone.addQuarters(ExtendedDayNum(d), delta);
|
||||
}
|
||||
@ -222,23 +403,31 @@ struct AddYearsImpl
|
||||
{
|
||||
static constexpr auto name = "addYears";
|
||||
|
||||
static inline DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
|
||||
static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
|
||||
execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return {time_zone.addYears(t.whole, delta), t.fractional};
|
||||
}
|
||||
|
||||
static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone)
|
||||
/// Adds `delta` years to a DateTime64 passed as a raw scaled value.
/// The value is divided by the multiplier obtained from DecimalUtils::scaleMultiplier for `scale`;
/// the quotient is shifted with `time_zone.addYears`, and the remainder is added back unchanged.
static inline NO_SANITIZE_UNDEFINED DateTime64
execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
{
    auto scale_factor = DecimalUtils::scaleMultiplier<DateTime64>(scale);
    auto parts = std::div(t, scale_factor);

    /// Only the quotient goes through the calendar arithmetic.
    auto shifted = time_zone.addYears(parts.quot, delta);
    return shifted * scale_factor + parts.rem;
}
|
||||
|
||||
static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return time_zone.addYears(t, delta);
|
||||
}
|
||||
|
||||
static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return time_zone.addYears(DayNum(d), delta);
|
||||
}
|
||||
|
||||
static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
|
||||
static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
|
||||
{
|
||||
return time_zone.addYears(ExtendedDayNum(d), delta);
|
||||
}
|
||||
@ -250,13 +439,16 @@ struct SubtractIntervalImpl : public Transform
|
||||
using Transform::Transform;
|
||||
|
||||
template <typename T>
|
||||
inline NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone) const
|
||||
inline NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const
|
||||
{
|
||||
/// Signed integer overflow is Ok.
|
||||
return Transform::execute(t, -delta, time_zone);
|
||||
return Transform::execute(t, -delta, time_zone, scale);
|
||||
}
|
||||
};
|
||||
|
||||
struct SubtractNanosecondsImpl : SubtractIntervalImpl<AddNanosecondsImpl> { static constexpr auto name = "subtractNanoseconds"; };
|
||||
struct SubtractMicrosecondsImpl : SubtractIntervalImpl<AddMicrosecondsImpl> { static constexpr auto name = "subtractMicroseconds"; };
|
||||
struct SubtractMillisecondsImpl : SubtractIntervalImpl<AddMillisecondsImpl> { static constexpr auto name = "subtractMilliseconds"; };
|
||||
struct SubtractSecondsImpl : SubtractIntervalImpl<AddSecondsImpl> { static constexpr auto name = "subtractSeconds"; };
|
||||
struct SubtractMinutesImpl : SubtractIntervalImpl<AddMinutesImpl> { static constexpr auto name = "subtractMinutes"; };
|
||||
struct SubtractHoursImpl : SubtractIntervalImpl<AddHoursImpl> { static constexpr auto name = "subtractHours"; };
|
||||
@ -277,17 +469,17 @@ struct Adder
|
||||
{}
|
||||
|
||||
template <typename FromVectorType, typename ToVectorType>
|
||||
void NO_INLINE vectorConstant(const FromVectorType & vec_from, ToVectorType & vec_to, Int64 delta, const DateLUTImpl & time_zone) const
|
||||
void NO_INLINE vectorConstant(const FromVectorType & vec_from, ToVectorType & vec_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const
|
||||
{
|
||||
size_t size = vec_from.size();
|
||||
vec_to.resize(size);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone);
|
||||
vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone, scale);
|
||||
}
|
||||
|
||||
template <typename FromVectorType, typename ToVectorType>
|
||||
void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone) const
|
||||
void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const
|
||||
{
|
||||
size_t size = vec_from.size();
|
||||
vec_to.resize(size);
|
||||
@ -296,11 +488,11 @@ struct Adder
|
||||
ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64,
|
||||
ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64,
|
||||
ColumnFloat32, ColumnFloat64>(
|
||||
&delta, [&](const auto & column){ vectorVector(vec_from, vec_to, column, time_zone, size); return true; });
|
||||
&delta, [&](const auto & column){ vectorVector(vec_from, vec_to, column, time_zone, scale, size); return true; });
|
||||
}
|
||||
|
||||
template <typename FromType, typename ToVectorType>
|
||||
void constantVector(const FromType & from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone) const
|
||||
void constantVector(const FromType & from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const
|
||||
{
|
||||
size_t size = delta.size();
|
||||
vec_to.resize(size);
|
||||
@ -309,7 +501,7 @@ struct Adder
|
||||
ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64,
|
||||
ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64,
|
||||
ColumnFloat32, ColumnFloat64>(
|
||||
&delta, [&](const auto & column){ constantVector(from, vec_to, column, time_zone, size); return true; });
|
||||
&delta, [&](const auto & column){ constantVector(from, vec_to, column, time_zone, scale, size); return true; });
|
||||
}
|
||||
|
||||
private:
|
||||
@ -325,18 +517,18 @@ private:
|
||||
|
||||
template <typename FromVectorType, typename ToVectorType, typename DeltaColumnType>
|
||||
NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector(
|
||||
const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const
|
||||
const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t size) const
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone);
|
||||
vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone, scale);
|
||||
}
|
||||
|
||||
template <typename FromType, typename ToVectorType, typename DeltaColumnType>
|
||||
NO_INLINE NO_SANITIZE_UNDEFINED void constantVector(
|
||||
const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const
|
||||
const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t size) const
|
||||
{
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone);
|
||||
vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, scale);
|
||||
}
|
||||
};
|
||||
|
||||
@ -344,7 +536,7 @@ private:
|
||||
template <typename FromDataType, typename ToDataType, typename Transform>
|
||||
struct DateTimeAddIntervalImpl
|
||||
{
|
||||
static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type)
|
||||
static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale = 0)
|
||||
{
|
||||
using FromValueType = typename FromDataType::FieldType;
|
||||
using FromColumnType = typename FromDataType::ColumnType;
|
||||
@ -363,16 +555,15 @@ struct DateTimeAddIntervalImpl
|
||||
if (const auto * sources = checkAndGetColumn<FromColumnType>(source_col.get()))
|
||||
{
|
||||
if (const auto * delta_const_column = typeid_cast<const ColumnConst *>(&delta_column))
|
||||
op.vectorConstant(sources->getData(), col_to->getData(), delta_const_column->getInt(0), time_zone);
|
||||
op.vectorConstant(sources->getData(), col_to->getData(), delta_const_column->getInt(0), time_zone, scale);
|
||||
else
|
||||
op.vectorVector(sources->getData(), col_to->getData(), delta_column, time_zone);
|
||||
op.vectorVector(sources->getData(), col_to->getData(), delta_column, time_zone, scale);
|
||||
}
|
||||
else if (const auto * sources_const = checkAndGetColumnConst<FromColumnType>(source_col.get()))
|
||||
{
|
||||
op.constantVector(
|
||||
sources_const->template getValue<FromValueType>(),
|
||||
col_to->getData(),
|
||||
delta_column, time_zone);
|
||||
col_to->getData(), delta_column, time_zone, scale);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -463,18 +654,10 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// TransformDateTime64 helps choosing correct overload of exec and does some transformations
|
||||
// on input and output parameters to simplify support of DateTime64 in concrete Transform.
|
||||
template <typename FieldType>
|
||||
using TransformType = std::conditional_t<
|
||||
std::is_same_v<FieldType, DateTime64>,
|
||||
TransformDateTime64<Transform>,
|
||||
Transform>;
|
||||
|
||||
/// Helper templates to deduce return type based on argument type, since some overloads may promote or demote types,
|
||||
/// e.g. addSeconds(Date, 1) => DateTime
|
||||
template <typename FieldType>
|
||||
using TransformExecuteReturnType = decltype(std::declval<TransformType<FieldType>>().execute(FieldType(), 0, std::declval<DateLUTImpl>()));
|
||||
using TransformExecuteReturnType = decltype(std::declval<Transform>().execute(FieldType(), 0, std::declval<DateLUTImpl>(), 0));
|
||||
|
||||
// Deduces RETURN DataType from INPUT DataType, based on return type of Transform{}.execute(INPUT_TYPE, UInt64, DateLUTImpl, UInt16).
|
||||
// e.g. for Transform-type that has execute()-overload with 'UInt16' input and 'UInt32' return,
|
||||
@ -500,11 +683,33 @@ public:
|
||||
if (typeid_cast<const DataTypeDateTime64 *>(arguments[0].type.get()))
|
||||
{
|
||||
const auto & datetime64_type = assert_cast<const DataTypeDateTime64 &>(*arguments[0].type);
|
||||
return std::make_shared<DataTypeDateTime64>(datetime64_type.getScale(), extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
|
||||
|
||||
auto from_scale = datetime64_type.getScale();
|
||||
auto scale = from_scale;
|
||||
|
||||
if (std::is_same_v<Transform, AddNanosecondsImpl>)
|
||||
scale = 9;
|
||||
else if (std::is_same_v<Transform, AddMicrosecondsImpl>)
|
||||
scale = 6;
|
||||
else if (std::is_same_v<Transform, AddMillisecondsImpl>)
|
||||
scale = 3;
|
||||
|
||||
scale = std::max(scale, from_scale);
|
||||
|
||||
return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
|
||||
}
|
||||
else
|
||||
{
|
||||
return std::make_shared<DataTypeDateTime64>(DataTypeDateTime64::default_scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
|
||||
auto scale = DataTypeDateTime64::default_scale;
|
||||
|
||||
if (std::is_same_v<Transform, AddNanosecondsImpl>)
|
||||
scale = 9;
|
||||
else if (std::is_same_v<Transform, AddMicrosecondsImpl>)
|
||||
scale = 6;
|
||||
else if (std::is_same_v<Transform, AddMillisecondsImpl>)
|
||||
scale = 3;
|
||||
|
||||
return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -541,9 +746,9 @@ public:
|
||||
}
|
||||
else if (const auto * datetime64_type = assert_cast<const DataTypeDateTime64 *>(from_type))
|
||||
{
|
||||
using WrappedTransformType = TransformType<typename DataTypeDateTime64::FieldType>;
|
||||
return DateTimeAddIntervalImpl<DataTypeDateTime64, TransformResultDataType<DataTypeDateTime64>, WrappedTransformType>::execute(
|
||||
WrappedTransformType{datetime64_type->getScale()}, arguments, result_type);
|
||||
auto from_scale = datetime64_type->getScale();
|
||||
return DateTimeAddIntervalImpl<DataTypeDateTime64, TransformResultDataType<DataTypeDateTime64>, Transform>::execute(
|
||||
Transform{}, arguments, result_type, from_scale);
|
||||
}
|
||||
else
|
||||
throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName(),
|
||||
|
@ -88,6 +88,20 @@ public:
|
||||
Int64 scale = DataTypeDateTime64::default_scale;
|
||||
if (const auto * dt64 = checkAndGetDataType<DataTypeDateTime64>(arguments[0].type.get()))
|
||||
scale = dt64->getScale();
|
||||
auto source_scale = scale;
|
||||
|
||||
if constexpr (std::is_same_v<ToStartOfMillisecondImpl, Transform>)
|
||||
{
|
||||
scale = std::max(source_scale, static_cast<Int64>(3));
|
||||
}
|
||||
else if constexpr (std::is_same_v<ToStartOfMicrosecondImpl, Transform>)
|
||||
{
|
||||
scale = std::max(source_scale, static_cast<Int64>(6));
|
||||
}
|
||||
else if constexpr (std::is_same_v<ToStartOfNanosecondImpl, Transform>)
|
||||
{
|
||||
scale = std::max(source_scale, static_cast<Int64>(9));
|
||||
}
|
||||
|
||||
return std::make_shared<ToDataType>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 1, 0));
|
||||
}
|
||||
|
@ -112,6 +112,9 @@ void registerFunctionsConversion(FunctionFactory & factory)
|
||||
factory.registerFunction<FunctionParseDateTime64BestEffortOrZero>();
|
||||
factory.registerFunction<FunctionParseDateTime64BestEffortOrNull>();
|
||||
|
||||
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalNanosecond, PositiveMonotonicity>>();
|
||||
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalMicrosecond, PositiveMonotonicity>>();
|
||||
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalMillisecond, PositiveMonotonicity>>();
|
||||
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalSecond, PositiveMonotonicity>>();
|
||||
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalMinute, PositiveMonotonicity>>();
|
||||
factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalHour, PositiveMonotonicity>>();
|
||||
|
@ -1487,6 +1487,9 @@ struct NameToDecimal256 { static constexpr auto name = "toDecimal256"; };
|
||||
static constexpr auto kind = IntervalKind::INTERVAL_KIND; \
|
||||
};
|
||||
|
||||
DEFINE_NAME_TO_INTERVAL(Nanosecond)
|
||||
DEFINE_NAME_TO_INTERVAL(Microsecond)
|
||||
DEFINE_NAME_TO_INTERVAL(Millisecond)
|
||||
DEFINE_NAME_TO_INTERVAL(Second)
|
||||
DEFINE_NAME_TO_INTERVAL(Minute)
|
||||
DEFINE_NAME_TO_INTERVAL(Hour)
|
||||
@ -2703,13 +2706,10 @@ private:
|
||||
return createWrapper<ToDataType>(from_type, to_type, requested_result_is_nullable);
|
||||
}
|
||||
|
||||
WrapperType createUInt8ToUInt8Wrapper(const DataTypePtr from_type, const DataTypePtr to_type) const
|
||||
WrapperType createUInt8ToBoolWrapper(const DataTypePtr from_type, const DataTypePtr to_type) const
|
||||
{
|
||||
return [from_type, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) -> ColumnPtr
|
||||
{
|
||||
if (isBool(from_type) || !isBool(to_type))
|
||||
return arguments.front().column;
|
||||
|
||||
/// Special case when we convert UInt8 column to Bool column.
|
||||
/// both columns have type UInt8, but we shouldn't use identity wrapper,
|
||||
/// because Bool column can contain only 0 and 1.
|
||||
@ -3506,15 +3506,19 @@ private:
|
||||
/// 'requested_result_is_nullable' is true if CAST to Nullable type is requested.
|
||||
WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const
|
||||
{
|
||||
bool convert_to_ipv6 = to_type->getCustomName() && to_type->getCustomName()->getName() == "IPv6";
|
||||
if (isUInt8(from_type) && isBool(to_type))
|
||||
return createUInt8ToBoolWrapper(from_type, to_type);
|
||||
|
||||
if (from_type->equals(*to_type) && !convert_to_ipv6)
|
||||
{
|
||||
if (isUInt8(from_type))
|
||||
return createUInt8ToUInt8Wrapper(from_type, to_type);
|
||||
/// We can cast IPv6 into IPv6, IPv4 into IPv4, but we should not allow to cast FixedString(16) into IPv6 as part of identity cast
|
||||
bool safe_convert_custom_types = true;
|
||||
|
||||
if (const auto * to_type_custom_name = to_type->getCustomName())
|
||||
safe_convert_custom_types = from_type->getCustomName() && from_type->getCustomName()->getName() == to_type_custom_name->getName();
|
||||
else if (const auto * from_type_custom_name = from_type->getCustomName())
|
||||
safe_convert_custom_types = to_type->getCustomName() && from_type_custom_name->getName() == to_type->getCustomName()->getName();
|
||||
|
||||
if (from_type->equals(*to_type) && safe_convert_custom_types)
|
||||
return createIdentityWrapper(from_type);
|
||||
}
|
||||
else if (WhichDataType(from_type).isNothing())
|
||||
return createNothingWrapper(to_type.get());
|
||||
|
||||
|
@ -20,6 +20,7 @@ namespace ErrorCodes
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
extern const int SYNTAX_ERROR;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -167,6 +168,13 @@ struct TimeWindowImpl<TUMBLE>
|
||||
|
||||
switch (std::get<0>(interval))
|
||||
{
|
||||
//TODO: add proper support for fractional seconds
|
||||
// case IntervalKind::Nanosecond:
|
||||
// return executeTumble<UInt32, IntervalKind::Nanosecond>(*time_column_vec, std::get<1>(interval), time_zone);
|
||||
// case IntervalKind::Microsecond:
|
||||
// return executeTumble<UInt32, IntervalKind::Microsecond>(*time_column_vec, std::get<1>(interval), time_zone);
|
||||
// case IntervalKind::Millisecond:
|
||||
// return executeTumble<UInt32, IntervalKind::Millisecond>(*time_column_vec, std::get<1>(interval), time_zone);
|
||||
case IntervalKind::Second:
|
||||
return executeTumble<UInt32, IntervalKind::Second>(*time_column_vec, std::get<1>(interval), time_zone);
|
||||
case IntervalKind::Minute:
|
||||
@ -183,6 +191,8 @@ struct TimeWindowImpl<TUMBLE>
|
||||
return executeTumble<UInt16, IntervalKind::Quarter>(*time_column_vec, std::get<1>(interval), time_zone);
|
||||
case IntervalKind::Year:
|
||||
return executeTumble<UInt16, IntervalKind::Year>(*time_column_vec, std::get<1>(interval), time_zone);
|
||||
default:
|
||||
throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR);
|
||||
}
|
||||
__builtin_unreachable();
|
||||
}
|
||||
@ -350,6 +360,16 @@ struct TimeWindowImpl<HOP>
|
||||
|
||||
switch (std::get<0>(window_interval))
|
||||
{
|
||||
//TODO: add proper support for fractional seconds
|
||||
// case IntervalKind::Nanosecond:
|
||||
// return executeHop<UInt32, IntervalKind::Nanosecond>(
|
||||
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
|
||||
// case IntervalKind::Microsecond:
|
||||
// return executeHop<UInt32, IntervalKind::Microsecond>(
|
||||
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
|
||||
// case IntervalKind::Millisecond:
|
||||
// return executeHop<UInt32, IntervalKind::Millisecond>(
|
||||
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
|
||||
case IntervalKind::Second:
|
||||
return executeHop<UInt32, IntervalKind::Second>(
|
||||
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
|
||||
@ -374,6 +394,8 @@ struct TimeWindowImpl<HOP>
|
||||
case IntervalKind::Year:
|
||||
return executeHop<UInt16, IntervalKind::Year>(
|
||||
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
|
||||
default:
|
||||
throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR);
|
||||
}
|
||||
__builtin_unreachable();
|
||||
}
|
||||
@ -487,6 +509,16 @@ struct TimeWindowImpl<WINDOW_ID>
|
||||
|
||||
switch (std::get<0>(window_interval))
|
||||
{
|
||||
//TODO: add proper support for fractional seconds
|
||||
// case IntervalKind::Nanosecond:
|
||||
// return executeHopSlice<UInt32, IntervalKind::Nanosecond>(
|
||||
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
|
||||
// case IntervalKind::Microsecond:
|
||||
// return executeHopSlice<UInt32, IntervalKind::Microsecond>(
|
||||
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
|
||||
// case IntervalKind::Millisecond:
|
||||
// return executeHopSlice<UInt32, IntervalKind::Millisecond>(
|
||||
// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
|
||||
case IntervalKind::Second:
|
||||
return executeHopSlice<UInt32, IntervalKind::Second>(
|
||||
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
|
||||
@ -511,6 +543,8 @@ struct TimeWindowImpl<WINDOW_ID>
|
||||
case IntervalKind::Year:
|
||||
return executeHopSlice<UInt16, IntervalKind::Year>(
|
||||
*time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
|
||||
default:
|
||||
throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR);
|
||||
}
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
@ -80,7 +80,32 @@ struct ToStartOfTransform;
|
||||
TRANSFORM_TIME(Hour)
|
||||
TRANSFORM_TIME(Minute)
|
||||
TRANSFORM_TIME(Second)
|
||||
#undef TRANSFORM_DATE
|
||||
#undef TRANSFORM_TIME
|
||||
|
||||
#define TRANSFORM_SUBSECONDS(INTERVAL_KIND, DEF_SCALE) \
|
||||
template<> \
|
||||
struct ToStartOfTransform<IntervalKind::INTERVAL_KIND> \
|
||||
{ \
|
||||
static Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \
|
||||
{ \
|
||||
if (scale <= DEF_SCALE) \
|
||||
{ \
|
||||
auto val = t * DecimalUtils::scaleMultiplier<DateTime64>(DEF_SCALE - scale); \
|
||||
if (delta == 1) \
|
||||
return val; \
|
||||
else \
|
||||
return val - (val % delta); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
return t - (t % (delta * DecimalUtils::scaleMultiplier<DateTime64>(scale - DEF_SCALE))) ; \
|
||||
} \
|
||||
} \
|
||||
};
|
||||
TRANSFORM_SUBSECONDS(Millisecond, 3)
|
||||
TRANSFORM_SUBSECONDS(Microsecond, 6)
|
||||
TRANSFORM_SUBSECONDS(Nanosecond, 9)
|
||||
#undef TRANSFORM_SUBSECONDS
|
||||
|
||||
template <IntervalKind::Kind unit>
|
||||
struct AddTime;
|
||||
@ -117,6 +142,25 @@ struct ToStartOfTransform;
|
||||
ADD_TIME(Second, 1)
|
||||
#undef ADD_TIME
|
||||
|
||||
#define ADD_SUBSECONDS(INTERVAL_KIND, DEF_SCALE) \
|
||||
template <> \
|
||||
struct AddTime<IntervalKind::INTERVAL_KIND> \
|
||||
{ \
|
||||
static inline NO_SANITIZE_UNDEFINED Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \
|
||||
{ \
|
||||
if (scale < DEF_SCALE) \
|
||||
{ \
|
||||
return t + delta * DecimalUtils::scaleMultiplier<DateTime64>(DEF_SCALE - scale); \
|
||||
} \
|
||||
else \
|
||||
return t + delta * DecimalUtils::scaleMultiplier<DateTime64>(scale - DEF_SCALE); \
|
||||
} \
|
||||
};
|
||||
ADD_SUBSECONDS(Millisecond, 3)
|
||||
ADD_SUBSECONDS(Microsecond, 6)
|
||||
ADD_SUBSECONDS(Nanosecond, 9)
|
||||
#undef ADD_SUBSECONDS
|
||||
|
||||
template <TimeWindowFunctionName type>
|
||||
struct TimeWindowImpl
|
||||
{
|
||||
|
28
src/Functions/SubtractSubSeconds.cpp
Normal file
28
src/Functions/SubtractSubSeconds.cpp
Normal file
@ -0,0 +1,28 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionDateOrDateTimeAddInterval.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
using FunctionSubtractNanoseconds = FunctionDateOrDateTimeAddInterval<SubtractNanosecondsImpl>;
|
||||
void registerFunctionSubtractNanoseconds(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionSubtractNanoseconds>();
|
||||
};
|
||||
|
||||
using FunctionSubtractMicroseconds = FunctionDateOrDateTimeAddInterval<SubtractMicrosecondsImpl>;
|
||||
void registerFunctionSubtractMicroseconds(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionSubtractMicroseconds>();
|
||||
};
|
||||
|
||||
using FunctionSubtractMilliseconds = FunctionDateOrDateTimeAddInterval<SubtractMillisecondsImpl>;
|
||||
void registerFunctionSubtractMilliseconds(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionSubtractMilliseconds>();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ namespace DB
|
||||
* * DateTime64 value and scale factor (2)
|
||||
* * DateTime64 broken down to components, result of execute is then re-assembled back into DateTime64 value (3)
|
||||
*
|
||||
* Suitable Transfotm-types are commonly used in Date/DateTime manipulation functions,
|
||||
* Suitable Transform-types are commonly used in Date/DateTime manipulation functions,
|
||||
* and should implement static (or const) function with following signatures:
|
||||
* 1:
|
||||
* R execute(Int64 whole_value, ... )
|
||||
|
28
src/Functions/addSubSeconds.cpp
Normal file
28
src/Functions/addSubSeconds.cpp
Normal file
@ -0,0 +1,28 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionDateOrDateTimeAddInterval.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval<AddNanosecondsImpl>;
|
||||
void registerFunctionAddNanoseconds(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionAddNanoseconds>();
|
||||
};
|
||||
|
||||
using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval<AddMicrosecondsImpl>;
|
||||
void registerFunctionAddMicroseconds(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionAddMicroseconds>();
|
||||
};
|
||||
|
||||
using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval<AddMillisecondsImpl>;
|
||||
void registerFunctionAddMilliseconds(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionAddMilliseconds>();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
@ -7,9 +7,9 @@ namespace DB
|
||||
{
|
||||
|
||||
/// An O(1) time and space consistent hash algorithm by Konstantin Oblakov
|
||||
struct YandexConsistentHashImpl
|
||||
struct KostikConsistentHashImpl
|
||||
{
|
||||
static constexpr auto name = "yandexConsistentHash";
|
||||
static constexpr auto name = "kostikConsistentHash";
|
||||
|
||||
using HashType = UInt64;
|
||||
/// Actually it supports UInt64, but it is efficient only if n <= 32768
|
||||
@ -23,12 +23,12 @@ struct YandexConsistentHashImpl
|
||||
}
|
||||
};
|
||||
|
||||
using FunctionYandexConsistentHash = FunctionConsistentHashImpl<YandexConsistentHashImpl>;
|
||||
using FunctionKostikConsistentHash = FunctionConsistentHashImpl<KostikConsistentHashImpl>;
|
||||
|
||||
void registerFunctionYandexConsistentHash(FunctionFactory & factory)
|
||||
void registerFunctionKostikConsistentHash(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionYandexConsistentHash>();
|
||||
factory.registerFunction<FunctionKostikConsistentHash>();
|
||||
factory.registerAlias("yandexConsistentHash", "kostikConsistentHash");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -2,12 +2,12 @@ namespace DB
|
||||
{
|
||||
class FunctionFactory;
|
||||
|
||||
void registerFunctionYandexConsistentHash(FunctionFactory & factory);
|
||||
void registerFunctionKostikConsistentHash(FunctionFactory & factory);
|
||||
void registerFunctionJumpConsistentHash(FunctionFactory & factory);
|
||||
|
||||
void registerFunctionsConsistentHashing(FunctionFactory & factory)
|
||||
{
|
||||
registerFunctionYandexConsistentHash(factory);
|
||||
registerFunctionKostikConsistentHash(factory);
|
||||
registerFunctionJumpConsistentHash(factory);
|
||||
}
|
||||
|
||||
|
@ -11,6 +11,9 @@ void registerFunctionToDayOfWeek(FunctionFactory &);
|
||||
void registerFunctionToDayOfYear(FunctionFactory &);
|
||||
void registerFunctionToHour(FunctionFactory &);
|
||||
void registerFunctionToMinute(FunctionFactory &);
|
||||
void registerFunctionToStartOfNanosecond(FunctionFactory &);
|
||||
void registerFunctionToStartOfMicrosecond(FunctionFactory &);
|
||||
void registerFunctionToStartOfMillisecond(FunctionFactory &);
|
||||
void registerFunctionToStartOfSecond(FunctionFactory &);
|
||||
void registerFunctionToSecond(FunctionFactory &);
|
||||
void registerFunctionToStartOfDay(FunctionFactory &);
|
||||
@ -47,6 +50,9 @@ void registerFunctionTimeSlots(FunctionFactory &);
|
||||
void registerFunctionToYYYYMM(FunctionFactory &);
|
||||
void registerFunctionToYYYYMMDD(FunctionFactory &);
|
||||
void registerFunctionToYYYYMMDDhhmmss(FunctionFactory &);
|
||||
void registerFunctionAddNanoseconds(FunctionFactory &);
|
||||
void registerFunctionAddMicroseconds(FunctionFactory &);
|
||||
void registerFunctionAddMilliseconds(FunctionFactory &);
|
||||
void registerFunctionAddSeconds(FunctionFactory &);
|
||||
void registerFunctionAddMinutes(FunctionFactory &);
|
||||
void registerFunctionAddHours(FunctionFactory &);
|
||||
@ -55,6 +61,9 @@ void registerFunctionAddWeeks(FunctionFactory &);
|
||||
void registerFunctionAddMonths(FunctionFactory &);
|
||||
void registerFunctionAddQuarters(FunctionFactory &);
|
||||
void registerFunctionAddYears(FunctionFactory &);
|
||||
void registerFunctionSubtractNanoseconds(FunctionFactory &);
|
||||
void registerFunctionSubtractMicroseconds(FunctionFactory &);
|
||||
void registerFunctionSubtractMilliseconds(FunctionFactory &);
|
||||
void registerFunctionSubtractSeconds(FunctionFactory &);
|
||||
void registerFunctionSubtractMinutes(FunctionFactory &);
|
||||
void registerFunctionSubtractHours(FunctionFactory &);
|
||||
@ -93,6 +102,9 @@ void registerFunctionsDateTime(FunctionFactory & factory)
|
||||
registerFunctionToStartOfMonth(factory);
|
||||
registerFunctionToStartOfQuarter(factory);
|
||||
registerFunctionToStartOfYear(factory);
|
||||
registerFunctionToStartOfNanosecond(factory);
|
||||
registerFunctionToStartOfMicrosecond(factory);
|
||||
registerFunctionToStartOfMillisecond(factory);
|
||||
registerFunctionToStartOfSecond(factory);
|
||||
registerFunctionToStartOfMinute(factory);
|
||||
registerFunctionToStartOfFiveMinute(factory);
|
||||
@ -119,6 +131,9 @@ void registerFunctionsDateTime(FunctionFactory & factory)
|
||||
registerFunctionToYYYYMM(factory);
|
||||
registerFunctionToYYYYMMDD(factory);
|
||||
registerFunctionToYYYYMMDDhhmmss(factory);
|
||||
registerFunctionAddNanoseconds(factory);
|
||||
registerFunctionAddMicroseconds(factory);
|
||||
registerFunctionAddMilliseconds(factory);
|
||||
registerFunctionAddSeconds(factory);
|
||||
registerFunctionAddMinutes(factory);
|
||||
registerFunctionAddHours(factory);
|
||||
@ -127,6 +142,9 @@ void registerFunctionsDateTime(FunctionFactory & factory)
|
||||
registerFunctionAddMonths(factory);
|
||||
registerFunctionAddQuarters(factory);
|
||||
registerFunctionAddYears(factory);
|
||||
registerFunctionSubtractNanoseconds(factory);
|
||||
registerFunctionSubtractMicroseconds(factory);
|
||||
registerFunctionSubtractMilliseconds(factory);
|
||||
registerFunctionSubtractSeconds(factory);
|
||||
registerFunctionSubtractMinutes(factory);
|
||||
registerFunctionSubtractHours(factory);
|
||||
|
@ -33,184 +33,273 @@ namespace
|
||||
template <>
|
||||
struct Transform<IntervalKind::Year>
|
||||
{
|
||||
static constexpr auto name = function_name;
|
||||
|
||||
static UInt16 execute(UInt16 d, UInt64 years, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfYearInterval(DayNum(d), years);
|
||||
}
|
||||
|
||||
static UInt16 execute(Int32 d, UInt64 years, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years);
|
||||
}
|
||||
|
||||
static UInt16 execute(UInt32 t, UInt64 years, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years);
|
||||
}
|
||||
|
||||
static UInt16 execute(Int64 t, UInt64 years, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier)
|
||||
{
|
||||
return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years);
|
||||
return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Transform<IntervalKind::Quarter>
|
||||
{
|
||||
static constexpr auto name = function_name;
|
||||
|
||||
static UInt16 execute(UInt16 d, UInt64 quarters, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfQuarterInterval(DayNum(d), quarters);
|
||||
}
|
||||
|
||||
static UInt16 execute(Int32 d, UInt64 quarters, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters);
|
||||
}
|
||||
|
||||
static UInt16 execute(UInt32 t, UInt64 quarters, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters);
|
||||
}
|
||||
|
||||
static UInt16 execute(Int64 t, UInt64 quarters, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier)
|
||||
{
|
||||
return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters);
|
||||
return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Transform<IntervalKind::Month>
|
||||
{
|
||||
static constexpr auto name = function_name;
|
||||
|
||||
static UInt16 execute(UInt16 d, UInt64 months, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfMonthInterval(DayNum(d), months);
|
||||
}
|
||||
|
||||
static UInt16 execute(Int32 d, UInt64 months, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months);
|
||||
}
|
||||
|
||||
static UInt16 execute(UInt32 t, UInt64 months, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months);
|
||||
}
|
||||
|
||||
static UInt16 execute(Int64 t, UInt64 months, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier)
|
||||
{
|
||||
return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months);
|
||||
return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Transform<IntervalKind::Week>
|
||||
{
|
||||
static constexpr auto name = function_name;
|
||||
|
||||
static UInt16 execute(UInt16 d, UInt64 weeks, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfWeekInterval(DayNum(d), weeks);
|
||||
}
|
||||
|
||||
static UInt16 execute(Int32 d, UInt64 weeks, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks);
|
||||
}
|
||||
|
||||
static UInt16 execute(UInt32 t, UInt64 weeks, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks);
|
||||
}
|
||||
|
||||
static UInt16 execute(Int64 t, UInt64 weeks, const DateLUTImpl & time_zone)
|
||||
static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier)
|
||||
{
|
||||
return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks);
|
||||
return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Transform<IntervalKind::Day>
|
||||
{
|
||||
static constexpr auto name = function_name;
|
||||
|
||||
static UInt32 execute(UInt16 d, UInt64 days, const DateLUTImpl & time_zone)
|
||||
static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days);
|
||||
}
|
||||
|
||||
static UInt32 execute(Int32 d, UInt64 days, const DateLUTImpl & time_zone)
|
||||
static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days);
|
||||
}
|
||||
|
||||
static UInt32 execute(UInt32 t, UInt64 days, const DateLUTImpl & time_zone)
|
||||
static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days);
|
||||
}
|
||||
|
||||
static UInt32 execute(Int64 t, UInt64 days, const DateLUTImpl & time_zone)
|
||||
static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier)
|
||||
{
|
||||
return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days);
|
||||
return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Transform<IntervalKind::Hour>
|
||||
{
|
||||
static constexpr auto name = function_name;
|
||||
static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
|
||||
|
||||
static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
|
||||
static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
|
||||
static UInt32 execute(UInt32 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); }
|
||||
static UInt32 execute(Int64 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); }
|
||||
static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
|
||||
|
||||
static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfHourInterval(t, hours);
|
||||
}
|
||||
|
||||
static UInt32 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier)
|
||||
{
|
||||
return time_zone.toStartOfHourInterval(t / scale_multiplier, hours);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Transform<IntervalKind::Minute>
|
||||
{
|
||||
static constexpr auto name = function_name;
|
||||
static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
|
||||
|
||||
static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
|
||||
static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
|
||||
|
||||
static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
|
||||
|
||||
static UInt32 execute(UInt32 t, UInt64 minutes, const DateLUTImpl & time_zone)
|
||||
static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfMinuteInterval(t, minutes);
|
||||
}
|
||||
|
||||
static UInt32 execute(Int64 t, UInt64 minutes, const DateLUTImpl & time_zone)
|
||||
static UInt32 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier)
|
||||
{
|
||||
return time_zone.toStartOfMinuteInterval(t, minutes);
|
||||
return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Transform<IntervalKind::Second>
|
||||
{
|
||||
static constexpr auto name = function_name;
|
||||
static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
|
||||
|
||||
static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
|
||||
static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
|
||||
|
||||
static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
|
||||
|
||||
static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone)
|
||||
static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64)
|
||||
{
|
||||
return time_zone.toStartOfSecondInterval(t, seconds);
|
||||
}
|
||||
|
||||
static Int64 execute(Int64 t, UInt64 seconds, const DateLUTImpl & time_zone)
|
||||
static UInt32 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier)
|
||||
{
|
||||
return time_zone.toStartOfSecondInterval(t, seconds);
|
||||
return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Transform<IntervalKind::Millisecond>
|
||||
{
|
||||
static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
|
||||
|
||||
static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
|
||||
|
||||
static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); }
|
||||
|
||||
static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier)
|
||||
{
|
||||
if (scale_multiplier < 1000)
|
||||
{
|
||||
Int64 t_milliseconds = t * (static_cast<Int64>(1000) / scale_multiplier);
|
||||
if (likely(t >= 0))
|
||||
return t_milliseconds / milliseconds * milliseconds;
|
||||
else
|
||||
return ((t_milliseconds + 1) / milliseconds - 1) * milliseconds;
|
||||
}
|
||||
else if (scale_multiplier > 1000)
|
||||
{
|
||||
Int64 scale_diff = scale_multiplier / static_cast<Int64>(1000);
|
||||
if (likely(t >= 0))
|
||||
return t / milliseconds / scale_diff * milliseconds;
|
||||
else
|
||||
return ((t + 1) / milliseconds / scale_diff - 1) * milliseconds;
|
||||
}
|
||||
else
|
||||
if (likely(t >= 0))
|
||||
return t / milliseconds * milliseconds;
|
||||
else
|
||||
return ((t + 1) / milliseconds - 1) * milliseconds;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Transform<IntervalKind::Microsecond>
|
||||
{
|
||||
static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
|
||||
|
||||
static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
|
||||
|
||||
static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); }
|
||||
|
||||
static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier)
|
||||
{
|
||||
if (scale_multiplier < 1000000)
|
||||
{
|
||||
Int64 t_microseconds = t * (static_cast<Int64>(1000000) / scale_multiplier);
|
||||
if (likely(t >= 0))
|
||||
return t_microseconds / microseconds * microseconds;
|
||||
else
|
||||
return ((t_microseconds + 1) / microseconds - 1) * microseconds;
|
||||
}
|
||||
else if (scale_multiplier > 1000000)
|
||||
{
|
||||
Int64 scale_diff = scale_multiplier / static_cast<Int64>(1000000);
|
||||
if (likely(t >= 0))
|
||||
return t / microseconds / scale_diff * microseconds;
|
||||
else
|
||||
return ((t + 1) / microseconds / scale_diff - 1) * microseconds;
|
||||
}
|
||||
else
|
||||
if (likely(t >= 0))
|
||||
return t / microseconds * microseconds;
|
||||
else
|
||||
return ((t + 1) / microseconds - 1) * microseconds;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Transform<IntervalKind::Nanosecond>
|
||||
{
|
||||
static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
|
||||
|
||||
static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
|
||||
|
||||
static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); }
|
||||
|
||||
static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier)
|
||||
{
|
||||
if (scale_multiplier < 1000000000)
|
||||
{
|
||||
Int64 t_nanoseconds = t * (static_cast<Int64>(1000000000) / scale_multiplier);
|
||||
if (likely(t >= 0))
|
||||
return t_nanoseconds / nanoseconds * nanoseconds;
|
||||
else
|
||||
return ((t_nanoseconds + 1) / nanoseconds - 1) * nanoseconds;
|
||||
}
|
||||
else
|
||||
if (likely(t >= 0))
|
||||
return t / nanoseconds * nanoseconds;
|
||||
else
|
||||
return ((t + 1) / nanoseconds - 1) * nanoseconds;
|
||||
}
|
||||
};
|
||||
|
||||
class FunctionToStartOfInterval : public IFunction
|
||||
{
|
||||
@ -240,6 +329,7 @@ public:
|
||||
|
||||
const DataTypeInterval * interval_type = nullptr;
|
||||
bool result_type_is_date = false;
|
||||
bool result_type_is_datetime = false;
|
||||
auto check_interval_argument = [&]
|
||||
{
|
||||
interval_type = checkAndGetDataType<DataTypeInterval>(arguments[1].type.get());
|
||||
@ -251,6 +341,8 @@ public:
|
||||
result_type_is_date = (interval_type->getKind() == IntervalKind::Year)
|
||||
|| (interval_type->getKind() == IntervalKind::Quarter) || (interval_type->getKind() == IntervalKind::Month)
|
||||
|| (interval_type->getKind() == IntervalKind::Week);
|
||||
result_type_is_datetime = (interval_type->getKind() == IntervalKind::Day) || (interval_type->getKind() == IntervalKind::Hour)
|
||||
|| (interval_type->getKind() == IntervalKind::Minute) || (interval_type->getKind() == IntervalKind::Second);
|
||||
};
|
||||
|
||||
auto check_timezone_argument = [&]
|
||||
@ -263,7 +355,7 @@ public:
|
||||
if (first_argument_is_date && result_type_is_date)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"The timezone argument of function {} with interval type {} is allowed only when the 1st argument "
|
||||
"has the type DateTime",
|
||||
"has the type DateTime or DateTime64",
|
||||
getName(), interval_type->getKind().toString());
|
||||
};
|
||||
|
||||
@ -288,19 +380,33 @@ public:
|
||||
|
||||
if (result_type_is_date)
|
||||
return std::make_shared<DataTypeDate>();
|
||||
else
|
||||
else if (result_type_is_datetime)
|
||||
return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
|
||||
else
|
||||
{
|
||||
auto scale = 0;
|
||||
|
||||
if (interval_type->getKind() == IntervalKind::Nanosecond)
|
||||
scale = 9;
|
||||
else if (interval_type->getKind() == IntervalKind::Microsecond)
|
||||
scale = 6;
|
||||
else if (interval_type->getKind() == IntervalKind::Millisecond)
|
||||
scale = 3;
|
||||
|
||||
return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count */) const override
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override
|
||||
{
|
||||
const auto & time_column = arguments[0];
|
||||
const auto & interval_column = arguments[1];
|
||||
const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0);
|
||||
auto result_column = dispatchForColumns(time_column, interval_column, time_zone);
|
||||
auto result_column = dispatchForColumns(time_column, interval_column, result_type, time_zone);
|
||||
return result_column;
|
||||
}
|
||||
|
||||
@ -316,33 +422,36 @@ public:
|
||||
|
||||
private:
|
||||
ColumnPtr dispatchForColumns(
|
||||
const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone) const
|
||||
const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const
|
||||
{
|
||||
const auto & from_datatype = *time_column.type.get();
|
||||
const auto which_type = WhichDataType(from_datatype);
|
||||
|
||||
if (which_type.isDateTime64())
|
||||
{
|
||||
const auto * time_column_vec = checkAndGetColumn<DataTypeDateTime64::ColumnType>(time_column.column.get());
|
||||
auto scale = assert_cast<const DataTypeDateTime64 &>(from_datatype).getScale();
|
||||
|
||||
if (time_column_vec)
|
||||
return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime64&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone, scale);
|
||||
}
|
||||
if (which_type.isDateTime())
|
||||
{
|
||||
const auto * time_column_vec = checkAndGetColumn<ColumnUInt32>(time_column.column.get());
|
||||
if (time_column_vec)
|
||||
return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime&>(from_datatype), *time_column_vec, interval_column, time_zone);
|
||||
return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone);
|
||||
}
|
||||
if (which_type.isDate())
|
||||
{
|
||||
const auto * time_column_vec = checkAndGetColumn<ColumnUInt16>(time_column.column.get());
|
||||
if (time_column_vec)
|
||||
return dispatchForIntervalColumn(assert_cast<const DataTypeDate&>(from_datatype), *time_column_vec, interval_column, time_zone);
|
||||
return dispatchForIntervalColumn(assert_cast<const DataTypeDate&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone);
|
||||
}
|
||||
if (which_type.isDate32())
|
||||
{
|
||||
const auto * time_column_vec = checkAndGetColumn<ColumnInt32>(time_column.column.get());
|
||||
if (time_column_vec)
|
||||
return dispatchForIntervalColumn(assert_cast<const DataTypeDate32&>(from_datatype), *time_column_vec, interval_column, time_zone);
|
||||
}
|
||||
if (which_type.isDateTime64())
|
||||
{
|
||||
const auto * time_column_vec = checkAndGetColumn<DataTypeDateTime64::ColumnType>(time_column.column.get());
|
||||
if (time_column_vec)
|
||||
return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime64&>(from_datatype), *time_column_vec, interval_column, time_zone);
|
||||
return dispatchForIntervalColumn(assert_cast<const DataTypeDate32&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone);
|
||||
}
|
||||
throw Exception(
|
||||
"Illegal column for first argument of function " + getName() + ". Must contain dates or dates with time",
|
||||
@ -351,7 +460,8 @@ private:
|
||||
|
||||
template <typename ColumnType, typename FromDataType>
|
||||
ColumnPtr dispatchForIntervalColumn(
|
||||
const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone) const
|
||||
const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column,
|
||||
const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const
|
||||
{
|
||||
const auto * interval_type = checkAndGetDataType<DataTypeInterval>(interval_column.type.get());
|
||||
if (!interval_type)
|
||||
@ -368,49 +478,52 @@ private:
|
||||
|
||||
switch (interval_type->getKind())
|
||||
{
|
||||
case IntervalKind::Nanosecond:
|
||||
return execute<FromDataType, DataTypeDateTime64, IntervalKind::Nanosecond>(from, time_column, num_units, result_type, time_zone, scale);
|
||||
case IntervalKind::Microsecond:
|
||||
return execute<FromDataType, DataTypeDateTime64, IntervalKind::Microsecond>(from, time_column, num_units, result_type, time_zone, scale);
|
||||
case IntervalKind::Millisecond:
|
||||
return execute<FromDataType, DataTypeDateTime64, IntervalKind::Millisecond>(from, time_column, num_units, result_type, time_zone, scale);
|
||||
case IntervalKind::Second:
|
||||
return execute<FromDataType, UInt32, IntervalKind::Second>(from, time_column, num_units, time_zone);
|
||||
return execute<FromDataType, DataTypeDateTime, IntervalKind::Second>(from, time_column, num_units, result_type, time_zone, scale);
|
||||
case IntervalKind::Minute:
|
||||
return execute<FromDataType, UInt32, IntervalKind::Minute>(from, time_column, num_units, time_zone);
|
||||
return execute<FromDataType, DataTypeDateTime, IntervalKind::Minute>(from, time_column, num_units, result_type, time_zone, scale);
|
||||
case IntervalKind::Hour:
|
||||
return execute<FromDataType, UInt32, IntervalKind::Hour>(from, time_column, num_units, time_zone);
|
||||
return execute<FromDataType, DataTypeDateTime, IntervalKind::Hour>(from, time_column, num_units, result_type, time_zone, scale);
|
||||
case IntervalKind::Day:
|
||||
return execute<FromDataType, UInt32, IntervalKind::Day>(from, time_column, num_units, time_zone);
|
||||
return execute<FromDataType, DataTypeDateTime, IntervalKind::Day>(from, time_column, num_units, result_type, time_zone, scale);
|
||||
case IntervalKind::Week:
|
||||
return execute<FromDataType, UInt16, IntervalKind::Week>(from, time_column, num_units, time_zone);
|
||||
return execute<FromDataType, DataTypeDate, IntervalKind::Week>(from, time_column, num_units, result_type, time_zone, scale);
|
||||
case IntervalKind::Month:
|
||||
return execute<FromDataType, UInt16, IntervalKind::Month>(from, time_column, num_units, time_zone);
|
||||
return execute<FromDataType, DataTypeDate, IntervalKind::Month>(from, time_column, num_units, result_type, time_zone, scale);
|
||||
case IntervalKind::Quarter:
|
||||
return execute<FromDataType, UInt16, IntervalKind::Quarter>(from, time_column, num_units, time_zone);
|
||||
return execute<FromDataType, DataTypeDate, IntervalKind::Quarter>(from, time_column, num_units, result_type, time_zone, scale);
|
||||
case IntervalKind::Year:
|
||||
return execute<FromDataType, UInt16, IntervalKind::Year>(from, time_column, num_units, time_zone);
|
||||
return execute<FromDataType, DataTypeDate, IntervalKind::Year>(from, time_column, num_units, result_type, time_zone, scale);
|
||||
}
|
||||
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
||||
|
||||
template <typename FromDataType, typename ToType, IntervalKind::Kind unit, typename ColumnType>
|
||||
ColumnPtr execute(const FromDataType & from_datatype, const ColumnType & time_column, UInt64 num_units, const DateLUTImpl & time_zone) const
|
||||
template <typename FromDataType, typename ToDataType, IntervalKind::Kind unit, typename ColumnType>
|
||||
ColumnPtr execute(const FromDataType &, const ColumnType & time_column_type, Int64 num_units, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const
|
||||
{
|
||||
const auto & time_data = time_column.getData();
|
||||
size_t size = time_column.size();
|
||||
auto result = ColumnVector<ToType>::create();
|
||||
auto & result_data = result->getData();
|
||||
using ToColumnType = typename ToDataType::ColumnType;
|
||||
|
||||
const auto & time_data = time_column_type.getData();
|
||||
size_t size = time_data.size();
|
||||
|
||||
auto result_col = result_type->createColumn();
|
||||
auto *col_to = assert_cast<ToColumnType *>(result_col.get());
|
||||
auto & result_data = col_to->getData();
|
||||
result_data.resize(size);
|
||||
|
||||
if constexpr (std::is_same_v<FromDataType, DataTypeDateTime64>)
|
||||
{
|
||||
const auto transform = TransformDateTime64<Transform<unit>>{from_datatype.getScale()};
|
||||
Int64 scale_multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
|
||||
|
||||
for (size_t i = 0; i != size; ++i)
|
||||
result_data[i] = transform.execute(time_data[i], num_units, time_zone);
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i != size; ++i)
|
||||
result_data[i] = Transform<unit>::execute(time_data[i], num_units, time_zone);
|
||||
}
|
||||
return result;
|
||||
result_data[i] = Transform<unit>::execute(time_data[i], num_units, time_zone, scale_multiplier);
|
||||
|
||||
return result_col;
|
||||
}
|
||||
};
|
||||
|
||||
|
30
src/Functions/toStartOfSubsecond.cpp
Normal file
30
src/Functions/toStartOfSubsecond.cpp
Normal file
@ -0,0 +1,30 @@
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/DateTimeTransforms.h>
|
||||
#include <Functions/FunctionDateOrDateTimeToSomething.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
using FunctionToStartOfMillisecond = FunctionDateOrDateTimeToSomething<DataTypeDateTime64, ToStartOfMillisecondImpl>;
|
||||
|
||||
void registerFunctionToStartOfMillisecond(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionToStartOfMillisecond>();
|
||||
}
|
||||
|
||||
using FunctionToStartOfMicrosecond = FunctionDateOrDateTimeToSomething<DataTypeDateTime64, ToStartOfMicrosecondImpl>;
|
||||
|
||||
void registerFunctionToStartOfMicrosecond(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionToStartOfMicrosecond>();
|
||||
}
|
||||
|
||||
using FunctionToStartOfNanosecond = FunctionDateOrDateTimeToSomething<DataTypeDateTime64, ToStartOfNanosecondImpl>;
|
||||
|
||||
void registerFunctionToStartOfNanosecond(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionToStartOfNanosecond>();
|
||||
}
|
||||
|
||||
}
|
@ -372,8 +372,8 @@ SetPtr makeExplicitSet(
|
||||
element_type = low_cardinality_type->getDictionaryType();
|
||||
|
||||
auto set_key = PreparedSetKey::forLiteral(*right_arg, set_element_types);
|
||||
if (prepared_sets.count(set_key))
|
||||
return prepared_sets.at(set_key); /// Already prepared.
|
||||
if (auto it = prepared_sets.find(set_key); it != prepared_sets.end())
|
||||
return it->second; /// Already prepared.
|
||||
|
||||
Block block;
|
||||
const auto & right_arg_func = std::dynamic_pointer_cast<ASTFunction>(right_arg);
|
||||
@ -388,7 +388,7 @@ SetPtr makeExplicitSet(
|
||||
set->insertFromBlock(block.getColumnsWithTypeAndName());
|
||||
set->finishInsert();
|
||||
|
||||
prepared_sets[set_key] = set;
|
||||
prepared_sets.emplace(set_key, set);
|
||||
return set;
|
||||
}
|
||||
|
||||
@ -707,7 +707,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat
|
||||
if (tid != 0)
|
||||
tuple_ast = tuple_ast->clone();
|
||||
|
||||
auto literal = std::make_shared<ASTLiteral>(UInt64(++tid));
|
||||
auto literal = std::make_shared<ASTLiteral>(UInt64{++tid});
|
||||
visit(*literal, literal, data);
|
||||
|
||||
auto func = makeASTFunction("tupleElement", tuple_ast, literal);
|
||||
@ -814,13 +814,12 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
|
||||
if (!data.only_consts)
|
||||
{
|
||||
/// We are in the part of the tree that we are not going to compute. You just need to define types.
|
||||
/// Do not subquery and create sets. We replace "in*" function to "in*IgnoreSet".
|
||||
/// Do not evaluate subqueries or create sets. We replace the "in*" function with "in*IgnoreSet".
|
||||
|
||||
auto argument_name = node.arguments->children.at(0)->getColumnName();
|
||||
|
||||
data.addFunction(
|
||||
FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()),
|
||||
{ argument_name, argument_name },
|
||||
{argument_name, argument_name},
|
||||
column_name);
|
||||
}
|
||||
return;
|
||||
@ -1145,8 +1144,8 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
|
||||
if (no_subqueries)
|
||||
return {};
|
||||
auto set_key = PreparedSetKey::forSubquery(*right_in_operand);
|
||||
if (data.prepared_sets.count(set_key))
|
||||
return data.prepared_sets.at(set_key);
|
||||
if (auto it = data.prepared_sets.find(set_key); it != data.prepared_sets.end())
|
||||
return it->second;
|
||||
|
||||
/// A special case is if the name of the table is specified on the right side of the IN statement,
|
||||
/// and the table has the type Set (a previously prepared set).
|
||||
@ -1160,7 +1159,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
|
||||
StorageSet * storage_set = dynamic_cast<StorageSet *>(table.get());
|
||||
if (storage_set)
|
||||
{
|
||||
data.prepared_sets[set_key] = storage_set->getSet();
|
||||
data.prepared_sets.emplace(set_key, storage_set->getSet());
|
||||
return storage_set->getSet();
|
||||
}
|
||||
}
|
||||
@ -1174,7 +1173,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
|
||||
/// If you already created a Set with the same subquery / table.
|
||||
if (subquery_for_set.set)
|
||||
{
|
||||
data.prepared_sets[set_key] = subquery_for_set.set;
|
||||
data.prepared_sets.emplace(set_key, subquery_for_set.set);
|
||||
return subquery_for_set.set;
|
||||
}
|
||||
|
||||
@ -1196,7 +1195,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
|
||||
}
|
||||
|
||||
subquery_for_set.set = set;
|
||||
data.prepared_sets[set_key] = set;
|
||||
data.prepared_sets.emplace(set_key, set);
|
||||
return set;
|
||||
}
|
||||
else
|
||||
|
@ -10,6 +10,7 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ASTExpressionList;
|
||||
class ASTFunction;
|
||||
|
||||
class ExpressionActions;
|
||||
@ -89,10 +90,7 @@ struct ScopeStack : WithContext
|
||||
void addColumn(ColumnWithTypeAndName column);
|
||||
void addAlias(const std::string & name, std::string alias);
|
||||
void addArrayJoin(const std::string & source_name, std::string result_name);
|
||||
void addFunction(
|
||||
const FunctionOverloadResolverPtr & function,
|
||||
const Names & argument_names,
|
||||
std::string result_name);
|
||||
void addFunction(const FunctionOverloadResolverPtr & function, const Names & argument_names, std::string result_name);
|
||||
|
||||
ActionsDAGPtr popLevel();
|
||||
|
||||
|
@ -26,10 +26,10 @@ extern const int CANNOT_LOAD_CATBOOST_MODEL;
|
||||
extern const int CANNOT_APPLY_CATBOOST_MODEL;
|
||||
}
|
||||
|
||||
|
||||
/// CatBoost wrapper interface functions.
|
||||
struct CatBoostWrapperAPI
|
||||
class CatBoostWrapperAPI
|
||||
{
|
||||
public:
|
||||
using ModelCalcerHandle = void;
|
||||
|
||||
ModelCalcerHandle * (* ModelCalcerCreate)(); // NOLINT
|
||||
@ -68,9 +68,6 @@ struct CatBoostWrapperAPI
|
||||
};
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
class CatBoostModelHolder
|
||||
{
|
||||
private:
|
||||
@ -84,7 +81,61 @@ public:
|
||||
};
|
||||
|
||||
|
||||
class CatBoostModelImpl : public ICatBoostModel
|
||||
/// Holds CatBoost wrapper library and provides wrapper interface.
/// The constructor opens the shared library at `lib_path_` and immediately fills `api`
/// with the entry points resolved by symbol name (see initAPI()).
|
||||
class CatBoostLibHolder
|
||||
{
|
||||
public:
|
||||
/// `lib_path` is declared before `lib`, so it is already initialised when `lib(lib_path)` runs.
explicit CatBoostLibHolder(std::string lib_path_) : lib_path(std::move(lib_path_)), lib(lib_path) { initAPI(); }
|
||||
|
||||
/// Table of resolved wrapper entry points.
const CatBoostWrapperAPI & getAPI() const { return api; }
|
||||
/// Path this holder was constructed with.
const std::string & getCurrentPath() const { return lib_path; }
|
||||
|
||||
private:
|
||||
CatBoostWrapperAPI api;
|
||||
std::string lib_path;
|
||||
SharedLibrary lib;
|
||||
|
||||
/// Resolves every wrapper entry point by its symbol name and stores it in `api`.
/// The first group is resolved through load() (lib.get), the second through tryLoad() (lib.tryGet).
/// NOTE(review): presumably get() fails on a missing symbol while tryGet() leaves a null pointer,
/// which would make the second group optional - confirm against SharedLibrary.
void initAPI()
|
||||
{
|
||||
load(api.ModelCalcerCreate, "ModelCalcerCreate");
|
||||
load(api.ModelCalcerDelete, "ModelCalcerDelete");
|
||||
load(api.GetErrorString, "GetErrorString");
|
||||
load(api.LoadFullModelFromFile, "LoadFullModelFromFile");
|
||||
load(api.CalcModelPredictionFlat, "CalcModelPredictionFlat");
|
||||
load(api.CalcModelPrediction, "CalcModelPrediction");
|
||||
load(api.CalcModelPredictionWithHashedCatFeatures, "CalcModelPredictionWithHashedCatFeatures");
|
||||
load(api.GetStringCatFeatureHash, "GetStringCatFeatureHash");
|
||||
load(api.GetIntegerCatFeatureHash, "GetIntegerCatFeatureHash");
|
||||
load(api.GetFloatFeaturesCount, "GetFloatFeaturesCount");
|
||||
load(api.GetCatFeaturesCount, "GetCatFeaturesCount");
|
||||
tryLoad(api.CheckModelMetadataHasKey, "CheckModelMetadataHasKey");
|
||||
tryLoad(api.GetModelInfoValueSize, "GetModelInfoValueSize");
|
||||
tryLoad(api.GetModelInfoValue, "GetModelInfoValue");
|
||||
tryLoad(api.GetTreeCount, "GetTreeCount");
|
||||
tryLoad(api.GetDimensionsCount, "GetDimensionsCount");
|
||||
}
|
||||
|
||||
/// Assigns to `func` the symbol `name` looked up with lib.get<T>().
template <typename T>
|
||||
void load(T& func, const std::string & name) { func = lib.get<T>(name); }
|
||||
|
||||
/// Assigns to `func` the symbol `name` looked up with lib.tryGet<T>().
template <typename T>
|
||||
void tryLoad(T& func, const std::string & name) { func = lib.tryGet<T>(name); }
|
||||
};
|
||||
|
||||
/// Returns a shared holder for the CatBoost wrapper library located at `lib_path`.
/// A single holder is cached in a function-local static. It is created on first use and
/// replaced with a new one whenever a different `lib_path` is requested.
std::shared_ptr<CatBoostLibHolder> getCatBoostWrapperHolder(const std::string & lib_path)
|
||||
{
|
||||
static std::shared_ptr<CatBoostLibHolder> ptr;
|
||||
static std::mutex mutex;
|
||||
|
||||
/// All reads and writes of the cached pointer happen under this lock.
std::lock_guard lock(mutex);
|
||||
|
||||
/// (Re)load when nothing is cached yet or the cached holder was built for another path.
if (!ptr || ptr->getCurrentPath() != lib_path)
|
||||
ptr = std::make_shared<CatBoostLibHolder>(lib_path);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
class CatBoostModelImpl
|
||||
{
|
||||
public:
|
||||
CatBoostModelImpl(const CatBoostWrapperAPI * api_, const std::string & model_path) : api(api_)
|
||||
@ -92,13 +143,15 @@ public:
|
||||
handle = std::make_unique<CatBoostModelHolder>(api);
|
||||
if (!handle)
|
||||
{
|
||||
std::string msg = "Cannot create CatBoost model: ";
|
||||
throw Exception(msg + api->GetErrorString(), ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL);
|
||||
throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL,
|
||||
"Cannot create CatBoost model: {}",
|
||||
api->GetErrorString());
|
||||
}
|
||||
if (!api->LoadFullModelFromFile(handle->get(), model_path.c_str()))
|
||||
{
|
||||
std::string msg = "Cannot load CatBoost model: ";
|
||||
throw Exception(msg + api->GetErrorString(), ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL);
|
||||
throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL,
|
||||
"Cannot load CatBoost model: {}",
|
||||
api->GetErrorString());
|
||||
}
|
||||
|
||||
float_features_count = api->GetFloatFeaturesCount(handle->get());
|
||||
@ -108,32 +161,22 @@ public:
|
||||
tree_count = api->GetDimensionsCount(handle->get());
|
||||
}
|
||||
|
||||
ColumnPtr evaluate(const ColumnRawPtrs & columns) const override
|
||||
ColumnPtr evaluate(const ColumnRawPtrs & columns) const
|
||||
{
|
||||
if (columns.empty())
|
||||
throw Exception("Got empty columns list for CatBoost model.", ErrorCodes::BAD_ARGUMENTS);
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got empty columns list for CatBoost model.");
|
||||
|
||||
if (columns.size() != float_features_count + cat_features_count)
|
||||
{
|
||||
std::string msg;
|
||||
{
|
||||
WriteBufferFromString buffer(msg);
|
||||
buffer << "Number of columns is different with number of features: ";
|
||||
buffer << columns.size() << " vs " << float_features_count << " + " << cat_features_count;
|
||||
}
|
||||
throw Exception(msg, ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
/// Reports a mismatch between the number of passed columns and the number of model features.
/// The message has three placeholders, so columns.size() must be supplied first; with only the
/// two feature counts the formatting itself fails and the intended BAD_ARGUMENTS error is lost.
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Number of columns is different with number of features: columns size {} float features size {} + cat features size {}",
|
||||
columns.size(),
|
||||
float_features_count,
|
||||
cat_features_count);
|
||||
|
||||
for (size_t i = 0; i < float_features_count; ++i)
|
||||
{
|
||||
if (!columns[i]->isNumeric())
|
||||
{
|
||||
std::string msg;
|
||||
{
|
||||
WriteBufferFromString buffer(msg);
|
||||
buffer << "Column " << i << " should be numeric to make float feature.";
|
||||
}
|
||||
throw Exception(msg, ErrorCodes::BAD_ARGUMENTS);
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric to make float feature.", i);
|
||||
}
|
||||
}
|
||||
|
||||
@ -142,16 +185,13 @@ public:
|
||||
{
|
||||
const auto * column = columns[i];
|
||||
if (column->isNumeric())
|
||||
{
|
||||
cat_features_are_strings = false;
|
||||
}
|
||||
else if (!(typeid_cast<const ColumnString *>(column)
|
||||
|| typeid_cast<const ColumnFixedString *>(column)))
|
||||
{
|
||||
std::string msg;
|
||||
{
|
||||
WriteBufferFromString buffer(msg);
|
||||
buffer << "Column " << i << " should be numeric or string.";
|
||||
}
|
||||
throw Exception(msg, ErrorCodes::BAD_ARGUMENTS);
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric or string.", i);
|
||||
}
|
||||
}
|
||||
|
||||
@ -187,9 +227,9 @@ public:
|
||||
return ColumnTuple::create(std::move(mutable_columns));
|
||||
}
|
||||
|
||||
size_t getFloatFeaturesCount() const override { return float_features_count; }
|
||||
size_t getCatFeaturesCount() const override { return cat_features_count; }
|
||||
size_t getTreeCount() const override { return tree_count; }
|
||||
size_t getFloatFeaturesCount() const { return float_features_count; }
|
||||
size_t getCatFeaturesCount() const { return cat_features_count; }
|
||||
size_t getTreeCount() const { return tree_count; }
|
||||
|
||||
private:
|
||||
std::unique_ptr<CatBoostModelHolder> handle;
|
||||
@ -435,66 +475,6 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/// Holds CatBoost wrapper library and provides wrapper interface.
|
||||
class CatBoostLibHolder: public CatBoostWrapperAPIProvider
|
||||
{
|
||||
public:
|
||||
explicit CatBoostLibHolder(std::string lib_path_) : lib_path(std::move(lib_path_)), lib(lib_path) { initAPI(); }
|
||||
|
||||
const CatBoostWrapperAPI & getAPI() const override { return api; }
|
||||
const std::string & getCurrentPath() const { return lib_path; }
|
||||
|
||||
private:
|
||||
CatBoostWrapperAPI api;
|
||||
std::string lib_path;
|
||||
SharedLibrary lib;
|
||||
|
||||
void initAPI();
|
||||
|
||||
template <typename T>
|
||||
void load(T& func, const std::string & name) { func = lib.get<T>(name); }
|
||||
|
||||
template <typename T>
|
||||
void tryLoad(T& func, const std::string & name) { func = lib.tryGet<T>(name); }
|
||||
};
|
||||
|
||||
void CatBoostLibHolder::initAPI()
|
||||
{
|
||||
load(api.ModelCalcerCreate, "ModelCalcerCreate");
|
||||
load(api.ModelCalcerDelete, "ModelCalcerDelete");
|
||||
load(api.GetErrorString, "GetErrorString");
|
||||
load(api.LoadFullModelFromFile, "LoadFullModelFromFile");
|
||||
load(api.CalcModelPredictionFlat, "CalcModelPredictionFlat");
|
||||
load(api.CalcModelPrediction, "CalcModelPrediction");
|
||||
load(api.CalcModelPredictionWithHashedCatFeatures, "CalcModelPredictionWithHashedCatFeatures");
|
||||
load(api.GetStringCatFeatureHash, "GetStringCatFeatureHash");
|
||||
load(api.GetIntegerCatFeatureHash, "GetIntegerCatFeatureHash");
|
||||
load(api.GetFloatFeaturesCount, "GetFloatFeaturesCount");
|
||||
load(api.GetCatFeaturesCount, "GetCatFeaturesCount");
|
||||
tryLoad(api.CheckModelMetadataHasKey, "CheckModelMetadataHasKey");
|
||||
tryLoad(api.GetModelInfoValueSize, "GetModelInfoValueSize");
|
||||
tryLoad(api.GetModelInfoValue, "GetModelInfoValue");
|
||||
tryLoad(api.GetTreeCount, "GetTreeCount");
|
||||
tryLoad(api.GetDimensionsCount, "GetDimensionsCount");
|
||||
}
|
||||
|
||||
std::shared_ptr<CatBoostLibHolder> getCatBoostWrapperHolder(const std::string & lib_path)
|
||||
{
|
||||
static std::shared_ptr<CatBoostLibHolder> ptr;
|
||||
static std::mutex mutex;
|
||||
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
if (!ptr || ptr->getCurrentPath() != lib_path)
|
||||
ptr = std::make_shared<CatBoostLibHolder>(lib_path);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
CatBoostModel::CatBoostModel(std::string name_, std::string model_path_, std::string lib_path_,
|
||||
const ExternalLoadableLifetime & lifetime_)
|
||||
: name(std::move(name_)), model_path(std::move(model_path_)), lib_path(std::move(lib_path_)), lifetime(lifetime_)
|
||||
@ -502,43 +482,28 @@ CatBoostModel::CatBoostModel(std::string name_, std::string model_path_, std::st
|
||||
api_provider = getCatBoostWrapperHolder(lib_path);
|
||||
api = &api_provider->getAPI();
|
||||
model = std::make_unique<CatBoostModelImpl>(api, model_path);
|
||||
float_features_count = model->getFloatFeaturesCount();
|
||||
cat_features_count = model->getCatFeaturesCount();
|
||||
tree_count = model->getTreeCount();
|
||||
}
|
||||
|
||||
const ExternalLoadableLifetime & CatBoostModel::getLifetime() const
|
||||
{
|
||||
return lifetime;
|
||||
}
|
||||
|
||||
bool CatBoostModel::isModified() const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
std::shared_ptr<const IExternalLoadable> CatBoostModel::clone() const
|
||||
{
|
||||
return std::make_shared<CatBoostModel>(name, model_path, lib_path, lifetime);
|
||||
}
|
||||
CatBoostModel::~CatBoostModel() = default;
|
||||
|
||||
size_t CatBoostModel::getFloatFeaturesCount() const
|
||||
{
|
||||
return float_features_count;
|
||||
return model->getFloatFeaturesCount();
|
||||
}
|
||||
|
||||
size_t CatBoostModel::getCatFeaturesCount() const
|
||||
{
|
||||
return cat_features_count;
|
||||
return model->getCatFeaturesCount();
|
||||
}
|
||||
|
||||
size_t CatBoostModel::getTreeCount() const
|
||||
{
|
||||
return tree_count;
|
||||
return model->getTreeCount();
|
||||
}
|
||||
|
||||
DataTypePtr CatBoostModel::getReturnType() const
|
||||
{
|
||||
size_t tree_count = getTreeCount();
|
||||
auto type = std::make_shared<DataTypeFloat64>();
|
||||
if (tree_count == 1)
|
||||
return type;
|
||||
@ -552,6 +517,7 @@ ColumnPtr CatBoostModel::evaluate(const ColumnRawPtrs & columns) const
|
||||
{
|
||||
if (!model)
|
||||
throw Exception("CatBoost model was not loaded.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
return model->evaluate(columns);
|
||||
}
|
||||
|
||||
|
@ -8,47 +8,32 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// CatBoost wrapper interface functions.
|
||||
struct CatBoostWrapperAPI;
|
||||
class CatBoostWrapperAPIProvider
|
||||
{
|
||||
public:
|
||||
virtual ~CatBoostWrapperAPIProvider() = default;
|
||||
virtual const CatBoostWrapperAPI & getAPI() const = 0;
|
||||
};
|
||||
|
||||
/// CatBoost model interface.
|
||||
class ICatBoostModel
|
||||
{
|
||||
public:
|
||||
virtual ~ICatBoostModel() = default;
|
||||
/// Evaluate model. Use first `float_features_count` columns as float features,
|
||||
/// the others `cat_features_count` as categorical features.
|
||||
virtual ColumnPtr evaluate(const ColumnRawPtrs & columns) const = 0;
|
||||
|
||||
virtual size_t getFloatFeaturesCount() const = 0;
|
||||
virtual size_t getCatFeaturesCount() const = 0;
|
||||
virtual size_t getTreeCount() const = 0;
|
||||
};
|
||||
class CatBoostLibHolder;
|
||||
class CatBoostWrapperAPI;
|
||||
class CatBoostModelImpl;
|
||||
|
||||
class IDataType;
|
||||
using DataTypePtr = std::shared_ptr<const IDataType>;
|
||||
|
||||
/// General ML model evaluator interface.
|
||||
class IModel : public IExternalLoadable
|
||||
class IMLModel : public IExternalLoadable
|
||||
{
|
||||
public:
|
||||
IMLModel() = default;
|
||||
virtual ColumnPtr evaluate(const ColumnRawPtrs & columns) const = 0;
|
||||
virtual std::string getTypeName() const = 0;
|
||||
virtual DataTypePtr getReturnType() const = 0;
|
||||
virtual ~IMLModel() override = default;
|
||||
};
|
||||
|
||||
class CatBoostModel : public IModel
|
||||
class CatBoostModel : public IMLModel
|
||||
{
|
||||
public:
|
||||
CatBoostModel(std::string name, std::string model_path,
|
||||
std::string lib_path, const ExternalLoadableLifetime & lifetime);
|
||||
|
||||
~CatBoostModel() override;
|
||||
|
||||
ColumnPtr evaluate(const ColumnRawPtrs & columns) const override;
|
||||
std::string getTypeName() const override { return "catboost"; }
|
||||
|
||||
@ -59,29 +44,28 @@ public:
|
||||
|
||||
/// IExternalLoadable interface.
|
||||
|
||||
const ExternalLoadableLifetime & getLifetime() const override;
|
||||
const ExternalLoadableLifetime & getLifetime() const override { return lifetime; }
|
||||
|
||||
std::string getLoadableName() const override { return name; }
|
||||
|
||||
bool supportUpdates() const override { return true; }
|
||||
|
||||
bool isModified() const override;
|
||||
bool isModified() const override { return true; }
|
||||
|
||||
std::shared_ptr<const IExternalLoadable> clone() const override;
|
||||
std::shared_ptr<const IExternalLoadable> clone() const override
|
||||
{
|
||||
return std::make_shared<CatBoostModel>(name, model_path, lib_path, lifetime);
|
||||
}
|
||||
|
||||
private:
|
||||
const std::string name;
|
||||
std::string model_path;
|
||||
std::string lib_path;
|
||||
ExternalLoadableLifetime lifetime;
|
||||
std::shared_ptr<CatBoostWrapperAPIProvider> api_provider;
|
||||
std::shared_ptr<CatBoostLibHolder> api_provider;
|
||||
const CatBoostWrapperAPI * api;
|
||||
|
||||
std::unique_ptr<ICatBoostModel> model;
|
||||
|
||||
size_t float_features_count;
|
||||
size_t cat_features_count;
|
||||
size_t tree_count;
|
||||
std::unique_ptr<CatBoostModelImpl> model;
|
||||
|
||||
void init();
|
||||
};
|
||||
|
@ -132,7 +132,9 @@ Cluster::Address::Address(
|
||||
bool secure_,
|
||||
Int64 priority_,
|
||||
UInt32 shard_index_,
|
||||
UInt32 replica_index_)
|
||||
UInt32 replica_index_,
|
||||
String cluster_name_,
|
||||
String cluster_secret_)
|
||||
: user(user_), password(password_)
|
||||
{
|
||||
bool can_be_local = true;
|
||||
@ -164,6 +166,8 @@ Cluster::Address::Address(
|
||||
is_local = can_be_local && isLocal(clickhouse_port);
|
||||
shard_index = shard_index_;
|
||||
replica_index = replica_index_;
|
||||
cluster = cluster_name_;
|
||||
cluster_secret = cluster_secret_;
|
||||
}
|
||||
|
||||
|
||||
@ -537,10 +541,14 @@ Cluster::Cluster(
|
||||
bool treat_local_as_remote,
|
||||
bool treat_local_port_as_remote,
|
||||
bool secure,
|
||||
Int64 priority)
|
||||
Int64 priority,
|
||||
String cluster_name,
|
||||
String cluster_secret)
|
||||
{
|
||||
UInt32 current_shard_num = 1;
|
||||
|
||||
secret = cluster_secret;
|
||||
|
||||
for (const auto & shard : names)
|
||||
{
|
||||
Addresses current;
|
||||
@ -554,7 +562,9 @@ Cluster::Cluster(
|
||||
secure,
|
||||
priority,
|
||||
current_shard_num,
|
||||
current.size() + 1);
|
||||
current.size() + 1,
|
||||
cluster_name,
|
||||
cluster_secret);
|
||||
|
||||
addresses_with_failover.emplace_back(current);
|
||||
|
||||
@ -690,6 +700,9 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti
|
||||
}
|
||||
}
|
||||
|
||||
secret = from.secret;
|
||||
name = from.name;
|
||||
|
||||
initMisc();
|
||||
}
|
||||
|
||||
@ -704,6 +717,9 @@ Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector
|
||||
addresses_with_failover.emplace_back(from.addresses_with_failover.at(index));
|
||||
}
|
||||
|
||||
secret = from.secret;
|
||||
name = from.name;
|
||||
|
||||
initMisc();
|
||||
}
|
||||
|
||||
|
@ -55,7 +55,9 @@ public:
|
||||
bool treat_local_as_remote,
|
||||
bool treat_local_port_as_remote,
|
||||
bool secure = false,
|
||||
Int64 priority = 1);
|
||||
Int64 priority = 1,
|
||||
String cluster_name = "",
|
||||
String cluster_secret = "");
|
||||
|
||||
Cluster(const Cluster &)= delete;
|
||||
Cluster & operator=(const Cluster &) = delete;
|
||||
@ -127,7 +129,9 @@ public:
|
||||
bool secure_ = false,
|
||||
Int64 priority_ = 1,
|
||||
UInt32 shard_index_ = 0,
|
||||
UInt32 replica_index_ = 0);
|
||||
UInt32 replica_index_ = 0,
|
||||
String cluster_name = "",
|
||||
String cluster_secret_ = "");
|
||||
|
||||
/// Returns 'escaped_host_name:port'
|
||||
String toString() const;
|
||||
|
@ -350,6 +350,12 @@ void DDLWorker::scheduleTasks(bool reinitialized)
|
||||
bool maybe_concurrently_deleting = task && !zookeeper->exists(fs::path(task->entry_path) / "active");
|
||||
return task && !maybe_concurrently_deleting && !maybe_currently_processing;
|
||||
}
|
||||
else if (last_skipped_entry_name.has_value() && !queue_fully_loaded_after_initialization_debug_helper)
|
||||
{
|
||||
/// If connection was lost during queue loading
|
||||
/// we may start processing from finished task (because we don't know yet that it's finished) and it's ok.
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Return true if entry should not be scheduled.
|
||||
@ -365,7 +371,11 @@ void DDLWorker::scheduleTasks(bool reinitialized)
|
||||
|
||||
String reason;
|
||||
auto task = initAndCheckTask(entry_name, reason, zookeeper);
|
||||
if (!task)
|
||||
if (task)
|
||||
{
|
||||
queue_fully_loaded_after_initialization_debug_helper = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_DEBUG(log, "Will not execute task {}: {}", entry_name, reason);
|
||||
updateMaxDDLEntryID(entry_name);
|
||||
|
@ -131,6 +131,9 @@ protected:
|
||||
std::optional<String> first_failed_task_name;
|
||||
std::list<DDLTaskPtr> current_tasks;
|
||||
|
||||
/// This flag is needed for debug assertions only
|
||||
bool queue_fully_loaded_after_initialization_debug_helper = false;
|
||||
|
||||
Coordination::Stat queue_node_stat;
|
||||
std::shared_ptr<Poco::Event> queue_updated_event = std::make_shared<Poco::Event>();
|
||||
std::shared_ptr<Poco::Event> cleanup_event = std::make_shared<Poco::Event>();
|
||||
|
@ -259,7 +259,7 @@ NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & a
|
||||
if (!array_join_expression_list)
|
||||
return src_columns;
|
||||
|
||||
getRootActionsNoMakeSet(array_join_expression_list, true, actions, false);
|
||||
getRootActionsNoMakeSet(array_join_expression_list, actions, false);
|
||||
|
||||
auto array_join = addMultipleArrayJoinAction(actions, is_array_join_left);
|
||||
auto sample_columns = actions->getResultColumns();
|
||||
@ -294,7 +294,7 @@ NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const
|
||||
const ASTTablesInSelectQueryElement * join = select_query->join();
|
||||
if (join)
|
||||
{
|
||||
getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), true, actions, false);
|
||||
getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), actions, false);
|
||||
auto sample_columns = actions->getNamesAndTypesList();
|
||||
syntax->analyzed_join->addJoinedColumnsAndCorrectTypes(sample_columns, true);
|
||||
actions = std::make_shared<ActionsDAG>(sample_columns);
|
||||
@ -332,14 +332,14 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions)
|
||||
{
|
||||
NameSet unique_keys;
|
||||
ASTs & group_asts = group_by_ast->children;
|
||||
for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i)
|
||||
for (ssize_t i = 0; i < static_cast<ssize_t>(group_asts.size()); ++i)
|
||||
{
|
||||
ssize_t size = group_asts.size();
|
||||
|
||||
if (getContext()->getSettingsRef().enable_positional_arguments)
|
||||
replaceForPositionalArguments(group_asts[i], select_query, ASTSelectQuery::Expression::GROUP_BY);
|
||||
|
||||
getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false);
|
||||
getRootActionsNoMakeSet(group_asts[i], temp_actions, false);
|
||||
|
||||
const auto & column_name = group_asts[i]->getColumnName();
|
||||
|
||||
@ -405,8 +405,8 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global)
|
||||
{
|
||||
if (do_global)
|
||||
{
|
||||
GlobalSubqueriesVisitor::Data subqueries_data(getContext(), subquery_depth, isRemoteStorage(),
|
||||
external_tables, subqueries_for_sets, has_global_subqueries);
|
||||
GlobalSubqueriesVisitor::Data subqueries_data(
|
||||
getContext(), subquery_depth, isRemoteStorage(), external_tables, subqueries_for_sets, has_global_subqueries);
|
||||
GlobalSubqueriesVisitor(subqueries_data).visit(query);
|
||||
}
|
||||
}
|
||||
@ -416,7 +416,7 @@ void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_
|
||||
{
|
||||
auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name);
|
||||
|
||||
if (prepared_sets.count(set_key))
|
||||
if (prepared_sets.contains(set_key))
|
||||
return; /// Already prepared.
|
||||
|
||||
if (auto set_ptr_from_storage_set = isPlainStorageSetInSubquery(subquery_or_table_name))
|
||||
@ -509,33 +509,62 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
|
||||
}
|
||||
|
||||
|
||||
void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts)
|
||||
void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts)
|
||||
{
|
||||
LogAST log;
|
||||
ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth,
|
||||
sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets,
|
||||
no_subqueries, false, only_consts, !isRemoteStorage());
|
||||
ActionsVisitor::Data visitor_data(
|
||||
getContext(),
|
||||
settings.size_limits_for_set,
|
||||
subquery_depth,
|
||||
sourceColumns(),
|
||||
std::move(actions),
|
||||
prepared_sets,
|
||||
subqueries_for_sets,
|
||||
no_makeset_for_subqueries,
|
||||
false /* no_makeset */,
|
||||
only_consts,
|
||||
!isRemoteStorage() /* create_source_for_in */);
|
||||
ActionsVisitor(visitor_data, log.stream()).visit(ast);
|
||||
actions = visitor_data.getActions();
|
||||
}
|
||||
|
||||
|
||||
void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts)
|
||||
void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts)
|
||||
{
|
||||
LogAST log;
|
||||
ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth,
|
||||
sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets,
|
||||
no_subqueries, true, only_consts, !isRemoteStorage());
|
||||
ActionsVisitor::Data visitor_data(
|
||||
getContext(),
|
||||
settings.size_limits_for_set,
|
||||
subquery_depth,
|
||||
sourceColumns(),
|
||||
std::move(actions),
|
||||
prepared_sets,
|
||||
subqueries_for_sets,
|
||||
true /* no_makeset_for_subqueries, no_makeset implies no_makeset_for_subqueries */,
|
||||
true /* no_makeset */,
|
||||
only_consts,
|
||||
!isRemoteStorage() /* create_source_for_in */);
|
||||
ActionsVisitor(visitor_data, log.stream()).visit(ast);
|
||||
actions = visitor_data.getActions();
|
||||
}
|
||||
|
||||
void ExpressionAnalyzer::getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts)
|
||||
|
||||
void ExpressionAnalyzer::getRootActionsForHaving(
|
||||
const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts)
|
||||
{
|
||||
LogAST log;
|
||||
ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth,
|
||||
sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets,
|
||||
no_subqueries, false, only_consts, true);
|
||||
ActionsVisitor::Data visitor_data(
|
||||
getContext(),
|
||||
settings.size_limits_for_set,
|
||||
subquery_depth,
|
||||
sourceColumns(),
|
||||
std::move(actions),
|
||||
prepared_sets,
|
||||
subqueries_for_sets,
|
||||
no_makeset_for_subqueries,
|
||||
false /* no_makeset */,
|
||||
only_consts,
|
||||
true /* create_source_for_in */);
|
||||
ActionsVisitor(visitor_data, log.stream()).visit(ast);
|
||||
actions = visitor_data.getActions();
|
||||
}
|
||||
@ -547,7 +576,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr
|
||||
{
|
||||
AggregateDescription aggregate;
|
||||
if (node->arguments)
|
||||
getRootActionsNoMakeSet(node->arguments, true, actions);
|
||||
getRootActionsNoMakeSet(node->arguments, actions);
|
||||
|
||||
aggregate.column_name = node->getColumnName();
|
||||
|
||||
@ -746,8 +775,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
|
||||
// Requiring a constant reference to a shared pointer to non-const AST
|
||||
// doesn't really look sane, but the visitor does indeed require it.
|
||||
// Hence we clone the node (not very sane either, I know).
|
||||
getRootActionsNoMakeSet(window_function.function_node->clone(),
|
||||
true, actions);
|
||||
getRootActionsNoMakeSet(window_function.function_node->clone(), actions);
|
||||
|
||||
const ASTs & arguments
|
||||
= window_function.function_node->arguments->children;
|
||||
@ -867,8 +895,7 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi
|
||||
auto array_join = addMultipleArrayJoinAction(step.actions(), is_array_join_left);
|
||||
before_array_join = chain.getLastActions();
|
||||
|
||||
chain.steps.push_back(std::make_unique<ExpressionActionsChain::ArrayJoinStep>(
|
||||
array_join, step.getResultColumns()));
|
||||
chain.steps.push_back(std::make_unique<ExpressionActionsChain::ArrayJoinStep>(array_join, step.getResultColumns()));
|
||||
|
||||
chain.addStep();
|
||||
|
||||
@ -1099,8 +1126,8 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
|
||||
}
|
||||
}
|
||||
|
||||
chain.steps.emplace_back(std::make_unique<ExpressionActionsChain::ExpressionActionsStep>(
|
||||
std::make_shared<ActionsDAG>(std::move(columns))));
|
||||
chain.steps.emplace_back(
|
||||
std::make_unique<ExpressionActionsChain::ExpressionActionsStep>(std::make_shared<ActionsDAG>(std::move(columns))));
|
||||
chain.steps.back()->additional_input = std::move(unused_source_columns);
|
||||
chain.getLastActions();
|
||||
chain.addStep();
|
||||
@ -1210,8 +1237,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
|
||||
// recursively together with (1b) as ASTFunction::window_definition.
|
||||
if (getSelectQuery()->window())
|
||||
{
|
||||
getRootActionsNoMakeSet(getSelectQuery()->window(),
|
||||
true /* no_subqueries */, step.actions());
|
||||
getRootActionsNoMakeSet(getSelectQuery()->window(), step.actions());
|
||||
}
|
||||
|
||||
for (const auto & [_, w] : window_descriptions)
|
||||
@ -1222,8 +1248,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
|
||||
// definitions (1a).
|
||||
// Requiring a constant reference to a shared pointer to non-const AST
|
||||
// doesn't really look sane, but the visitor does indeed require it.
|
||||
getRootActionsNoMakeSet(f.function_node->clone(),
|
||||
true /* no_subqueries */, step.actions());
|
||||
getRootActionsNoMakeSet(f.function_node->clone(), step.actions());
|
||||
|
||||
// (2b) Required function argument columns.
|
||||
for (const auto & a : f.function_node->arguments->children)
|
||||
@ -1456,7 +1481,7 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r
|
||||
alias = name;
|
||||
result_columns.emplace_back(name, alias);
|
||||
result_names.push_back(alias);
|
||||
getRootActions(ast, false, actions_dag);
|
||||
getRootActions(ast, false /* no_makeset_for_subqueries */, actions_dag);
|
||||
}
|
||||
|
||||
if (add_aliases)
|
||||
@ -1496,7 +1521,7 @@ ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const ColumnsWithTypeAn
|
||||
{
|
||||
auto actions = std::make_shared<ActionsDAG>(constant_inputs);
|
||||
|
||||
getRootActions(query, true, actions, true);
|
||||
getRootActions(query, true /* no_makeset_for_subqueries */, actions, true /* only_consts */);
|
||||
return std::make_shared<ExpressionActions>(actions, ExpressionActionsSettings::fromContext(getContext()));
|
||||
}
|
||||
|
||||
|
@ -172,15 +172,15 @@ protected:
|
||||
|
||||
ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const;
|
||||
|
||||
void getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
|
||||
void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
|
||||
|
||||
/** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in
|
||||
* analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the
|
||||
* prepared sets would not be applicable for MergeTree index optimization.
|
||||
*/
|
||||
void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
|
||||
void getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts = false);
|
||||
|
||||
void getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
|
||||
void getRootActionsForHaving(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
|
||||
|
||||
/** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions,
|
||||
* Create a set of columns aggregated_columns resulting after the aggregation, if any,
|
||||
|
@ -15,14 +15,14 @@ namespace DB
|
||||
class ExternalModelsLoader : public ExternalLoader, WithContext
|
||||
{
|
||||
public:
|
||||
using ModelPtr = std::shared_ptr<const IModel>;
|
||||
using ModelPtr = std::shared_ptr<const IMLModel>;
|
||||
|
||||
/// Models will be loaded immediately and then will be updated in separate thread, each 'reload_period' seconds.
|
||||
explicit ExternalModelsLoader(ContextPtr context_);
|
||||
|
||||
ModelPtr getModel(const std::string & model_name) const
|
||||
{
|
||||
return std::static_pointer_cast<const IModel>(load(model_name));
|
||||
return std::static_pointer_cast<const IMLModel>(load(model_name));
|
||||
}
|
||||
|
||||
void reloadModel(const std::string & model_name) const
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <Interpreters/interpretSubquery.h>
|
||||
#include <Interpreters/SubqueryForSet.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTSubquery.h>
|
||||
@ -17,7 +18,11 @@
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Processors/Executors/CompletedPipelineExecutor.h>
|
||||
#include <Processors/Sinks/SinkToStorage.h>
|
||||
#include <Processors/QueryPlan/QueryPlan.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Storages/ColumnsDescription.h>
|
||||
#include <Storages/ConstraintsDescription.h>
|
||||
#include <Storages/IStorage.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -34,7 +39,6 @@ public:
|
||||
{
|
||||
size_t subquery_depth;
|
||||
bool is_remote;
|
||||
size_t external_table_id;
|
||||
TemporaryTablesMapping & external_tables;
|
||||
SubqueriesForSets & subqueries_for_sets;
|
||||
bool & has_global_subqueries;
|
||||
@ -49,7 +53,6 @@ public:
|
||||
: WithContext(context_)
|
||||
, subquery_depth(subquery_depth_)
|
||||
, is_remote(is_remote_)
|
||||
, external_table_id(1)
|
||||
, external_tables(tables)
|
||||
, subqueries_for_sets(subqueries_for_sets_)
|
||||
, has_global_subqueries(has_global_subqueries_)
|
||||
@ -92,48 +95,33 @@ public:
|
||||
{
|
||||
/// If this is already an external table, you do not need to add anything. Just remember its presence.
|
||||
auto temporary_table_name = getIdentifierName(subquery_or_table_name);
|
||||
bool exists_in_local_map = external_tables.end() != external_tables.find(temporary_table_name);
|
||||
bool exists_in_local_map = external_tables.contains(temporary_table_name);
|
||||
bool exists_in_context = static_cast<bool>(getContext()->tryResolveStorageID(
|
||||
StorageID("", temporary_table_name), Context::ResolveExternal));
|
||||
if (exists_in_local_map || exists_in_context)
|
||||
return;
|
||||
}
|
||||
|
||||
String external_table_name = subquery_or_table_name->tryGetAlias();
|
||||
if (external_table_name.empty())
|
||||
String alias = subquery_or_table_name->tryGetAlias();
|
||||
String external_table_name;
|
||||
if (alias.empty())
|
||||
{
|
||||
/// Generate the name for the external table.
|
||||
external_table_name = "_data" + toString(external_table_id);
|
||||
while (external_tables.count(external_table_name))
|
||||
{
|
||||
++external_table_id;
|
||||
external_table_name = "_data" + toString(external_table_id);
|
||||
auto hash = subquery_or_table_name->getTreeHash();
|
||||
external_table_name = fmt::format("_data_{}_{}", hash.first, hash.second);
|
||||
}
|
||||
}
|
||||
|
||||
auto interpreter = interpretSubquery(subquery_or_table_name, getContext(), subquery_depth, {});
|
||||
|
||||
Block sample = interpreter->getSampleBlock();
|
||||
NamesAndTypesList columns = sample.getNamesAndTypesList();
|
||||
|
||||
auto external_storage_holder = std::make_shared<TemporaryTableHolder>(
|
||||
getContext(),
|
||||
ColumnsDescription{columns},
|
||||
ConstraintsDescription{},
|
||||
nullptr,
|
||||
/*create_for_global_subquery*/ true);
|
||||
StoragePtr external_storage = external_storage_holder->getTable();
|
||||
else
|
||||
external_table_name = alias;
|
||||
|
||||
/** We replace the subquery with the name of the temporary table.
|
||||
* It is in this form that the request will go to the remote server.
|
||||
* This temporary table will go to the remote server, and on its side,
|
||||
* instead of doing a subquery, you just need to read it.
|
||||
* TODO We can do better than using alias to name external tables
|
||||
*/
|
||||
|
||||
auto database_and_table_name = std::make_shared<ASTTableIdentifier>(external_table_name);
|
||||
if (set_alias)
|
||||
{
|
||||
String alias = subquery_or_table_name->tryGetAlias();
|
||||
if (auto * table_name = subquery_or_table_name->as<ASTTableIdentifier>())
|
||||
if (alias.empty())
|
||||
alias = table_name->shortName();
|
||||
@ -151,8 +139,27 @@ public:
|
||||
else
|
||||
ast = database_and_table_name;
|
||||
|
||||
external_tables[external_table_name] = external_storage_holder;
|
||||
if (external_tables.contains(external_table_name))
|
||||
return;
|
||||
|
||||
auto interpreter = interpretSubquery(subquery_or_table_name, getContext(), subquery_depth, {});
|
||||
|
||||
Block sample = interpreter->getSampleBlock();
|
||||
NamesAndTypesList columns = sample.getNamesAndTypesList();
|
||||
|
||||
auto external_storage_holder = std::make_shared<TemporaryTableHolder>(
|
||||
getContext(),
|
||||
ColumnsDescription{columns},
|
||||
ConstraintsDescription{},
|
||||
nullptr,
|
||||
/*create_for_global_subquery*/ true);
|
||||
StoragePtr external_storage = external_storage_holder->getTable();
|
||||
|
||||
external_tables.emplace(external_table_name, external_storage_holder);
|
||||
|
||||
/// We need to materialize external tables immediately because reading from distributed
|
||||
/// tables might generate local plans which can refer to external tables during index
|
||||
/// analysis. It's too late to populate the external table via CreatingSetsTransform.
|
||||
if (getContext()->getSettingsRef().use_index_for_in_with_subqueries)
|
||||
{
|
||||
auto external_table = external_storage_holder->getTable();
|
||||
|
@ -1237,16 +1237,16 @@ NO_INLINE IColumn::Filter joinRightColumns(
|
||||
{
|
||||
const IColumn & left_asof_key = added_columns.leftAsofKey();
|
||||
|
||||
auto [block, row_num] = mapped->findAsof(left_asof_key, i);
|
||||
if (block)
|
||||
auto row_ref = mapped->findAsof(left_asof_key, i);
|
||||
if (row_ref.block)
|
||||
{
|
||||
setUsed<need_filter>(filter, i);
|
||||
if constexpr (multiple_disjuncts)
|
||||
used_flags.template setUsed<jf.need_flags, multiple_disjuncts>(block, row_num, 0);
|
||||
used_flags.template setUsed<jf.need_flags, multiple_disjuncts>(row_ref.block, row_ref.row_num, 0);
|
||||
else
|
||||
used_flags.template setUsed<jf.need_flags, multiple_disjuncts>(find_result);
|
||||
|
||||
added_columns.appendFromBlock<jf.add_missing>(*block, row_num);
|
||||
added_columns.appendFromBlock<jf.add_missing>(*row_ref.block, row_ref.row_num);
|
||||
}
|
||||
else
|
||||
addNotFoundRow<jf.add_missing, jf.need_replication>(added_columns, current_offset);
|
||||
|
@ -1242,10 +1242,6 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
}
|
||||
|
||||
preliminary_sort();
|
||||
|
||||
// If there is no global subqueries, we can run subqueries only when receive them on server.
|
||||
if (!query_analyzer->hasGlobalSubqueries() && !subqueries_for_sets.empty())
|
||||
executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets);
|
||||
}
|
||||
|
||||
if (expressions.second_stage || from_aggregation_stage)
|
||||
@ -1428,7 +1424,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
|
||||
}
|
||||
}
|
||||
|
||||
if (!subqueries_for_sets.empty() && (expressions.hasHaving() || query_analyzer->hasGlobalSubqueries()))
|
||||
if (!subqueries_for_sets.empty())
|
||||
executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets);
|
||||
}
|
||||
|
||||
@ -1892,7 +1888,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
|
||||
&& limit_length <= std::numeric_limits<UInt64>::max() - limit_offset
|
||||
&& limit_length + limit_offset < max_block_size)
|
||||
{
|
||||
max_block_size = std::max(UInt64(1), limit_length + limit_offset);
|
||||
max_block_size = std::max(UInt64{1}, limit_length + limit_offset);
|
||||
max_threads_execute_query = max_streams = 1;
|
||||
}
|
||||
|
||||
@ -2578,11 +2574,11 @@ void InterpreterSelectQuery::executeExtremes(QueryPlan & query_plan)
|
||||
|
||||
void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_plan, SubqueriesForSets & subqueries_for_sets)
|
||||
{
|
||||
const auto & input_order_info = query_info.input_order_info
|
||||
? query_info.input_order_info
|
||||
: (query_info.projection ? query_info.projection->input_order_info : nullptr);
|
||||
if (input_order_info)
|
||||
executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins");
|
||||
// const auto & input_order_info = query_info.input_order_info
|
||||
// ? query_info.input_order_info
|
||||
// : (query_info.projection ? query_info.projection->input_order_info : nullptr);
|
||||
// if (input_order_info)
|
||||
// executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins");
|
||||
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
#include <Interpreters/RowRefs.h>
|
||||
|
||||
#include <Common/RadixSort.h>
|
||||
#include <AggregateFunctions/Helpers.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <DataTypes/IDataType.h>
|
||||
@ -44,38 +45,52 @@ class SortedLookupVector : public SortedLookupVectorBase
|
||||
{
|
||||
struct Entry
|
||||
{
|
||||
/// We don't store a RowRef and instead keep it's members separately (and return a tuple) to reduce the memory usage.
|
||||
/// For example, for sizeof(T) == 4 => sizeof(Entry) == 16 (while before it would be 20). Then when you put it into a vector, the effect is even greater
|
||||
decltype(RowRef::block) block;
|
||||
decltype(RowRef::row_num) row_num;
|
||||
TKey asof_value;
|
||||
TKey value;
|
||||
uint32_t row_ref_index;
|
||||
|
||||
Entry() = delete;
|
||||
Entry(TKey v, const Block * b, size_t r) : block(b), row_num(r), asof_value(v) { }
|
||||
Entry(TKey value_, uint32_t row_ref_index_)
|
||||
: value(value_)
|
||||
, row_ref_index(row_ref_index_)
|
||||
{ }
|
||||
|
||||
bool operator<(const Entry & other) const { return asof_value < other.asof_value; }
|
||||
};
|
||||
|
||||
struct LessEntryOperator
|
||||
{
|
||||
ALWAYS_INLINE bool operator()(const Entry & lhs, const Entry & rhs) const
|
||||
{
|
||||
return lhs.value < rhs.value;
|
||||
}
|
||||
};
|
||||
|
||||
struct GreaterEntryOperator
|
||||
{
|
||||
bool operator()(Entry const & a, Entry const & b) const { return a.asof_value > b.asof_value; }
|
||||
ALWAYS_INLINE bool operator()(const Entry & lhs, const Entry & rhs) const
|
||||
{
|
||||
return lhs.value > rhs.value;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
public:
|
||||
using Base = std::vector<Entry>;
|
||||
using Keys = std::vector<TKey>;
|
||||
static constexpr bool isDescending = (inequality == ASOF::Inequality::Greater || inequality == ASOF::Inequality::GreaterOrEquals);
|
||||
static constexpr bool isStrict = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::Greater);
|
||||
using Entries = PaddedPODArray<Entry>;
|
||||
using RowRefs = PaddedPODArray<RowRef>;
|
||||
|
||||
static constexpr bool is_descending = (inequality == ASOF::Inequality::Greater || inequality == ASOF::Inequality::GreaterOrEquals);
|
||||
static constexpr bool is_strict = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::Greater);
|
||||
|
||||
void insert(const IColumn & asof_column, const Block * block, size_t row_num) override
|
||||
{
|
||||
using ColumnType = ColumnVectorOrDecimal<TKey>;
|
||||
const auto & column = assert_cast<const ColumnType &>(asof_column);
|
||||
TKey k = column.getElement(row_num);
|
||||
TKey key = column.getElement(row_num);
|
||||
|
||||
assert(!sorted.load(std::memory_order_acquire));
|
||||
array.emplace_back(k, block, row_num);
|
||||
|
||||
entries.emplace_back(key, row_refs.size());
|
||||
row_refs.emplace_back(RowRef(block, row_num));
|
||||
}
|
||||
|
||||
/// Unrolled version of upper_bound and lower_bound
|
||||
@ -84,30 +99,30 @@ public:
|
||||
/// at https://en.algorithmica.org/hpc/data-structures/s-tree/
|
||||
size_t boundSearch(TKey value)
|
||||
{
|
||||
size_t size = array.size();
|
||||
size_t size = entries.size();
|
||||
size_t low = 0;
|
||||
|
||||
/// This is a single binary search iteration as a macro to unroll. Takes into account the inequality:
|
||||
/// isStrict -> Equal values are not requested
|
||||
/// isDescending -> The vector is sorted in reverse (for greater or greaterOrEquals)
|
||||
/// is_strict -> Equal values are not requested
|
||||
/// is_descending -> The vector is sorted in reverse (for greater or greaterOrEquals)
|
||||
#define BOUND_ITERATION \
|
||||
{ \
|
||||
size_t half = size / 2; \
|
||||
size_t other_half = size - half; \
|
||||
size_t probe = low + half; \
|
||||
size_t other_low = low + other_half; \
|
||||
TKey v = array[probe].asof_value; \
|
||||
TKey & v = entries[probe].value; \
|
||||
size = half; \
|
||||
if constexpr (isDescending) \
|
||||
if constexpr (is_descending) \
|
||||
{ \
|
||||
if constexpr (isStrict) \
|
||||
if constexpr (is_strict) \
|
||||
low = value <= v ? other_low : low; \
|
||||
else \
|
||||
low = value < v ? other_low : low; \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if constexpr (isStrict) \
|
||||
if constexpr (is_strict) \
|
||||
low = value >= v ? other_low : low; \
|
||||
else \
|
||||
low = value > v ? other_low : low; \
|
||||
@ -130,7 +145,7 @@ public:
|
||||
return low;
|
||||
}
|
||||
|
||||
std::tuple<decltype(RowRef::block), decltype(RowRef::row_num)> findAsof(const IColumn & asof_column, size_t row_num) override
|
||||
RowRef findAsof(const IColumn & asof_column, size_t row_num) override
|
||||
{
|
||||
sort();
|
||||
|
||||
@ -139,8 +154,11 @@ public:
|
||||
TKey k = column.getElement(row_num);
|
||||
|
||||
size_t pos = boundSearch(k);
|
||||
if (pos != array.size())
|
||||
return std::make_tuple(array[pos].block, array[pos].row_num);
|
||||
if (pos != entries.size())
|
||||
{
|
||||
size_t row_ref_index = entries[pos].row_ref_index;
|
||||
return row_refs[row_ref_index];
|
||||
}
|
||||
|
||||
return {nullptr, 0};
|
||||
}
|
||||
@ -148,7 +166,8 @@ public:
|
||||
private:
|
||||
std::atomic<bool> sorted = false;
|
||||
mutable std::mutex lock;
|
||||
Base array;
|
||||
Entries entries;
|
||||
RowRefs row_refs;
|
||||
|
||||
// Double checked locking with SC atomics works in C++
|
||||
// https://preshing.com/20130930/double-checked-locking-is-fixed-in-cpp11/
|
||||
@ -160,12 +179,37 @@ private:
|
||||
if (!sorted.load(std::memory_order_acquire))
|
||||
{
|
||||
std::lock_guard<std::mutex> l(lock);
|
||||
|
||||
if (!sorted.load(std::memory_order_relaxed))
|
||||
{
|
||||
if constexpr (isDescending)
|
||||
::sort(array.begin(), array.end(), GreaterEntryOperator());
|
||||
if constexpr (std::is_arithmetic_v<TKey> && !std::is_floating_point_v<TKey>)
|
||||
{
|
||||
if (likely(entries.size() > 256))
|
||||
{
|
||||
struct RadixSortTraits : RadixSortNumTraits<TKey>
|
||||
{
|
||||
using Element = Entry;
|
||||
using Result = Element;
|
||||
|
||||
static TKey & extractKey(Element & elem) { return elem.value; }
|
||||
static Element extractResult(Element & elem) { return elem; }
|
||||
};
|
||||
|
||||
if constexpr (is_descending)
|
||||
RadixSort<RadixSortTraits>::executeLSD(entries.data(), entries.size(), true);
|
||||
else
|
||||
::sort(array.begin(), array.end());
|
||||
RadixSort<RadixSortTraits>::executeLSD(entries.data(), entries.size(), false);
|
||||
|
||||
sorted.store(true, std::memory_order_release);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (is_descending)
|
||||
::sort(entries.begin(), entries.end(), GreaterEntryOperator());
|
||||
else
|
||||
::sort(entries.begin(), entries.end(), LessEntryOperator());
|
||||
|
||||
sorted.store(true, std::memory_order_release);
|
||||
}
|
||||
}
|
||||
|
@ -146,7 +146,7 @@ private:
|
||||
struct SortedLookupVectorBase
|
||||
{
|
||||
SortedLookupVectorBase() = default;
|
||||
virtual ~SortedLookupVectorBase() { }
|
||||
virtual ~SortedLookupVectorBase() = default;
|
||||
|
||||
static std::optional<TypeIndex> getTypeSize(const IColumn & asof_column, size_t & type_size);
|
||||
|
||||
@ -154,7 +154,7 @@ struct SortedLookupVectorBase
|
||||
virtual void insert(const IColumn &, const Block *, size_t) = 0;
|
||||
|
||||
// This needs to be synchronized internally
|
||||
virtual std::tuple<decltype(RowRef::block), decltype(RowRef::row_num)> findAsof(const IColumn &, size_t) = 0;
|
||||
virtual RowRef findAsof(const IColumn &, size_t) = 0;
|
||||
};
|
||||
|
||||
|
||||
|
@ -41,6 +41,57 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
class StorageWithComment : public IAST
|
||||
{
|
||||
public:
|
||||
ASTPtr storage;
|
||||
ASTPtr comment;
|
||||
|
||||
String getID(char) const override { return "Storage with comment definition"; }
|
||||
|
||||
ASTPtr clone() const override
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method clone is not supported");
|
||||
}
|
||||
|
||||
void formatImpl(const FormatSettings &, FormatState &, FormatStateStacked) const override
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported");
|
||||
}
|
||||
};
|
||||
|
||||
class ParserStorageWithComment : public IParserBase
|
||||
{
|
||||
protected:
|
||||
const char * getName() const override { return "storage definition with comment"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
|
||||
{
|
||||
ParserStorage storage_p;
|
||||
ASTPtr storage;
|
||||
|
||||
if (!storage_p.parse(pos, storage, expected))
|
||||
return false;
|
||||
|
||||
ParserKeyword s_comment("COMMENT");
|
||||
ParserStringLiteral string_literal_parser;
|
||||
ASTPtr comment;
|
||||
|
||||
if (s_comment.ignore(pos, expected))
|
||||
string_literal_parser.parse(pos, comment, expected);
|
||||
|
||||
auto storage_with_comment = std::make_shared<StorageWithComment>();
|
||||
storage_with_comment->storage = std::move(storage);
|
||||
storage_with_comment->comment = std::move(comment);
|
||||
|
||||
node = storage_with_comment;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -102,8 +153,9 @@ std::shared_ptr<TSystemLog> createSystemLog(
|
||||
engine += " TTL " + ttl;
|
||||
engine += " ORDER BY (event_date, event_time)";
|
||||
}
|
||||
|
||||
// Validate engine definition grammatically to prevent some configuration errors
|
||||
ParserStorage storage_parser;
|
||||
ParserStorageWithComment storage_parser;
|
||||
parseQuery(storage_parser, engine.data(), engine.data() + engine.size(),
|
||||
"Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
|
||||
@ -450,7 +502,6 @@ void SystemLog<LogElement>::prepareTable()
|
||||
is_prepared = true;
|
||||
}
|
||||
|
||||
|
||||
template <typename LogElement>
|
||||
ASTPtr SystemLog<LogElement>::getCreateTableQuery()
|
||||
{
|
||||
@ -465,11 +516,16 @@ ASTPtr SystemLog<LogElement>::getCreateTableQuery()
|
||||
new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(ordinary_columns, alias_columns));
|
||||
create->set(create->columns_list, new_columns_list);
|
||||
|
||||
ParserStorage storage_parser;
|
||||
ASTPtr storage_ast = parseQuery(
|
||||
ParserStorageWithComment storage_parser;
|
||||
|
||||
ASTPtr storage_with_comment_ast = parseQuery(
|
||||
storage_parser, storage_def.data(), storage_def.data() + storage_def.size(),
|
||||
"Storage to create table for " + LogElement::name(), 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
|
||||
create->set(create->storage, storage_ast);
|
||||
|
||||
StorageWithComment & storage_with_comment = storage_with_comment_ast->as<StorageWithComment &>();
|
||||
|
||||
create->set(create->storage, storage_with_comment.storage);
|
||||
create->set(create->comment, storage_with_comment.comment);
|
||||
|
||||
/// Write additional (default) settings for MergeTree engine to make it possible to compare ASTs
|
||||
/// and recreate tables on settings changes.
|
||||
|
39
src/Interpreters/threadPoolCallbackRunner.cpp
Normal file
39
src/Interpreters/threadPoolCallbackRunner.cpp
Normal file
@ -0,0 +1,39 @@
|
||||
#include "threadPoolCallbackRunner.h"
|
||||
|
||||
#include <base/scope_guard_safe.h>
|
||||
|
||||
#include <Common/CurrentThread.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Builds a CallbackRunner that hands every callback to `pool` through
/// scheduleOrThrow(). The thread group that is current when this function is
/// called is captured, and each callback runs attached to it (if there is one).
CallbackRunner threadPoolCallbackRunner(ThreadPool & pool)
{
    auto * target_pool = &pool;
    auto caller_group = CurrentThread::getGroup();

    return [target_pool, caller_group](auto callback)
    {
        auto job = [task = std::move(callback), caller_group]()
        {
            if (caller_group)
                CurrentThread::attachTo(caller_group);

            SCOPE_EXIT_SAFE({
                if (caller_group)
                    CurrentThread::detachQueryIfNotDetached();

                /// Detaching from the thread group resets the parent of the memory tracker
                /// inside ThreadStatus to its own parent (typically from Process to User).
                /// That is usually fine, because the pool task finishes before the user-level
                /// memory tracker is destroyed. However, the thread, and its ThreadStatus,
                /// may stay alive inside the pool. When the thread is finally destroyed,
                /// ~ThreadStatus may use the memory tracker to alloc/free untracked_memory,
                /// and by that time the user-level memory tracker may already be gone.
                ///
                /// As a work-around, re-parent the memory tracker to the total one, which is always alive.
                CurrentThread::get().memory_tracker.setParent(&total_memory_tracker);
            });

            task();
        };

        target_pool->scheduleOrThrow(std::move(job));
    };
}
|
||||
|
||||
}
|
15
src/Interpreters/threadPoolCallbackRunner.h
Normal file
15
src/Interpreters/threadPoolCallbackRunner.h
Normal file
@ -0,0 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/ThreadPool.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Higher-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously
|
||||
using CallbackRunner = std::function<void(std::function<void()>)>;
|
||||
|
||||
/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()'
|
||||
CallbackRunner threadPoolCallbackRunner(ThreadPool & pool);
|
||||
|
||||
}
|
@ -7,6 +7,27 @@ namespace DB
|
||||
{
|
||||
bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & result)
|
||||
{
|
||||
if (ParserKeyword("NANOSECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_NANOSECOND").ignore(pos, expected)
|
||||
|| ParserKeyword("NS").ignore(pos, expected))
|
||||
{
|
||||
result = IntervalKind::Nanosecond;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ParserKeyword("MICROSECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_MICROSECOND").ignore(pos, expected)
|
||||
|| ParserKeyword("MCS").ignore(pos, expected))
|
||||
{
|
||||
result = IntervalKind::Microsecond;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ParserKeyword("MILLISECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_MILLISECOND").ignore(pos, expected)
|
||||
|| ParserKeyword("MS").ignore(pos, expected))
|
||||
{
|
||||
result = IntervalKind::Millisecond;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (ParserKeyword("SECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_SECOND").ignore(pos, expected)
|
||||
|| ParserKeyword("SS").ignore(pos, expected) || ParserKeyword("S").ignore(pos, expected))
|
||||
{
|
||||
|
@ -140,7 +140,11 @@ void ArrowBlockInputFormat::prepareReader()
|
||||
}
|
||||
|
||||
arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(
|
||||
getPort().getHeader(), "Arrow", format_settings.arrow.import_nested, format_settings.arrow.allow_missing_columns);
|
||||
getPort().getHeader(),
|
||||
"Arrow",
|
||||
format_settings.arrow.import_nested,
|
||||
format_settings.arrow.allow_missing_columns,
|
||||
format_settings.arrow.case_insensitive_column_matching);
|
||||
missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema);
|
||||
|
||||
if (stream)
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include <arrow/builder.h>
|
||||
#include <arrow/array.h>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/algorithm/string/case_conv.hpp>
|
||||
|
||||
/// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn.
|
||||
#define FOR_ARROW_NUMERIC_TYPES(M) \
|
||||
@ -524,19 +525,22 @@ static void checkStatus(const arrow::Status & status, const String & column_name
|
||||
}
|
||||
|
||||
|
||||
Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, bool skip_columns_with_unsupported_types, const Block * hint_header)
|
||||
Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(
|
||||
const arrow::Schema & schema, const std::string & format_name, bool skip_columns_with_unsupported_types, const Block * hint_header, bool ignore_case)
|
||||
{
|
||||
ColumnsWithTypeAndName sample_columns;
|
||||
std::unordered_set<String> nested_table_names;
|
||||
if (hint_header)
|
||||
nested_table_names = Nested::getAllTableNames(*hint_header);
|
||||
nested_table_names = Nested::getAllTableNames(*hint_header, ignore_case);
|
||||
|
||||
for (const auto & field : schema.fields())
|
||||
{
|
||||
if (hint_header && !hint_header->has(field->name()) && !nested_table_names.contains(field->name()))
|
||||
if (hint_header && !hint_header->has(field->name(), ignore_case)
|
||||
&& !nested_table_names.contains(ignore_case ? boost::to_lower_copy(field->name()) : field->name()))
|
||||
continue;
|
||||
|
||||
/// Create empty arrow column by its type and convert it to ClickHouse column.
|
||||
arrow::MemoryPool* pool = arrow::default_memory_pool();
|
||||
arrow::MemoryPool * pool = arrow::default_memory_pool();
|
||||
std::unique_ptr<arrow::ArrayBuilder> array_builder;
|
||||
arrow::Status status = MakeBuilder(pool, field->type(), &array_builder);
|
||||
checkStatus(status, field->name(), format_name);
|
||||
@ -557,20 +561,31 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema,
|
||||
}
|
||||
|
||||
ArrowColumnToCHColumn::ArrowColumnToCHColumn(
|
||||
const Block & header_, const std::string & format_name_, bool import_nested_, bool allow_missing_columns_)
|
||||
: header(header_), format_name(format_name_), import_nested(import_nested_), allow_missing_columns(allow_missing_columns_)
|
||||
const Block & header_,
|
||||
const std::string & format_name_,
|
||||
bool import_nested_,
|
||||
bool allow_missing_columns_,
|
||||
bool case_insensitive_matching_)
|
||||
: header(header_)
|
||||
, format_name(format_name_)
|
||||
, import_nested(import_nested_)
|
||||
, allow_missing_columns(allow_missing_columns_)
|
||||
, case_insensitive_matching(case_insensitive_matching_)
|
||||
{
|
||||
}
|
||||
|
||||
void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table)
|
||||
{
|
||||
NameToColumnPtr name_to_column_ptr;
|
||||
for (const auto & column_name : table->ColumnNames())
|
||||
for (auto column_name : table->ColumnNames())
|
||||
{
|
||||
std::shared_ptr<arrow::ChunkedArray> arrow_column = table->GetColumnByName(column_name);
|
||||
if (!arrow_column)
|
||||
throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column '{}' is duplicated", column_name);
|
||||
name_to_column_ptr[column_name] = arrow_column;
|
||||
|
||||
if (case_insensitive_matching)
|
||||
boost::to_lower(column_name);
|
||||
name_to_column_ptr[std::move(column_name)] = arrow_column;
|
||||
}
|
||||
|
||||
arrowColumnsToCHChunk(res, name_to_column_ptr);
|
||||
@ -590,22 +605,31 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
|
||||
{
|
||||
const ColumnWithTypeAndName & header_column = header.getByPosition(column_i);
|
||||
|
||||
auto search_column_name = header_column.name;
|
||||
if (case_insensitive_matching)
|
||||
boost::to_lower(search_column_name);
|
||||
|
||||
bool read_from_nested = false;
|
||||
String nested_table_name = Nested::extractTableName(header_column.name);
|
||||
if (!name_to_column_ptr.contains(header_column.name))
|
||||
String search_nested_table_name = nested_table_name;
|
||||
if (case_insensitive_matching)
|
||||
boost::to_lower(search_nested_table_name);
|
||||
|
||||
if (!name_to_column_ptr.contains(search_column_name))
|
||||
{
|
||||
/// Check if it's a column from nested table.
|
||||
if (import_nested && name_to_column_ptr.contains(nested_table_name))
|
||||
if (import_nested && name_to_column_ptr.contains(search_nested_table_name))
|
||||
{
|
||||
if (!nested_tables.contains(nested_table_name))
|
||||
if (!nested_tables.contains(search_nested_table_name))
|
||||
{
|
||||
std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[nested_table_name];
|
||||
ColumnsWithTypeAndName cols = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true, true, false, skipped)};
|
||||
std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[search_nested_table_name];
|
||||
ColumnsWithTypeAndName cols
|
||||
= {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true, true, false, skipped)};
|
||||
Block block(cols);
|
||||
nested_tables[nested_table_name] = std::make_shared<Block>(Nested::flatten(block));
|
||||
nested_tables[search_nested_table_name] = std::make_shared<Block>(Nested::flatten(block));
|
||||
}
|
||||
|
||||
read_from_nested = nested_tables[nested_table_name]->has(header_column.name);
|
||||
read_from_nested = nested_tables[search_nested_table_name]->has(header_column.name, case_insensitive_matching);
|
||||
}
|
||||
|
||||
if (!read_from_nested)
|
||||
@ -622,13 +646,19 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[header_column.name];
|
||||
|
||||
ColumnWithTypeAndName column;
|
||||
if (read_from_nested)
|
||||
column = nested_tables[nested_table_name]->getByName(header_column.name);
|
||||
{
|
||||
column = nested_tables[search_nested_table_name]->getByName(header_column.name, case_insensitive_matching);
|
||||
if (case_insensitive_matching)
|
||||
column.name = header_column.name;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto arrow_column = name_to_column_ptr[search_column_name];
|
||||
column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values, true, true, false, skipped);
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
@ -636,8 +666,11 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
e.addMessage(fmt::format("while converting column {} from type {} to type {}",
|
||||
backQuote(header_column.name), column.type->getName(), header_column.type->getName()));
|
||||
e.addMessage(fmt::format(
|
||||
"while converting column {} from type {} to type {}",
|
||||
backQuote(header_column.name),
|
||||
column.type->getName(),
|
||||
header_column.type->getName()));
|
||||
throw;
|
||||
}
|
||||
|
||||
@ -651,22 +684,23 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
|
||||
std::vector<size_t> ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema & schema) const
|
||||
{
|
||||
std::vector<size_t> missing_columns;
|
||||
auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, &header);
|
||||
auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, &header, case_insensitive_matching);
|
||||
auto flatten_block_from_arrow = Nested::flatten(block_from_arrow);
|
||||
|
||||
for (size_t i = 0, columns = header.columns(); i < columns; ++i)
|
||||
{
|
||||
const auto & column = header.getByPosition(i);
|
||||
const auto & header_column = header.getByPosition(i);
|
||||
bool read_from_nested = false;
|
||||
String nested_table_name = Nested::extractTableName(column.name);
|
||||
if (!block_from_arrow.has(column.name))
|
||||
String nested_table_name = Nested::extractTableName(header_column.name);
|
||||
if (!block_from_arrow.has(header_column.name, case_insensitive_matching))
|
||||
{
|
||||
if (import_nested && block_from_arrow.has(nested_table_name))
|
||||
read_from_nested = flatten_block_from_arrow.has(column.name);
|
||||
if (import_nested && block_from_arrow.has(nested_table_name, case_insensitive_matching))
|
||||
read_from_nested = flatten_block_from_arrow.has(header_column.name, case_insensitive_matching);
|
||||
|
||||
if (!read_from_nested)
|
||||
{
|
||||
if (!allow_missing_columns)
|
||||
throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", column.name};
|
||||
throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", header_column.name};
|
||||
|
||||
missing_columns.push_back(i);
|
||||
}
|
||||
|
@ -25,7 +25,8 @@ public:
|
||||
const Block & header_,
|
||||
const std::string & format_name_,
|
||||
bool import_nested_,
|
||||
bool allow_missing_columns_);
|
||||
bool allow_missing_columns_,
|
||||
bool case_insensitive_matching_ = false);
|
||||
|
||||
void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table);
|
||||
|
||||
@ -40,7 +41,8 @@ public:
|
||||
const arrow::Schema & schema,
|
||||
const std::string & format_name,
|
||||
bool skip_columns_with_unsupported_types = false,
|
||||
const Block * hint_header = nullptr);
|
||||
const Block * hint_header = nullptr,
|
||||
bool ignore_case = false);
|
||||
|
||||
private:
|
||||
const Block & header;
|
||||
@ -48,6 +50,7 @@ private:
|
||||
bool import_nested;
|
||||
/// If false, throw exception if some columns in header not exists in arrow table.
|
||||
bool allow_missing_columns;
|
||||
bool case_insensitive_matching;
|
||||
|
||||
/// Map {column name : dictionary column}.
|
||||
/// To avoid converting dictionary from Arrow Dictionary
|
||||
|
@ -54,9 +54,6 @@ Chunk ORCBlockInputFormat::generate()
|
||||
if (!table || !table->num_rows())
|
||||
return res;
|
||||
|
||||
if (format_settings.use_lowercase_column_name)
|
||||
table = *table->RenameColumns(include_column_names);
|
||||
|
||||
arrow_column_to_ch_column->arrowTableToCHChunk(res, table);
|
||||
/// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields.
|
||||
/// Otherwise fill the missing columns with zero values of its type.
|
||||
@ -74,7 +71,6 @@ void ORCBlockInputFormat::resetParser()
|
||||
|
||||
file_reader.reset();
|
||||
include_indices.clear();
|
||||
include_column_names.clear();
|
||||
block_missing_values.clear();
|
||||
}
|
||||
|
||||
@ -126,20 +122,6 @@ static void getFileReaderAndSchema(
|
||||
if (!read_schema_result.ok())
|
||||
throw Exception(read_schema_result.status().ToString(), ErrorCodes::BAD_ARGUMENTS);
|
||||
schema = std::move(read_schema_result).ValueOrDie();
|
||||
|
||||
if (format_settings.use_lowercase_column_name)
|
||||
{
|
||||
std::vector<std::shared_ptr<::arrow::Field>> fields;
|
||||
fields.reserve(schema->num_fields());
|
||||
for (int i = 0; i < schema->num_fields(); ++i)
|
||||
{
|
||||
const auto& field = schema->field(i);
|
||||
auto name = field->name();
|
||||
boost::to_lower(name);
|
||||
fields.push_back(field->WithName(name));
|
||||
}
|
||||
schema = arrow::schema(fields, schema->metadata());
|
||||
}
|
||||
}
|
||||
|
||||
void ORCBlockInputFormat::prepareReader()
|
||||
@ -150,12 +132,17 @@ void ORCBlockInputFormat::prepareReader()
|
||||
return;
|
||||
|
||||
arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(
|
||||
getPort().getHeader(), "ORC", format_settings.orc.import_nested, format_settings.orc.allow_missing_columns);
|
||||
getPort().getHeader(),
|
||||
"ORC",
|
||||
format_settings.orc.import_nested,
|
||||
format_settings.orc.allow_missing_columns,
|
||||
format_settings.orc.case_insensitive_column_matching);
|
||||
missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema);
|
||||
|
||||
const bool ignore_case = format_settings.orc.case_insensitive_column_matching;
|
||||
std::unordered_set<String> nested_table_names;
|
||||
if (format_settings.orc.import_nested)
|
||||
nested_table_names = Nested::getAllTableNames(getPort().getHeader());
|
||||
nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case);
|
||||
|
||||
/// In ReadStripe column indices should be started from 1,
|
||||
/// because 0 indicates to select all columns.
|
||||
@ -166,19 +153,18 @@ void ORCBlockInputFormat::prepareReader()
|
||||
/// so we should recursively count the number of indices we need for this type.
|
||||
int indexes_count = countIndicesForType(schema->field(i)->type());
|
||||
const auto & name = schema->field(i)->name();
|
||||
if (getPort().getHeader().has(name) || nested_table_names.contains(name))
|
||||
if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name))
|
||||
{
|
||||
for (int j = 0; j != indexes_count; ++j)
|
||||
{
|
||||
include_indices.push_back(index + j);
|
||||
include_column_names.push_back(name);
|
||||
}
|
||||
}
|
||||
|
||||
index += indexes_count;
|
||||
}
|
||||
}
|
||||
|
||||
ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : ISchemaReader(in_), format_settings(format_settings_)
|
||||
ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
|
||||
: ISchemaReader(in_), format_settings(format_settings_)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -47,7 +47,6 @@ private:
|
||||
|
||||
// indices of columns to read from ORC file
|
||||
std::vector<int> include_indices;
|
||||
std::vector<String> include_column_names;
|
||||
|
||||
std::vector<size_t> missing_columns;
|
||||
BlockMissingValues block_missing_values;
|
||||
|
@ -54,11 +54,7 @@ Chunk ParquetBlockInputFormat::generate()
|
||||
std::shared_ptr<arrow::Table> table;
|
||||
arrow::Status read_status = file_reader->ReadRowGroup(row_group_current, column_indices, &table);
|
||||
if (!read_status.ok())
|
||||
throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(),
|
||||
ErrorCodes::CANNOT_READ_ALL_DATA};
|
||||
|
||||
if (format_settings.use_lowercase_column_name)
|
||||
table = *table->RenameColumns(column_names);
|
||||
throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA};
|
||||
|
||||
++row_group_current;
|
||||
|
||||
@ -79,7 +75,6 @@ void ParquetBlockInputFormat::resetParser()
|
||||
|
||||
file_reader.reset();
|
||||
column_indices.clear();
|
||||
column_names.clear();
|
||||
row_group_current = 0;
|
||||
block_missing_values.clear();
|
||||
}
|
||||
@ -124,20 +119,6 @@ static void getFileReaderAndSchema(
|
||||
return;
|
||||
THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(std::move(arrow_file), arrow::default_memory_pool(), &file_reader));
|
||||
THROW_ARROW_NOT_OK(file_reader->GetSchema(&schema));
|
||||
|
||||
if (format_settings.use_lowercase_column_name)
|
||||
{
|
||||
std::vector<std::shared_ptr<::arrow::Field>> fields;
|
||||
fields.reserve(schema->num_fields());
|
||||
for (int i = 0; i < schema->num_fields(); ++i)
|
||||
{
|
||||
const auto& field = schema->field(i);
|
||||
auto name = field->name();
|
||||
boost::to_lower(name);
|
||||
fields.push_back(field->WithName(name));
|
||||
}
|
||||
schema = arrow::schema(fields, schema->metadata());
|
||||
}
|
||||
}
|
||||
|
||||
void ParquetBlockInputFormat::prepareReader()
|
||||
@ -150,12 +131,18 @@ void ParquetBlockInputFormat::prepareReader()
|
||||
row_group_total = file_reader->num_row_groups();
|
||||
row_group_current = 0;
|
||||
|
||||
arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(getPort().getHeader(), "Parquet", format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns);
|
||||
arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(
|
||||
getPort().getHeader(),
|
||||
"Parquet",
|
||||
format_settings.parquet.import_nested,
|
||||
format_settings.parquet.allow_missing_columns,
|
||||
format_settings.parquet.case_insensitive_column_matching);
|
||||
missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema);
|
||||
|
||||
const bool ignore_case = format_settings.parquet.case_insensitive_column_matching;
|
||||
std::unordered_set<String> nested_table_names;
|
||||
if (format_settings.parquet.import_nested)
|
||||
nested_table_names = Nested::getAllTableNames(getPort().getHeader());
|
||||
nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case);
|
||||
|
||||
int index = 0;
|
||||
for (int i = 0; i < schema->num_fields(); ++i)
|
||||
@ -165,19 +152,19 @@ void ParquetBlockInputFormat::prepareReader()
|
||||
/// count the number of indices we need for this type.
|
||||
int indexes_count = countIndicesForType(schema->field(i)->type());
|
||||
const auto & name = schema->field(i)->name();
|
||||
if (getPort().getHeader().has(name) || nested_table_names.contains(name))
|
||||
|
||||
if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name))
|
||||
{
|
||||
for (int j = 0; j != indexes_count; ++j)
|
||||
{
|
||||
column_indices.push_back(index + j);
|
||||
column_names.push_back(name);
|
||||
}
|
||||
}
|
||||
|
||||
index += indexes_count;
|
||||
}
|
||||
}
|
||||
|
||||
ParquetSchemaReader::ParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : ISchemaReader(in_), format_settings(format_settings_)
|
||||
ParquetSchemaReader::ParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
|
||||
: ISchemaReader(in_), format_settings(format_settings_)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -40,7 +40,6 @@ private:
|
||||
int row_group_total = 0;
|
||||
// indices of columns to read from Parquet file
|
||||
std::vector<int> column_indices;
|
||||
std::vector<String> column_names;
|
||||
std::unique_ptr<ArrowColumnToCHColumn> arrow_column_to_ch_column;
|
||||
int row_group_current = 0;
|
||||
std::vector<size_t> missing_columns;
|
||||
|
@ -4,7 +4,6 @@
|
||||
#include <Formats/EscapingRuleUtils.h>
|
||||
#include <DataTypes/Serializations/SerializationNullable.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -242,15 +241,16 @@ std::unordered_map<String, DataTypePtr> TSKVSchemaReader::readRowAndGetNamesAndD
|
||||
|
||||
std::unordered_map<String, DataTypePtr> names_and_types;
|
||||
StringRef name_ref;
|
||||
String name_tmp;
|
||||
String name_buf;
|
||||
String value;
|
||||
do
|
||||
{
|
||||
bool has_value = readName(in, name_ref, name_tmp);
|
||||
bool has_value = readName(in, name_ref, name_buf);
|
||||
String name = String(name_ref);
|
||||
if (has_value)
|
||||
{
|
||||
readEscapedString(value, in);
|
||||
names_and_types[String(name_ref)] = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped);
|
||||
names_and_types[std::move(name)] = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -34,16 +34,16 @@ Block FillingTransform::transformHeader(Block header, const SortDescription & so
|
||||
|
||||
template <typename T>
|
||||
static FillColumnDescription::StepFunction getStepFunction(
|
||||
IntervalKind kind, Int64 step, const DateLUTImpl & date_lut)
|
||||
IntervalKind kind, Int64 step, const DateLUTImpl & date_lut, UInt16 scale = DataTypeDateTime64::default_scale)
|
||||
{
|
||||
switch (kind)
|
||||
{
|
||||
#define DECLARE_CASE(NAME) \
|
||||
#define DECLARE_CASE(NAME) \
|
||||
case IntervalKind::NAME: \
|
||||
return [step, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get<T>(field), step, date_lut); };
|
||||
return [step, scale, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get<T>(field), step, date_lut, scale); };
|
||||
|
||||
FOR_EACH_INTERVAL_KIND(DECLARE_CASE)
|
||||
#undef DECLARE_CASE
|
||||
#undef DECLARE_CASE
|
||||
}
|
||||
__builtin_unreachable();
|
||||
}
|
||||
@ -108,20 +108,18 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr &
|
||||
|
||||
switch (*descr.step_kind)
|
||||
{
|
||||
#define DECLARE_CASE(NAME) \
|
||||
#define DECLARE_CASE(NAME) \
|
||||
case IntervalKind::NAME: \
|
||||
descr.step_func = [step, &time_zone = date_time64->getTimeZone()](Field & field) \
|
||||
{ \
|
||||
auto field_decimal = get<DecimalField<DateTime64>>(field); \
|
||||
auto components = DecimalUtils::splitWithScaleMultiplier(field_decimal.getValue(), field_decimal.getScaleMultiplier()); \
|
||||
auto res = Add##NAME##sImpl::execute(components, step, time_zone); \
|
||||
auto res_decimal = decimalFromComponentsWithMultiplier<DateTime64>(res, field_decimal.getScaleMultiplier()); \
|
||||
field = DecimalField(res_decimal, field_decimal.getScale()); \
|
||||
auto res = Add##NAME##sImpl::execute(field_decimal.getValue(), step, time_zone, field_decimal.getScale()); \
|
||||
field = DecimalField(res, field_decimal.getScale()); \
|
||||
}; \
|
||||
break;
|
||||
|
||||
FOR_EACH_INTERVAL_KIND(DECLARE_CASE)
|
||||
#undef DECLARE_CASE
|
||||
#undef DECLARE_CASE
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -399,6 +399,7 @@ bool MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr, bool is_final) c
|
||||
return true;
|
||||
|
||||
/// disallow GLOBAL IN, GLOBAL NOT IN
|
||||
/// TODO why?
|
||||
if ("globalIn" == function_ptr->name
|
||||
|| "globalNotIn" == function_ptr->name)
|
||||
return true;
|
||||
|
@ -179,8 +179,9 @@ std::unique_ptr<ReadBuffer> createReadBuffer(
|
||||
method = chooseCompressionMethod(current_path, compression_method);
|
||||
}
|
||||
|
||||
/// For clickhouse-local add progress callback to display progress bar.
|
||||
if (context->getApplicationType() == Context::ApplicationType::LOCAL)
|
||||
/// For clickhouse-local and clickhouse-client add progress callback to display progress bar.
|
||||
if (context->getApplicationType() == Context::ApplicationType::LOCAL
|
||||
|| context->getApplicationType() == Context::ApplicationType::CLIENT)
|
||||
{
|
||||
auto & in = static_cast<ReadBufferFromFileDescriptor &>(*nested_buffer);
|
||||
in.setProgressCallback(context);
|
||||
@ -643,7 +644,9 @@ Pipe StorageFile::read(
|
||||
|
||||
/// Set total number of bytes to process. For progress bar.
|
||||
auto progress_callback = context->getFileProgressCallback();
|
||||
if (context->getApplicationType() == Context::ApplicationType::LOCAL && progress_callback)
|
||||
if ((context->getApplicationType() == Context::ApplicationType::LOCAL
|
||||
|| context->getApplicationType() == Context::ApplicationType::CLIENT)
|
||||
&& progress_callback)
|
||||
progress_callback(FileProgress(0, total_bytes_to_read));
|
||||
|
||||
for (size_t i = 0; i < num_streams; ++i)
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
#include <Interpreters/TreeRewriter.h>
|
||||
#include <Interpreters/evaluateConstantExpression.h>
|
||||
#include <Interpreters/threadPoolCallbackRunner.h>
|
||||
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTInsertQuery.h>
|
||||
@ -20,6 +21,7 @@
|
||||
#include <Storages/StorageFactory.h>
|
||||
#include <Storages/StorageS3.h>
|
||||
#include <Storages/StorageS3Settings.h>
|
||||
#include <Storages/StorageSnapshot.h>
|
||||
#include <Storages/PartitionedSink.h>
|
||||
|
||||
#include <IO/ReadBufferFromS3.h>
|
||||
@ -374,6 +376,16 @@ static bool checkIfObjectExists(const std::shared_ptr<Aws::S3::S3Client> & clien
|
||||
return false;
|
||||
}
|
||||
|
||||
// TODO: common thread pool for IO must be used instead after PR #35150
|
||||
static ThreadPool & getThreadPoolStorageS3()
|
||||
{
|
||||
constexpr size_t pool_size = 100;
|
||||
constexpr size_t queue_size = 1000000;
|
||||
static ThreadPool pool(pool_size, pool_size, queue_size);
|
||||
return pool;
|
||||
}
|
||||
|
||||
|
||||
class StorageS3Sink : public SinkToStorage
|
||||
{
|
||||
public:
|
||||
@ -398,7 +410,7 @@ public:
|
||||
std::make_unique<WriteBufferFromS3>(
|
||||
client, bucket, key, min_upload_part_size,
|
||||
upload_part_size_multiply_factor, upload_part_size_multiply_parts_count_threshold,
|
||||
max_single_part_upload_size), compression_method, 3);
|
||||
max_single_part_upload_size, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, threadPoolCallbackRunner(getThreadPoolStorageS3())), compression_method, 3);
|
||||
writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, {}, format_settings);
|
||||
}
|
||||
|
||||
|
@ -38,7 +38,7 @@ void StorageSystemModels::fillData(MutableColumns & res_columns, ContextPtr cont
|
||||
|
||||
if (load_result.object)
|
||||
{
|
||||
const auto model_ptr = std::static_pointer_cast<const IModel>(load_result.object);
|
||||
const auto model_ptr = std::static_pointer_cast<const IMLModel>(load_result.object);
|
||||
res_columns[3]->insert(model_ptr->getTypeName());
|
||||
}
|
||||
else
|
||||
|
@ -57,6 +57,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int SYNTAX_ERROR;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int INCORRECT_QUERY;
|
||||
@ -262,7 +263,13 @@ namespace
|
||||
|
||||
IntervalKind strToIntervalKind(const String& interval_str)
|
||||
{
|
||||
if (interval_str == "Second")
|
||||
if (interval_str == "Nanosecond")
|
||||
return IntervalKind::Nanosecond;
|
||||
else if (interval_str == "Microsecond")
|
||||
return IntervalKind::Microsecond;
|
||||
else if (interval_str == "Millisecond")
|
||||
return IntervalKind::Millisecond;
|
||||
else if (interval_str == "Second")
|
||||
return IntervalKind::Second;
|
||||
else if (interval_str == "Minute")
|
||||
return IntervalKind::Minute;
|
||||
@ -307,6 +314,12 @@ namespace
|
||||
{
|
||||
switch (kind)
|
||||
{
|
||||
case IntervalKind::Nanosecond:
|
||||
throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR);
|
||||
case IntervalKind::Microsecond:
|
||||
throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR);
|
||||
case IntervalKind::Millisecond:
|
||||
throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR);
|
||||
#define CASE_WINDOW_KIND(KIND) \
|
||||
case IntervalKind::KIND: { \
|
||||
return AddTime<IntervalKind::KIND>::execute(time_sec, num_units, time_zone); \
|
||||
@ -738,6 +751,12 @@ UInt32 StorageWindowView::getWindowLowerBound(UInt32 time_sec)
|
||||
|
||||
switch (window_interval_kind)
|
||||
{
|
||||
case IntervalKind::Nanosecond:
|
||||
throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR);
|
||||
case IntervalKind::Microsecond:
|
||||
throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR);
|
||||
case IntervalKind::Millisecond:
|
||||
throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR);
|
||||
#define CASE_WINDOW_KIND(KIND) \
|
||||
case IntervalKind::KIND: \
|
||||
{ \
|
||||
@ -773,6 +792,13 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec)
|
||||
|
||||
switch (window_interval_kind)
|
||||
{
|
||||
case IntervalKind::Nanosecond:
|
||||
throw Exception("Fractional seconds are not supported by window view yet", ErrorCodes::SYNTAX_ERROR);
|
||||
case IntervalKind::Microsecond:
|
||||
throw Exception("Fractional seconds are not supported by window view yet", ErrorCodes::SYNTAX_ERROR);
|
||||
case IntervalKind::Millisecond:
|
||||
throw Exception("Fractional seconds are not supported by window view yet", ErrorCodes::SYNTAX_ERROR);
|
||||
|
||||
#define CASE_WINDOW_KIND(KIND) \
|
||||
case IntervalKind::KIND: \
|
||||
{ \
|
||||
|
@ -9,11 +9,10 @@ from github import Github
|
||||
|
||||
from env_helper import (
|
||||
GITHUB_REPOSITORY,
|
||||
TEMP_PATH,
|
||||
REPO_COPY,
|
||||
GITHUB_RUN_URL,
|
||||
REPORTS_PATH,
|
||||
GITHUB_SERVER_URL,
|
||||
GITHUB_RUN_ID,
|
||||
REPO_COPY,
|
||||
TEMP_PATH,
|
||||
)
|
||||
from s3_helper import S3Helper
|
||||
from get_robot_token import get_best_robot_token
|
||||
@ -126,7 +125,7 @@ if __name__ == "__main__":
|
||||
logging.info("Exception uploading file %s text %s", f, ex)
|
||||
paths[f] = ""
|
||||
|
||||
report_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"
|
||||
report_url = GITHUB_RUN_URL
|
||||
if paths["runlog.log"]:
|
||||
report_url = paths["runlog.log"]
|
||||
if paths["main.log"]:
|
||||
|
@ -11,7 +11,7 @@ from env_helper import (
|
||||
TEMP_PATH,
|
||||
GITHUB_REPOSITORY,
|
||||
GITHUB_SERVER_URL,
|
||||
GITHUB_RUN_ID,
|
||||
GITHUB_RUN_URL,
|
||||
)
|
||||
from report import create_build_html_report
|
||||
from s3_helper import S3Helper
|
||||
@ -180,9 +180,7 @@ if __name__ == "__main__":
|
||||
branch_name = "PR #{}".format(pr_info.number)
|
||||
branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_info.number}"
|
||||
commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{pr_info.sha}"
|
||||
task_url = (
|
||||
f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}"
|
||||
)
|
||||
task_url = GITHUB_RUN_URL
|
||||
report = create_build_html_report(
|
||||
build_check_name,
|
||||
build_results,
|
||||
|
@ -11,7 +11,7 @@ from typing import Dict, List, Optional, Set, Tuple, Union
|
||||
|
||||
from github import Github
|
||||
|
||||
from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP
|
||||
from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL
|
||||
from s3_helper import S3Helper
|
||||
from pr_info import PRInfo
|
||||
from get_robot_token import get_best_robot_token, get_parameter_from_ssm
|
||||
@ -234,6 +234,7 @@ def build_and_push_one_image(
|
||||
with open(build_log, "wb") as bl:
|
||||
cmd = (
|
||||
"docker buildx build --builder default "
|
||||
f"--label build-url={GITHUB_RUN_URL} "
|
||||
f"{from_tag_arg}"
|
||||
f"--build-arg BUILDKIT_INLINE_CACHE=1 "
|
||||
f"--tag {image.repo}:{version_string} "
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user