diff --git a/.clang-tidy b/.clang-tidy
index 0400b500e5c..ca84a4834e5 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -210,3 +210,6 @@ CheckOptions:
     value: false
   - key: performance-move-const-arg.CheckTriviallyCopyableMove
     value: false
+  # Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
+  - key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp
+    value: expr-type
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index a172947b2fc..5b47f94a324 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -7,6 +7,7 @@ env:
 "on":
   schedule:
     - cron: '13 3 * * *'
+  workflow_dispatch:
 
 jobs:
   DockerHubPushAarch64:
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 61724ab2d0c..100b03ab92b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@
 * Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)).
 * Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)).
+* Make the functions `cast(value, 'IPv4')` and `cast(value, 'IPv6')` behave the same as the `toIPv4` and `toIPv6` functions. The behavior of `toIPv4` and `toIPv6` for incorrect IP addresses has changed: if an invalid IP address is passed into these functions, an exception is now raised, whereas previously they returned a default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringToNumOrNull`, `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. The functions `IPv4StringToNumOrDefault`, `toIPv4OrDefault`, `toIPv6OrDefault` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning a default value for an invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`; if this setting is enabled, the IP address conversion functions behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)).
 
 #### New Feature
 
@@ -366,7 +367,7 @@
 #### Improvement
 
-* Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch.
+* Now date and time conversion functions that generate a time before `1970-01-01 00:00:00` are saturated to zero instead of overflowing. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if a date truncation function would yield a result before the Unix epoch.
 * Always display resource usage (total CPU usage, total RAM usage and max RAM usage per host) in client. [#33271](https://github.com/ClickHouse/ClickHouse/pull/33271) ([alexey-milovidov](https://github.com/alexey-milovidov)).
 * Improve `Bool` type serialization and deserialization, check the range of values. [#32984](https://github.com/ClickHouse/ClickHouse/pull/32984) ([Kruglov Pavel](https://github.com/Avogar)).
 * If an invalid setting is defined using the `SET` query or using the query parameters in the HTTP request, error message will contain suggestions that are similar to the invalid setting string (if any exists). [#32946](https://github.com/ClickHouse/ClickHouse/pull/32946) ([Antonio Andelic](https://github.com/antonio2368)).
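A minimal SQL sketch of the conversion behavior described in the changelog entry above (illustrative only, not part of the patch; results shown as comments assume default settings):

```sql
-- toIPv4/toIPv6 now throw on invalid input instead of returning a default value:
SELECT toIPv4('hello');           -- raises an exception
SELECT toIPv4OrNull('hello');     -- NULL
SELECT toIPv4OrDefault('hello');  -- 0.0.0.0 (the default IPv4 value)
SELECT toIPv6OrNull('hello');     -- NULL

-- Restore the old CAST behavior per query:
SELECT CAST('hello', 'IPv4')
SETTINGS cast_ipv4_ipv6_default_on_conversion_error = 1;  -- 0.0.0.0
```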
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7ed3872fd6e..deef582c790 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -266,7 +266,7 @@ if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
 endif ()
 
 # Allows to build stripped binary in a separate directory
-if (OBJCOPY_PATH AND READELF_PATH)
+if (OBJCOPY_PATH AND STRIP_PATH)
     option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF)
     if (INSTALL_STRIPPED_BINARIES)
         set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")
diff --git a/cmake/strip.sh b/cmake/strip.sh
deleted file mode 100755
index f85d82fab31..00000000000
--- a/cmake/strip.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env bash
-
-BINARY_PATH=$1
-BINARY_NAME=$(basename "$BINARY_PATH")
-DESTINATION_STRIPPED_DIR=$2
-OBJCOPY_PATH=${3:objcopy}
-READELF_PATH=${4:readelf}
-
-BUILD_ID=$($READELF_PATH -n "$1" | sed -n '/Build ID/ { s/.*: //p; q; }')
-BUILD_ID_PREFIX=${BUILD_ID:0:2}
-BUILD_ID_SUFFIX=${BUILD_ID:2}
-
-DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id"
-DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin"
-
-mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX"
-mkdir -p "$DESTINATION_STRIP_BINARY_DIR"
-
-
-cp "$BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
-
-$OBJCOPY_PATH --only-keep-debug --compress-debug-sections "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
-chmod 0644 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
-chown 0:0 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
-
-strip --remove-section=.comment --remove-section=.note "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
-
-$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
diff --git a/cmake/strip_binary.cmake b/cmake/strip_binary.cmake
index e430807772d..1f24790a159 100644
--- a/cmake/strip_binary.cmake
+++ b/cmake/strip_binary.cmake
@@ -11,16 +11,43 @@ macro(clickhouse_strip_binary)
         message(FATAL_ERROR "A binary path name must be provided for stripping binary")
     endif()
-
     if (NOT DEFINED STRIP_DESTINATION_DIR)
         message(FATAL_ERROR "Destination directory for stripped binary must be provided")
     endif()
 
     add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD
-        COMMAND bash ${ClickHouse_SOURCE_DIR}/cmake/strip.sh ${STRIP_BINARY_PATH} ${STRIP_DESTINATION_DIR} ${OBJCOPY_PATH} ${READELF_PATH}
-        COMMENT "Stripping clickhouse binary" VERBATIM
+        COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/bin"
+        COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin"
+        COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
+        COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
+        COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
+        COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
+        COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
+        COMMENT "Stripping clickhouse binary" VERBATIM
     )
 
     install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
-    install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse)
+    install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}/${STRIP_TARGET}.debug COMPONENT clickhouse)
+endmacro()
+
+
+macro(clickhouse_make_empty_debug_info_for_nfpm)
+    set(oneValueArgs TARGET DESTINATION_DIR)
+    cmake_parse_arguments(EMPTY_DEBUG "" "${oneValueArgs}" "" ${ARGN})
+
+    if (NOT DEFINED EMPTY_DEBUG_TARGET)
+        message(FATAL_ERROR "A target name must be provided for creating empty debug info")
+    endif()
+
+    if (NOT DEFINED EMPTY_DEBUG_DESTINATION_DIR)
+        message(FATAL_ERROR "Destination directory for empty debug must be provided")
+    endif()
+
+    add_custom_command(TARGET ${EMPTY_DEBUG_TARGET} POST_BUILD
+        COMMAND mkdir -p "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug"
+        COMMAND touch "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug"
+        COMMENT "Adding empty debug info for NFPM" VERBATIM
+    )
+
+    install(FILES "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" DESTINATION "${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}" COMPONENT clickhouse)
+endmacro()
diff --git a/cmake/tools.cmake b/cmake/tools.cmake
index d6fddd0509e..d571a46ad26 100644
--- a/cmake/tools.cmake
+++ b/cmake/tools.cmake
@@ -170,32 +170,32 @@ else ()
     message (FATAL_ERROR "Cannot find objcopy.")
 endif ()
 
-# Readelf (FIXME copypaste)
+# Strip (FIXME copypaste)
 if (COMPILER_GCC)
-    find_program (READELF_PATH NAMES "llvm-readelf" "llvm-readelf-13" "llvm-readelf-12" "llvm-readelf-11" "readelf")
+    find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-13" "llvm-strip-12" "llvm-strip-11" "strip")
 else ()
-    find_program (READELF_PATH NAMES "llvm-readelf-${COMPILER_VERSION_MAJOR}" "llvm-readelf" "readelf")
+    find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip")
 endif ()
 
-if (NOT READELF_PATH AND OS_DARWIN)
+if (NOT STRIP_PATH AND OS_DARWIN)
     find_program (BREW_PATH NAMES "brew")
     if (BREW_PATH)
         execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX)
         if (LLVM_PREFIX)
-            find_program (READELF_PATH NAMES "llvm-readelf" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
+            find_program (STRIP_PATH NAMES "llvm-strip" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
         endif ()
-        if (NOT READELF_PATH)
+        if (NOT STRIP_PATH)
             execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX)
             if (BINUTILS_PREFIX)
-                find_program (READELF_PATH NAMES "readelf" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
+                find_program (STRIP_PATH NAMES "strip" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
             endif ()
         endif ()
     endif ()
 endif ()
 
-if (READELF_PATH)
-    message (STATUS "Using readelf: ${READELF_PATH}")
+if (STRIP_PATH)
+    message (STATUS "Using strip: ${STRIP_PATH}")
 else ()
-    message (FATAL_ERROR "Cannot find readelf.")
+    message (FATAL_ERROR "Cannot find strip.")
 endif ()
diff --git a/contrib/hyperscan b/contrib/hyperscan
index e9f08df0213..5edc68c5ac6 160000
--- a/contrib/hyperscan
+++ b/contrib/hyperscan
@@ -1 +1 @@
-Subproject commit e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa
+Subproject commit 5edc68c5ac68d2d4f876159e9ee84def6d3dc87c
diff --git a/contrib/libcxx b/contrib/libcxx
index 61e60294b1d..172b2ae074f 160000
--- a/contrib/libcxx
+++ b/contrib/libcxx
@@ -1 +1 @@
-Subproject commit 61e60294b1de01483caa9f5d00f437c99b674de6
+Subproject commit 172b2ae074f6755145b91c53a95c8540c1468239
diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt
index 332fb0411cd..dc9df48b2c1 100644
--- a/contrib/libcxx-cmake/CMakeLists.txt
+++ b/contrib/libcxx-cmake/CMakeLists.txt
@@ -18,12 +18,14 @@ set(SRCS
 "${LIBCXX_SOURCE_DIR}/src/filesystem/directory_iterator.cpp"
 "${LIBCXX_SOURCE_DIR}/src/filesystem/int128_builtins.cpp"
 "${LIBCXX_SOURCE_DIR}/src/filesystem/operations.cpp"
+"${LIBCXX_SOURCE_DIR}/src/format.cpp"
 "${LIBCXX_SOURCE_DIR}/src/functional.cpp"
 "${LIBCXX_SOURCE_DIR}/src/future.cpp"
 "${LIBCXX_SOURCE_DIR}/src/hash.cpp"
 "${LIBCXX_SOURCE_DIR}/src/ios.cpp"
 "${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp"
 "${LIBCXX_SOURCE_DIR}/src/iostream.cpp"
+"${LIBCXX_SOURCE_DIR}/src/legacy_pointer_safety.cpp"
 "${LIBCXX_SOURCE_DIR}/src/locale.cpp"
 "${LIBCXX_SOURCE_DIR}/src/memory.cpp"
 "${LIBCXX_SOURCE_DIR}/src/mutex.cpp"
@@ -33,6 +35,9 @@ set(SRCS
 "${LIBCXX_SOURCE_DIR}/src/random.cpp"
 "${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp"
 "${LIBCXX_SOURCE_DIR}/src/regex.cpp"
+"${LIBCXX_SOURCE_DIR}/src/ryu/d2fixed.cpp"
+"${LIBCXX_SOURCE_DIR}/src/ryu/d2s.cpp"
+"${LIBCXX_SOURCE_DIR}/src/ryu/f2s.cpp"
 "${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp"
 "${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp"
 "${LIBCXX_SOURCE_DIR}/src/string.cpp"
@@ -49,7 +54,9 @@ set(SRCS
 add_library(cxx ${SRCS})
 set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake")
 
-target_include_directories(cxx SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>)
+target_include_directories(cxx SYSTEM BEFORE PUBLIC
+  $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>
+  $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}>/src)
 target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)
 
 # Enable capturing stack traces for all exceptions.
diff --git a/contrib/libcxxabi b/contrib/libcxxabi
index df8f1e727db..6eb7cc7a7bd 160000
--- a/contrib/libcxxabi
+++ b/contrib/libcxxabi
@@ -1 +1 @@
-Subproject commit df8f1e727dbc9e2bedf2282096fa189dc3fe0076
+Subproject commit 6eb7cc7a7bdd779e6734d1b9fb451df2274462d7
diff --git a/contrib/libcxxabi-cmake/CMakeLists.txt b/contrib/libcxxabi-cmake/CMakeLists.txt
index 425111d9b26..bf1ede8a60e 100644
--- a/contrib/libcxxabi-cmake/CMakeLists.txt
+++ b/contrib/libcxxabi-cmake/CMakeLists.txt
@@ -1,24 +1,24 @@
 set(LIBCXXABI_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libcxxabi")
 set(SRCS
-"${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/cxa_vector.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/stdlib_new_delete.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp"
 )
 
 add_library(cxxabi ${SRCS})
@@ -30,6 +30,7 @@ target_compile_options(cxxabi PRIVATE -w)
 target_include_directories(cxxabi SYSTEM BEFORE
     PUBLIC $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/include>
     PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/include>
+    PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/src>
 )
 target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY)
 target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast.
diff --git a/contrib/libxml2 b/contrib/libxml2
index 18890f471c4..a075d256fd9 160000
--- a/contrib/libxml2
+++ b/contrib/libxml2
@@ -1 +1 @@
-Subproject commit 18890f471c420411aa3c989e104d090966ec9dbf
+Subproject commit a075d256fd9ff15590b86d981b75a50ead124fca
diff --git a/contrib/replxx b/contrib/replxx
index 9460e5e0fc1..6f0b6f151ae 160000
--- a/contrib/replxx
+++ b/contrib/replxx
@@ -1 +1 @@
-Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d
+Subproject commit 6f0b6f151ae2a044625ae93acd19ca365fcea64d
diff --git a/docker/docs/check/Dockerfile b/docker/docs/check/Dockerfile
index 174be123eed..4eb03a91e7a 100644
--- a/docker/docs/check/Dockerfile
+++ b/docker/docs/check/Dockerfile
@@ -1,4 +1,3 @@
-# rebuild in #33610
 # docker build -t clickhouse/docs-check .
 ARG FROM_TAG=latest
 FROM clickhouse/docs-builder:$FROM_TAG
diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index 4af74d3ba54..3cef5b008db 100755
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -131,9 +131,6 @@ function start()
         # use root to match with current uid
         clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
         sleep 0.5
-        cat /var/log/clickhouse-server/stdout.log
-        tail -n200 /var/log/clickhouse-server/stderr.log
-        tail -n200 /var/log/clickhouse-server/clickhouse-server.log
         counter=$((counter + 1))
     done
 
@@ -211,14 +208,12 @@ stop
 start
 
 clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \
-    || echo -e 'Server failed to start\tFAIL' >> /test_output/test_results.tsv
+    || (echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
+    && grep -Fa "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt)
 
 [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
 [ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL"
 
-# Print Fatal log messages to stdout
-zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log*
-
 # Grep logs for sanitizer asserts, crashes and other critical errors
 
 # Sanitizer asserts
@@ -235,20 +230,26 @@ zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/
     || echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
 
 # Logical errors
-zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
-    && echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
+zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /test_output/logical_errors.txt \
    && echo -e 'Logical error thrown (see clickhouse-server.log or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
    || echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv
 
+# Remove file logical_errors.txt if it's empty
+[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
+
 # Crash
 zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
     && echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
     || echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv
 
 # It also checks for crash without stacktrace (printed by watchdog)
-zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
-    && echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
+zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /test_output/fatal_messages.txt \
+    && echo -e 'Fatal message in clickhouse-server.log (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
     || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
 
+# Remove file fatal_messages.txt if it's empty
+[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt
+
 zgrep -Fa "########################################" /test_output/* > /dev/null \
     && echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv
 
@@ -259,12 +260,12 @@ echo -e "Backward compatibility check\n"
 echo "Download previous release server"
 mkdir previous_release_package_folder
-clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \
-    || echo -e 'Download script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
+clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \
+    || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv
 
 if [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
 then
-    echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/backward_compatibility_check_results.tsv
+    echo -e "Successfully downloaded previous release packages\tOK" >> /test_output/test_results.tsv
     stop
 
     # Uninstall current packages
@@ -290,8 +291,8 @@ then
     mkdir tmp_stress_output
 
     ./stress --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
-        && echo -e 'Test script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'Test script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
+        && echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \
+        || echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv
     rm -rf tmp_stress_output
 
     clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables"
@@ -301,8 +302,9 @@ then
     # Start new server
     configure
     start 500
-    clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'Server failed to start\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
+    clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \
+        || (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \
+        && grep -Fa "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt)
 
     clickhouse-client --query="SELECT 'Server version: ', version()"
 
@@ -312,10 +314,12 @@ then
     stop
 
     # Error messages (we should ignore some errors)
+    echo "Check for Error messages in server log:"
     zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
                -e "Code: 236. DB::Exception: Cancelled mutating parts" \
                -e "REPLICA_IS_ALREADY_ACTIVE" \
                -e "REPLICA_IS_ALREADY_EXIST" \
+               -e "ALL_REPLICAS_LOST" \
               -e "DDLWorker: Cannot parse DDL task query" \
               -e "RaftInstance: failed to accept a rpc connection due to error 125" \
               -e "UNKNOWN_DATABASE" \
@@ -328,47 +332,53 @@ then
               -e "Code: 1000, e.code() = 111, Connection refused" \
               -e "UNFINISHED" \
               -e "Renaming unexpected part" \
-        /var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "<Error>" > /dev/null \
-        && echo -e 'Error message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'No Error messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
+        /var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
+        && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
+
+    # Remove file bc_check_error_messages.txt if it's empty
+    [ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt
 
     # Sanitizer asserts
     zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
     zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
     zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \
-        && echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'No sanitizer asserts\tOK' >> /test_output/backward_compatibility_check_results.tsv
+        && echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv
     rm -f /test_output/tmp
 
     # OOM
     zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
-        && echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
+        && echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
 
     # Logical errors
-    zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
-        && echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'No logical errors\tOK' >> /test_output/backward_compatibility_check_results.tsv
+    echo "Check for Logical errors in server log:"
+    zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_logical_errors.txt \
+        && echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv
+
+    # Remove file bc_check_logical_errors.txt if it's empty
+    [ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt
 
     # Crash
     zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
-        && echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'Not crashed\tOK' >> /test_output/backward_compatibility_check_results.tsv
+        && echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv
 
     # It also checks for crash without stacktrace (printed by watchdog)
-    zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
-        && echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
+    echo "Check for Fatal message in server log:"
+    zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_fatal_messages.txt \
+        && echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
+
+    # Remove file bc_check_fatal_messages.txt if it's empty
+    [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
 else
-    echo -e "Failed to download previous release packets\tFAIL" >> /test_output/backward_compatibility_check_results.tsv
+    echo -e "Backward compatibility check: Failed to download previous release packages\tFAIL" >> /test_output/test_results.tsv
 fi
 
-zgrep -Fa "FAIL" /test_output/backward_compatibility_check_results.tsv > /dev/null \
-    && echo -e 'Backward compatibility check\tFAIL' >> /test_output/test_results.tsv \
-    || echo -e 'Backward compatibility check\tOK' >> /test_output/test_results.tsv
-
-
 # Put logs into /test_output/
 for log_file in /var/log/clickhouse-server/clickhouse-server.log*
 do
diff --git a/docs/en/engines/table-engines/integrations/hive.md b/docs/en/engines/table-engines/integrations/hive.md
index b804b9c2279..61147467690 100644
--- a/docs/en/engines/table-engines/integrations/hive.md
+++ b/docs/en/engines/table-engines/integrations/hive.md
@@ -137,7 +137,7 @@ CREATE TABLE test.test_orc
     `f_array_array_float` Array(Array(Float32)),
     `day` String
 )
-ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
 PARTITION BY day
 ```
diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md
index 9c7fab7424d..ad199ce452e 100644
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@@ -195,5 +195,6 @@ toc_title: Adopters
 | ООО «МПЗ Богородский» | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
 | ДомКлик | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
 | АС "Стрела" | Transportation | — | — | — | [Job posting, Jan 2022](https://vk.com/topic-111905078_35689124?post=3553) |
+| Piwik PRO | Web Analytics | — | — | — | [Official website, Dec 2018](https://piwik.pro/blog/piwik-pro-clickhouse-faster-efficient-reports/) |
 
 [Original article](https://clickhouse.com/docs/en/introduction/adopters/)
diff --git a/docs/en/operations/caches.md b/docs/en/operations/caches.md
index 279204a8af1..9aa6419d89c 100644
--- a/docs/en/operations/caches.md
+++ b/docs/en/operations/caches.md
@@ -5,7 +5,7 @@ toc_title: Caches
 
 # Cache Types {#cache-types}
 
-When performing queries, ClichHouse uses different caches.
+When performing queries, ClickHouse uses different caches.
 
 Main cache types:
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 266abadb087..88c43c9c3c2 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -1616,3 +1616,14 @@ Possible values:
 
 Default value: `10000`.
 
+## global_memory_usage_overcommit_max_wait_microseconds {#global_memory_usage_overcommit_max_wait_microseconds}
+
+Sets the maximum waiting time for the global overcommit tracker.
+
+Possible values:
+
+- Positive integer.
+
+Default value: `0`.
+
+
diff --git a/docs/en/operations/settings/memory-overcommit.md b/docs/en/operations/settings/memory-overcommit.md
new file mode 100644
index 00000000000..3f99382b826
--- /dev/null
+++ b/docs/en/operations/settings/memory-overcommit.md
@@ -0,0 +1,31 @@
+# Memory overcommit
+
+Memory overcommit is an experimental technique intended to allow setting more flexible memory limits for queries.
+
+The idea of this technique is to introduce settings which represent the guaranteed amount of memory a query can use.
+When memory overcommit is enabled and the memory limit is reached, ClickHouse will select the most overcommitted query and try to free memory by killing it.
+
+When the memory limit is reached, any query will wait for some time while attempting to allocate new memory.
+If memory is freed within the timeout, the query continues execution. Otherwise an exception is thrown and the query is killed.
+
+The query to stop or kill is selected by either the global or the user overcommit tracker, depending on which memory limit was reached.
+
+## User overcommit tracker
+
+The user overcommit tracker finds the query with the biggest overcommit ratio in the user's query list.
+The overcommit ratio is computed as the number of allocated bytes divided by the value of the `max_guaranteed_memory_usage` setting.
+
+The waiting timeout is set by the `memory_usage_overcommit_max_wait_microseconds` setting.
+
+**Example**
+
+```sql
+SELECT number FROM numbers(1000) GROUP BY number SETTINGS max_guaranteed_memory_usage=4000, memory_usage_overcommit_max_wait_microseconds=500
+```
+
+## Global overcommit tracker
+
+The global overcommit tracker finds the query with the biggest overcommit ratio in the list of all queries.
+In this case the overcommit ratio is computed as the number of allocated bytes divided by the value of the `max_guaranteed_memory_usage_for_user` setting.
+
+The waiting timeout is set by the `global_memory_usage_overcommit_max_wait_microseconds` parameter in the configuration file.
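A sketch of the global tracker analogous to the user-level example above (illustrative only, not part of the patch). Unlike the user tracker, the waiting timeout here comes from the server configuration parameter `global_memory_usage_overcommit_max_wait_microseconds`, not from a query-level setting:

```sql
-- When the server-wide memory limit is hit, the global overcommit tracker compares
-- queries by allocated_bytes / max_guaranteed_memory_usage_for_user and kills the
-- query with the biggest ratio; the numbers below are arbitrary.
SELECT number
FROM numbers(1000)
GROUP BY number
SETTINGS max_guaranteed_memory_usage_for_user = 4000;
```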
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 93075284cfc..91bf0812de4 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -4220,10 +4220,36 @@ Possible values:
 - 0 — Disabled.
 - 1 — Enabled. The wait time equal shutdown_wait_unfinished config.
 
-Default value: 0.
+Default value: `0`.
 
 ## shutdown_wait_unfinished
 
 The waiting time in seconds for currently handled connections when shutdown server.
 
-Default Value: 5.
+Default Value: `5`.
+
+## max_guaranteed_memory_usage
+
+Maximum guaranteed memory usage for processing a single query.
+It represents a soft limit in case the hard limit is reached at the user level.
+Zero means unlimited.
+Read more about [memory overcommit](memory-overcommit.md).
+
+Default value: `0`.
+
+## memory_usage_overcommit_max_wait_microseconds
+
+Maximum time a thread will wait for memory to be freed in the case of memory overcommit at the user level.
+If the timeout is reached and memory is not freed, an exception is thrown.
+Read more about [memory overcommit](memory-overcommit.md).
+
+Default value: `0`.
+
+## max_guaranteed_memory_usage_for_user
+
+Maximum guaranteed memory usage for processing all concurrently running queries for the user.
+It represents a soft limit in case the hard limit is reached at the global level.
+Zero means unlimited.
+Read more about [memory overcommit](memory-overcommit.md).
+
+Default value: `0`.
diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt
index 8bf1a5f477c..c48a70b0909 100644
--- a/docs/tools/requirements.txt
+++ b/docs/tools/requirements.txt
@@ -10,7 +10,7 @@ cssmin==0.2.0
 future==0.18.2
 htmlmin==0.1.12
 idna==2.10
-Jinja2>=3.0.3
+Jinja2==3.0.3
 jinja2-highlight==0.6.1
 jsmin==3.0.0
 livereload==2.6.3
diff --git a/docs/zh/engines/table-engines/integrations/hive.md b/docs/zh/engines/table-engines/integrations/hive.md
index aa2c82d902a..24e0834d2fc 100644
--- a/docs/zh/engines/table-engines/integrations/hive.md
+++ b/docs/zh/engines/table-engines/integrations/hive.md
@@ -140,7 +140,7 @@ CREATE TABLE test.test_orc
     `f_array_array_float` Array(Array(Float32)),
     `day` String
 )
-ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
 PARTITION BY day
 ```
diff --git a/docs/zh/operations/system-tables/functions.md b/docs/zh/operations/system-tables/functions.md
index 695c7b7fee1..75df1f65c1f 100644
--- a/docs/zh/operations/system-tables/functions.md
+++ b/docs/zh/operations/system-tables/functions.md
@@ -15,7 +15,7 @@
 ```
 ┌─name─────────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┐
 │ sumburConsistentHash     │            0 │                0 │          │
-│ yandexConsistentHash     │            0 │                0 │          │
+│ kostikConsistentHash     │            0 │                0 │          │
 │ demangle                 │            0 │                0 │          │
 │ addressToLine            │            0 │                0 │          │
 │ JSONExtractRaw           │            0 │                0 │          │
diff --git a/packages/clickhouse-common-static-dbg.yaml b/packages/clickhouse-common-static-dbg.yaml
index 1213f4215c8..12a1594bd30 100644
--- a/packages/clickhouse-common-static-dbg.yaml
+++ b/packages/clickhouse-common-static-dbg.yaml
@@ -21,8 +21,12 @@ description: |
   This package contains the debugging symbols for clickhouse-common.
contents: -- src: root/usr/lib/debug - dst: /usr/lib/debug +- src: root/usr/lib/debug/usr/bin/clickhouse.debug + dst: /usr/lib/debug/usr/bin/clickhouse.debug +- src: root/usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug + dst: /usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug +- src: root/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug + dst: /usr/lib/debug/usr/bin/clickhouse-library-bridge.debug # docs - src: ../AUTHORS dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 1e2420021b6..cca7be97b61 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -473,18 +473,11 @@ else () if (INSTALL_STRIPPED_BINARIES) clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse) else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT}) install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() endif() -if (NOT INSTALL_STRIPPED_BINARIES) - # Install dunny debug directory - # TODO: move logic to every place where clickhouse_strip_binary is used - add_custom_command(TARGET clickhouse POST_BUILD COMMAND echo > .empty ) - install(FILES "${CMAKE_CURRENT_BINARY_DIR}/.empty" DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/.empty) -endif() - - if (ENABLE_TESTS) set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms) add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS}) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 92bb5dc45a3..9491d503fbf 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -137,5 +137,10 @@ if (BUILD_STANDALONE_KEEPER) add_dependencies(clickhouse-keeper clickhouse_keeper_configs) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) - install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + if (INSTALL_STRIPPED_BINARIES) + clickhouse_strip_binary(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-keeper) + else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT}) + install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + endif() endif() diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index aded9664b35..90ce3d8be7f 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -27,5 +27,6 @@ set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECT if (INSTALL_STRIPPED_BINARIES) clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge) else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT}) install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 50a8bb629c8..b530e08ca26 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ 
-42,6 +42,7 @@ endif() if (INSTALL_STRIPPED_BINARIES) clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge) else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT}) install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index fb432160064..8604fee4aea 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -554,6 +555,10 @@ if (ThreadFuzzer::instance().isEffective()) config().getUInt("thread_pool_queue_size", 10000) ); + IOThreadPool::initialize( + config().getUInt("max_io_thread_pool_size", 100), + config().getUInt("max_io_thread_pool_free_size", 0), + config().getUInt("io_thread_pool_queue_size", 10000)); /// Initialize global local cache for remote filesystem. if (config().has("local_cache_for_remote_fs")) diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index aaffe85ae2e..13d39980e1c 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -29,15 +29,15 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover( time_t decrease_error_period_, size_t max_error_cap_) : Base(std::move(nested_pools_), decrease_error_period_, max_error_cap_, &Poco::Logger::get("ConnectionPoolWithFailover")) - , default_load_balancing(load_balancing) + , get_priority_load_balancing(load_balancing) { const std::string & local_hostname = getFQDNOrHostName(); - hostname_differences.resize(nested_pools.size()); + get_priority_load_balancing.hostname_differences.resize(nested_pools.size()); for (size_t i = 0; i < nested_pools.size(); ++i) { ConnectionPool & connection_pool = dynamic_cast(*nested_pools[i]); - hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost()); + get_priority_load_balancing.hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost()); } } @@ -51,36 +51,15 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts }; size_t offset = 0; + LoadBalancing load_balancing = get_priority_load_balancing.load_balancing; if (settings) - offset = settings->load_balancing_first_offset % nested_pools.size(); - GetPriorityFunc get_priority; - switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing) { - case LoadBalancing::NEAREST_HOSTNAME: - get_priority = [&](size_t i) { return hostname_differences[i]; }; - break; - case LoadBalancing::IN_ORDER: - get_priority = [](size_t i) { return i; }; - break; - case LoadBalancing::RANDOM: - break; - case LoadBalancing::FIRST_OR_RANDOM: - get_priority = [offset](size_t i) -> size_t { return i != offset; }; - break; - case LoadBalancing::ROUND_ROBIN: - if (last_used >= nested_pools.size()) - last_used = 0; - ++last_used; - /* Consider nested_pools.size() equals to 5 - * last_used = 1 -> get_priority: 0 1 2 3 4 - * last_used = 2 -> get_priority: 4 0 1 2 3 - * last_used = 3 -> get_priority: 4 3 0 1 2 - * ... - * */ - get_priority = [&](size_t i) { ++i; return i < last_used ? 
nested_pools.size() - i : i - last_used; }; - break; + offset = settings->load_balancing_first_offset % nested_pools.size(); + load_balancing = LoadBalancing(settings->load_balancing); } + GetPriorityFunc get_priority = get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size()); + UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0; bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true; @@ -173,38 +152,14 @@ std::vector ConnectionPoolWithFailover::g ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings * settings) { size_t offset = 0; + LoadBalancing load_balancing = get_priority_load_balancing.load_balancing; if (settings) - offset = settings->load_balancing_first_offset % nested_pools.size(); - - GetPriorityFunc get_priority; - switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing) { - case LoadBalancing::NEAREST_HOSTNAME: - get_priority = [&](size_t i) { return hostname_differences[i]; }; - break; - case LoadBalancing::IN_ORDER: - get_priority = [](size_t i) { return i; }; - break; - case LoadBalancing::RANDOM: - break; - case LoadBalancing::FIRST_OR_RANDOM: - get_priority = [offset](size_t i) -> size_t { return i != offset; }; - break; - case LoadBalancing::ROUND_ROBIN: - if (last_used >= nested_pools.size()) - last_used = 0; - ++last_used; - /* Consider nested_pools.size() equals to 5 - * last_used = 1 -> get_priority: 0 1 2 3 4 - * last_used = 2 -> get_priority: 5 0 1 2 3 - * last_used = 3 -> get_priority: 5 4 0 1 2 - * ... - * */ - get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; }; - break; + offset = settings->load_balancing_first_offset % nested_pools.size(); + load_balancing = LoadBalancing(settings->load_balancing); } - return get_priority; + return get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size()); } std::vector ConnectionPoolWithFailover::getManyImpl( diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h index 4e47905aae6..df7dd572ef3 100644 --- a/src/Client/ConnectionPoolWithFailover.h +++ b/src/Client/ConnectionPoolWithFailover.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -109,9 +110,7 @@ private: GetPriorityFunc makeGetPriorityFunc(const Settings * settings); - std::vector hostname_differences; /// Distances from name of this host to the names of hosts of pools. - size_t last_used = 0; /// Last used for round_robin policy. 
- LoadBalancing default_load_balancing; + GetPriorityForLoadBalancing get_priority_load_balancing; }; using ConnectionPoolWithFailoverPtr = std::shared_ptr; diff --git a/src/Columns/MaskOperations.cpp b/src/Columns/MaskOperations.cpp index 64d90ae82cb..9e2d02253be 100644 --- a/src/Columns/MaskOperations.cpp +++ b/src/Columns/MaskOperations.cpp @@ -83,11 +83,20 @@ size_t extractMaskNumericImpl( const PaddedPODArray * null_bytemap, PaddedPODArray * nulls) { + if constexpr (!column_is_short) + { + if (data.size() != mask.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask"); + } + size_t ones_count = 0; size_t data_index = 0; - size_t mask_size = mask.size(); - for (size_t i = 0; i != mask_size; ++i) + size_t mask_size = mask.size(); + size_t data_size = data.size(); + + size_t i = 0; + for (; i != mask_size && data_index != data_size; ++i) { // Change mask only where value is 1. if (!mask[i]) @@ -120,6 +129,13 @@ size_t extractMaskNumericImpl( mask[i] = value; } + + if constexpr (column_is_short) + { + if (data_index != data_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask"); + } + return ones_count; } diff --git a/src/Common/ArenaWithFreeLists.h b/src/Common/ArenaWithFreeLists.h index cd4c1bc9d8d..53a59c98299 100644 --- a/src/Common/ArenaWithFreeLists.h +++ b/src/Common/ArenaWithFreeLists.h @@ -113,5 +113,35 @@ public: } }; +class SynchronizedArenaWithFreeLists : private ArenaWithFreeLists +{ +public: + explicit SynchronizedArenaWithFreeLists( + const size_t initial_size = 4096, const size_t growth_factor = 2, + const size_t linear_growth_threshold = 128 * 1024 * 1024) + : ArenaWithFreeLists{initial_size, growth_factor, linear_growth_threshold} + {} + + char * alloc(const size_t size) + { + std::lock_guard lock{mutex}; + return ArenaWithFreeLists::alloc(size); + } + + void free(char * ptr, const size_t size) + { + std::lock_guard lock{mutex}; + return ArenaWithFreeLists::free(ptr, size); + } + + /// Size of the allocated pool in bytes + size_t size() const + { + std::lock_guard lock{mutex}; + return ArenaWithFreeLists::size(); + } +private: + mutable std::mutex mutex; +}; } diff --git a/src/Common/FiberStack.h b/src/Common/FiberStack.h index 29f84ee6d85..c55608311d0 100644 --- a/src/Common/FiberStack.h +++ b/src/Common/FiberStack.h @@ -31,8 +31,8 @@ public: /// probably it worth to try to increase stack size for coroutines. /// /// Current value is just enough for all tests in our CI. It's not selected in some special - /// way. We will have 40 pages with 4KB page size. - static constexpr size_t default_stack_size = 192 * 1024; /// 64KB was not enough for tests + /// way. We will have 80 pages with 4KB page size. 
+    static constexpr size_t default_stack_size = 320 * 1024; /// 64KB was not enough for tests
 
     explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_)
     {
diff --git a/src/Common/GetPriorityForLoadBalancing.cpp b/src/Common/GetPriorityForLoadBalancing.cpp
new file mode 100644
index 00000000000..d8e7566e891
--- /dev/null
+++ b/src/Common/GetPriorityForLoadBalancing.cpp
@@ -0,0 +1,49 @@
+#include <Common/GetPriorityForLoadBalancing.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+std::function<size_t(size_t)> GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const
+{
+    std::function<size_t(size_t)> get_priority;
+    switch (load_balance)
+    {
+    case LoadBalancing::NEAREST_HOSTNAME:
+        if (hostname_differences.empty())
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "It's a bug: hostname_differences is not initialized");
+        get_priority = [&](size_t i) { return hostname_differences[i]; };
+        break;
+    case LoadBalancing::IN_ORDER:
+        get_priority = [](size_t i) { return i; };
+        break;
+    case LoadBalancing::RANDOM:
+        break;
+    case LoadBalancing::FIRST_OR_RANDOM:
+        get_priority = [offset](size_t i) -> size_t { return i != offset; };
+        break;
+    case LoadBalancing::ROUND_ROBIN:
+        if (last_used >= pool_size)
+            last_used = 0;
+        ++last_used;
+        /* Consider pool_size equals to 5
+         * last_used = 1 -> get_priority: 0 1 2 3 4
+         * last_used = 2 -> get_priority: 4 0 1 2 3
+         * last_used = 3 -> get_priority: 4 3 0 1 2
+         * ...
+         * */
+        get_priority = [&](size_t i)
+        {
+            ++i;
+            return i < last_used ? pool_size - i : i - last_used;
+        };
+        break;
+    }
+    return get_priority;
+}
+
+}
diff --git a/src/Common/GetPriorityForLoadBalancing.h b/src/Common/GetPriorityForLoadBalancing.h
new file mode 100644
index 00000000000..e57b02b5e90
--- /dev/null
+++ b/src/Common/GetPriorityForLoadBalancing.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <Core/SettingsEnums.h>
+
+namespace DB
+{
+
+class GetPriorityForLoadBalancing
+{
+public:
+    GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {}
+    GetPriorityForLoadBalancing(){}
+
+    bool operator == (const GetPriorityForLoadBalancing & other) const
+    {
+        return load_balancing == other.load_balancing && hostname_differences == other.hostname_differences;
+    }
+
+    bool operator != (const GetPriorityForLoadBalancing & other) const
+    {
+        return !(*this == other);
+    }
+
+    std::function<size_t(size_t)> getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const;
+
+    std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools.
+
+    LoadBalancing load_balancing = LoadBalancing::RANDOM;
+
+private:
+    mutable size_t last_used = 0; /// Last used for round_robin policy.
+};
+
+}
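For context, the priority functions above back the user-facing `load_balancing` setting; a hedged SQL sketch follows (illustrative only, not part of the patch; the table name is hypothetical):

```sql
-- Each value maps to a LoadBalancing enum case handled above: 'random',
-- 'nearest_hostname', 'in_order', 'first_or_random', 'round_robin'.
SET load_balancing = 'round_robin';
SELECT count() FROM my_distributed_table;  -- hypothetical table; the preferred replica rotates between queries
```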
diff --git a/src/Common/IntervalKind.cpp b/src/Common/IntervalKind.cpp
index 69b56be48ac..1478b832282 100644
--- a/src/Common/IntervalKind.cpp
+++ b/src/Common/IntervalKind.cpp
@@ -13,6 +13,9 @@ Int32 IntervalKind::toAvgSeconds() const
 {
     switch (kind)
     {
+        case IntervalKind::Nanosecond: return 0;    /// fractional parts of seconds have 0 seconds
+        case IntervalKind::Microsecond: return 0;
+        case IntervalKind::Millisecond: return 0;
         case IntervalKind::Second: return 1;
         case IntervalKind::Minute: return 60;
         case IntervalKind::Hour: return 3600;
@@ -52,6 +55,9 @@ const char * IntervalKind::toKeyword() const
 {
     switch (kind)
     {
+        case IntervalKind::Nanosecond: return "NANOSECOND";
+        case IntervalKind::Microsecond: return "MICROSECOND";
+        case IntervalKind::Millisecond: return "MILLISECOND";
         case IntervalKind::Second: return "SECOND";
         case IntervalKind::Minute: return "MINUTE";
         case IntervalKind::Hour: return "HOUR";
@@ -69,6 +75,9 @@ const char * IntervalKind::toLowercasedKeyword() const
 {
     switch (kind)
     {
+        case IntervalKind::Nanosecond: return "nanosecond";
+        case IntervalKind::Microsecond: return "microsecond";
+        case IntervalKind::Millisecond: return "millisecond";
         case IntervalKind::Second: return "second";
         case IntervalKind::Minute: return "minute";
         case IntervalKind::Hour: return "hour";
@@ -86,6 +95,12 @@ const char * IntervalKind::toDateDiffUnit() const
 {
     switch (kind)
     {
+        case IntervalKind::Nanosecond:
+            return "nanosecond";
+        case IntervalKind::Microsecond:
+            return "microsecond";
+        case IntervalKind::Millisecond:
+            return "millisecond";
         case IntervalKind::Second:
             return "second";
         case IntervalKind::Minute:
@@ -111,6 +126,12 @@ const char * IntervalKind::toNameOfFunctionToIntervalDataType() const
 {
     switch (kind)
     {
+        case IntervalKind::Nanosecond:
+            return "toIntervalNanosecond";
+        case IntervalKind::Microsecond:
+            return "toIntervalMicrosecond";
+        case IntervalKind::Millisecond:
+            return "toIntervalMillisecond";
         case IntervalKind::Second:
             return "toIntervalSecond";
         case IntervalKind::Minute:
@@ -136,6 +157,12 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const
 {
     switch (kind)
     {
+        case IntervalKind::Nanosecond:
+            return "toNanosecond";
+        case IntervalKind::Microsecond:
+            return "toMicrosecond";
+        case IntervalKind::Millisecond:
+            return "toMillisecond";
         case IntervalKind::Second:
             return "toSecond";
         case IntervalKind::Minute:
@@ -162,6 +189,21 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const
 
 bool IntervalKind::tryParseString(const std::string & kind, IntervalKind::Kind & result)
 {
+    if ("nanosecond" == kind)
+    {
+        result = IntervalKind::Nanosecond;
+        return true;
+    }
+    if ("microsecond" == kind)
+    {
+        result = IntervalKind::Microsecond;
+        return true;
+    }
+    if ("millisecond" == kind)
+    {
+        result = IntervalKind::Millisecond;
+        return true;
+    }
     if ("second" == kind)
     {
         result = IntervalKind::Second;
diff --git a/src/Common/IntervalKind.h b/src/Common/IntervalKind.h
index aab0bb79be5..d5f2b5672cd 100644
--- a/src/Common/IntervalKind.h
+++ b/src/Common/IntervalKind.h
@@ -10,6 +10,9 @@ struct IntervalKind
 {
     enum Kind
     {
+        Nanosecond,
+        Microsecond,
+        Millisecond,
         Second,
         Minute,
         Hour,
@@ -61,6 +64,9 @@ struct IntervalKind
 
 /// NOLINTNEXTLINE
 #define FOR_EACH_INTERVAL_KIND(M) \
+    M(Nanosecond) \
+    M(Microsecond) \
+    M(Millisecond) \
     M(Second) \
     M(Minute) \
     M(Hour) \
diff --git a/src/Common/OvercommitTracker.cpp b/src/Common/OvercommitTracker.cpp
index 0e70619f628..7b03b9f271d 100644
--- a/src/Common/OvercommitTracker.cpp
+++ 
b/src/Common/OvercommitTracker.cpp @@ -23,6 +23,12 @@ void OvercommitTracker::setMaxWaitTime(UInt64 wait_time) bool OvercommitTracker::needToStopQuery(MemoryTracker * tracker) { + // NOTE: Do not change the order of locks + // + // global_mutex must be acquired before overcommit_m, because + // method OvercommitTracker::unsubscribe(MemoryTracker *) is + // always called with already acquired global_mutex in + // ProcessListEntry::~ProcessListEntry(). std::unique_lock global_lock(global_mutex); std::unique_lock lk(overcommit_m); @@ -76,7 +82,7 @@ void UserOvercommitTracker::pickQueryToExcludeImpl() MemoryTracker * query_tracker = nullptr; OvercommitRatio current_ratio{0, 0}; // At this moment query list must be read only. - // BlockQueryIfMemoryLimit is used in ProcessList to guarantee this. + // This is guaranteed by locking global_mutex in OvercommitTracker::needToStopQuery. auto & queries = user_process_list->queries; LOG_DEBUG(logger, "Trying to choose query to stop from {} queries", queries.size()); for (auto const & query : queries) @@ -111,9 +117,9 @@ void GlobalOvercommitTracker::pickQueryToExcludeImpl() MemoryTracker * query_tracker = nullptr; OvercommitRatio current_ratio{0, 0}; // At this moment query list must be read only. - // BlockQueryIfMemoryLimit is used in ProcessList to guarantee this. - LOG_DEBUG(logger, "Trying to choose query to stop"); - process_list->processEachQueryStatus([&](DB::QueryStatus const & query) + // This is guaranteed by locking global_mutex in OvercommitTracker::needToStopQuery. + LOG_DEBUG(logger, "Trying to choose query to stop from {} queries", process_list->size()); + for (auto const & query : process_list->processes) { if (query.isKilled()) return; @@ -134,7 +140,7 @@ void GlobalOvercommitTracker::pickQueryToExcludeImpl() query_tracker = memory_tracker; current_ratio = ratio; } - }); + } LOG_DEBUG(logger, "Selected to stop query with overcommit ratio {}/{}", current_ratio.committed, current_ratio.soft_limit); picked_tracker = query_tracker; diff --git a/src/Common/OvercommitTracker.h b/src/Common/OvercommitTracker.h index 7c7974f0a24..f59390a8ace 100644 --- a/src/Common/OvercommitTracker.h +++ b/src/Common/OvercommitTracker.h @@ -43,8 +43,6 @@ class MemoryTracker; // is killed to free memory. struct OvercommitTracker : boost::noncopyable { - explicit OvercommitTracker(std::mutex & global_mutex_); - void setMaxWaitTime(UInt64 wait_time); bool needToStopQuery(MemoryTracker * tracker); @@ -54,8 +52,12 @@ struct OvercommitTracker : boost::noncopyable virtual ~OvercommitTracker() = default; protected: + explicit OvercommitTracker(std::mutex & global_mutex_); + virtual void pickQueryToExcludeImpl() = 0; + // This mutex is used to disallow concurrent access + // to picked_tracker and cancelation_state variables. mutable std::mutex overcommit_m; mutable std::condition_variable cv; @@ -87,6 +89,11 @@ private: } } + // Global mutex which is used in ProcessList to synchronize + // insertion and deletion of queries. + // OvercommitTracker::pickQueryToExcludeImpl() implementations + // require this mutex to be locked, because they read list (or sublist) + // of queries. 
std::mutex & global_mutex; }; diff --git a/src/Common/RadixSort.h b/src/Common/RadixSort.h index 944ab860355..2f02ebb9e03 100644 --- a/src/Common/RadixSort.h +++ b/src/Common/RadixSort.h @@ -515,6 +515,11 @@ public: radixSortLSDInternal(arr, size, false, nullptr); } + static void executeLSD(Element * arr, size_t size, bool reverse) + { + radixSortLSDInternal(arr, size, reverse, nullptr); + } + /** This function will start to sort inplace (modify 'arr') * but on the last step it will write result directly to the destination * instead of finishing sorting 'arr'. diff --git a/src/Common/ZooKeeper/CMakeLists.txt b/src/Common/ZooKeeper/CMakeLists.txt index 34ebad9bb50..a9a335d1461 100644 --- a/src/Common/ZooKeeper/CMakeLists.txt +++ b/src/Common/ZooKeeper/CMakeLists.txt @@ -22,7 +22,6 @@ target_link_libraries (clickhouse_common_zookeeper_no_log PRIVATE string_utils ) - if (ENABLE_EXAMPLES) add_subdirectory(examples) endif() diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index b1574341c40..118789c0ffc 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -5,15 +5,15 @@ #include #include -#include -#include #include -#include +#include #include #include +#include #include +#include #define ZOOKEEPER_CONNECTION_TIMEOUT_MS 1000 @@ -48,7 +48,7 @@ static void check(Coordination::Error code, const std::string & path) void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, - int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_) + int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_) { log = &Poco::Logger::get("ZooKeeper"); hosts = hosts_; @@ -57,6 +57,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ operation_timeout_ms = operation_timeout_ms_; chroot = chroot_; implementation = implementation_; + get_priority_load_balancing = get_priority_load_balancing_; if (implementation == "zookeeper") { @@ -66,14 +67,13 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ Coordination::ZooKeeper::Nodes nodes; nodes.reserve(hosts.size()); - Strings shuffled_hosts = hosts; /// Shuffle the hosts to distribute the load among ZooKeeper nodes. 
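The lines removed below shuffled the hosts uniformly; the new shuffleHosts() (further down in this hunk) instead orders hosts by a load-balancing priority and randomizes only within equal priorities. A minimal, self-contained sketch of that ordering, with hypothetical names (the real code uses ShuffleHost, thread_local_rng and GetPriorityForLoadBalancing):

#include <algorithm>
#include <random>
#include <string>
#include <tuple>
#include <vector>

struct HostCandidate
{
    std::string host;
    long priority = 0;   // lower value = preferred (e.g. nearest hostname)
    unsigned random = 0; // random tie-breaker within one priority class
};

std::vector<HostCandidate> orderHosts(std::vector<HostCandidate> hosts)
{
    std::mt19937 rng(std::random_device{}());
    for (auto & h : hosts)
        h.random = rng();
    // Sort by (priority, random): preferred hosts come first, equally
    // preferred hosts are still load-balanced by the random component.
    std::sort(hosts.begin(), hosts.end(), [](const auto & l, const auto & r)
    {
        return std::tie(l.priority, l.random) < std::tie(r.priority, r.random);
    });
    return hosts;
}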
- pcg64 generator(randomSeed()); - std::shuffle(shuffled_hosts.begin(), shuffled_hosts.end(), generator); + std::vector shuffled_hosts = shuffleHosts(); bool dns_error = false; - for (auto & host_string : shuffled_hosts) + for (auto & host : shuffled_hosts) { + auto & host_string = host.host; try { bool secure = bool(startsWith(host_string, "secure://")); @@ -81,6 +81,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ if (secure) host_string.erase(0, strlen("secure://")); + LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, Poco::Net::SocketAddress{host_string}.toString()); nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure}); } catch (const Poco::Net::HostNotFoundException & e) @@ -154,23 +155,47 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ } } +std::vector ZooKeeper::shuffleHosts() const +{ + std::function get_priority = get_priority_load_balancing.getPriorityFunc(get_priority_load_balancing.load_balancing, 0, hosts.size()); + std::vector shuffle_hosts; + for (size_t i = 0; i < hosts.size(); ++i) + { + ShuffleHost shuffle_host; + shuffle_host.host = hosts[i]; + if (get_priority) + shuffle_host.priority = get_priority(i); + shuffle_host.randomize(); + shuffle_hosts.emplace_back(shuffle_host); + } + + std::sort( + shuffle_hosts.begin(), shuffle_hosts.end(), + [](const ShuffleHost & lhs, const ShuffleHost & rhs) + { + return ShuffleHost::compare(lhs, rhs); + }); + + return shuffle_hosts; +} + ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_, - std::shared_ptr zk_log_) + std::shared_ptr zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_) { zk_log = std::move(zk_log_); Strings hosts_strings; splitInto<','>(hosts_strings, hosts_string); - init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_); + init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_); } ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_, - std::shared_ptr zk_log_) + std::shared_ptr zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_) { zk_log = std::move(zk_log_); - init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_); + init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_); } struct ZooKeeperArgs @@ -213,6 +238,15 @@ struct ZooKeeperArgs { implementation = config.getString(config_name + "." + key); } + else if (key == "zookeeper_load_balancing") + { + String load_balancing_str = config.getString(config_name + "." 
+ key); + /// Use magic_enum to avoid dependency from dbms (`SettingFieldLoadBalancingTraits::fromString(...)`) + auto load_balancing = magic_enum::enum_cast(Poco::toUpper(load_balancing_str)); + if (!load_balancing) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str); + get_priority_load_balancing.load_balancing = *load_balancing; + } else throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS); } @@ -224,6 +258,15 @@ struct ZooKeeperArgs if (chroot.back() == '/') chroot.pop_back(); } + + /// init get_priority_load_balancing + get_priority_load_balancing.hostname_differences.resize(hosts.size()); + const String & local_hostname = getFQDNOrHostName(); + for (size_t i = 0; i < hosts.size(); ++i) + { + const String & node_host = hosts[i].substr(0, hosts[i].find_last_of(':')); + get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, node_host); + } } Strings hosts; @@ -232,13 +275,14 @@ struct ZooKeeperArgs int operation_timeout_ms; std::string chroot; std::string implementation; + GetPriorityForLoadBalancing get_priority_load_balancing; }; ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr zk_log_) : zk_log(std::move(zk_log_)) { ZooKeeperArgs args(config, config_name); - init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot); + init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing); } bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) const @@ -249,8 +293,11 @@ bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, if (args.implementation == implementation && implementation == "testkeeper") return false; - return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot) - != std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot); + if (args.get_priority_load_balancing != get_priority_load_balancing) + return true; + + return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing) + != std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, args.get_priority_load_balancing); } @@ -757,7 +804,7 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition & ZooKeeperPtr ZooKeeper::startNewSession() const { - return std::make_shared(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log); + return std::make_shared(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log, get_priority_load_balancing); } diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 6d0f8a438b1..f901a79591f 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -13,7 +13,10 @@ #include #include #include +#include +#include #include +#include namespace ProfileEvents @@ -37,6 +40,25 @@ namespace zkutil /// Preferred size of multi() command (in number of ops) constexpr size_t MULTI_BATCH_SIZE = 100; +struct ShuffleHost +{ + String host; + Int64 priority = 0; + UInt32 random = 0; + + 
void randomize() + { + random = thread_local_rng(); + } + + static bool compare(const ShuffleHost & lhs, const ShuffleHost & rhs) + { + return std::forward_as_tuple(lhs.priority, lhs.random) + < std::forward_as_tuple(rhs.priority, rhs.random); + } +}; + +using GetPriorityForLoadBalancing = DB::GetPriorityForLoadBalancing; /// ZooKeeper session. The interface is substantially different from the usual libzookeeper API. /// @@ -58,14 +80,16 @@ public: int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS, const std::string & chroot_ = "", const std::string & implementation_ = "zookeeper", - std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr); + std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr, + const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {}); explicit ZooKeeper(const Strings & hosts_, const std::string & identity_ = "", int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS, int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS, const std::string & chroot_ = "", const std::string & implementation_ = "zookeeper", - std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr); + std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr, + const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {}); /** Config of the form: @@ -91,6 +115,8 @@ public: */ ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_); + std::vector<ShuffleHost> shuffleHosts() const; + /// Creates a new session with the same parameters. This method can be used for reconnecting /// after the session has expired. /// This object remains unchanged, and the new session is returned. @@ -284,7 +310,7 @@ private: friend class EphemeralNodeHolder; void init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, - int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_); + int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_); /// The following methods don't throw exceptions but return error codes. Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created); @@ -311,6 +337,8 @@ private: Poco::Logger * log = nullptr; std::shared_ptr<DB::ZooKeeperLog> zk_log; + GetPriorityForLoadBalancing get_priority_load_balancing; + AtomicStopwatch session_uptime; }; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 0627a70193f..d3c993344b6 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -451,7 +451,7 @@ void ZooKeeper::connect( } else { - LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}", socket.peerAddress().toString(), session_id); + LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}{}", socket.peerAddress().toString(), session_id, fail_reasons.str()); } } diff --git a/src/Common/formatIPv6.h b/src/Common/formatIPv6.h index 1a65adae55b..d6efeed17e6 100644 --- a/src/Common/formatIPv6.h +++ b/src/Common/formatIPv6.h @@ -11,7 +11,7 @@ constexpr size_t IPV4_BINARY_LENGTH = 4; constexpr size_t IPV6_BINARY_LENGTH = 16; constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte. -constexpr size_t IPV6_MAX_TEXT_LENGTH = 39; +constexpr size_t IPV6_MAX_TEXT_LENGTH = 45; /// Does not count tail zero byte.
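A note on the 39 -> 45 change just above: 39 covers only the fully hexadecimal form (8 groups of 4 digits plus 7 colons), while the longest textual IPv6 form carries a dotted-quad IPv4 tail, e.g. ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255 (6 hex groups, 6 colons, 15 characters of IPv4 text). The arithmetic, as self-contained compile-time checks:

#include <cstddef>

constexpr std::size_t ipv6_hex_only_text_length  = 8 * 4 + 7;      // 39
constexpr std::size_t ipv6_with_ipv4_text_length = 6 * 4 + 6 + 15; // 45

static_assert(ipv6_hex_only_text_length == 39, "the previous constant");
static_assert(ipv6_with_ipv4_text_length == 45, "the new constant also covers IPv4-embedded text");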
namespace DB { diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp index d79e4cebd15..596fd4caad7 100644 --- a/src/Common/isLocalAddress.cpp +++ b/src/Common/isLocalAddress.cpp @@ -124,6 +124,7 @@ bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_ size_t getHostNameDifference(const std::string & local_hostname, const std::string & host) { + /// FIXME should we replace it with Levenshtein distance? (we already have it in NamePrompter) size_t hostname_difference = 0; for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i) if (local_hostname[i] != host[i]) diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 5c93d6719fa..a7142ef7f2e 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -13,6 +13,7 @@ #include #include +#include namespace DB @@ -269,8 +270,18 @@ const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const } -const ColumnWithTypeAndName * Block::findByName(const std::string & name) const +const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool case_insensitive) const { + if (case_insensitive) + { + auto found = std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); }); + if (found == data.end()) + { + return nullptr; + } + return &*found; + } + auto it = index_by_name.find(name); if (index_by_name.end() == it) { @@ -280,19 +291,23 @@ const ColumnWithTypeAndName * Block::findByName(const std::string & name) const } -const ColumnWithTypeAndName & Block::getByName(const std::string & name) const +const ColumnWithTypeAndName & Block::getByName(const std::string & name, bool case_insensitive) const { - const auto * result = findByName(name); + const auto * result = findByName(name, case_insensitive); if (!result) - throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames() , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + throw Exception( + "Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); return *result; } -bool Block::has(const std::string & name) const +bool Block::has(const std::string & name, bool case_insensitive) const { + if (case_insensitive) + return std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); }) + != data.end(); + return index_by_name.end() != index_by_name.find(name); } @@ -301,8 +316,8 @@ size_t Block::getPositionByName(const std::string & name) const { auto it = index_by_name.find(name); if (index_by_name.end() == it) - throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames() , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + throw Exception( + "Not found column " + name + " in block. 
There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); return it->second; } diff --git a/src/Core/Block.h b/src/Core/Block.h index 66e16b70f47..c5d3e1ae35a 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -60,21 +60,21 @@ public: ColumnWithTypeAndName & safeGetByPosition(size_t position); const ColumnWithTypeAndName & safeGetByPosition(size_t position) const; - ColumnWithTypeAndName* findByName(const std::string & name) + ColumnWithTypeAndName* findByName(const std::string & name, bool case_insensitive = false) { return const_cast( - const_cast(this)->findByName(name)); + const_cast(this)->findByName(name, case_insensitive)); } - const ColumnWithTypeAndName * findByName(const std::string & name) const; + const ColumnWithTypeAndName * findByName(const std::string & name, bool case_insensitive = false) const; - ColumnWithTypeAndName & getByName(const std::string & name) + ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false) { return const_cast( - const_cast(this)->getByName(name)); + const_cast(this)->getByName(name, case_insensitive)); } - const ColumnWithTypeAndName & getByName(const std::string & name) const; + const ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false) const; Container::iterator begin() { return data.begin(); } Container::iterator end() { return data.end(); } @@ -83,7 +83,7 @@ public: Container::const_iterator cbegin() const { return data.cbegin(); } Container::const_iterator cend() const { return data.cend(); } - bool has(const std::string & name) const; + bool has(const std::string & name, bool case_insensitive = false) const; size_t getPositionByName(const std::string & name) const; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 8d28696094b..f81b61ea648 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -47,6 +47,8 @@ class IColumn; M(UInt64, max_insert_delayed_streams_for_parallel_write, 0, "The maximum number of streams (columns) to delay final part flush. Default - auto (1000 in case of underlying storage supports parallel write, for example S3 and disabled otherwise)", 0) \ M(UInt64, max_final_threads, 16, "The maximum number of threads to read from table with FINAL.", 0) \ M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \ + M(MaxThreads, max_download_threads, 4, "The maximum number of threads to download data (e.g. for URL engine).", 0) \ + M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. 
for URL engine) per each thread.", 0) \ M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \ M(UInt64, max_distributed_connections, 1024, "The maximum number of connections for distributed processing of one query (should be greater than max_threads).", 0) \ M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, "Which part of the query can be read into RAM for parsing (the remaining data for INSERT, if any, is read later)", 0) \ @@ -614,11 +616,13 @@ class IColumn; M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \ M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \ - M(Bool, input_format_use_lowercase_column_name, false, "Use lowercase column name while reading input formats", 0) \ M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \ + M(Bool, input_format_arrow_case_insensitive_column_matching, false, "Ignore case when matching Arrow columns with CH columns.", 0) \ M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \ M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \ + M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \ M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \ + M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \ M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \ M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \ M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index ddd1c29785c..3f68038560c 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -149,4 +149,5 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, {"str", FormatSettings::MsgPackUUIDRepresentation::STR}, {"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}}) + } diff --git a/src/DataTypes/DataTypeInterval.cpp b/src/DataTypes/DataTypeInterval.cpp index 57d071a8666..9faf0cec2d8 100644 --- a/src/DataTypes/DataTypeInterval.cpp +++ b/src/DataTypes/DataTypeInterval.cpp @@ -13,6 +13,9 @@ bool DataTypeInterval::equals(const IDataType & rhs) const void registerDataTypeInterval(DataTypeFactory & factory) { + factory.registerSimpleDataType("IntervalNanosecond", [] { return DataTypePtr(std::make_shared(IntervalKind::Nanosecond)); }); + factory.registerSimpleDataType("IntervalMicrosecond", [] { return DataTypePtr(std::make_shared(IntervalKind::Microsecond)); }); + factory.registerSimpleDataType("IntervalMillisecond", [] { return DataTypePtr(std::make_shared(IntervalKind::Millisecond)); }); factory.registerSimpleDataType("IntervalSecond", [] { return DataTypePtr(std::make_shared(IntervalKind::Second)); }); 
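// The interval types registered in this list gain three sub-second kinds;
// they correspond to DateTime64 scales 9 (nanosecond), 6 (microsecond) and
// 3 (millisecond), which is how the date arithmetic further below picks the
// result precision (std::max of the argument's scale and the kind's scale).
// A minimal stand-alone illustration of that mapping, with hypothetical
// names rather than the real IntervalKind enum:
enum class SubsecondKind { Nanosecond, Microsecond, Millisecond, Second };

constexpr unsigned scaleForKind(SubsecondKind kind)
{
    switch (kind)
    {
        case SubsecondKind::Nanosecond:  return 9; // 10^9 ticks per second
        case SubsecondKind::Microsecond: return 6; // 10^6 ticks per second
        case SubsecondKind::Millisecond: return 3; // 10^3 ticks per second
        case SubsecondKind::Second:      return 0; // whole seconds
    }
    return 0;
}

static_assert(scaleForKind(SubsecondKind::Millisecond) == 3, "addMilliseconds result has at least scale 3");
static_assert(scaleForKind(SubsecondKind::Nanosecond) == 9, "addNanoseconds result has at least scale 9");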
factory.registerSimpleDataType("IntervalMinute", [] { return DataTypePtr(std::make_shared(IntervalKind::Minute)); }); factory.registerSimpleDataType("IntervalHour", [] { return DataTypePtr(std::make_shared(IntervalKind::Hour)); }); diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index df504bc34a8..8f5e40de5b8 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -15,6 +15,8 @@ #include +#include + namespace DB { @@ -227,14 +229,17 @@ void validateArraySizes(const Block & block) } -std::unordered_set getAllTableNames(const Block & block) +std::unordered_set getAllTableNames(const Block & block, bool to_lower_case) { std::unordered_set nested_table_names; - for (auto & name : block.getNames()) + for (const auto & name : block.getNames()) { auto nested_table_name = Nested::extractTableName(name); + if (to_lower_case) + boost::to_lower(nested_table_name); + if (!nested_table_name.empty()) - nested_table_names.insert(nested_table_name); + nested_table_names.insert(std::move(nested_table_name)); } return nested_table_names; } diff --git a/src/DataTypes/NestedUtils.h b/src/DataTypes/NestedUtils.h index 2ca5c17dc74..f6dc42d5c58 100644 --- a/src/DataTypes/NestedUtils.h +++ b/src/DataTypes/NestedUtils.h @@ -32,7 +32,7 @@ namespace Nested void validateArraySizes(const Block & block); /// Get all nested tables names from a block. - std::unordered_set getAllTableNames(const Block & block); + std::unordered_set getAllTableNames(const Block & block, bool to_lower_case = false); } } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index d9d9f5b45f6..0c3cc56c061 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -88,6 +88,9 @@ DatabaseReplicated::DatabaseReplicated( /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it. 
if (zookeeper_path.front() != '/') zookeeper_path = "/" + zookeeper_path; + + if (!db_settings.collection_name.value.empty()) + fillClusterAuthInfo(db_settings.collection_name.value, context_->getConfigRef()); } String DatabaseReplicated::getFullReplicaName() const @@ -191,22 +194,36 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const shards.back().emplace_back(unescapeForFileName(host_port)); } - String username = db_settings.cluster_username; - String password = db_settings.cluster_password; UInt16 default_port = getContext()->getTCPPort(); - bool secure = db_settings.cluster_secure_connection; bool treat_local_as_remote = false; bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL; return std::make_shared( getContext()->getSettingsRef(), shards, - username, - password, + cluster_auth_info.cluster_username, + cluster_auth_info.cluster_password, default_port, treat_local_as_remote, treat_local_port_as_remote, - secure); + cluster_auth_info.cluster_secure_connection, + /*priority=*/1, + database_name, + cluster_auth_info.cluster_secret); +} + + +void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config_ref) +{ + const auto & config_prefix = fmt::format("named_collections.{}", collection_name); + + if (!config_ref.has(config_prefix)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name); + + cluster_auth_info.cluster_username = config_ref.getString(config_prefix + ".cluster_username", ""); + cluster_auth_info.cluster_password = config_ref.getString(config_prefix + ".cluster_password", ""); + cluster_auth_info.cluster_secret = config_ref.getString(config_prefix + ".cluster_secret", ""); + cluster_auth_info.cluster_secure_connection = config_ref.getBool(config_prefix + ".cluster_secure_connection", false); } void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(bool force_attach) diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index fcb8a2c4d33..ac212e168b8 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -75,6 +75,16 @@ private: bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper); void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper); + struct + { + String cluster_username{"default"}; + String cluster_password; + String cluster_secret; + bool cluster_secure_connection{false}; + } cluster_auth_info; + + void fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config); + void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const; void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 max_log_ptr); diff --git a/src/Databases/DatabaseReplicatedSettings.h b/src/Databases/DatabaseReplicatedSettings.h index 0aff26712c0..8bed1ababf6 100644 --- a/src/Databases/DatabaseReplicatedSettings.h +++ b/src/Databases/DatabaseReplicatedSettings.h @@ -8,12 +8,11 @@ namespace DB class ASTStorage; #define LIST_OF_DATABASE_REPLICATED_SETTINGS(M) \ - M(Float, max_broken_tables_ratio, 0.5, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \ + M(Float, max_broken_tables_ratio, 0.5, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \ M(UInt64, max_replication_lag_to_enqueue, 10, "Replica will throw exception on attempt to execute query if 
its replication lag greater", 0) \ M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \ - M(String, cluster_username, "default", "Username to use when connecting to hosts of cluster", 0) \ - M(String, cluster_password, "", "Password to use when connecting to hosts of cluster", 0) \ - M(Bool, cluster_secure_connection, false, "Enable TLS when connecting to hosts of cluster", 0) \ + M(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \ + DECLARE_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS) diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index e46620d9d1f..e05ccef74c0 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -264,32 +265,6 @@ std::unique_ptr DiskS3::writeFile(const String & path, LOG_TRACE(log, "{} to file by path: {}. S3 path: {}", mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name); - ScheduleFunc schedule = [pool = &getThreadPoolWriter(), thread_group = CurrentThread::getGroup()](auto callback) - { - pool->scheduleOrThrow([callback = std::move(callback), thread_group]() - { - if (thread_group) - CurrentThread::attachTo(thread_group); - - SCOPE_EXIT_SAFE( - if (thread_group) - CurrentThread::detachQueryIfNotDetached(); - - /// After we detached from the thread_group, parent for memory_tracker inside ThreadStatus will be reset to it's parent. - /// Typically, it may be changes from Process to User. - /// Usually it could be ok, because thread pool task is executed before user-level memory tracker is destroyed. - /// However, thread could stay alive inside the thread pool, and it's ThreadStatus as well. - /// When, finally, we destroy the thread (and the ThreadStatus), - /// it can use memory tracker in the ~ThreadStatus in order to alloc/free untracked_memory,\ - /// and by this time user-level memory tracker may be already destroyed. - /// - /// As a work-around, reset memory tracker to total, which is always alive. 
- CurrentThread::get().memory_tracker.setParent(&total_memory_tracker); - ); - callback(); - }); - }; - auto s3_buffer = std::make_unique( settings->client, bucket, @@ -299,7 +274,7 @@ std::unique_ptr DiskS3::writeFile(const String & path, settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_max_single_part_upload_size, std::move(object_metadata), - buf_size, std::move(schedule)); + buf_size, threadPoolCallbackRunner(getThreadPoolWriter())); auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) { diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 08554cf7e07..3aa82cb79b4 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -89,10 +89,10 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers; format_settings.json.quote_denormals = settings.output_format_json_quote_denormals; format_settings.null_as_default = settings.input_format_null_as_default; - format_settings.use_lowercase_column_name = settings.input_format_use_lowercase_column_name; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; format_settings.parquet.import_nested = settings.input_format_parquet_import_nested; + format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching; format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; format_settings.pretty.color = settings.output_format_pretty_color; @@ -123,9 +123,11 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary; format_settings.arrow.import_nested = settings.input_format_arrow_import_nested; format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns; + format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching; format_settings.orc.import_nested = settings.input_format_orc_import_nested; format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; + format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.seekable_read = settings.input_format_allow_seeks; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 4881c1a43c8..bd0a84d9ded 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -32,7 +32,6 @@ struct FormatSettings bool null_as_default = true; bool decimal_trailing_zeros = false; bool defaults_for_omitted_fields = true; - bool use_lowercase_column_name = false; bool seekable_read = true; UInt64 max_rows_to_read_for_schema_inference = 100; @@ -75,6 +74,7 @@ struct FormatSettings bool 
low_cardinality_as_dictionary = false; bool import_nested = false; bool allow_missing_columns = false; + bool case_insensitive_column_matching = false; } arrow; struct @@ -137,6 +137,7 @@ struct FormatSettings UInt64 row_group_size = 1000000; bool import_nested = false; bool allow_missing_columns = false; + bool case_insensitive_column_matching = false; } parquet; struct Pretty @@ -217,6 +218,7 @@ struct FormatSettings bool import_nested = false; bool allow_missing_columns = false; int64_t row_batch_size = 100'000; + bool case_insensitive_column_matching = false; } orc; /// For capnProto format we should determine how to diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index a7f06689820..bc1ae807e7d 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -41,6 +41,11 @@ namespace ErrorCodes throw Exception("Illegal type Date of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } + static inline UInt32 dateTimeIsNotSupported(const char * name) + { + throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + /// This factor transformation will say that the function is monotone everywhere. struct ZeroTransform { @@ -311,6 +316,133 @@ struct ToStartOfSecondImpl using FactorTransform = ZeroTransform; }; +struct ToStartOfMillisecondImpl +{ + static constexpr auto name = "toStartOfMillisecond"; + + static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) + { + // given that scale is 6, scale_multiplier is 1000000 + // for DateTime64 value of 123.456789: + // 123456789 - 789 = 123456000 + // for DateTime64 value of -123.456789: + // -123456789 - (1000 + (-789)) = -123457000 + + if (scale_multiplier == 1000) + { + return datetime64; + } + else if (scale_multiplier <= 1000) + { + return datetime64 * (1000 / scale_multiplier); + } + else + { + auto droppable_part_with_sign = DecimalUtils::getFractionalPartWithScaleMultiplier(datetime64, scale_multiplier / 1000); + + if (droppable_part_with_sign < 0) + droppable_part_with_sign += scale_multiplier; + + return datetime64 - droppable_part_with_sign; + } + } + + static inline UInt32 execute(UInt32, const DateLUTImpl &) + { + throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + static inline UInt32 execute(UInt16, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + + using FactorTransform = ZeroTransform; +}; + +struct ToStartOfMicrosecondImpl +{ + static constexpr auto name = "toStartOfMicrosecond"; + + static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) + { + // @see ToStartOfMillisecondImpl + + if (scale_multiplier == 1000000) + { + return datetime64; + } + else if (scale_multiplier <= 1000000) + { + return datetime64 * (1000000 / scale_multiplier); + } + else + { + auto droppable_part_with_sign = DecimalUtils::getFractionalPartWithScaleMultiplier(datetime64, scale_multiplier / 1000000); + + if (droppable_part_with_sign < 0) + droppable_part_with_sign += scale_multiplier; + + return datetime64 - droppable_part_with_sign; + } + } + + static inline UInt32 execute(UInt32, const DateLUTImpl &) + { + throw Exception("Illegal type DateTime of argument for function " + 
std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + static inline UInt32 execute(UInt16, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + + using FactorTransform = ZeroTransform; +}; + +struct ToStartOfNanosecondImpl +{ + static constexpr auto name = "toStartOfNanosecond"; + + static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) + { + // @see ToStartOfMillisecondImpl + if (scale_multiplier == 1000000000) + { + return datetime64; + } + else if (scale_multiplier <= 1000000000) + { + return datetime64 * (1000000000 / scale_multiplier); + } + else + { + throw Exception("Illegal type of argument for function " + std::string(name) + ", DateTime64 expected", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } + + static inline UInt32 execute(UInt32, const DateLUTImpl &) + { + throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + static inline UInt32 execute(UInt16, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + + using FactorTransform = ZeroTransform; +}; + struct ToStartOfFiveMinuteImpl { static constexpr auto name = "toStartOfFiveMinute"; diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index cb48b819481..fbfc9e9bc1f 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -40,26 +40,158 @@ namespace ErrorCodes /// - 'AddSecondsImpl::execute(UInt32, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(DateTime, ...) -> DateTime' /// - 'AddSecondsImpl::execute(UInt16, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(Date, ...) 
-> DateTime' +struct AddNanosecondsImpl +{ + static constexpr auto name = "addNanoseconds"; + + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + auto division = std::div(t.fractional * multiplier + delta, static_cast(1000000000)); + return {t.whole * multiplier + division.quot, t.fractional * multiplier + delta}; + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + return t * multiplier + delta; + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(9); + return t * multiplier + delta; + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) + { + throw Exception("addNanoSeconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) + { + throw Exception("addNanoSeconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } +}; + +struct AddMicrosecondsImpl +{ + static constexpr auto name = "addMicroseconds"; + + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); + if (scale <= 6) + { + auto division = std::div((t.fractional + delta), static_cast(10e6)); + return {t.whole * multiplier + division.quot, division.rem}; + } + else + { + auto division = std::div((t.fractional + delta * multiplier), static_cast(10e6 * multiplier)); + return {t.whole + division.quot, division.rem}; + } + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); + return scale <= 6 ? 
t * multiplier + delta : t + delta * multiplier; + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(6); + return t * multiplier + delta; + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) + { + throw Exception("addMicroSeconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) + { + throw Exception("addMicroSeconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } +}; + +struct AddMillisecondsImpl +{ + static constexpr auto name = "addMilliseconds"; + + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); + if (scale <= 3) + { + auto division = std::div((t.fractional + delta), static_cast(1000)); + return {t.whole * multiplier + division.quot, division.rem}; + } + else + { + auto division = std::div((t.fractional + delta * multiplier), static_cast(1000 * multiplier)); + return {t.whole + division.quot,division.rem}; + } + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); + return scale <= 3 ? t * multiplier + delta : t + delta * multiplier; + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(3); + return t * multiplier + delta; + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) + { + throw Exception("addMilliSeconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) + { + throw Exception("addMilliSeconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } +}; + struct AddSecondsImpl { static constexpr auto name = "addSeconds"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return {t.whole + delta, t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + return t + delta * DecimalUtils::scaleMultiplier(scale); + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { // use default datetime64 scale return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + + static inline 
NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta; } @@ -70,21 +202,29 @@ struct AddMinutesImpl static constexpr auto name = "addMinutes"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return {t.whole + delta * 60, t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + return t + 60 * delta * DecimalUtils::scaleMultiplier(scale); + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta * 60; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { // use default datetime64 scale return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta * 60; } @@ -95,20 +235,29 @@ struct AddHoursImpl static constexpr auto name = "addHours"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return {t.whole + delta * 3600, t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) + + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + return t + 3600 * delta * DecimalUtils::scaleMultiplier(scale); + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta * 3600; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { // use default datetime64 scale return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta * 3600; } @@ -119,22 +268,30 @@ struct AddDaysImpl static constexpr auto name = "addDays"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addDays(t.whole, delta), t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED 
DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) + { + auto multiplier = DecimalUtils::scaleMultiplier(scale); + auto d = std::div(t, multiplier); + return time_zone.addDays(d.quot, delta) * multiplier + d.rem; + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addDays(t, delta); } - static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta; } - static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta; } @@ -145,22 +302,30 @@ struct AddWeeksImpl static constexpr auto name = "addWeeks"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone) + execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addWeeks(t.whole, delta), t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) + { + auto multiplier = DecimalUtils::scaleMultiplier(scale); + auto d = std::div(t, multiplier); + return time_zone.addDays(d.quot, delta * 7) * multiplier + d.rem; + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addWeeks(t, delta); } - static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta * 7; } - static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta * 7; } @@ -170,23 +335,31 @@ struct AddMonthsImpl { static constexpr auto name = "addMonths"; - static inline DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addMonths(t.whole, delta), t.fractional}; } - static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) + { + auto multiplier = DecimalUtils::scaleMultiplier(scale); + auto d = std::div(t, multiplier); + return time_zone.addMonths(d.quot, delta) * multiplier + d.rem; + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addMonths(t, delta); } - static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return 
time_zone.addMonths(DayNum(d), delta); } - static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addMonths(ExtendedDayNum(d), delta); } @@ -197,22 +370,30 @@ struct AddQuartersImpl static constexpr auto name = "addQuarters"; static inline DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone) + execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addQuarters(t.whole, delta), t.fractional}; } - static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) + { + auto multiplier = DecimalUtils::scaleMultiplier(scale); + auto d = std::div(t, multiplier); + return time_zone.addQuarters(d.quot, delta) * multiplier + d.rem; + } + + static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addQuarters(t, delta); } - static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone) + static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addQuarters(DayNum(d), delta); } - static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone) + static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addQuarters(ExtendedDayNum(d), delta); } @@ -222,23 +403,31 @@ struct AddYearsImpl { static constexpr auto name = "addYears"; - static inline DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addYears(t.whole, delta), t.fractional}; } - static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) + { + auto multiplier = DecimalUtils::scaleMultiplier(scale); + auto d = std::div(t, multiplier); + return time_zone.addYears(d.quot, delta) * multiplier + d.rem; + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addYears(t, delta); } - static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addYears(DayNum(d), delta); } - static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addYears(ExtendedDayNum(d), delta); } @@ -250,13 +439,16 @@ struct SubtractIntervalImpl : public Transform using Transform::Transform; template - inline NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone) const + inline NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const { /// Signed integer 
overflow is Ok. - return Transform::execute(t, -delta, time_zone); + return Transform::execute(t, -delta, time_zone, scale); } }; +struct SubtractNanosecondsImpl : SubtractIntervalImpl { static constexpr auto name = "subtractNanoseconds"; }; +struct SubtractMicrosecondsImpl : SubtractIntervalImpl { static constexpr auto name = "subtractMicroseconds"; }; +struct SubtractMillisecondsImpl : SubtractIntervalImpl { static constexpr auto name = "subtractMilliseconds"; }; struct SubtractSecondsImpl : SubtractIntervalImpl { static constexpr auto name = "subtractSeconds"; }; struct SubtractMinutesImpl : SubtractIntervalImpl { static constexpr auto name = "subtractMinutes"; }; struct SubtractHoursImpl : SubtractIntervalImpl { static constexpr auto name = "subtractHours"; }; @@ -277,17 +469,17 @@ struct Adder {} template - void NO_INLINE vectorConstant(const FromVectorType & vec_from, ToVectorType & vec_to, Int64 delta, const DateLUTImpl & time_zone) const + void NO_INLINE vectorConstant(const FromVectorType & vec_from, ToVectorType & vec_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const { size_t size = vec_from.size(); vec_to.resize(size); for (size_t i = 0; i < size; ++i) - vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone); + vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone, scale); } template - void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone) const + void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const { size_t size = vec_from.size(); vec_to.resize(size); @@ -296,11 +488,11 @@ struct Adder ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat32, ColumnFloat64>( - &delta, [&](const auto & column){ vectorVector(vec_from, vec_to, column, time_zone, size); return true; }); + &delta, [&](const auto & column){ vectorVector(vec_from, vec_to, column, time_zone, scale, size); return true; }); } template - void constantVector(const FromType & from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone) const + void constantVector(const FromType & from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const { size_t size = delta.size(); vec_to.resize(size); @@ -309,7 +501,7 @@ struct Adder ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat32, ColumnFloat64>( - &delta, [&](const auto & column){ constantVector(from, vec_to, column, time_zone, size); return true; }); + &delta, [&](const auto & column){ constantVector(from, vec_to, column, time_zone, scale, size); return true; }); } private: @@ -325,18 +517,18 @@ private: template NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector( - const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const + const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t size) const { for (size_t i = 0; i < size; ++i) - vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone); + vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone, scale); } template NO_INLINE NO_SANITIZE_UNDEFINED void constantVector( - const FromType & from, 
ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const + const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t size) const { for (size_t i = 0; i < size; ++i) - vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone); + vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, scale); } }; @@ -344,7 +536,7 @@ private: template struct DateTimeAddIntervalImpl { - static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) + static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale = 0) { using FromValueType = typename FromDataType::FieldType; using FromColumnType = typename FromDataType::ColumnType; @@ -363,16 +555,15 @@ struct DateTimeAddIntervalImpl if (const auto * sources = checkAndGetColumn(source_col.get())) { if (const auto * delta_const_column = typeid_cast(&delta_column)) - op.vectorConstant(sources->getData(), col_to->getData(), delta_const_column->getInt(0), time_zone); + op.vectorConstant(sources->getData(), col_to->getData(), delta_const_column->getInt(0), time_zone, scale); else - op.vectorVector(sources->getData(), col_to->getData(), delta_column, time_zone); + op.vectorVector(sources->getData(), col_to->getData(), delta_column, time_zone, scale); } else if (const auto * sources_const = checkAndGetColumnConst(source_col.get())) { op.constantVector( sources_const->template getValue(), - col_to->getData(), - delta_column, time_zone); + col_to->getData(), delta_column, time_zone, scale); } else { @@ -463,18 +654,10 @@ public: } } - // TransformDateTime64 helps choosing correct overload of exec and does some transformations - // on input and output parameters to simplify support of DateTime64 in concrete Transform. - template - using TransformType = std::conditional_t< - std::is_same_v, - TransformDateTime64, - Transform>; - /// Helper templates to deduce return type based on argument type, since some overloads may promote or denote types, /// e.g. addSeconds(Date, 1) => DateTime template - using TransformExecuteReturnType = decltype(std::declval>().execute(FieldType(), 0, std::declval())); + using TransformExecuteReturnType = decltype(std::declval().execute(FieldType(), 0, std::declval(), 0)); // Deduces RETURN DataType from INPUT DataType, based on return type of Transform{}.execute(INPUT_TYPE, UInt64, DateLUTImpl). // e.g. 
for Transform-type that has execute()-overload with 'UInt16' input and 'UInt32' return, @@ -500,11 +683,33 @@ public: if (typeid_cast(arguments[0].type.get())) { const auto & datetime64_type = assert_cast(*arguments[0].type); - return std::make_shared(datetime64_type.getScale(), extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + + auto from_scale = datetime64_type.getScale(); + auto scale = from_scale; + + if (std::is_same_v) + scale = 9; + else if (std::is_same_v) + scale = 6; + else if (std::is_same_v) + scale = 3; + + scale = std::max(scale, from_scale); + + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); } else { - return std::make_shared(DataTypeDateTime64::default_scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + auto scale = DataTypeDateTime64::default_scale; + + if (std::is_same_v) + scale = 9; + else if (std::is_same_v) + scale = 6; + else if (std::is_same_v) + scale = 3; + + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); } } else @@ -541,9 +746,9 @@ public: } else if (const auto * datetime64_type = assert_cast(from_type)) { - using WrappedTransformType = TransformType; - return DateTimeAddIntervalImpl, WrappedTransformType>::execute( - WrappedTransformType{datetime64_type->getScale()}, arguments, result_type); + auto from_scale = datetime64_type->getScale(); + return DateTimeAddIntervalImpl, Transform>::execute( + Transform{}, arguments, result_type, from_scale); } else throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName(), diff --git a/src/Functions/FunctionDateOrDateTimeToSomething.h b/src/Functions/FunctionDateOrDateTimeToSomething.h index 00678e65364..5269eecea37 100644 --- a/src/Functions/FunctionDateOrDateTimeToSomething.h +++ b/src/Functions/FunctionDateOrDateTimeToSomething.h @@ -88,6 +88,20 @@ public: Int64 scale = DataTypeDateTime64::default_scale; if (const auto * dt64 = checkAndGetDataType(arguments[0].type.get())) scale = dt64->getScale(); + auto source_scale = scale; + + if constexpr (std::is_same_v) + { + scale = std::max(source_scale, static_cast(3)); + } + else if constexpr (std::is_same_v) + { + scale = std::max(source_scale, static_cast(6)); + } + else if constexpr (std::is_same_v) + { + scale = std::max(source_scale, static_cast(9)); + } return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 1, 0)); } diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 4f5f6ae483f..7f8e9148032 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -112,6 +112,9 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index d1564008dfe..e098378f51a 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -1487,6 +1487,9 @@ struct NameToDecimal256 { static constexpr auto name = "toDecimal256"; }; static constexpr auto kind = IntervalKind::INTERVAL_KIND; \ }; +DEFINE_NAME_TO_INTERVAL(Nanosecond) +DEFINE_NAME_TO_INTERVAL(Microsecond) +DEFINE_NAME_TO_INTERVAL(Millisecond) DEFINE_NAME_TO_INTERVAL(Second) DEFINE_NAME_TO_INTERVAL(Minute) 
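// Editorial note (illustrative, not part of the patch): the getReturnTypeImpl
// changes in FunctionDateOrDateTimeAddInterval and the scale clamp in
// FunctionDateOrDateTimeToSomething above implement one rule: the result scale
// must cover the interval's precision (3 for milliseconds, 6 for microseconds,
// 9 for nanoseconds) while never dropping precision the source already had.
// A minimal sketch of that rule, with resultScale() as a made-up helper name:
//
//     UInt32 resultScale(UInt32 from_scale, IntervalKind kind)
//     {
//         UInt32 min_scale = 0;
//         if (kind == IntervalKind::Nanosecond)
//             min_scale = 9;
//         else if (kind == IntervalKind::Microsecond)
//             min_scale = 6;
//         else if (kind == IntervalKind::Millisecond)
//             min_scale = 3;
//         /// e.g. addMilliseconds on DateTime64(6) keeps scale 6, on DateTime64(0) widens to 3
//         return std::max(min_scale, from_scale);
//     }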
DEFINE_NAME_TO_INTERVAL(Hour) @@ -2703,13 +2706,10 @@ private: return createWrapper(from_type, to_type, requested_result_is_nullable); } - WrapperType createUInt8ToUInt8Wrapper(const DataTypePtr from_type, const DataTypePtr to_type) const + WrapperType createUInt8ToBoolWrapper(const DataTypePtr from_type, const DataTypePtr to_type) const { return [from_type, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) -> ColumnPtr { - if (isBool(from_type) || !isBool(to_type)) - return arguments.front().column; - /// Special case when we convert UInt8 column to Bool column. /// both columns have type UInt8, but we shouldn't use identity wrapper, /// because Bool column can contain only 0 and 1. @@ -3506,15 +3506,19 @@ private: /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested. WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const { - bool convert_to_ipv6 = to_type->getCustomName() && to_type->getCustomName()->getName() == "IPv6"; + if (isUInt8(from_type) && isBool(to_type)) + return createUInt8ToBoolWrapper(from_type, to_type); - if (from_type->equals(*to_type) && !convert_to_ipv6) - { - if (isUInt8(from_type)) - return createUInt8ToUInt8Wrapper(from_type, to_type); + /// We can cast IPv6 into IPv6, IPv4 into IPv4, but we should not allow to cast FixedString(16) into IPv6 as part of identity cast + bool safe_convert_custom_types = true; + if (const auto * to_type_custom_name = to_type->getCustomName()) + safe_convert_custom_types = from_type->getCustomName() && from_type->getCustomName()->getName() == to_type_custom_name->getName(); + else if (const auto * from_type_custom_name = from_type->getCustomName()) + safe_convert_custom_types = to_type->getCustomName() && from_type_custom_name->getName() == to_type->getCustomName()->getName(); + + if (from_type->equals(*to_type) && safe_convert_custom_types) return createIdentityWrapper(from_type); - } else if (WhichDataType(from_type).isNothing()) return createNothingWrapper(to_type.get()); diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index 79ce7356ee7..76844e2e6fb 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; + extern const int SYNTAX_ERROR; } namespace @@ -167,6 +168,13 @@ struct TimeWindowImpl switch (std::get<0>(interval)) { + //TODO: add proper support for fractional seconds +// case IntervalKind::Nanosecond: +// return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); +// case IntervalKind::Microsecond: +// return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); +// case IntervalKind::Millisecond: +// return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); case IntervalKind::Second: return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); case IntervalKind::Minute: @@ -183,6 +191,8 @@ struct TimeWindowImpl return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); case IntervalKind::Year: return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + default: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); } __builtin_unreachable(); } @@ -350,6 +360,16 @@ struct TimeWindowImpl
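// Editorial note: all three window dispatch switches in FunctionsTimeWindow.cpp
// (tumble, hop, hopSlice) receive the same treatment - the subsecond cases are
// left as commented TODO stubs, and a new default branch turns the former
// unreachable fall-through into a defined error. The pattern in isolation
// (illustrative sketch only):
//
//     switch (kind)
//     {
//         case IntervalKind::Second:   /// ...and the other supported kinds...
//             return executeTumble(*time_column_vec, num_units, time_zone);
//         default:                     /// Nanosecond, Microsecond, Millisecond
//             throw Exception("Fractional seconds are not supported by windows yet",
//                             ErrorCodes::SYNTAX_ERROR);
//     }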
switch (std::get<0>(window_interval)) { + //TODO: add proper support for fractional seconds +// case IntervalKind::Nanosecond: +// return executeHop( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); +// case IntervalKind::Microsecond: +// return executeHop( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); +// case IntervalKind::Millisecond: +// return executeHop( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); case IntervalKind::Second: return executeHop( *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); @@ -374,6 +394,8 @@ struct TimeWindowImpl case IntervalKind::Year: return executeHop( *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + default: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); } __builtin_unreachable(); } @@ -487,6 +509,16 @@ struct TimeWindowImpl switch (std::get<0>(window_interval)) { + //TODO: add proper support for fractional seconds +// case IntervalKind::Nanosecond: +// return executeHopSlice( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); +// case IntervalKind::Microsecond: +// return executeHopSlice( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); +// case IntervalKind::Millisecond: +// return executeHopSlice( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); case IntervalKind::Second: return executeHopSlice( *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); @@ -511,6 +543,8 @@ struct TimeWindowImpl case IntervalKind::Year: return executeHopSlice( *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + default: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); } __builtin_unreachable(); } diff --git a/src/Functions/FunctionsTimeWindow.h b/src/Functions/FunctionsTimeWindow.h index 313de10702d..3ea397e4c7d 100644 --- a/src/Functions/FunctionsTimeWindow.h +++ b/src/Functions/FunctionsTimeWindow.h @@ -80,7 +80,32 @@ struct ToStartOfTransform; TRANSFORM_TIME(Hour) TRANSFORM_TIME(Minute) TRANSFORM_TIME(Second) -#undef TRANSFORM_DATE +#undef TRANSFORM_TIME + +#define TRANSFORM_SUBSECONDS(INTERVAL_KIND, DEF_SCALE) \ +template<> \ + struct ToStartOfTransform \ + { \ + static Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \ + { \ + if (scale <= DEF_SCALE) \ + { \ + auto val = t * DecimalUtils::scaleMultiplier(DEF_SCALE - scale); \ + if (delta == 1) \ + return val; \ + else \ + return val - (val % delta); \ + } \ + else \ + { \ + return t - (t % (delta * DecimalUtils::scaleMultiplier(scale - DEF_SCALE))) ; \ + } \ + } \ + }; + TRANSFORM_SUBSECONDS(Millisecond, 3) + TRANSFORM_SUBSECONDS(Microsecond, 6) + TRANSFORM_SUBSECONDS(Nanosecond, 9) +#undef TRANSFORM_SUBSECONDS template struct AddTime; @@ -117,6 +142,25 @@ struct ToStartOfTransform; ADD_TIME(Second, 1) #undef ADD_TIME +#define ADD_SUBSECONDS(INTERVAL_KIND, DEF_SCALE) \ +template <> \ + struct AddTime \ + { \ + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \ + { \ + if (scale < DEF_SCALE) \ + { \ + return t + delta * DecimalUtils::scaleMultiplier(DEF_SCALE - scale); \ + } \ + else \ + return t + delta * DecimalUtils::scaleMultiplier(scale - DEF_SCALE); \ + } \ + }; +
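// Editorial note (worked example, not part of the patch): in the Millisecond
// instantiation of TRANSFORM_SUBSECONDS (DEF_SCALE = 3), a DateTime64(6) value
// has scale 6 > DEF_SCALE, so the start of a 25 ms interval is computed in the
// source's microsecond ticks:
//
//     t - (t % (25 * DecimalUtils::scaleMultiplier(6 - 3)))  ==  t - (t % 25000)
//
// while a DateTime64(1) value has scale 1 <= DEF_SCALE, so it is first widened
// to millisecond ticks (val = t * scaleMultiplier(3 - 1)) and then snapped with
// val - (val % delta). ADD_SUBSECONDS above rescales in the same way: in the
// usual scale >= DEF_SCALE case the delta is converted to the value's tick size
// before the addition.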
ADD_SUBSECONDS(Millisecond, 3) + ADD_SUBSECONDS(Microsecond, 6) + ADD_SUBSECONDS(Nanosecond, 9) +#undef ADD_SUBSECONDS + template struct TimeWindowImpl { diff --git a/src/Functions/SubtractSubSeconds.cpp b/src/Functions/SubtractSubSeconds.cpp new file mode 100644 index 00000000000..5eeb24c8748 --- /dev/null +++ b/src/Functions/SubtractSubSeconds.cpp @@ -0,0 +1,28 @@ +#include +#include + + +namespace DB +{ + +using FunctionSubtractNanoseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionSubtractNanoseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +using FunctionSubtractMicroseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionSubtractMicroseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +using FunctionSubtractMilliseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionSubtractMilliseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +} + + diff --git a/src/Functions/TransformDateTime64.h b/src/Functions/TransformDateTime64.h index b05bdab65ad..9ac28118b8f 100644 --- a/src/Functions/TransformDateTime64.h +++ b/src/Functions/TransformDateTime64.h @@ -13,7 +13,7 @@ namespace DB * * DateTime64 value and scale factor (2) * * DateTime64 broken down to components, result of execute is then re-assembled back into DateTime64 value (3) * - * Suitable Transfotm-types are commonly used in Date/DateTime manipulation functions, + * Suitable Transform-types are commonly used in Date/DateTime manipulation functions, * and should implement static (or const) function with following signatures: * 1: * R execute(Int64 whole_value, ... ) diff --git a/src/Functions/addSubSeconds.cpp b/src/Functions/addSubSeconds.cpp new file mode 100644 index 00000000000..f58f8b20b99 --- /dev/null +++ b/src/Functions/addSubSeconds.cpp @@ -0,0 +1,28 @@ +#include +#include + + +namespace DB +{ + +using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionAddNanoseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionAddMicroseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionAddMilliseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +} + + diff --git a/src/Functions/caseWithExpression.cpp b/src/Functions/caseWithExpression.cpp index 37ee89c1f11..e06a01431da 100644 --- a/src/Functions/caseWithExpression.cpp +++ b/src/Functions/caseWithExpression.cpp @@ -43,6 +43,9 @@ public: for (size_t i = 2; i < args.size() - 1; i += 2) dst_array_types.push_back(args[i]); + // Type of the ELSE branch + dst_array_types.push_back(args.back()); + return getLeastSupertype(dst_array_types); } diff --git a/src/Functions/yandexConsistentHash.cpp b/src/Functions/kostikConsistentHash.cpp similarity index 59% rename from src/Functions/yandexConsistentHash.cpp rename to src/Functions/kostikConsistentHash.cpp index 58617e29af7..a38c3c965d8 100644 --- a/src/Functions/yandexConsistentHash.cpp +++ b/src/Functions/kostikConsistentHash.cpp @@ -7,9 +7,9 @@ namespace DB { /// An O(1) time and space consistent hash algorithm by Konstantin Oblakov -struct YandexConsistentHashImpl +struct KostikConsistentHashImpl { - static constexpr auto name = "yandexConsistentHash"; + static constexpr auto name = "kostikConsistentHash"; using HashType = UInt64; /// Actually it supports UInt64, but it is 
efficient only if n <= 32768 @@ -23,12 +23,12 @@ struct YandexConsistentHashImpl } }; -using FunctionYandexConsistentHash = FunctionConsistentHashImpl; +using FunctionKostikConsistentHash = FunctionConsistentHashImpl; -void registerFunctionYandexConsistentHash(FunctionFactory & factory) +void registerFunctionKostikConsistentHash(FunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction(); + factory.registerAlias("yandexConsistentHash", "kostikConsistentHash"); } } - diff --git a/src/Functions/registerFunctionsConsistentHashing.cpp b/src/Functions/registerFunctionsConsistentHashing.cpp index d4d740bc92f..84a78cd6765 100644 --- a/src/Functions/registerFunctionsConsistentHashing.cpp +++ b/src/Functions/registerFunctionsConsistentHashing.cpp @@ -2,12 +2,12 @@ namespace DB { class FunctionFactory; -void registerFunctionYandexConsistentHash(FunctionFactory & factory); +void registerFunctionKostikConsistentHash(FunctionFactory & factory); void registerFunctionJumpConsistentHash(FunctionFactory & factory); void registerFunctionsConsistentHashing(FunctionFactory & factory) { - registerFunctionYandexConsistentHash(factory); + registerFunctionKostikConsistentHash(factory); registerFunctionJumpConsistentHash(factory); } diff --git a/src/Functions/registerFunctionsDateTime.cpp b/src/Functions/registerFunctionsDateTime.cpp index 5211a62ff1e..dd7b67c47ac 100644 --- a/src/Functions/registerFunctionsDateTime.cpp +++ b/src/Functions/registerFunctionsDateTime.cpp @@ -11,6 +11,9 @@ void registerFunctionToDayOfWeek(FunctionFactory &); void registerFunctionToDayOfYear(FunctionFactory &); void registerFunctionToHour(FunctionFactory &); void registerFunctionToMinute(FunctionFactory &); +void registerFunctionToStartOfNanosecond(FunctionFactory &); +void registerFunctionToStartOfMicrosecond(FunctionFactory &); +void registerFunctionToStartOfMillisecond(FunctionFactory &); void registerFunctionToStartOfSecond(FunctionFactory &); void registerFunctionToSecond(FunctionFactory &); void registerFunctionToStartOfDay(FunctionFactory &); @@ -47,6 +50,9 @@ void registerFunctionTimeSlots(FunctionFactory &); void registerFunctionToYYYYMM(FunctionFactory &); void registerFunctionToYYYYMMDD(FunctionFactory &); void registerFunctionToYYYYMMDDhhmmss(FunctionFactory &); +void registerFunctionAddNanoseconds(FunctionFactory &); +void registerFunctionAddMicroseconds(FunctionFactory &); +void registerFunctionAddMilliseconds(FunctionFactory &); void registerFunctionAddSeconds(FunctionFactory &); void registerFunctionAddMinutes(FunctionFactory &); void registerFunctionAddHours(FunctionFactory &); @@ -55,6 +61,9 @@ void registerFunctionAddWeeks(FunctionFactory &); void registerFunctionAddMonths(FunctionFactory &); void registerFunctionAddQuarters(FunctionFactory &); void registerFunctionAddYears(FunctionFactory &); +void registerFunctionSubtractNanoseconds(FunctionFactory &); +void registerFunctionSubtractMicroseconds(FunctionFactory &); +void registerFunctionSubtractMilliseconds(FunctionFactory &); void registerFunctionSubtractSeconds(FunctionFactory &); void registerFunctionSubtractMinutes(FunctionFactory &); void registerFunctionSubtractHours(FunctionFactory &); @@ -93,6 +102,9 @@ void registerFunctionsDateTime(FunctionFactory & factory) registerFunctionToStartOfMonth(factory); registerFunctionToStartOfQuarter(factory); registerFunctionToStartOfYear(factory); + registerFunctionToStartOfNanosecond(factory); + registerFunctionToStartOfMicrosecond(factory); + registerFunctionToStartOfMillisecond(factory); 
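// Editorial note on the caseWithExpression.cpp hunk earlier in this patch
// (illustrative): previously only the THEN branch types were pushed into
// dst_array_types, so the ELSE branch type could be ignored when computing the
// common result type. Appending args.back() (the ELSE type) before calling
// getLeastSupertype means that, for example,
//
//     CASE x WHEN 0 THEN 1 ELSE 2.5 END
//
// is typed as the supertype of UInt8 and Float64 rather than of UInt8 alone.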
registerFunctionToStartOfSecond(factory); registerFunctionToStartOfMinute(factory); registerFunctionToStartOfFiveMinute(factory); @@ -119,6 +131,9 @@ void registerFunctionsDateTime(FunctionFactory & factory) registerFunctionToYYYYMM(factory); registerFunctionToYYYYMMDD(factory); registerFunctionToYYYYMMDDhhmmss(factory); + registerFunctionAddNanoseconds(factory); + registerFunctionAddMicroseconds(factory); + registerFunctionAddMilliseconds(factory); registerFunctionAddSeconds(factory); registerFunctionAddMinutes(factory); registerFunctionAddHours(factory); @@ -127,6 +142,9 @@ void registerFunctionsDateTime(FunctionFactory & factory) registerFunctionAddMonths(factory); registerFunctionAddQuarters(factory); registerFunctionAddYears(factory); + registerFunctionSubtractNanoseconds(factory); + registerFunctionSubtractMicroseconds(factory); + registerFunctionSubtractMilliseconds(factory); registerFunctionSubtractSeconds(factory); registerFunctionSubtractMinutes(factory); registerFunctionSubtractHours(factory); diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 09b7931de8d..bff33f9b061 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -33,184 +33,273 @@ namespace template <> struct Transform { - static constexpr auto name = function_name; - - static UInt16 execute(UInt16 d, UInt64 years, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfYearInterval(DayNum(d), years); } - static UInt16 execute(Int32 d, UInt64 years, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); } - static UInt16 execute(UInt32 t, UInt64 years, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); } - static UInt16 execute(Int64 t, UInt64 years, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); + return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); } }; template <> struct Transform { - static constexpr auto name = function_name; - - static UInt16 execute(UInt16 d, UInt64 quarters, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfQuarterInterval(DayNum(d), quarters); } - static UInt16 execute(Int32 d, UInt64 quarters, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); } - static UInt16 execute(UInt32 t, UInt64 quarters, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); } - static UInt16 execute(Int64 t, UInt64 quarters, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); + return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), 
quarters); } }; template <> struct Transform { - static constexpr auto name = function_name; - - static UInt16 execute(UInt16 d, UInt64 months, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMonthInterval(DayNum(d), months); } - static UInt16 execute(Int32 d, UInt64 months, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); } - static UInt16 execute(UInt32 t, UInt64 months, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); } - static UInt16 execute(Int64 t, UInt64 months, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); + return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); } }; template <> struct Transform { - static constexpr auto name = function_name; - - static UInt16 execute(UInt16 d, UInt64 weeks, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfWeekInterval(DayNum(d), weeks); } - static UInt16 execute(Int32 d, UInt64 weeks, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); } - static UInt16 execute(UInt32 t, UInt64 weeks, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); } - static UInt16 execute(Int64 t, UInt64 weeks, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); + return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); } }; template <> struct Transform { - static constexpr auto name = function_name; - - static UInt32 execute(UInt16 d, UInt64 days, const DateLUTImpl & time_zone) + static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days); } - static UInt32 execute(Int32 d, UInt64 days, const DateLUTImpl & time_zone) + static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days); } - static UInt32 execute(UInt32 t, UInt64 days, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days); } - static UInt32 execute(Int64 t, UInt64 days, const DateLUTImpl & time_zone) + static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days); + return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days); } }; template <> struct Transform { - static constexpr auto name = function_name; + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return 
dateIsNotSupported(function_name); } - static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } - static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); } - static UInt32 execute(Int64 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + + static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfHourInterval(t, hours); + } + + static UInt32 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); + } }; template <> struct Transform { - static constexpr auto name = function_name; + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, UInt64 minutes, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMinuteInterval(t, minutes); } - static UInt32 execute(Int64 t, UInt64 minutes, const DateLUTImpl & time_zone) + static UInt32 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfMinuteInterval(t, minutes); + return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes); } }; template <> struct Transform { - static constexpr auto name = function_name; + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfSecondInterval(t, seconds); } - static Int64 execute(Int64 t, UInt64 seconds, const DateLUTImpl & time_zone) + static UInt32 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfSecondInterval(t, seconds); + return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds); } }; + template <> + struct Transform + { + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); } + + static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 
scale_multiplier) + { + if (scale_multiplier < 1000) + { + Int64 t_milliseconds = t * (static_cast(1000) / scale_multiplier); + if (likely(t >= 0)) + return t_milliseconds / milliseconds * milliseconds; + else + return ((t_milliseconds + 1) / milliseconds - 1) * milliseconds; + } + else if (scale_multiplier > 1000) + { + Int64 scale_diff = scale_multiplier / static_cast(1000); + if (likely(t >= 0)) + return t / milliseconds / scale_diff * milliseconds; + else + return ((t + 1) / milliseconds / scale_diff - 1) * milliseconds; + } + else + if (likely(t >= 0)) + return t / milliseconds * milliseconds; + else + return ((t + 1) / milliseconds - 1) * milliseconds; + } + }; + + template <> + struct Transform + { + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); } + + static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier) + { + if (scale_multiplier < 1000000) + { + Int64 t_microseconds = t * (static_cast(1000000) / scale_multiplier); + if (likely(t >= 0)) + return t_microseconds / microseconds * microseconds; + else + return ((t_microseconds + 1) / microseconds - 1) * microseconds; + } + else if (scale_multiplier > 1000000) + { + Int64 scale_diff = scale_multiplier / static_cast(1000000); + if (likely(t >= 0)) + return t / microseconds / scale_diff * microseconds; + else + return ((t + 1) / microseconds / scale_diff - 1) * microseconds; + } + else + if (likely(t >= 0)) + return t / microseconds * microseconds; + else + return ((t + 1) / microseconds - 1) * microseconds; + } + }; + + template <> + struct Transform + { + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); } + + static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier) + { + if (scale_multiplier < 1000000000) + { + Int64 t_nanoseconds = t * (static_cast(1000000000) / scale_multiplier); + if (likely(t >= 0)) + return t_nanoseconds / nanoseconds * nanoseconds; + else + return ((t_nanoseconds + 1) / nanoseconds - 1) * nanoseconds; + } + else + if (likely(t >= 0)) + return t / nanoseconds * nanoseconds; + else + return ((t + 1) / nanoseconds - 1) * nanoseconds; + } + }; class FunctionToStartOfInterval : public IFunction { @@ -240,6 +329,7 @@ public: const DataTypeInterval * interval_type = nullptr; bool result_type_is_date = false; + bool result_type_is_datetime = false; auto check_interval_argument = [&] { interval_type = checkAndGetDataType(arguments[1].type.get()); @@ -251,6 +341,8 @@ public: result_type_is_date = (interval_type->getKind() == IntervalKind::Year) || (interval_type->getKind() == IntervalKind::Quarter) || (interval_type->getKind() == IntervalKind::Month) || (interval_type->getKind() == IntervalKind::Week); + result_type_is_datetime = (interval_type->getKind() == IntervalKind::Day) || (interval_type->getKind() == IntervalKind::Hour) + || (interval_type->getKind() == IntervalKind::Minute) || (interval_type->getKind() == IntervalKind::Second); }; auto check_timezone_argument = 
[&] @@ -263,7 +355,7 @@ public: if (first_argument_is_date && result_type_is_date) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The timezone argument of function {} with interval type {} is allowed only when the 1st argument " - "has the type DateTime", + "has the type DateTime or DateTime64", getName(), interval_type->getKind().toString()); }; @@ -288,19 +380,33 @@ public: if (result_type_is_date) return std::make_shared(); - else + else if (result_type_is_datetime) return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + else + { + auto scale = 0; + + if (interval_type->getKind() == IntervalKind::Nanosecond) + scale = 9; + else if (interval_type->getKind() == IntervalKind::Microsecond) + scale = 6; + else if (interval_type->getKind() == IntervalKind::Millisecond) + scale = 3; + + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + } + } bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count */) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override { const auto & time_column = arguments[0]; const auto & interval_column = arguments[1]; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0); - auto result_column = dispatchForColumns(time_column, interval_column, time_zone); + auto result_column = dispatchForColumns(time_column, interval_column, result_type, time_zone); return result_column; } @@ -316,33 +422,36 @@ public: private: ColumnPtr dispatchForColumns( - const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone) const + const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const { const auto & from_datatype = *time_column.type.get(); const auto which_type = WhichDataType(from_datatype); + + if (which_type.isDateTime64()) + { + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + auto scale = assert_cast(from_datatype).getScale(); + + if (time_column_vec) + return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone, scale); + } if (which_type.isDateTime()) { const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, time_zone); + return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } if (which_type.isDate()) { const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, time_zone); + return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } if (which_type.isDate32()) { const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, time_zone); - } - if (which_type.isDateTime64()) - { - const auto 
* time_column_vec = checkAndGetColumn(time_column.column.get()); - if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, time_zone); + return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } throw Exception( "Illegal column for first argument of function " + getName() + ". Must contain dates or dates with time", @@ -351,7 +460,8 @@ private: template ColumnPtr dispatchForIntervalColumn( - const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone) const + const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column, + const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const { const auto * interval_type = checkAndGetDataType(interval_column.type.get()); if (!interval_type) @@ -368,49 +478,52 @@ private: switch (interval_type->getKind()) { + case IntervalKind::Nanosecond: + return execute(from, time_column, num_units, result_type, time_zone, scale); + case IntervalKind::Microsecond: + return execute(from, time_column, num_units, result_type, time_zone, scale); + case IntervalKind::Millisecond: + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Second: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Minute: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Hour: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Day: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Week: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Month: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Quarter: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Year: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); } __builtin_unreachable(); } - - template - ColumnPtr execute(const FromDataType & from_datatype, const ColumnType & time_column, UInt64 num_units, const DateLUTImpl & time_zone) const + template + ColumnPtr execute(const FromDataType &, const ColumnType & time_column_type, Int64 num_units, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const { - const auto & time_data = time_column.getData(); - size_t size = time_column.size(); - auto result = ColumnVector::create(); - auto & result_data = result->getData(); + using ToColumnType = typename ToDataType::ColumnType; + + const auto & time_data = time_column_type.getData(); + size_t size = time_data.size(); + + auto result_col = result_type->createColumn(); + auto *col_to = assert_cast(result_col.get()); + auto & result_data = col_to->getData(); result_data.resize(size); - 
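// Editorial note (illustrative): the new subsecond Transform specializations in
// toStartOfInterval.cpp snap toward negative infinity, so timestamps before
// 1970 land on the start of their interval instead of its end. The core floor
// logic, isolated into a standalone sketch (floorToInterval is a made-up name;
// t and units are assumed to be in the same scale):
//
//     Int64 floorToInterval(Int64 t, Int64 units)
//     {
//         if (t >= 0)
//             return t / units * units;
//         /// integer division truncates toward zero, so shift negative values down
//         return ((t + 1) / units - 1) * units;
//     }
//
// e.g. floorToInterval(-1, 25) == -25, whereas plain -1 / 25 * 25 would give 0.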
if constexpr (std::is_same_v) - { - const auto transform = TransformDateTime64>{from_datatype.getScale()}; - for (size_t i = 0; i != size; ++i) - result_data[i] = transform.execute(time_data[i], num_units, time_zone); - } - else - { - for (size_t i = 0; i != size; ++i) - result_data[i] = Transform::execute(time_data[i], num_units, time_zone); - } - return result; + Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); + + for (size_t i = 0; i != size; ++i) + result_data[i] = Transform::execute(time_data[i], num_units, time_zone, scale_multiplier); + + return result_col; } }; diff --git a/src/Functions/toStartOfSubsecond.cpp b/src/Functions/toStartOfSubsecond.cpp new file mode 100644 index 00000000000..b2257c5e3cd --- /dev/null +++ b/src/Functions/toStartOfSubsecond.cpp @@ -0,0 +1,30 @@ +#include +#include +#include + + +namespace DB +{ + +using FunctionToStartOfMillisecond = FunctionDateOrDateTimeToSomething; + +void registerFunctionToStartOfMillisecond(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +using FunctionToStartOfMicrosecond = FunctionDateOrDateTimeToSomething; + +void registerFunctionToStartOfMicrosecond(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +using FunctionToStartOfNanosecond = FunctionDateOrDateTimeToSomething; + +void registerFunctionToStartOfNanosecond(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/IO/IOThreadPool.cpp b/src/IO/IOThreadPool.cpp new file mode 100644 index 00000000000..4014d00d8b8 --- /dev/null +++ b/src/IO/IOThreadPool.cpp @@ -0,0 +1,34 @@ +#include +#include "Core/Field.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +std::unique_ptr IOThreadPool::instance; + +void IOThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size) +{ + if (instance) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is initialized twice"); + } + + instance = std::make_unique(max_threads, max_free_threads, queue_size, false /*shutdown_on_exception*/); +} + +ThreadPool & IOThreadPool::get() +{ + if (!instance) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is not initialized"); + } + + return *instance; +} + +} diff --git a/src/IO/IOThreadPool.h b/src/IO/IOThreadPool.h new file mode 100644 index 00000000000..4fcf99b6048 --- /dev/null +++ b/src/IO/IOThreadPool.h @@ -0,0 +1,20 @@ +#pragma once + +#include + +namespace DB +{ + +/* + * ThreadPool used for the IO. 
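 *
 * Editorial note - usage sketch (illustrative; the pool sizes below are made-up
 * values, not from this patch). initialize() must run exactly once, before the
 * first get(); both functions throw LOGICAL_ERROR when that contract is broken:
 *
 *   IOThreadPool::initialize(100, 0, 10000);   // max_threads, max_free_threads, queue_size
 *   ThreadPool & pool = IOThreadPool::get();   // throws if initialize() was never called
 *   pool.scheduleOrThrow([] { doSomeIO(); });  // doSomeIO() is a placeholder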
+ */ +class IOThreadPool +{ + static std::unique_ptr instance; + +public: + static void initialize(size_t max_threads, size_t max_free_threads, size_t queue_size); + static ThreadPool & get(); +}; + +} diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp new file mode 100644 index 00000000000..7fa10c160ad --- /dev/null +++ b/src/IO/ParallelReadBuffer.cpp @@ -0,0 +1,290 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int CANNOT_SEEK_THROUGH_FILE; + extern const int SEEK_POSITION_OUT_OF_BOUND; + +} + +ParallelReadBuffer::ParallelReadBuffer( + std::unique_ptr reader_factory_, + ThreadPool * pool_, + size_t max_working_readers_, + WorkerSetup worker_setup_, + WorkerCleanup worker_cleanup_) + : SeekableReadBufferWithSize(nullptr, 0) + , pool(pool_) + , max_working_readers(max_working_readers_) + , reader_factory(std::move(reader_factory_)) + , worker_setup(std::move(worker_setup_)) + , worker_cleanup(std::move(worker_cleanup_)) +{ + std::unique_lock lock{mutex}; + addReaders(lock); +} + +bool ParallelReadBuffer::addReaderToPool(std::unique_lock & /*buffer_lock*/) +{ + auto reader = reader_factory->getReader(); + if (!reader) + { + return false; + } + + auto worker = read_workers.emplace_back(std::make_shared(std::move(reader))); + + pool->scheduleOrThrow( + [&, this, worker = std::move(worker)]() mutable + { + ThreadStatus thread_status; + + { + std::lock_guard lock{mutex}; + ++active_working_reader; + } + + SCOPE_EXIT({ + worker_cleanup(thread_status); + + std::lock_guard lock{mutex}; + --active_working_reader; + if (active_working_reader == 0) + { + readers_done.notify_all(); + } + }); + worker_setup(thread_status); + + readerThreadFunction(std::move(worker)); + }); + return true; +} + +void ParallelReadBuffer::addReaders(std::unique_lock & buffer_lock) +{ + while (read_workers.size() < max_working_readers && addReaderToPool(buffer_lock)) + ; +} + +off_t ParallelReadBuffer::seek(off_t offset, int whence) +{ + if (whence != SEEK_SET) + throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + + if (offset < 0) + throw Exception("Seek position is out of bounds. 
Offset: " + std::to_string(offset), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + + if (!working_buffer.empty() && static_cast(offset) >= current_position - working_buffer.size() && offset < current_position) + { + pos = working_buffer.end() - (current_position - offset); + assert(pos >= working_buffer.begin()); + assert(pos <= working_buffer.end()); + + return offset; + } + + std::unique_lock lock{mutex}; + const auto offset_is_in_range + = [&](const auto & range) { return static_cast(offset) >= range.left && static_cast(offset) <= *range.right; }; + + while (!read_workers.empty() && (offset < current_position || !offset_is_in_range(read_workers.front()->range))) + { + read_workers.front()->cancel = true; + read_workers.pop_front(); + } + + if (!read_workers.empty()) + { + auto & front_worker = read_workers.front(); + auto & segments = front_worker->segments; + current_position = front_worker->range.left; + while (true) + { + next_condvar.wait(lock, [&] { return emergency_stop || !segments.empty(); }); + + if (emergency_stop) + handleEmergencyStop(); + + auto next_segment = front_worker->nextSegment(); + if (static_cast(offset) < current_position + next_segment.size()) + { + current_segment = std::move(next_segment); + working_buffer = internal_buffer = Buffer(current_segment.data(), current_segment.data() + current_segment.size()); + current_position += current_segment.size(); + pos = working_buffer.end() - (current_position - offset); + addReaders(lock); + return offset; + } + + current_position += next_segment.size(); + } + } + + lock.unlock(); + finishAndWait(); + + reader_factory->seek(offset, whence); + all_completed = false; + read_workers.clear(); + + current_position = offset; + resetWorkingBuffer(); + + emergency_stop = false; + + lock.lock(); + addReaders(lock); + return offset; +} + +std::optional ParallelReadBuffer::getTotalSize() +{ + std::lock_guard lock{mutex}; + return reader_factory->getTotalSize(); +} + +off_t ParallelReadBuffer::getPosition() +{ + return current_position - available(); +} + +bool ParallelReadBuffer::currentWorkerReady() const +{ + assert(!read_workers.empty()); + return read_workers.front()->finished || !read_workers.front()->segments.empty(); +} + +bool ParallelReadBuffer::currentWorkerCompleted() const +{ + assert(!read_workers.empty()); + return read_workers.front()->finished && read_workers.front()->segments.empty(); +} + +void ParallelReadBuffer::handleEmergencyStop() +{ + // this can only be called from the main thread when there is an exception + assert(background_exception); + if (background_exception) + std::rethrow_exception(background_exception); +} + +bool ParallelReadBuffer::nextImpl() +{ + if (all_completed) + return false; + + while (true) + { + std::unique_lock lock(mutex); + next_condvar.wait( + lock, + [this]() + { + /// Check if no more readers left or current reader can be processed + return emergency_stop || currentWorkerReady(); + }); + + bool worker_removed = false; + /// Remove completed units + while (!read_workers.empty() && currentWorkerCompleted() && !emergency_stop) + { + read_workers.pop_front(); + worker_removed = true; + } + + if (emergency_stop) + handleEmergencyStop(); + + if (worker_removed) + addReaders(lock); + + /// All readers processed, stop + if (read_workers.empty()) + { + all_completed = true; + return false; + } + + auto & front_worker = read_workers.front(); + /// Read data from first segment of the first reader + if (!front_worker->segments.empty()) + { + current_segment = front_worker->nextSegment(); + if 
(currentWorkerCompleted()) + { + read_workers.pop_front(); + all_completed = !addReaderToPool(lock) && read_workers.empty(); + } + break; + } + } + working_buffer = internal_buffer = Buffer(current_segment.data(), current_segment.data() + current_segment.size()); + current_position += working_buffer.size(); + return true; +} + +void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker) +{ + try + { + while (!emergency_stop && !read_worker->cancel) + { + if (!read_worker->reader->next()) + throw Exception("Failed to read all the data from the reader", ErrorCodes::LOGICAL_ERROR); + + if (emergency_stop || read_worker->cancel) + break; + + Buffer buffer = read_worker->reader->buffer(); + size_t bytes_to_copy = std::min(buffer.size(), read_worker->bytes_left); + Segment new_segment(bytes_to_copy, &arena); + memcpy(new_segment.data(), buffer.begin(), bytes_to_copy); + read_worker->reader->ignore(bytes_to_copy); + read_worker->bytes_left -= bytes_to_copy; + { + /// New data ready to be read + std::lock_guard lock(mutex); + read_worker->segments.emplace_back(std::move(new_segment)); + read_worker->finished = read_worker->bytes_left == 0; + next_condvar.notify_all(); + } + + if (read_worker->finished) + { + break; + } + } + } + catch (...) + { + onBackgroundException(); + } +} + +void ParallelReadBuffer::onBackgroundException() +{ + std::lock_guard lock(mutex); + if (!background_exception) + { + background_exception = std::current_exception(); + } + emergency_stop = true; + next_condvar.notify_all(); +} + +void ParallelReadBuffer::finishAndWait() +{ + emergency_stop = true; + + std::unique_lock lock{mutex}; + readers_done.wait(lock, [&] { return active_working_reader == 0; }); +} + +} diff --git a/src/IO/ParallelReadBuffer.h b/src/IO/ParallelReadBuffer.h new file mode 100644 index 00000000000..7b364205e8e --- /dev/null +++ b/src/IO/ParallelReadBuffer.h @@ -0,0 +1,174 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +/** + * Reads from multiple ReadBuffers in parallel. + * Preserves order of readers obtained from ReadBufferFactory. + * + * It consumes multiple readers and yields their data in the order the readers were obtained. + * Each working reader saves segments of data to an internal queue. + * + * In its nextImpl method, ParallelReadBuffer takes the first available segment from the first reader in the deque and feeds it to the user. + * When the first reader finishes reading, it is removed from the worker deque and data from the next reader is consumed. + * + * The number of working readers is limited by max_working_readers.
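 *
 * Editorial note - rough usage sketch (illustrative only; RangedReaderFactory
 * stands in for some ReadBufferFactory implementation):
 *
 *   auto factory = std::make_unique<RangedReaderFactory>(...);
 *   ParallelReadBuffer buf(std::move(factory), &IOThreadPool::get(), 4);
 *   String data;
 *   readStringUntilEOF(data, buf);   /// bytes arrive in factory order, fetched by up to 4 workers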
+ */ +class ParallelReadBuffer : public SeekableReadBufferWithSize +{ +private: + /// Blocks until data appears in the first reader or that reader indicates it has finished + /// Finished readers are removed from the queue and data from the next readers is processed + bool nextImpl() override; + + class Segment : private boost::noncopyable + { + public: + Segment(size_t size_, SynchronizedArenaWithFreeLists * arena_) : arena(arena_), m_data(arena->alloc(size_)), m_size(size_) { } + + Segment() = default; + + Segment(Segment && other) noexcept : arena(other.arena) + { + std::swap(m_data, other.m_data); + std::swap(m_size, other.m_size); + } + + Segment & operator=(Segment && other) noexcept + { + arena = other.arena; + std::swap(m_data, other.m_data); + std::swap(m_size, other.m_size); + return *this; + } + + ~Segment() + { + if (m_data) + { + arena->free(m_data, m_size); + } + } + + auto data() const noexcept { return m_data; } + auto size() const noexcept { return m_size; } + + private: + SynchronizedArenaWithFreeLists * arena{nullptr}; + char * m_data{nullptr}; + size_t m_size{0}; + }; + +public: + class ReadBufferFactory + { + public: + virtual SeekableReadBufferPtr getReader() = 0; + virtual ~ReadBufferFactory() = default; + virtual off_t seek(off_t off, int whence) = 0; + virtual std::optional getTotalSize() = 0; + }; + + using WorkerSetup = std::function; + using WorkerCleanup = std::function; + explicit ParallelReadBuffer( + std::unique_ptr reader_factory_, + ThreadPool * pool, + size_t max_working_readers, + WorkerSetup worker_setup = {}, + WorkerCleanup worker_cleanup = {}); + + ~ParallelReadBuffer() override { finishAndWait(); } + + off_t seek(off_t off, int whence) override; + std::optional getTotalSize() override; + off_t getPosition() override; + +private: + /// Reader in progress with a list of read segments + struct ReadWorker + { + explicit ReadWorker(SeekableReadBufferPtr reader_) : reader(std::move(reader_)), range(reader->getRemainingReadRange()) + { + assert(range.right); + bytes_left = *range.right - range.left + 1; + } + + Segment nextSegment() + { + assert(!segments.empty()); + auto next_segment = std::move(segments.front()); + segments.pop_front(); + range.left += next_segment.size(); + return next_segment; + } + + SeekableReadBufferPtr reader; + std::deque segments; + bool finished{false}; + SeekableReadBuffer::Range range; + size_t bytes_left{0}; + std::atomic_bool cancel{false}; + }; + + using ReadWorkerPtr = std::shared_ptr; + + /// The first worker in the deque has new data or has processed everything available + bool currentWorkerReady() const; + /// The first worker in the deque has processed and flushed all its data + bool currentWorkerCompleted() const; + + void handleEmergencyStop(); + + void addReaders(std::unique_lock & buffer_lock); + bool addReaderToPool(std::unique_lock & buffer_lock); + + /// Process read_worker: read data and save it into the internal segments queue + void readerThreadFunction(ReadWorkerPtr read_worker); + + void onBackgroundException(); + void finishAndWait(); + + SynchronizedArenaWithFreeLists arena; + + Segment current_segment; + + ThreadPool * pool; + size_t max_working_readers; + size_t active_working_reader{0}; + // Triggered when all reader workers are done + std::condition_variable readers_done; + + std::unique_ptr reader_factory; + + WorkerSetup worker_setup; + WorkerCleanup worker_cleanup; + + /** + * FIFO queue of readers. + * Each worker holds the reader itself and its downloaded segments.
+ * When a reader has read all available data, it is removed from the + * deque and data from the next reader is consumed. + */ + std::deque read_workers; + + std::mutex mutex; + /// Triggered when new data is available + std::condition_variable next_condvar; + + std::exception_ptr background_exception = nullptr; + std::atomic_bool emergency_stop{false}; + + off_t current_position{0}; + + bool all_completed{false}; +}; + +} diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 73ad73bf895..061dd772212 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -1,32 +1,33 @@ #pragma once #include -#include -#include #include #include +#include #include #include #include #include +#include +#include +#include #include #include #include #include #include #include +#include #include #include #include #include #include -#include -#include namespace ProfileEvents { - extern const Event ReadBufferSeekCancelConnection; +extern const Event ReadBufferSeekCancelConnection; } namespace DB @@ -48,7 +49,7 @@ class UpdatableSessionBase { protected: SessionPtr session; - UInt64 redirects { 0 }; + UInt64 redirects{0}; Poco::URI initial_uri; ConnectionTimeouts timeouts; UInt64 max_redirects; @@ -56,19 +57,12 @@ protected: public: virtual void buildNewSession(const Poco::URI & uri) = 0; - explicit UpdatableSessionBase(const Poco::URI uri, - const ConnectionTimeouts & timeouts_, - UInt64 max_redirects_) - : initial_uri { uri } - , timeouts { timeouts_ } - , max_redirects { max_redirects_ } + explicit UpdatableSessionBase(const Poco::URI uri, const ConnectionTimeouts & timeouts_, UInt64 max_redirects_) + : initial_uri{uri}, timeouts{timeouts_}, max_redirects{max_redirects_} { } - SessionPtr getSession() - { - return session; - } + SessionPtr getSession() { return session; } void updateSession(const Poco::URI & uri) { @@ -99,7 +93,7 @@ namespace detail /// HTTP range, including right bound [begin, end].
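/// Editorial note (illustrative): making `begin` optional below distinguishes
/// "no explicit range start" from "range starting at 0". getRangeBegin()
/// collapses the unset case to 0 for offset arithmetic, and withPartialContent()
/// only asks for a partial response when begin or end is set (or a retry needs
/// it); a standard header for a begin-only range of 100 would be "Range: bytes=100-".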
struct Range { - size_t begin = 0; + std::optional begin; std::optional end; }; @@ -144,10 +138,9 @@ namespace detail return read_range.begin || read_range.end || retry_with_range_header; } - size_t getOffset() const - { - return read_range.begin + offset_from_begin_pos; - } + size_t getRangeBegin() const { return read_range.begin.value_or(0); } + + size_t getOffset() const { return getRangeBegin() + offset_from_begin_pos; } std::istream * callImpl(Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_) { @@ -161,7 +154,7 @@ namespace detail if (out_stream_callback) request.setChunkedTransferEncoding(true); - for (auto & http_header_entry: http_header_entries) + for (auto & http_header_entry : http_header_entries) request.set(std::get<0>(http_header_entry), std::get<1>(http_header_entry)); if (withPartialContent()) @@ -207,26 +200,14 @@ namespace detail std::optional getTotalSize() override { if (read_range.end) - return *read_range.end - read_range.begin; + return *read_range.end - getRangeBegin(); Poco::Net::HTTPResponse response; for (size_t i = 0; i < 10; ++i) { try { - call(response, Poco::Net::HTTPRequest::HTTP_HEAD); - - while (isRedirect(response.getStatus())) - { - Poco::URI uri_redirect(response.get("Location")); - if (remote_host_filter) - remote_host_filter->checkURL(uri_redirect); - - session->updateSession(uri_redirect); - - istr = callImpl(uri_redirect, response, method); - } - + callWithRedirects(response, Poco::Net::HTTPRequest::HTTP_HEAD); break; } catch (const Poco::Exception & e) @@ -236,7 +217,7 @@ namespace detail } if (response.hasContentLength()) - read_range.end = read_range.begin + response.getContentLength(); + read_range.end = getRangeBegin() + response.getContentLength(); return read_range.end; } @@ -252,6 +233,21 @@ namespace detail InitializeError initialization_error = InitializeError::NONE; + private: + void setupExternalBuffer() + { + /** + * use_external_buffer -- means we read into the buffer which + * was passed to us from somewhere else. We do not check whether + * previously returned buffer was read or not (no hasPendingData() check is needed), + * because this branch means we are prefetching data, + * each nextImpl() call we can fill a different buffer. 
+ */ + impl->set(internal_buffer.begin(), internal_buffer.size()); + assert(working_buffer.begin() != nullptr); + assert(!internal_buffer.empty()); + } + public: using NextCallback = std::function; using OutStreamCallback = std::function; @@ -276,7 +272,7 @@ namespace detail , session {session_} , out_stream_callback {out_stream_callback_} , credentials {credentials_} - , http_header_entries {http_header_entries_} + , http_header_entries {std::move(http_header_entries_)} , remote_host_filter {remote_host_filter_} , buffer_size {buffer_size_} , use_external_buffer {use_external_buffer_} @@ -287,18 +283,21 @@ namespace detail { if (settings.http_max_tries <= 0 || settings.http_retry_initial_backoff_ms <= 0 || settings.http_retry_initial_backoff_ms >= settings.http_retry_max_backoff_ms) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Invalid setting for http backoff, " - "must be http_max_tries >= 1 (current is {}) and " - "0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})", - settings.http_max_tries, settings.http_retry_initial_backoff_ms, settings.http_retry_max_backoff_ms); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Invalid setting for http backoff, " + "must be http_max_tries >= 1 (current is {}) and " + "0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})", + settings.http_max_tries, + settings.http_retry_initial_backoff_ms, + settings.http_retry_max_backoff_ms); // Configure User-Agent if it not already set. const std::string user_agent = "User-Agent"; - auto iter = std::find_if(http_header_entries.begin(), http_header_entries.end(), [&user_agent](const HTTPHeaderEntry & entry) - { - return std::get<0>(entry) == user_agent; - }); + auto iter = std::find_if( + http_header_entries.begin(), + http_header_entries.end(), + [&user_agent](const HTTPHeaderEntry & entry) { return std::get<0>(entry) == user_agent; }); if (iter == http_header_entries.end()) { @@ -313,7 +312,36 @@ namespace detail } } - void call(Poco::Net::HTTPResponse & response, const String & method_) + static bool isRetriableError(const Poco::Net::HTTPResponse::HTTPStatus http_status) noexcept + { + constexpr std::array non_retriable_errors{ + Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST, + Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED, + Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND, + Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN, + Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED}; + + return std::all_of( + non_retriable_errors.begin(), non_retriable_errors.end(), [&](const auto status) { return http_status != status; }); + } + + void callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false) + { + call(response, method_, throw_on_all_errors); + + while (isRedirect(response.getStatus())) + { + Poco::URI uri_redirect(response.get("Location")); + if (remote_host_filter) + remote_host_filter->checkURL(uri_redirect); + + session->updateSession(uri_redirect); + + istr = callImpl(uri_redirect, response, method); + } + } + + void call(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false) { try { @@ -321,18 +349,18 @@ namespace detail } catch (...) 
{ + if (throw_on_all_errors) + { + throw; + } + auto http_status = response.getStatus(); - if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND - && http_skip_not_found_url) + if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND && http_skip_not_found_url) { initialization_error = InitializeError::SKIP_NOT_FOUND_URL; } - else if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST - || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED - || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND - || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN - || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED) + else if (!isRetriableError(http_status)) { initialization_error = InitializeError::NON_RETRIABLE_ERROR; exception = std::current_exception(); @@ -372,12 +400,14 @@ namespace detail if (withPartialContent() && response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT) { /// Having `200 OK` instead of `206 Partial Content` is acceptable in case we retried with range.begin == 0. - if (read_range.begin) + if (read_range.begin && *read_range.begin != 0) { if (!exception) - exception = std::make_exception_ptr( - Exception(ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, - "Cannot read with range: [{}, {}]", read_range.begin, read_range.end ? *read_range.end : '-')); + exception = std::make_exception_ptr(Exception( + ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE, + "Cannot read with range: [{}, {}]", + *read_range.begin, + read_range.end ? *read_range.end : '-')); initialization_error = InitializeError::NON_RETRIABLE_ERROR; return; @@ -386,12 +416,12 @@ namespace detail { /// We could have range.begin == 0 and range.end != 0 in case of DiskWeb and failing to read with partial content /// will affect only performance, so a warning is enough. - LOG_WARNING(log, "Unable to read with range header: [{}, {}]", read_range.begin, *read_range.end); + LOG_WARNING(log, "Unable to read with range header: [{}, {}]", getRangeBegin(), *read_range.end); } } if (!offset_from_begin_pos && !read_range.end && response.hasContentLength()) - read_range.end = read_range.begin + response.getContentLength(); + read_range.end = getRangeBegin() + response.getContentLength(); try { @@ -399,12 +429,7 @@ namespace detail if (use_external_buffer) { - /** - * See comment 30 lines below. - */ - impl->set(internal_buffer.begin(), internal_buffer.size()); - assert(working_buffer.begin() != nullptr); - assert(!internal_buffer.empty()); + setupExternalBuffer(); } } catch (const Poco::Exception & e) @@ -426,23 +451,17 @@ namespace detail if (next_callback) next_callback(count()); - if (read_range.end && getOffset() == read_range.end.value()) + if (read_range.end && getOffset() > read_range.end.value()) + { + assert(getOffset() == read_range.end.value() + 1); return false; + } if (impl) { if (use_external_buffer) { - /** - * use_external_buffer -- means we read into the buffer which - * was passed to us from somewhere else. We do not check whether - * previously returned buffer was read or not (no hasPendingData() check is needed), - * because this branch means we are prefetching data, - * each nextImpl() call we can fill a different buffer. 
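// --- Illustrative example (not part of the patch) ---------------------------------
// A sketch of the retry policy implemented above: give up immediately on the
// non-retriable statuses that isRetriableError() lists, otherwise retry with an
// exponentially growing backoff capped by a maximum, mirroring the http_max_tries /
// http_retry_initial_backoff_ms / http_retry_max_backoff_ms settings validated
// earlier. Names and constants here are hypothetical.

#include <algorithm>
#include <array>
#include <chrono>
#include <thread>

constexpr std::array non_retriable_statuses{400, 401, 403, 404, 405};

static bool isRetriable(int status)
{
    // Same shape as isRetriableError(): retriable iff the status matches none of the list.
    return std::all_of(non_retriable_statuses.begin(), non_retriable_statuses.end(),
                       [&](int s) { return status != s; });
}

template <typename Request>
bool retryWithBackoff(Request request, size_t max_tries, unsigned initial_backoff_ms, unsigned max_backoff_ms)
{
    unsigned wait_ms = initial_backoff_ms; // must satisfy 0 < initial < max, as validated above
    for (size_t i = 0; i < max_tries; ++i)
    {
        int status = request();
        if (status < 400)
            return true; // success
        if (!isRetriable(status))
            return false; // e.g. 404: retrying cannot help
        std::this_thread::sleep_for(std::chrono::milliseconds(wait_ms));
        wait_ms = std::min(wait_ms * 2, max_backoff_ms); // exponential backoff with a cap
    }
    return false;
}

int main()
{
    int calls = 0;
    // Fake request: two transient 500s, then success on the third try.
    return retryWithBackoff([&] { return ++calls < 3 ? 500 : 200; }, 5, 10, 100) ? 0 : 1;
}
// -----------------------------------------------------------------------------------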
- */ - impl->set(internal_buffer.begin(), internal_buffer.size()); - assert(working_buffer.begin() != nullptr); - assert(!internal_buffer.empty()); + setupExternalBuffer(); } else { @@ -477,10 +496,7 @@ namespace detail if (use_external_buffer) { - /// See comment 40 lines above. - impl->set(internal_buffer.begin(), internal_buffer.size()); - assert(working_buffer.begin() != nullptr); - assert(!internal_buffer.empty()); + setupExternalBuffer(); } } @@ -498,13 +514,18 @@ namespace detail if (!can_retry_request) throw; - LOG_ERROR(log, - "HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. " - "Error: {}. (Current backoff wait is {}/{} ms)", - uri.toString(), i + 1, settings.http_max_tries, - getOffset(), read_range.end ? toString(*read_range.end) : "unknown", - e.displayText(), - milliseconds_to_wait, settings.http_retry_max_backoff_ms); + LOG_ERROR( + log, + "HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. " + "Error: {}. (Current backoff wait is {}/{} ms)", + uri.toString(), + i + 1, + settings.http_max_tries, + getOffset(), + read_range.end ? toString(*read_range.end) : "unknown", + e.displayText(), + milliseconds_to_wait, + settings.http_retry_max_backoff_ms); retry_with_range_header = true; exception = std::current_exception(); @@ -529,10 +550,7 @@ namespace detail return true; } - off_t getPosition() override - { - return getOffset() - available(); - } + off_t getPosition() override { return getOffset() - available(); } off_t seek(off_t offset_, int whence) override { @@ -540,12 +558,11 @@ namespace detail throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); if (offset_ < 0) - throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + throw Exception( + "Seek position is out of bounds. 
Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); off_t current_offset = getOffset(); - if (!working_buffer.empty() - && size_t(offset_) >= current_offset - working_buffer.size() - && offset_ < current_offset) + if (!working_buffer.empty() && size_t(offset_) >= current_offset - working_buffer.size() && offset_ < current_offset) { pos = working_buffer.end() - (current_offset - offset_); assert(pos >= working_buffer.begin()); @@ -567,7 +584,6 @@ namespace detail if (impl) { - ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection); impl.reset(); } @@ -580,6 +596,8 @@ namespace detail return offset_; } + SeekableReadBuffer::Range getRemainingReadRange() const override { return {getOffset(), read_range.end}; } + std::string getResponseCookie(const std::string & name, const std::string & def) const { for (const auto & cookie : cookies) @@ -599,10 +617,7 @@ namespace detail next_callback(count()); } - const std::string & getCompressionMethod() const - { - return content_encoding; - } + const std::string & getCompressionMethod() const { return content_encoding; } }; } @@ -611,19 +626,50 @@ class UpdatableSession : public UpdatableSessionBase using Parent = UpdatableSessionBase; public: - UpdatableSession( - const Poco::URI uri, - const ConnectionTimeouts & timeouts_, - const UInt64 max_redirects_) + UpdatableSession(const Poco::URI uri, const ConnectionTimeouts & timeouts_, const UInt64 max_redirects_) : Parent(uri, timeouts_, max_redirects_) { session = makeHTTPSession(initial_uri, timeouts); } - void buildNewSession(const Poco::URI & uri) override + void buildNewSession(const Poco::URI & uri) override { session = makeHTTPSession(uri, timeouts); } +}; + +class RangeGenerator +{ +public: + explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0) + : from(range_start), range_step(range_step_), total_size(total_size_) { - session = makeHTTPSession(uri, timeouts); } + + size_t totalRanges() const { return static_cast(round(static_cast(total_size - from) / range_step)); } + + using Range = std::pair; + + // return upper exclusive range of values, i.e. 
[from_range, to_range> + std::optional nextRange() + { + if (from >= total_size) + { + return std::nullopt; + } + + auto to = from + range_step; + if (to >= total_size) + { + to = total_size; + } + + Range range{from, to}; + from = to; + return std::move(range); + } + +private: + size_t from; + size_t range_step; + size_t total_size; }; class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase> @@ -631,7 +677,7 @@ class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase>; public: - ReadWriteBufferFromHTTP( + ReadWriteBufferFromHTTP( Poco::URI uri_, const std::string & method_, OutStreamCallback out_stream_callback_, @@ -646,14 +692,117 @@ public: bool delay_initialization_ = true, bool use_external_buffer_ = false, bool skip_not_found_url_ = false) - : Parent(std::make_shared(uri_, timeouts, max_redirects), - uri_, credentials_, method_, out_stream_callback_, buffer_size_, - settings_, http_header_entries_, read_range_, remote_host_filter_, - delay_initialization_, use_external_buffer_, skip_not_found_url_) + : Parent( + std::make_shared(uri_, timeouts, max_redirects), + uri_, + credentials_, + method_, + out_stream_callback_, + buffer_size_, + settings_, + http_header_entries_, + read_range_, + remote_host_filter_, + delay_initialization_, + use_external_buffer_, + skip_not_found_url_) { } }; +class RangedReadWriteBufferFromHTTPFactory : public ParallelReadBuffer::ReadBufferFactory +{ + using OutStreamCallback = ReadWriteBufferFromHTTP::OutStreamCallback; + +public: + RangedReadWriteBufferFromHTTPFactory( + size_t total_object_size_, + size_t range_step_, + Poco::URI uri_, + std::string method_, + OutStreamCallback out_stream_callback_, + ConnectionTimeouts timeouts_, + const Poco::Net::HTTPBasicCredentials & credentials_, + UInt64 max_redirects_ = 0, + size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, + ReadSettings settings_ = {}, + ReadWriteBufferFromHTTP::HTTPHeaderEntries http_header_entries_ = {}, + const RemoteHostFilter * remote_host_filter_ = nullptr, + bool delay_initialization_ = true, + bool use_external_buffer_ = false, + bool skip_not_found_url_ = false) + : range_generator(total_object_size_, range_step_) + , total_object_size(total_object_size_) + , range_step(range_step_) + , uri(uri_) + , method(std::move(method_)) + , out_stream_callback(out_stream_callback_) + , timeouts(std::move(timeouts_)) + , credentials(credentials_) + , max_redirects(max_redirects_) + , buffer_size(buffer_size_) + , settings(std::move(settings_)) + , http_header_entries(std::move(http_header_entries_)) + , remote_host_filter(remote_host_filter_) + , delay_initialization(delay_initialization_) + , use_external_buffer(use_external_buffer_) + , skip_not_found_url(skip_not_found_url_) + { + } + + SeekableReadBufferPtr getReader() override + { + const auto next_range = range_generator.nextRange(); + if (!next_range) + { + return nullptr; + } + + return std::make_shared( + uri, + method, + out_stream_callback, + timeouts, + credentials, + max_redirects, + buffer_size, + settings, + http_header_entries, + // HTTP Range has inclusive bounds, i.e. 
[from, to] + ReadWriteBufferFromHTTP::Range{next_range->first, next_range->second - 1}, + remote_host_filter, + delay_initialization, + use_external_buffer, + skip_not_found_url); + } + + off_t seek(off_t off, [[maybe_unused]] int whence) override + { + range_generator = RangeGenerator{total_object_size, range_step, static_cast(off)}; + return off; + } + + std::optional getTotalSize() override { return total_object_size; } + +private: + RangeGenerator range_generator; + size_t total_object_size; + size_t range_step; + Poco::URI uri; + std::string method; + OutStreamCallback out_stream_callback; + ConnectionTimeouts timeouts; + const Poco::Net::HTTPBasicCredentials & credentials; + UInt64 max_redirects; + size_t buffer_size; + ReadSettings settings; + ReadWriteBufferFromHTTP::HTTPHeaderEntries http_header_entries; + const RemoteHostFilter * remote_host_filter; + bool delay_initialization; + bool use_external_buffer; + bool skip_not_found_url; +}; + class UpdatablePooledSession : public UpdatableSessionBase { using Parent = UpdatableSessionBase; @@ -662,20 +811,14 @@ private: size_t per_endpoint_pool_size; public: - explicit UpdatablePooledSession(const Poco::URI uri, - const ConnectionTimeouts & timeouts_, - const UInt64 max_redirects_, - size_t per_endpoint_pool_size_) - : Parent(uri, timeouts_, max_redirects_) - , per_endpoint_pool_size { per_endpoint_pool_size_ } + explicit UpdatablePooledSession( + const Poco::URI uri, const ConnectionTimeouts & timeouts_, const UInt64 max_redirects_, size_t per_endpoint_pool_size_) + : Parent(uri, timeouts_, max_redirects_), per_endpoint_pool_size{per_endpoint_pool_size_} { session = makePooledHTTPSession(initial_uri, timeouts, per_endpoint_pool_size); } - void buildNewSession(const Poco::URI & uri) override - { - session = makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size); - } + void buildNewSession(const Poco::URI & uri) override { session = makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size); } }; class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase> @@ -683,7 +826,8 @@ class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase using Parent = detail::ReadWriteBufferFromHTTPBase>; public: - explicit PooledReadWriteBufferFromHTTP(Poco::URI uri_, + explicit PooledReadWriteBufferFromHTTP( + Poco::URI uri_, const std::string & method_ = {}, OutStreamCallback out_stream_callback_ = {}, const ConnectionTimeouts & timeouts_ = {}, @@ -691,12 +835,13 @@ public: size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, const UInt64 max_redirects = 0, size_t max_connections_per_endpoint = DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT) - : Parent(std::make_shared(uri_, timeouts_, max_redirects, max_connections_per_endpoint), - uri_, - credentials_, - method_, - out_stream_callback_, - buffer_size_) + : Parent( + std::make_shared(uri_, timeouts_, max_redirects, max_connections_per_endpoint), + uri_, + credentials_, + method_, + out_stream_callback_, + buffer_size_) { } }; diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index fb9752ae391..181ac9aed7e 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -372,8 +372,8 @@ SetPtr makeExplicitSet( element_type = low_cardinality_type->getDictionaryType(); auto set_key = PreparedSetKey::forLiteral(*right_arg, set_element_types); - if (prepared_sets.count(set_key)) - return prepared_sets.at(set_key); /// Already prepared. 
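// --- Illustrative example (not part of the patch) ---------------------------------
// How RangeGenerator's half-open ranges [from, to) map onto the inclusive HTTP Range
// bounds [from, to - 1] that the factory above passes to each ReadWriteBufferFromHTTP.
// DemoRangeGenerator is a standalone re-implementation for illustration only.

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <optional>
#include <utility>

struct DemoRangeGenerator
{
    size_t from, step, total;

    std::optional<std::pair<size_t, size_t>> nextRange()
    {
        if (from >= total)
            return std::nullopt;
        size_t to = std::min(from + step, total); // clamp the last part to the object size
        auto range = std::make_pair(from, to);
        from = to;
        return range;
    }
};

int main()
{
    DemoRangeGenerator gen{0, 4, 10}; // a 10-byte object split into 4-byte parts
    while (auto r = gen.nextRange())
        // Prints bytes=0-3, bytes=4-7, bytes=8-9: the inclusive right bound is to - 1.
        std::cout << "bytes=" << r->first << '-' << (r->second - 1) << '\n';
}
// -----------------------------------------------------------------------------------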
+ if (auto it = prepared_sets.find(set_key); it != prepared_sets.end()) + return it->second; /// Already prepared. Block block; const auto & right_arg_func = std::dynamic_pointer_cast<ASTFunction>(right_arg); @@ -388,7 +388,7 @@ SetPtr makeExplicitSet( set->insertFromBlock(block.getColumnsWithTypeAndName()); set->finishInsert(); - prepared_sets[set_key] = set; + prepared_sets.emplace(set_key, set); return set; } @@ -707,7 +707,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat if (tid != 0) tuple_ast = tuple_ast->clone(); - auto literal = std::make_shared<ASTLiteral>(UInt64(++tid)); + auto literal = std::make_shared<ASTLiteral>(UInt64{++tid}); visit(*literal, literal, data); auto func = makeASTFunction("tupleElement", tuple_ast, literal); @@ -814,14 +814,13 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (!data.only_consts) { /// We are in the part of the tree that we are not going to compute. You just need to define types. - /// Do not subquery and create sets. We replace "in*" function to "in*IgnoreSet". + /// Do not evaluate subqueries or create sets. We replace the "in*" functions with "in*IgnoreSet". auto argument_name = node.arguments->children.at(0)->getColumnName(); - data.addFunction( - FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()), - { argument_name, argument_name }, - column_name); + data.addFunction( + FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()), + {argument_name, argument_name}, + column_name); } return; } @@ -1145,8 +1144,8 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su if (no_subqueries) return {}; auto set_key = PreparedSetKey::forSubquery(*right_in_operand); - if (data.prepared_sets.count(set_key)) - return data.prepared_sets.at(set_key); + if (auto it = data.prepared_sets.find(set_key); it != data.prepared_sets.end()) + return it->second; /// A special case is if the name of the table is specified on the right side of the IN statement, /// and the table has the type Set (a previously prepared set). @@ -1160,7 +1159,7 @@ StorageSet * storage_set = dynamic_cast<StorageSet *>(table.get()); if (storage_set) { - data.prepared_sets[set_key] = storage_set->getSet(); + data.prepared_sets.emplace(set_key, storage_set->getSet()); return storage_set->getSet(); } } @@ -1174,7 +1173,7 @@ /// If you already created a Set with the same subquery / table.
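// --- Illustrative example (not part of the patch) ---------------------------------
// The change above replaces count()+at() (two hash lookups) with a single find(),
// and operator[] insertion with emplace(). A minimal sketch of the idiom, with a
// hypothetical int key and a shared_ptr value standing in for the real set types:

#include <iostream>
#include <memory>
#include <unordered_map>

using DemoSetPtr = std::shared_ptr<int>;

DemoSetPtr getOrCreate(std::unordered_map<int, DemoSetPtr> & prepared, int key)
{
    if (auto it = prepared.find(key); it != prepared.end())
        return it->second; // one lookup instead of count() followed by at()

    auto set = std::make_shared<int>(42); // expensive construction happens only on a miss
    prepared.emplace(key, set);           // no default-construct-then-assign as with operator[]
    return set;
}

int main()
{
    std::unordered_map<int, DemoSetPtr> prepared;
    std::cout << *getOrCreate(prepared, 1) << '\n'; // creates
    std::cout << *getOrCreate(prepared, 1) << '\n'; // returns the cached entry
}
// -----------------------------------------------------------------------------------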
if (subquery_for_set.set) { - data.prepared_sets[set_key] = subquery_for_set.set; + data.prepared_sets.emplace(set_key, subquery_for_set.set); return subquery_for_set.set; } @@ -1196,7 +1195,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su } subquery_for_set.set = set; - data.prepared_sets[set_key] = set; + data.prepared_sets.emplace(set_key, set); return set; } else diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index b6b67bac81c..342cc9eef9d 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -10,6 +10,7 @@ namespace DB { +class ASTExpressionList; class ASTFunction; class ExpressionActions; @@ -89,10 +90,7 @@ struct ScopeStack : WithContext void addColumn(ColumnWithTypeAndName column); void addAlias(const std::string & name, std::string alias); void addArrayJoin(const std::string & source_name, std::string result_name); - void addFunction( - const FunctionOverloadResolverPtr & function, - const Names & argument_names, - std::string result_name); + void addFunction(const FunctionOverloadResolverPtr & function, const Names & argument_names, std::string result_name); ActionsDAGPtr popLevel(); diff --git a/src/Interpreters/CatBoostModel.cpp b/src/Interpreters/CatBoostModel.cpp index 1b6e30a0959..cffaa81c4f0 100644 --- a/src/Interpreters/CatBoostModel.cpp +++ b/src/Interpreters/CatBoostModel.cpp @@ -26,10 +26,10 @@ extern const int CANNOT_LOAD_CATBOOST_MODEL; extern const int CANNOT_APPLY_CATBOOST_MODEL; } - /// CatBoost wrapper interface functions. -struct CatBoostWrapperAPI +class CatBoostWrapperAPI { +public: using ModelCalcerHandle = void; ModelCalcerHandle * (* ModelCalcerCreate)(); // NOLINT @@ -68,9 +68,6 @@ struct CatBoostWrapperAPI }; -namespace -{ - class CatBoostModelHolder { private: @@ -84,7 +81,61 @@ public: }; -class CatBoostModelImpl : public ICatBoostModel +/// Holds CatBoost wrapper library and provides wrapper interface. 
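// --- Illustrative example (not part of the patch) ---------------------------------
// The holder defined below caches one library handle per path behind a mutex and
// reloads only when the path changes; existing users keep the old handle alive
// through the shared_ptr. A self-contained sketch of that caching pattern (DemoLib
// is a hypothetical stand-in for the real SharedLibrary wrapper):

#include <memory>
#include <mutex>
#include <string>

struct DemoLib
{
    explicit DemoLib(std::string path_) : path(std::move(path_)) { /* dlopen + symbol lookup would go here */ }
    std::string path;
};

std::shared_ptr<DemoLib> getLibHolder(const std::string & lib_path)
{
    static std::shared_ptr<DemoLib> ptr; // cache shared by all callers
    static std::mutex mutex;

    std::lock_guard lock(mutex);
    if (!ptr || ptr->path != lib_path)
        ptr = std::make_shared<DemoLib>(lib_path); // (re)load on first use or on a new path
    return ptr;
}

int main()
{
    auto a = getLibHolder("/tmp/lib.so");
    auto b = getLibHolder("/tmp/lib.so");
    return a == b ? 0 : 1; // the second call returns the cached instance
}
// -----------------------------------------------------------------------------------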
+class CatBoostLibHolder +{ +public: + explicit CatBoostLibHolder(std::string lib_path_) : lib_path(std::move(lib_path_)), lib(lib_path) { initAPI(); } + + const CatBoostWrapperAPI & getAPI() const { return api; } + const std::string & getCurrentPath() const { return lib_path; } + +private: + CatBoostWrapperAPI api; + std::string lib_path; + SharedLibrary lib; + + void initAPI() + { + load(api.ModelCalcerCreate, "ModelCalcerCreate"); + load(api.ModelCalcerDelete, "ModelCalcerDelete"); + load(api.GetErrorString, "GetErrorString"); + load(api.LoadFullModelFromFile, "LoadFullModelFromFile"); + load(api.CalcModelPredictionFlat, "CalcModelPredictionFlat"); + load(api.CalcModelPrediction, "CalcModelPrediction"); + load(api.CalcModelPredictionWithHashedCatFeatures, "CalcModelPredictionWithHashedCatFeatures"); + load(api.GetStringCatFeatureHash, "GetStringCatFeatureHash"); + load(api.GetIntegerCatFeatureHash, "GetIntegerCatFeatureHash"); + load(api.GetFloatFeaturesCount, "GetFloatFeaturesCount"); + load(api.GetCatFeaturesCount, "GetCatFeaturesCount"); + tryLoad(api.CheckModelMetadataHasKey, "CheckModelMetadataHasKey"); + tryLoad(api.GetModelInfoValueSize, "GetModelInfoValueSize"); + tryLoad(api.GetModelInfoValue, "GetModelInfoValue"); + tryLoad(api.GetTreeCount, "GetTreeCount"); + tryLoad(api.GetDimensionsCount, "GetDimensionsCount"); + } + + template <typename T> + void load(T& func, const std::string & name) { func = lib.get<T>(name); } + + template <typename T> + void tryLoad(T& func, const std::string & name) { func = lib.tryGet<T>(name); } +}; + +std::shared_ptr<CatBoostLibHolder> getCatBoostWrapperHolder(const std::string & lib_path) +{ + static std::shared_ptr<CatBoostLibHolder> ptr; + static std::mutex mutex; + + std::lock_guard lock(mutex); + + if (!ptr || ptr->getCurrentPath() != lib_path) + ptr = std::make_shared<CatBoostLibHolder>(lib_path); + + return ptr; +} + +class CatBoostModelImpl +{ +public: + CatBoostModelImpl(const CatBoostWrapperAPI * api_, const std::string & model_path) : api(api_) @@ -92,13 +143,15 @@ handle = std::make_unique<CatBoostModelHolder>(api); if (!handle) { - std::string msg = "Cannot create CatBoost model: "; - throw Exception(msg + api->GetErrorString(), ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL); + throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL, + "Cannot create CatBoost model: {}", + api->GetErrorString()); } if (!api->LoadFullModelFromFile(handle->get(), model_path.c_str())) { - std::string msg = "Cannot load CatBoost model: "; - throw Exception(msg + api->GetErrorString(), ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL); + throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL, + "Cannot load CatBoost model: {}", + api->GetErrorString()); } float_features_count = api->GetFloatFeaturesCount(handle->get()); @@ -108,32 +161,22 @@ tree_count = api->GetDimensionsCount(handle->get()); } - ColumnPtr evaluate(const ColumnRawPtrs & columns) const override + ColumnPtr evaluate(const ColumnRawPtrs & columns) const { if (columns.empty()) - throw Exception("Got empty columns list for CatBoost model.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got empty columns list for CatBoost model."); if (columns.size() != float_features_count + cat_features_count) - { - std::string msg; - { - WriteBufferFromString buffer(msg); - buffer << "Number of columns is different with number of features: "; - buffer << columns.size() << " vs " << float_features_count << " + " << cat_features_count; - } - throw Exception(msg, ErrorCodes::BAD_ARGUMENTS); - } + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Number of columns doesn't match number of features:
columns size {} float features size {} + cat features size {}", + columns.size(), + float_features_count, + cat_features_count); for (size_t i = 0; i < float_features_count; ++i) { if (!columns[i]->isNumeric()) { - std::string msg; - { - WriteBufferFromString buffer(msg); - buffer << "Column " << i << " should be numeric to make float feature."; - } - throw Exception(msg, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric to make float feature.", i); } } @@ -142,16 +185,13 @@ { const auto * column = columns[i]; if (column->isNumeric()) + { cat_features_are_strings = false; + } else if (!(typeid_cast(column) || typeid_cast(column))) { - std::string msg; - { - WriteBufferFromString buffer(msg); - buffer << "Column " << i << " should be numeric or string."; - } - throw Exception(msg, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric or string.", i); } } @@ -187,9 +227,9 @@ return ColumnTuple::create(std::move(mutable_columns)); } - size_t getFloatFeaturesCount() const override { return float_features_count; } - size_t getCatFeaturesCount() const override { return cat_features_count; } - size_t getTreeCount() const override { return tree_count; } + size_t getFloatFeaturesCount() const { return float_features_count; } + size_t getCatFeaturesCount() const { return cat_features_count; } + size_t getTreeCount() const { return tree_count; } private: std::unique_ptr<CatBoostModelHolder> handle; @@ -435,66 +475,6 @@ private: } }; - -/// Holds CatBoost wrapper library and provides wrapper interface. -class CatBoostLibHolder: public CatBoostWrapperAPIProvider -{ -public: - explicit CatBoostLibHolder(std::string lib_path_) : lib_path(std::move(lib_path_)), lib(lib_path) { initAPI(); } - - const CatBoostWrapperAPI & getAPI() const override { return api; } - const std::string & getCurrentPath() const { return lib_path; } - -private: - CatBoostWrapperAPI api; - std::string lib_path; - SharedLibrary lib; - - void initAPI(); - - template - void load(T& func, const std::string & name) { func = lib.get(name); } - - template - void tryLoad(T& func, const std::string & name) { func = lib.tryGet(name); } -}; - -void CatBoostLibHolder::initAPI() -{ - load(api.ModelCalcerCreate, "ModelCalcerCreate"); - load(api.ModelCalcerDelete, "ModelCalcerDelete"); - load(api.GetErrorString, "GetErrorString"); - load(api.LoadFullModelFromFile, "LoadFullModelFromFile"); - load(api.CalcModelPredictionFlat, "CalcModelPredictionFlat"); - load(api.CalcModelPrediction, "CalcModelPrediction"); - load(api.CalcModelPredictionWithHashedCatFeatures, "CalcModelPredictionWithHashedCatFeatures"); - load(api.GetStringCatFeatureHash, "GetStringCatFeatureHash"); - load(api.GetIntegerCatFeatureHash, "GetIntegerCatFeatureHash"); - load(api.GetFloatFeaturesCount, "GetFloatFeaturesCount"); - load(api.GetCatFeaturesCount, "GetCatFeaturesCount"); - tryLoad(api.CheckModelMetadataHasKey, "CheckModelMetadataHasKey"); - tryLoad(api.GetModelInfoValueSize, "GetModelInfoValueSize"); - tryLoad(api.GetModelInfoValue, "GetModelInfoValue"); - tryLoad(api.GetTreeCount, "GetTreeCount"); - tryLoad(api.GetDimensionsCount, "GetDimensionsCount"); -} - -std::shared_ptr getCatBoostWrapperHolder(const std::string & lib_path) -{ - static std::shared_ptr ptr; - static std::mutex mutex; - - std::lock_guard lock(mutex); - - if (!ptr || ptr->getCurrentPath() != lib_path) - ptr = std::make_shared(lib_path); - - return ptr; -} - -} - - CatBoostModel::CatBoostModel(std::string name_, std::string
model_path_, std::string lib_path_, const ExternalLoadableLifetime & lifetime_) : name(std::move(name_)), model_path(std::move(model_path_)), lib_path(std::move(lib_path_)), lifetime(lifetime_) @@ -502,43 +482,28 @@ CatBoostModel::CatBoostModel(std::string name_, std::string model_path_, std::st api_provider = getCatBoostWrapperHolder(lib_path); api = &api_provider->getAPI(); model = std::make_unique(api, model_path); - float_features_count = model->getFloatFeaturesCount(); - cat_features_count = model->getCatFeaturesCount(); - tree_count = model->getTreeCount(); } -const ExternalLoadableLifetime & CatBoostModel::getLifetime() const -{ - return lifetime; -} - -bool CatBoostModel::isModified() const -{ - return true; -} - -std::shared_ptr CatBoostModel::clone() const -{ - return std::make_shared(name, model_path, lib_path, lifetime); -} +CatBoostModel::~CatBoostModel() = default; size_t CatBoostModel::getFloatFeaturesCount() const { - return float_features_count; + return model->getFloatFeaturesCount(); } size_t CatBoostModel::getCatFeaturesCount() const { - return cat_features_count; + return model->getCatFeaturesCount(); } size_t CatBoostModel::getTreeCount() const { - return tree_count; + return model->getTreeCount(); } DataTypePtr CatBoostModel::getReturnType() const { + size_t tree_count = getTreeCount(); auto type = std::make_shared(); if (tree_count == 1) return type; @@ -552,6 +517,7 @@ ColumnPtr CatBoostModel::evaluate(const ColumnRawPtrs & columns) const { if (!model) throw Exception("CatBoost model was not loaded.", ErrorCodes::LOGICAL_ERROR); + return model->evaluate(columns); } diff --git a/src/Interpreters/CatBoostModel.h b/src/Interpreters/CatBoostModel.h index 51bf0ba94f5..7bb1df92b67 100644 --- a/src/Interpreters/CatBoostModel.h +++ b/src/Interpreters/CatBoostModel.h @@ -8,47 +8,32 @@ namespace DB { -/// CatBoost wrapper interface functions. -struct CatBoostWrapperAPI; -class CatBoostWrapperAPIProvider -{ -public: - virtual ~CatBoostWrapperAPIProvider() = default; - virtual const CatBoostWrapperAPI & getAPI() const = 0; -}; - -/// CatBoost model interface. -class ICatBoostModel -{ -public: - virtual ~ICatBoostModel() = default; - /// Evaluate model. Use first `float_features_count` columns as float features, - /// the others `cat_features_count` as categorical features. - virtual ColumnPtr evaluate(const ColumnRawPtrs & columns) const = 0; - - virtual size_t getFloatFeaturesCount() const = 0; - virtual size_t getCatFeaturesCount() const = 0; - virtual size_t getTreeCount() const = 0; -}; +class CatBoostLibHolder; +class CatBoostWrapperAPI; +class CatBoostModelImpl; class IDataType; using DataTypePtr = std::shared_ptr; /// General ML model evaluator interface. -class IModel : public IExternalLoadable +class IMLModel : public IExternalLoadable { public: + IMLModel() = default; virtual ColumnPtr evaluate(const ColumnRawPtrs & columns) const = 0; virtual std::string getTypeName() const = 0; virtual DataTypePtr getReturnType() const = 0; + virtual ~IMLModel() override = default; }; -class CatBoostModel : public IModel +class CatBoostModel : public IMLModel { public: CatBoostModel(std::string name, std::string model_path, std::string lib_path, const ExternalLoadableLifetime & lifetime); + ~CatBoostModel() override; + ColumnPtr evaluate(const ColumnRawPtrs & columns) const override; std::string getTypeName() const override { return "catboost"; } @@ -59,29 +44,28 @@ public: /// IExternalLoadable interface. 
- const ExternalLoadableLifetime & getLifetime() const override; + const ExternalLoadableLifetime & getLifetime() const override { return lifetime; } std::string getLoadableName() const override { return name; } bool supportUpdates() const override { return true; } - bool isModified() const override; + bool isModified() const override { return true; } - std::shared_ptr clone() const override; + std::shared_ptr clone() const override + { + return std::make_shared(name, model_path, lib_path, lifetime); + } private: const std::string name; std::string model_path; std::string lib_path; ExternalLoadableLifetime lifetime; - std::shared_ptr api_provider; + std::shared_ptr api_provider; const CatBoostWrapperAPI * api; - std::unique_ptr model; - - size_t float_features_count; - size_t cat_features_count; - size_t tree_count; + std::unique_ptr model; void init(); }; diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index d558d1cfd67..1039fac6883 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -132,7 +132,9 @@ Cluster::Address::Address( bool secure_, Int64 priority_, UInt32 shard_index_, - UInt32 replica_index_) + UInt32 replica_index_, + String cluster_name_, + String cluster_secret_) : user(user_), password(password_) { bool can_be_local = true; @@ -164,6 +166,8 @@ Cluster::Address::Address( is_local = can_be_local && isLocal(clickhouse_port); shard_index = shard_index_; replica_index = replica_index_; + cluster = cluster_name_; + cluster_secret = cluster_secret_; } @@ -537,10 +541,14 @@ Cluster::Cluster( bool treat_local_as_remote, bool treat_local_port_as_remote, bool secure, - Int64 priority) + Int64 priority, + String cluster_name, + String cluster_secret) { UInt32 current_shard_num = 1; + secret = cluster_secret; + for (const auto & shard : names) { Addresses current; @@ -554,7 +562,9 @@ Cluster::Cluster( secure, priority, current_shard_num, - current.size() + 1); + current.size() + 1, + cluster_name, + cluster_secret); addresses_with_failover.emplace_back(current); @@ -690,6 +700,9 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti } } + secret = from.secret; + name = from.name; + initMisc(); } @@ -704,6 +717,9 @@ Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector addresses_with_failover.emplace_back(from.addresses_with_failover.at(index)); } + secret = from.secret; + name = from.name; + initMisc(); } diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index e9f26c21089..13f19f7c0ed 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -55,7 +55,9 @@ public: bool treat_local_as_remote, bool treat_local_port_as_remote, bool secure = false, - Int64 priority = 1); + Int64 priority = 1, + String cluster_name = "", + String cluster_secret = ""); Cluster(const Cluster &)= delete; Cluster & operator=(const Cluster &) = delete; @@ -127,7 +129,9 @@ public: bool secure_ = false, Int64 priority_ = 1, UInt32 shard_index_ = 0, - UInt32 replica_index_ = 0); + UInt32 replica_index_ = 0, + String cluster_name = "", + String cluster_secret_ = ""); /// Returns 'escaped_host_name:port' String toString() const; diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 3e414d5b6de..4d2cdf7dd2c 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -350,6 +350,12 @@ void DDLWorker::scheduleTasks(bool reinitialized) bool maybe_concurrently_deleting = task && !zookeeper->exists(fs::path(task->entry_path) / 
"active"); return task && !maybe_concurrently_deleting && !maybe_currently_processing; } + else if (last_skipped_entry_name.has_value() && !queue_fully_loaded_after_initialization_debug_helper) + { + /// If connection was lost during queue loading + /// we may start processing from finished task (because we don't know yet that it's finished) and it's ok. + return false; + } else { /// Return true if entry should not be scheduled. @@ -365,7 +371,11 @@ void DDLWorker::scheduleTasks(bool reinitialized) String reason; auto task = initAndCheckTask(entry_name, reason, zookeeper); - if (!task) + if (task) + { + queue_fully_loaded_after_initialization_debug_helper = true; + } + else { LOG_DEBUG(log, "Will not execute task {}: {}", entry_name, reason); updateMaxDDLEntryID(entry_name); diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h index dbdf0e94f06..7cdbf880a2b 100644 --- a/src/Interpreters/DDLWorker.h +++ b/src/Interpreters/DDLWorker.h @@ -131,6 +131,9 @@ protected: std::optional first_failed_task_name; std::list current_tasks; + /// This flag is needed for debug assertions only + bool queue_fully_loaded_after_initialization_debug_helper = false; + Coordination::Stat queue_node_stat; std::shared_ptr queue_updated_event = std::make_shared(); std::shared_ptr cleanup_event = std::make_shared(); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 7e150f59694..5877ca35392 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -100,20 +100,9 @@ bool checkPositionalArguments(ASTPtr & argument, const ASTSelectQuery * select_q { auto columns = select_query->select()->children; - const auto * group_by_expr_with_alias = dynamic_cast(argument.get()); - if (group_by_expr_with_alias && !group_by_expr_with_alias->alias.empty()) - { - for (const auto & column : columns) - { - const auto * col_with_alias = dynamic_cast(column.get()); - if (col_with_alias) - { - const auto & alias = col_with_alias->alias; - if (!alias.empty() && alias == group_by_expr_with_alias->alias) - return false; - } - } - } + const auto * expr_with_alias = dynamic_cast(argument.get()); + if (expr_with_alias && !expr_with_alias->alias.empty()) + return false; const auto * ast_literal = typeid_cast(argument.get()); if (!ast_literal) @@ -130,7 +119,7 @@ bool checkPositionalArguments(ASTPtr & argument, const ASTSelectQuery * select_q pos, columns.size()); const auto & column = columns[--pos]; - if (typeid_cast(column.get())) + if (typeid_cast(column.get()) || typeid_cast(column.get())) { argument = column->clone(); } @@ -259,7 +248,7 @@ NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & a if (!array_join_expression_list) return src_columns; - getRootActionsNoMakeSet(array_join_expression_list, true, actions, false); + getRootActionsNoMakeSet(array_join_expression_list, actions, false); auto array_join = addMultipleArrayJoinAction(actions, is_array_join_left); auto sample_columns = actions->getResultColumns(); @@ -294,7 +283,7 @@ NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const const ASTTablesInSelectQueryElement * join = select_query->join(); if (join) { - getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), true, actions, false); + getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), actions, false); auto sample_columns = actions->getNamesAndTypesList(); syntax->analyzed_join->addJoinedColumnsAndCorrectTypes(sample_columns, true); actions = 
std::make_shared(sample_columns); @@ -332,14 +321,14 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) { NameSet unique_keys; ASTs & group_asts = group_by_ast->children; - for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i) + for (ssize_t i = 0; i < static_cast(group_asts.size()); ++i) { ssize_t size = group_asts.size(); if (getContext()->getSettingsRef().enable_positional_arguments) replaceForPositionalArguments(group_asts[i], select_query, ASTSelectQuery::Expression::GROUP_BY); - getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false); + getRootActionsNoMakeSet(group_asts[i], temp_actions, false); const auto & column_name = group_asts[i]->getColumnName(); @@ -405,8 +394,8 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global) { if (do_global) { - GlobalSubqueriesVisitor::Data subqueries_data(getContext(), subquery_depth, isRemoteStorage(), - external_tables, subqueries_for_sets, has_global_subqueries); + GlobalSubqueriesVisitor::Data subqueries_data( + getContext(), subquery_depth, isRemoteStorage(), external_tables, subqueries_for_sets, has_global_subqueries); GlobalSubqueriesVisitor(subqueries_data).visit(query); } } @@ -416,7 +405,7 @@ void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_ { auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name); - if (prepared_sets.count(set_key)) + if (prepared_sets.contains(set_key)) return; /// Already prepared. if (auto set_ptr_from_storage_set = isPlainStorageSetInSubquery(subquery_or_table_name)) @@ -509,33 +498,62 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) } -void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) { LogAST log; - ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth, - sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets, - no_subqueries, false, only_consts, !isRemoteStorage()); + ActionsVisitor::Data visitor_data( + getContext(), + settings.size_limits_for_set, + subquery_depth, + sourceColumns(), + std::move(actions), + prepared_sets, + subqueries_for_sets, + no_makeset_for_subqueries, + false /* no_makeset */, + only_consts, + !isRemoteStorage() /* create_source_for_in */); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } -void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts) { LogAST log; - ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth, - sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets, - no_subqueries, true, only_consts, !isRemoteStorage()); + ActionsVisitor::Data visitor_data( + getContext(), + settings.size_limits_for_set, + subquery_depth, + sourceColumns(), + std::move(actions), + prepared_sets, + subqueries_for_sets, + true /* no_makeset_for_subqueries, no_makeset implies no_makeset_for_subqueries */, + true /* no_makeset */, + only_consts, + !isRemoteStorage() /* create_source_for_in */); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } -void 
ExpressionAnalyzer::getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts) + +void ExpressionAnalyzer::getRootActionsForHaving( + const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) { LogAST log; - ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth, - sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets, - no_subqueries, false, only_consts, true); + ActionsVisitor::Data visitor_data( + getContext(), + settings.size_limits_for_set, + subquery_depth, + sourceColumns(), + std::move(actions), + prepared_sets, + subqueries_for_sets, + no_makeset_for_subqueries, + false /* no_makeset */, + only_consts, + true /* create_source_for_in */); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -547,7 +565,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr { AggregateDescription aggregate; if (node->arguments) - getRootActionsNoMakeSet(node->arguments, true, actions); + getRootActionsNoMakeSet(node->arguments, actions); aggregate.column_name = node->getColumnName(); @@ -746,8 +764,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) // Requiring a constant reference to a shared pointer to non-const AST // doesn't really look sane, but the visitor does indeed require it. // Hence we clone the node (not very sane either, I know). - getRootActionsNoMakeSet(window_function.function_node->clone(), - true, actions); + getRootActionsNoMakeSet(window_function.function_node->clone(), actions); const ASTs & arguments = window_function.function_node->arguments->children; @@ -867,8 +884,7 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi auto array_join = addMultipleArrayJoinAction(step.actions(), is_array_join_left); before_array_join = chain.getLastActions(); - chain.steps.push_back(std::make_unique( - array_join, step.getResultColumns())); + chain.steps.push_back(std::make_unique(array_join, step.getResultColumns())); chain.addStep(); @@ -1099,8 +1115,8 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( } } - chain.steps.emplace_back(std::make_unique( - std::make_shared(std::move(columns)))); + chain.steps.emplace_back( + std::make_unique(std::make_shared(std::move(columns)))); chain.steps.back()->additional_input = std::move(unused_source_columns); chain.getLastActions(); chain.addStep(); @@ -1210,8 +1226,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments( // recursively together with (1b) as ASTFunction::window_definition. if (getSelectQuery()->window()) { - getRootActionsNoMakeSet(getSelectQuery()->window(), - true /* no_subqueries */, step.actions()); + getRootActionsNoMakeSet(getSelectQuery()->window(), step.actions()); } for (const auto & [_, w] : window_descriptions) @@ -1222,8 +1237,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments( // definitions (1a). // Requiring a constant reference to a shared pointer to non-const AST // doesn't really look sane, but the visitor does indeed require it. - getRootActionsNoMakeSet(f.function_node->clone(), - true /* no_subqueries */, step.actions()); + getRootActionsNoMakeSet(f.function_node->clone(), step.actions()); // (2b) Required function argument columns. 
for (const auto & a : f.function_node->arguments->children) @@ -1299,7 +1313,9 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); if (getContext()->getSettingsRef().enable_positional_arguments) + { replaceForPositionalArguments(ast->children.at(0), select_query, ASTSelectQuery::Expression::ORDER_BY); + } } getRootActions(select_query->orderBy(), only_types, step.actions()); @@ -1456,7 +1472,7 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r alias = name; result_columns.emplace_back(name, alias); result_names.push_back(alias); - getRootActions(ast, false, actions_dag); + getRootActions(ast, false /* no_makeset_for_subqueries */, actions_dag); } if (add_aliases) @@ -1496,7 +1512,7 @@ ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const ColumnsWithTypeAn { auto actions = std::make_shared(constant_inputs); - getRootActions(query, true, actions, true); + getRootActions(query, true /* no_makeset_for_subqueries */, actions, true /* only_consts */); return std::make_shared(actions, ExpressionActionsSettings::fromContext(getContext())); } @@ -1513,13 +1529,13 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::simpleSelectActions() } ExpressionAnalysisResult::ExpressionAnalysisResult( - SelectQueryExpressionAnalyzer & query_analyzer, - const StorageMetadataPtr & metadata_snapshot, - bool first_stage_, - bool second_stage_, - bool only_types, - const FilterDAGInfoPtr & filter_info_, - const Block & source_header) + SelectQueryExpressionAnalyzer & query_analyzer, + const StorageMetadataPtr & metadata_snapshot, + bool first_stage_, + bool second_stage_, + bool only_types, + const FilterDAGInfoPtr & filter_info_, + const Block & source_header) : first_stage(first_stage_) , second_stage(second_stage_) , need_aggregate(query_analyzer.hasAggregation()) diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index b6bb3c5fad5..5dcbdc2486b 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -172,15 +172,15 @@ protected: ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const; - void getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the * prepared sets would not be applicable for MergeTree index optimization. 
*/ - void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts = false); - void getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActionsForHaving(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions, * Create a set of columns aggregated_columns resulting after the aggregation, if any, diff --git a/src/Interpreters/ExternalModelsLoader.h b/src/Interpreters/ExternalModelsLoader.h index 18e1f1123f6..042906bee9e 100644 --- a/src/Interpreters/ExternalModelsLoader.h +++ b/src/Interpreters/ExternalModelsLoader.h @@ -15,14 +15,14 @@ namespace DB class ExternalModelsLoader : public ExternalLoader, WithContext { public: - using ModelPtr = std::shared_ptr; + using ModelPtr = std::shared_ptr; /// Models will be loaded immediately and then will be updated in separate thread, each 'reload_period' seconds. explicit ExternalModelsLoader(ContextPtr context_); ModelPtr getModel(const std::string & model_name) const { - return std::static_pointer_cast(load(model_name)); + return std::static_pointer_cast(load(model_name)); } void reloadModel(const std::string & model_name) const diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 5d2df583b9e..50ce7977534 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,11 @@ #include #include #include +#include #include +#include +#include +#include namespace DB { @@ -34,7 +39,6 @@ public: { size_t subquery_depth; bool is_remote; - size_t external_table_id; TemporaryTablesMapping & external_tables; SubqueriesForSets & subqueries_for_sets; bool & has_global_subqueries; @@ -49,7 +53,6 @@ public: : WithContext(context_) , subquery_depth(subquery_depth_) , is_remote(is_remote_) - , external_table_id(1) , external_tables(tables) , subqueries_for_sets(subqueries_for_sets_) , has_global_subqueries(has_global_subqueries_) @@ -92,48 +95,33 @@ public: { /// If this is already an external table, you do not need to add anything. Just remember its presence. auto temporary_table_name = getIdentifierName(subquery_or_table_name); - bool exists_in_local_map = external_tables.end() != external_tables.find(temporary_table_name); + bool exists_in_local_map = external_tables.contains(temporary_table_name); bool exists_in_context = static_cast(getContext()->tryResolveStorageID( StorageID("", temporary_table_name), Context::ResolveExternal)); if (exists_in_local_map || exists_in_context) return; } - String external_table_name = subquery_or_table_name->tryGetAlias(); - if (external_table_name.empty()) + String alias = subquery_or_table_name->tryGetAlias(); + String external_table_name; + if (alias.empty()) { - /// Generate the name for the external table. 
- external_table_name = "_data" + toString(external_table_id); - while (external_tables.count(external_table_name)) - { - ++external_table_id; - external_table_name = "_data" + toString(external_table_id); - } + auto hash = subquery_or_table_name->getTreeHash(); + external_table_name = fmt::format("_data_{}_{}", hash.first, hash.second); } - - auto interpreter = interpretSubquery(subquery_or_table_name, getContext(), subquery_depth, {}); - - Block sample = interpreter->getSampleBlock(); - NamesAndTypesList columns = sample.getNamesAndTypesList(); - - auto external_storage_holder = std::make_shared( - getContext(), - ColumnsDescription{columns}, - ConstraintsDescription{}, - nullptr, - /*create_for_global_subquery*/ true); - StoragePtr external_storage = external_storage_holder->getTable(); + else + external_table_name = alias; /** We replace the subquery with the name of the temporary table. * It is in this form, the request will go to the remote server. * This temporary table will go to the remote server, and on its side, * instead of doing a subquery, you just need to read it. + * TODO We can do better than using alias to name external tables */ auto database_and_table_name = std::make_shared(external_table_name); if (set_alias) { - String alias = subquery_or_table_name->tryGetAlias(); if (auto * table_name = subquery_or_table_name->as()) if (alias.empty()) alias = table_name->shortName(); @@ -151,8 +139,27 @@ public: else ast = database_and_table_name; - external_tables[external_table_name] = external_storage_holder; + if (external_tables.contains(external_table_name)) + return; + auto interpreter = interpretSubquery(subquery_or_table_name, getContext(), subquery_depth, {}); + + Block sample = interpreter->getSampleBlock(); + NamesAndTypesList columns = sample.getNamesAndTypesList(); + + auto external_storage_holder = std::make_shared( + getContext(), + ColumnsDescription{columns}, + ConstraintsDescription{}, + nullptr, + /*create_for_global_subquery*/ true); + StoragePtr external_storage = external_storage_holder->getTable(); + + external_tables.emplace(external_table_name, external_storage_holder); + + /// We need to materialize external tables immediately because reading from distributed + /// tables might generate local plans which can refer to external tables during index + /// analysis. It's too late to populate the external table via CreatingSetsTransform. if (getContext()->getSettingsRef().use_index_for_in_with_subqueries) { auto external_table = external_storage_holder->getTable(); diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index c56529b3214..00568cfdf08 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -962,18 +962,29 @@ public: /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin. 
diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp
index c56529b3214..00568cfdf08 100644
--- a/src/Interpreters/HashJoin.cpp
+++ b/src/Interpreters/HashJoin.cpp
@@ -962,18 +962,29 @@ public:
         /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin.
             for (size_t j = 0, size = right_indexes.size(); j < size; ++j)
             {
-                const auto & column = *block.getByPosition(right_indexes[j]).column;
-                if (auto * nullable_col = typeid_cast<ColumnNullable *>(columns[j].get()); nullable_col && !column.isNullable())
-                    nullable_col->insertFromNotNullable(column, row_num);
+                auto column_from_block = block.getByPosition(right_indexes[j]);
+                if (type_name[j].type->lowCardinality() != column_from_block.type->lowCardinality())
+                {
+                    JoinCommon::changeLowCardinalityInplace(column_from_block);
+                }
+
+                if (auto * nullable_col = typeid_cast<ColumnNullable *>(columns[j].get());
+                    nullable_col && !column_from_block.column->isNullable())
+                    nullable_col->insertFromNotNullable(*column_from_block.column, row_num);
                 else
-                    columns[j]->insertFrom(column, row_num);
+                    columns[j]->insertFrom(*column_from_block.column, row_num);
             }
         }
         else
         {
             for (size_t j = 0, size = right_indexes.size(); j < size; ++j)
             {
-                columns[j]->insertFrom(*block.getByPosition(right_indexes[j]).column, row_num);
+                auto column_from_block = block.getByPosition(right_indexes[j]);
+                if (type_name[j].type->lowCardinality() != column_from_block.type->lowCardinality())
+                {
+                    JoinCommon::changeLowCardinalityInplace(column_from_block);
+                }
+                columns[j]->insertFrom(*column_from_block.column, row_num);
             }
         }
     }
@@ -1013,6 +1024,7 @@ private:
     void addColumn(const ColumnWithTypeAndName & src_column, const std::string & qualified_name)
     {
+
         columns.push_back(src_column.column->cloneEmpty());
         columns.back()->reserve(src_column.column->size());
         type_name.emplace_back(src_column.type, src_column.name, qualified_name);
@@ -1237,16 +1249,16 @@ NO_INLINE IColumn::Filter joinRightColumns(
                 {
                     const IColumn & left_asof_key = added_columns.leftAsofKey();

-                    auto [block, row_num] = mapped->findAsof(left_asof_key, i);
-                    if (block)
+                    auto row_ref = mapped->findAsof(left_asof_key, i);
+                    if (row_ref.block)
                     {
                         setUsed(filter, i);
                         if constexpr (multiple_disjuncts)
-                            used_flags.template setUsed(block, row_num, 0);
+                            used_flags.template setUsed(row_ref.block, row_ref.row_num, 0);
                         else
                             used_flags.template setUsed(find_result);

-                        added_columns.appendFromBlock(*block, row_num);
+                        added_columns.appendFromBlock(*row_ref.block, row_ref.row_num);
                     }
                     else
                         addNotFoundRow(added_columns, current_offset);
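Before continuing, a note on the appendFromBlock loop above: the result columns may be Nullable even when the right-hand block's columns are not (the joinGetOrNull case), so a not-nullable value has to be inserted together with an explicit "not NULL" mark. A self-contained sketch of that half of the logic, with toy structs standing in for IColumn/ColumnNullable; none of these types are ClickHouse's real ones:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Toy stand-ins for ClickHouse's ColumnVector and ColumnNullable.
    struct PlainColumn { std::vector<int64_t> data; };

    struct NullableColumn
    {
        std::vector<int64_t> data;
        std::vector<uint8_t> null_map; // 1 = NULL

        // Analogue of ColumnNullable::insertFromNotNullable: take a value from
        // a not-nullable source and record "not NULL" in the null map.
        void insertFromNotNullable(const PlainColumn & src, size_t row)
        {
            data.push_back(src.data[row]);
            null_map.push_back(0);
        }
    };

    int main()
    {
        PlainColumn right{{10, 20, 30}};
        NullableColumn result;
        result.insertFromNotNullable(right, 1);
        std::cout << result.data[0] << " null=" << int(result.null_map[0]) << '\n'; // 20 null=0
    }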
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index f7dbd1c8b65..d8923b3cc42 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -1180,11 +1180,10 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
         /// old instance of the storage. For example, AsynchronousMetrics may cause ATTACH to fail,
         /// so we allow waiting here. If database_atomic_wait_for_drop_and_detach_synchronously is disabled
         /// and old storage instance still exists it will throw exception.
-        bool throw_if_table_in_use = getContext()->getSettingsRef().database_atomic_wait_for_drop_and_detach_synchronously;
-        if (throw_if_table_in_use)
-            database->checkDetachedTableNotInUse(create.uuid);
-        else
+        if (getContext()->getSettingsRef().database_atomic_wait_for_drop_and_detach_synchronously)
             database->waitDetachedTableNotInUse(create.uuid);
+        else
+            database->checkDetachedTableNotInUse(create.uuid);
     }

     StoragePtr res;
diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index ce0929f9c6e..27ed8438fc8 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -359,6 +359,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
         table_lock.reset();
         table_id = StorageID::createEmpty();
         metadata_snapshot = nullptr;
+        storage_snapshot = nullptr;
     }
 }

@@ -1241,10 +1242,6 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<Pipe> prepared_pipe)
-            hasGlobalSubqueries() && !subqueries_for_sets.empty())
-            executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets);
     }

     if (expressions.second_stage || from_aggregation_stage)
@@ -1427,7 +1424,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<Pipe> prepared_pipe)
-            hasGlobalSubqueries()))
+        if (!subqueries_for_sets.empty())
             executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets);
     }

@@ -1891,7 +1888,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan)
         && limit_length <= std::numeric_limits<UInt64>::max() - limit_offset
         && limit_length + limit_offset < max_block_size)
     {
-        max_block_size = std::max(UInt64(1), limit_length + limit_offset);
+        max_block_size = std::max(UInt64{1}, limit_length + limit_offset);
         max_threads_execute_query = max_streams = 1;
     }

@@ -2577,11 +2574,11 @@ void InterpreterSelectQuery::executeExtremes(QueryPlan & query_plan)

 void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_plan, SubqueriesForSets & subqueries_for_sets)
 {
-    const auto & input_order_info = query_info.input_order_info
-        ? query_info.input_order_info
-        : (query_info.projection ? query_info.projection->input_order_info : nullptr);
-    if (input_order_info)
-        executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins");
+    // const auto & input_order_info = query_info.input_order_info
+    //     ? query_info.input_order_info
+    //     : (query_info.projection ? query_info.projection->input_order_info : nullptr);
+    // if (input_order_info)
+    //     executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins");

     const Settings & settings = context->getSettingsRef();

diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h
index c90c271679c..0994f34d003 100644
--- a/src/Interpreters/ProcessList.h
+++ b/src/Interpreters/ProcessList.h
@@ -351,15 +351,6 @@ public:
         max_size = max_size_;
     }

-    // Before calling this method you should be sure
-    // that lock is acquired.
-    template <typename F>
-    void processEachQueryStatus(F && func) const
-    {
-        for (auto && query : processes)
-            func(query);
-    }
-
     void setMaxInsertQueriesAmount(size_t max_insert_queries_amount_)
     {
         std::lock_guard lock(mutex);
diff --git a/src/Interpreters/RowRefs.cpp b/src/Interpreters/RowRefs.cpp
index 39fc7965eb2..2b791f5a189 100644
--- a/src/Interpreters/RowRefs.cpp
+++ b/src/Interpreters/RowRefs.cpp
@@ -1,5 +1,6 @@
 #include

+#include
 #include
 #include
 #include
@@ -44,38 +45,52 @@ class SortedLookupVector : public SortedLookupVectorBase
 {
     struct Entry
     {
-        /// We don't store a RowRef and instead keep it's members separately (and return a tuple) to reduce the memory usage.
-        /// For example, for sizeof(T) == 4 => sizeof(Entry) == 16 (while before it would be 20). Then when you put it into a vector, the effect is even greater
-        decltype(RowRef::block) block;
-        decltype(RowRef::row_num) row_num;
-        TKey asof_value;
+        TKey value;
+        uint32_t row_ref_index;

         Entry() = delete;
-        Entry(TKey v, const Block * b, size_t r) : block(b), row_num(r), asof_value(v) { }
+        Entry(TKey value_, uint32_t row_ref_index_)
+            : value(value_)
+            , row_ref_index(row_ref_index_)
+        { }

-        bool operator<(const Entry & other) const { return asof_value < other.asof_value; }
+    };
+
+    struct LessEntryOperator
+    {
+        ALWAYS_INLINE bool operator()(const Entry & lhs, const Entry & rhs) const
+        {
+            return lhs.value < rhs.value;
+        }
     };

     struct GreaterEntryOperator
     {
-        bool operator()(Entry const & a, Entry const & b) const { return a.asof_value > b.asof_value; }
+        ALWAYS_INLINE bool operator()(const Entry & lhs, const Entry & rhs) const
+        {
+            return lhs.value > rhs.value;
+        }
     };

 public:
-    using Base = std::vector<Entry>;
     using Keys = std::vector<TKey>;
-    static constexpr bool isDescending = (inequality == ASOF::Inequality::Greater || inequality == ASOF::Inequality::GreaterOrEquals);
-    static constexpr bool isStrict = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::Greater);
+    using Entries = PaddedPODArray<Entry>;
+    using RowRefs = PaddedPODArray<RowRef>;
+
+    static constexpr bool is_descending = (inequality == ASOF::Inequality::Greater || inequality == ASOF::Inequality::GreaterOrEquals);
+    static constexpr bool is_strict = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::Greater);

     void insert(const IColumn & asof_column, const Block * block, size_t row_num) override
     {
         using ColumnType = ColumnVectorOrDecimal<TKey>;
         const auto & column = assert_cast<const ColumnType &>(asof_column);
-        TKey k = column.getElement(row_num);
+        TKey key = column.getElement(row_num);

         assert(!sorted.load(std::memory_order_acquire));
-        array.emplace_back(k, block, row_num);
+
+        entries.emplace_back(key, row_refs.size());
+        row_refs.emplace_back(RowRef(block, row_num));
     }

     /// Unrolled version of upper_bound and lower_bound
@@ -84,30 +99,30 @@ public:
     /// at https://en.algorithmica.org/hpc/data-structures/s-tree/
     size_t boundSearch(TKey value)
     {
-        size_t size = array.size();
+        size_t size = entries.size();
         size_t low = 0;

         /// This is a single binary search iteration as a macro to unroll. Takes into account the inequality:
-        /// isStrict -> Equal values are not requested
-        /// isDescending -> The vector is sorted in reverse (for greater or greaterOrEquals)
+        /// is_strict -> Equal values are not requested
+        /// is_descending -> The vector is sorted in reverse (for greater or greaterOrEquals)
 #define BOUND_ITERATION \
     { \
         size_t half = size / 2; \
         size_t other_half = size - half; \
         size_t probe = low + half; \
         size_t other_low = low + other_half; \
-        TKey v = array[probe].asof_value; \
+        TKey & v = entries[probe].value; \
         size = half; \
-        if constexpr (isDescending) \
+        if constexpr (is_descending) \
        { \
-            if constexpr (isStrict) \
+            if constexpr (is_strict) \
                 low = value <= v ? other_low : low; \
             else \
                 low = value < v ? other_low : low; \
         } \
         else \
         { \
-            if constexpr (isStrict) \
+            if constexpr (is_strict) \
                 low = value >= v ? other_low : low; \
             else \
                 low = value > v ? other_low : low; \
@@ -130,7 +145,7 @@ public:
         return low;
     }

-    std::tuple<decltype(RowRef::block), decltype(RowRef::row_num)> findAsof(const IColumn & asof_column, size_t row_num) override
+    RowRef findAsof(const IColumn & asof_column, size_t row_num) override
     {
         sort();

@@ -139,8 +154,11 @@ public:
         TKey k = column.getElement(row_num);

         size_t pos = boundSearch(k);
-        if (pos != array.size())
-            return std::make_tuple(array[pos].block, array[pos].row_num);
+        if (pos != entries.size())
+        {
+            size_t row_ref_index = entries[pos].row_ref_index;
+            return row_refs[row_ref_index];
+        }

         return {nullptr, 0};
     }

@@ -148,7 +166,8 @@ public:
 private:
     std::atomic<bool> sorted = false;
     mutable std::mutex lock;
-    Base array;
+    Entries entries;
+    RowRefs row_refs;

     // Double checked locking with SC atomics works in C++
     // https://preshing.com/20130930/double-checked-locking-is-fixed-in-cpp11/
@@ -160,12 +179,37 @@ private:
         if (!sorted.load(std::memory_order_acquire))
         {
             std::lock_guard l(lock);
+
             if (!sorted.load(std::memory_order_relaxed))
             {
-                if constexpr (isDescending)
-                    ::sort(array.begin(), array.end(), GreaterEntryOperator());
+                if constexpr (std::is_arithmetic_v<TKey> && !std::is_floating_point_v<TKey>)
+                {
+                    if (likely(entries.size() > 256))
+                    {
+                        struct RadixSortTraits : RadixSortNumTraits<TKey>
+                        {
+                            using Element = Entry;
+                            using Result = Element;
+
+                            static TKey & extractKey(Element & elem) { return elem.value; }
+                            static Element extractResult(Element & elem) { return elem; }
+                        };
+
+                        if constexpr (is_descending)
+                            RadixSort<RadixSortTraits>::executeLSD(entries.data(), entries.size(), true);
+                        else
+                            RadixSort<RadixSortTraits>::executeLSD(entries.data(), entries.size(), false);
+
+                        sorted.store(true, std::memory_order_release);
+                        return;
+                    }
+                }
+
+                if constexpr (is_descending)
+                    ::sort(entries.begin(), entries.end(), GreaterEntryOperator());
                 else
-                    ::sort(array.begin(), array.end());
+                    ::sort(entries.begin(), entries.end(), LessEntryOperator());
+
                 sorted.store(true, std::memory_order_release);
             }
         }
diff --git a/src/Interpreters/RowRefs.h b/src/Interpreters/RowRefs.h
index 02462833050..fa5ce867613 100644
--- a/src/Interpreters/RowRefs.h
+++ b/src/Interpreters/RowRefs.h
@@ -146,7 +146,7 @@ private:
 struct SortedLookupVectorBase
 {
     SortedLookupVectorBase() = default;
-    virtual ~SortedLookupVectorBase() { }
+    virtual ~SortedLookupVectorBase() = default;

     static std::optional<TypeIndex> getTypeSize(const IColumn & asof_column, size_t & type_size);

@@ -154,7 +154,7 @@ struct SortedLookupVectorBase
     virtual void insert(const IColumn &, const Block *, size_t) = 0;

     // This needs to be synchronized internally
-    virtual std::tuple<decltype(RowRef::block), decltype(RowRef::row_num)> findAsof(const IColumn &, size_t) = 0;
+    virtual RowRef findAsof(const IColumn &, size_t) = 0;
 };

diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp
index 59545d4314d..3b4d665e41b 100644
--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@@ -41,6 +41,57 @@ namespace ErrorCodes
 {
     extern const int BAD_ARGUMENTS;
     extern const int LOGICAL_ERROR;
+    extern const int NOT_IMPLEMENTED;
+}
+
+namespace
+{
+    class StorageWithComment : public IAST
+    {
+    public:
+        ASTPtr storage;
+        ASTPtr comment;
+
+        String getID(char) const override { return "Storage with comment definition"; }
+
+        ASTPtr clone() const override
+        {
+            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method clone is not supported");
+        }
+
+        void formatImpl(const FormatSettings &, FormatState &, FormatStateStacked) const override
+        {
+            throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported");
+        }
+    };
+
+    class ParserStorageWithComment : public IParserBase
+    {
+    protected:
+        const char * getName() const override { return "storage definition with comment"; }
+        bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
+        {
+            ParserStorage storage_p;
+            ASTPtr storage;
+
+            if (!storage_p.parse(pos, storage, expected))
+                return false;
+
+            ParserKeyword s_comment("COMMENT");
+            ParserStringLiteral string_literal_parser;
+            ASTPtr comment;
+
+            if (s_comment.ignore(pos, expected))
+                string_literal_parser.parse(pos, comment, expected);
+
+            auto storage_with_comment = std::make_shared<StorageWithComment>();
+            storage_with_comment->storage = std::move(storage);
+            storage_with_comment->comment = std::move(comment);
+
+            node = storage_with_comment;
+            return true;
+        }
+    };
 }

 namespace
@@ -102,8 +153,9 @@ std::shared_ptr createSystemLog(
         engine += " TTL " + ttl;
         engine += " ORDER BY (event_date, event_time)";
     }
+
     // Validate engine definition grammatically to prevent some configuration errors
-    ParserStorage storage_parser;
+    ParserStorageWithComment storage_parser;
     parseQuery(storage_parser, engine.data(), engine.data() + engine.size(),
             "Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);

@@ -450,7 +502,6 @@ void SystemLog<LogElement>::prepareTable()
     is_prepared = true;
 }

-
 template <typename LogElement>
 ASTPtr SystemLog<LogElement>::getCreateTableQuery()
 {
@@ -465,11 +516,16 @@ ASTPtr SystemLog<LogElement>::getCreateTableQuery()
     new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(ordinary_columns, alias_columns));
     create->set(create->columns_list, new_columns_list);

-    ParserStorage storage_parser;
-    ASTPtr storage_ast = parseQuery(
+    ParserStorageWithComment storage_parser;
+
+    ASTPtr storage_with_comment_ast = parseQuery(
         storage_parser, storage_def.data(), storage_def.data() + storage_def.size(),
         "Storage to create table for " + LogElement::name(), 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
-    create->set(create->storage, storage_ast);
+
+    StorageWithComment & storage_with_comment = storage_with_comment_ast->as<StorageWithComment &>();
+
+    create->set(create->storage, storage_with_comment.storage);
+    create->set(create->comment, storage_with_comment.comment);

     /// Write additional (default) settings for MergeTree engine to make it possible to compare ASTs
     /// and recreate tables on settings changes.
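The sort() method in RowRefs.cpp above is a textbook double-checked locking pattern: a lock-free acquire load on the fast path, a re-check under the mutex, and a release store that publishes the sorted data (see the Preshing link quoted in the diff). A self-contained sketch of the same pattern, independent of ClickHouse's types:

    #include <algorithm>
    #include <atomic>
    #include <mutex>
    #include <vector>

    // Minimal sketch of the double-checked locking used by SortedLookupVector::sort():
    // the fast path is a single acquire load; the slow path re-checks under the
    // mutex before sorting and publishing with a release store.
    struct LazySorted
    {
        std::vector<int> data;
        std::atomic<bool> sorted{false};
        std::mutex mutex;

        void ensureSorted()
        {
            if (!sorted.load(std::memory_order_acquire))      // fast path, no lock
            {
                std::lock_guard<std::mutex> lock(mutex);
                if (!sorted.load(std::memory_order_relaxed))  // re-check under the lock
                {
                    std::sort(data.begin(), data.end());
                    sorted.store(true, std::memory_order_release); // publish
                }
            }
        }
    };

The relaxed re-check inside the lock is safe because the mutex already orders the critical sections; only the outer load and the final store need acquire/release.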
diff --git a/src/Interpreters/join_common.cpp b/src/Interpreters/join_common.cpp
index 478df653f3b..47b792f81e9 100644
--- a/src/Interpreters/join_common.cpp
+++ b/src/Interpreters/join_common.cpp
@@ -326,9 +326,10 @@ ColumnRawPtrMap materializeColumnsInplaceMap(Block & block, const Names & names)

     for (const auto & column_name : names)
     {
-        auto & column = block.getByName(column_name).column;
-        column = recursiveRemoveLowCardinality(column->convertToFullColumnIfConst());
-        ptrs[column_name] = column.get();
+        auto & column = block.getByName(column_name);
+        column.column = recursiveRemoveLowCardinality(column.column->convertToFullColumnIfConst());
+        column.type = recursiveRemoveLowCardinality(column.type);
+        ptrs[column_name] = column.column.get();
     }

     return ptrs;
diff --git a/src/Interpreters/threadPoolCallbackRunner.cpp b/src/Interpreters/threadPoolCallbackRunner.cpp
new file mode 100644
index 00000000000..fadad235039
--- /dev/null
+++ b/src/Interpreters/threadPoolCallbackRunner.cpp
@@ -0,0 +1,39 @@
+#include "threadPoolCallbackRunner.h"
+
+#include
+
+#include
+
+namespace DB
+{
+
+CallbackRunner threadPoolCallbackRunner(ThreadPool & pool)
+{
+    return [pool = &pool, thread_group = CurrentThread::getGroup()](auto callback)
+    {
+        pool->scheduleOrThrow([callback = std::move(callback), thread_group]()
+        {
+            if (thread_group)
+                CurrentThread::attachTo(thread_group);
+
+            SCOPE_EXIT_SAFE({
+                if (thread_group)
+                    CurrentThread::detachQueryIfNotDetached();
+
+                /// After we detached from the thread_group, the parent for memory_tracker inside ThreadStatus will be reset to its parent.
+                /// Typically, it may change from Process to User.
+                /// Usually it could be ok, because the thread pool task is executed before the user-level memory tracker is destroyed.
+                /// However, the thread could stay alive inside the thread pool, and its ThreadStatus as well.
+                /// When, finally, we destroy the thread (and the ThreadStatus),
+                /// it can use the memory tracker in the ~ThreadStatus in order to alloc/free untracked_memory,
+                /// and by this time the user-level memory tracker may be already destroyed.
+                ///
+                /// As a work-around, reset the memory tracker to total, which is always alive.
+                CurrentThread::get().memory_tracker.setParent(&total_memory_tracker);
+            });
+
+            callback();
+        });
+    };
+}
+
+}
diff --git a/src/Interpreters/threadPoolCallbackRunner.h b/src/Interpreters/threadPoolCallbackRunner.h
new file mode 100644
index 00000000000..59d06f2f1bc
--- /dev/null
+++ b/src/Interpreters/threadPoolCallbackRunner.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include
+
+
+namespace DB
+{
+
+/// Higher-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously
+using CallbackRunner = std::function<void(std::function<void()>)>;
+
+/// Creates a CallbackRunner that runs every callback with 'pool->scheduleOrThrow()'
+CallbackRunner threadPoolCallbackRunner(ThreadPool & pool);
+
+}
diff --git a/src/Parsers/parseIntervalKind.cpp b/src/Parsers/parseIntervalKind.cpp
index 7d36133e81c..0704aa107ca 100644
--- a/src/Parsers/parseIntervalKind.cpp
+++ b/src/Parsers/parseIntervalKind.cpp
@@ -7,6 +7,27 @@ namespace DB
 {
 bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & result)
 {
+    if (ParserKeyword("NANOSECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_NANOSECOND").ignore(pos, expected)
+        || ParserKeyword("NS").ignore(pos, expected))
+    {
+        result = IntervalKind::Nanosecond;
+        return true;
+    }
+
+    if (ParserKeyword("MICROSECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_MICROSECOND").ignore(pos, expected)
+        || ParserKeyword("MCS").ignore(pos, expected))
+    {
+        result = IntervalKind::Microsecond;
+        return true;
+    }
+
+    if (ParserKeyword("MILLISECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_MILLISECOND").ignore(pos, expected)
+        || ParserKeyword("MS").ignore(pos, expected))
+    {
+        result = IntervalKind::Millisecond;
+        return true;
+    }
+
     if (ParserKeyword("SECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_SECOND").ignore(pos, expected)
         || ParserKeyword("SS").ignore(pos, expected) || ParserKeyword("S").ignore(pos, expected))
     {
diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp
index cf5cfa681a1..37a107ae367 100644
--- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp
@@ -139,7 +139,11 @@ void ArrowBlockInputFormat::prepareReader()
     }

     arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(
-        getPort().getHeader(), "Arrow", format_settings.arrow.import_nested, format_settings.arrow.allow_missing_columns);
+        getPort().getHeader(),
+        "Arrow",
+        format_settings.arrow.import_nested,
+        format_settings.arrow.allow_missing_columns,
+        format_settings.arrow.case_insensitive_column_matching);
     missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema);

     if (stream)
diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
index 14c81a0d90d..0a72e561e4e 100644
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include
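The CallbackRunner just introduced is worth a small illustration: it is a factory that wraps a scheduler so every submitted 'void()' callback runs with setup and teardown hooks around it (attaching to the query's thread group, then detaching and re-parenting the memory tracker). A self-contained sketch under stated assumptions; std::thread stands in for ThreadPool::scheduleOrThrow(), and the hooks are reduced to comments:

    #include <functional>
    #include <future>
    #include <iostream>
    #include <thread>

    // Sketch of the CallbackRunner idea, not the real ClickHouse implementation.
    using CallbackRunner = std::function<void(std::function<void()>)>;

    CallbackRunner makeRunner()
    {
        return [](std::function<void()> callback)
        {
            std::thread([callback = std::move(callback)]
            {
                // setup hook: e.g. attach to the query's thread group
                callback();
                // teardown hook: e.g. detach and re-parent the memory tracker
            }).detach();
        };
    }

    int main()
    {
        auto runner = makeRunner();
        std::promise<int> p;
        runner([&] { p.set_value(42); });
        std::cout << p.get_future().get() << '\n'; // 42
    }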

 /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn.
 #define FOR_ARROW_NUMERIC_TYPES(M) \
@@ -484,19 +485,22 @@ static void checkStatus(const arrow::Status & status, const String & column_name
     throw Exception{ErrorCodes::UNKNOWN_EXCEPTION, "Error with a {} column '{}': {}.", format_name, column_name, status.ToString()};
 }

-Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, const Block * hint_header)
+Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(
+    const arrow::Schema & schema, const std::string & format_name, const Block * hint_header, bool ignore_case)
 {
     ColumnsWithTypeAndName sample_columns;
     std::unordered_set<String> nested_table_names;
     if (hint_header)
-        nested_table_names = Nested::getAllTableNames(*hint_header);
+        nested_table_names = Nested::getAllTableNames(*hint_header, ignore_case);
+
     for (const auto & field : schema.fields())
     {
-        if (hint_header && !hint_header->has(field->name()) && !nested_table_names.contains(field->name()))
+        if (hint_header && !hint_header->has(field->name(), ignore_case)
+            && !nested_table_names.contains(ignore_case ? boost::to_lower_copy(field->name()) : field->name()))
             continue;

         /// Create empty arrow column by it's type and convert it to ClickHouse column.
-        arrow::MemoryPool* pool = arrow::default_memory_pool();
+        arrow::MemoryPool * pool = arrow::default_memory_pool();
         std::unique_ptr<arrow::ArrayBuilder> array_builder;
         arrow::Status status = MakeBuilder(pool, field->type(), &array_builder);
         checkStatus(status, field->name(), format_name);
@@ -516,20 +520,31 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema,
 }

 ArrowColumnToCHColumn::ArrowColumnToCHColumn(
-    const Block & header_, const std::string & format_name_, bool import_nested_, bool allow_missing_columns_)
-    : header(header_), format_name(format_name_), import_nested(import_nested_), allow_missing_columns(allow_missing_columns_)
+    const Block & header_,
+    const std::string & format_name_,
+    bool import_nested_,
+    bool allow_missing_columns_,
+    bool case_insensitive_matching_)
+    : header(header_)
+    , format_name(format_name_)
+    , import_nested(import_nested_)
+    , allow_missing_columns(allow_missing_columns_)
+    , case_insensitive_matching(case_insensitive_matching_)
 {
 }

 void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table)
 {
     NameToColumnPtr name_to_column_ptr;
-    for (const auto & column_name : table->ColumnNames())
+    for (auto column_name : table->ColumnNames())
     {
         std::shared_ptr<arrow::ChunkedArray> arrow_column = table->GetColumnByName(column_name);
         if (!arrow_column)
             throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column '{}' is duplicated", column_name);
-        name_to_column_ptr[column_name] = arrow_column;
+
+        if (case_insensitive_matching)
+            boost::to_lower(column_name);
+        name_to_column_ptr[std::move(column_name)] = arrow_column;
     }

     arrowColumnsToCHChunk(res, name_to_column_ptr);
@@ -548,22 +563,31 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
     {
         const ColumnWithTypeAndName & header_column = header.getByPosition(column_i);

+        auto search_column_name = header_column.name;
+        if (case_insensitive_matching)
+            boost::to_lower(search_column_name);
+
         bool read_from_nested = false;
         String nested_table_name = Nested::extractTableName(header_column.name);
-        if (!name_to_column_ptr.contains(header_column.name))
+        String search_nested_table_name = nested_table_name;
+        if (case_insensitive_matching)
+            boost::to_lower(search_nested_table_name);
+
+        if (!name_to_column_ptr.contains(search_column_name))
         {
             /// Check if it's a column from nested table.
-            if (import_nested && name_to_column_ptr.contains(nested_table_name))
+            if (import_nested && name_to_column_ptr.contains(search_nested_table_name))
             {
-                if (!nested_tables.contains(nested_table_name))
+                if (!nested_tables.contains(search_nested_table_name))
                 {
-                    std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[nested_table_name];
-                    ColumnsWithTypeAndName cols = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true)};
+                    std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[search_nested_table_name];
+                    ColumnsWithTypeAndName cols
+                        = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true)};
                     Block block(cols);
-                    nested_tables[nested_table_name] = std::make_shared<Block>(Nested::flatten(block));
+                    nested_tables[search_nested_table_name] = std::make_shared<Block>(Nested::flatten(block));
                 }

-                read_from_nested = nested_tables[nested_table_name]->has(header_column.name);
+                read_from_nested = nested_tables[search_nested_table_name]->has(header_column.name, case_insensitive_matching);
             }

             if (!read_from_nested)
@@ -580,13 +604,19 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
             }
         }

-        std::shared_ptr<arrow::ChunkedArray> arrow_column = name_to_column_ptr[header_column.name];
         ColumnWithTypeAndName column;
         if (read_from_nested)
-            column = nested_tables[nested_table_name]->getByName(header_column.name);
+        {
+            column = nested_tables[search_nested_table_name]->getByName(header_column.name, case_insensitive_matching);
+            if (case_insensitive_matching)
+                column.name = header_column.name;
+        }
         else
+        {
+            auto arrow_column = name_to_column_ptr[search_column_name];
             column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values, true);
+        }

         try
         {
@@ -594,8 +624,11 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
         }
         catch (Exception & e)
         {
-            e.addMessage(fmt::format("while converting column {} from type {} to type {}",
-                backQuote(header_column.name), column.type->getName(), header_column.type->getName()));
+            e.addMessage(fmt::format(
+                "while converting column {} from type {} to type {}",
+                backQuote(header_column.name),
+                column.type->getName(),
+                header_column.type->getName()));
             throw;
         }

@@ -609,22 +642,23 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr &
 std::vector<size_t> ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema & schema) const
 {
     std::vector<size_t> missing_columns;
-    auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, &header);
+    auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, &header, case_insensitive_matching);
     auto flatten_block_from_arrow = Nested::flatten(block_from_arrow);
+
     for (size_t i = 0, columns = header.columns(); i < columns; ++i)
     {
-        const auto & column = header.getByPosition(i);
+        const auto & header_column = header.getByPosition(i);
         bool read_from_nested = false;
-        String nested_table_name = Nested::extractTableName(column.name);
-        if (!block_from_arrow.has(column.name))
+        String nested_table_name = Nested::extractTableName(header_column.name);
+        if (!block_from_arrow.has(header_column.name, case_insensitive_matching))
         {
-            if (import_nested && block_from_arrow.has(nested_table_name))
-                read_from_nested = flatten_block_from_arrow.has(column.name);
+            if (import_nested && block_from_arrow.has(nested_table_name, case_insensitive_matching))
+                read_from_nested = flatten_block_from_arrow.has(header_column.name, case_insensitive_matching);

             if (!read_from_nested)
             {
                 if (!allow_missing_columns)
-                    throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", column.name};
+                    throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not present in input data.", header_column.name};

                 missing_columns.push_back(i);
             }
diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h
index cf4f6bb3ff3..0a712326941 100644
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h
@@ -25,7 +25,8 @@ public:
         const Block & header_,
         const std::string & format_name_,
         bool import_nested_,
-        bool allow_missing_columns_);
+        bool allow_missing_columns_,
+        bool case_insensitive_matching_ = false);

     void arrowTableToCHChunk(Chunk & res, std::shared_ptr<arrow::Table> & table);

@@ -36,7 +37,8 @@ public:
     /// Transform arrow schema to ClickHouse header. If hint_header is provided,
     /// we will skip columns in schema that are not in hint_header.
-    static Block arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, const Block * hint_header = nullptr);
+    static Block arrowSchemaToCHHeader(
+        const arrow::Schema & schema, const std::string & format_name, const Block * hint_header = nullptr, bool ignore_case = false);

 private:
     const Block & header;
@@ -44,6 +46,7 @@ private:
     bool import_nested;
     /// If false, throw exception if some columns in header not exists in arrow table.
     bool allow_missing_columns;
+    bool case_insensitive_matching;

     /// Map {column name : dictionary column}.
     /// To avoid converting dictionary from Arrow Dictionary
diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp
index aa9f7874ae8..c68b59833db 100644
--- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp
@@ -53,9 +53,6 @@ Chunk ORCBlockInputFormat::generate()
     if (!table || !table->num_rows())
         return res;

-    if (format_settings.use_lowercase_column_name)
-        table = *table->RenameColumns(include_column_names);
-
     arrow_column_to_ch_column->arrowTableToCHChunk(res, table);
     /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields.
     /// Otherwise fill the missing columns with zero values of its type.
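The case-insensitive matching added across the Arrow, ORC, and Parquet readers follows one strategy: file-side names are lower-cased once when the lookup map is built, probe names are lower-cased before lookup, and the header's original spelling is kept for the produced columns. A minimal standalone sketch of that strategy; std::unordered_map stands in for NameToColumnPtr and the int value for a real column:

    #include <cctype>
    #include <iostream>
    #include <string>
    #include <unordered_map>

    std::string toLower(std::string s)
    {
        for (char & c : s)
            c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
        return s;
    }

    int main()
    {
        std::unordered_map<std::string, int> name_to_column;
        bool case_insensitive_matching = true;

        std::string file_column = "UserID"; // spelling in the Parquet/ORC/Arrow file
        name_to_column[case_insensitive_matching ? toLower(file_column) : file_column] = 1;

        std::string header_column = "userid"; // spelling in the ClickHouse header
        std::string probe = case_insensitive_matching ? toLower(header_column) : header_column;
        std::cout << (name_to_column.count(probe) ? "matched" : "missing") << '\n'; // matched
    }

Normalizing on insertion keeps the per-column lookup O(1) instead of scanning all names with a case-insensitive comparator.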
@@ -73,7 +70,6 @@ void ORCBlockInputFormat::resetParser()

     file_reader.reset();
     include_indices.clear();
-    include_column_names.clear();
     block_missing_values.clear();
 }

@@ -125,20 +121,6 @@ static void getFileReaderAndSchema(
     if (!read_schema_result.ok())
         throw Exception(read_schema_result.status().ToString(), ErrorCodes::BAD_ARGUMENTS);
     schema = std::move(read_schema_result).ValueOrDie();
-
-    if (format_settings.use_lowercase_column_name)
-    {
-        std::vector<std::shared_ptr<arrow::Field>> fields;
-        fields.reserve(schema->num_fields());
-        for (int i = 0; i < schema->num_fields(); ++i)
-        {
-            const auto& field = schema->field(i);
-            auto name = field->name();
-            boost::to_lower(name);
-            fields.push_back(field->WithName(name));
-        }
-        schema = arrow::schema(fields, schema->metadata());
-    }
 }

 void ORCBlockInputFormat::prepareReader()
@@ -149,12 +131,17 @@ void ORCBlockInputFormat::prepareReader()
         return;

     arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(
-        getPort().getHeader(), "ORC", format_settings.orc.import_nested, format_settings.orc.allow_missing_columns);
+        getPort().getHeader(),
+        "ORC",
+        format_settings.orc.import_nested,
+        format_settings.orc.allow_missing_columns,
+        format_settings.orc.case_insensitive_column_matching);
     missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema);

+    const bool ignore_case = format_settings.orc.case_insensitive_column_matching;
     std::unordered_set<String> nested_table_names;
     if (format_settings.orc.import_nested)
-        nested_table_names = Nested::getAllTableNames(getPort().getHeader());
+        nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case);

     /// In ReadStripe column indices should be started from 1,
     /// because 0 indicates to select all columns.
@@ -165,19 +152,18 @@ void ORCBlockInputFormat::prepareReader()
         /// so we should recursively count the number of indices we need for this type.
         int indexes_count = countIndicesForType(schema->field(i)->type());
         const auto & name = schema->field(i)->name();
-        if (getPort().getHeader().has(name) || nested_table_names.contains(name))
+        if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name))
         {
             for (int j = 0; j != indexes_count; ++j)
-            {
                 include_indices.push_back(index + j);
-                include_column_names.push_back(name);
-            }
         }
+
         index += indexes_count;
     }
 }

-ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : ISchemaReader(in_), format_settings(format_settings_)
+ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
+    : ISchemaReader(in_), format_settings(format_settings_)
 {
 }

diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h
index bd2151d78ff..b7a771730ea 100644
--- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h
@@ -47,7 +47,6 @@ private:

     // indices of columns to read from ORC file
     std::vector<int> include_indices;
-    std::vector<String> include_column_names;

     std::vector<size_t> missing_columns;
     BlockMissingValues block_missing_values;
diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
index 548bf0138f5..13582ce5019 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp
@@ -53,11 +53,7 @@ Chunk ParquetBlockInputFormat::generate()
     std::shared_ptr<arrow::Table> table;
     arrow::Status read_status = file_reader->ReadRowGroup(row_group_current, column_indices, &table);
     if (!read_status.ok())
-        throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(),
-            ErrorCodes::CANNOT_READ_ALL_DATA};
-
-    if (format_settings.use_lowercase_column_name)
-        table = *table->RenameColumns(column_names);
+        throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA};

     ++row_group_current;

@@ -78,7 +74,6 @@ void ParquetBlockInputFormat::resetParser()

     file_reader.reset();
     column_indices.clear();
-    column_names.clear();
     row_group_current = 0;
     block_missing_values.clear();
 }
@@ -123,20 +118,6 @@ static void getFileReaderAndSchema(
         return;
     THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(std::move(arrow_file), arrow::default_memory_pool(), &file_reader));
     THROW_ARROW_NOT_OK(file_reader->GetSchema(&schema));
-
-    if (format_settings.use_lowercase_column_name)
-    {
-        std::vector<std::shared_ptr<arrow::Field>> fields;
-        fields.reserve(schema->num_fields());
-        for (int i = 0; i < schema->num_fields(); ++i)
-        {
-            const auto& field = schema->field(i);
-            auto name = field->name();
-            boost::to_lower(name);
-            fields.push_back(field->WithName(name));
-        }
-        schema = arrow::schema(fields, schema->metadata());
-    }
 }

 void ParquetBlockInputFormat::prepareReader()
@@ -149,12 +130,18 @@ void ParquetBlockInputFormat::prepareReader()
     row_group_total = file_reader->num_row_groups();
     row_group_current = 0;

-    arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(getPort().getHeader(), "Parquet", format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns);
+    arrow_column_to_ch_column = std::make_unique<ArrowColumnToCHColumn>(
+        getPort().getHeader(),
+        "Parquet",
+        format_settings.parquet.import_nested,
+        format_settings.parquet.allow_missing_columns,
+        format_settings.parquet.case_insensitive_column_matching);
     missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema);

+    const bool ignore_case = format_settings.parquet.case_insensitive_column_matching;
     std::unordered_set<String> nested_table_names;
     if (format_settings.parquet.import_nested)
-        nested_table_names = Nested::getAllTableNames(getPort().getHeader());
+        nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case);

     int index = 0;
     for (int i = 0; i < schema->num_fields(); ++i)
@@ -164,19 +151,19 @@ void ParquetBlockInputFormat::prepareReader()
         /// count the number of indices we need for this type.
         int indexes_count = countIndicesForType(schema->field(i)->type());
         const auto & name = schema->field(i)->name();
-        if (getPort().getHeader().has(name) || nested_table_names.contains(name))
+
+        if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name))
         {
             for (int j = 0; j != indexes_count; ++j)
-            {
                 column_indices.push_back(index + j);
-                column_names.push_back(name);
-            }
         }
+
         index += indexes_count;
     }
 }

-ParquetSchemaReader::ParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : ISchemaReader(in_), format_settings(format_settings_)
+ParquetSchemaReader::ParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
+    : ISchemaReader(in_), format_settings(format_settings_)
 {
 }

diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
index eba9aac29f2..1faadaa3d21 100644
--- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h
@@ -40,7 +40,6 @@ private:
     int row_group_total = 0;
     // indices of columns to read from Parquet file
     std::vector<int> column_indices;
-    std::vector<String> column_names;
     std::unique_ptr<ArrowColumnToCHColumn> arrow_column_to_ch_column;
     int row_group_current = 0;
     std::vector<size_t> missing_columns;
diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
index f63d6fa9c46..87ba1b18fa7 100644
--- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp
@@ -4,7 +4,6 @@
 #include
 #include
 #include
-#include


 namespace DB
@@ -242,15 +241,16 @@ std::unordered_map<String, DataTypePtr> TSKVSchemaReader::readRowAndGetNamesAndDataTypes()
     std::unordered_map<String, DataTypePtr> names_and_types;
     StringRef name_ref;
-    String name_tmp;
+    String name_buf;
     String value;
     do
     {
-        bool has_value = readName(in, name_ref, name_tmp);
+        bool has_value = readName(in, name_ref, name_buf);
+        String name = String(name_ref);
         if (has_value)
         {
             readEscapedString(value, in);
-            names_and_types[String(name_ref)] = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped);
+            names_and_types[std::move(name)] = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped);
         }
         else
         {
diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp
index d948c16a78d..b2305d9aab2 100644
--- a/src/Processors/QueryPlan/QueryPlan.cpp
+++ b/src/Processors/QueryPlan/QueryPlan.cpp
@@ -1,16 +1,22 @@
-#include
-#include
-#include
-#include
-#include
+#include
+
+#include
+
 #include
 #include
-#include
+
+#include
+#include
+
+#include
+#include
 #include
 #include
-#include
 #include
-#include
+
+#include
+

 namespace DB
 {
@@ -388,6 +394,7 @@ void QueryPlan::explainPlan(WriteBuffer & buffer, const ExplainPlanOptions & opt
 static void explainPipelineStep(IQueryPlanStep & step, IQueryPlanStep::FormatSettings & settings)
 {
     settings.out << String(settings.offset, settings.indent_char) << "(" << step.getName() << ")\n";
+
     size_t current_offset = settings.offset;
     step.describePipeline(settings);
     if (current_offset == settings.offset)
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
index 1bfc1ec7306..e1b099e44c3 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@@ -112,6 +112,9 @@ ReadFromMergeTree::ReadFromMergeTree(

     if (enable_parallel_reading)
         read_task_callback = context->getMergeTreeReadTaskCallback();
+
+    /// Add explicit description.
+    setStepDescription(data.getStorageID().getFullNameNotQuoted());
 }

 Pipe ReadFromMergeTree::readFromPool(
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.h b/src/Processors/QueryPlan/ReadFromMergeTree.h
index 685b99a7bdc..6846506f260 100644
--- a/src/Processors/QueryPlan/ReadFromMergeTree.h
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.h
@@ -100,7 +100,8 @@ public:
         bool enable_parallel_reading
     );

-    String getName() const override { return "ReadFromMergeTree"; }
+    static constexpr auto name = "ReadFromMergeTree";
+    String getName() const override { return name; }

     void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;

diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp
index 1276157cc91..ae97a769b23 100644
--- a/src/Processors/Transforms/FillingTransform.cpp
+++ b/src/Processors/Transforms/FillingTransform.cpp
@@ -34,16 +34,16 @@ Block FillingTransform::transformHeader(Block header, const SortDescription & so

 template <typename T>
 static FillColumnDescription::StepFunction getStepFunction(
-    IntervalKind kind, Int64 step, const DateLUTImpl & date_lut)
+    IntervalKind kind, Int64 step, const DateLUTImpl & date_lut, UInt16 scale = DataTypeDateTime64::default_scale)
 {
     switch (kind)
     {
-    #define DECLARE_CASE(NAME) \
+#define DECLARE_CASE(NAME) \
         case IntervalKind::NAME: \
-            return [step, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get<T>(field), step, date_lut); };
+            return [step, scale, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get<T>(field), step, date_lut, scale); };

         FOR_EACH_INTERVAL_KIND(DECLARE_CASE)
-    #undef DECLARE_CASE
+#undef DECLARE_CASE
     }
     __builtin_unreachable();
 }
@@ -92,7 +92,7 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr &
             Int64 avg_seconds = get<Int64>(descr.fill_step) * descr.step_kind->toAvgSeconds();
             if (avg_seconds < 86400)
                 throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
-                    "Value of step is to low ({} seconds). Must be >= 1 day", avg_seconds);
+                    "Value of step is too low ({} seconds). Must be >= 1 day", avg_seconds);
         }

         if (which.isDate())
@@ -108,25 +108,23 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr &

             switch (*descr.step_kind)
             {
-    #define DECLARE_CASE(NAME) \
+#define DECLARE_CASE(NAME) \
                 case IntervalKind::NAME: \
                     descr.step_func = [step, &time_zone = date_time64->getTimeZone()](Field & field) \
                     { \
                         auto field_decimal = get<DecimalField<DateTime64>>(field); \
-                        auto components = DecimalUtils::splitWithScaleMultiplier(field_decimal.getValue(), field_decimal.getScaleMultiplier()); \
-                        auto res = Add##NAME##sImpl::execute(components, step, time_zone); \
-                        auto res_decimal = decimalFromComponentsWithMultiplier(res, field_decimal.getScaleMultiplier()); \
-                        field = DecimalField(res_decimal, field_decimal.getScale()); \
+                        auto res = Add##NAME##sImpl::execute(field_decimal.getValue(), step, time_zone, field_decimal.getScale()); \
+                        field = DecimalField(res, field_decimal.getScale()); \
                     }; \
                     break;

                 FOR_EACH_INTERVAL_KIND(DECLARE_CASE)
-    #undef DECLARE_CASE
+#undef DECLARE_CASE
             }
         }
         else
             throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION,
-                "STEP of Interval type can be used only with Date/DateTime types, but got {}", type->getName());
+                    "STEP of Interval type can be used only with Date/DateTime types, but got {}", type->getName());
     }
     else
     {
@@ -140,12 +138,12 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr &
 }

 FillingTransform::FillingTransform(
-        const Block & header_, const SortDescription & sort_description_, bool on_totals_)
-        : ISimpleTransform(header_, transformHeader(header_, sort_description_), true)
-        , sort_description(sort_description_)
-        , on_totals(on_totals_)
-        , filling_row(sort_description_)
-        , next_row(sort_description_)
+    const Block & header_, const SortDescription & sort_description_, bool on_totals_)
+    : ISimpleTransform(header_, transformHeader(header_, sort_description_), true)
+    , sort_description(sort_description_)
+    , on_totals(on_totals_)
+    , filling_row(sort_description_)
+    , next_row(sort_description_)
 {
     if (on_totals)
         return;
@@ -162,14 +160,14 @@ FillingTransform::FillingTransform(

         if (!tryConvertFields(descr, type))
             throw Exception("Incompatible types of WITH FILL expression values with column type "
-                + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION);
+                    + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION);

         if (type->isValueRepresentedByUnsignedInteger() &&
             ((!descr.fill_from.isNull() && less(descr.fill_from, Field{0}, 1)) ||
-             (!descr.fill_to.isNull() && less(descr.fill_to, Field{0}, 1))))
+                (!descr.fill_to.isNull() && less(descr.fill_to, Field{0}, 1))))
         {
             throw Exception("WITH FILL bound values cannot be negative for unsigned type "
-                + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION);
+                    + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION);
         }
     }

@@ -214,7 +212,7 @@ void FillingTransform::transform(Chunk & chunk)
     MutableColumns res_other_columns;

     auto init_columns_by_positions = [](const Columns & old_columns, Columns & new_columns,
-        MutableColumns & new_mutable_columns, const Positions & positions)
+            MutableColumns & new_mutable_columns, const Positions & positions)
     {
         for (size_t pos : positions)
         {
diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp
index 66356fd005b..68ffb42a90a 100644
--- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp
+++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp
@@ -279,14 +279,17 @@ bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWriter write_part_log)

         ProfileEvents::increment(ProfileEvents::DataAfterMergeDiffersFromReplica);

         LOG_ERROR(log,
-            "{}. Data after merge is not byte-identical to data on another replicas. There could be several"
-            " reasons: 1. Using newer version of compression library after server update. 2. Using another"
-            " compression method. 3. Non-deterministic compression algorithm (highly unlikely). 4."
-            " Non-deterministic merge algorithm due to logical error in code. 5. Data corruption in memory due"
-            " to bug in code. 6. Data corruption in memory due to hardware issue. 7. Manual modification of"
-            " source data after server startup. 8. Manual modification of checksums stored in ZooKeeper. 9."
-            " Part format related settings like 'enable_mixed_granularity_parts' are different on different"
-            " replicas. We will download merged part from replica to force byte-identical result.",
+            "{}. Data after merge is not byte-identical to data on other replicas. There could be several reasons:"
+            " 1. Using newer version of compression library after server update."
+            " 2. Using another compression method."
+            " 3. Non-deterministic compression algorithm (highly unlikely)."
+            " 4. Non-deterministic merge algorithm due to logical error in code."
+            " 5. Data corruption in memory due to bug in code."
+            " 6. Data corruption in memory due to hardware issue."
+            " 7. Manual modification of source data after server startup."
+            " 8. Manual modification of checksums stored in ZooKeeper."
+            " 9. Part format related settings like 'enable_mixed_granularity_parts' are different on different replicas."
+            " We will download merged part from replica to force byte-identical result.",
             getCurrentExceptionMessage(false));

         write_part_log(ExecutionStatus::fromCurrentException());
diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
index 7a85791d172..737e0c9d4b7 100644
--- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
+++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp
@@ -399,6 +399,7 @@ bool MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr, bool is_final) c
             return true;

         /// disallow GLOBAL IN, GLOBAL NOT IN
+        /// TODO why?
         if ("globalIn" == function_ptr->name
             || "globalNotIn" == function_ptr->name)
             return true;
diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp
index 3f220566260..309432e4675 100644
--- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp
+++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp
@@ -185,7 +185,8 @@ bool MutateFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWriter write_part_log)

         ProfileEvents::increment(ProfileEvents::DataAfterMutationDiffersFromReplica);

-        LOG_ERROR(log, "{}. Data after mutation is not byte-identical to data on another replicas. We will download merged part from replica to force byte-identical result.", getCurrentExceptionMessage(false));
+        LOG_ERROR(log, "{}. Data after mutation is not byte-identical to data on other replicas. "
+                       "We will download merged part from replica to force byte-identical result.", getCurrentExceptionMessage(false));

         write_part_log(ExecutionStatus::fromCurrentException());

diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp
index 93cceadaf93..761b4ecdeb1 100644
--- a/src/Storages/StorageFile.cpp
+++ b/src/Storages/StorageFile.cpp
@@ -179,8 +179,9 @@ std::unique_ptr<ReadBuffer> createReadBuffer(
         method = chooseCompressionMethod(current_path, compression_method);
     }

-    /// For clickhouse-local add progress callback to display progress bar.
-    if (context->getApplicationType() == Context::ApplicationType::LOCAL)
+    /// For clickhouse-local and clickhouse-client add progress callback to display progress bar.
+    if (context->getApplicationType() == Context::ApplicationType::LOCAL
+        || context->getApplicationType() == Context::ApplicationType::CLIENT)
     {
         auto & in = static_cast<ReadBufferFromFileDescriptor &>(*nested_buffer);
         in.setProgressCallback(context);
@@ -643,7 +644,9 @@ Pipe StorageFile::read(

     /// Set total number of bytes to process. For progress bar.
     auto progress_callback = context->getFileProgressCallback();
-    if (context->getApplicationType() == Context::ApplicationType::LOCAL && progress_callback)
+    if ((context->getApplicationType() == Context::ApplicationType::LOCAL
+        || context->getApplicationType() == Context::ApplicationType::CLIENT)
+        && progress_callback)
         progress_callback(FileProgress(0, total_bytes_to_read));

     for (size_t i = 0; i < num_streams; ++i)
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index b013b24f17b..d9f72cf7feb 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -1312,10 +1312,14 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil:

             if (replica_part_header.getColumnsHash() != local_part_header.getColumnsHash())
             {
-                /// Either it's a bug or ZooKeeper contains broken data.
-                /// TODO Fix KILL MUTATION and replace CHECKSUM_DOESNT_MATCH with LOGICAL_ERROR
-                /// (some replicas may skip killed mutation even if it was executed on other replicas)
-                throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Part {} from {} has different columns hash", part_name, replica);
+                /// Currently there are two (known) cases when it may happen:
+                ///  - KILL MUTATION query had removed the mutation before all replicas executed their assigned MUTATE_PART entries.
+                ///    Some replicas may skip this mutation and update the part version without actually applying any changes.
+                ///    It leads to mismatching checksums if the changes were applied on other replicas.
+                ///  - ALTER_METADATA and MERGE_PARTS were reordered on some replicas.
+                ///    It may lead to a different number of columns in merged parts on these replicas.
+                throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Part {} from {} has different columns hash "
+                    "(it may rarely happen in case of a race condition with KILL MUTATION or ALTER COLUMN).", part_name, replica);
             }

             replica_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true);
diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp
index f319bd1097b..ce31308fdd7 100644
--- a/src/Storages/StorageS3.cpp
+++ b/src/Storages/StorageS3.cpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include

 #include
 #include
@@ -20,6 +21,7 @@
 #include
 #include
 #include
+#include

 #include
 #include
@@ -374,6 +376,16 @@ static bool checkIfObjectExists(const std::shared_ptr<Aws::S3::S3Client> & client
     return false;
 }

+// TODO: common thread pool for IO must be used instead after PR #35150
+static ThreadPool & getThreadPoolStorageS3()
+{
+    constexpr size_t pool_size = 100;
+    constexpr size_t queue_size = 1000000;
+    static ThreadPool pool(pool_size, pool_size, queue_size);
+    return pool;
+}
+
+
 class StorageS3Sink : public SinkToStorage
 {
 public:
@@ -398,7 +410,7 @@ public:
             std::make_unique<WriteBufferFromS3>(
                 client, bucket, key, min_upload_part_size,
                 upload_part_size_multiply_factor, upload_part_size_multiply_parts_count_threshold,
-                max_single_part_upload_size), compression_method, 3);
+                max_single_part_upload_size, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, threadPoolCallbackRunner(getThreadPoolStorageS3())), compression_method, 3);
         writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, {}, format_settings);
     }

diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp
index f727b8f6952..5c8a7ea2be5 100644
--- a/src/Storages/StorageURL.cpp
+++ b/src/Storages/StorageURL.cpp
@@ -3,30 +3,35 @@
 #include
 #include
 #include
-#include
 #include
+#include
+#include
+#include
+#include
+#include

 #include
 #include
 #include
-#include
-#include
 #include
 #include
 #include
 #include
-#include
 #include
 #include
+#include "Common/ThreadStatus.h"
+#include
+#include "IO/HTTPCommon.h"
+#include "IO/ReadWriteBufferFromHTTP.h"

-#include
+#include
+#include
 #include
 #include
-#include
 #include
-#include
+#include


 namespace DB
@@ -43,8 +48,7 @@ namespace ErrorCodes

 static bool urlWithGlobs(const String & uri)
 {
-    return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos)
-        || uri.find('|') != std::string::npos;
+    return (uri.find('{') != std::string::npos && uri.find('}') != std::string::npos) || uri.find('|') != std::string::npos;
 }


@@ -88,8 +92,7 @@ IStorageURLBase::IStorageURLBase(

 namespace
 {
-    ReadWriteBufferFromHTTP::HTTPHeaderEntries getHeaders(
-        const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_)
+    ReadWriteBufferFromHTTP::HTTPHeaderEntries getHeaders(const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_)
     {
         ReadWriteBufferFromHTTP::HTTPHeaderEntries headers(headers_.begin(), headers_.end());

         // Propagate OpenTelemetry trace context, if any, downstream.
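The trace-context propagation handled in the next hunk follows the W3C Trace Context convention: the "traceparent" header carries a version, a 128-bit trace id, a 64-bit parent span id, and flags, all hex-encoded. A sketch of what a composeTraceparentHeader()-style function emits; the field values below are illustrative only and this is not ClickHouse's actual implementation:

    #include <cstdint>
    #include <cstdio>
    #include <iostream>
    #include <string>

    // Produces "00-<32 hex trace-id>-<16 hex parent-id>-<2 hex flags>".
    std::string composeTraceparent(uint64_t trace_id_hi, uint64_t trace_id_lo, uint64_t span_id, uint8_t flags)
    {
        char buf[60];
        std::snprintf(buf, sizeof(buf), "00-%016llx%016llx-%016llx-%02x",
            static_cast<unsigned long long>(trace_id_hi),
            static_cast<unsigned long long>(trace_id_lo),
            static_cast<unsigned long long>(span_id),
            static_cast<unsigned>(flags));
        return buf;
    }

    int main()
    {
        std::cout << composeTraceparent(0x0123456789abcdefULL, 0xfedcba9876543210ULL, 0x00f067aa0ba902b7ULL, 1) << '\n';
    }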
@@ -98,13 +101,11 @@ namespace const auto & thread_trace_context = CurrentThread::get().thread_trace_context; if (thread_trace_context.trace_id != UUID()) { - headers.emplace_back("traceparent", - thread_trace_context.composeTraceparentHeader()); + headers.emplace_back("traceparent", thread_trace_context.composeTraceparentHeader()); if (!thread_trace_context.tracestate.empty()) { - headers.emplace_back("tracestate", - thread_trace_context.tracestate); + headers.emplace_back("tracestate", thread_trace_context.tracestate); } } } @@ -114,8 +115,7 @@ namespace class StorageURLSource : public SourceWithProgress { - - using URIParams = std::vector>; + using URIParams = std::vector>; public: struct URIInfo @@ -160,11 +160,11 @@ namespace UInt64 max_block_size, const ConnectionTimeouts & timeouts, const String & compression_method, + size_t download_threads, const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_ = {}, const URIParams & params = {}, bool glob_url = false) - : SourceWithProgress(sample_block), name(std::move(name_)) - , uri_info(uri_info_) + : SourceWithProgress(sample_block), name(std::move(name_)), uri_info(uri_info_) { auto headers = getHeaders(headers_); @@ -176,33 +176,40 @@ namespace auto first_option = uri_options.begin(); read_buf = getFirstAvailableURLReadBuffer( - first_option, uri_options.end(), context, params, http_method, - callback, timeouts, compression_method, credentials, headers, glob_url, uri_options.size() == 1); + first_option, + uri_options.end(), + context, + params, + http_method, + callback, + timeouts, + compression_method, + credentials, + headers, + glob_url, + uri_options.size() == 1, + download_threads); - auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size, format_settings); + auto input_format + = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size, format_settings); QueryPipelineBuilder builder; builder.init(Pipe(input_format)); - builder.addSimpleTransform([&](const Block & cur_header) - { - return std::make_shared(cur_header, columns, *input_format, context); - }); + builder.addSimpleTransform( + [&](const Block & cur_header) + { return std::make_shared(cur_header, columns, *input_format, context); }); pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); reader = std::make_unique(*pipeline); }; } - String getName() const override - { - return name; - } + String getName() const override { return name; } Chunk generate() override { while (true) { - if (!reader) { auto current_uri_pos = uri_info->next_uri_to_read.fetch_add(1); @@ -239,7 +246,8 @@ namespace Poco::Net::HTTPBasicCredentials & credentials, const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers, bool glob_url, - bool delay_initialization) + bool delay_initialization, + size_t download_threads) { String first_exception_message; ReadSettings read_settings = context->getReadSettings(); @@ -255,8 +263,137 @@ namespace setCredentials(credentials, request_uri); + const auto settings = context->getSettings(); try { + if (download_threads > 1) + { + try + { + ReadWriteBufferFromHTTP buffer( + request_uri, + Poco::Net::HTTPRequest::HTTP_HEAD, + callback, + timeouts, + credentials, + settings.max_http_get_redirects, + DBMS_DEFAULT_BUFFER_SIZE, + read_settings, + headers, + ReadWriteBufferFromHTTP::Range{0, std::nullopt}, + &context->getRemoteHostFilter(), + true, + /* use_external_buffer */ false, + /* skip_url_not_found_error */ skip_url_not_found_error); + + 
Poco::Net::HTTPResponse res; + + for (size_t i = 0; i < settings.http_max_tries; ++i) + { + try + { + buffer.callWithRedirects(res, Poco::Net::HTTPRequest::HTTP_HEAD, true); + break; + } + catch (const Poco::Exception & e) + { + LOG_TRACE( + &Poco::Logger::get("StorageURLSource"), + "HTTP HEAD request to `{}` failed at try {}/{}. " + "Error: {}.", + request_uri.toString(), + i + 1, + settings.http_max_tries, + e.displayText()); + if (!ReadWriteBufferFromHTTP::isRetriableError(res.getStatus())) + { + throw; + } + } + } + + // to check if Range header is supported, we need to send a request with it set + const bool supports_ranges = (res.has("Accept-Ranges") && res.get("Accept-Ranges") == "bytes") + || (res.has("Content-Range") && res.get("Content-Range").starts_with("bytes")); + LOG_TRACE( + &Poco::Logger::get("StorageURLSource"), + fmt::runtime(supports_ranges ? "HTTP Range is supported" : "HTTP Range is not supported")); + +
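Aside: the probe above condenses to a few lines of Python. This is an illustrative sketch only, not part of the patch; the use of `requests` and all names here are assumptions. It sends a HEAD request with a `Range` header, retries transient failures, and reports range support only when the server advertises byte ranges, which is exactly what the next hunk checks together with the 206 status and a known content length.

# Illustrative sketch of the HEAD probe above (not ClickHouse code).
import requests

def probe_range_support(url, max_tries=3, timeout=10):
    res = None
    for attempt in range(max_tries):
        try:
            # Ask for "bytes=0-" so a range-capable server replies 206 Partial Content.
            res = requests.head(url, headers={"Range": "bytes=0-"}, timeout=timeout, allow_redirects=True)
            break
        except requests.RequestException:
            if attempt + 1 == max_tries:
                raise
    supports_ranges = (
        res.headers.get("Accept-Ranges") == "bytes"
        or res.headers.get("Content-Range", "").startswith("bytes")
    )
    content_length = int(res.headers.get("Content-Length", 0))
    return supports_ranges and res.status_code == 206 and content_length > 0, content_length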
+ if (supports_ranges && res.getStatus() == Poco::Net::HTTPResponse::HTTP_PARTIAL_CONTENT + && res.hasContentLength()) + { + LOG_TRACE( + &Poco::Logger::get("StorageURLSource"), + "Using ParallelReadBuffer with {} workers with chunks of {} bytes", + download_threads, + settings.max_download_buffer_size); + + auto read_buffer_factory = std::make_unique<RangedReadWriteBufferFromHTTPFactory>( + res.getContentLength(), + settings.max_download_buffer_size, + request_uri, + http_method, + callback, + timeouts, + credentials, + settings.max_http_get_redirects, + DBMS_DEFAULT_BUFFER_SIZE, + read_settings, + headers, + &context->getRemoteHostFilter(), + delay_initialization, + /* use_external_buffer */ false, + /* skip_url_not_found_error */ skip_url_not_found_error); + + ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup() + ? CurrentThread::get().getThreadGroup() + : MainThreadStatus::getInstance().getThreadGroup(); + + ContextPtr query_context + = CurrentThread::isInitialized() ? CurrentThread::get().getQueryContext() : nullptr; + + auto worker_cleanup = [has_running_group = running_group == nullptr](ThreadStatus & thread_status) + { + if (has_running_group) + thread_status.detachQuery(false); + }; + + auto worker_setup = [query_context = std::move(query_context), + running_group = std::move(running_group)](ThreadStatus & thread_status) + { + /// Save query context if any, because cache implementation needs it. + if (query_context) + thread_status.attachQueryContext(query_context); + + /// To be able to pass ProfileEvents. + if (running_group) + thread_status.attachQuery(running_group); + }; + + + return wrapReadBufferWithCompressionMethod( + std::make_unique<ParallelReadBuffer>( + std::move(read_buffer_factory), + &IOThreadPool::get(), + download_threads, + std::move(worker_setup), + std::move(worker_cleanup)), + chooseCompressionMethod(request_uri.getPath(), compression_method)); + } + } + catch (const Poco::Exception & e) + { + LOG_TRACE( + &Poco::Logger::get("StorageURLSource"), + "Failed to setup ParallelReadBuffer because of an exception:\n{}.\nFalling back to the single-threaded " + "buffer", + e.displayText()); + } + }
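Aside: the factory plus worker setup above amounts to a ranged parallel download. A minimal Python sketch of the same idea, assuming a server that honors `Range` requests; this is an illustration, not the actual ParallelReadBuffer:

# Sketch: split [0, content_length) into chunk_size-sized ranges and fetch them
# on a thread pool; pool.map() preserves order, so the chunks reassemble correctly.
from concurrent.futures import ThreadPoolExecutor
import requests

def download_in_chunks(url, content_length, chunk_size, workers):
    ranges = [
        (start, min(start + chunk_size, content_length) - 1)
        for start in range(0, content_length, chunk_size)
    ]

    def fetch(byte_range):
        start, end = byte_range
        res = requests.get(url, headers={"Range": f"bytes={start}-{end}"}, timeout=30)
        res.raise_for_status()  # a range-capable server answers 206 Partial Content
        return res.content

    with ThreadPoolExecutor(max_workers=workers) as pool:
        return b"".join(pool.map(fetch, ranges))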
+ + LOG_TRACE(&Poco::Logger::get("StorageURLSource"), "Using single-threaded read buffer"); + return wrapReadBufferWithCompressionMethod( std::make_unique<ReadWriteBufferFromHTTP>( request_uri, @@ -264,15 +401,15 @@ namespace callback, timeouts, credentials, - context->getSettingsRef().max_http_get_redirects, + settings.max_http_get_redirects, DBMS_DEFAULT_BUFFER_SIZE, read_settings, headers, ReadWriteBufferFromHTTP::Range{}, &context->getRemoteHostFilter(), delay_initialization, - /* use_external_buffer */false, - /* skip_url_not_found_error */skip_url_not_found_error), + /* use_external_buffer */ false, + /* skip_url_not_found_error */ skip_url_not_found_error), chooseCompressionMethod(request_uri.getPath(), compression_method)); } catch (...) @@ -323,10 +460,10 @@ StorageURLSink::StorageURLSink( std::string content_encoding = toContentEncodingName(compression_method); write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique<WriteBufferFromHTTP>(Poco::URI(uri), http_method, content_type, content_encoding, timeouts), - compression_method, 3); - writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block, - context, {} /* write callback */, format_settings); + std::make_unique<WriteBufferFromHTTP>(Poco::URI(uri), http_method, content_type, content_encoding, timeouts), + compression_method, + 3); + writer = FormatFactory::instance().getOutputFormat(format, *write_buf, sample_block, context, {} /* write callback */, format_settings); } @@ -355,15 +492,15 @@ public: const ConnectionTimeouts & timeouts_, const CompressionMethod compression_method_, const String & http_method_) - : PartitionedSink(partition_by, context_, sample_block_) - , uri(uri_) - , format(format_) - , format_settings(format_settings_) - , sample_block(sample_block_) - , context(context_) - , timeouts(timeouts_) - , compression_method(compression_method_) - , http_method(http_method_) + : PartitionedSink(partition_by, context_, sample_block_) + , uri(uri_) + , format(format_) + , format_settings(format_settings_) + , sample_block(sample_block_) + , context(context_) + , timeouts(timeouts_) + , compression_method(compression_method_) + , http_method(http_method_) { } @@ -371,8 +508,8 @@ public: { auto partition_path = PartitionedSink::replaceWildcards(uri, partition_id); context->getRemoteHostFilter().checkURL(Poco::URI(partition_path)); - return std::make_shared<StorageURLSink>(partition_path, format, - format_settings, sample_block, context, timeouts, compression_method, http_method); + return std::make_shared<StorageURLSink>( + partition_path, format, format_settings, sample_block, context, timeouts, compression_method, http_method); } private: @@ -462,7 +599,8 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( credentials, headers, false, - false); + false, + context->getSettingsRef().max_download_threads); }; try @@ -479,7 +617,10 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( } while (++option < urls_to_check.end()); - throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "All attempts to extract table structure from urls failed. Errors:\n{}", exception_messages); + throw Exception( + ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, + "All attempts to extract table structure from urls failed. Errors:\n{}", + exception_messages); } bool IStorageURLBase::isColumnOriented() const @@ -512,6 +653,8 @@ Pipe IStorageURLBase::read( block_for_format = storage_snapshot->metadata->getSampleBlock(); } + size_t max_download_threads = local_context->getSettingsRef().max_download_threads; + if (urlWithGlobs(uri)) { size_t max_addresses = local_context->getSettingsRef().glob_expansion_max_elements; @@ -528,14 +671,13 @@ Pipe IStorageURLBase::read( Pipes pipes; pipes.reserve(num_streams); + size_t download_threads = num_streams >= max_download_threads ? 1 : (max_download_threads / num_streams); for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared<StorageURLSource>( uri_info, getReadMethod(), - getReadPOSTDataCallback( - column_names, columns_description, query_info, - local_context, processed_stage, max_block_size), + getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), @@ -544,7 +686,11 @@ Pipe IStorageURLBase::read( columns_description, max_block_size, ConnectionTimeouts::getHTTPTimeouts(local_context), - compression_method, headers, params, /* glob_url */true)); + compression_method, + download_threads, + headers, + params, + /* glob_url */ true)); } return Pipe::unitePipes(std::move(pipes)); }
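Aside: the `download_threads` expression above spreads one global budget over the glob-expanded streams. With `max_download_threads = 16` and `num_streams = 4`, each stream downloads with 16 / 4 = 4 threads; as soon as there are at least as many streams as budgeted threads, every stream falls back to a single-threaded read. A self-contained restatement (the numbers are made up for illustration):

# Mirror of the integer division above; the values in the asserts are hypothetical.
def threads_per_stream(max_download_threads: int, num_streams: int) -> int:
    if num_streams >= max_download_threads:
        return 1  # more streams than budget: single-threaded per stream
    return max_download_threads // num_streams

assert threads_per_stream(16, 4) == 4
assert threads_per_stream(4, 8) == 1
assert threads_per_stream(7, 2) == 3  # remainder threads simply stay unused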
@@ -555,9 +701,7 @@ return Pipe(std::make_shared<StorageURLSource>( uri_info, getReadMethod(), - getReadPOSTDataCallback( - column_names, columns_description, query_info, - local_context, processed_stage, max_block_size), + getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), @@ -566,7 +710,10 @@ columns_description, max_block_size, ConnectionTimeouts::getHTTPTimeouts(local_context), - compression_method, headers, params)); + compression_method, + max_download_threads, + headers, + params)); } } @@ -598,12 +745,10 @@ Pipe StorageURLWithFailover::read( auto uri_info = std::make_shared<StorageURLSource::URIInfo>(); uri_info->uri_list_to_read.emplace_back(uri_options); - auto pipe = Pipe(std::make_shared<StorageURLSource>( + auto pipe = Pipe(std::make_shared<StorageURLSource>( uri_info, getReadMethod(), - getReadPOSTDataCallback( - column_names, columns_description, query_info, - local_context, processed_stage, max_block_size), + getReadPOSTDataCallback(column_names, columns_description, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), @@ -612,7 +757,10 @@ columns_description, max_block_size, ConnectionTimeouts::getHTTPTimeouts(local_context), - compression_method, headers, params)); + compression_method, + local_context->getSettingsRef().max_download_threads, + headers, + params)); std::shuffle(uri_options.begin(), uri_options.end(), thread_local_rng); return pipe; } @@ -632,17 +780,26 @@ SinkToStoragePtr IStorageURLBase::write(const ASTPtr & query, const StorageMetad { return std::make_shared<PartitionedStorageURLSink>( partition_by_ast, - uri, format_name, - format_settings, metadata_snapshot->getSampleBlock(), context, + uri, + format_name, + format_settings, + metadata_snapshot->getSampleBlock(), + context, ConnectionTimeouts::getHTTPTimeouts(context), - chooseCompressionMethod(uri, compression_method), http_method); + chooseCompressionMethod(uri, compression_method), + http_method); } else { - return std::make_shared<StorageURLSink>(uri, format_name, - format_settings, metadata_snapshot->getSampleBlock(), context, + return std::make_shared<StorageURLSink>( + uri, + format_name, + format_settings, + metadata_snapshot->getSampleBlock(), + context, ConnectionTimeouts::getHTTPTimeouts(context), - chooseCompressionMethod(uri, compression_method), http_method); + chooseCompressionMethod(uri, compression_method), + http_method); } } @@ -659,8 +816,19 @@ StorageURL::StorageURL( const ReadWriteBufferFromHTTP::HTTPHeaderEntries & headers_, const String & http_method_, ASTPtr partition_by_) - : IStorageURLBase(uri_, context_, table_id_, format_name_, format_settings_, - columns_, constraints_, comment, compression_method_, headers_, http_method_, partition_by_) + : IStorageURLBase( + uri_, + context_, + table_id_, + format_name_, + format_settings_, + columns_, + constraints_, + comment, + compression_method_, + headers_, + http_method_, + partition_by_) { context_->getRemoteHostFilter().checkURL(Poco::URI(uri)); } @@ -711,8 +879,7 @@ FormatSettings StorageURL::getFormatSettingsFromArgs(const StorageFactory::Argum // Apply changes from SETTINGS clause, with validation. user_format_settings.applyChanges(args.storage_def->settings->changes); - format_settings = getFormatSettings(args.getContext(), - user_format_settings); + format_settings = getFormatSettings(args.getContext(), user_format_settings); } else { @@ -731,12 +898,12 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex auto [common_configuration, storage_specific_args] = named_collection.value(); configuration.set(common_configuration); - if (!configuration.http_method.empty() - && configuration.http_method != Poco::Net::HTTPRequest::HTTP_POST + if (!configuration.http_method.empty() && configuration.http_method != Poco::Net::HTTPRequest::HTTP_POST && configuration.http_method != Poco::Net::HTTPRequest::HTTP_PUT) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Http method can be POST or PUT (current: {}). For insert default is POST, for select GET", - configuration.http_method); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Http method can be POST or PUT (current: {}). 
For insert default is POST, for select GET", + configuration.http_method); if (!storage_specific_args.empty()) { @@ -754,7 +921,8 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex { if (args.empty() || args.size() > 3) throw Exception( - "Storage URL requires 1, 2 or 3 arguments: url, name of used format (taken from file extension by default) and optional compression method.", + "Storage URL requires 1, 2 or 3 arguments: url, name of used format (taken from file extension by default) and optional " + "compression method.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (auto & arg : args) @@ -776,43 +944,45 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex void registerStorageURL(StorageFactory & factory) { - factory.registerStorage("URL", [](const StorageFactory::Arguments & args) - { - ASTs & engine_args = args.engine_args; - auto configuration = StorageURL::getConfiguration(engine_args, args.getLocalContext()); - auto format_settings = StorageURL::getFormatSettingsFromArgs(args); - - ReadWriteBufferFromHTTP::HTTPHeaderEntries headers; - for (const auto & [header, value] : configuration.headers) + factory.registerStorage( + "URL", + [](const StorageFactory::Arguments & args) { - auto value_literal = value.safeGet<String>(); - if (header == "Range") - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Range headers are not allowed"); - headers.emplace_back(std::make_pair(header, value_literal)); - } + ASTs & engine_args = args.engine_args; + auto configuration = StorageURL::getConfiguration(engine_args, args.getLocalContext()); + auto format_settings = StorageURL::getFormatSettingsFromArgs(args); - ASTPtr partition_by; - if (args.storage_def->partition_by) - partition_by = args.storage_def->partition_by->clone(); + ReadWriteBufferFromHTTP::HTTPHeaderEntries headers; + for (const auto & [header, value] : configuration.headers) + { + auto value_literal = value.safeGet<String>(); + if (header == "Range") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Range headers are not allowed"); + headers.emplace_back(std::make_pair(header, value_literal)); + } - return StorageURL::create( - configuration.url, - args.table_id, - configuration.format, - format_settings, - args.columns, - args.constraints, - args.comment, - args.getContext(), - configuration.compression_method, - headers, - configuration.http_method, - partition_by); - }, - { - .supports_settings = true, - .supports_schema_inference = true, - .source_access_type = AccessType::URL, - }); + ASTPtr partition_by; + if (args.storage_def->partition_by) + partition_by = args.storage_def->partition_by->clone(); + + return StorageURL::create( + configuration.url, + args.table_id, + configuration.format, + format_settings, + args.columns, + args.constraints, + args.comment, + args.getContext(), + configuration.compression_method, + headers, + configuration.http_method, + partition_by); + }, + { + .supports_settings = true, + .supports_schema_inference = true, + .source_access_type = AccessType::URL, + }); } } diff --git a/src/Storages/System/StorageSystemModels.cpp b/src/Storages/System/StorageSystemModels.cpp index 3df48e830bb..4a4dbbc69df 100644 --- a/src/Storages/System/StorageSystemModels.cpp +++ b/src/Storages/System/StorageSystemModels.cpp @@ -38,7 +38,7 @@ void StorageSystemModels::fillData(MutableColumns & res_columns, ContextPtr cont if (load_result.object) { - const auto model_ptr = std::static_pointer_cast<const IModel>(load_result.object); + const auto model_ptr = 
std::static_pointer_cast<const IModel>(load_result.object); res_columns[3]->insert(model_ptr->getTypeName()); } else diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index a329b01e9f2..644ab5d57c2 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -57,6 +57,7 @@ namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; extern const int BAD_ARGUMENTS; + extern const int SYNTAX_ERROR; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int INCORRECT_QUERY; @@ -262,7 +263,13 @@ namespace IntervalKind strToIntervalKind(const String& interval_str) { - if (interval_str == "Second") + if (interval_str == "Nanosecond") + return IntervalKind::Nanosecond; + else if (interval_str == "Microsecond") + return IntervalKind::Microsecond; + else if (interval_str == "Millisecond") + return IntervalKind::Millisecond; + else if (interval_str == "Second") return IntervalKind::Second; else if (interval_str == "Minute") return IntervalKind::Minute; @@ -307,6 +314,12 @@ namespace { switch (kind) { + case IntervalKind::Nanosecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Microsecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Millisecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); #define CASE_WINDOW_KIND(KIND) \ case IntervalKind::KIND: { \ return AddTime<IntervalKind::KIND>::execute(time_sec, num_units, time_zone); \ @@ -738,6 +751,12 @@ UInt32 StorageWindowView::getWindowLowerBound(UInt32 time_sec) switch (window_interval_kind) { + case IntervalKind::Nanosecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Microsecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Millisecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); #define CASE_WINDOW_KIND(KIND) \ case IntervalKind::KIND: \ { \ @@ -773,6 +792,13 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) switch (window_interval_kind) { + case IntervalKind::Nanosecond: + throw Exception("Fractional seconds are not supported by window view yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Microsecond: + throw Exception("Fractional seconds are not supported by window view yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Millisecond: + throw Exception("Fractional seconds are not supported by window view yet", ErrorCodes::SYNTAX_ERROR); + #define CASE_WINDOW_KIND(KIND) \ case IntervalKind::KIND: \ { \ diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index c330d1c725b..94f5eff51d7 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -9,11 +9,10 @@ from github import Github from env_helper import ( GITHUB_REPOSITORY, - TEMP_PATH, - REPO_COPY, + GITHUB_RUN_URL, REPORTS_PATH, - GITHUB_SERVER_URL, - GITHUB_RUN_ID, + REPO_COPY, + TEMP_PATH, ) from s3_helper import S3Helper from get_robot_token import get_best_robot_token @@ -126,7 +125,7 @@ if __name__ == "__main__": logging.info("Exception uploading file %s text %s", f, ex) paths[f] = "" - report_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" + report_url = GITHUB_RUN_URL if 
paths["runlog.log"]: report_url = paths["runlog.log"] if paths["main.log"]: diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 1cee5fd42de..5afe2991073 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -11,7 +11,7 @@ from env_helper import ( TEMP_PATH, GITHUB_REPOSITORY, GITHUB_SERVER_URL, - GITHUB_RUN_ID, + GITHUB_RUN_URL, ) from report import create_build_html_report from s3_helper import S3Helper @@ -180,9 +180,7 @@ if __name__ == "__main__": branch_name = "PR #{}".format(pr_info.number) branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_info.number}" commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{pr_info.sha}" - task_url = ( - f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}" - ) + task_url = GITHUB_RUN_URL report = create_build_html_report( build_check_name, build_results, diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 9d8a7463b3e..7ccbcb4a47e 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -8,30 +8,16 @@ from get_robot_token import get_parameter_from_ssm class ClickHouseHelper: - def __init__(self, url=None, user=None, password=None): - self.url2 = None - self.auth2 = None - + def __init__(self, url=None): if url is None: - url = get_parameter_from_ssm("clickhouse-test-stat-url") - self.url2 = get_parameter_from_ssm("clickhouse-test-stat-url2") - self.auth2 = { + self.url = get_parameter_from_ssm("clickhouse-test-stat-url2") + self.auth = { "X-ClickHouse-User": get_parameter_from_ssm( "clickhouse-test-stat-login2" ), "X-ClickHouse-Key": "", } - self.url = url - self.auth = { - "X-ClickHouse-User": user - if user is not None - else get_parameter_from_ssm("clickhouse-test-stat-login"), - "X-ClickHouse-Key": password - if password is not None - else get_parameter_from_ssm("clickhouse-test-stat-password"), - } - @staticmethod def _insert_json_str_info_impl(url, auth, db, table, json_str): params = { @@ -78,8 +64,6 @@ class ClickHouseHelper: def _insert_json_str_info(self, db, table, json_str): self._insert_json_str_info_impl(self.url, self.auth, db, table, json_str) - if self.url2: - self._insert_json_str_info_impl(self.url2, self.auth2, db, table, json_str) def insert_event_into(self, db, table, event): event_str = json.dumps(event) diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 818478f6430..3d0cc468aec 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -11,7 +11,7 @@ from typing import Dict, List, Optional, Set, Tuple, Union from github import Github -from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP +from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL from s3_helper import S3Helper from pr_info import PRInfo from get_robot_token import get_best_robot_token, get_parameter_from_ssm @@ -234,6 +234,7 @@ def build_and_push_one_image( with open(build_log, "wb") as bl: cmd = ( "docker buildx build --builder default " + f"--label build-url={GITHUB_RUN_URL} " f"{from_tag_arg}" f"--build-arg BUILDKIT_INLINE_CACHE=1 " f"--tag {image.repo}:{version_string} " diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index 27bfe07db53..2b864b6b94c 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -4,6 +4,7 @@ import os import unittest from unittest.mock import patch +from env_helper import GITHUB_RUN_URL from pr_info import PRInfo import docker_images_check as di @@ -117,7 +118,8 @@ class 
TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "docker buildx build --builder default --build-arg FROM_TAG=version " + f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " + "--build-arg FROM_TAG=version " "--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version --cache-from " "type=registry,ref=name:version --push --progress plain path", mock_popen.call_args.args, @@ -133,7 +135,8 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "docker buildx build --builder default --build-arg FROM_TAG=version2 " + f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " + "--build-arg FROM_TAG=version2 " "--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version2 --cache-from " "type=registry,ref=name:version2 --progress plain path", mock_popen.call_args.args, @@ -149,7 +152,7 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "docker buildx build --builder default " + f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " "--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version2 --cache-from " "type=registry,ref=name:version2 --progress plain path", mock_popen.call_args.args, diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index 90178e5c56a..c34162ba51a 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -7,9 +7,10 @@ CACHES_PATH = os.getenv("CACHES_PATH", TEMP_PATH) CLOUDFLARE_TOKEN = os.getenv("CLOUDFLARE_TOKEN") GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH") GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse") -GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID") +GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID", "0") GITHUB_SERVER_URL = os.getenv("GITHUB_SERVER_URL", "https://github.com") GITHUB_WORKSPACE = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) +GITHUB_RUN_URL = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" IMAGES_PATH = os.getenv("IMAGES_PATH") REPORTS_PATH = os.getenv("REPORTS_PATH", "./reports") REPO_COPY = os.getenv("REPO_COPY", os.path.abspath("../../")) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 79cea83b1c8..289e32406ef 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -2,7 +2,7 @@ import logging from github import Github -from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_ID +from env_helper import GITHUB_RUN_URL from pr_info import PRInfo from get_robot_token import get_best_robot_token from commit_status_helper import get_commit @@ -33,7 +33,7 @@ if __name__ == "__main__": gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) - url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" + url = GITHUB_RUN_URL statuses = filter_statuses(list(commit.get_statuses())) if NAME in statuses and statuses[NAME].state == "pending": commit.create_status( diff --git a/tests/ci/keeper_jepsen_check.py b/tests/ci/keeper_jepsen_check.py index b0ec1e7ba8b..24d720e67ab 100644 --- a/tests/ci/keeper_jepsen_check.py +++ b/tests/ci/keeper_jepsen_check.py @@ -200,10 +200,8 @@ if __name__ == "__main__": head = requests.head(build_url) counter += 1 if counter >= 180: - post_commit_status( - gh, pr_info.sha, CHECK_NAME, "Cannot fetch build to run", "error", "" - ) - raise Exception("Cannot fetch build") + logging.warning("Cannot 
fetch build in 30 minutes, exiting") + sys.exit(0) with SSHKey(key_value=get_parameter_from_ssm("jepsen_ssh_key") + "\n"): ssh_auth_sock = os.environ["SSH_AUTH_SOCK"] diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 761b1ac9257..c6ce86b2ce1 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -11,6 +11,7 @@ import re from github import Github +from env_helper import GITHUB_RUN_URL from pr_info import PRInfo from s3_helper import S3Helper from get_robot_token import get_best_robot_token @@ -88,9 +89,9 @@ if __name__ == "__main__": else: pr_link = f"https://github.com/ClickHouse/ClickHouse/pull/{pr_info.number}" - task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" - docker_env += ' -e CHPC_ADD_REPORT_LINKS="<a href={}>Job (actions)</a> <a href={}>Tested commit</a>"'.format( - task_url, pr_link - ) + docker_env += ( + f' -e CHPC_ADD_REPORT_LINKS="<a href={GITHUB_RUN_URL}>' + f'Job (actions)</a> <a href={pr_link}>Tested commit</a>"' + ) if "RUN_BY_HASH_TOTAL" in os.environ: @@ -199,7 +200,7 @@ if __name__ == "__main__": status = "failure" message = "No message in report." - report_url = task_url + report_url = GITHUB_RUN_URL if paths["runlog.log"]: report_url = paths["runlog.log"] diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 64e22712059..ee4399792ae 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -8,7 +8,7 @@ from build_download_helper import get_with_retries from env_helper import ( GITHUB_REPOSITORY, GITHUB_SERVER_URL, - GITHUB_RUN_ID, + GITHUB_RUN_URL, GITHUB_EVENT_PATH, ) @@ -111,7 +111,7 @@ class PRInfo: self.sha = github_event["pull_request"]["head"]["sha"] repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" - self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}" + self.task_url = GITHUB_RUN_URL self.repo_full_name = GITHUB_REPOSITORY self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" @@ -142,7 +142,7 @@ class PRInfo: self.sha = github_event["after"] pull_request = get_pr_for_commit(self.sha, github_event["ref"]) repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" - self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}" + self.task_url = GITHUB_RUN_URL self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" self.repo_full_name = GITHUB_REPOSITORY if pull_request is None or pull_request["state"] == "closed": @@ -180,7 +180,7 @@ class PRInfo: self.number = 0 self.labels = {} repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" - self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}" + self.task_url = GITHUB_RUN_URL self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" self.repo_full_name = GITHUB_REPOSITORY self.pr_html_url = f"{repo_prefix}/commits/{ref}" diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 5b89082532d..9c7ba13f8e4 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -5,7 +5,7 @@ import re from typing import Tuple from github import Github -from env_helper import GITHUB_RUN_ID, GITHUB_REPOSITORY, GITHUB_SERVER_URL +from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL from pr_info import PRInfo from get_robot_token import get_best_robot_token from commit_status_helper import get_commit @@ -231,7 +231,7 @@ if __name__ == "__main__": ) sys.exit(1) - url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" + url = GITHUB_RUN_URL if not can_run: print("::notice ::Cannot run") commit.create_status(
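Aside: the CI edits above all follow one pattern: the per-script `f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"` strings give way to a single `GITHUB_RUN_URL` constant from `env_helper` (defined in the env_helper.py hunk earlier in this patch). Condensed into a sketch, with the defaults this patch uses:

# The run URL is computed once at import time; check scripts just import it.
import os

GITHUB_SERVER_URL = os.getenv("GITHUB_SERVER_URL", "https://github.com")
GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse")
GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID", "0")  # "0" keeps the URL well-formed outside CI
GITHUB_RUN_URL = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}"

report_url = GITHUB_RUN_URL  # what the scripts above now do instead of rebuilding the string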
diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index f7b74e8d5dd..289fc4b3184 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -2,7 +2,7 @@ import os import logging import ast -from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_ID +from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_URL from report import ReportColorTheme, create_test_html_report @@ -66,7 +66,7 @@ def upload_results( branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_number}" commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{commit_sha}" - task_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" + task_url = GITHUB_RUN_URL if additional_urls: raw_log_url = additional_urls[0] diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 02e22ee0c4d..3bb547333e7 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -238,7 +238,7 @@ def _update_dockerfile(repo_path: str, version: ClickHouseVersion): def update_version_local(repo_path, version, version_type="testing"): update_contributors() version.with_description(version_type) - update_cmake_version(version, version_type) + update_cmake_version(version) _update_changelog(repo_path, version) _update_dockerfile(repo_path, version) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 9dd05cacce4..2c830e6ea40 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -373,6 +373,11 @@ class SettingsRandomizer: "priority": lambda: int(abs(random.gauss(0, 2))), "output_format_parallel_formatting": lambda: random.randint(0, 1), "input_format_parallel_parsing": lambda: random.randint(0, 1), + "min_chunk_bytes_for_parallel_parsing": lambda: max(1024, int(random.gauss(10 * 1024 * 1024, 5 * 1000 * 1000))), + "max_read_buffer_size": lambda: random.randint(1, 20) if random.random() < 0.1 else random.randint(500000, 1048576), + "prefer_localhost_replica": lambda: random.randint(0, 1), + "max_block_size": lambda: random.randint(8000, 100000), + "max_threads": lambda: random.randint(1, 64), } @staticmethod
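Aside: `SettingsRandomizer` gives each functional-test run a random but valid combination of the settings added above. The mechanism, condensed into a standalone sketch; the entries here mirror the lambdas in this hunk:

# Each entry is a zero-argument lambda; calling it yields one randomized value.
import random

settings_randomizer = {
    "max_block_size": lambda: random.randint(8000, 100000),
    "max_threads": lambda: random.randint(1, 64),
    # Gaussian around 10 MiB, clamped so the value stays positive and sane.
    "min_chunk_bytes_for_parallel_parsing": lambda: max(
        1024, int(random.gauss(10 * 1024 * 1024, 5 * 1000 * 1000))
    ),
}

def get_random_settings():
    return {name: pick() for name, pick in settings_randomizer.items()}

print(get_random_settings())  # e.g. {'max_block_size': 51341, 'max_threads': 7, ...}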
diff --git a/tests/config/config.d/zookeeper.xml b/tests/config/config.d/zookeeper.xml index 4fa529a6180..63057224ef9 100644 --- a/tests/config/config.d/zookeeper.xml +++ b/tests/config/config.d/zookeeper.xml @@ -1,5 +1,7 @@ <clickhouse> <zookeeper> + + <zookeeper_load_balancing>random</zookeeper_load_balancing> <node index="1"> <host>localhost</host> <port>9181</port> diff --git a/tests/fuzz/all.dict b/tests/fuzz/all.dict index bf25f1fa484..1863cd20bdd 100644 --- a/tests/fuzz/all.dict +++ b/tests/fuzz/all.dict @@ -1459,7 +1459,7 @@ "xor" "xxHash32" "xxHash64" -"yandexConsistentHash" +"kostikConsistentHash" "YEAR" "yearweek" "yesterday" diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index 722e931dc09..3f393aa6846 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -26,7 +26,7 @@ "toUnixTimestamp64Nano" "toUnixTimestamp64Micro" "jumpConsistentHash" -"yandexConsistentHash" +"kostikConsistentHash" "addressToSymbol" "toJSONString" "JSON_VALUE" diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index d0b5e892f5b..dffd09ae849 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -16,21 +16,28 @@ import traceback import urllib.parse import shlex import urllib3 - -from cassandra.policies import RoundRobinPolicy -import cassandra.cluster -import psycopg2 -import pymongo -import pymysql import requests -from confluent_kafka.avro.cached_schema_registry_client import ( - CachedSchemaRegistryClient, -) + +try: + # Please, add modules that required for specific tests only here. + # So contributors will be able to run most tests locally + # without installing tons of unneeded packages that may be not so easy to install. + from cassandra.policies import RoundRobinPolicy + import cassandra.cluster + import psycopg2 + from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT + import pymongo + import pymysql + from confluent_kafka.avro.cached_schema_registry_client import ( + CachedSchemaRegistryClient, + ) +except Exception as e: + logging.warning(f"Cannot import some modules, some tests may not work: {e}") + from dict2xml import dict2xml from kazoo.client import KazooClient from kazoo.exceptions import KazooException from minio import Minio -from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT from helpers.test_tools import assert_eq_with_retry, exec_query_with_retry from helpers import pytest_xdist_logging_to_separate_files diff --git a/tests/integration/test_distributed_queries_stress/test.py b/tests/integration/test_distributed_queries_stress/test.py index a5df8562676..fce42b4e58b 100644 --- a/tests/integration/test_distributed_queries_stress/test.py +++ b/tests/integration/test_distributed_queries_stress/test.py @@ -67,10 +67,10 @@ def started_cluster(): insert into data (key) select * from numbers(10); create table if not exists dist_one as data engine=Distributed(one_shard, currentDatabase(), data, key); - create table if not exists dist_one_over_dist as data engine=Distributed(one_shard, currentDatabase(), dist_one, yandexConsistentHash(key, 2)); + create table if not exists dist_one_over_dist as data engine=Distributed(one_shard, currentDatabase(), dist_one, kostikConsistentHash(key, 2)); create table if not exists dist_two as data engine=Distributed(two_shards, currentDatabase(), data, key); - create table if not exists dist_two_over_dist as data engine=Distributed(two_shards, currentDatabase(), dist_two, yandexConsistentHash(key, 2)); + create table if not exists dist_two_over_dist as data engine=Distributed(two_shards, currentDatabase(), dist_two, kostikConsistentHash(key, 2)); """ ) yield cluster diff --git a/tests/integration/test_redirect_url_storage/test.py b/tests/integration/test_redirect_url_storage/test.py index 13d00b811df..06ff78707d7 100644 --- a/tests/integration/test_redirect_url_storage/test.py +++ b/tests/integration/test_redirect_url_storage/test.py @@ -162,4 +162,4 @@ def test_url_reconnect(started_cluster): thread.join() assert (int(result), 6581218782194912115) - assert node1.contains_in_log("Error: Timeout: connect timed out") + assert node1.contains_in_log("Timeout: connect timed out") diff --git a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index d7aa4feb1d2..1ce1047ebec 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ b/tests/integration/test_s3_zero_copy_replication/test.py @@ -361,6 +361,8 @@ def test_s3_zero_copy_with_ttl_delete(cluster, large_data, iterations): ) node1.query("OPTIMIZE TABLE ttl_delete_test FINAL") + + node1.query("SYSTEM SYNC REPLICA ttl_delete_test") node2.query("SYSTEM SYNC REPLICA ttl_delete_test") if large_data: diff --git a/tests/integration/test_system_logs_comment/__init__.py b/tests/integration/test_system_logs_comment/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_system_logs_comment/test.py b/tests/integration/test_system_logs_comment/test.py 
new file mode 100644 index 00000000000..0659a2689a0 --- /dev/null +++ b/tests/integration/test_system_logs_comment/test.py @@ -0,0 +1,49 @@ +# pylint: disable=line-too-long +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name + +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node_default", stay_alive=True) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_system_logs_comment(): + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + <clickhouse> + <query_log> + <engine>ENGINE = MergeTree + PARTITION BY (event_date) + ORDER BY (event_time) + TTL event_date + INTERVAL 14 DAY DELETE + SETTINGS ttl_only_drop_parts=1 + COMMENT 'test_comment' + </engine> + <partition_by remove='remove'/> + <ttl remove='remove'/> + </query_log> + </clickhouse> + " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml + """, + ] + ) + node.restart_clickhouse() + + node.query("select 1") + node.query("system flush logs") + + comment = node.query("SELECT comment FROM system.tables WHERE name = 'query_log'") + assert comment == "test_comment\n" diff --git a/tests/integration/test_zookeeper_config_load_balancing/__init__.py b/tests/integration/test_zookeeper_config_load_balancing/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_load_balancing.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_load_balancing.xml new file mode 100644 index 00000000000..5416e5e82de --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_load_balancing.xml @@ -0,0 +1,19 @@ +<clickhouse> + <zookeeper> + <zookeeper_load_balancing>random</zookeeper_load_balancing> + <node index="1"> + <host>zoo1</host> + <port>2181</port> + </node> + <node index="2"> + <host>zoo2</host> + <port>2181</port> + </node> + <node index="3"> + <host>zoo3</host> + <port>2181</port> + </node> + <session_timeout_ms>3000</session_timeout_ms> + </zookeeper> +</clickhouse> diff --git a/tests/integration/test_zookeeper_config_load_balancing/test.py b/tests/integration/test_zookeeper_config_load_balancing/test.py new file mode 100644 index 00000000000..56af7513389 --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/test.py @@ -0,0 +1,427 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager + +cluster = ClickHouseCluster( + __file__, zookeeper_config_path="configs/zookeeper_load_balancing.xml" +) + +# use 3-letter hostnames, so getHostNameDifference("nod1", "zoo1") will work as expected +node1 = cluster.add_instance( + "nod1", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"] +) +node2 = cluster.add_instance( + "nod2", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"] +) +node3 = cluster.add_instance( + "nod3", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"] +) + + +def change_balancing(old, new, reload=True): + line = "<zookeeper_load_balancing>{}<" + old_line = line.format(old) + new_line = line.format(new) + for node in [node1, node2, node3]: + node.replace_in_config( + "/etc/clickhouse-server/config.d/zookeeper_load_balancing.xml", + old_line, + new_line, + ) + if reload: + node.query("select '{}', '{}'".format(old, new)) + node.query("system reload config") + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_first_or_random(started_cluster): + try: + change_balancing("random", "first_or_random") + print( + str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) +
) + assert ( + "1" + == str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + finally: + change_balancing("first_or_random", "random", reload=False) + + +def test_in_order(started_cluster): + try: + change_balancing("random", "in_order") + print( + str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + finally: + change_balancing("in_order", "random", reload=False) + + +def test_nearest_hostname(started_cluster): + try: + change_balancing("random", "nearest_hostname") + print( + str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | 
grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo3_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + finally: + change_balancing("nearest_hostname", "random", reload=False) + + +def test_round_robin(started_cluster): + pm = PartitionManager() + try: + pm._add_rule( + { + "source": node1.ip_address, + "destination": cluster.get_instance_ip("zoo1"), + "action": "REJECT --reject-with tcp-reset", + } + ) + pm._add_rule( + { + "source": node2.ip_address, + "destination": cluster.get_instance_ip("zoo1"), + "action": "REJECT --reject-with tcp-reset", + } + ) + pm._add_rule( + { + "source": node3.ip_address, + "destination": cluster.get_instance_ip("zoo1"), + "action": "REJECT --reject-with tcp-reset", + } + ) + change_balancing("random", "round_robin") + + print( + str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + finally: + pm.heal_all() + change_balancing("round_robin", "random", reload=False) diff --git a/tests/performance/consistent_hashes.xml b/tests/performance/consistent_hashes.xml index 3610579f545..c65a1151536 100644 --- a/tests/performance/consistent_hashes.xml +++ b/tests/performance/consistent_hashes.xml @@ -3,7 +3,7 @@ hash_func - yandexConsistentHash + kostikConsistentHash jumpConsistentHash diff --git a/tests/queries/0_stateless/00153_transform.reference b/tests/queries/0_stateless/00153_transform.reference index eea4fa0e1a8..8a38f4f8172 100644 --- a/tests/queries/0_stateless/00153_transform.reference +++ 
b/tests/queries/0_stateless/00153_transform.reference @@ -99,6 +99,6 @@ abc 1 1 Остальные -Яндекс +Bigmir)net Google Остальные diff --git a/tests/queries/0_stateless/00153_transform.sql b/tests/queries/0_stateless/00153_transform.sql index a5e531d36a4..78ec3cd4d1c 100644 --- a/tests/queries/0_stateless/00153_transform.sql +++ b/tests/queries/0_stateless/00153_transform.sql @@ -8,10 +8,10 @@ SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], 0) FROM sys SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], -1) FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], -1.1) FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], [111, 222.2, 333], 1) FROM system.numbers LIMIT 10; -SELECT transform(1, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; -SELECT transform(2, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; -SELECT transform(3, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; -SELECT transform(4, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; +SELECT transform(1, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title; +SELECT transform(2, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title; +SELECT transform(3, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title; +SELECT transform(4, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title; SELECT transform('hello', 'wrong', 1); -- { serverError 43 } SELECT transform('hello', ['wrong'], 1); -- { serverError 43 } SELECT transform('hello', ['wrong'], [1]); -- { serverError 43 } diff --git a/tests/queries/0_stateless/00165_transform_non_const_default.reference b/tests/queries/0_stateless/00165_transform_non_const_default.reference index d66471d9741..01890b91309 100644 --- a/tests/queries/0_stateless/00165_transform_non_const_default.reference +++ b/tests/queries/0_stateless/00165_transform_non_const_default.reference @@ -79,6 +79,6 @@ abc 1 1 Остальные -Яндекс +Meta.ua Google Остальные diff --git a/tests/queries/0_stateless/00165_transform_non_const_default.sql b/tests/queries/0_stateless/00165_transform_non_const_default.sql index f68327f7700..ef3b7c1f1c5 100644 --- a/tests/queries/0_stateless/00165_transform_non_const_default.sql +++ b/tests/queries/0_stateless/00165_transform_non_const_default.sql @@ -6,7 +6,7 @@ SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], materialize SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], materialize(-1)) FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], materialize(-1.1)) FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], [111, 222.2, 333], materialize(1)) FROM system.numbers LIMIT 10; -SELECT transform(1, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title; -SELECT transform(2, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title; -SELECT transform(3, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title; -SELECT transform(4, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title; +SELECT transform(1, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title; +SELECT transform(2, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title; +SELECT transform(3, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title; +SELECT transform(4, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title; diff --git a/tests/queries/0_stateless/00255_array_concat_string.reference 
b/tests/queries/0_stateless/00255_array_concat_string.reference
index 4ffac8e5de0..edd1101beb6 100644
--- a/tests/queries/0_stateless/00255_array_concat_string.reference
+++ b/tests/queries/0_stateless/00255_array_concat_string.reference
@@ -34,25 +34,25 @@ Hello, World
 0,1,2,3,4,5,6,7
 0,1,2,3,4,5,6,7,8
-yandex
-yandex google
-yandex google test
-yandex google test 123
-yandex google test 123
-yandex google test 123 hello
-yandex google test 123 hello world
-yandex google test 123 hello world goodbye
-yandex google test 123 hello world goodbye xyz
-yandex google test 123 hello world goodbye xyz yandex
-yandex google test 123 hello world goodbye xyz yandex google
-yandex google test 123 hello world goodbye xyz yandex google test
-yandex google test 123 hello world goodbye xyz yandex google test 123
-yandex google test 123 hello world goodbye xyz yandex google test 123
-yandex google test 123 hello world goodbye xyz yandex google test 123 hello
-yandex google test 123 hello world goodbye xyz yandex google test 123 hello world
-yandex google test 123 hello world goodbye xyz yandex google test 123 hello world goodbye
-yandex google test 123 hello world goodbye xyz yandex google test 123 hello world goodbye xyz
-yandex google test 123 hello world goodbye xyz yandex google test 123 hello world goodbye xyz yandex
+meta.ua
+meta.ua google
+meta.ua google test
+meta.ua google test 123
+meta.ua google test 123
+meta.ua google test 123 hello
+meta.ua google test 123 hello world
+meta.ua google test 123 hello world goodbye
+meta.ua google test 123 hello world goodbye xyz
+meta.ua google test 123 hello world goodbye xyz meta.ua
+meta.ua google test 123 hello world goodbye xyz meta.ua google
+meta.ua google test 123 hello world goodbye xyz meta.ua google test
+meta.ua google test 123 hello world goodbye xyz meta.ua google test 123
+meta.ua google test 123 hello world goodbye xyz meta.ua google test 123
+meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello
+meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world
+meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world goodbye
+meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world goodbye xyz
+meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world goodbye xyz meta.ua
 0
 01
diff --git a/tests/queries/0_stateless/00255_array_concat_string.sql b/tests/queries/0_stateless/00255_array_concat_string.sql
index f4f95956a16..a18d349bac8 100644
--- a/tests/queries/0_stateless/00255_array_concat_string.sql
+++ b/tests/queries/0_stateless/00255_array_concat_string.sql
@@ -6,7 +6,7 @@ SELECT arrayStringConcat(emptyArrayString());
 SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10;
 SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number)), '') FROM system.numbers LIMIT 10;
 SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number)), ',') FROM system.numbers LIMIT 10;
-SELECT arrayStringConcat(arrayMap(x -> transform(x, [0, 1, 2, 3, 4, 5, 6, 7, 8], ['yandex', 'google', 'test', '123', '', 'hello', 'world', 'goodbye', 'xyz'], ''), arrayMap(x -> x % 9, range(number))), ' ') FROM system.numbers LIMIT 20;
+SELECT arrayStringConcat(arrayMap(x -> transform(x, [0, 1, 2, 3, 4, 5, 6, 7, 8], ['meta.ua', 'google', 'test', '123', '', 'hello', 'world', 'goodbye', 'xyz'], ''), arrayMap(x -> x % 9, range(number))), ' ') FROM system.numbers LIMIT 20;
 SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number % 4))) FROM system.numbers LIMIT 10;
 SELECT arrayStringConcat([Null, 'hello', Null, 'world', Null, 'xyz', 'def', Null], ';');
 SELECT arrayStringConcat([Null::Nullable(String), Null::Nullable(String)], ';');
diff --git a/tests/queries/0_stateless/00296_url_parameters.reference b/tests/queries/0_stateless/00296_url_parameters.reference
index 91a7fe8d488..348651d3f7e 100644
--- a/tests/queries/0_stateless/00296_url_parameters.reference
+++ b/tests/queries/0_stateless/00296_url_parameters.reference
@@ -1,8 +1,8 @@
 ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h']
 ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g']
 b d f d f h b d d h f h b d f d f h b d d h f h
-http://yandex.ru/?c=d http://yandex.ru/?a=b http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f //yandex.ru/?c=d //yandex.ru/?a=b //yandex.ru/?a=b&c=d# //yandex.ru/?a&c=d#e=f //yandex.ru/?a#e=f //yandex.ru/?a&c=d# //yandex.ru/?a=b&c=d#e=f //yandex.ru/?c=d#e //yandex.ru/?a=b#e //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b#e&g=h //yandex.ru/?a=b&c=d#e&g=h //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b&c=d#test?e=f&g=h //yandex.ru/?a=b&c=d#test?g=h //yandex.ru/?a=b&c=d#test?e=f
+http://bigmir.net/?c=d http://bigmir.net/?a=b http://bigmir.net/?a=b&c=d# http://bigmir.net/?a&c=d#e=f http://bigmir.net/?a#e=f http://bigmir.net/?a&c=d# http://bigmir.net/?a=b&c=d#e=f http://bigmir.net/?c=d#e http://bigmir.net/?a=b#e http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b#e&g=h http://bigmir.net/?a=b&c=d#e&g=h http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b&c=d#test?e=f&g=h http://bigmir.net/?a=b&c=d#test?g=h http://bigmir.net/?a=b&c=d#test?e=f //bigmir.net/?c=d //bigmir.net/?a=b //bigmir.net/?a=b&c=d# //bigmir.net/?a&c=d#e=f //bigmir.net/?a#e=f //bigmir.net/?a&c=d# //bigmir.net/?a=b&c=d#e=f //bigmir.net/?c=d#e //bigmir.net/?a=b#e //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b#e&g=h //bigmir.net/?a=b&c=d#e&g=h //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b&c=d#test?e=f&g=h //bigmir.net/?a=b&c=d#test?g=h //bigmir.net/?a=b&c=d#test?e=f
 ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h']
 ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g']
 b d f d f h b d d h f h b d f d f h b d d h f h
-http://yandex.ru/?c=d http://yandex.ru/?a=b http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f //yandex.ru/?c=d //yandex.ru/?a=b //yandex.ru/?a=b&c=d# //yandex.ru/?a&c=d#e=f //yandex.ru/?a#e=f //yandex.ru/?a&c=d# //yandex.ru/?a=b&c=d#e=f //yandex.ru/?c=d#e //yandex.ru/?a=b#e //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b#e&g=h //yandex.ru/?a=b&c=d#e&g=h //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b&c=d#test?e=f&g=h //yandex.ru/?a=b&c=d#test?g=h //yandex.ru/?a=b&c=d#test?e=f
+http://bigmir.net/?c=d http://bigmir.net/?a=b http://bigmir.net/?a=b&c=d# http://bigmir.net/?a&c=d#e=f http://bigmir.net/?a#e=f http://bigmir.net/?a&c=d# http://bigmir.net/?a=b&c=d#e=f http://bigmir.net/?c=d#e http://bigmir.net/?a=b#e http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b#e&g=h http://bigmir.net/?a=b&c=d#e&g=h http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b&c=d#test?e=f&g=h http://bigmir.net/?a=b&c=d#test?g=h http://bigmir.net/?a=b&c=d#test?e=f //bigmir.net/?c=d //bigmir.net/?a=b //bigmir.net/?a=b&c=d# //bigmir.net/?a&c=d#e=f //bigmir.net/?a#e=f //bigmir.net/?a&c=d# //bigmir.net/?a=b&c=d#e=f //bigmir.net/?c=d#e //bigmir.net/?a=b#e //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b#e&g=h //bigmir.net/?a=b&c=d#e&g=h //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b&c=d#test?e=f&g=h //bigmir.net/?a=b&c=d#test?g=h //bigmir.net/?a=b&c=d#test?e=f
diff --git a/tests/queries/0_stateless/00296_url_parameters.sql b/tests/queries/0_stateless/00296_url_parameters.sql
index f6dad306319..8a96e3888fe 100644
--- a/tests/queries/0_stateless/00296_url_parameters.sql
+++ b/tests/queries/0_stateless/00296_url_parameters.sql
@@ -1,200 +1,200 @@
 SELECT
-    extractURLParameters('http://yandex.ru/?a=b&c=d'),
-    extractURLParameters('http://yandex.ru/?a=b&c=d#e=f'),
-    extractURLParameters('http://yandex.ru/?a&c=d#e=f'),
-    extractURLParameters('http://yandex.ru/?a=b&c=d#e=f&g=h'),
-    extractURLParameters('http://yandex.ru/?a=b&c=d#e'),
-    extractURLParameters('http://yandex.ru/?a=b&c=d#e&g=h'),
-    extractURLParameters('http://yandex.ru/?a=b&c=d#test?e=f&g=h'),
-    extractURLParameters('//yandex.ru/?a=b&c=d'),
-    extractURLParameters('//yandex.ru/?a=b&c=d#e=f'),
-    extractURLParameters('//yandex.ru/?a&c=d#e=f'),
-    extractURLParameters('//yandex.ru/?a=b&c=d#e=f&g=h'),
-    extractURLParameters('//yandex.ru/?a=b&c=d#e'),
-    extractURLParameters('//yandex.ru/?a=b&c=d#e&g=h'),
-    extractURLParameters('//yandex.ru/?a=b&c=d#test?e=f&g=h');
+    extractURLParameters('http://bigmir.net/?a=b&c=d'),
+    extractURLParameters('http://bigmir.net/?a=b&c=d#e=f'),
+    extractURLParameters('http://bigmir.net/?a&c=d#e=f'),
+    extractURLParameters('http://bigmir.net/?a=b&c=d#e=f&g=h'),
+    extractURLParameters('http://bigmir.net/?a=b&c=d#e'),
+    extractURLParameters('http://bigmir.net/?a=b&c=d#e&g=h'),
+    extractURLParameters('http://bigmir.net/?a=b&c=d#test?e=f&g=h'),
+    extractURLParameters('//bigmir.net/?a=b&c=d'),
+    extractURLParameters('//bigmir.net/?a=b&c=d#e=f'),
+    extractURLParameters('//bigmir.net/?a&c=d#e=f'),
+    extractURLParameters('//bigmir.net/?a=b&c=d#e=f&g=h'),
+    extractURLParameters('//bigmir.net/?a=b&c=d#e'),
+    extractURLParameters('//bigmir.net/?a=b&c=d#e&g=h'),
+    extractURLParameters('//bigmir.net/?a=b&c=d#test?e=f&g=h');
 SELECT
-    extractURLParameterNames('http://yandex.ru/?a=b&c=d'),
-    extractURLParameterNames('http://yandex.ru/?a=b&c=d#e=f'),
-    extractURLParameterNames('http://yandex.ru/?a&c=d#e=f'),
-    extractURLParameterNames('http://yandex.ru/?a=b&c=d#e=f&g=h'),
-    extractURLParameterNames('http://yandex.ru/?a=b&c=d#e'),
-    extractURLParameterNames('http://yandex.ru/?a=b&c=d#e&g=h'),
-    extractURLParameterNames('http://yandex.ru/?a=b&c=d#test?e=f&g=h'),
-    extractURLParameterNames('//yandex.ru/?a=b&c=d'),
-    extractURLParameterNames('//yandex.ru/?a=b&c=d#e=f'),
-    extractURLParameterNames('//yandex.ru/?a&c=d#e=f'),
-    extractURLParameterNames('//yandex.ru/?a=b&c=d#e=f&g=h'),
-    extractURLParameterNames('//yandex.ru/?a=b&c=d#e'),
-    extractURLParameterNames('//yandex.ru/?a=b&c=d#e&g=h'),
-    extractURLParameterNames('//yandex.ru/?a=b&c=d#test?e=f&g=h');
+    extractURLParameterNames('http://bigmir.net/?a=b&c=d'),
+    extractURLParameterNames('http://bigmir.net/?a=b&c=d#e=f'),
+    extractURLParameterNames('http://bigmir.net/?a&c=d#e=f'),
+    extractURLParameterNames('http://bigmir.net/?a=b&c=d#e=f&g=h'),
+    extractURLParameterNames('http://bigmir.net/?a=b&c=d#e'),
+    extractURLParameterNames('http://bigmir.net/?a=b&c=d#e&g=h'),
+    extractURLParameterNames('http://bigmir.net/?a=b&c=d#test?e=f&g=h'),
+    extractURLParameterNames('//bigmir.net/?a=b&c=d'),
+    extractURLParameterNames('//bigmir.net/?a=b&c=d#e=f'),
+    extractURLParameterNames('//bigmir.net/?a&c=d#e=f'),
+    extractURLParameterNames('//bigmir.net/?a=b&c=d#e=f&g=h'),
+    extractURLParameterNames('//bigmir.net/?a=b&c=d#e'),
+    extractURLParameterNames('//bigmir.net/?a=b&c=d#e&g=h'),
+    extractURLParameterNames('//bigmir.net/?a=b&c=d#test?e=f&g=h');
 SELECT
-    extractURLParameter('http://yandex.ru/?a=b&c=d', 'a'),
-    extractURLParameter('http://yandex.ru/?a=b&c=d', 'c'),
-    extractURLParameter('http://yandex.ru/?a=b&c=d#e=f', 'e'),
-    extractURLParameter('http://yandex.ru/?a&c=d#e=f', 'a'),
-    extractURLParameter('http://yandex.ru/?a&c=d#e=f', 'c'),
-    extractURLParameter('http://yandex.ru/?a&c=d#e=f', 'e'),
-    extractURLParameter('http://yandex.ru/?a=b&c=d#e=f&g=h', 'g'),
-    extractURLParameter('http://yandex.ru/?a=b&c=d#e', 'a'),
-    extractURLParameter('http://yandex.ru/?a=b&c=d#e', 'c'),
-    extractURLParameter('http://yandex.ru/?a=b&c=d#e', 'e'),
-    extractURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'c'),
-    extractURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'e'),
-    extractURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'g'),
-    extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'),
-    extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'),
-    extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'),
-    extractURLParameter('//yandex.ru/?a=b&c=d', 'a'),
-    extractURLParameter('//yandex.ru/?a=b&c=d', 'c'),
-    extractURLParameter('//yandex.ru/?a=b&c=d#e=f', 'e'),
-    extractURLParameter('//yandex.ru/?a&c=d#e=f', 'a'),
-    extractURLParameter('//yandex.ru/?a&c=d#e=f', 'c'),
-    extractURLParameter('//yandex.ru/?a&c=d#e=f', 'e'),
-    extractURLParameter('//yandex.ru/?a=b&c=d#e=f&g=h', 'g'),
-    extractURLParameter('//yandex.ru/?a=b&c=d#e', 'a'),
-    extractURLParameter('//yandex.ru/?a=b&c=d#e', 'c'),
-    extractURLParameter('//yandex.ru/?a=b&c=d#e', 'e'),
-    extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'c'),
-    extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'e'),
-    extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'g'),
-    extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'),
-    extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'),
-    extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'g');
+    extractURLParameter('http://bigmir.net/?a=b&c=d', 'a'),
+    extractURLParameter('http://bigmir.net/?a=b&c=d', 'c'),
+    extractURLParameter('http://bigmir.net/?a=b&c=d#e=f', 'e'),
+    extractURLParameter('http://bigmir.net/?a&c=d#e=f', 'a'),
+    extractURLParameter('http://bigmir.net/?a&c=d#e=f', 'c'),
+    extractURLParameter('http://bigmir.net/?a&c=d#e=f', 'e'),
+    extractURLParameter('http://bigmir.net/?a=b&c=d#e=f&g=h', 'g'),
+    extractURLParameter('http://bigmir.net/?a=b&c=d#e', 'a'),
+    extractURLParameter('http://bigmir.net/?a=b&c=d#e', 'c'),
+    extractURLParameter('http://bigmir.net/?a=b&c=d#e', 'e'),
+    extractURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'c'),
+    extractURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'e'),
+    extractURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'g'),
+    extractURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'),
+    extractURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'),
+    extractURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'g'),
+    extractURLParameter('//bigmir.net/?a=b&c=d', 'a'),
+    extractURLParameter('//bigmir.net/?a=b&c=d', 'c'),
+    extractURLParameter('//bigmir.net/?a=b&c=d#e=f', 'e'),
+    extractURLParameter('//bigmir.net/?a&c=d#e=f', 'a'),
+    extractURLParameter('//bigmir.net/?a&c=d#e=f', 'c'),
+    extractURLParameter('//bigmir.net/?a&c=d#e=f', 'e'),
+    extractURLParameter('//bigmir.net/?a=b&c=d#e=f&g=h', 'g'),
+    extractURLParameter('//bigmir.net/?a=b&c=d#e', 'a'),
+    extractURLParameter('//bigmir.net/?a=b&c=d#e', 'c'),
+    extractURLParameter('//bigmir.net/?a=b&c=d#e', 'e'),
+    extractURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'c'),
+    extractURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'e'),
+    extractURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'g'),
+    extractURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'),
+    extractURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'),
+    extractURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'g');
 SELECT
-    cutURLParameter('http://yandex.ru/?a=b&c=d', 'a'),
-    cutURLParameter('http://yandex.ru/?a=b&c=d', 'c'),
-    cutURLParameter('http://yandex.ru/?a=b&c=d#e=f', 'e'),
-    cutURLParameter('http://yandex.ru/?a&c=d#e=f', 'a'),
-    cutURLParameter('http://yandex.ru/?a&c=d#e=f', 'c'),
-    cutURLParameter('http://yandex.ru/?a&c=d#e=f', 'e'),
-    cutURLParameter('http://yandex.ru/?a=b&c=d#e=f&g=h', 'g'),
-    cutURLParameter('http://yandex.ru/?a=b&c=d#e', 'a'),
-    cutURLParameter('http://yandex.ru/?a=b&c=d#e', 'c'),
-    cutURLParameter('http://yandex.ru/?a=b&c=d#e', 'e'),
-    cutURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'c'),
-    cutURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'e'),
-    cutURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'g'),
-    cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'),
-    cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'),
-    cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'),
-    cutURLParameter('//yandex.ru/?a=b&c=d', 'a'),
-    cutURLParameter('//yandex.ru/?a=b&c=d', 'c'),
-    cutURLParameter('//yandex.ru/?a=b&c=d#e=f', 'e'),
-    cutURLParameter('//yandex.ru/?a&c=d#e=f', 'a'),
-    cutURLParameter('//yandex.ru/?a&c=d#e=f', 'c'),
-    cutURLParameter('//yandex.ru/?a&c=d#e=f', 'e'),
-    cutURLParameter('//yandex.ru/?a=b&c=d#e=f&g=h', 'g'),
-    cutURLParameter('//yandex.ru/?a=b&c=d#e', 'a'),
-    cutURLParameter('//yandex.ru/?a=b&c=d#e', 'c'),
-    cutURLParameter('//yandex.ru/?a=b&c=d#e', 'e'),
-    cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'c'),
-    cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'e'),
-    cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'g'),
-    cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'),
-    cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'),
-    cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'g');
+    cutURLParameter('http://bigmir.net/?a=b&c=d', 'a'),
+    cutURLParameter('http://bigmir.net/?a=b&c=d', 'c'),
+    cutURLParameter('http://bigmir.net/?a=b&c=d#e=f', 'e'),
+    cutURLParameter('http://bigmir.net/?a&c=d#e=f', 'a'),
+    cutURLParameter('http://bigmir.net/?a&c=d#e=f', 'c'),
+    cutURLParameter('http://bigmir.net/?a&c=d#e=f', 'e'),
+    cutURLParameter('http://bigmir.net/?a=b&c=d#e=f&g=h', 'g'),
+    cutURLParameter('http://bigmir.net/?a=b&c=d#e', 'a'),
+    cutURLParameter('http://bigmir.net/?a=b&c=d#e', 'c'),
+    cutURLParameter('http://bigmir.net/?a=b&c=d#e', 'e'),
+    cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'c'),
+    cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'e'),
+    cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'g'),
+    cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'),
+    cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'),
+    cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'g'),
+    cutURLParameter('//bigmir.net/?a=b&c=d', 'a'),
+    cutURLParameter('//bigmir.net/?a=b&c=d', 'c'),
+    cutURLParameter('//bigmir.net/?a=b&c=d#e=f', 'e'),
+    cutURLParameter('//bigmir.net/?a&c=d#e=f', 'a'),
+    cutURLParameter('//bigmir.net/?a&c=d#e=f', 'c'),
+    cutURLParameter('//bigmir.net/?a&c=d#e=f', 'e'),
+    cutURLParameter('//bigmir.net/?a=b&c=d#e=f&g=h', 'g'),
+    cutURLParameter('//bigmir.net/?a=b&c=d#e', 'a'),
+    cutURLParameter('//bigmir.net/?a=b&c=d#e', 'c'),
+    cutURLParameter('//bigmir.net/?a=b&c=d#e', 'e'),
+    cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'c'),
+    cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'e'),
+    cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'g'),
+    cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'),
+    cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'),
+    cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'g');
 SELECT
-    extractURLParameters(materialize('http://yandex.ru/?a=b&c=d')),
-    extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e=f')),
-    extractURLParameters(materialize('http://yandex.ru/?a&c=d#e=f')),
-    extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h')),
-    extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e')),
-    extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e&g=h')),
-    extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')),
-    extractURLParameters(materialize('//yandex.ru/?a=b&c=d')),
-    extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e=f')),
-    extractURLParameters(materialize('//yandex.ru/?a&c=d#e=f')),
-    extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e=f&g=h')),
-    extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e')),
-    extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e&g=h')),
-    extractURLParameters(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'));
+    extractURLParameters(materialize('http://bigmir.net/?a=b&c=d')),
+    extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e=f')),
+    extractURLParameters(materialize('http://bigmir.net/?a&c=d#e=f')),
+    extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h')),
+    extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e')),
+    extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e&g=h')),
+    extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h')),
+    extractURLParameters(materialize('//bigmir.net/?a=b&c=d')),
+    extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e=f')),
+    extractURLParameters(materialize('//bigmir.net/?a&c=d#e=f')),
+    extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e=f&g=h')),
+    extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e')),
+    extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e&g=h')),
+    extractURLParameters(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'));
 SELECT
-    extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d')),
-    extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e=f')),
-    extractURLParameterNames(materialize('http://yandex.ru/?a&c=d#e=f')),
-    extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h')),
-    extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e')),
-    extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e&g=h')),
-    extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')),
-    extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d')),
-    extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e=f')),
-    extractURLParameterNames(materialize('//yandex.ru/?a&c=d#e=f')),
-    extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e=f&g=h')),
-    extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e')),
-    extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e&g=h')),
-    extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'));
+    extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d')),
+    extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e=f')),
+    extractURLParameterNames(materialize('http://bigmir.net/?a&c=d#e=f')),
+    extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h')),
+    extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e')),
+    extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e&g=h')),
+    extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h')),
+    extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d')),
+    extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e=f')),
+    extractURLParameterNames(materialize('//bigmir.net/?a&c=d#e=f')),
+    extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e=f&g=h')),
+    extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e')),
+    extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e&g=h')),
+    extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'));
 SELECT
-    extractURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'a'),
-    extractURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'c'),
-    extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f'), 'e'),
-    extractURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'a'),
-    extractURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'c'),
-    extractURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'e'),
-    extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h'), 'g'),
-    extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'a'),
-    extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'c'),
-    extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'e'),
-    extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'c'),
-    extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'e'),
-    extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'g'),
-    extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'),
-    extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'),
-    extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'),
-    extractURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'a'),
-    extractURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'c'),
-    extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f'), 'e'),
-    extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'a'),
-    extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'c'),
-    extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'e'),
-    extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f&g=h'), 'g'),
-    extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'a'),
-    extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'c'),
-    extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'e'),
-    extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'c'),
-    extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'e'),
-    extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'g'),
-    extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'),
-    extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'),
-    extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g');
+    extractURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'a'),
+    extractURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'c'),
+    extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f'), 'e'),
+    extractURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'a'),
+    extractURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'c'),
+    extractURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'e'),
+    extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h'), 'g'),
+    extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'a'),
+    extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'c'),
+    extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'e'),
+    extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'c'),
+    extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'e'),
+    extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'g'),
+    extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'),
+    extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'),
+    extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g'),
+    extractURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'a'),
+    extractURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'c'),
+    extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f'), 'e'),
+    extractURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'a'),
+    extractURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'c'),
+    extractURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'e'),
+    extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f&g=h'), 'g'),
+    extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'a'),
+    extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'c'),
+    extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'e'),
+    extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'c'),
+    extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'e'),
+    extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'g'),
+    extractURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'),
+    extractURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'),
+    extractURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g');
 SELECT
-    cutURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'a'),
-    cutURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'c'),
-    cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f'), 'e'),
-    cutURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'a'),
-    cutURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'c'),
-    cutURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'e'),
-    cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h'), 'g'),
-    cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'a'),
-    cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'c'),
-    cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'e'),
-    cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'c'),
-    cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'e'),
-    cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'g'),
-    cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'),
-    cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'),
-    cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'),
-    cutURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'a'),
-    cutURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'c'),
-    cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f'), 'e'),
-    cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'a'),
-    cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'c'),
-    cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'e'),
-    cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f&g=h'), 'g'),
-    cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'a'),
-    cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'c'),
-    cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'e'),
-    cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'c'),
-    cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'e'),
-    cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'g'),
-    cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'),
-    cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'),
-    cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g');
+    cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'a'),
+    cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'c'),
+    cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f'), 'e'),
+    cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'a'),
+    cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'c'),
+    cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'e'),
+    cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h'), 'g'),
+    cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'a'),
+    cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'c'),
+    cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'e'),
+    cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'c'),
+    cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'e'),
+    cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'g'),
+    cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'),
+    cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'),
+    cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g'),
+    cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'a'),
+    cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'c'),
+    cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f'), 'e'),
+    cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'a'),
+    cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'c'),
+    cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'e'),
+    cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f&g=h'), 'g'),
+    cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'a'),
+    cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'c'),
+    cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'e'),
+    cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'c'),
+    cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'e'),
+    cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'g'),
+    cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'),
+    cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'),
+    cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g');
diff --git a/tests/queries/0_stateless/00381_first_significant_subdomain.reference b/tests/queries/0_stateless/00381_first_significant_subdomain.reference
index 1f1230a2104..086f3b0f9ce 100644
--- a/tests/queries/0_stateless/00381_first_significant_subdomain.reference
+++ b/tests/queries/0_stateless/00381_first_significant_subdomain.reference
@@ -1,3 +1,3 @@
 canada congo net-domena
-yandex yandex yandex яндекс yandex
+meta bigmir yahoo гугл meta
 canada hello hello canada
diff --git a/tests/queries/0_stateless/00381_first_significant_subdomain.sql b/tests/queries/0_stateless/00381_first_significant_subdomain.sql
index 5badd14f200..5d8c53afc9c 100644
--- a/tests/queries/0_stateless/00381_first_significant_subdomain.sql
+++ b/tests/queries/0_stateless/00381_first_significant_subdomain.sql
@@ -4,12 +4,12 @@
 SELECT firstSignificantSubdomain('http://pochemu.net-domena.ru') AS why;

 SELECT
-    firstSignificantSubdomain('ftp://www.yandex.com.tr/news.html'),
-    firstSignificantSubdomain('https://www.yandex.ua/news.html'),
-    firstSignificantSubdomain('magnet:yandex.abc'),
-    firstSignificantSubdomain('ftp://www.yandex.co.uk/news.html'),
-    firstSignificantSubdomain('https://api.www3.static.dev.ввв.яндекс.рф'),
-    firstSignificantSubdomain('//www.yandex.com.tr/news.html');
+    firstSignificantSubdomain('ftp://www.meta.com.ua/news.html'),
+    firstSignificantSubdomain('https://www.bigmir.net/news.html'),
+    firstSignificantSubdomain('magnet:ukr.abc'),
+    firstSignificantSubdomain('ftp://www.yahoo.co.jp/news.html'),
+    firstSignificantSubdomain('https://api.www3.static.dev.ввв.гугл.ком'),
+    firstSignificantSubdomain('//www.meta.com.ua/news.html');

 SELECT
     firstSignificantSubdomain('http://hello.canada.c'),
diff --git a/tests/queries/0_stateless/00505_secure.sh b/tests/queries/0_stateless/00505_secure.sh
index e69515253ed..b7c12911b90 100755
--- a/tests/queries/0_stateless/00505_secure.sh
+++ b/tests/queries/0_stateless/00505_secure.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: no-fasttest
+# Tags: no-fasttest, no-random-settings

 # set -x
diff --git a/tests/queries/0_stateless/00580_consistent_hashing_functions.sql b/tests/queries/0_stateless/00580_consistent_hashing_functions.sql
index 08e785929c7..f470642d391 100644
--- a/tests/queries/0_stateless/00580_consistent_hashing_functions.sql
+++ b/tests/queries/0_stateless/00580_consistent_hashing_functions.sql
@@ -1,6 +1,6 @@
 -- Tags: no-fasttest
 SELECT jumpConsistentHash(1, 1), jumpConsistentHash(42, 57), jumpConsistentHash(256, 1024), jumpConsistentHash(3735883980, 1), jumpConsistentHash(3735883980, 666), jumpConsistentHash(16045690984833335023, 255);
-SELECT yandexConsistentHash(16045690984833335023, 1), yandexConsistentHash(16045690984833335023, 2), yandexConsistentHash(16045690984833335023, 3), yandexConsistentHash(16045690984833335023, 4), yandexConsistentHash(16045690984833335023, 173), yandexConsistentHash(16045690984833335023, 255);
+SELECT kostikConsistentHash(16045690984833335023, 1), kostikConsistentHash(16045690984833335023, 2), kostikConsistentHash(16045690984833335023, 3), kostikConsistentHash(16045690984833335023, 4), kostikConsistentHash(16045690984833335023, 173), kostikConsistentHash(16045690984833335023, 255);
 SELECT jumpConsistentHash(intHash64(number), 787) FROM system.numbers LIMIT 1000000, 2;
-SELECT yandexConsistentHash(16045690984833335023+number-number, 120) FROM system.numbers LIMIT 1000000, 2;
+SELECT kostikConsistentHash(16045690984833335023+number-number, 120) FROM system.numbers LIMIT 1000000, 2;
diff --git a/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference b/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference
index a0265bdb7ed..2b1089c6840 100644
--- a/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference
+++ b/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference
@@ -1,23 +1,23 @@
 {"total":"1","domain":"baidu.com"}
 {"total":"2","domain":"facebook.com"}
 {"total":"1","domain":"google.com"}
-{"total":"2","domain":"yandex.ru"}
+{"total":"2","domain":"meta.ua"}
 {"total":"1","domain":"baidu.com"}
 {"total":"2","domain":"facebook.com"}
 {"total":"1","domain":"google.com"}
-{"total":"2","domain":"yandex.ru"}
+{"total":"2","domain":"meta.ua"}
 1 baidu.com
 2 facebook.com
 1 google.com
-2 yandex.ru
+2 meta.ua
 1 baidu.com
 2 facebook.com
 1 google.com
-2 yandex.ru
+2 meta.ua
 1 baidu.com
 1 google.com
 2 facebook.com
-2 yandex.ru
+2 meta.ua
 1
 1
 2
@@ -25,4 +25,4 @@
 baidu.com
 google.com
 facebook.com
-yandex.ru
+meta.ua
diff --git a/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql b/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql
index 49975daaa7e..07d39e8d54a 100644
--- a/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql
+++ b/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql
@@ -4,8 +4,8 @@ DROP TABLE IF EXISTS transactions;
 CREATE TABLE clicks (domain String) ENGINE = Memory;
 CREATE TABLE transactions (domain String) ENGINE = Memory;

-INSERT INTO clicks VALUES ('facebook.com'), ('yandex.ru'), ('google.com');
-INSERT INTO transactions VALUES ('facebook.com'), ('yandex.ru'), ('baidu.com');
+INSERT INTO clicks VALUES ('facebook.com'), ('meta.ua'), ('google.com');
+INSERT INTO transactions VALUES ('facebook.com'), ('meta.ua'), ('baidu.com');

 SELECT
diff --git a/tests/queries/0_stateless/00646_url_engine.python b/tests/queries/0_stateless/00646_url_engine.python
index 4f47e819328..d1836817867 100644
--- a/tests/queries/0_stateless/00646_url_engine.python
+++ b/tests/queries/0_stateless/00646_url_engine.python
@@ -120,18 +120,14 @@ class CSVHTTPServer(BaseHTTPRequestHandler):
 class HTTPServerV6(HTTPServer):
     address_family = socket.AF_INET6

-def start_server(requests_amount):
+def start_server():
     if IS_IPV6:
         httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, CSVHTTPServer)
     else:
         httpd = HTTPServer(HTTP_SERVER_ADDRESS, CSVHTTPServer)

-    def real_func():
-        for i in range(requests_amount):
-            httpd.handle_request()
-
-    t = threading.Thread(target=real_func)
-    return t
+    t = threading.Thread(target=httpd.serve_forever)
+    return t, httpd

 # test section
@@ -201,7 +197,7 @@ def main():
         'select double, count(*) from {tbl} group by double': "7.7\t2\n9.9\t10"
     }

-    t = start_server(len(select_only_requests) * 2 + (len(insert_requests) + len(select_requests)) * 2)
+    t, httpd = start_server()
     t.start()
     # test table with url engine
     test_select(table_name="test_table_select", requests=list(select_only_requests.keys()), answers=list(select_only_requests.values()), test_data=test_data)
@@ -211,6 +207,8 @@ def main():
     test_insert(table_name="test_table_insert", requests_insert=insert_requests, requests_select=list(select_requests.keys()), answers=list(select_requests.values()))
     #test insert into table function url
     test_insert(requests_insert=insert_requests, requests_select=list(select_requests.keys()), answers=list(select_requests.values()))
+
+    httpd.shutdown()
     t.join()
     print("PASSED")
diff --git a/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh b/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh
index f49aeb93184..00a7e3c5232 100755
--- a/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh
+++ b/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh
@@ -25,7 +25,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE string_test_table (val String) ENGINE
 ${CLICKHOUSE_CLIENT} --query="CREATE TABLE fixed_string_test_table (val FixedString(1)) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;"
 ${CLICKHOUSE_CLIENT} --query="CREATE TABLE signed_integer_test_table (val Int32) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;"
 ${CLICKHOUSE_CLIENT} --query="CREATE TABLE unsigned_integer_test_table (val UInt32) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;"
-${CLICKHOUSE_CLIENT} --query="CREATE TABLE enum_test_table (val Enum16('hello' = 1, 'world' = 2, 'yandex' = 256, 'clickhouse' = 257)) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;"
+${CLICKHOUSE_CLIENT} --query="CREATE TABLE enum_test_table (val Enum16('hello' = 1, 'world' = 2, 'youtube' = 256, 'clickhouse' = 257)) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;"
 ${CLICKHOUSE_CLIENT} --query="CREATE TABLE date_test_table (val Date) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;"

 ${CLICKHOUSE_CLIENT} --query="SYSTEM STOP MERGES string_test_table;"
@@ -40,7 +40,7 @@ ${CLICKHOUSE_CLIENT} --query="INSERT INTO fixed_string_test_table VALUES ('0'),
 # 131072 -> 17 bit is 1
 ${CLICKHOUSE_CLIENT} --query="INSERT INTO signed_integer_test_table VALUES (-2), (0), (2), (2), (131072), (131073), (131073);"
 ${CLICKHOUSE_CLIENT} --query="INSERT INTO unsigned_integer_test_table VALUES (0), (2), (2), (131072), (131073), (131073);"
-${CLICKHOUSE_CLIENT} --query="INSERT INTO enum_test_table VALUES ('hello'), ('world'), ('world'), ('yandex'), ('clickhouse'), ('clickhouse');"
+${CLICKHOUSE_CLIENT} --query="INSERT INTO enum_test_table VALUES ('hello'), ('world'), ('world'), ('youtube'), ('clickhouse'), ('clickhouse');"
 ${CLICKHOUSE_CLIENT} --query="INSERT INTO date_test_table VALUES (1), (2), (2), (256), (257), (257);"

 CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=debug/g')
diff --git a/tests/queries/0_stateless/00900_long_parquet_load.reference b/tests/queries/0_stateless/00900_long_parquet_load.reference
index 6ecff505b2e..b295a226853 100644
--- a/tests/queries/0_stateless/00900_long_parquet_load.reference
+++ b/tests/queries/0_stateless/00900_long_parquet_load.reference
@@ -88,6 +88,9 @@ idx10 ['This','is','a','test']
 22
 23
 24
+=== Try load data from case_insensitive_column_matching.parquet
+123 1
+456 2
 === Try load data from datapage_v2.snappy.parquet
 Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Unknown encoding type.: While executing ParquetBlockInputFormat: data for INSERT was parsed from stdin: (in query: INSERT INTO parquet_load FORMAT Parquet). (CANNOT_READ_ALL_DATA)
@@ -339,9 +342,6 @@ Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Unkno
 (NULL)
 === Try load data from single_nan.parquet
 \N
-=== Try load data from test_setting_input_format_use_lowercase_column_name.parquet
-123 1
-456 2
 === Try load data from userdata1.parquet
 1454486129 1 Amanda Jordan ajordan0@com.com Female 1.197.201.2 6759521864920116 Indonesia 3/8/1971 49756.53 Internal Auditor 1E+02
 1454519043 2 Albert Freeman afreeman1@is.gd Male 218.111.175.34 Canada 1/16/1968 150280.17 Accountant IV
diff --git a/tests/queries/0_stateless/00909_ngram_distance.reference b/tests/queries/0_stateless/00909_ngram_distance.reference
index 290e24faac5..4323fa86151 100644
--- a/tests/queries/0_stateless/00909_ngram_distance.reference
+++ b/tests/queries/0_stateless/00909_ngram_distance.reference
@@ -113,112 +113,112 @@
 0
 http://autometric.ru/ 0
 http://metric.ru/ 0
-http://metrica.yandex.com/ 0
 http://metrika.ru/ 0
 http://metris.ru/ 0
-пап привет как дела - Яндекс.Видео 0
+http://top.bigmir.net/ 0
+пап привет как дела - TUT.BY 0
 привет 0
-привет братан как дела - Яндекс.Видео 0
-привет как дела клип - Яндекс.Видео 0
+привет братан как дела - TUT.BY 0
+привет как дела клип - TUT.BY 0
 привет как дела?... Херсон 0
 0
 привет как дела?... Херсон 600
-пап привет как дела - Яндекс.Видео 684
-привет как дела клип - Яндекс.Видео 692
-привет братан как дела - Яндекс.Видео 707
+пап привет как дела - TUT.BY 625
+привет как дела клип - TUT.BY 636
+привет братан как дела - TUT.BY 657
 http://autometric.ru/ 1000
 http://metric.ru/ 1000
-http://metrica.yandex.com/ 1000
 http://metrika.ru/ 1000
 http://metris.ru/ 1000
+http://top.bigmir.net/ 1000
 привет 1000
 0
 http://metric.ru/ 765
 http://metris.ru/ 765
 http://metrika.ru/ 778
 http://autometric.ru/ 810
-http://metrica.yandex.com/ 846
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 привет как дела?... Херсон 297
-пап привет как дела - Яндекс.Видео 422
-привет как дела клип - Яндекс.Видео 435
-привет братан как дела - Яндекс.Видео 500
+пап привет как дела - TUT.BY 333
+привет как дела клип - TUT.BY 350
+привет братан как дела - TUT.BY 429
 привет 529
 1000
 http://autometric.ru/ 1000
 http://metric.ru/ 1000
-http://metrica.yandex.com/ 1000
 http://metrika.ru/ 1000
 http://metris.ru/ 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 436
 привет как дела?... Херсон 459
-пап привет как дела - Яндекс.Видео 511
+привет как дела клип - TUT.BY 500
+привет братан как дела - TUT.BY 524
 привет 529
-привет как дела клип - Яндекс.Видео 565
-привет братан как дела - Яндекс.Видео 583
 1000
 http://autometric.ru/ 1000
 http://metric.ru/ 1000
-http://metrica.yandex.com/ 1000
 http://metrika.ru/ 1000
 http://metris.ru/ 1000
+http://top.bigmir.net/ 1000
 http://metrika.ru/ 524
 http://metric.ru/ 700
 http://metris.ru/ 700
 http://autometric.ru/ 750
-http://metrica.yandex.com/ 793
+http://top.bigmir.net/ 920
 1000
-пап привет как дела - Яндекс.Видео 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 http://metric.ru/ 600
-http://metrica.yandex.com/ 655
 http://autometric.ru/ 667
 http://metris.ru/ 700
 http://metrika.ru/ 714
 1000
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 http://metrika.ru/ 619
 http://metric.ru/ 700
 http://metris.ru/ 700
 http://autometric.ru/ 750
-http://metrica.yandex.com/ 793
 1000
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 http://metric.ru/ 600
 http://autometric.ru/ 667
 http://metris.ru/ 700
 http://metrika.ru/ 714
-http://metrica.yandex.com/ 724
 1000
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
-http://metrica.yandex.com/ 714
+http://top.bigmir.net/ 667
+http://metrika.ru/ 900
 1000
 http://autometric.ru/ 1000
 http://metric.ru/ 1000
-http://metrika.ru/ 1000
 http://metris.ru/ 1000
-пап привет как дела - Яндекс.Видео 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 0
 0
@@ -335,135 +335,135 @@ http://metris.ru/ 1000
 0
 http://autometric.ru/ 0
 http://metric.ru/ 0
-http://metrica.yandex.com/ 0
 http://metrika.ru/ 0
 http://metris.ru/ 0
-пап привет как дела - Яндекс.Видео 0
+http://top.bigmir.net/ 0
+пап привет как дела - TUT.BY 0
 привет 0
-привет братан как дела - Яндекс.Видео 0
-привет как дела клип - Яндекс.Видео 0
+привет братан как дела - TUT.BY 0
+привет как дела клип - TUT.BY 0
 привет как дела?... Херсон 0
 0
 привет как дела?... Херсон 600
-пап привет как дела - Яндекс.Видео 684
-привет как дела клип - Яндекс.Видео 692
-привет братан как дела - Яндекс.Видео 707
+пап привет как дела - TUT.BY 625
+привет как дела клип - TUT.BY 636
+привет братан как дела - TUT.BY 657
 http://autometric.ru/ 1000
 http://metric.ru/ 1000
-http://metrica.yandex.com/ 1000
 http://metrika.ru/ 1000
 http://metris.ru/ 1000
+http://top.bigmir.net/ 1000
 привет 1000
 0
 http://metric.ru/ 765
 http://metris.ru/ 765
 http://metrika.ru/ 778
 http://autometric.ru/ 810
-http://metrica.yandex.com/ 846
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 привет как дела?... Херсон 297
-пап привет как дела - Яндекс.Видео 422
-привет как дела клип - Яндекс.Видео 435
-привет братан как дела - Яндекс.Видео 500
+пап привет как дела - TUT.BY 333
+привет как дела клип - TUT.BY 350
+привет братан как дела - TUT.BY 429
 привет 529
 1000
 http://autometric.ru/ 1000
 http://metric.ru/ 1000
-http://metrica.yandex.com/ 1000
 http://metrika.ru/ 1000
 http://metris.ru/ 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 436
 привет как дела?... Херсон 459
-пап привет как дела - Яндекс.Видео 511
+привет как дела клип - TUT.BY 500
+привет братан как дела - TUT.BY 524
 привет 529
-привет как дела клип - Яндекс.Видео 565
-привет братан как дела - Яндекс.Видео 583
 1000
 http://autometric.ru/ 1000
 http://metric.ru/ 1000
-http://metrica.yandex.com/ 1000
 http://metrika.ru/ 1000
 http://metris.ru/ 1000
+http://top.bigmir.net/ 1000
 http://metrika.ru/ 524
 http://metric.ru/ 700
 http://metris.ru/ 700
 http://autometric.ru/ 750
-http://metrica.yandex.com/ 793
+http://top.bigmir.net/ 920
 1000
-пап привет как дела - Яндекс.Видео 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 http://metrika.ru/ 524
 http://metric.ru/ 700
 http://metris.ru/ 700
 http://autometric.ru/ 750
-http://metrica.yandex.com/ 793
+http://top.bigmir.net/ 920
 1000
-пап привет как дела - Яндекс.Видео 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 http://metric.ru/ 600
-http://metrica.yandex.com/ 655
 http://autometric.ru/ 667
 http://metris.ru/ 700
 http://metrika.ru/ 714
 1000
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 http://metrika.ru/ 619
 http://metric.ru/ 700
 http://metris.ru/ 700
 http://autometric.ru/ 750
-http://metrica.yandex.com/ 793
 1000
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 http://metric.ru/ 600
 http://autometric.ru/ 667
 http://metris.ru/ 700
 http://metrika.ru/ 714
-http://metrica.yandex.com/ 724
 1000
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
-http://metrica.yandex.com/ 714
+http://top.bigmir.net/ 667
+http://metrika.ru/ 900
+ 1000
+http://autometric.ru/ 1000
+http://metric.ru/ 1000
+http://metris.ru/ 1000
+пап привет как дела - TUT.BY 1000
+привет 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
+привет как дела?... Херсон 1000
+привет как дела клип - TUT.BY 0
+пап привет как дела - TUT.BY 208
+привет братан как дела - TUT.BY 286
+привет как дела?... Херсон 490
+привет 742
 1000
 http://autometric.ru/ 1000
 http://metric.ru/ 1000
 http://metrika.ru/ 1000
 http://metris.ru/ 1000
-пап привет как дела - Яндекс.Видео 1000
-привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
-привет как дела?... Херсон 1000
-привет как дела клип - Яндекс.Видео 0
-пап привет как дела - Яндекс.Видео 169
-привет братан как дела - Яндекс.Видео 235
-привет как дела?... Херсон 544
-привет 784
- 1000
-http://autometric.ru/ 1000
-http://metric.ru/ 1000
-http://metrica.yandex.com/ 1000
-http://metrika.ru/ 1000
-http://metris.ru/ 1000
+http://top.bigmir.net/ 1000
 0
 0
 0
@@ -576,82 +576,82 @@ http://metris.ru/ 1000
 111
 429
 1000
+пап привет как дела - TUT.BY 242
 привет как дела?... Херсон 254
-пап привет как дела - Яндекс.Видео 398
-привет как дела клип - Яндекс.Видео 412
-привет братан как дела - Яндекс.Видео 461
+привет как дела клип - TUT.BY 265
+привет братан как дела - TUT.BY 333
 привет 471
 1000
 http://autometric.ru/ 1000
 http://metric.ru/ 1000
-http://metrica.yandex.com/ 1000
 http://metrika.ru/ 1000
 http://metris.ru/ 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 303
 привет как дела?... Херсон 343
-пап привет как дела - Яндекс.Видео 446
+привет как дела клип - TUT.BY 353
+привет братан как дела - TUT.BY 389
 привет 471
-привет как дела клип - Яндекс.Видео 482
-привет братан как дела - Яндекс.Видео 506
 1000
 http://autometric.ru/ 1000
 http://metric.ru/ 1000
-http://metrica.yandex.com/ 1000
 http://metrika.ru/ 1000
 http://metris.ru/ 1000
+http://top.bigmir.net/ 1000
 http://metrika.ru/ 579
 http://metric.ru/ 778
 http://metris.ru/ 778
 http://autometric.ru/ 818
-http://metrica.yandex.com/ 852
 1000
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 http://metric.ru/ 667
-http://metrica.yandex.com/ 704
 http://autometric.ru/ 727
 http://metris.ru/ 778
 http://metrika.ru/ 789
 1000
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 http://metrika.ru/ 684
 http://metric.ru/ 778
 http://metris.ru/ 778
 http://autometric.ru/ 818
-http://metrica.yandex.com/ 852
 1000
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 http://metric.ru/ 667
 http://autometric.ru/ 727
-http://metrica.yandex.com/ 778
 http://metris.ru/ 778
 http://metrika.ru/ 789
 1000
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
-http://metrica.yandex.com/ 769
+http://top.bigmir.net/ 727
 1000
 http://autometric.ru/ 1000
 http://metric.ru/ 1000
 http://metrika.ru/ 1000
 http://metris.ru/ 1000
-пап привет как дела - Яндекс.Видео 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 0
 0
@@ -765,91 +765,91 @@ http://metris.ru/ 1000
 111
 600
 1000
+пап привет как дела - TUT.BY 909
 привет как дела?... Херсон 910
-пап привет как дела - Яндекс.Видео 928
-привет как дела клип - Яндекс.Видео 929
-привет братан как дела - Яндекс.Видео 955
+привет как дела клип - TUT.BY 912
+привет братан как дела - TUT.BY 944
 1000
 http://autometric.ru/ 1000
 http://metric.ru/ 1000
-http://metrica.yandex.com/ 1000
 http://metrika.ru/ 1000
 http://metris.ru/ 1000
+http://top.bigmir.net/ 1000
 привет 1000
+пап привет как дела - TUT.BY 667
 привет как дела?... Херсон 672
-пап привет как дела - Яндекс.Видео 735
-привет как дела клип - Яндекс.Видео 741
-привет братан как дела - Яндекс.Видео 753
+привет как дела клип - TUT.BY 676
+привет братан как дела - TUT.BY 694
 1000
 http://autometric.ru/ 1000
 http://metric.ru/ 1000
-http://metrica.yandex.com/ 1000
 http://metrika.ru/ 1000
 http://metris.ru/ 1000
+http://top.bigmir.net/ 1000
 привет 1000
 http://metrika.ru/ 579
 http://metric.ru/ 778
 http://metris.ru/ 778
 http://autometric.ru/ 818
-http://metrica.yandex.com/ 852
 1000
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 http://metrika.ru/ 579
 http://metric.ru/ 778
 http://metris.ru/ 778
 http://autometric.ru/ 818
-http://metrica.yandex.com/ 852
 1000
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 http://metric.ru/ 667
-http://metrica.yandex.com/ 704
 http://autometric.ru/ 727
 http://metris.ru/ 778
 http://metrika.ru/ 789
 1000
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 http://metrika.ru/ 684
 http://metric.ru/ 778
 http://metris.ru/ 778
 http://autometric.ru/ 818
-http://metrica.yandex.com/ 852
 1000
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
 http://metric.ru/ 667
 http://autometric.ru/ 727
-http://metrica.yandex.com/ 778
 http://metris.ru/ 778
 http://metrika.ru/ 789
 1000
-пап привет как дела - Яндекс.Видео 1000
+http://top.bigmir.net/ 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
-http://metrica.yandex.com/ 769
+http://top.bigmir.net/ 727
 1000
 http://autometric.ru/ 1000
 http://metric.ru/ 1000
 http://metrika.ru/ 1000
 http://metris.ru/ 1000
-пап привет как дела - Яндекс.Видео 1000
+пап привет как дела - TUT.BY 1000
 привет 1000
-привет братан как дела - Яндекс.Видео 1000
-привет как дела клип - Яндекс.Видео 1000
+привет братан как дела - TUT.BY 1000
+привет как дела клип - TUT.BY 1000
 привет как дела?... Херсон 1000
diff --git a/tests/queries/0_stateless/00909_ngram_distance.sql b/tests/queries/0_stateless/00909_ngram_distance.sql
index b2f403c415a..28aff50d22e 100644
--- a/tests/queries/0_stateless/00909_ngram_distance.sql
+++ b/tests/queries/0_stateless/00909_ngram_distance.sql
@@ -32,7 +32,7 @@ select round(1000 * ngramDistanceUTF8('абвгдеёжз', 'ёёёёёёёё'))
 drop table if exists test_distance;
 create table test_distance (Title String) engine = Memory;
-insert into test_distance values ('привет как дела?... Херсон'), ('привет как дела клип - Яндекс.Видео'), ('привет'), ('пап привет как дела - Яндекс.Видео'), ('привет братан как дела - Яндекс.Видео'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://metrica.yandex.com/'), ('http://metris.ru/'), ('http://metrika.ru/'), ('');
+insert into test_distance values ('привет как дела?... Херсон'), ('привет как дела клип - TUT.BY'), ('привет'), ('пап привет как дела - TUT.BY'), ('привет братан как дела - TUT.BY'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://top.bigmir.net/'), ('http://metris.ru/'), ('http://metrika.ru/'), ('');

 SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, Title) as distance, Title;
 SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, extract(Title, 'как дела')) as distance, Title;
@@ -44,7 +44,7 @@ SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUT
 SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metrica') as distance, Title;
 SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metriks') as distance, Title;
 SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metrics') as distance, Title;
-SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'yandex') as distance, Title;
+SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'bigmir') as distance, Title;

 select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5;
@@ -91,8 +91,8 @@ SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCa
 SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'mEtrica') as distance, Title;
 SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'metriKS') as distance, Title;
 SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'metrics') as distance, Title;
-SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'YanDEX') as distance, Title;
-SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - яндеКс.видео') as distance, Title;
+SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'BigMIR') as distance, Title;
+SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - TuT.by') as distance, Title;

 select round(1000 * ngramDistance(materialize(''), '')) from system.numbers limit 5;
@@ -134,7 +134,7 @@ SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(T
 SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metrica') as distance, Title;
 SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metriks') as distance, Title;
 SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metrics') as distance, Title;
-SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'yandex') as distance, Title;
+SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'bigmir') as distance, Title;

 select round(1000 * ngramDistanceCaseInsensitive(materialize(''), '')) from system.numbers limit 5;
 select round(1000 * ngramDistanceCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5;
@@ -175,6 +175,6 @@ SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCa
 SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'mEtrica') as distance, Title;
 SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'metriKS') as distance, Title;
 SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'metrics') as distance, Title;
-SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'YanDEX') as distance, Title;
+SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'BigMIR') as distance, Title;

 drop table if exists test_distance;
diff --git a/tests/queries/0_stateless/00926_multimatch.reference b/tests/queries/0_stateless/00926_multimatch.reference
index 4a2320de57b..7ff32ecd73b 100644
--- a/tests/queries/0_stateless/00926_multimatch.reference
+++ b/tests/queries/0_stateless/00926_multimatch.reference
@@ -600,16 +600,16 @@
 1
 1
 1
-1
-1
-1
-1
-1
-1
-1
-1
-1
-1
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
 1
 1
 1
diff --git a/tests/queries/0_stateless/00926_multimatch.sql b/tests/queries/0_stateless/00926_multimatch.sql
index 90cc289b5a5..b9843a1b4ba 100644
--- a/tests/queries/0_stateless/00926_multimatch.sql
+++ b/tests/queries/0_stateless/00926_multimatch.sql
@@ -75,18 +75,18 @@ select 1 = multiMatchAny(materialize('abcdef'), ['a......', 'a.....']) from syst
 select 0 = multiMatchAny(materialize('aaaa'), ['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}']) from system.numbers limit 10;
 select 1 = multiMatchAny(materialize('abc'), ['a\0d']) from system.numbers limit 10;

-select 1 = multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['google', 'yandex1']) from system.numbers limit 10;
-select 2 = multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['google1', 'yandex']) from system.numbers limit 10;
-select 0 != multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['.*goo.*', '.*yan.*']) from system.numbers limit 10;
+select 1 = multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), ['google', 'unian1']) from system.numbers limit 10;
+select 2 = multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), ['google1', 'unian']) from system.numbers limit 10;
+select 0 != multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), ['.*goo.*', '.*yan.*']) from system.numbers limit 10;
 select 5 = multiMatchAnyIndex(materialize('vladizlvav dabe don\'t heart me no more'), ['what', 'is', 'love', 'baby', 'no mo??', 'dont', 'h.rt me']) from system.numbers limit 10;

 SELECT multiMatchAny(materialize('/odezhda-dlya-bega/'), ['/odezhda-dlya-bega/', 'kurtki-i-vetrovki-dlya-bega', 'futbolki-i-mayki-dlya-bega']);
 SELECT 1 = multiMatchAny('фабрикант', ['f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]']);

 -- All indices tests
-SELECT [1, 2] = arraySort(multiMatchAllIndices(materialize('gogleuedeyandexgoogle'), ['.*goo.*', '.*yan.*'])) from system.numbers limit 5;
-SELECT [1, 3] = arraySort(multiMatchAllIndices(materialize('gogleuedeyandexgoogle'), ['.*goo.*', 'neverexisted', '.*yan.*'])) from system.numbers limit 5;
-SELECT [] = multiMatchAllIndices(materialize('gogleuedeyandexgoogle'), ['neverexisted', 'anotherone', 'andanotherone']) from system.numbers limit 5;
+SELECT [1, 2] = arraySort(multiMatchAllIndices(materialize('gogleuedeuniangoogle'), ['.*goo.*', '.*yan.*'])) from system.numbers limit 5;
+SELECT [1, 3] = arraySort(multiMatchAllIndices(materialize('gogleuedeuniangoogle'), ['.*goo.*', 'neverexisted', '.*yan.*'])) from system.numbers limit
5; +SELECT [] = multiMatchAllIndices(materialize('gogleuedeuniangoogle'), ['neverexisted', 'anotherone', 'andanotherone']) from system.numbers limit 5; SELECT [1, 2, 3, 11] = arraySort(multiMatchAllIndices('фабрикант', ['', 'рикан', 'а', 'f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]'])); SELECT [1] = multiMatchAllIndices(materialize('/odezhda-dlya-bega/'), ['/odezhda-dlya-bega/', 'kurtki-i-vetrovki-dlya-bega', 'futbolki-i-mayki-dlya-bega']); SELECT [] = multiMatchAllIndices(materialize('aaaa'), ['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}']); diff --git a/tests/queries/0_stateless/00951_ngram_search.reference b/tests/queries/0_stateless/00951_ngram_search.reference index a98f63a198a..ece03fc649e 100644 --- a/tests/queries/0_stateless/00951_ngram_search.reference +++ b/tests/queries/0_stateless/00951_ngram_search.reference @@ -113,113 +113,113 @@ 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 308 -привет братан как дела - Яндекс.Видео 923 -пап привет как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 923 +пап привет как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 308 -привет братан как дела - Яндекс.Видео 769 -привет как дела клип - Яндекс.Видео 769 +привет братан как дела - TUT.BY 769 +привет как дела клип - TUT.BY 769 привет как дела?... Херсон 769 -пап привет как дела - Яндекс.Видео 846 +пап привет как дела - TUT.BY 846 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... 
Херсон 0 +http://top.bigmir.net/ 200 http://autometric.ru/ 600 http://metric.ru/ 600 -http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 600 http://metris.ru/ 600 http://autometric.ru/ 800 http://metric.ru/ 800 -http://metrica.yandex.com/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 600 http://metric.ru/ 600 -http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 800 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 600 http://metris.ru/ 600 http://autometric.ru/ 800 http://metric.ru/ 800 -http://metrica.yandex.com/ 800 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 -http://metrica.yandex.com/ 1000 +http://metrika.ru/ 250 +http://top.bigmir.net/ 1000 1000 1000 1000 @@ -335,135 +335,135 @@ http://metrica.yandex.com/ 1000 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... 
Херсон 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 308 -привет братан как дела - Яндекс.Видео 923 -пап привет как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 923 +пап привет как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 308 -привет братан как дела - Яндекс.Видео 769 -привет как дела клип - Яндекс.Видео 769 +привет братан как дела - TUT.BY 769 +привет как дела клип - TUT.BY 769 привет как дела?... Херсон 769 -пап привет как дела - Яндекс.Видео 846 +пап привет как дела - TUT.BY 846 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 +http://top.bigmir.net/ 200 http://autometric.ru/ 600 http://metric.ru/ 600 -http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 +http://top.bigmir.net/ 200 http://autometric.ru/ 600 http://metric.ru/ 600 -http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 600 http://metris.ru/ 600 http://autometric.ru/ 800 http://metric.ru/ 800 -http://metrica.yandex.com/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 600 http://metric.ru/ 600 -http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 800 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 600 http://metris.ru/ 600 http://autometric.ru/ 800 http://metric.ru/ 800 -http://metrica.yandex.com/ 800 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 -http://metrica.yandex.com/ 1000 +http://metrika.ru/ 250 +http://top.bigmir.net/ 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 -http://metrika.ru/ 0 http://metris.ru/ 0 -привет 121 -привет как дела?... 
Херсон 394 -привет братан как дела - Яндекс.Видео 788 -пап привет как дела - Яндекс.Видео 818 -привет как дела клип - Яндекс.Видео 1000 +http://metrika.ru/ 32 +привет 129 +http://top.bigmir.net/ 258 +привет как дела?... Херсон 419 +привет братан как дела - TUT.BY 452 +пап привет как дела - TUT.BY 484 +привет как дела клип - TUT.BY 677 1000 1000 1000 @@ -579,80 +579,80 @@ http://metris.ru/ 0 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 360 -привет братан как дела - Яндекс.Видео 960 -пап привет как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 960 +пап привет как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 360 -привет братан как дела - Яндекс.Видео 880 -привет как дела клип - Яндекс.Видео 880 +привет братан как дела - TUT.BY 880 +привет как дела клип - TUT.BY 880 привет как дела?... Херсон 880 -пап привет как дела - Яндекс.Видео 920 +пап привет как дела - TUT.BY 920 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 500 http://metric.ru/ 500 -http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 500 http://metris.ru/ 500 http://autometric.ru/ 750 http://metric.ru/ 750 -http://metrica.yandex.com/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 500 http://metric.ru/ 500 -http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 750 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 500 http://metris.ru/ 500 http://autometric.ru/ 750 http://metric.ru/ 750 -http://metrica.yandex.com/ 750 0 http://autometric.ru/ 0 http://metric.ru/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... 
Херсон 0 -http://metrica.yandex.com/ 1000 +http://top.bigmir.net/ 1000 1000 1000 1000 @@ -768,88 +768,88 @@ http://metrica.yandex.com/ 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 0 -привет братан как дела - Яндекс.Видео 80 -пап привет как дела - Яндекс.Видео 120 -привет как дела клип - Яндекс.Видео 120 +привет братан как дела - TUT.BY 80 +пап привет как дела - TUT.BY 120 +привет как дела клип - TUT.BY 120 привет как дела?... Херсон 120 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 0 -пап привет как дела - Яндекс.Видео 440 -привет братан как дела - Яндекс.Видео 440 -привет как дела клип - Яндекс.Видео 440 +пап привет как дела - TUT.BY 440 +привет братан как дела - TUT.BY 440 +привет как дела клип - TUT.BY 440 привет как дела?... Херсон 440 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 500 http://metric.ru/ 500 -http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 500 http://metric.ru/ 500 -http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 500 http://metris.ru/ 500 http://autometric.ru/ 750 http://metric.ru/ 750 -http://metrica.yandex.com/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 500 http://metric.ru/ 500 -http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 750 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 500 http://metris.ru/ 500 http://autometric.ru/ 750 http://metric.ru/ 750 -http://metrica.yandex.com/ 750 0 http://autometric.ru/ 0 http://metric.ru/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... 
Херсон 0 -http://metrica.yandex.com/ 1000 +http://top.bigmir.net/ 1000 diff --git a/tests/queries/0_stateless/00951_ngram_search.sql b/tests/queries/0_stateless/00951_ngram_search.sql index f1a37605ebc..77525d86013 100644 --- a/tests/queries/0_stateless/00951_ngram_search.sql +++ b/tests/queries/0_stateless/00951_ngram_search.sql @@ -32,7 +32,7 @@ select round(1000 * ngramSearchUTF8('абвгдеёжз', 'ёёёёёёёё')); drop table if exists test_entry_distance; create table test_entry_distance (Title String) engine = Memory; -insert into test_entry_distance values ('привет как дела?... Херсон'), ('привет как дела клип - Яндекс.Видео'), ('привет'), ('пап привет как дела - Яндекс.Видео'), ('привет братан как дела - Яндекс.Видео'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://metrica.yandex.com/'), ('http://metris.ru/'), ('http://metrika.ru/'), (''); +insert into test_entry_distance values ('привет как дела?... Херсон'), ('привет как дела клип - TUT.BY'), ('привет'), ('пап привет как дела - TUT.BY'), ('привет братан как дела - TUT.BY'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://top.bigmir.net/'), ('http://metris.ru/'), ('http://metrika.ru/'), (''); SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, Title) as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, extract(Title, 'как дела')) as distance, Title; @@ -44,7 +44,7 @@ SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSear SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metriks') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'yandex') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'bigmir') as distance, Title; select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5; @@ -91,8 +91,8 @@ SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSear SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'mEtrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'metriKS') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'YanDEX') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - яндеКс.видео') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'BigMIR') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - bigMir.Net') as distance, Title; select round(1000 * ngramSearch(materialize(''), '')) from system.numbers limit 5; @@ -134,7 +134,7 @@ SELECT Title, round(1000 * distance) FROM 
test_entry_distance ORDER BY ngramSear SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metriks') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'yandex') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'bigmir') as distance, Title; select round(1000 * ngramSearchCaseInsensitive(materialize(''), '')) from system.numbers limit 5; select round(1000 * ngramSearchCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5; @@ -175,6 +175,6 @@ SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSear SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'mEtrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'metriKS') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'YanDEX') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'BigMIR') as distance, Title; drop table if exists test_entry_distance; diff --git a/tests/queries/0_stateless/00979_yandex_consistent_hash_fpe.sql b/tests/queries/0_stateless/00979_yandex_consistent_hash_fpe.sql index 79fabeae7ef..3da52f2cb96 100644 --- a/tests/queries/0_stateless/00979_yandex_consistent_hash_fpe.sql +++ b/tests/queries/0_stateless/00979_yandex_consistent_hash_fpe.sql @@ -1 +1 @@ -SELECT yandexConsistentHash(-1, 40000); -- { serverError 36 } +SELECT kostikConsistentHash(-1, 40000); -- { serverError 36 } diff --git a/tests/queries/0_stateless/00998_constraints_all_tables.reference b/tests/queries/0_stateless/00998_constraints_all_tables.reference index 0ec8b004ae4..90a2380df1e 100644 --- a/tests/queries/0_stateless/00998_constraints_all_tables.reference +++ b/tests/queries/0_stateless/00998_constraints_all_tables.reference @@ -10,5 +10,5 @@ 0 0 3 -CREATE TABLE default.constrained\n(\n `URL` String,\n CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = \'yandex.ru\',\n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log -CREATE TABLE default.constrained2\n(\n `URL` String,\n CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = \'yandex.ru\',\n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log +CREATE TABLE default.constrained\n(\n `URL` String,\n CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = \'censor.net\',\n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log +CREATE TABLE default.constrained2\n(\n `URL` String,\n CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = \'censor.net\',\n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log diff --git a/tests/queries/0_stateless/00998_constraints_all_tables.sql b/tests/queries/0_stateless/00998_constraints_all_tables.sql index e47b7eaf83c..bb0d6933a01 100644 --- a/tests/queries/0_stateless/00998_constraints_all_tables.sql +++ b/tests/queries/0_stateless/00998_constraints_all_tables.sql @@ -1,53 +1,53 @@ DROP TABLE IF EXISTS constrained; 
-CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Null; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), ('https://yandex.ru/te\xFFst'); -- { serverError 469 } -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), (toValidUTF8('https://yandex.ru/te\xFFst')); +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Null; +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), ('https://censor.net/te\xFFst'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), (toValidUTF8('https://censor.net/te\xFFst')); DROP TABLE constrained; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Memory; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Memory; +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), ('https://yandex.ru/te\xFFst'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), ('https://censor.net/te\xFFst'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), (toValidUTF8('https://yandex.ru/te\xFFst')); +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), (toValidUTF8('https://censor.net/te\xFFst')); SELECT count() FROM constrained; DROP TABLE constrained; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = StripeLog; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = StripeLog; +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), ('https://yandex.ru/te\xFFst'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), ('https://censor.net/te\xFFst'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), (toValidUTF8('https://yandex.ru/te\xFFst')); +INSERT INTO 
constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), (toValidUTF8('https://censor.net/te\xFFst')); SELECT count() FROM constrained; DROP TABLE constrained; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = TinyLog; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = TinyLog; +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), ('https://yandex.ru/te\xFFst'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), ('https://censor.net/te\xFFst'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), (toValidUTF8('https://yandex.ru/te\xFFst')); +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), (toValidUTF8('https://censor.net/te\xFFst')); SELECT count() FROM constrained; DROP TABLE constrained; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Log; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Log; +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), ('https://yandex.ru/te\xFFst'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), ('https://censor.net/te\xFFst'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), (toValidUTF8('https://yandex.ru/te\xFFst')); +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), (toValidUTF8('https://censor.net/te\xFFst')); SELECT count() FROM constrained; DROP TABLE constrained; DROP TABLE IF EXISTS constrained2; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Log; +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Log; CREATE TABLE constrained2 AS constrained; SHOW CREATE TABLE constrained; SHOW CREATE TABLE constrained2; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } -INSERT INTO constrained2 VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } 
+INSERT INTO constrained2 VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } DROP TABLE constrained; DROP TABLE constrained2; diff --git a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql index 6d1c7fd5ef6..86c84427297 100644 --- a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql +++ b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql @@ -5,7 +5,7 @@ set allow_experimental_parallel_reading_from_replicas=0; drop table if exists test_max_parallel_replicas_lr; -- If you wonder why the table is named with "_lr" suffix in this test. --- No reason. Actually it is the name of the table in Yandex.Market and they provided this test case for us. +-- No reason. Actually it is the name of the table at one of our customers, who provided this test case for us. CREATE TABLE test_max_parallel_replicas_lr (timestamp UInt64) ENGINE = MergeTree ORDER BY (intHash32(timestamp)) SAMPLE BY intHash32(timestamp); INSERT INTO test_max_parallel_replicas_lr select number as timestamp from system.numbers limit 100; diff --git a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh index fae263b076f..c2a35a3ef63 100755 --- a/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh +++ b/tests/queries/0_stateless/01037_polygon_dicts_correctness_all.sh @@ -34,7 +34,7 @@ CREATE TABLE test_01037.polygons_array ENGINE = Memory; " -$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.polygons_array FORMAT JSONEachRow" --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" +$CLICKHOUSE_CLIENT --query="INSERT INTO test_01037.polygons_array FORMAT JSONEachRow" --min_chunk_bytes_for_parallel_parsing=10485760 --max_insert_block_size=100000 < "${CURDIR}/01037_polygon_data" rm "${CURDIR}"/01037_polygon_data diff --git a/tests/queries/0_stateless/01059_storage_file_compression.sh b/tests/queries/0_stateless/01059_storage_file_compression.sh index fbee5070d8d..9d928986638 100755 --- a/tests/queries/0_stateless/01059_storage_file_compression.sh +++ b/tests/queries/0_stateless/01059_storage_file_compression.sh @@ -16,9 +16,10 @@ do ${CLICKHOUSE_CLIENT} --query "DROP TABLE file" done -${CLICKHOUSE_CLIENT} --query "SELECT count(), max(x) FROM file('${CLICKHOUSE_DATABASE}/{gz,br,xz,zst,lz4,bz2}.tsv.{gz,br,xz,zst,lz4,bz2}', TSV, 'x UInt64')" +${CLICKHOUSE_CLIENT} --max_read_buffer_size=1048576 --query "SELECT count(), max(x) FROM file('${CLICKHOUSE_DATABASE}/{gz,br,xz,zst,lz4,bz2}.tsv.{gz,br,xz,zst,lz4,bz2}', TSV, 'x UInt64')" for m in gz br xz zst lz4 bz2 do - ${CLICKHOUSE_CLIENT} --query "SELECT count() < 4000000, max(x) FROM file('${CLICKHOUSE_DATABASE}/${m}.tsv.${m}', RowBinary, 'x UInt8', 'none')" + ${CLICKHOUSE_CLIENT} --max_read_buffer_size=1048576 --query "SELECT count() < 4000000, max(x) FROM file('${CLICKHOUSE_DATABASE}/${m}.tsv.${m}', RowBinary, 'x UInt8', 'none')" done + diff --git a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql index dd39277ee31..b162fdb21fd 100644 --- a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql +++ b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql @@ -1,5 +1,7 @@ -- Tags: no-parallel, no-fasttest +SET prefer_localhost_replica=1; + DROP TABLE IF EXISTS file; DROP TABLE IF EXISTS 
url; DROP TABLE IF EXISTS view; diff --git a/tests/queries/0_stateless/01091_num_threads.sql b/tests/queries/0_stateless/01091_num_threads.sql index e32d663880f..faeceb0e6d6 100644 --- a/tests/queries/0_stateless/01091_num_threads.sql +++ b/tests/queries/0_stateless/01091_num_threads.sql @@ -1,5 +1,6 @@ set log_queries=1; set log_query_threads=1; +set max_threads=0; WITH 01091 AS id SELECT 1; SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql index de93166d891..aa924218360 100644 --- a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql +++ b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql @@ -3,6 +3,7 @@ -- set insert_distributed_sync = 1; -- see https://github.com/ClickHouse/ClickHouse/issues/18971 SET allow_experimental_parallel_reading_from_replicas = 0; -- see https://github.com/ClickHouse/ClickHouse/issues/34525 +SET prefer_localhost_replica = 1; DROP TABLE IF EXISTS local_01099_a; DROP TABLE IF EXISTS local_01099_b; diff --git a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh index 300d32f5a0c..60650cb9cc3 100755 --- a/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh +++ b/tests/queries/0_stateless/01107_atomic_db_detach_attach.sh @@ -13,7 +13,7 @@ $CLICKHOUSE_CLIENT -q "INSERT INTO test_01107.mt SELECT number + sleepEachRow(3) sleep 1 $CLICKHOUSE_CLIENT -q "DETACH TABLE test_01107.mt" --database_atomic_wait_for_drop_and_detach_synchronously=0 -$CLICKHOUSE_CLIENT -q "ATTACH TABLE test_01107.mt" 2>&1 | grep -F "Code: 57" > /dev/null && echo "OK" +$CLICKHOUSE_CLIENT -q "ATTACH TABLE test_01107.mt" --database_atomic_wait_for_drop_and_detach_synchronously=0 2>&1 | grep -F "Code: 57" > /dev/null && echo "OK" $CLICKHOUSE_CLIENT -q "DETACH DATABASE test_01107" --database_atomic_wait_for_drop_and_detach_synchronously=0 2>&1 | grep -F "Code: 219" > /dev/null && echo "OK" wait diff --git a/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.sql b/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.sql index 4aacecc9734..3822f22d353 100644 --- a/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.sql +++ b/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.sql @@ -1,4 +1,4 @@ --- Tags: distributed, no-replicated-database, no-parallel, no-fasttest +-- Tags: distributed, no-replicated-database, no-parallel, no-fasttest, no-random-settings SET allow_experimental_live_view = 1; diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index c9c01455e31..1807da6948a 100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan +# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings # Tag no-fasttest: max_memory_usage_for_user can interfere another queries running concurrently # Regression for MemoryTracker that had been incorrectly accounted diff --git a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh index 9a4df37d206..34fa822b6ea 100755 
--- a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh +++ b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel +# Tags: long, zookeeper, no-parallel, no-backward-compatibility-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference index 994e3f24aaf..7cf545176e3 100644 --- a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference +++ b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference @@ -2,13 +2,13 @@ other other google other -yandex +censor.net other yahoo other other other -SELECT transform(number, [2, 4, 6], _CAST([\'google\', \'yandex\', \'yahoo\'], \'Array(Enum8(\\\'google\\\' = 1, \\\'other\\\' = 2, \\\'yahoo\\\' = 3, \\\'yandex\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'google\\\' = 1, \\\'other\\\' = 2, \\\'yahoo\\\' = 3, \\\'yandex\\\' = 4)\')) +SELECT transform(number, [2, 4, 6], _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\')) FROM system.numbers LIMIT 10 google @@ -17,24 +17,24 @@ google google google google -yandex -yandex -yandex -yandex -SELECT if(number > 5, \'yandex\', \'google\') +censor.net +censor.net +censor.net +censor.net +SELECT if(number > 5, \'censor.net\', \'google\') FROM system.numbers LIMIT 10 other other google other -yandex +censor.net other yahoo other other other -SELECT transform(number, [2, 4, 6], [\'google\', \'yandex\', \'yahoo\'], \'other\') +SELECT transform(number, [2, 4, 6], [\'google\', \'censor.net\', \'yahoo\'], \'other\') FROM system.numbers LIMIT 10 google @@ -43,10 +43,10 @@ google google google google -yandex -yandex -yandex -yandex -SELECT if(number > 5, \'yandex\', \'google\') +censor.net +censor.net +censor.net +censor.net +SELECT if(number > 5, \'censor.net\', \'google\') FROM system.numbers LIMIT 10 diff --git a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.sql b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.sql index dcb082c650a..ee2f48a53da 100644 --- a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.sql +++ b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.sql @@ -1,13 +1,13 @@ set optimize_if_transform_strings_to_enum = 1; -SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -SELECT number > 5 ? 'yandex' : 'google' FROM system.numbers LIMIT 10; -EXPLAIN SYNTAX SELECT number > 5 ? 'yandex' : 'google' FROM system.numbers LIMIT 10; +SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT number > 5 ? 
'censor.net' : 'google' FROM system.numbers LIMIT 10; set optimize_if_transform_strings_to_enum = 0; -SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -SELECT number > 5 ? 'yandex' : 'google' FROM system.numbers LIMIT 10; -EXPLAIN SYNTAX SELECT number > 5 ? 'yandex' : 'google' FROM system.numbers LIMIT 10; +SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; diff --git a/tests/queries/0_stateless/01358_constexpr_constraint.sql b/tests/queries/0_stateless/01358_constexpr_constraint.sql index 799f6f32259..4560ac47c42 100644 --- a/tests/queries/0_stateless/01358_constexpr_constraint.sql +++ b/tests/queries/0_stateless/01358_constexpr_constraint.sql @@ -1,7 +1,7 @@ CREATE TEMPORARY TABLE constrained ( `URL` String, - CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = domainWithoutWWW(URL), + CONSTRAINT identity CHECK domainWithoutWWW(URL) = domainWithoutWWW(URL), CONSTRAINT is_utf8 CHECK isValidUTF8(URL) ); diff --git a/tests/queries/0_stateless/01407_lambda_arrayJoin.sql b/tests/queries/0_stateless/01407_lambda_arrayJoin.sql index 4f34bb59527..363b1d92dbb 100644 --- a/tests/queries/0_stateless/01407_lambda_arrayJoin.sql +++ b/tests/queries/0_stateless/01407_lambda_arrayJoin.sql @@ -1,6 +1,5 @@ SELECT arrayFilter((a) -> ((a, arrayJoin([])) IN (Null, [Null])), []); SELECT arrayFilter((a) -> ((a, arrayJoin([[]])) IN (Null, [Null])), []); --- simplified from the https://clickhouse-test-reports.s3.yandex.net/10373/6c4748a63e7acde2cc3283d96ffec590aae1e724/fuzzer/fuzzer.log#fail1 SELECT * FROM system.one ARRAY JOIN arrayFilter((a) -> ((a, arrayJoin([])) IN (NULL)), []) AS arr_x; -- { serverError 43; } SELECT * FROM numbers(1) LEFT ARRAY JOIN arrayFilter((x_0, x_1) -> (arrayJoin([]) IN (NULL)), [], []) AS arr_x; diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.reference b/tests/queries/0_stateless/01455_opentelemetry_distributed.reference index 119642df395..e70506599ec 100644 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.reference +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.reference @@ -11,9 +11,11 @@ ===native=== {"query":"select * from url('http:\/\/127.0.0.2:8123\/?query=select%201%20format%20Null', CSV, 'a int')","status":"QueryFinish","tracestate":"another custom state","sorted_by_start_time":1} {"query":"select 1 format Null\n","status":"QueryFinish","tracestate":"another custom state","sorted_by_start_time":1} +{"query":"select 1 format Null\n","status":"QueryFinish","tracestate":"another custom state","sorted_by_start_time":1} +{"query":"select 1 format Null\n","query_status":"QueryFinish","tracestate":"another custom state","sorted_by_finish_time":1} {"query":"select 1 format Null\n","query_status":"QueryFinish","tracestate":"another custom state","sorted_by_finish_time":1} {"query":"select * from url('http:\/\/127.0.0.2:8123\/?query=select%201%20format%20Null', CSV, 'a int')","query_status":"QueryFinish","tracestate":"another custom state","sorted_by_finish_time":1} -{"total spans":"2","unique spans":"2","unique 
non-zero parent spans":"2"} +{"total spans":"3","unique spans":"3","unique non-zero parent spans":"2"} {"initial query spans with proper parent":"1"} {"unique non-empty tracestate values":"1"} ===sampled=== diff --git a/tests/queries/0_stateless/01456_ast_optimizations_over_distributed.sql b/tests/queries/0_stateless/01456_ast_optimizations_over_distributed.sql index ee144264193..1e1d87a5ad5 100644 --- a/tests/queries/0_stateless/01456_ast_optimizations_over_distributed.sql +++ b/tests/queries/0_stateless/01456_ast_optimizations_over_distributed.sql @@ -8,8 +8,8 @@ SET optimize_if_transform_strings_to_enum = 1; SELECT any(number + 1) FROM numbers(1); SELECT uniq(bitNot(number)) FROM numbers(1); SELECT sum(number + 1) FROM numbers(1); -SELECT transform(number, [1, 2], ['google', 'yandex'], 'other') FROM numbers(1); -SELECT number > 0 ? 'yandex' : 'google' FROM numbers(1); +SELECT transform(number, [1, 2], ['google', 'censor.net'], 'other') FROM numbers(1); +SELECT number > 0 ? 'censor.net' : 'google' FROM numbers(1); DROP TABLE IF EXISTS local_table; @@ -23,8 +23,8 @@ INSERT INTO local_table SELECT number FROM numbers(1); SELECT any(number + 1) FROM dist; SELECT uniq(bitNot(number)) FROM dist; SELECT sum(number + 1) FROM dist; -SELECT transform(number, [1, 2], ['google', 'yandex'], 'other') FROM dist; -SELECT number > 0 ? 'yandex' : 'google' FROM dist; +SELECT transform(number, [1, 2], ['google', 'censor.net'], 'other') FROM dist; +SELECT number > 0 ? 'censor.net' : 'google' FROM dist; DROP TABLE local_table; DROP TABLE dist; diff --git a/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql b/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql index f9c227942ac..0595e67f2b0 100644 --- a/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql +++ b/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + DROP TABLE IF EXISTS buf_dest; DROP TABLE IF EXISTS buf; diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql index ca9f296b6bf..90975b0d9c4 100644 --- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql @@ -1,6 +1,7 @@ DROP TABLE IF EXISTS select_final; SET do_not_merge_across_partitions_select_final = 1; +SET max_threads = 0; CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t); diff --git a/tests/queries/0_stateless/01529_bad_memory_tracking.sh b/tests/queries/0_stateless/01529_bad_memory_tracking.sh index f5d096cc799..d12623d04b9 100755 --- a/tests/queries/0_stateless/01529_bad_memory_tracking.sh +++ b/tests/queries/0_stateless/01529_bad_memory_tracking.sh @@ -8,6 +8,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh for _ in {1..10}; do - ${CLICKHOUSE_CLIENT} --max_memory_usage '10G' --query "SELECT i FROM generateRandom('i Array(Int8)', 1, 1, 1048577) LIMIT 65536" |& grep -v -e 'Received exception from server' -e 'Code: 241' -e '(query: ' + ${CLICKHOUSE_CLIENT} --max_block_size=65505 --max_memory_usage '10G' --query "SELECT i FROM generateRandom('i Array(Int8)', 1, 1, 1048577) LIMIT 65536" |& grep -v -e 'Received exception from server' -e 'Code: 241' -e '(query: ' done exit 0 diff --git a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference index d8c9b88d8e8..46aaa6e07d6 100644 --- a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference +++ b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference @@ -9,7 +9,7 @@ Expression (Projection) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromMergeTree + ReadFromMergeTree (default.test_order_by) SELECT timestamp, key @@ -21,7 +21,7 @@ Expression (Projection) Sorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromMergeTree + ReadFromMergeTree (default.test_order_by) SELECT timestamp, key @@ -35,7 +35,7 @@ Expression (Projection) Sorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromMergeTree + ReadFromMergeTree (default.test_order_by) SELECT timestamp, key diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference index 07d361cfa46..11cc146dd62 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference @@ -26,35 +26,35 @@ Expression (Projection) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromMergeTree + ReadFromMergeTree (default.test_table) Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) Sorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromMergeTree + ReadFromMergeTree (default.test_table) Expression (Projection) Limit (preliminary LIMIT (without OFFSET)) Sorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromMergeTree + ReadFromMergeTree (default.test_table) optimize_aggregation_in_order Expression ((Projection + Before ORDER BY)) Aggregating Expression (Before GROUP BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromMergeTree + ReadFromMergeTree (default.test_table) Expression ((Projection + Before ORDER BY)) Aggregating Expression (Before GROUP BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromMergeTree + ReadFromMergeTree (default.test_table) Expression ((Projection + Before ORDER BY)) Aggregating Expression (Before GROUP BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromMergeTree + ReadFromMergeTree (default.test_table) second-index 1 1 diff --git a/tests/queries/0_stateless/01598_memory_limit_zeros.sql b/tests/queries/0_stateless/01598_memory_limit_zeros.sql index 5b321687e43..cc2a75e023e 100644 --- 
a/tests/queries/0_stateless/01598_memory_limit_zeros.sql +++ b/tests/queries/0_stateless/01598_memory_limit_zeros.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-parallel, no-fasttest, no-random-settings SET max_memory_usage = 1, max_untracked_memory = 1000000, max_threads=40; select 'test', count(*) from zeros_mt(1000000) where not ignore(zero); -- { serverError 241 } diff --git a/tests/queries/0_stateless/01622_constraints_simple_optimization.sql b/tests/queries/0_stateless/01622_constraints_simple_optimization.sql index e1922975a2a..7ec9e1a3158 100644 --- a/tests/queries/0_stateless/01622_constraints_simple_optimization.sql +++ b/tests/queries/0_stateless/01622_constraints_simple_optimization.sql @@ -8,23 +8,23 @@ SET optimize_move_to_prewhere = 1; SET optimize_substitute_columns = 1; SET optimize_append_index = 1; -CREATE TABLE constraint_test_assumption (URL String, a Int32, CONSTRAINT c1 ASSUME domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT c2 ASSUME URL > 'zzz' AND startsWith(URL, 'test') = True) ENGINE = TinyLog; +CREATE TABLE constraint_test_assumption (URL String, a Int32, CONSTRAINT c1 ASSUME domainWithoutWWW(URL) = 'bigmir.net', CONSTRAINT c2 ASSUME URL > 'zzz' AND startsWith(URL, 'test') = True) ENGINE = TinyLog; --- Add wrong rows in order to check optimization INSERT INTO constraint_test_assumption (URL, a) VALUES ('1', 1); INSERT INTO constraint_test_assumption (URL, a) VALUES ('2', 2); -INSERT INTO constraint_test_assumption (URL, a) VALUES ('yandex.ru', 3); +INSERT INTO constraint_test_assumption (URL, a) VALUES ('bigmir.net', 3); INSERT INTO constraint_test_assumption (URL, a) VALUES ('3', 4); -SELECT count() FROM constraint_test_assumption WHERE domainWithoutWWW(URL) = 'yandex.ru'; --- assumption -> 4 -SELECT count() FROM constraint_test_assumption WHERE NOT (domainWithoutWWW(URL) = 'yandex.ru'); --- assumption -> 0 -SELECT count() FROM constraint_test_assumption WHERE domainWithoutWWW(URL) != 'yandex.ru'; --- assumption -> 0 +SELECT count() FROM constraint_test_assumption WHERE domainWithoutWWW(URL) = 'bigmir.net'; --- assumption -> 4 +SELECT count() FROM constraint_test_assumption WHERE NOT (domainWithoutWWW(URL) = 'bigmir.net'); --- assumption -> 0 +SELECT count() FROM constraint_test_assumption WHERE domainWithoutWWW(URL) != 'bigmir.net'; --- assumption -> 0 SELECT count() FROM constraint_test_assumption WHERE domainWithoutWWW(URL) = 'nothing'; --- not optimized -> 0 -SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'yandex.ru' AND URL > 'zzz'); ---> assumption -> 4 -SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'yandex.ru' AND NOT URL <= 'zzz'); ---> assumption -> 4 -SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'yandex.ru' AND URL > 'zzz') OR (a = 10 AND a + 5 < 100); ---> assumption -> 4 -SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'yandex.ru' AND URL = '111'); ---> assumption & no assumption -> 0 +SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'bigmir.net' AND URL > 'zzz'); ---> assumption -> 4 +SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'bigmir.net' AND NOT URL <= 'zzz'); ---> assumption -> 4 +SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'bigmir.net' AND URL > 'zzz') OR (a = 10 AND a + 5 < 100); ---> assumption -> 4 +SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'bigmir.net' AND URL = 
'111'); ---> assumption & no assumption -> 0 SELECT count() FROM constraint_test_assumption WHERE (startsWith(URL, 'test') = True); ---> assumption -> 4 DROP TABLE constraint_test_assumption; diff --git a/tests/queries/0_stateless/01661_referer.reference b/tests/queries/0_stateless/01661_referer.reference index 49d29c80f17..d247c53413e 100644 --- a/tests/queries/0_stateless/01661_referer.reference +++ b/tests/queries/0_stateless/01661_referer.reference @@ -1,2 +1,2 @@ 1 -https://yandex.ru/ +https://meta.ua/ diff --git a/tests/queries/0_stateless/01661_referer.sh b/tests/queries/0_stateless/01661_referer.sh index 8123c925454..0299ee063ea 100755 --- a/tests/queries/0_stateless/01661_referer.sh +++ b/tests/queries/0_stateless/01661_referer.sh @@ -4,6 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT 1' --referer 'https://yandex.ru/' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT 1' --referer 'https://meta.ua/' ${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" -${CLICKHOUSE_CLIENT} --query "SELECT http_referer FROM system.query_log WHERE current_database = currentDatabase() AND http_referer LIKE '%yandex%' LIMIT 1" +${CLICKHOUSE_CLIENT} --query "SELECT http_referer FROM system.query_log WHERE current_database = currentDatabase() AND http_referer LIKE '%meta%' LIMIT 1" diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.reference b/tests/queries/0_stateless/01675_data_type_coroutine.reference index 7326d960397..541dab48def 100644 --- a/tests/queries/0_stateless/01675_data_type_coroutine.reference +++ b/tests/queries/0_stateless/01675_data_type_coroutine.reference @@ -1 +1,2 @@ Ok +Ok diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.sh b/tests/queries/0_stateless/01675_data_type_coroutine.sh index 8e80d722a4c..9f7d5401bd2 100755 --- a/tests/queries/0_stateless/01675_data_type_coroutine.sh +++ b/tests/queries/0_stateless/01675_data_type_coroutine.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -6,6 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) counter=0 retries=60 + I=0 while [[ $counter -lt $retries ]]; do I=$((I + 1)) @@ -14,5 +16,16 @@ while [[ $counter -lt $retries ]]; do ((++counter)) done +echo 'Ok' + +counter=0 +I=0 +while [[ $counter -lt $retries ]]; do + I=$((I + 1)) + TYPE=$(perl -e "print 'Array(' x $I; print 'UInt8'; print ')' x $I") + ${CLICKHOUSE_CLIENT} --prefer_localhost_replica=0 --max_parser_depth 1000000 --query "SELECT * FROM remote('127.0.0.{1,2}', generateRandom('x $TYPE', 1, 1, 1)) LIMIT 1 FORMAT Null" 2>&1 | grep -q -F 'Maximum parse depth' && break; + ((++counter)) +done + #echo "I = ${I}" echo 'Ok' diff --git a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh index edc4f6916ff..0fe04fb95fd 100755 --- a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh +++ b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh @@ -6,6 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} -q "create table insert_big_json(a String, b String) engine=MergeTree() order by tuple()"; -python3 -c "[print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000)) for i in range(10)]; [print('{{\"a\":\"{}\", 
\"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000)) for i in range(10)]" 2>/dev/null | ${CLICKHOUSE_CLIENT} --input_format_parallel_parsing=1 --max_memory_usage=0 -q "insert into insert_big_json FORMAT JSONEachRow" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||: +python3 -c "[print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000)) for i in range(10)]; [print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000)) for i in range(10)]" 2>/dev/null | ${CLICKHOUSE_CLIENT} --min_chunk_bytes_for_parallel_parsing=10485760 --max_threads=0 --input_format_parallel_parsing=1 --max_memory_usage=0 -q "insert into insert_big_json FORMAT JSONEachRow" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||: ${CLICKHOUSE_CLIENT} -q "drop table insert_big_json" diff --git a/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh index 7295537a2d2..03f7893eb04 100755 --- a/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh +++ b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: br' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON" | brotli -d | tail -n30 | head -n23 +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: br' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON SETTINGS max_block_size=65505" | brotli -d | tail -n30 | head -n23 diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index 7f36bcb6c8a..62b578c21d6 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -1,4 +1,4 @@ --- Tags: long, distributed +-- Tags: long, distributed, no-random-settings drop table if exists data_01730; diff --git a/tests/queries/0_stateless/01733_transform_ubsan.sql b/tests/queries/0_stateless/01733_transform_ubsan.sql index 256603e9087..7c3d8ef653a 100644 --- a/tests/queries/0_stateless/01733_transform_ubsan.sql +++ b/tests/queries/0_stateless/01733_transform_ubsan.sql @@ -1,4 +1,4 @@ -SELECT arrayStringConcat(arrayMap(x -> transform(x, [1025, -9223372036854775808, 65537, 257, 1048576, 10, 7, 1048575, 65536], ['yandex', 'googlegooglegooglegoogle', 'test', '', '', 'hello', 'world', '', 'xyz'], ''), arrayMap(x -> (x % -inf), range(number))), '') +SELECT arrayStringConcat(arrayMap(x -> transform(x, [1025, -9223372036854775808, 65537, 257, 1048576, 10, 7, 1048575, 65536], ['censor.net', 'googlegooglegooglegoogle', 'test', '', '', 'hello', 'world', '', 'xyz'], ''), arrayMap(x -> (x % -inf), range(number))), '') FROM system.numbers LIMIT 1025 FORMAT Null; diff --git 
a/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh index feaf4bcf623..71b003d2533 100755 --- a/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh +++ b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip' "${CLICKHOUSE_URL}&enable_http_compression=1&http_zlib_compression_level=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(100000) FORMAT JSON" | gzip -d | tail -n30 | head -n23 +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip' "${CLICKHOUSE_URL}&enable_http_compression=1&http_zlib_compression_level=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(100000) FORMAT JSON SETTINGS max_block_size=65505" | gzip -d | tail -n30 | head -n23 diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.reference b/tests/queries/0_stateless/01786_explain_merge_tree.reference index 9b2df9773ea..25c7c37beca 100644 --- a/tests/queries/0_stateless/01786_explain_merge_tree.reference +++ b/tests/queries/0_stateless/01786_explain_merge_tree.reference @@ -1,4 +1,4 @@ - ReadFromMergeTree + ReadFromMergeTree (default.test_index) Indexes: MinMax Keys: @@ -32,6 +32,7 @@ Granules: 1/2 ----------------- "Node Type": "ReadFromMergeTree", + "Description": "default.test_index", "Indexes": [ { "Type": "MinMax", @@ -89,16 +90,16 @@ } ] ----------------- - ReadFromMergeTree + ReadFromMergeTree (default.test_index) ReadType: InOrder Parts: 1 Granules: 3 ----------------- - ReadFromMergeTree + ReadFromMergeTree (default.test_index) ReadType: InReverseOrder Parts: 1 Granules: 3 - ReadFromMergeTree + ReadFromMergeTree (default.idx) Indexes: PrimaryKey Keys: diff --git a/tests/queries/0_stateless/01889_clickhouse_client_config_format.sh b/tests/queries/0_stateless/01889_clickhouse_client_config_format.sh index aa162911399..4e970f17d3a 100755 --- a/tests/queries/0_stateless/01889_clickhouse_client_config_format.sh +++ b/tests/queries/0_stateless/01889_clickhouse_client_config_format.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01889_tokenize.reference b/tests/queries/0_stateless/01889_tokenize.reference index 4dd6f323929..2861a183c33 100644 --- a/tests/queries/0_stateless/01889_tokenize.reference +++ b/tests/queries/0_stateless/01889_tokenize.reference @@ -1,8 +1,8 @@ ['It','is','quite','a','wonderful','day','isn','t','it'] ['There','is','so','much','to','learn'] -['22','00','email','yandex','ru'] +['22','00','email','tut','by'] ['Токенизация','каких','либо','других','языков'] ['It','is','quite','a','wonderful','day,','isn\'t','it?'] ['There','is....','so','much','to','learn!'] -['22:00','email@yandex.ru'] +['22:00','email@tut.by'] ['Токенизация','каких-либо','других','языков?'] diff --git a/tests/queries/0_stateless/01889_tokenize.sql b/tests/queries/0_stateless/01889_tokenize.sql index 
c9d29a8632b..287e439d2ce 100644 --- a/tests/queries/0_stateless/01889_tokenize.sql +++ b/tests/queries/0_stateless/01889_tokenize.sql @@ -2,10 +2,10 @@ SET allow_experimental_nlp_functions = 1; SELECT splitByNonAlpha('It is quite a wonderful day, isn\'t it?'); SELECT splitByNonAlpha('There is.... so much to learn!'); -SELECT splitByNonAlpha('22:00 email@yandex.ru'); +SELECT splitByNonAlpha('22:00 email@tut.by'); SELECT splitByNonAlpha('Токенизация каких-либо других языков?'); SELECT splitByWhitespace('It is quite a wonderful day, isn\'t it?'); SELECT splitByWhitespace('There is.... so much to learn!'); -SELECT splitByWhitespace('22:00 email@yandex.ru'); +SELECT splitByWhitespace('22:00 email@tut.by'); SELECT splitByWhitespace('Токенизация каких-либо других языков?'); diff --git a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh index b6721d4a798..47cf6e06b48 100755 --- a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh +++ b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh @@ -12,6 +12,7 @@ mkdir -p ${user_files_path}/ cp $CUR_DIR/data_zstd/test_01946.zstd ${user_files_path}/ ${CLICKHOUSE_CLIENT} --multiline --multiquery --query " +set min_chunk_bytes_for_parallel_parsing=10485760; set max_read_buffer_size = 65536; set input_format_parallel_parsing = 0; select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') limit 30 format Null; diff --git a/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh b/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh index e7871454192..7740ffcce7b 100755 --- a/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh +++ b/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh @@ -66,8 +66,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # frame #11: 0x000000000fffdfc4 clickhouse`main(argc_=, argv_=) + 356 at main.cpp:366 # frame #12: 0x00007ffff7de6d0a libc.so.6`__libc_start_main(main=(clickhouse`main at main.cpp:339), argc=7, argv=0x00007fffffffe1e8, init=, fini=, rtld_fini=, stack_end=0x00007fffffffe1d8) + 234 at libc-start.c:308 # frame #13: 0x000000000ffdc0aa clickhouse`_start + 42 -# -# [1]: https://clickhouse-test-reports.s3.yandex.net/26656/f17ca450ac991603e6400c7caef49c493ac69739/functional_stateless_tests_(ubsan).html#fail1 # Limit number of files to 50, and we will get EMFILE for some of socket() prlimit --nofile=50 $CLICKHOUSE_BENCHMARK --iterations 1 --concurrency 50 --query 'select 1' 2>&1 diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.reference b/tests/queries/0_stateless/02006_test_positional_arguments.reference index c5c5f115b0a..f86a1ab6c47 100644 --- a/tests/queries/0_stateless/02006_test_positional_arguments.reference +++ b/tests/queries/0_stateless/02006_test_positional_arguments.reference @@ -111,3 +111,9 @@ select substr('aaaaaaaaaaaaaa', 8) as a group by a; aaaaaaa select substr('aaaaaaaaaaaaaa', 8) as a group by substr('aaaaaaaaaaaaaa', 8); aaaaaaa +select b from (select 5 as a, 'Hello' as b order by a); +Hello +select b from (select 5 as a, 'Hello' as b group by a); +Hello +select b from (select 5 as a, 'Hello' as b order by 1); +Hello diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.sql 
b/tests/queries/0_stateless/02006_test_positional_arguments.sql index 7442ca6bbf6..2a02cd03c93 100644 --- a/tests/queries/0_stateless/02006_test_positional_arguments.sql +++ b/tests/queries/0_stateless/02006_test_positional_arguments.sql @@ -45,3 +45,6 @@ explain syntax select plus(1, 1) as a group by a; select substr('aaaaaaaaaaaaaa', 8) as a group by a; select substr('aaaaaaaaaaaaaa', 8) as a group by substr('aaaaaaaaaaaaaa', 8); +select b from (select 5 as a, 'Hello' as b order by a); +select b from (select 5 as a, 'Hello' as b group by a); +select b from (select 5 as a, 'Hello' as b order by 1); diff --git a/tests/queries/0_stateless/02015_async_inserts_2.sh b/tests/queries/0_stateless/02015_async_inserts_2.sh index 90f5584d84e..fd20f846897 100755 --- a/tests/queries/0_stateless/02015_async_inserts_2.sh +++ b/tests/queries/0_stateless/02015_async_inserts_2.sh @@ -1,13 +1,14 @@ #!/usr/bin/env bash +# Tags: no-random-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1" +url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1&max_insert_threads=0&group_by_two_level_threshold=100000&group_by_two_level_threshold_bytes=50000000&distributed_aggregation_memory_efficient=1&fsync_metadata=1&priority=1&output_format_parallel_formatting=0&input_format_parallel_parsing=0&min_chunk_bytes_for_parallel_parsing=4031398&max_read_buffer_size=554729&prefer_localhost_replica=0&max_block_size=51672&max_threads=20" -${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts" -${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" +${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "DROP TABLE IF EXISTS async_inserts" +${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV 1,"a" @@ -22,7 +23,7 @@ ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV wait -${CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts ORDER BY id" -${CLICKHOUSE_CLIENT} -q "SELECT name, rows, level FROM system.parts WHERE table = 'async_inserts' AND database = '$CLICKHOUSE_DATABASE' ORDER BY name" +${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "SELECT * FROM 
async_inserts ORDER BY id" +${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "SELECT name, rows, level FROM system.parts WHERE table = 'async_inserts' AND database = '$CLICKHOUSE_DATABASE' ORDER BY name" -${CLICKHOUSE_CLIENT} -q "DROP TABLE async_inserts" +${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "DROP TABLE async_inserts" diff --git a/tests/queries/0_stateless/02118_deserialize_whole_text.sh b/tests/queries/0_stateless/02118_deserialize_whole_text.sh index fe9256df329..e9f35582f15 100755 --- a/tests/queries/0_stateless/02118_deserialize_whole_text.sh +++ b/tests/queries/0_stateless/02118_deserialize_whole_text.sh @@ -41,16 +41,16 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv4')" 2>&1 | echo "[\"255.255.255.255trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' -echo "[\"2a02:6ba8:2da1:40cd:31db:f9f1:fc3d:80b1trash\"]" > $DATA_FILE +echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' -echo "2a02:6ba8:2da1:40cd:31db:f9f1:fc3d:80b1trash" > $DATA_FILE +echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' -echo "2a02:6ba8:2da1:40cd:31db:f9f1:fc3d:80b1trash" > $DATA_FILE +echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' -echo "[\"2a02:6ba8:2da1:40cd:31db:f9f1:fc3d:80b1trash\"]" > $DATA_FILE +echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"{1:2, 2:3}trash\"]" > $DATA_FILE diff --git a/tests/queries/0_stateless/02126_url_auth.python b/tests/queries/0_stateless/02126_url_auth.python index 60009624c76..57b16fb413e 100644 --- a/tests/queries/0_stateless/02126_url_auth.python +++ b/tests/queries/0_stateless/02126_url_auth.python @@ -121,18 +121,14 @@ class CSVHTTPServer(BaseHTTPRequestHandler): class HTTPServerV6(HTTPServer): address_family = socket.AF_INET6 -def start_server(requests_amount): +def start_server(): if IS_IPV6: httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, CSVHTTPServer) else: httpd = 
HTTPServer(HTTP_SERVER_ADDRESS, CSVHTTPServer) - def real_func(): - for i in range(requests_amount): - httpd.handle_request() - - t = threading.Thread(target=real_func) - return t + t = threading.Thread(target=httpd.serve_forever) + return t, httpd # test section @@ -217,9 +213,10 @@ def main(): query : 'hello, world', } - t = start_server(len(list(select_requests_url_auth.keys()))) + t, httpd = start_server() t.start() test_select(requests=list(select_requests_url_auth.keys()), answers=list(select_requests_url_auth.values()), test_data=test_data) + httpd.shutdown() t.join() print("PASSED") diff --git a/tests/queries/0_stateless/02205_HTTP_user_agent.python b/tests/queries/0_stateless/02205_HTTP_user_agent.python index 8fb9cea0845..397e06cbe82 100644 --- a/tests/queries/0_stateless/02205_HTTP_user_agent.python +++ b/tests/queries/0_stateless/02205_HTTP_user_agent.python @@ -124,7 +124,8 @@ def test_select(): check_answers(query, EXPECTED_ANSWER) def main(): - t = start_server(1) + # HEAD + GET + t = start_server(2) t.start() test_select() t.join() diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference new file mode 100644 index 00000000000..f7b91ff48b8 --- /dev/null +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -0,0 +1,62 @@ +test intervals +- test nanoseconds +1980-12-12 12:12:12.123456789 +1980-12-12 12:12:12.123456700 +1980-12-12 12:12:12.123456789 +1930-12-12 12:12:12.123456789 +1930-12-12 12:12:12.123456700 +2220-12-12 12:12:12.123456789 +2220-12-12 12:12:12.123456700 +- test microseconds +1980-12-12 12:12:12.123456 +1980-12-12 12:12:12.123400 +1980-12-12 12:12:12.123456 +1980-12-12 12:12:12.123456 +1930-12-12 12:12:12.123456 +1930-12-12 12:12:12.123400 +1930-12-12 12:12:12.123457 +2220-12-12 12:12:12.123456 +2220-12-12 12:12:12.123400 +2220-12-12 12:12:12.123456 +- test milliseconds +1980-12-12 12:12:12.123 +1980-12-12 12:12:12.120 +1980-12-12 12:12:12.123 +1980-12-12 12:12:12.123 +1930-12-12 12:12:12.123 +1930-12-12 12:12:12.120 +1930-12-12 12:12:12.124 +2220-12-12 12:12:12.123 +2220-12-12 12:12:12.120 +2220-12-12 12:12:12.123 +test add[...]seconds() +- test nanoseconds +1980-12-12 12:12:12.123456790 +1980-12-12 12:12:12.123456701 +1980-12-12 12:12:12.123456790 +1930-12-12 12:12:12.123456788 +1930-12-12 12:12:12.123456699 +2220-12-12 12:12:12.123456790 +2220-12-12 12:12:12.123456701 +- test microseconds +1980-12-12 12:12:12.123457 +1980-12-12 12:12:12.123401 +1980-12-12 12:12:12.12345778 +1980-12-12 12:12:12.123457 +1930-12-12 12:12:12.123455 +1930-12-12 12:12:12.123399 +1930-12-12 12:12:12.12345578 +2220-12-12 12:12:12.123457 +2220-12-12 12:12:12.123401 +2220-12-12 12:12:12.12345778 +- test milliseconds +1980-12-12 12:12:12.124 +1980-12-12 12:12:12.121 +1980-12-12 12:12:12.124456 +1980-12-12 12:12:12.124 +1930-12-12 12:12:12.122 +1930-12-12 12:12:12.119 +1930-12-12 12:12:12.122456 +2220-12-12 12:12:12.124 +2220-12-12 12:12:12.121 +2220-12-12 12:12:12.124456 diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.sql b/tests/queries/0_stateless/02207_subseconds_intervals.sql new file mode 100644 index 00000000000..a7ce03d9330 --- /dev/null +++ b/tests/queries/0_stateless/02207_subseconds_intervals.sql @@ -0,0 +1,94 @@ +SELECT 'test intervals'; + +SELECT '- test nanoseconds'; +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456789', 9), INTERVAL 1 NANOSECOND); -- In normal range, source scale matches result +select 
toStartOfInterval(toDateTime64('1980-12-12 12:12:12.1234567', 7), INTERVAL 1 NANOSECOND); -- In normal range, source scale less than result + +select toStartOfInterval(a, INTERVAL 1 NANOSECOND) from ( select toDateTime64('1980-12-12 12:12:12.123456789', 9) AS a ); -- Non-constant argument + +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456789', 9), INTERVAL 1 NANOSECOND); -- Below normal range, source scale matches result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.1234567', 7), INTERVAL 1 NANOSECOND); -- Below normal range, source scale less than result + +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123456789', 9), INTERVAL 1 NANOSECOND); -- Above normal range, source scale matches result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.1234567', 7), INTERVAL 1 NANOSECOND); -- Above normal range, source scale less than result + + +SELECT '- test microseconds'; +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456', 6), INTERVAL 1 MICROSECOND); -- In normal range, source scale matches result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.1234', 4), INTERVAL 1 MICROSECOND); -- In normal range, source scale less than result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.12345678', 8), INTERVAL 1 MICROSECOND); -- In normal range, source scale greater than result + +select toStartOfInterval(a, INTERVAL 1 MICROSECOND) from ( select toDateTime64('1980-12-12 12:12:12.12345678', 8) AS a ); -- Non-constant argument + +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456', 6), INTERVAL 1 MICROSECOND); -- Below normal range, source scale matches result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.1234', 4), INTERVAL 1 MICROSECOND); -- Below normal range, source scale less than result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.12345678', 8), INTERVAL 1 MICROSECOND); -- Below normal range, source scale greater than result + + +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123456', 6), INTERVAL 1 MICROSECOND); -- Above normal range, source scale matches result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.1234', 4), INTERVAL 1 MICROSECOND); -- Above normal range, source scale less than result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.12345678', 8), INTERVAL 1 MICROSECOND); -- Above normal range, source scale greater than result + + +SELECT '- test milliseconds'; +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123', 3), INTERVAL 1 MILLISECOND); -- In normal range, source scale matches result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.12', 2), INTERVAL 1 MILLISECOND); -- In normal range, source scale less than result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456', 6), INTERVAL 1 MILLISECOND); -- In normal range, source scale greater than result + +select toStartOfInterval(a, INTERVAL 1 MILLISECOND) from ( select toDateTime64('1980-12-12 12:12:12.123456', 6) AS a ); -- Non-constant argument + +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123', 3), INTERVAL 1 MILLISECOND); -- Below normal range, source scale matches result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.12', 2), INTERVAL 1 MILLISECOND); -- Below normal range, source scale less than result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456', 6), INTERVAL 1 MILLISECOND); -- Below normal range, source scale greater than result + +select 
toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123', 3), INTERVAL 1 MILLISECOND); -- Above normal range, source scale matches result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.12', 2), INTERVAL 1 MILLISECOND); -- Above normal range, source scale less than result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123456', 6), INTERVAL 1 MILLISECOND); -- Above normal range, source scale greater than result + + +SELECT 'test add[...]seconds()'; + + +SELECT '- test nanoseconds'; +select addNanoseconds(toDateTime64('1980-12-12 12:12:12.123456789', 9), 1); -- In normal range, source scale matches result +select addNanoseconds(toDateTime64('1980-12-12 12:12:12.1234567', 7), 1); -- In normal range, source scale less than result + +select addNanoseconds(a, 1) from ( select toDateTime64('1980-12-12 12:12:12.123456789', 9) AS a ); -- Non-constant argument + +select addNanoseconds(toDateTime64('1930-12-12 12:12:12.123456789', 9), 1); -- Below normal range, source scale matches result +select addNanoseconds(toDateTime64('1930-12-12 12:12:12.1234567', 7), 1); -- Below normal range, source scale less than result + +select addNanoseconds(toDateTime64('2220-12-12 12:12:12.123456789', 9), 1); -- Above normal range, source scale matches result +select addNanoseconds(toDateTime64('2220-12-12 12:12:12.1234567', 7), 1); -- Above normal range, source scale less than result + + +SELECT '- test microseconds'; +select addMicroseconds(toDateTime64('1980-12-12 12:12:12.123456', 6), 1); -- In normal range, source scale matches result +select addMicroseconds(toDateTime64('1980-12-12 12:12:12.1234', 4), 1); -- In normal range, source scale less than result +select addMicroseconds(toDateTime64('1980-12-12 12:12:12.12345678', 8), 1); -- In normal range, source scale greater than result + +select addMicroseconds(a, 1) from ( select toDateTime64('1980-12-12 12:12:12.123456', 6) AS a ); -- Non-constant argument + +select addMicroseconds(toDateTime64('1930-12-12 12:12:12.123456', 6), 1); -- Below normal range, source scale matches result +select addMicroseconds(toDateTime64('1930-12-12 12:12:12.1234', 4), 1); -- Below normal range, source scale less than result +select addMicroseconds(toDateTime64('1930-12-12 12:12:12.12345678', 8), 1); -- Below normal range, source scale greater than result + +select addMicroseconds(toDateTime64('2220-12-12 12:12:12.123456', 6), 1); -- Above normal range, source scale matches result +select addMicroseconds(toDateTime64('2220-12-12 12:12:12.1234', 4), 1); -- Above normal range, source scale less than result +select addMicroseconds(toDateTime64('2220-12-12 12:12:12.12345678', 8), 1); -- Above normal range, source scale greater than result + + +SELECT '- test milliseconds'; +select addMilliseconds(toDateTime64('1980-12-12 12:12:12.123', 3), 1); -- In normal range, source scale matches result +select addMilliseconds(toDateTime64('1980-12-12 12:12:12.12', 2), 1); -- In normal range, source scale less than result +select addMilliseconds(toDateTime64('1980-12-12 12:12:12.123456', 6), 1); -- In normal range, source scale greater than result + +select addMilliseconds(a, 1) from ( select toDateTime64('1980-12-12 12:12:12.123', 3) AS a ); -- Non-constant argument + +select addMilliseconds(toDateTime64('1930-12-12 12:12:12.123', 3), 1); -- Below normal range, source scale matches result +select addMilliseconds(toDateTime64('1930-12-12 12:12:12.12', 2), 1); -- Below normal range, source scale less than result +select addMilliseconds(toDateTime64('1930-12-12 
12:12:12.123456', 6), 1); -- Below normal range, source scale greater than result
+
+select addMilliseconds(toDateTime64('2220-12-12 12:12:12.123', 3), 1); -- Above normal range, source scale matches result
+select addMilliseconds(toDateTime64('2220-12-12 12:12:12.12', 2), 1); -- Above normal range, source scale less than result
+select addMilliseconds(toDateTime64('2220-12-12 12:12:12.123456', 6), 1); -- Above normal range, source scale greater than result
diff --git a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh
index 842c32cf243..261c389c9f2 100755
--- a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh
+++ b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh
@@ -29,5 +29,5 @@ $CLICKHOUSE_CLIENT -q "create table test_dist engine=Distributed('test_shard_loc
 $CLICKHOUSE_CLIENT -q "detach table test_dist"
 $CLICKHOUSE_CLIENT -q "drop table test"
 $CLICKHOUSE_CLIENT -q "attach table test_dist"
-$CLICKHOUSE_CLIENT -q "select * from test_dist" 2>&1 | grep -q "UNKNOWN_TABLE" && echo "OK" || echo "FAIL"
+$CLICKHOUSE_CLIENT --prefer_localhost_replica=1 -q "select * from test_dist" 2>&1 | grep -q "UNKNOWN_TABLE" && echo "OK" || echo "FAIL"
diff --git a/tests/queries/0_stateless/02233_HTTP_ranged.python b/tests/queries/0_stateless/02233_HTTP_ranged.python
new file mode 100644
index 00000000000..e0198210c16
--- /dev/null
+++ b/tests/queries/0_stateless/02233_HTTP_ranged.python
@@ -0,0 +1,262 @@
+#!/usr/bin/env python3
+
+from http.server import BaseHTTPRequestHandler, HTTPServer
+import socket
+import sys
+import re
+import threading
+import os
+import traceback
+import urllib.request
+import subprocess
+
+
+def is_ipv6(host):
+    try:
+        socket.inet_aton(host)
+        return False
+    except OSError:
+        return True
+
+
+def get_local_port(host, ipv6):
+    if ipv6:
+        family = socket.AF_INET6
+    else:
+        family = socket.AF_INET
+
+    with socket.socket(family) as fd:
+        fd.bind((host, 0))
+        return fd.getsockname()[1]
+
+
+CLICKHOUSE_HOST = os.environ.get("CLICKHOUSE_HOST", "localhost")
+CLICKHOUSE_PORT_HTTP = os.environ.get("CLICKHOUSE_PORT_HTTP", "8123")
+
+# Server returns this JSON response.
+SERVER_JSON_RESPONSE = """{
+	"login": "ClickHouse",
+	"id": 54801242,
+	"name": "ClickHouse",
+	"company": null
+}"""
+
+PAYLOAD_LEN = len(SERVER_JSON_RESPONSE)
+
+EXPECTED_ANSWER = """{\\n\\t"login": "ClickHouse",\\n\\t"id": 54801242,\\n\\t"name": "ClickHouse",\\n\\t"company": null\\n}"""
+
+#####################################################################################
+# This test starts an HTTP server and serves data to a clickhouse url-engine based table.
+# The objective of this test is to check that ClickHouse issues HTTP Range requests
+# when the server supports them (and reads the whole payload with a plain GET when it does not).
+# In order for it to work, the ip+port of the http server (given below) should be
+# accessible from the clickhouse server.
+#####################################################################################
+
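+# For illustration, a successful ranged exchange with this server looks roughly like:
+#     GET / HTTP/1.1
+#     Range: bytes=0-19
+# answered with:
+#     HTTP/1.1 206 Partial Content
+#     Content-Range: bytes 0-19/<payload length>
+#     Content-Length: 20
+# With ranges disabled the server answers 200 OK and sends the whole payload instead.
+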
+# IP-address of this host accessible from the outside world. Get the first one
+HTTP_SERVER_HOST = (
+    subprocess.check_output(["hostname", "-i"]).decode("utf-8").strip().split()[0]
+)
+IS_IPV6 = is_ipv6(HTTP_SERVER_HOST)
+HTTP_SERVER_PORT = get_local_port(HTTP_SERVER_HOST, IS_IPV6)
+
+# IP address and port of the HTTP server started from this script.
+HTTP_SERVER_ADDRESS = (HTTP_SERVER_HOST, HTTP_SERVER_PORT)
+if IS_IPV6:
+    HTTP_SERVER_URL_STR = (
+        "http://"
+        + f"[{str(HTTP_SERVER_ADDRESS[0])}]:{str(HTTP_SERVER_ADDRESS[1])}"
+        + "/"
+    )
+else:
+    HTTP_SERVER_URL_STR = (
+        "http://" + f"{str(HTTP_SERVER_ADDRESS[0])}:{str(HTTP_SERVER_ADDRESS[1])}" + "/"
+    )
+
+
+def get_ch_answer(query):
+    host = CLICKHOUSE_HOST
+    if IS_IPV6:
+        host = f"[{host}]"
+
+    url = os.environ.get(
+        "CLICKHOUSE_URL",
+        "http://{host}:{port}".format(host=host, port=CLICKHOUSE_PORT_HTTP),
+    )
+    return urllib.request.urlopen(url, data=query.encode()).read().decode()
+
+
+def check_answers(query, answer):
+    ch_answer = get_ch_answer(query)
+    if ch_answer.strip() != answer.strip():
+        print("FAIL on query:", query, file=sys.stderr)
+        print("Expected answer:", answer, file=sys.stderr)
+        print("Fetched answer :", ch_answer, file=sys.stderr)
+        raise Exception("Fail on query")
+
+
+BYTE_RANGE_RE = re.compile(r"bytes=(\d+)-(\d+)?$")
+
+
+def parse_byte_range(byte_range):
+    """Returns the two numbers in 'bytes=123-456' or raises ValueError.
+    The last number or both numbers may be None.
+    """
+    if byte_range.strip() == "":
+        return None, None
+
+    m = BYTE_RANGE_RE.match(byte_range)
+    if not m:
+        raise ValueError(f"Invalid byte range {byte_range}")
+
+    first, last = [x and int(x) for x in m.groups()]
+    if last and last < first:
+        raise ValueError(f"Invalid byte range {byte_range}")
+    return first, last
+
+
+# Server with optional support for HTTP Range requests.
+class HttpProcessor(BaseHTTPRequestHandler):
+    allow_range = False
+    range_used = False
+    get_call_num = 0
+
+    def send_head(self):
+        if self.headers["Range"] and HttpProcessor.allow_range:
+            try:
+                self.range = parse_byte_range(self.headers["Range"])
+            except ValueError:
+                self.send_error(400, "Invalid byte range")
+                return None
+        else:
+            self.range = None
+
+        if self.range:
+            first, last = self.range
+        else:
+            first, last = None, None
+
+        if first is None:
+            first = 0
+
+        payload = SERVER_JSON_RESPONSE.encode()
+        payload_len = len(payload)
+        if first and first >= payload_len:
+            self.send_error(416, "Requested Range Not Satisfiable")
+            return None
+
+        self.send_response(206 if HttpProcessor.allow_range else 200)
+        self.send_header("Content-type", "application/json")
+
+        if HttpProcessor.allow_range:
+            self.send_header("Accept-Ranges", "bytes")
+
+        if last is None or last >= payload_len:
+            last = payload_len - 1
+
+        response_length = last - first + 1
+
+        if first or last:
+            self.send_header("Content-Range", f"bytes {first}-{last}/{payload_len}")
+        self.send_header(
+            "Content-Length",
+            str(response_length) if HttpProcessor.allow_range else str(payload_len),
+        )
+        self.end_headers()
+        return payload
+
+    def do_HEAD(self):
+        self.send_head()
+
+    def do_GET(self):
+        result = self.send_head()
+        if result is None:
+            return
+
+        HttpProcessor.get_call_num += 1
+
+        if not self.range:
+            self.wfile.write(SERVER_JSON_RESPONSE.encode())
+            return
+
+        HttpProcessor.range_used = True
+        payload = SERVER_JSON_RESPONSE.encode()
+        start, stop = self.range
+        if stop is None:
+            stop = len(payload) - 1
+        if start is None:
+            start = 0
+        self.wfile.write(SERVER_JSON_RESPONSE.encode()[start : stop + 1])
+
+    def log_message(self, format, *args):
+        return
+
+
+class HTTPServerV6(HTTPServer):
+    address_family = socket.AF_INET6
+
+
+def start_server():
+    if IS_IPV6:
+        httpd = HTTPServerV6(HTTP_SERVER_ADDRESS, HttpProcessor)
+    else:
+        httpd = HTTPServer(HTTP_SERVER_ADDRESS, HttpProcessor)
+
+    t = threading.Thread(target=httpd.serve_forever)
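+    # serve_forever() handles an arbitrary number of requests (one HEAD plus however
+    # many ranged GETs the client makes), unlike a fixed-count handle_request() loop;
+    # the caller stops the server explicitly via httpd.shutdown() and joins the thread.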
+ return t, httpd + + +##################################################################### +# Testing area. +##################################################################### + + +def test_select(download_buffer_size): + global HTTP_SERVER_URL_STR + query = f"SELECT * FROM url('{HTTP_SERVER_URL_STR}','JSONAsString') SETTINGS max_download_buffer_size={download_buffer_size};" + check_answers(query, EXPECTED_ANSWER) + + +def run_test(allow_range, download_buffer_size=20): + HttpProcessor.range_used = False + HttpProcessor.get_call_num = 0 + HttpProcessor.allow_range = allow_range + + t, httpd = start_server() + t.start() + test_select(download_buffer_size) + + expected_get_call_num = (PAYLOAD_LEN - 1) // download_buffer_size + 1 + if allow_range: + if not HttpProcessor.range_used: + raise Exception("HTTP Range was not used when supported") + + if expected_get_call_num != HttpProcessor.get_call_num: + raise Exception( + f"Invalid amount of GET calls with Range. Expected {expected_get_call_num}, actual {HttpProcessor.get_call_num}" + ) + else: + if HttpProcessor.range_used: + raise Exception("HTTP Range used while not supported") + + httpd.shutdown() + t.join() + print("PASSED") + + +def main(): + run_test(allow_range=False) + run_test(allow_range=True, download_buffer_size=20) + run_test(allow_range=True, download_buffer_size=10) + + +if __name__ == "__main__": + try: + main() + except Exception as ex: + exc_type, exc_value, exc_traceback = sys.exc_info() + traceback.print_tb(exc_traceback, file=sys.stderr) + print(ex, file=sys.stderr) + sys.stderr.flush() + + os._exit(1) diff --git a/tests/queries/0_stateless/02233_HTTP_ranged.reference b/tests/queries/0_stateless/02233_HTTP_ranged.reference new file mode 100644 index 00000000000..17f0fff172a --- /dev/null +++ b/tests/queries/0_stateless/02233_HTTP_ranged.reference @@ -0,0 +1,3 @@ +PASSED +PASSED +PASSED diff --git a/tests/queries/0_stateless/02233_HTTP_ranged.sh b/tests/queries/0_stateless/02233_HTTP_ranged.sh new file mode 100755 index 00000000000..b6fba098d10 --- /dev/null +++ b/tests/queries/0_stateless/02233_HTTP_ranged.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +python3 "$CURDIR"/02233_HTTP_ranged.python + diff --git a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh b/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh deleted file mode 100755 index b946addd01c..00000000000 --- a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-ubsan, no-fasttest - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CUR_DIR"/../shell_config.sh - -echo "Parquet" -DATA_FILE=$CUR_DIR/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet -${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (id String, score Int32) ENGINE = Memory" -cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet SETTINGS input_format_use_lowercase_column_name=true" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" -${CLICKHOUSE_CLIENT} --query="drop table parquet_load" - -echo "ORC" -DATA_FILE=$CUR_DIR/data_orc/test_setting_input_format_use_lowercase_column_name.orc -${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS orc_load" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE orc_load (id String, score Int32) ENGINE = Memory" -cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO orc_load FORMAT ORC SETTINGS input_format_use_lowercase_column_name=true" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM orc_load" -${CLICKHOUSE_CLIENT} --query="drop table orc_load" diff --git a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference new file mode 100644 index 00000000000..a8abc33648e --- /dev/null +++ b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference @@ -0,0 +1,8 @@ +b Nullable(String) +c Nullable(String) +a Nullable(String) +s1 \N 1 +} [2] 2 +\N \N \N +\N \N \N +\N [3] \N diff --git a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh new file mode 100755 index 00000000000..8655ffd1e1f --- /dev/null +++ b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +FILE_NAME=test_02240.data +DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME + +touch $DATA_FILE + +echo -e 'a=1\tb=s1\tc=\N +c=[2]\ta=2\tb=\N} + +a=\N +c=[3]\ta=\N' > $DATA_FILE +$CLICKHOUSE_CLIENT --max_read_buffer_size=4 -q "desc file('$FILE_NAME', 'TSKV')" +$CLICKHOUSE_CLIENT --max_read_buffer_size=4 -q "select * from file('$FILE_NAME', 'TSKV')" + diff --git a/tests/queries/0_stateless/02241_parquet_bad_column.reference b/tests/queries/0_stateless/02241_parquet_bad_column.reference index f599e28b8ab..b2f7f08c170 100644 --- a/tests/queries/0_stateless/02241_parquet_bad_column.reference +++ b/tests/queries/0_stateless/02241_parquet_bad_column.reference @@ -1 +1,2 @@ 10 +10 diff --git a/tests/queries/0_stateless/02241_parquet_bad_column.sh b/tests/queries/0_stateless/02241_parquet_bad_column.sh index a160671a088..9efd11cbbe1 100755 --- a/tests/queries/0_stateless/02241_parquet_bad_column.sh +++ b/tests/queries/0_stateless/02241_parquet_bad_column.sh @@ -5,23 +5,25 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "drop table if exists test_02241" -$CLICKHOUSE_CLIENT -q "create table test_02241 (image_path Nullable(String), - caption Nullable(String), - NSFW Nullable(String), - similarity Nullable(Float64), - LICENSE Nullable(String), - url Nullable(String), - key Nullable(UInt64), - shard_id Nullable(UInt64), - status Nullable(String), - width Nullable(UInt32), - height Nullable(UInt32), - exif Nullable(String), - original_width Nullable(UInt32), - original_height Nullable(UInt32)) engine=Memory" +for case_insensitive in "true" "false"; do + $CLICKHOUSE_CLIENT -q "drop table if exists test_02241" + $CLICKHOUSE_CLIENT -q "create table test_02241 (image_path Nullable(String), + caption Nullable(String), + NSFW Nullable(String), + similarity Nullable(Float64), + LICENSE Nullable(String), + url Nullable(String), + key Nullable(UInt64), + shard_id Nullable(UInt64), + status Nullable(String), + width Nullable(UInt32), + height Nullable(UInt32), + exif Nullable(String), + original_width Nullable(UInt32), + original_height Nullable(UInt32)) engine=Memory" -cat $CUR_DIR/data_parquet_bad_column/metadata_0.parquet | $CLICKHOUSE_CLIENT -q "insert into test_02241 format Parquet" + cat $CUR_DIR/data_parquet_bad_column/metadata_0.parquet | $CLICKHOUSE_CLIENT -q "insert into test_02241 format Parquet SETTINGS input_format_parquet_case_insensitive_column_matching=$case_insensitive" -$CLICKHOUSE_CLIENT -q "select count() from test_02241" -$CLICKHOUSE_CLIENT -q "drop table test_02241" + $CLICKHOUSE_CLIENT -q "select count() from test_02241" + $CLICKHOUSE_CLIENT -q "drop table test_02241" +done diff --git a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference b/tests/queries/0_stateless/02242_case_insensitive_column_matching.reference similarity index 66% rename from tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference rename to tests/queries/0_stateless/02242_case_insensitive_column_matching.reference index 5c383cb3035..9732211a286 100644 --- a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference +++ b/tests/queries/0_stateless/02242_case_insensitive_column_matching.reference @@ -4,3 +4,6 @@ Parquet ORC 123 1 456 2 +Arrow +123 1 +456 2 diff --git a/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh b/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh new file mode 100755 index 00000000000..8ebf2952ab3 --- /dev/null +++ b/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# Tags: no-ubsan, no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +echo "Parquet" +DATA_FILE=$CUR_DIR/data_parquet/case_insensitive_column_matching.parquet +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (iD String, scOre Int32) ENGINE = Memory" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet SETTINGS input_format_parquet_case_insensitive_column_matching=true" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" +${CLICKHOUSE_CLIENT} --query="drop table parquet_load" + +echo "ORC" +DATA_FILE=$CUR_DIR/data_orc/case_insensitive_column_matching.orc +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS orc_load" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE orc_load (iD String, sCorE Int32) ENGINE = Memory" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO orc_load FORMAT ORC SETTINGS input_format_orc_case_insensitive_column_matching=true" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM orc_load" +${CLICKHOUSE_CLIENT} --query="drop table orc_load" + +echo "Arrow" +DATA_FILE=$CUR_DIR/data_arrow/case_insensitive_column_matching.arrow +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_load" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_load (iD String, sCorE Int32) ENGINE = Memory" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO arrow_load FORMAT Arrow SETTINGS input_format_arrow_case_insensitive_column_matching=true" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_load" +${CLICKHOUSE_CLIENT} --query="drop table arrow_load" diff --git a/tests/queries/0_stateless/02242_case_insensitive_nested.reference b/tests/queries/0_stateless/02242_case_insensitive_nested.reference new file mode 100644 index 00000000000..58d66d3230a --- /dev/null +++ b/tests/queries/0_stateless/02242_case_insensitive_nested.reference @@ -0,0 +1,12 @@ +Arrow +[1,2,3] ['123','456','789'] [9.8,10.12,11.14] +[4,5,6] ['101112','131415','161718'] [123.8,10.2,11.414] +[7,8,9] ['101','415','118'] [13.08,1.12,0.414] +Parquet +[1,2,3] ['123','456','789'] [9.8,10.12,11.14] +[4,5,6] ['101112','131415','161718'] [123.8,10.2,11.414] +[7,8,9] ['101','415','118'] [13.08,1.12,0.414] +ORC +[1,2,3] ['123','456','789'] [9.8,10.12,11.14] +[4,5,6] ['101112','131415','161718'] [123.8,10.2,11.414] +[7,8,9] ['101','415','118'] [13.08,1.12,0.414] diff --git a/tests/queries/0_stateless/02242_case_insensitive_nested.sh b/tests/queries/0_stateless/02242_case_insensitive_nested.sh new file mode 100755 index 00000000000..c22f5695dc3 --- /dev/null +++ b/tests/queries/0_stateless/02242_case_insensitive_nested.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS nested_table" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS nested_nested_table" + +${CLICKHOUSE_CLIENT} --query="CREATE TABLE nested_table (table Nested(eLeM1 Int32, elEm2 String, ELEM3 Float32)) engine=Memory" + +formats=('Arrow' 'Parquet' 'ORC') +format_files=('arrow' 'parquet' 'orc') + +for ((i = 0; i < 3; i++)) do + echo ${formats[i]} + + ${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE nested_table" + cat $CUR_DIR/data_orc_arrow_parquet_nested/nested_table.${format_files[i]} | ${CLICKHOUSE_CLIENT} -q "INSERT INTO nested_table FORMAT ${formats[i]} SETTINGS input_format_${format_files[i]}_import_nested = 1, input_format_${format_files[i]}_case_insensitive_column_matching = true" + + ${CLICKHOUSE_CLIENT} --query="SELECT * FROM nested_table" + +done + +${CLICKHOUSE_CLIENT} --query="DROP TABLE nested_table" diff --git a/tests/queries/0_stateless/02242_optimize_to_subcolumns_no_storage.reference b/tests/queries/0_stateless/02242_optimize_to_subcolumns_no_storage.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/02242_optimize_to_subcolumns_no_storage.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/02242_optimize_to_subcolumns_no_storage.sql b/tests/queries/0_stateless/02242_optimize_to_subcolumns_no_storage.sql new file mode 100644 index 00000000000..e6e4663c5aa --- /dev/null +++ b/tests/queries/0_stateless/02242_optimize_to_subcolumns_no_storage.sql @@ -0,0 +1,3 @@ +SET optimize_functions_to_subcolumns = 1; +SELECT count(*) FROM numbers(2) AS n1, numbers(3) AS n2, numbers(4) AS n3 +WHERE (n1.number = n2.number) AND (n2.number = n3.number); diff --git a/tests/queries/0_stateless/02243_in_ip_address.reference b/tests/queries/0_stateless/02243_in_ip_address.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/02243_in_ip_address.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/02243_in_ip_address.sql b/tests/queries/0_stateless/02243_in_ip_address.sql new file mode 100644 index 00000000000..a2c8c37e585 --- /dev/null +++ b/tests/queries/0_stateless/02243_in_ip_address.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table (id UInt64, value_ipv4 IPv4, value_ipv6 IPv6) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, '127.0.0.1', '127.0.0.1'); + +SELECT id FROM test_table WHERE value_ipv4 IN (SELECT value_ipv4 FROM test_table); +SELECT id FROM test_table WHERE value_ipv6 IN (SELECT value_ipv6 FROM test_table); + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02243_ipv6_long_parsing.reference b/tests/queries/0_stateless/02243_ipv6_long_parsing.reference new file mode 100644 index 00000000000..c09bfebe9d5 --- /dev/null +++ b/tests/queries/0_stateless/02243_ipv6_long_parsing.reference @@ -0,0 +1,3 @@ +0 ::ffff:1.12.12.12 +1 ::ffff:123.123.123.123 +2 ::ffff:192.168.100.228 diff --git a/tests/queries/0_stateless/02243_ipv6_long_parsing.sql b/tests/queries/0_stateless/02243_ipv6_long_parsing.sql new file mode 100644 index 00000000000..25225ee0fa8 --- /dev/null +++ b/tests/queries/0_stateless/02243_ipv6_long_parsing.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table (id UInt64, value IPv6) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, '0000:0000:0000:0000:0000:ffff:1.12.12.12'); +INSERT INTO test_table VALUES (1, 
'0000:0000:0000:0000:0000:ffff:123.123.123.123'); +INSERT INTO test_table VALUES (2, '0000:0000:0000:0000:0000:ffff:192.168.100.228'); + +SELECT * FROM test_table ORDER BY id; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02244_casewithexpression_return_type.reference b/tests/queries/0_stateless/02244_casewithexpression_return_type.reference new file mode 100644 index 00000000000..bcdeb4a290b --- /dev/null +++ b/tests/queries/0_stateless/02244_casewithexpression_return_type.reference @@ -0,0 +1,20 @@ +0 555555 +1 10 +2 555555 +3 55 +4 555555 +5 555555 +6 77 +7 555555 +8 555555 +9 95 +10 100 +11 555555 +12 555555 +13 555555 +14 555555 +15 555555 +16 555555 +17 555555 +18 555555 +19 555555 diff --git a/tests/queries/0_stateless/02244_casewithexpression_return_type.sql b/tests/queries/0_stateless/02244_casewithexpression_return_type.sql new file mode 100644 index 00000000000..02557a3ddfa --- /dev/null +++ b/tests/queries/0_stateless/02244_casewithexpression_return_type.sql @@ -0,0 +1,12 @@ + SELECT "number", CASE "number" + WHEN 3 THEN 55 + WHEN 6 THEN 77 + WHEN 9 THEN 95 + ELSE CASE + WHEN "number"=1 THEN 10 + WHEN "number"=10 THEN 100 + ELSE 555555 + END + END AS "LONG_COL_0" + FROM `system`.numbers + LIMIT 20; diff --git a/tests/queries/0_stateless/02244_lowcardinality_hash_join.reference b/tests/queries/0_stateless/02244_lowcardinality_hash_join.reference new file mode 100644 index 00000000000..d89bbd39cdc --- /dev/null +++ b/tests/queries/0_stateless/02244_lowcardinality_hash_join.reference @@ -0,0 +1,4 @@ +x x +x x +x x +x x diff --git a/tests/queries/0_stateless/02244_lowcardinality_hash_join.sql b/tests/queries/0_stateless/02244_lowcardinality_hash_join.sql new file mode 100644 index 00000000000..f2a601adf06 --- /dev/null +++ b/tests/queries/0_stateless/02244_lowcardinality_hash_join.sql @@ -0,0 +1,27 @@ +-- Tags: no-parallel +DROP TABLE IF EXISTS lc_table; + +CREATE TABLE lc_table +( + col LowCardinality(String) +) ENGINE=TinyLog; + +INSERT INTO lc_table VALUES('x'); + +SELECT * +FROM lc_table +INNER JOIN lc_table AS lc_table2 ON lc_table.col = lc_table2.col; + +SELECT * +FROM lc_table +INNER JOIN lc_table AS lc_table2 ON CAST(lc_table.col AS String) = CAST(lc_table2.col AS String); + +SELECT * +FROM lc_table +INNER JOIN lc_table AS lc_table2 ON (lc_table.col = lc_table2.col) OR (lc_table.col = lc_table2.col); + +SELECT * +FROM lc_table +INNER JOIN lc_table AS lc_table2 ON (CAST(lc_table.col AS String) = CAST(lc_table2.col AS String)) OR (CAST(lc_table.col AS String) = CAST(lc_table2.col AS String)); + +DROP TABLE IF EXISTS lc_table; diff --git a/tests/queries/0_stateless/02245_weird_partitions_pruning.reference b/tests/queries/0_stateless/02245_weird_partitions_pruning.reference new file mode 100644 index 00000000000..cf406b417b4 --- /dev/null +++ b/tests/queries/0_stateless/02245_weird_partitions_pruning.reference @@ -0,0 +1,14 @@ +202112-0 (202112,0) +202201-0 (202201,0) +202301-0 (202301,0) +202112-0 2021-12-31 22:30:00 2021-12-31 22:30:00 2021-12-31 14:30:00 2021-12-31 14:30:00 1000 +202201-0 2022-01-01 00:30:00 2022-01-31 22:30:00 2021-12-31 16:30:00 2022-01-31 14:30:00 2000 +202301-0 2023-01-31 22:30:00 2023-01-31 22:30:00 2023-01-31 14:30:00 2023-01-31 14:30:00 1000 +202112-0 +default weird_partitions_02245 1 1000 1 +202201-0 +default weird_partitions_02245 1 2000 1 +202112-0 +202201-0 +default weird_partitions_02245 2 3000 2 +default weird_partitions_02245 0 0 0 diff --git a/tests/queries/0_stateless/02245_weird_partitions_pruning.sql 
b/tests/queries/0_stateless/02245_weird_partitions_pruning.sql new file mode 100644 index 00000000000..6273a9f3d59 --- /dev/null +++ b/tests/queries/0_stateless/02245_weird_partitions_pruning.sql @@ -0,0 +1,61 @@ +-- We use a hack: partition by ignore(d1). Sometimes a table has two columns that are +-- strongly but not fully correlated (e.g. date_begin/date_end, or a datetime and the same +-- datetime in a TZ with DST). Partitioning by both columns instead of one would double the +-- number of partitions. Partitioning by (..., ignore(d1)) partitions only by the first +-- column but builds min_max indexes for both columns, so partition pruning works for both. +-- It's very similar to a min_max skip index but gives a bigger performance boost, because +-- partition pruning happens at a very early query stage. + + +DROP TABLE IF EXISTS weird_partitions_02245; + +CREATE TABLE weird_partitions_02245(d DateTime, d1 DateTime default d - toIntervalHour(8), id Int64) +Engine=MergeTree +PARTITION BY (toYYYYMM(toDateTime(d)), ignore(d1)) +ORDER BY id; + +INSERT INTO weird_partitions_02245(d, id) +SELECT + toDateTime('2021-12-31 22:30:00') AS d, + number +FROM numbers(1000); + +INSERT INTO weird_partitions_02245(d, id) +SELECT + toDateTime('2022-01-01 00:30:00') AS d, + number +FROM numbers(1000); + +INSERT INTO weird_partitions_02245(d, id) +SELECT + toDateTime('2022-01-31 22:30:00') AS d, + number +FROM numbers(1000); + +INSERT INTO weird_partitions_02245(d, id) +SELECT + toDateTime('2023-01-31 22:30:00') AS d, + number +FROM numbers(1000); + +OPTIMIZE TABLE weird_partitions_02245; +OPTIMIZE TABLE weird_partitions_02245; + +SELECT DISTINCT _partition_id, _partition_value FROM weird_partitions_02245 ORDER BY _partition_id ASC; + +SELECT _partition_id, min(d), max(d), min(d1), max(d1), count() FROM weird_partitions_02245 GROUP BY _partition_id ORDER BY _partition_id ASC; + +select DISTINCT _partition_id from weird_partitions_02245 where d >= '2021-12-31 00:00:00' and d < '2022-01-01 00:00:00' ORDER BY _partition_id; +explain estimate select DISTINCT _partition_id from weird_partitions_02245 where d >= '2021-12-31 00:00:00' and d < '2022-01-01 00:00:00'; + +select DISTINCT _partition_id from weird_partitions_02245 where d >= '2022-01-01 00:00:00' and d1 >= '2021-12-31 00:00:00' and d1 < '2022-01-01 00:00:00' ORDER BY _partition_id; +explain estimate select DISTINCT _partition_id from weird_partitions_02245 where d >= '2022-01-01 00:00:00' and d1 >= '2021-12-31 00:00:00' and d1 < '2022-01-01 00:00:00'; + +select DISTINCT _partition_id from weird_partitions_02245 where d1 >= '2021-12-31 00:00:00' and d1 < '2022-01-01 00:00:00' ORDER BY _partition_id; +explain estimate select DISTINCT _partition_id from weird_partitions_02245 where d1 >= '2021-12-31 00:00:00' and d1 < '2022-01-01 00:00:00'; + +select DISTINCT _partition_id from weird_partitions_02245 where d >= '2022-01-01 00:00:00' and d1 >= '2021-12-31 00:00:00' and d1 < '2020-01-01 00:00:00' ORDER BY _partition_id; +explain estimate select DISTINCT _partition_id from weird_partitions_02245 where d >= '2022-01-01 00:00:00' and d1 >= '2021-12-31 00:00:00' and d1 < '2020-01-01 00:00:00'; + +DROP TABLE weird_partitions_02245; + diff --git a/tests/queries/0_stateless/data_arrow/case_insensitive_column_matching.arrow b/tests/queries/0_stateless/data_arrow/case_insensitive_column_matching.arrow new file mode 100644 index 00000000000..4350d5c3e49 Binary files /dev/null and b/tests/queries/0_stateless/data_arrow/case_insensitive_column_matching.arrow differ diff --git
a/tests/queries/0_stateless/data_orc/case_insensitive_column_matching.orc b/tests/queries/0_stateless/data_orc/case_insensitive_column_matching.orc new file mode 100644 index 00000000000..136f9980064 Binary files /dev/null and b/tests/queries/0_stateless/data_orc/case_insensitive_column_matching.orc differ diff --git a/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet b/tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet similarity index 100% rename from tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet rename to tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet diff --git a/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet.columns b/tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet.columns similarity index 100% rename from tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet.columns rename to tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet.columns diff --git a/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql b/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql index 63a833af114..c7a34c493c9 100644 --- a/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql +++ b/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql @@ -1,4 +1,4 @@ --- Tags: replica, distributed +-- Tags: replica, distributed, no-random-settings SET max_parallel_replicas = 2; SELECT EventTime::DateTime('Asia/Dubai') FROM remote('127.0.0.{1|2}', test, hits) ORDER BY EventTime DESC LIMIT 10 diff --git a/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh b/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh index a1136a47319..d14a174d3a0 100755 --- a/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh +++ b/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh @@ -13,9 +13,9 @@ do $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()" echo "$format, false"; - $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ + $CLICKHOUSE_CLIENT --max_block_size=65505 --output_format_parallel_formatting=false -q \ "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \ - $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names FORMAT $format" + $CLICKHOUSE_CLIENT --max_block_size=65505 --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names FORMAT $format" $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" @@ -23,9 +23,9 @@ do $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()" echo "$format, true"; - $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ + $CLICKHOUSE_CLIENT --max_block_size=65505 --output_format_parallel_formatting=false -q \ "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \ - $CLICKHOUSE_CLIENT 
--input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names FORMAT $format" + $CLICKHOUSE_CLIENT --max_block_size=65505 --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names FORMAT $format" $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" diff --git a/utils/check-style/check-black b/utils/check-style/check-black index 1f0be9375c2..45e7820469b 100755 --- a/utils/check-style/check-black +++ b/utils/check-style/check-black @@ -6,7 +6,7 @@ set -e GIT_ROOT=$(git rev-parse --show-cdup) GIT_ROOT=${GIT_ROOT:-.} tmp=$(mktemp) -if ! find "$GIT_ROOT" -name '*.py' -not -path "$GIT_ROOT/contrib/*" -exec black --check {} + 1>"$tmp" 2>&1; then +if ! find "$GIT_ROOT" -name '*.py' -not -path "$GIT_ROOT/contrib/*" -exec black --check --diff {} + 1>"$tmp" 2>&1; then # Show the result only if some files need formatting cat "$tmp" fi diff --git a/website/js/base.js b/website/js/base.js index 6704231c69d..9389028f1ef 100644 --- a/website/js/base.js +++ b/website/js/base.js @@ -70,15 +70,6 @@ (function (d, w, c) { (w[c] = w[c] || []).push(function() { var is_single_page = $('html').attr('data-single-page') === 'true'; - try { - w.yaCounter18343495 = new Ya.Metrika2({ - id: 18343495, - clickmap: !is_single_page, - trackLinks: !is_single_page, - accurateTrackBounce: !is_single_page, - webvisor: !is_single_page - }); - } catch(e) { } if (!is_single_page) { $('head').each(function(_, element) { @@ -91,21 +82,7 @@ }); } }); - - var n = d.getElementsByTagName("script")[0], - s = d.createElement("script"), - f = function () { n.parentNode.insertBefore(s, n); }; - s.type = "text/javascript"; - s.async = true; - s.src = "/js/metrika.js"; - if (window.location.hostname.endsWith('clickhouse.com')) { - if (w.opera == "[object Opera]") { - d.addEventListener("DOMContentLoaded", f, false); - } else { - f(); - } - } - })(document, window, "yandex_metrika_callbacks2"); + })(document, window, ""); var beforePrint = function() { var details = document.getElementsByTagName("details");
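
A note on the partition-pruning trick exercised by 02245_weird_partitions_pruning.sql above: since ignore() always returns 0, adding ignore(d1) to the partition key creates no extra partitions, yet MergeTree keeps per-partition min/max statistics for every column referenced in the key, so predicates on d1 can prune partitions as well. Below is a minimal standalone sketch of the idea; the table and column names are illustrative only and are not part of this patch.

-- Hypothetical table: `local_ts` is strongly but not perfectly correlated with `created`.
CREATE TABLE pruning_sketch
(
    created  DateTime,
    local_ts DateTime DEFAULT created - toIntervalHour(8),
    id       UInt64
)
ENGINE = MergeTree
-- ignore(local_ts) is a constant 0, so data is still split only by month of `created`,
-- but min/max values of `local_ts` are tracked for each partition as well.
PARTITION BY (toYYYYMM(created), ignore(local_ts))
ORDER BY id;

-- Either predicate can now skip whole partitions at a very early query stage:
SELECT count() FROM pruning_sketch WHERE created >= '2022-01-01 00:00:00';
SELECT count() FROM pruning_sketch WHERE local_ts >= '2022-01-01 00:00:00';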