diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index a172947b2fc..5b47f94a324 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -7,6 +7,7 @@ env: "on": schedule: - cron: '13 3 * * *' + workflow_dispatch: jobs: DockerHubPushAarch64: diff --git a/CHANGELOG.md b/CHANGELOG.md index 61724ab2d0c..100b03ab92b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ * Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)). * Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)). +* Make function `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave same as `toIPv4`, `toIPv6` functions. Changed behavior of incorrect IP address passed into functions `toIPv4`,` toIPv6`, now if invalid IP address passes into this functions exception will be raised, before this function return default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringOrNull` `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. 
Functions `IPv4StringToNumOrDefault `, `toIPv4OrDefault `, `toIPv6OrDefault ` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning default value for invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`, if this setting enabled, then IP address conversion functions will behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)). #### New Feature @@ -366,7 +367,7 @@ #### Improvement -* Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch. +* Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch. * Always display resource usage (total CPU usage, total RAM usage and max RAM usage per host) in client. [#33271](https://github.com/ClickHouse/ClickHouse/pull/33271) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Improve `Bool` type serialization and deserialization, check the range of values. [#32984](https://github.com/ClickHouse/ClickHouse/pull/32984) ([Kruglov Pavel](https://github.com/Avogar)). 
* If an invalid setting is defined using the `SET` query or using the query parameters in the HTTP request, error message will contain suggestions that are similar to the invalid setting string (if any exists). [#32946](https://github.com/ClickHouse/ClickHouse/pull/32946) ([Antonio Andelic](https://github.com/antonio2368)). diff --git a/CMakeLists.txt b/CMakeLists.txt index 7ed3872fd6e..deef582c790 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -266,7 +266,7 @@ if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE)) endif () # Allows to build stripped binary in a separate directory -if (OBJCOPY_PATH AND READELF_PATH) +if (OBJCOPY_PATH AND STRIP_PATH) option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF) if (INSTALL_STRIPPED_BINARIES) set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information") diff --git a/cmake/strip.sh b/cmake/strip.sh deleted file mode 100755 index f85d82fab31..00000000000 --- a/cmake/strip.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env bash - -BINARY_PATH=$1 -BINARY_NAME=$(basename "$BINARY_PATH") -DESTINATION_STRIPPED_DIR=$2 -OBJCOPY_PATH=${3:objcopy} -READELF_PATH=${4:readelf} - -BUILD_ID=$($READELF_PATH -n "$1" | sed -n '/Build ID/ { s/.*: //p; q; }') -BUILD_ID_PREFIX=${BUILD_ID:0:2} -BUILD_ID_SUFFIX=${BUILD_ID:2} - -DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id" -DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin" - -mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX" -mkdir -p "$DESTINATION_STRIP_BINARY_DIR" - - -cp "$BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" - -$OBJCOPY_PATH --only-keep-debug --compress-debug-sections "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" -chmod 0644 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" -chown 0:0 
"$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" - -strip --remove-section=.comment --remove-section=.note "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" - -$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" diff --git a/cmake/strip_binary.cmake b/cmake/strip_binary.cmake index e430807772d..1f24790a159 100644 --- a/cmake/strip_binary.cmake +++ b/cmake/strip_binary.cmake @@ -11,16 +11,43 @@ macro(clickhouse_strip_binary) message(FATAL_ERROR "A binary path name must be provided for stripping binary") endif() - if (NOT DEFINED STRIP_DESTINATION_DIR) message(FATAL_ERROR "Destination directory for stripped binary must be provided") endif() add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD - COMMAND bash ${ClickHouse_SOURCE_DIR}/cmake/strip.sh ${STRIP_BINARY_PATH} ${STRIP_DESTINATION_DIR} ${OBJCOPY_PATH} ${READELF_PATH} - COMMENT "Stripping clickhouse binary" VERBATIM + COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/bin" + COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin" + COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" + COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" + COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" + COMMENT "Stripping clickhouse binary" VERBATIM ) install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} 
COMPONENT clickhouse) + install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}/${STRIP_TARGET}.debug COMPONENT clickhouse) +endmacro() + + +macro(clickhouse_make_empty_debug_info_for_nfpm) + set(oneValueArgs TARGET DESTINATION_DIR) + cmake_parse_arguments(EMPTY_DEBUG "" "${oneValueArgs}" "" ${ARGN}) + + if (NOT DEFINED EMPTY_DEBUG_TARGET) + message(FATAL_ERROR "A target name must be provided for stripping binary") + endif() + + if (NOT DEFINED EMPTY_DEBUG_DESTINATION_DIR) + message(FATAL_ERROR "Destination directory for empty debug must be provided") + endif() + + add_custom_command(TARGET ${EMPTY_DEBUG_TARGET} POST_BUILD + COMMAND mkdir -p "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug" + COMMAND touch "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" + COMMENT "Addiding empty debug info for NFPM" VERBATIM + ) + + install(FILES "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" DESTINATION "${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}" COMPONENT clickhouse) endmacro() diff --git a/cmake/tools.cmake b/cmake/tools.cmake index d6fddd0509e..d571a46ad26 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -170,32 +170,32 @@ else () message (FATAL_ERROR "Cannot find objcopy.") endif () -# Readelf (FIXME copypaste) +# Strip (FIXME copypaste) if (COMPILER_GCC) - find_program (READELF_PATH NAMES "llvm-readelf" "llvm-readelf-13" "llvm-readelf-12" "llvm-readelf-11" "readelf") + find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-13" "llvm-strip-12" "llvm-strip-11" "strip") else () - find_program (READELF_PATH NAMES "llvm-readelf-${COMPILER_VERSION_MAJOR}" "llvm-readelf" "readelf") + find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip") endif () -if (NOT READELF_PATH AND OS_DARWIN) +if (NOT STRIP_PATH AND OS_DARWIN) find_program (BREW_PATH NAMES "brew") if (BREW_PATH) execute_process 
(COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX) if (LLVM_PREFIX) - find_program (READELF_PATH NAMES "llvm-readelf" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH) + find_program (STRIP_PATH NAMES "llvm-strip" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH) endif () - if (NOT READELF_PATH) + if (NOT STRIP_PATH) execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX) if (BINUTILS_PREFIX) - find_program (READELF_PATH NAMES "readelf" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH) + find_program (STRIP_PATH NAMES "strip" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH) endif () endif () endif () endif () -if (READELF_PATH) - message (STATUS "Using readelf: ${READELF_PATH}") +if (STRIP_PATH) + message (STATUS "Using strip: ${STRIP_PATH}") else () - message (FATAL_ERROR "Cannot find readelf.") + message (FATAL_ERROR "Cannot find strip.") endif () diff --git a/contrib/libxml2 b/contrib/libxml2 index 18890f471c4..a075d256fd9 160000 --- a/contrib/libxml2 +++ b/contrib/libxml2 @@ -1 +1 @@ -Subproject commit 18890f471c420411aa3c989e104d090966ec9dbf +Subproject commit a075d256fd9ff15590b86d981b75a50ead124fca diff --git a/docker/docs/check/Dockerfile b/docker/docs/check/Dockerfile index 174be123eed..4eb03a91e7a 100644 --- a/docker/docs/check/Dockerfile +++ b/docker/docs/check/Dockerfile @@ -1,4 +1,3 @@ -# rebuild in #33610 # docker build -t clickhouse/docs-check . 
ARG FROM_TAG=latest FROM clickhouse/docs-builder:$FROM_TAG diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 4af74d3ba54..3cef5b008db 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -131,9 +131,6 @@ function start() # use root to match with current uid clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log sleep 0.5 - cat /var/log/clickhouse-server/stdout.log - tail -n200 /var/log/clickhouse-server/stderr.log - tail -n200 /var/log/clickhouse-server/clickhouse-server.log counter=$((counter + 1)) done @@ -211,14 +208,12 @@ stop start clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \ - || echo -e 'Server failed to start\tFAIL' >> /test_output/test_results.tsv + || (echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ + && grep -Fa ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt) [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL" [ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL" -# Print Fatal log messages to stdout -zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log* - # Grep logs for sanitizer asserts, crashes and other critical errors # Sanitizer asserts @@ -235,20 +230,26 @@ zgrep -Fa " Application: Child process was terminated by signal 9" /var/ || echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv # Logical errors -zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ - && echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ +zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > 
/test_output/logical_errors.txt \ + && echo -e 'Logical error thrown (see clickhouse-server.log or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv +# Remove file logical_errors.txt if it's empty +[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt + # Crash zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ && echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv # It also checks for crash without stacktrace (printed by watchdog) -zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \ - && echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ +zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log* > /test_output/fatal_messages.txt \ + && echo -e 'Fatal message in clickhouse-server.log (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv +# Remove file fatal_messages.txt if it's empty +[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt + zgrep -Fa "########################################" /test_output/* > /dev/null \ && echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv @@ -259,12 +260,12 @@ echo -e "Backward compatibility check\n" echo "Download previous release server" mkdir previous_release_package_folder -clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'Download script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv +clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 
'Download script exit code\tOK' >> /test_output/test_results.tsv \ + || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv if [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ] then - echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/backward_compatibility_check_results.tsv + echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/test_results.tsv stop # Uninstall current packages @@ -290,8 +291,8 @@ then mkdir tmp_stress_output ./stress --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \ - && echo -e 'Test script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'Test script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv + && echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv rm -rf tmp_stress_output clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" @@ -301,8 +302,9 @@ then # Start new server configure start 500 - clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'Server failed to start\tFAIL' >> /test_output/backward_compatibility_check_results.tsv + clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \ + || (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \ + && grep -Fa ".*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt) clickhouse-client --query="SELECT 'Server version: ', version()" @@ -312,10 +314,12 @@ then stop # Error messages (we 
should ignore some errors) + echo "Check for Error messages in server log:" zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ -e "Code: 236. DB::Exception: Cancelled mutating parts" \ -e "REPLICA_IS_ALREADY_ACTIVE" \ -e "REPLICA_IS_ALREADY_EXIST" \ + -e "ALL_REPLICAS_LOST" \ -e "DDLWorker: Cannot parse DDL task query" \ -e "RaftInstance: failed to accept a rpc connection due to error 125" \ -e "UNKNOWN_DATABASE" \ @@ -328,47 +332,53 @@ then -e "Code: 1000, e.code() = 111, Connection refused" \ -e "UNFINISHED" \ -e "Renaming unexpected part" \ - /var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "" > /dev/null \ - && echo -e 'Error message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'No Error messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv + /var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ + && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_error_messages.txt if it's empty + [ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt # Sanitizer asserts zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \ - && echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'No sanitizer asserts\tOK' >> /test_output/backward_compatibility_check_results.tsv + && echo -e 'Backward compatibility check: Sanitizer assert 
(in stderr.log)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv rm -f /test_output/tmp # OOM zgrep -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ - && echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv + && echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv # Logical errors - zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ - && echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'No logical errors\tOK' >> /test_output/backward_compatibility_check_results.tsv + echo "Check for Logical errors in server log:" + zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_logical_errors.txt \ + && echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_logical_errors.txt if it's empty + [ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt # Crash zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ - && echo -e 'Killed by signal (in 
clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'Not crashed\tOK' >> /test_output/backward_compatibility_check_results.tsv + && echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv # It also checks for crash without stacktrace (printed by watchdog) - zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log > /dev/null \ - && echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \ - || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv + echo "Check for Fatal message in server log:" + zgrep -Fa " " /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_fatal_messages.txt \ + && echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ + || echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + + # Remove file bc_check_fatal_messages.txt if it's empty + [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt else - echo -e "Failed to download previous release packets\tFAIL" >> /test_output/backward_compatibility_check_results.tsv + echo -e "Backward compatibility check: Failed to download previous release packets\tFAIL" >> /test_output/test_results.tsv fi -zgrep -Fa "FAIL" /test_output/backward_compatibility_check_results.tsv > /dev/null \ - && echo -e 'Backward compatibility check\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check\tOK' >> /test_output/test_results.tsv - - # Put logs into /test_output/ for log_file in /var/log/clickhouse-server/clickhouse-server.log* do diff 
--git a/docs/en/engines/table-engines/integrations/hive.md b/docs/en/engines/table-engines/integrations/hive.md index b804b9c2279..61147467690 100644 --- a/docs/en/engines/table-engines/integrations/hive.md +++ b/docs/en/engines/table-engines/integrations/hive.md @@ -137,7 +137,7 @@ CREATE TABLE test.test_orc `f_array_array_float` Array(Array(Float32)), `day` String ) -ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc') +ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc') PARTITION BY day ``` diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 9c7fab7424d..ad199ce452e 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -195,5 +195,6 @@ toc_title: Adopters | ООО «МПЗ Богородский» | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) | | ДомКлик | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) | | АС "Стрела" | Transportation | — | — | — | [Job posting, Jan 2022](https://vk.com/topic-111905078_35689124?post=3553) | +| Piwik PRO | Web Analytics | — | — | — | [Official website, Dec 2018](https://piwik.pro/blog/piwik-pro-clickhouse-faster-efficient-reports/) | [Original article](https://clickhouse.com/docs/en/introduction/adopters/) diff --git a/docs/en/operations/caches.md b/docs/en/operations/caches.md index 279204a8af1..9aa6419d89c 100644 --- a/docs/en/operations/caches.md +++ b/docs/en/operations/caches.md @@ -5,7 +5,7 @@ toc_title: Caches # Cache Types {#cache-types} -When performing queries, ClichHouse uses different caches. +When performing queries, ClickHouse uses different caches. 
Main cache types: diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 8bf1a5f477c..c48a70b0909 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -10,7 +10,7 @@ cssmin==0.2.0 future==0.18.2 htmlmin==0.1.12 idna==2.10 -Jinja2>=3.0.3 +Jinja2==3.0.3 jinja2-highlight==0.6.1 jsmin==3.0.0 livereload==2.6.3 diff --git a/docs/zh/engines/table-engines/integrations/hive.md b/docs/zh/engines/table-engines/integrations/hive.md index aa2c82d902a..24e0834d2fc 100644 --- a/docs/zh/engines/table-engines/integrations/hive.md +++ b/docs/zh/engines/table-engines/integrations/hive.md @@ -140,7 +140,7 @@ CREATE TABLE test.test_orc `f_array_array_float` Array(Array(Float32)), `day` String ) -ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc') +ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc') PARTITION BY day ``` diff --git a/docs/zh/operations/system-tables/functions.md b/docs/zh/operations/system-tables/functions.md index 695c7b7fee1..75df1f65c1f 100644 --- a/docs/zh/operations/system-tables/functions.md +++ b/docs/zh/operations/system-tables/functions.md @@ -15,7 +15,7 @@ ``` ┌─name─────────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┐ │ sumburConsistentHash │ 0 │ 0 │ │ -│ yandexConsistentHash │ 0 │ 0 │ │ +│ kostikConsistentHash │ 0 │ 0 │ │ │ demangle │ 0 │ 0 │ │ │ addressToLine │ 0 │ 0 │ │ │ JSONExtractRaw │ 0 │ 0 │ │ diff --git a/packages/clickhouse-common-static-dbg.yaml b/packages/clickhouse-common-static-dbg.yaml index 1213f4215c8..12a1594bd30 100644 --- a/packages/clickhouse-common-static-dbg.yaml +++ b/packages/clickhouse-common-static-dbg.yaml @@ -21,8 +21,12 @@ description: | This package contains the debugging symbols for clickhouse-common. 
contents: -- src: root/usr/lib/debug - dst: /usr/lib/debug +- src: root/usr/lib/debug/usr/bin/clickhouse.debug + dst: /usr/lib/debug/usr/bin/clickhouse.debug +- src: root/usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug + dst: /usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug +- src: root/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug + dst: /usr/lib/debug/usr/bin/clickhouse-library-bridge.debug # docs - src: ../AUTHORS dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 1e2420021b6..cca7be97b61 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -473,18 +473,11 @@ else () if (INSTALL_STRIPPED_BINARIES) clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse) else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT}) install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() endif() -if (NOT INSTALL_STRIPPED_BINARIES) - # Install dunny debug directory - # TODO: move logic to every place where clickhouse_strip_binary is used - add_custom_command(TARGET clickhouse POST_BUILD COMMAND echo > .empty ) - install(FILES "${CMAKE_CURRENT_BINARY_DIR}/.empty" DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/.empty) -endif() - - if (ENABLE_TESTS) set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms) add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS}) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 92bb5dc45a3..9491d503fbf 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -137,5 +137,10 @@ if (BUILD_STANDALONE_KEEPER) add_dependencies(clickhouse-keeper clickhouse_keeper_configs) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) - install(TARGETS 
clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + if (INSTALL_STRIPPED_BINARIES) + clickhouse_strip_binary(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-keeper) + else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT}) + install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + endif() endif() diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index aded9664b35..90ce3d8be7f 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -27,5 +27,6 @@ set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECT if (INSTALL_STRIPPED_BINARIES) clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge) else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT}) install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 50a8bb629c8..b530e08ca26 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -42,6 +42,7 @@ endif() if (INSTALL_STRIPPED_BINARIES) clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge) else() + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT}) install(TARGETS clickhouse-odbc-bridge RUNTIME 
DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index aaffe85ae2e..13d39980e1c 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -29,15 +29,15 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover( time_t decrease_error_period_, size_t max_error_cap_) : Base(std::move(nested_pools_), decrease_error_period_, max_error_cap_, &Poco::Logger::get("ConnectionPoolWithFailover")) - , default_load_balancing(load_balancing) + , get_priority_load_balancing(load_balancing) { const std::string & local_hostname = getFQDNOrHostName(); - hostname_differences.resize(nested_pools.size()); + get_priority_load_balancing.hostname_differences.resize(nested_pools.size()); for (size_t i = 0; i < nested_pools.size(); ++i) { ConnectionPool & connection_pool = dynamic_cast(*nested_pools[i]); - hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost()); + get_priority_load_balancing.hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost()); } } @@ -51,36 +51,15 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts }; size_t offset = 0; + LoadBalancing load_balancing = get_priority_load_balancing.load_balancing; if (settings) - offset = settings->load_balancing_first_offset % nested_pools.size(); - GetPriorityFunc get_priority; - switch (settings ? 
LoadBalancing(settings->load_balancing) : default_load_balancing) { - case LoadBalancing::NEAREST_HOSTNAME: - get_priority = [&](size_t i) { return hostname_differences[i]; }; - break; - case LoadBalancing::IN_ORDER: - get_priority = [](size_t i) { return i; }; - break; - case LoadBalancing::RANDOM: - break; - case LoadBalancing::FIRST_OR_RANDOM: - get_priority = [offset](size_t i) -> size_t { return i != offset; }; - break; - case LoadBalancing::ROUND_ROBIN: - if (last_used >= nested_pools.size()) - last_used = 0; - ++last_used; - /* Consider nested_pools.size() equals to 5 - * last_used = 1 -> get_priority: 0 1 2 3 4 - * last_used = 2 -> get_priority: 4 0 1 2 3 - * last_used = 3 -> get_priority: 4 3 0 1 2 - * ... - * */ - get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; }; - break; + offset = settings->load_balancing_first_offset % nested_pools.size(); + load_balancing = LoadBalancing(settings->load_balancing); } + GetPriorityFunc get_priority = get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size()); + UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0; bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true; @@ -173,38 +152,14 @@ std::vector ConnectionPoolWithFailover::g ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings * settings) { size_t offset = 0; + LoadBalancing load_balancing = get_priority_load_balancing.load_balancing; if (settings) - offset = settings->load_balancing_first_offset % nested_pools.size(); - - GetPriorityFunc get_priority; - switch (settings ? 
LoadBalancing(settings->load_balancing) : default_load_balancing) { - case LoadBalancing::NEAREST_HOSTNAME: - get_priority = [&](size_t i) { return hostname_differences[i]; }; - break; - case LoadBalancing::IN_ORDER: - get_priority = [](size_t i) { return i; }; - break; - case LoadBalancing::RANDOM: - break; - case LoadBalancing::FIRST_OR_RANDOM: - get_priority = [offset](size_t i) -> size_t { return i != offset; }; - break; - case LoadBalancing::ROUND_ROBIN: - if (last_used >= nested_pools.size()) - last_used = 0; - ++last_used; - /* Consider nested_pools.size() equals to 5 - * last_used = 1 -> get_priority: 0 1 2 3 4 - * last_used = 2 -> get_priority: 5 0 1 2 3 - * last_used = 3 -> get_priority: 5 4 0 1 2 - * ... - * */ - get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; }; - break; + offset = settings->load_balancing_first_offset % nested_pools.size(); + load_balancing = LoadBalancing(settings->load_balancing); } - return get_priority; + return get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size()); } std::vector ConnectionPoolWithFailover::getManyImpl( diff --git a/src/Client/ConnectionPoolWithFailover.h b/src/Client/ConnectionPoolWithFailover.h index 4e47905aae6..df7dd572ef3 100644 --- a/src/Client/ConnectionPoolWithFailover.h +++ b/src/Client/ConnectionPoolWithFailover.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -109,9 +110,7 @@ private: GetPriorityFunc makeGetPriorityFunc(const Settings * settings); - std::vector hostname_differences; /// Distances from name of this host to the names of hosts of pools. - size_t last_used = 0; /// Last used for round_robin policy. 
- LoadBalancing default_load_balancing; + GetPriorityForLoadBalancing get_priority_load_balancing; }; using ConnectionPoolWithFailoverPtr = std::shared_ptr; diff --git a/src/Columns/MaskOperations.cpp b/src/Columns/MaskOperations.cpp index 64d90ae82cb..9e2d02253be 100644 --- a/src/Columns/MaskOperations.cpp +++ b/src/Columns/MaskOperations.cpp @@ -83,11 +83,20 @@ size_t extractMaskNumericImpl( const PaddedPODArray * null_bytemap, PaddedPODArray * nulls) { + if constexpr (!column_is_short) + { + if (data.size() != mask.size()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask"); + } + size_t ones_count = 0; size_t data_index = 0; - size_t mask_size = mask.size(); - for (size_t i = 0; i != mask_size; ++i) + size_t mask_size = mask.size(); + size_t data_size = data.size(); + + size_t i = 0; + for (; i != mask_size && data_index != data_size; ++i) { // Change mask only where value is 1. if (!mask[i]) @@ -120,6 +129,13 @@ size_t extractMaskNumericImpl( mask[i] = value; } + + if constexpr (column_is_short) + { + if (data_index != data_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask"); + } + return ones_count; } diff --git a/src/Common/FiberStack.h b/src/Common/FiberStack.h index 29f84ee6d85..c55608311d0 100644 --- a/src/Common/FiberStack.h +++ b/src/Common/FiberStack.h @@ -31,8 +31,8 @@ public: /// probably it worth to try to increase stack size for coroutines. /// /// Current value is just enough for all tests in our CI. It's not selected in some special - /// way. We will have 40 pages with 4KB page size. - static constexpr size_t default_stack_size = 192 * 1024; /// 64KB was not enough for tests + /// way. We will have 80 pages with 4KB page size. 
+ static constexpr size_t default_stack_size = 320 * 1024; /// 64KB was not enough for tests explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_) { diff --git a/src/Common/GetPriorityForLoadBalancing.cpp b/src/Common/GetPriorityForLoadBalancing.cpp new file mode 100644 index 00000000000..d8e7566e891 --- /dev/null +++ b/src/Common/GetPriorityForLoadBalancing.cpp @@ -0,0 +1,49 @@ +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +std::function GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const +{ + std::function get_priority; + switch (load_balance) + { + case LoadBalancing::NEAREST_HOSTNAME: + if (hostname_differences.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "It's a bug: hostname_differences is not initialized"); + get_priority = [&](size_t i) { return hostname_differences[i]; }; + break; + case LoadBalancing::IN_ORDER: + get_priority = [](size_t i) { return i; }; + break; + case LoadBalancing::RANDOM: + break; + case LoadBalancing::FIRST_OR_RANDOM: + get_priority = [offset](size_t i) -> size_t { return i != offset; }; + break; + case LoadBalancing::ROUND_ROBIN: + if (last_used >= pool_size) + last_used = 0; + ++last_used; + /* Consider pool_size equals to 5 + * last_used = 1 -> get_priority: 0 1 2 3 4 + * last_used = 2 -> get_priority: 4 0 1 2 3 + * last_used = 3 -> get_priority: 4 3 0 1 2 + * ... + * */ + get_priority = [this, pool_size](size_t i) /* pool_size is a by-value parameter: capture a copy, a reference to it would dangle once this function returns */ + { + ++i; + return i < last_used ?
pool_size - i : i - last_used; + }; + break; + } + return get_priority; +} + +} diff --git a/src/Common/GetPriorityForLoadBalancing.h b/src/Common/GetPriorityForLoadBalancing.h new file mode 100644 index 00000000000..e57b02b5e90 --- /dev/null +++ b/src/Common/GetPriorityForLoadBalancing.h @@ -0,0 +1,34 @@ +#pragma once + +#include + +namespace DB +{ + +class GetPriorityForLoadBalancing +{ +public: + GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {} + GetPriorityForLoadBalancing(){} + + bool operator == (const GetPriorityForLoadBalancing & other) const + { + return load_balancing == other.load_balancing && hostname_differences == other.hostname_differences; + } + + bool operator != (const GetPriorityForLoadBalancing & other) const + { + return !(*this == other); + } + + std::function getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const; + + std::vector hostname_differences; /// Distances from name of this host to the names of hosts of pools. + + LoadBalancing load_balancing = LoadBalancing::RANDOM; + +private: + mutable size_t last_used = 0; /// Last used for round_robin policy. 
+}; + +} diff --git a/src/Common/IntervalKind.cpp b/src/Common/IntervalKind.cpp index 69b56be48ac..1478b832282 100644 --- a/src/Common/IntervalKind.cpp +++ b/src/Common/IntervalKind.cpp @@ -13,6 +13,9 @@ Int32 IntervalKind::toAvgSeconds() const { switch (kind) { + case IntervalKind::Nanosecond: return 0; /// fractional parts of seconds have 0 seconds + case IntervalKind::Microsecond: return 0; + case IntervalKind::Millisecond: return 0; case IntervalKind::Second: return 1; case IntervalKind::Minute: return 60; case IntervalKind::Hour: return 3600; @@ -52,6 +55,9 @@ const char * IntervalKind::toKeyword() const { switch (kind) { + case IntervalKind::Nanosecond: return "NANOSECOND"; + case IntervalKind::Microsecond: return "MICROSECOND"; + case IntervalKind::Millisecond: return "MILLISECOND"; case IntervalKind::Second: return "SECOND"; case IntervalKind::Minute: return "MINUTE"; case IntervalKind::Hour: return "HOUR"; @@ -69,6 +75,9 @@ const char * IntervalKind::toLowercasedKeyword() const { switch (kind) { + case IntervalKind::Nanosecond: return "nanosecond"; + case IntervalKind::Microsecond: return "microsecond"; + case IntervalKind::Millisecond: return "millisecond"; case IntervalKind::Second: return "second"; case IntervalKind::Minute: return "minute"; case IntervalKind::Hour: return "hour"; @@ -86,6 +95,12 @@ const char * IntervalKind::toDateDiffUnit() const { switch (kind) { + case IntervalKind::Nanosecond: + return "nanosecond"; + case IntervalKind::Microsecond: + return "microsecond"; + case IntervalKind::Millisecond: + return "millisecond"; case IntervalKind::Second: return "second"; case IntervalKind::Minute: @@ -111,6 +126,12 @@ const char * IntervalKind::toNameOfFunctionToIntervalDataType() const { switch (kind) { + case IntervalKind::Nanosecond: + return "toIntervalNanosecond"; + case IntervalKind::Microsecond: + return "toIntervalMicrosecond"; + case IntervalKind::Millisecond: + return "toIntervalMillisecond"; case IntervalKind::Second: return 
"toIntervalSecond"; case IntervalKind::Minute: @@ -136,6 +157,12 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const { switch (kind) { + case IntervalKind::Nanosecond: + return "toNanosecond"; + case IntervalKind::Microsecond: + return "toMicrosecond"; + case IntervalKind::Millisecond: + return "toMillisecond"; case IntervalKind::Second: return "toSecond"; case IntervalKind::Minute: @@ -162,6 +189,21 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const bool IntervalKind::tryParseString(const std::string & kind, IntervalKind::Kind & result) { + if ("nanosecond" == kind) + { + result = IntervalKind::Nanosecond; + return true; + } + if ("microsecond" == kind) + { + result = IntervalKind::Microsecond; + return true; + } + if ("millisecond" == kind) + { + result = IntervalKind::Millisecond; + return true; + } if ("second" == kind) { result = IntervalKind::Second; diff --git a/src/Common/IntervalKind.h b/src/Common/IntervalKind.h index aab0bb79be5..d5f2b5672cd 100644 --- a/src/Common/IntervalKind.h +++ b/src/Common/IntervalKind.h @@ -10,6 +10,9 @@ struct IntervalKind { enum Kind { + Nanosecond, + Microsecond, + Millisecond, Second, Minute, Hour, @@ -61,6 +64,9 @@ struct IntervalKind /// NOLINTNEXTLINE #define FOR_EACH_INTERVAL_KIND(M) \ + M(Nanosecond) \ + M(Microsecond) \ + M(Millisecond) \ M(Second) \ M(Minute) \ M(Hour) \ diff --git a/src/Common/RadixSort.h b/src/Common/RadixSort.h index 944ab860355..2f02ebb9e03 100644 --- a/src/Common/RadixSort.h +++ b/src/Common/RadixSort.h @@ -515,6 +515,11 @@ public: radixSortLSDInternal(arr, size, false, nullptr); } + static void executeLSD(Element * arr, size_t size, bool reverse) + { + radixSortLSDInternal(arr, size, reverse, nullptr); + } + /** This function will start to sort inplace (modify 'arr') * but on the last step it will write result directly to the destination * instead of finishing sorting 'arr'. 
diff --git a/src/Common/ZooKeeper/CMakeLists.txt b/src/Common/ZooKeeper/CMakeLists.txt index 34ebad9bb50..a9a335d1461 100644 --- a/src/Common/ZooKeeper/CMakeLists.txt +++ b/src/Common/ZooKeeper/CMakeLists.txt @@ -22,7 +22,6 @@ target_link_libraries (clickhouse_common_zookeeper_no_log PRIVATE string_utils ) - if (ENABLE_EXAMPLES) add_subdirectory(examples) endif() diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index b1574341c40..118789c0ffc 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -5,15 +5,15 @@ #include #include -#include -#include #include -#include +#include #include #include +#include #include +#include #define ZOOKEEPER_CONNECTION_TIMEOUT_MS 1000 @@ -48,7 +48,7 @@ static void check(Coordination::Error code, const std::string & path) void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, - int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_) + int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_) { log = &Poco::Logger::get("ZooKeeper"); hosts = hosts_; @@ -57,6 +57,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ operation_timeout_ms = operation_timeout_ms_; chroot = chroot_; implementation = implementation_; + get_priority_load_balancing = get_priority_load_balancing_; if (implementation == "zookeeper") { @@ -66,14 +67,13 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ Coordination::ZooKeeper::Nodes nodes; nodes.reserve(hosts.size()); - Strings shuffled_hosts = hosts; /// Shuffle the hosts to distribute the load among ZooKeeper nodes. 
- pcg64 generator(randomSeed()); - std::shuffle(shuffled_hosts.begin(), shuffled_hosts.end(), generator); + std::vector shuffled_hosts = shuffleHosts(); bool dns_error = false; - for (auto & host_string : shuffled_hosts) + for (auto & host : shuffled_hosts) { + auto & host_string = host.host; try { bool secure = bool(startsWith(host_string, "secure://")); @@ -81,6 +81,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ if (secure) host_string.erase(0, strlen("secure://")); + LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, Poco::Net::SocketAddress{host_string}.toString()); nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure}); } catch (const Poco::Net::HostNotFoundException & e) @@ -154,23 +155,47 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_ } } +std::vector ZooKeeper::shuffleHosts() const +{ + std::function get_priority = get_priority_load_balancing.getPriorityFunc(get_priority_load_balancing.load_balancing, 0, hosts.size()); + std::vector shuffle_hosts; + for (size_t i = 0; i < hosts.size(); ++i) + { + ShuffleHost shuffle_host; + shuffle_host.host = hosts[i]; + if (get_priority) + shuffle_host.priority = get_priority(i); + shuffle_host.randomize(); + shuffle_hosts.emplace_back(shuffle_host); + } + + std::sort( + shuffle_hosts.begin(), shuffle_hosts.end(), + [](const ShuffleHost & lhs, const ShuffleHost & rhs) + { + return ShuffleHost::compare(lhs, rhs); + }); + + return shuffle_hosts; +} + ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_, - std::shared_ptr zk_log_) + std::shared_ptr zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_) { zk_log = std::move(zk_log_); Strings hosts_strings; splitInto<','>(hosts_strings, hosts_string); - init(implementation_, 
hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_); + init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_); } ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_, - std::shared_ptr zk_log_) + std::shared_ptr zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_) { zk_log = std::move(zk_log_); - init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_); + init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_); } struct ZooKeeperArgs @@ -213,6 +238,15 @@ struct ZooKeeperArgs { implementation = config.getString(config_name + "." + key); } + else if (key == "zookeeper_load_balancing") + { + String load_balancing_str = config.getString(config_name + "." 
+ key); + /// Use magic_enum to avoid dependency from dbms (`SettingFieldLoadBalancingTraits::fromString(...)`) + auto load_balancing = magic_enum::enum_cast(Poco::toUpper(load_balancing_str)); + if (!load_balancing) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str); + get_priority_load_balancing.load_balancing = *load_balancing; + } else throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS); } @@ -224,6 +258,15 @@ struct ZooKeeperArgs if (chroot.back() == '/') chroot.pop_back(); } + + /// init get_priority_load_balancing + get_priority_load_balancing.hostname_differences.resize(hosts.size()); + const String & local_hostname = getFQDNOrHostName(); + for (size_t i = 0; i < hosts.size(); ++i) + { + const String & node_host = hosts[i].substr(0, hosts[i].find_last_of(':')); + get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, node_host); + } } Strings hosts; @@ -232,13 +275,14 @@ struct ZooKeeperArgs int operation_timeout_ms; std::string chroot; std::string implementation; + GetPriorityForLoadBalancing get_priority_load_balancing; }; ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr zk_log_) : zk_log(std::move(zk_log_)) { ZooKeeperArgs args(config, config_name); - init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot); + init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing); } bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) const @@ -249,8 +293,11 @@ bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, if (args.implementation == implementation && implementation == "testkeeper") return false; - 
return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot) - != std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot); + if (args.get_priority_load_balancing != get_priority_load_balancing) + return true; + + return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing) + != std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, get_priority_load_balancing); /* right-hand side is this session's member; comparing args with itself was always equal */ } @@ -757,7 +804,7 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition & ZooKeeperPtr ZooKeeper::startNewSession() const { - return std::make_shared(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log); + return std::make_shared(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log, get_priority_load_balancing); } diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 6d0f8a438b1..f901a79591f 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -13,7 +13,10 @@ #include #include #include +#include +#include #include +#include namespace ProfileEvents @@ -37,6 +40,25 @@ namespace zkutil /// Preferred size of multi() command (in number of ops) constexpr size_t MULTI_BATCH_SIZE = 100; +struct ShuffleHost +{ + String host; + Int64 priority = 0; + UInt32 random = 0; + + void randomize() + { + random = thread_local_rng(); + } + + static bool compare(const ShuffleHost & lhs, const ShuffleHost & rhs) + { + return std::forward_as_tuple(lhs.priority, lhs.random) + < std::forward_as_tuple(rhs.priority, rhs.random); + } +}; + +using GetPriorityForLoadBalancing = DB::GetPriorityForLoadBalancing; /// ZooKeeper session. The interface is substantially different from the usual libzookeeper API.
/// @@ -58,14 +80,16 @@ public: int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS, const std::string & chroot_ = "", const std::string & implementation_ = "zookeeper", - std::shared_ptr zk_log_ = nullptr); + std::shared_ptr zk_log_ = nullptr, + const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {}); explicit ZooKeeper(const Strings & hosts_, const std::string & identity_ = "", int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS, int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS, const std::string & chroot_ = "", const std::string & implementation_ = "zookeeper", - std::shared_ptr zk_log_ = nullptr); + std::shared_ptr zk_log_ = nullptr, + const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {}); /** Config of the form: @@ -91,6 +115,8 @@ public: */ ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr zk_log_); + std::vector shuffleHosts() const; + /// Creates a new session with the same parameters. This method can be used for reconnecting /// after the session has expired. /// This object remains unchanged, and the new session is returned. @@ -284,7 +310,7 @@ private: friend class EphemeralNodeHolder; void init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_, - int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_); + int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_); /// The following methods don't any throw exceptions but return error codes. 
Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created); @@ -311,6 +337,8 @@ private: Poco::Logger * log = nullptr; std::shared_ptr zk_log; + GetPriorityForLoadBalancing get_priority_load_balancing; + AtomicStopwatch session_uptime; }; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 0627a70193f..d3c993344b6 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -451,7 +451,7 @@ void ZooKeeper::connect( } else { - LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}", socket.peerAddress().toString(), session_id); + LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}{}", socket.peerAddress().toString(), session_id, fail_reasons.str()); } } diff --git a/src/Common/formatIPv6.h b/src/Common/formatIPv6.h index 1a65adae55b..d6efeed17e6 100644 --- a/src/Common/formatIPv6.h +++ b/src/Common/formatIPv6.h @@ -11,7 +11,7 @@ constexpr size_t IPV4_BINARY_LENGTH = 4; constexpr size_t IPV6_BINARY_LENGTH = 16; constexpr size_t IPV4_MAX_TEXT_LENGTH = 15; /// Does not count tail zero byte. -constexpr size_t IPV6_MAX_TEXT_LENGTH = 39; +constexpr size_t IPV6_MAX_TEXT_LENGTH = 45; /// Does not count tail zero byte. namespace DB { diff --git a/src/Common/isLocalAddress.cpp b/src/Common/isLocalAddress.cpp index d79e4cebd15..596fd4caad7 100644 --- a/src/Common/isLocalAddress.cpp +++ b/src/Common/isLocalAddress.cpp @@ -124,6 +124,7 @@ bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_ size_t getHostNameDifference(const std::string & local_hostname, const std::string & host) { + /// FIXME should we replace it with Levenshtein distance?
(we already have it in NamePrompter) size_t hostname_difference = 0; for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i) if (local_hostname[i] != host[i]) diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 5c93d6719fa..a7142ef7f2e 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -13,6 +13,7 @@ #include #include +#include namespace DB @@ -269,8 +270,18 @@ const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const } -const ColumnWithTypeAndName * Block::findByName(const std::string & name) const +const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool case_insensitive) const { + if (case_insensitive) + { + auto found = std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); }); + if (found == data.end()) + { + return nullptr; + } + return &*found; + } + auto it = index_by_name.find(name); if (index_by_name.end() == it) { @@ -280,19 +291,23 @@ const ColumnWithTypeAndName * Block::findByName(const std::string & name) const } -const ColumnWithTypeAndName & Block::getByName(const std::string & name) const +const ColumnWithTypeAndName & Block::getByName(const std::string & name, bool case_insensitive) const { - const auto * result = findByName(name); + const auto * result = findByName(name, case_insensitive); if (!result) - throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames() - , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + throw Exception( + "Not found column " + name + " in block. 
There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); return *result; } -bool Block::has(const std::string & name) const +bool Block::has(const std::string & name, bool case_insensitive) const { + if (case_insensitive) + return std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); }) + != data.end(); + return index_by_name.end() != index_by_name.find(name); } @@ -301,8 +316,8 @@ size_t Block::getPositionByName(const std::string & name) const { auto it = index_by_name.find(name); if (index_by_name.end() == it) - throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames() - , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + throw Exception( + "Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); return it->second; } diff --git a/src/Core/Block.h b/src/Core/Block.h index 66e16b70f47..c5d3e1ae35a 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -60,21 +60,21 @@ public: ColumnWithTypeAndName & safeGetByPosition(size_t position); const ColumnWithTypeAndName & safeGetByPosition(size_t position) const; - ColumnWithTypeAndName* findByName(const std::string & name) + ColumnWithTypeAndName* findByName(const std::string & name, bool case_insensitive = false) { return const_cast( - const_cast(this)->findByName(name)); + const_cast(this)->findByName(name, case_insensitive)); } - const ColumnWithTypeAndName * findByName(const std::string & name) const; + const ColumnWithTypeAndName * findByName(const std::string & name, bool case_insensitive = false) const; - ColumnWithTypeAndName & getByName(const std::string & name) + ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false) { return const_cast( - const_cast(this)->getByName(name)); + const_cast(this)->getByName(name, case_insensitive)); } - const ColumnWithTypeAndName & getByName(const std::string & name) const; 
+ const ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false) const; Container::iterator begin() { return data.begin(); } Container::iterator end() { return data.end(); } @@ -83,7 +83,7 @@ public: Container::const_iterator cbegin() const { return data.cbegin(); } Container::const_iterator cend() const { return data.cend(); } - bool has(const std::string & name) const; + bool has(const std::string & name, bool case_insensitive = false) const; size_t getPositionByName(const std::string & name) const; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 86ea202fda7..a13ac7a6527 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -616,11 +616,13 @@ class IColumn; M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \ M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \ - M(Bool, input_format_use_lowercase_column_name, false, "Use lowercase column name while reading input formats", 0) \ M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \ + M(Bool, input_format_arrow_case_insensitive_column_matching, false, "Ignore case when matching Arrow columns with CH columns.", 0) \ M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \ M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \ + M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \ M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \ + M(Bool, 
input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \ M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \ M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \ M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index ddd1c29785c..3f68038560c 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -149,4 +149,5 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, {"str", FormatSettings::MsgPackUUIDRepresentation::STR}, {"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}}) + } diff --git a/src/DataTypes/DataTypeInterval.cpp b/src/DataTypes/DataTypeInterval.cpp index 57d071a8666..9faf0cec2d8 100644 --- a/src/DataTypes/DataTypeInterval.cpp +++ b/src/DataTypes/DataTypeInterval.cpp @@ -13,6 +13,9 @@ bool DataTypeInterval::equals(const IDataType & rhs) const void registerDataTypeInterval(DataTypeFactory & factory) { + factory.registerSimpleDataType("IntervalNanosecond", [] { return DataTypePtr(std::make_shared(IntervalKind::Nanosecond)); }); + factory.registerSimpleDataType("IntervalMicrosecond", [] { return DataTypePtr(std::make_shared(IntervalKind::Microsecond)); }); + factory.registerSimpleDataType("IntervalMillisecond", [] { return DataTypePtr(std::make_shared(IntervalKind::Millisecond)); }); factory.registerSimpleDataType("IntervalSecond", [] { return DataTypePtr(std::make_shared(IntervalKind::Second)); }); factory.registerSimpleDataType("IntervalMinute", [] { return DataTypePtr(std::make_shared(IntervalKind::Minute)); }); factory.registerSimpleDataType("IntervalHour", [] { return DataTypePtr(std::make_shared(IntervalKind::Hour)); }); diff --git a/src/DataTypes/NestedUtils.cpp 
b/src/DataTypes/NestedUtils.cpp index df504bc34a8..8f5e40de5b8 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -15,6 +15,8 @@ #include +#include + namespace DB { @@ -227,14 +229,17 @@ void validateArraySizes(const Block & block) } -std::unordered_set getAllTableNames(const Block & block) +std::unordered_set getAllTableNames(const Block & block, bool to_lower_case) { std::unordered_set nested_table_names; - for (auto & name : block.getNames()) + for (const auto & name : block.getNames()) { auto nested_table_name = Nested::extractTableName(name); + if (to_lower_case) + boost::to_lower(nested_table_name); + if (!nested_table_name.empty()) - nested_table_names.insert(nested_table_name); + nested_table_names.insert(std::move(nested_table_name)); } return nested_table_names; } diff --git a/src/DataTypes/NestedUtils.h b/src/DataTypes/NestedUtils.h index 2ca5c17dc74..f6dc42d5c58 100644 --- a/src/DataTypes/NestedUtils.h +++ b/src/DataTypes/NestedUtils.h @@ -32,7 +32,7 @@ namespace Nested void validateArraySizes(const Block & block); /// Get all nested tables names from a block. - std::unordered_set getAllTableNames(const Block & block); + std::unordered_set getAllTableNames(const Block & block, bool to_lower_case = false); } } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index d9d9f5b45f6..0c3cc56c061 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -88,6 +88,9 @@ DatabaseReplicated::DatabaseReplicated( /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it. 
if (zookeeper_path.front() != '/') zookeeper_path = "/" + zookeeper_path; + + if (!db_settings.collection_name.value.empty()) + fillClusterAuthInfo(db_settings.collection_name.value, context_->getConfigRef()); } String DatabaseReplicated::getFullReplicaName() const @@ -191,22 +194,36 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const shards.back().emplace_back(unescapeForFileName(host_port)); } - String username = db_settings.cluster_username; - String password = db_settings.cluster_password; UInt16 default_port = getContext()->getTCPPort(); - bool secure = db_settings.cluster_secure_connection; bool treat_local_as_remote = false; bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL; return std::make_shared( getContext()->getSettingsRef(), shards, - username, - password, + cluster_auth_info.cluster_username, + cluster_auth_info.cluster_password, default_port, treat_local_as_remote, treat_local_port_as_remote, - secure); + cluster_auth_info.cluster_secure_connection, + /*priority=*/1, + database_name, + cluster_auth_info.cluster_secret); +} + + +void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config_ref) +{ + const auto & config_prefix = fmt::format("named_collections.{}", collection_name); + + if (!config_ref.has(config_prefix)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name); + + cluster_auth_info.cluster_username = config_ref.getString(config_prefix + ".cluster_username", ""); + cluster_auth_info.cluster_password = config_ref.getString(config_prefix + ".cluster_password", ""); + cluster_auth_info.cluster_secret = config_ref.getString(config_prefix + ".cluster_secret", ""); + cluster_auth_info.cluster_secure_connection = config_ref.getBool(config_prefix + ".cluster_secure_connection", false); } void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(bool force_attach) diff --git 
a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index fcb8a2c4d33..ac212e168b8 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -75,6 +75,16 @@ private: bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper); void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper); + struct + { + String cluster_username{"default"}; + String cluster_password; + String cluster_secret; + bool cluster_secure_connection{false}; + } cluster_auth_info; + + void fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config); + void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const; void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 max_log_ptr); diff --git a/src/Databases/DatabaseReplicatedSettings.h b/src/Databases/DatabaseReplicatedSettings.h index 0aff26712c0..8bed1ababf6 100644 --- a/src/Databases/DatabaseReplicatedSettings.h +++ b/src/Databases/DatabaseReplicatedSettings.h @@ -8,12 +8,11 @@ namespace DB class ASTStorage; #define LIST_OF_DATABASE_REPLICATED_SETTINGS(M) \ - M(Float, max_broken_tables_ratio, 0.5, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \ + M(Float, max_broken_tables_ratio, 0.5, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \ M(UInt64, max_replication_lag_to_enqueue, 10, "Replica will throw exception on attempt to execute query if its replication lag greater", 0) \ M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \ - M(String, cluster_username, "default", "Username to use when connecting to hosts of cluster", 0) \ - M(String, cluster_password, "", "Password to use when connecting to hosts of cluster", 0) \ - M(Bool, cluster_secure_connection, false, "Enable TLS when 
connecting to hosts of cluster", 0) \ + M(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \ + DECLARE_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS) diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index e46620d9d1f..e05ccef74c0 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -264,32 +265,6 @@ std::unique_ptr DiskS3::writeFile(const String & path, LOG_TRACE(log, "{} to file by path: {}. S3 path: {}", mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name); - ScheduleFunc schedule = [pool = &getThreadPoolWriter(), thread_group = CurrentThread::getGroup()](auto callback) - { - pool->scheduleOrThrow([callback = std::move(callback), thread_group]() - { - if (thread_group) - CurrentThread::attachTo(thread_group); - - SCOPE_EXIT_SAFE( - if (thread_group) - CurrentThread::detachQueryIfNotDetached(); - - /// After we detached from the thread_group, parent for memory_tracker inside ThreadStatus will be reset to it's parent. - /// Typically, it may be changes from Process to User. - /// Usually it could be ok, because thread pool task is executed before user-level memory tracker is destroyed. - /// However, thread could stay alive inside the thread pool, and it's ThreadStatus as well. - /// When, finally, we destroy the thread (and the ThreadStatus), - /// it can use memory tracker in the ~ThreadStatus in order to alloc/free untracked_memory,\ - /// and by this time user-level memory tracker may be already destroyed. - /// - /// As a work-around, reset memory tracker to total, which is always alive. 
- CurrentThread::get().memory_tracker.setParent(&total_memory_tracker); - ); - callback(); - }); - }; - auto s3_buffer = std::make_unique( settings->client, bucket, @@ -299,7 +274,7 @@ std::unique_ptr DiskS3::writeFile(const String & path, settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_max_single_part_upload_size, std::move(object_metadata), - buf_size, std::move(schedule)); + buf_size, threadPoolCallbackRunner(getThreadPoolWriter())); auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) { diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index f8636768d00..8b417498b6a 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -90,10 +90,10 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers; format_settings.json.quote_denormals = settings.output_format_json_quote_denormals; format_settings.null_as_default = settings.input_format_null_as_default; - format_settings.use_lowercase_column_name = settings.input_format_use_lowercase_column_name; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; format_settings.parquet.import_nested = settings.input_format_parquet_import_nested; + format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching; format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns; format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference; format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? 
FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8; @@ -131,6 +131,13 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference; + format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference; + format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching; + format_settings.orc.import_nested = settings.input_format_orc_import_nested; + format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns; + format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size; + format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_orc_skip_columns_with_unsupported_types_in_schema_inference; + format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching; format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields; format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode; format_settings.seekable_read = settings.input_format_allow_seeks; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 4b39d255110..d18549ee756 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -32,7 +32,6 @@ struct FormatSettings bool null_as_default = true; bool decimal_trailing_zeros = false; bool defaults_for_omitted_fields = true; - bool use_lowercase_column_name = false; bool seekable_read = true; 
UInt64 max_rows_to_read_for_schema_inference = 100; @@ -78,6 +77,7 @@ struct FormatSettings bool import_nested = false; bool allow_missing_columns = false; bool skip_columns_with_unsupported_types_in_schema_inference = false; + bool case_insensitive_column_matching = false; } arrow; struct @@ -142,6 +142,7 @@ struct FormatSettings bool import_nested = false; bool allow_missing_columns = false; bool skip_columns_with_unsupported_types_in_schema_inference = false; + bool case_insensitive_column_matching = false; } parquet; struct Pretty @@ -224,6 +225,7 @@ struct FormatSettings bool allow_missing_columns = false; int64_t row_batch_size = 100'000; bool skip_columns_with_unsupported_types_in_schema_inference = false; + bool case_insensitive_column_matching = false; } orc; /// For capnProto format we should determine how to diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index a7f06689820..bc1ae807e7d 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -41,6 +41,11 @@ namespace ErrorCodes throw Exception("Illegal type Date of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } + static inline UInt32 dateTimeIsNotSupported(const char * name) + { + throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + /// This factor transformation will say that the function is monotone everywhere. 
struct ZeroTransform { @@ -311,6 +316,133 @@ struct ToStartOfSecondImpl using FactorTransform = ZeroTransform; }; +struct ToStartOfMillisecondImpl +{ + static constexpr auto name = "toStartOfMillisecond"; + + static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) + { + // given that scale is 6, scale_multiplier is 1000000 + // for DateTime64 value of 123.456789: + // 123456789 - 789 = 123456000 + // for DateTime64 value of -123.456789: + // -123456789 - (1000 + (-789)) = -123457000 + + if (scale_multiplier == 1000) + { + return datetime64; + } + else if (scale_multiplier <= 1000) + { + return datetime64 * (1000 / scale_multiplier); + } + else + { + auto droppable_part_with_sign = DecimalUtils::getFractionalPartWithScaleMultiplier(datetime64, scale_multiplier / 1000); + + if (droppable_part_with_sign < 0) + droppable_part_with_sign += scale_multiplier; + + return datetime64 - droppable_part_with_sign; + } + } + + static inline UInt32 execute(UInt32, const DateLUTImpl &) + { + throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + static inline UInt32 execute(UInt16, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + + using FactorTransform = ZeroTransform; +}; + +struct ToStartOfMicrosecondImpl +{ + static constexpr auto name = "toStartOfMicrosecond"; + + static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) + { + // @see ToStartOfMillisecondImpl + + if (scale_multiplier == 1000000) + { + return datetime64; + } + else if (scale_multiplier <= 1000000) + { + return datetime64 * (1000000 / scale_multiplier); + } + else + { + auto droppable_part_with_sign = DecimalUtils::getFractionalPartWithScaleMultiplier(datetime64, scale_multiplier / 1000000); + + if 
(droppable_part_with_sign < 0) + droppable_part_with_sign += scale_multiplier; + + return datetime64 - droppable_part_with_sign; + } + } + + static inline UInt32 execute(UInt32, const DateLUTImpl &) + { + throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + static inline UInt32 execute(UInt16, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + + using FactorTransform = ZeroTransform; +}; + +struct ToStartOfNanosecondImpl +{ + static constexpr auto name = "toStartOfNanosecond"; + + static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &) + { + // @see ToStartOfMillisecondImpl + if (scale_multiplier == 1000000000) + { + return datetime64; + } + else if (scale_multiplier <= 1000000000) + { + return datetime64 * (1000000000 / scale_multiplier); + } + else + { + throw Exception("Illegal type of argument for function " + std::string(name) + ", DateTime64 expected", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } + + static inline UInt32 execute(UInt32, const DateLUTImpl &) + { + throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + static inline UInt32 execute(Int32, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + static inline UInt32 execute(UInt16, const DateLUTImpl &) + { + return dateIsNotSupported(name); + } + + using FactorTransform = ZeroTransform; +}; + struct ToStartOfFiveMinuteImpl { static constexpr auto name = "toStartOfFiveMinute"; diff --git a/src/Functions/FunctionDateOrDateTimeAddInterval.h b/src/Functions/FunctionDateOrDateTimeAddInterval.h index cb48b819481..fbfc9e9bc1f 100644 --- a/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -40,26 +40,158 @@ namespace 
ErrorCodes /// - 'AddSecondsImpl::execute(UInt32, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(DateTime, ...) -> DateTime' /// - 'AddSecondsImpl::execute(UInt16, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(Date, ...) -> DateTime' +struct AddNanosecondsImpl +{ + static constexpr auto name = "addNanoseconds"; + + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + auto division = std::div(t.fractional * multiplier + delta, static_cast(1000000000)); + return {t.whole * multiplier + division.quot, t.fractional * multiplier + delta}; + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(9 - scale); + return t * multiplier + delta; + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(9); + return t * multiplier + delta; + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) + { + throw Exception("addNanoSeconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) + { + throw Exception("addNanoSeconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } +}; + +struct AddMicrosecondsImpl +{ + static constexpr auto name = "addMicroseconds"; + + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - 
scale)); + if (scale <= 6) + { + auto division = std::div((t.fractional + delta), static_cast(10e6)); + return {t.whole * multiplier + division.quot, division.rem}; + } + else + { + auto division = std::div((t.fractional + delta * multiplier), static_cast(10e6 * multiplier)); + return {t.whole + division.quot, division.rem}; + } + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(6 - scale)); + return scale <= 6 ? t * multiplier + delta : t + delta * multiplier; + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(6); + return t * multiplier + delta; + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) + { + throw Exception("addMicroSeconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) + { + throw Exception("addMicroSeconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } +}; + +struct AddMillisecondsImpl +{ + static constexpr auto name = "addMilliseconds"; + + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); + if (scale <= 3) + { + auto division = std::div((t.fractional + delta), static_cast(1000)); + return {t.whole * multiplier + division.quot, division.rem}; + } + else + { + auto division = std::div((t.fractional + delta * multiplier), static_cast(1000 * multiplier)); + return {t.whole + division.quot,division.rem}; + } + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 
+ execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(std::abs(3 - scale)); + return scale <= 3 ? t * multiplier + delta : t + delta * multiplier; + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) + { + Int64 multiplier = DecimalUtils::scaleMultiplier(3); + return t * multiplier + delta; + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0) + { + throw Exception("addMilliSeconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0) + { + throw Exception("addMilliSeconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } +}; + struct AddSecondsImpl { static constexpr auto name = "addSeconds"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return {t.whole + delta, t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + return t + delta * DecimalUtils::scaleMultiplier(scale); + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { // use default datetime64 scale return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000; } - static inline 
NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta; } @@ -70,21 +202,29 @@ struct AddMinutesImpl static constexpr auto name = "addMinutes"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return {t.whole + delta * 60, t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + return t + 60 * delta * DecimalUtils::scaleMultiplier(scale); + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta * 60; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { // use default datetime64 scale return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta * 60; } @@ -95,20 +235,29 @@ struct AddHoursImpl static constexpr auto name = "addHours"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl &, UInt16 
= 0) { return {t.whole + delta * 3600, t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &) + + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0) + { + return t + 3600 * delta * DecimalUtils::scaleMultiplier(scale); + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return t + delta * 3600; } - static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { // use default datetime64 scale return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.fromDayNum(DayNum(d)) + delta * 3600; } @@ -119,22 +268,30 @@ struct AddDaysImpl static constexpr auto name = "addDays"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone) + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addDays(t.whole, delta), t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) + { + auto multiplier = DecimalUtils::scaleMultiplier(scale); + auto d = std::div(t, multiplier); + return time_zone.addDays(d.quot, delta) * multiplier + d.rem; + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, 
const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addDays(t, delta); } - static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta; } - static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta; } @@ -145,22 +302,30 @@ struct AddWeeksImpl static constexpr auto name = "addWeeks"; static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone) + execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addWeeks(t.whole, delta), t.fractional}; } - static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) + { + auto multiplier = DecimalUtils::scaleMultiplier(scale); + auto d = std::div(t, multiplier); + return time_zone.addDays(d.quot, delta * 7) * multiplier + d.rem; + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addWeeks(t, delta); } - static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta * 7; } - static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &) + static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &, UInt16 = 0) { return d + delta * 7; } @@ -170,23 +335,31 @@ 
struct AddMonthsImpl { static constexpr auto name = "addMonths"; - static inline DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addMonths(t.whole, delta), t.fractional}; } - static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) + { + auto multiplier = DecimalUtils::scaleMultiplier(scale); + auto d = std::div(t, multiplier); + return time_zone.addMonths(d.quot, delta) * multiplier + d.rem; + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addMonths(t, delta); } - static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addMonths(DayNum(d), delta); } - static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addMonths(ExtendedDayNum(d), delta); } @@ -197,22 +370,30 @@ struct AddQuartersImpl static constexpr auto name = "addQuarters"; static inline DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone) + execute(DecimalUtils::DecimalComponents t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addQuarters(t.whole, delta), t.fractional}; } - static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone) + static inline 
NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) + { + auto multiplier = DecimalUtils::scaleMultiplier(scale); + auto d = std::div(t, multiplier); + return time_zone.addQuarters(d.quot, delta) * multiplier + d.rem; + } + + static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addQuarters(t, delta); } - static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone) + static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addQuarters(DayNum(d), delta); } - static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone) + static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addQuarters(ExtendedDayNum(d), delta); } @@ -222,23 +403,31 @@ struct AddYearsImpl { static constexpr auto name = "addYears"; - static inline DecimalUtils::DecimalComponents - execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents + execute(DecimalUtils::DecimalComponents t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return {time_zone.addYears(t.whole, delta), t.fractional}; } - static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED DateTime64 + execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0) + { + auto multiplier = DecimalUtils::scaleMultiplier(scale); + auto d = std::div(t, multiplier); + return time_zone.addYears(d.quot, delta) * multiplier + d.rem; + } + + static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addYears(t, delta); } - static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone) + 
static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addYears(DayNum(d), delta); } - static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone) + static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0) { return time_zone.addYears(ExtendedDayNum(d), delta); } @@ -250,13 +439,16 @@ struct SubtractIntervalImpl : public Transform using Transform::Transform; template - inline NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone) const + inline NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const { /// Signed integer overflow is Ok. - return Transform::execute(t, -delta, time_zone); + return Transform::execute(t, -delta, time_zone, scale); } }; +struct SubtractNanosecondsImpl : SubtractIntervalImpl { static constexpr auto name = "subtractNanoseconds"; }; +struct SubtractMicrosecondsImpl : SubtractIntervalImpl { static constexpr auto name = "subtractMicroseconds"; }; +struct SubtractMillisecondsImpl : SubtractIntervalImpl { static constexpr auto name = "subtractMilliseconds"; }; struct SubtractSecondsImpl : SubtractIntervalImpl { static constexpr auto name = "subtractSeconds"; }; struct SubtractMinutesImpl : SubtractIntervalImpl { static constexpr auto name = "subtractMinutes"; }; struct SubtractHoursImpl : SubtractIntervalImpl { static constexpr auto name = "subtractHours"; }; @@ -277,17 +469,17 @@ struct Adder {} template - void NO_INLINE vectorConstant(const FromVectorType & vec_from, ToVectorType & vec_to, Int64 delta, const DateLUTImpl & time_zone) const + void NO_INLINE vectorConstant(const FromVectorType & vec_from, ToVectorType & vec_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const { size_t size = vec_from.size(); vec_to.resize(size); for (size_t i = 0; i < size; ++i) - vec_to[i] = 
transform.execute(vec_from[i], checkOverflow(delta), time_zone); + vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone, scale); } template - void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone) const + void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const { size_t size = vec_from.size(); vec_to.resize(size); @@ -296,11 +488,11 @@ struct Adder ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat32, ColumnFloat64>( - &delta, [&](const auto & column){ vectorVector(vec_from, vec_to, column, time_zone, size); return true; }); + &delta, [&](const auto & column){ vectorVector(vec_from, vec_to, column, time_zone, scale, size); return true; }); } template - void constantVector(const FromType & from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone) const + void constantVector(const FromType & from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const { size_t size = delta.size(); vec_to.resize(size); @@ -309,7 +501,7 @@ struct Adder ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64, ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64, ColumnFloat32, ColumnFloat64>( - &delta, [&](const auto & column){ constantVector(from, vec_to, column, time_zone, size); return true; }); + &delta, [&](const auto & column){ constantVector(from, vec_to, column, time_zone, scale, size); return true; }); } private: @@ -325,18 +517,18 @@ private: template NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector( - const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const + const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 
scale, size_t size) const { for (size_t i = 0; i < size; ++i) - vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone); + vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone, scale); } template NO_INLINE NO_SANITIZE_UNDEFINED void constantVector( - const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const + const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t size) const { for (size_t i = 0; i < size; ++i) - vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone); + vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, scale); } }; @@ -344,7 +536,7 @@ private: template struct DateTimeAddIntervalImpl { - static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) + static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale = 0) { using FromValueType = typename FromDataType::FieldType; using FromColumnType = typename FromDataType::ColumnType; @@ -363,16 +555,15 @@ struct DateTimeAddIntervalImpl if (const auto * sources = checkAndGetColumn(source_col.get())) { if (const auto * delta_const_column = typeid_cast(&delta_column)) - op.vectorConstant(sources->getData(), col_to->getData(), delta_const_column->getInt(0), time_zone); + op.vectorConstant(sources->getData(), col_to->getData(), delta_const_column->getInt(0), time_zone, scale); else - op.vectorVector(sources->getData(), col_to->getData(), delta_column, time_zone); + op.vectorVector(sources->getData(), col_to->getData(), delta_column, time_zone, scale); } else if (const auto * sources_const = checkAndGetColumnConst(source_col.get())) { op.constantVector( sources_const->template getValue(), - col_to->getData(), - 
delta_column, time_zone); + col_to->getData(), delta_column, time_zone, scale); } else { @@ -463,18 +654,10 @@ public: } } - // TransformDateTime64 helps choosing correct overload of exec and does some transformations - // on input and output parameters to simplify support of DateTime64 in concrete Transform. - template - using TransformType = std::conditional_t< - std::is_same_v, - TransformDateTime64, - Transform>; - /// Helper templates to deduce return type based on argument type, since some overloads may promote or denote types, /// e.g. addSeconds(Date, 1) => DateTime template - using TransformExecuteReturnType = decltype(std::declval>().execute(FieldType(), 0, std::declval())); + using TransformExecuteReturnType = decltype(std::declval().execute(FieldType(), 0, std::declval(), 0)); // Deduces RETURN DataType from INPUT DataType, based on return type of Transform{}.execute(INPUT_TYPE, UInt64, DateLUTImpl). // e.g. for Transform-type that has execute()-overload with 'UInt16' input and 'UInt32' return, @@ -500,11 +683,33 @@ public: if (typeid_cast(arguments[0].type.get())) { const auto & datetime64_type = assert_cast(*arguments[0].type); - return std::make_shared(datetime64_type.getScale(), extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + + auto from_scale = datetime64_type.getScale(); + auto scale = from_scale; + + if (std::is_same_v) + scale = 9; + else if (std::is_same_v) + scale = 6; + else if (std::is_same_v) + scale = 3; + + scale = std::max(scale, from_scale); + + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); } else { - return std::make_shared(DataTypeDateTime64::default_scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + auto scale = DataTypeDateTime64::default_scale; + + if (std::is_same_v) + scale = 9; + else if (std::is_same_v) + scale = 6; + else if (std::is_same_v) + scale = 3; + + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); } } 
else @@ -541,9 +746,9 @@ public: } else if (const auto * datetime64_type = assert_cast(from_type)) { - using WrappedTransformType = TransformType; - return DateTimeAddIntervalImpl, WrappedTransformType>::execute( - WrappedTransformType{datetime64_type->getScale()}, arguments, result_type); + auto from_scale = datetime64_type->getScale(); + return DateTimeAddIntervalImpl, Transform>::execute( + Transform{}, arguments, result_type, from_scale); } else throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName(), diff --git a/src/Functions/FunctionDateOrDateTimeToSomething.h b/src/Functions/FunctionDateOrDateTimeToSomething.h index 00678e65364..5269eecea37 100644 --- a/src/Functions/FunctionDateOrDateTimeToSomething.h +++ b/src/Functions/FunctionDateOrDateTimeToSomething.h @@ -88,6 +88,20 @@ public: Int64 scale = DataTypeDateTime64::default_scale; if (const auto * dt64 = checkAndGetDataType(arguments[0].type.get())) scale = dt64->getScale(); + auto source_scale = scale; + + if constexpr (std::is_same_v) + { + scale = std::max(source_scale, static_cast(3)); + } + else if constexpr (std::is_same_v) + { + scale = std::max(source_scale, static_cast(6)); + } + else if constexpr (std::is_same_v) + { + scale = std::max(source_scale, static_cast(9)); + } return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 1, 0)); } diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 4f5f6ae483f..7f8e9148032 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -112,6 +112,9 @@ void registerFunctionsConversion(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); + factory.registerFunction>(); + factory.registerFunction>(); + factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); factory.registerFunction>(); diff --git a/src/Functions/FunctionsConversion.h 
b/src/Functions/FunctionsConversion.h index d1564008dfe..e098378f51a 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -1487,6 +1487,9 @@ struct NameToDecimal256 { static constexpr auto name = "toDecimal256"; }; static constexpr auto kind = IntervalKind::INTERVAL_KIND; \ }; +DEFINE_NAME_TO_INTERVAL(Nanosecond) +DEFINE_NAME_TO_INTERVAL(Microsecond) +DEFINE_NAME_TO_INTERVAL(Millisecond) DEFINE_NAME_TO_INTERVAL(Second) DEFINE_NAME_TO_INTERVAL(Minute) DEFINE_NAME_TO_INTERVAL(Hour) @@ -2703,13 +2706,10 @@ private: return createWrapper(from_type, to_type, requested_result_is_nullable); } - WrapperType createUInt8ToUInt8Wrapper(const DataTypePtr from_type, const DataTypePtr to_type) const + WrapperType createUInt8ToBoolWrapper(const DataTypePtr from_type, const DataTypePtr to_type) const { return [from_type, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) -> ColumnPtr { - if (isBool(from_type) || !isBool(to_type)) - return arguments.front().column; - /// Special case when we convert UInt8 column to Bool column. /// both columns have type UInt8, but we shouldn't use identity wrapper, /// because Bool column can contain only 0 and 1. @@ -3506,15 +3506,19 @@ private: /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested. 
WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const { - bool convert_to_ipv6 = to_type->getCustomName() && to_type->getCustomName()->getName() == "IPv6"; + if (isUInt8(from_type) && isBool(to_type)) + return createUInt8ToBoolWrapper(from_type, to_type); - if (from_type->equals(*to_type) && !convert_to_ipv6) - { - if (isUInt8(from_type)) - return createUInt8ToUInt8Wrapper(from_type, to_type); + /// We can cast IPv6 into IPv6, IPv4 into IPv4, but we should not allow to cast FixedString(16) into IPv6 as part of identity cast + bool safe_convert_custom_types = true; + if (const auto * to_type_custom_name = to_type->getCustomName()) + safe_convert_custom_types = from_type->getCustomName() && from_type->getCustomName()->getName() == to_type_custom_name->getName(); + else if (const auto * from_type_custom_name = from_type->getCustomName()) + safe_convert_custom_types = to_type->getCustomName() && from_type_custom_name->getName() == to_type->getCustomName()->getName(); + + if (from_type->equals(*to_type) && safe_convert_custom_types) return createIdentityWrapper(from_type); - } else if (WhichDataType(from_type).isNothing()) return createNothingWrapper(to_type.get()); diff --git a/src/Functions/FunctionsTimeWindow.cpp b/src/Functions/FunctionsTimeWindow.cpp index 79ce7356ee7..76844e2e6fb 100644 --- a/src/Functions/FunctionsTimeWindow.cpp +++ b/src/Functions/FunctionsTimeWindow.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ARGUMENT_OUT_OF_BOUND; + extern const int SYNTAX_ERROR; } namespace @@ -167,6 +168,13 @@ struct TimeWindowImpl switch (std::get<0>(interval)) { + //TODO: add proper support for fractional seconds +// case IntervalKind::Nanosecond: +// return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); +// case IntervalKind::Microsecond: +// return executeTumble(*time_column_vec, 
std::get<1>(interval), time_zone); +// case IntervalKind::Millisecond: +// return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); case IntervalKind::Second: return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); case IntervalKind::Minute: @@ -183,6 +191,8 @@ struct TimeWindowImpl return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); case IntervalKind::Year: return executeTumble(*time_column_vec, std::get<1>(interval), time_zone); + default: + throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR); } __builtin_unreachable(); } @@ -350,6 +360,16 @@ struct TimeWindowImpl switch (std::get<0>(window_interval)) { + //TODO: add proper support for fractional seconds +// case IntervalKind::Nanosecond: +// return executeHop( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); +// case IntervalKind::Microsecond: +// return executeHop( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); +// case IntervalKind::Millisecond: +// return executeHop( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); case IntervalKind::Second: return executeHop( *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); @@ -374,6 +394,8 @@ struct TimeWindowImpl case IntervalKind::Year: return executeHop( *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + default: + throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR); } __builtin_unreachable(); } @@ -487,6 +509,16 @@ struct TimeWindowImpl switch (std::get<0>(window_interval)) { + //TODO: add proper support for fractional seconds +// case IntervalKind::Nanosecond: +// return executeHopSlice( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); +// case IntervalKind::Microsecond: +// return 
executeHopSlice( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); +// case IntervalKind::Millisecond: +// return executeHopSlice( +// *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); case IntervalKind::Second: return executeHopSlice( *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); @@ -511,6 +543,8 @@ struct TimeWindowImpl case IntervalKind::Year: return executeHopSlice( *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone); + default: + throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR); } __builtin_unreachable(); } diff --git a/src/Functions/FunctionsTimeWindow.h b/src/Functions/FunctionsTimeWindow.h index 313de10702d..3ea397e4c7d 100644 --- a/src/Functions/FunctionsTimeWindow.h +++ b/src/Functions/FunctionsTimeWindow.h @@ -80,7 +80,32 @@ struct ToStartOfTransform; TRANSFORM_TIME(Hour) TRANSFORM_TIME(Minute) TRANSFORM_TIME(Second) -#undef TRANSFORM_DATE +#undef TRANSFORM_TIME + +#define TRANSFORM_SUBSECONDS(INTERVAL_KIND, DEF_SCALE) \ +template<> \ + struct ToStartOfTransform \ + { \ + static Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \ + { \ + if (scale <= DEF_SCALE) \ + { \ + auto val = t * DecimalUtils::scaleMultiplier(DEF_SCALE - scale); \ + if (delta == 1) \ + return val; \ + else \ + return val - (val % delta); \ + } \ + else \ + { \ + return t - (t % (delta * DecimalUtils::scaleMultiplier(scale - DEF_SCALE))) ; \ + } \ + } \ + }; + TRANSFORM_SUBSECONDS(Millisecond, 3) + TRANSFORM_SUBSECONDS(Microsecond, 6) + TRANSFORM_SUBSECONDS(Nanosecond, 9) +#undef TRANSFORM_SUBSECONDS template struct AddTime; @@ -117,6 +142,25 @@ struct ToStartOfTransform; ADD_TIME(Second, 1) #undef ADD_TIME +#define ADD_SUBSECONDS(INTERVAL_KIND, DEF_SCALE) \ +template <> \ + struct AddTime \ + { \ + static inline NO_SANITIZE_UNDEFINED Int64 execute(Int64 t, UInt64 delta, const 
UInt32 scale) \ + { \ + if (scale < DEF_SCALE) \ + { \ + return t + delta * DecimalUtils::scaleMultiplier(DEF_SCALE - scale); \ + } \ + else \ + return t + delta * DecimalUtils::scaleMultiplier(scale - DEF_SCALE); \ + } \ + }; + ADD_SUBSECONDS(Millisecond, 3) + ADD_SUBSECONDS(Microsecond, 6) + ADD_SUBSECONDS(Nanosecond, 9) +#undef ADD_SUBSECONDS + template struct TimeWindowImpl { diff --git a/src/Functions/SubtractSubSeconds.cpp b/src/Functions/SubtractSubSeconds.cpp new file mode 100644 index 00000000000..5eeb24c8748 --- /dev/null +++ b/src/Functions/SubtractSubSeconds.cpp @@ -0,0 +1,28 @@ +#include +#include + + +namespace DB +{ + +using FunctionSubtractNanoseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionSubtractNanoseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +using FunctionSubtractMicroseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionSubtractMicroseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +using FunctionSubtractMilliseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionSubtractMilliseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +} + + diff --git a/src/Functions/TransformDateTime64.h b/src/Functions/TransformDateTime64.h index b05bdab65ad..9ac28118b8f 100644 --- a/src/Functions/TransformDateTime64.h +++ b/src/Functions/TransformDateTime64.h @@ -13,7 +13,7 @@ namespace DB * * DateTime64 value and scale factor (2) * * DateTime64 broken down to components, result of execute is then re-assembled back into DateTime64 value (3) * - * Suitable Transfotm-types are commonly used in Date/DateTime manipulation functions, + * Suitable Transform-types are commonly used in Date/DateTime manipulation functions, * and should implement static (or const) function with following signatures: * 1: * R execute(Int64 whole_value, ... 
) diff --git a/src/Functions/addSubSeconds.cpp b/src/Functions/addSubSeconds.cpp new file mode 100644 index 00000000000..f58f8b20b99 --- /dev/null +++ b/src/Functions/addSubSeconds.cpp @@ -0,0 +1,28 @@ +#include +#include + + +namespace DB +{ + +using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionAddNanoseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionAddMicroseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval; +void registerFunctionAddMilliseconds(FunctionFactory & factory) +{ + factory.registerFunction(); +}; + +} + + diff --git a/src/Functions/yandexConsistentHash.cpp b/src/Functions/kostikConsistentHash.cpp similarity index 59% rename from src/Functions/yandexConsistentHash.cpp rename to src/Functions/kostikConsistentHash.cpp index 58617e29af7..a38c3c965d8 100644 --- a/src/Functions/yandexConsistentHash.cpp +++ b/src/Functions/kostikConsistentHash.cpp @@ -7,9 +7,9 @@ namespace DB { /// An O(1) time and space consistent hash algorithm by Konstantin Oblakov -struct YandexConsistentHashImpl +struct KostikConsistentHashImpl { - static constexpr auto name = "yandexConsistentHash"; + static constexpr auto name = "kostikConsistentHash"; using HashType = UInt64; /// Actually it supports UInt64, but it is efficient only if n <= 32768 @@ -23,12 +23,12 @@ struct YandexConsistentHashImpl } }; -using FunctionYandexConsistentHash = FunctionConsistentHashImpl; +using FunctionKostikConsistentHash = FunctionConsistentHashImpl; -void registerFunctionYandexConsistentHash(FunctionFactory & factory) +void registerFunctionKostikConsistentHash(FunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction(); + factory.registerAlias("yandexConsistentHash", "kostikConsistentHash"); } } - diff --git 
a/src/Functions/registerFunctionsConsistentHashing.cpp b/src/Functions/registerFunctionsConsistentHashing.cpp index d4d740bc92f..84a78cd6765 100644 --- a/src/Functions/registerFunctionsConsistentHashing.cpp +++ b/src/Functions/registerFunctionsConsistentHashing.cpp @@ -2,12 +2,12 @@ namespace DB { class FunctionFactory; -void registerFunctionYandexConsistentHash(FunctionFactory & factory); +void registerFunctionKostikConsistentHash(FunctionFactory & factory); void registerFunctionJumpConsistentHash(FunctionFactory & factory); void registerFunctionsConsistentHashing(FunctionFactory & factory) { - registerFunctionYandexConsistentHash(factory); + registerFunctionKostikConsistentHash(factory); registerFunctionJumpConsistentHash(factory); } diff --git a/src/Functions/registerFunctionsDateTime.cpp b/src/Functions/registerFunctionsDateTime.cpp index 5211a62ff1e..dd7b67c47ac 100644 --- a/src/Functions/registerFunctionsDateTime.cpp +++ b/src/Functions/registerFunctionsDateTime.cpp @@ -11,6 +11,9 @@ void registerFunctionToDayOfWeek(FunctionFactory &); void registerFunctionToDayOfYear(FunctionFactory &); void registerFunctionToHour(FunctionFactory &); void registerFunctionToMinute(FunctionFactory &); +void registerFunctionToStartOfNanosecond(FunctionFactory &); +void registerFunctionToStartOfMicrosecond(FunctionFactory &); +void registerFunctionToStartOfMillisecond(FunctionFactory &); void registerFunctionToStartOfSecond(FunctionFactory &); void registerFunctionToSecond(FunctionFactory &); void registerFunctionToStartOfDay(FunctionFactory &); @@ -47,6 +50,9 @@ void registerFunctionTimeSlots(FunctionFactory &); void registerFunctionToYYYYMM(FunctionFactory &); void registerFunctionToYYYYMMDD(FunctionFactory &); void registerFunctionToYYYYMMDDhhmmss(FunctionFactory &); +void registerFunctionAddNanoseconds(FunctionFactory &); +void registerFunctionAddMicroseconds(FunctionFactory &); +void registerFunctionAddMilliseconds(FunctionFactory &); void 
registerFunctionAddSeconds(FunctionFactory &); void registerFunctionAddMinutes(FunctionFactory &); void registerFunctionAddHours(FunctionFactory &); @@ -55,6 +61,9 @@ void registerFunctionAddWeeks(FunctionFactory &); void registerFunctionAddMonths(FunctionFactory &); void registerFunctionAddQuarters(FunctionFactory &); void registerFunctionAddYears(FunctionFactory &); +void registerFunctionSubtractNanoseconds(FunctionFactory &); +void registerFunctionSubtractMicroseconds(FunctionFactory &); +void registerFunctionSubtractMilliseconds(FunctionFactory &); void registerFunctionSubtractSeconds(FunctionFactory &); void registerFunctionSubtractMinutes(FunctionFactory &); void registerFunctionSubtractHours(FunctionFactory &); @@ -93,6 +102,9 @@ void registerFunctionsDateTime(FunctionFactory & factory) registerFunctionToStartOfMonth(factory); registerFunctionToStartOfQuarter(factory); registerFunctionToStartOfYear(factory); + registerFunctionToStartOfNanosecond(factory); + registerFunctionToStartOfMicrosecond(factory); + registerFunctionToStartOfMillisecond(factory); registerFunctionToStartOfSecond(factory); registerFunctionToStartOfMinute(factory); registerFunctionToStartOfFiveMinute(factory); @@ -119,6 +131,9 @@ void registerFunctionsDateTime(FunctionFactory & factory) registerFunctionToYYYYMM(factory); registerFunctionToYYYYMMDD(factory); registerFunctionToYYYYMMDDhhmmss(factory); + registerFunctionAddNanoseconds(factory); + registerFunctionAddMicroseconds(factory); + registerFunctionAddMilliseconds(factory); registerFunctionAddSeconds(factory); registerFunctionAddMinutes(factory); registerFunctionAddHours(factory); @@ -127,6 +142,9 @@ void registerFunctionsDateTime(FunctionFactory & factory) registerFunctionAddMonths(factory); registerFunctionAddQuarters(factory); registerFunctionAddYears(factory); + registerFunctionSubtractNanoseconds(factory); + registerFunctionSubtractMicroseconds(factory); + registerFunctionSubtractMilliseconds(factory); 
registerFunctionSubtractSeconds(factory); registerFunctionSubtractMinutes(factory); registerFunctionSubtractHours(factory); diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 09b7931de8d..bff33f9b061 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -33,184 +33,273 @@ namespace template <> struct Transform { - static constexpr auto name = function_name; - - static UInt16 execute(UInt16 d, UInt64 years, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfYearInterval(DayNum(d), years); } - static UInt16 execute(Int32 d, UInt64 years, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years); } - static UInt16 execute(UInt32 t, UInt64 years, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); } - static UInt16 execute(Int64 t, UInt64 years, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years); + return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years); } }; template <> struct Transform { - static constexpr auto name = function_name; - - static UInt16 execute(UInt16 d, UInt64 quarters, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfQuarterInterval(DayNum(d), quarters); } - static UInt16 execute(Int32 d, UInt64 quarters, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64) { return 
time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters); } - static UInt16 execute(UInt32 t, UInt64 quarters, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); } - static UInt16 execute(Int64 t, UInt64 quarters, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters); + return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters); } }; template <> struct Transform { - static constexpr auto name = function_name; - - static UInt16 execute(UInt16 d, UInt64 months, const DateLUTImpl & time_zone) + static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMonthInterval(DayNum(d), months); } - static UInt16 execute(Int32 d, UInt64 months, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months); } - static UInt16 execute(UInt32 t, UInt64 months, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); } - static UInt16 execute(Int64 t, UInt64 months, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months); + return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months); } }; template <> struct Transform { - static constexpr auto name = function_name; - - static UInt16 execute(UInt16 d, UInt64 weeks, const DateLUTImpl & time_zone) + static UInt16 
execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfWeekInterval(DayNum(d), weeks); } - static UInt16 execute(Int32 d, UInt64 weeks, const DateLUTImpl & time_zone) + static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks); } - static UInt16 execute(UInt32 t, UInt64 weeks, const DateLUTImpl & time_zone) + static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); } - static UInt16 execute(Int64 t, UInt64 weeks, const DateLUTImpl & time_zone) + static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks); + return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks); } }; template <> struct Transform { - static constexpr auto name = function_name; - - static UInt32 execute(UInt16 d, UInt64 days, const DateLUTImpl & time_zone) + static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days); } - static UInt32 execute(Int32 d, UInt64 days, const DateLUTImpl & time_zone) + static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days); } - static UInt32 execute(UInt32 t, UInt64 days, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days); } - static UInt32 execute(Int64 t, UInt64 days, const DateLUTImpl & time_zone) + static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days); + return 
time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days); } }; template <> struct Transform { - static constexpr auto name = function_name; + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } - static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt32 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); } - static UInt32 execute(Int64 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + + static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64) + { + return time_zone.toStartOfHourInterval(t, hours); + } + + static UInt32 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier) + { + return time_zone.toStartOfHourInterval(t / scale_multiplier, hours); + } }; template <> struct Transform { - static constexpr auto name = function_name; + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, UInt64 minutes, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfMinuteInterval(t, minutes); } - static UInt32 execute(Int64 t, UInt64 minutes, const 
DateLUTImpl & time_zone) + static UInt32 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfMinuteInterval(t, minutes); + return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes); } }; template <> struct Transform { - static constexpr auto name = function_name; + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } - static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); } - - static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone) + static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64) { return time_zone.toStartOfSecondInterval(t, seconds); } - static Int64 execute(Int64 t, UInt64 seconds, const DateLUTImpl & time_zone) + static UInt32 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier) { - return time_zone.toStartOfSecondInterval(t, seconds); + return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds); } }; + template <> + struct Transform + { + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); } + + static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier) + { + if (scale_multiplier < 1000) + { + Int64 t_milliseconds = t * (static_cast(1000) / scale_multiplier); + if (likely(t >= 0)) + return t_milliseconds / milliseconds * milliseconds; + 
else + return ((t_milliseconds + 1) / milliseconds - 1) * milliseconds; + } + else if (scale_multiplier > 1000) + { + Int64 scale_diff = scale_multiplier / static_cast(1000); + if (likely(t >= 0)) + return t / milliseconds / scale_diff * milliseconds; + else + return ((t + 1) / milliseconds / scale_diff - 1) * milliseconds; + } + else + if (likely(t >= 0)) + return t / milliseconds * milliseconds; + else + return ((t + 1) / milliseconds - 1) * milliseconds; + } + }; + + template <> + struct Transform + { + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); } + + static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier) + { + if (scale_multiplier < 1000000) + { + Int64 t_microseconds = t * (static_cast(1000000) / scale_multiplier); + if (likely(t >= 0)) + return t_microseconds / microseconds * microseconds; + else + return ((t_microseconds + 1) / microseconds - 1) * microseconds; + } + else if (scale_multiplier > 1000000) + { + Int64 scale_diff = scale_multiplier / static_cast(1000000); + if (likely(t >= 0)) + return t / microseconds / scale_diff * microseconds; + else + return ((t + 1) / microseconds / scale_diff - 1) * microseconds; + } + else + if (likely(t >= 0)) + return t / microseconds * microseconds; + else + return ((t + 1) / microseconds - 1) * microseconds; + } + }; + + template <> + struct Transform + { + static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + + static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); } + + static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return 
dateTimeIsNotSupported(function_name); } + + static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier) + { + if (scale_multiplier < 1000000000) + { + Int64 t_nanoseconds = t * (static_cast(1000000000) / scale_multiplier); + if (likely(t >= 0)) + return t_nanoseconds / nanoseconds * nanoseconds; + else + return ((t_nanoseconds + 1) / nanoseconds - 1) * nanoseconds; + } + else + if (likely(t >= 0)) + return t / nanoseconds * nanoseconds; + else + return ((t + 1) / nanoseconds - 1) * nanoseconds; + } + }; class FunctionToStartOfInterval : public IFunction { @@ -240,6 +329,7 @@ public: const DataTypeInterval * interval_type = nullptr; bool result_type_is_date = false; + bool result_type_is_datetime = false; auto check_interval_argument = [&] { interval_type = checkAndGetDataType(arguments[1].type.get()); @@ -251,6 +341,8 @@ public: result_type_is_date = (interval_type->getKind() == IntervalKind::Year) || (interval_type->getKind() == IntervalKind::Quarter) || (interval_type->getKind() == IntervalKind::Month) || (interval_type->getKind() == IntervalKind::Week); + result_type_is_datetime = (interval_type->getKind() == IntervalKind::Day) || (interval_type->getKind() == IntervalKind::Hour) + || (interval_type->getKind() == IntervalKind::Minute) || (interval_type->getKind() == IntervalKind::Second); }; auto check_timezone_argument = [&] @@ -263,7 +355,7 @@ public: if (first_argument_is_date && result_type_is_date) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The timezone argument of function {} with interval type {} is allowed only when the 1st argument " - "has the type DateTime", + "has the type DateTime or DateTime64", getName(), interval_type->getKind().toString()); }; @@ -288,19 +380,33 @@ public: if (result_type_is_date) return std::make_shared(); - else + else if (result_type_is_datetime) return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + else + { + auto scale = 0; + + if 
(interval_type->getKind() == IntervalKind::Nanosecond) + scale = 9; + else if (interval_type->getKind() == IntervalKind::Microsecond) + scale = 6; + else if (interval_type->getKind() == IntervalKind::Millisecond) + scale = 3; + + return std::make_shared(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0)); + } + } bool useDefaultImplementationForConstants() const override { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; } - ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count */) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override { const auto & time_column = arguments[0]; const auto & interval_column = arguments[1]; const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0); - auto result_column = dispatchForColumns(time_column, interval_column, time_zone); + auto result_column = dispatchForColumns(time_column, interval_column, result_type, time_zone); return result_column; } @@ -316,33 +422,36 @@ public: private: ColumnPtr dispatchForColumns( - const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone) const + const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const { const auto & from_datatype = *time_column.type.get(); const auto which_type = WhichDataType(from_datatype); + + if (which_type.isDateTime64()) + { + const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); + auto scale = assert_cast(from_datatype).getScale(); + + if (time_column_vec) + return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone, scale); + } if (which_type.isDateTime()) { const auto * time_column_vec 
= checkAndGetColumn(time_column.column.get()); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, time_zone); + return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } if (which_type.isDate()) { const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, time_zone); + return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } if (which_type.isDate32()) { const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, time_zone); - } - if (which_type.isDateTime64()) - { - const auto * time_column_vec = checkAndGetColumn(time_column.column.get()); - if (time_column_vec) - return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, time_zone); + return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } throw Exception( "Illegal column for first argument of function " + getName() + ". 
Must contain dates or dates with time", @@ -351,7 +460,8 @@ private: template ColumnPtr dispatchForIntervalColumn( - const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone) const + const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column, + const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const { const auto * interval_type = checkAndGetDataType(interval_column.type.get()); if (!interval_type) @@ -368,49 +478,52 @@ private: switch (interval_type->getKind()) { + case IntervalKind::Nanosecond: + return execute(from, time_column, num_units, result_type, time_zone, scale); + case IntervalKind::Microsecond: + return execute(from, time_column, num_units, result_type, time_zone, scale); + case IntervalKind::Millisecond: + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Second: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Minute: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Hour: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Day: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Week: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Month: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Quarter: - return 
execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); case IntervalKind::Year: - return execute(from, time_column, num_units, time_zone); + return execute(from, time_column, num_units, result_type, time_zone, scale); } __builtin_unreachable(); } - - template - ColumnPtr execute(const FromDataType & from_datatype, const ColumnType & time_column, UInt64 num_units, const DateLUTImpl & time_zone) const + template + ColumnPtr execute(const FromDataType &, const ColumnType & time_column_type, Int64 num_units, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const { - const auto & time_data = time_column.getData(); - size_t size = time_column.size(); - auto result = ColumnVector::create(); - auto & result_data = result->getData(); + using ToColumnType = typename ToDataType::ColumnType; + + const auto & time_data = time_column_type.getData(); + size_t size = time_data.size(); + + auto result_col = result_type->createColumn(); + auto *col_to = assert_cast(result_col.get()); + auto & result_data = col_to->getData(); result_data.resize(size); - if constexpr (std::is_same_v) - { - const auto transform = TransformDateTime64>{from_datatype.getScale()}; - for (size_t i = 0; i != size; ++i) - result_data[i] = transform.execute(time_data[i], num_units, time_zone); - } - else - { - for (size_t i = 0; i != size; ++i) - result_data[i] = Transform::execute(time_data[i], num_units, time_zone); - } - return result; + Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale); + + for (size_t i = 0; i != size; ++i) + result_data[i] = Transform::execute(time_data[i], num_units, time_zone, scale_multiplier); + + return result_col; } }; diff --git a/src/Functions/toStartOfSubsecond.cpp b/src/Functions/toStartOfSubsecond.cpp new file mode 100644 index 00000000000..b2257c5e3cd --- /dev/null +++ b/src/Functions/toStartOfSubsecond.cpp @@ -0,0 +1,30 @@ +#include +#include +#include + + 
+namespace DB +{ + +using FunctionToStartOfMillisecond = FunctionDateOrDateTimeToSomething; + +void registerFunctionToStartOfMillisecond(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +using FunctionToStartOfMicrosecond = FunctionDateOrDateTimeToSomething; + +void registerFunctionToStartOfMicrosecond(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +using FunctionToStartOfNanosecond = FunctionDateOrDateTimeToSomething; + +void registerFunctionToStartOfNanosecond(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index fb9752ae391..181ac9aed7e 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -372,8 +372,8 @@ SetPtr makeExplicitSet( element_type = low_cardinality_type->getDictionaryType(); auto set_key = PreparedSetKey::forLiteral(*right_arg, set_element_types); - if (prepared_sets.count(set_key)) - return prepared_sets.at(set_key); /// Already prepared. + if (auto it = prepared_sets.find(set_key); it != prepared_sets.end()) + return it->second; /// Already prepared. Block block; const auto & right_arg_func = std::dynamic_pointer_cast(right_arg); @@ -388,7 +388,7 @@ SetPtr makeExplicitSet( set->insertFromBlock(block.getColumnsWithTypeAndName()); set->finishInsert(); - prepared_sets[set_key] = set; + prepared_sets.emplace(set_key, set); return set; } @@ -707,7 +707,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat if (tid != 0) tuple_ast = tuple_ast->clone(); - auto literal = std::make_shared(UInt64(++tid)); + auto literal = std::make_shared(UInt64{++tid}); visit(*literal, literal, data); auto func = makeASTFunction("tupleElement", tuple_ast, literal); @@ -814,14 +814,13 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (!data.only_consts) { /// We are in the part of the tree that we are not going to compute. 
You just need to define types. - /// Do not subquery and create sets. We replace "in*" function to "in*IgnoreSet". + /// Do not evaluate subquery and create sets. We replace "in*" function to "in*IgnoreSet". auto argument_name = node.arguments->children.at(0)->getColumnName(); - data.addFunction( - FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()), - { argument_name, argument_name }, - column_name); + FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()), + {argument_name, argument_name}, + column_name); } return; } @@ -1145,8 +1144,8 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su if (no_subqueries) return {}; auto set_key = PreparedSetKey::forSubquery(*right_in_operand); - if (data.prepared_sets.count(set_key)) - return data.prepared_sets.at(set_key); + if (auto it = data.prepared_sets.find(set_key); it != data.prepared_sets.end()) + return it->second; /// A special case is if the name of the table is specified on the right side of the IN statement, /// and the table has the type Set (a previously prepared set). @@ -1160,7 +1159,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su StorageSet * storage_set = dynamic_cast(table.get()); if (storage_set) { - data.prepared_sets[set_key] = storage_set->getSet(); + data.prepared_sets.emplace(set_key, storage_set->getSet()); return storage_set->getSet(); } } @@ -1174,7 +1173,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su /// If you already created a Set with the same subquery / table. 
if (subquery_for_set.set) { - data.prepared_sets[set_key] = subquery_for_set.set; + data.prepared_sets.emplace(set_key, subquery_for_set.set); return subquery_for_set.set; } @@ -1196,7 +1195,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su } subquery_for_set.set = set; - data.prepared_sets[set_key] = set; + data.prepared_sets.emplace(set_key, set); return set; } else diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index b6b67bac81c..342cc9eef9d 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -10,6 +10,7 @@ namespace DB { +class ASTExpressionList; class ASTFunction; class ExpressionActions; @@ -89,10 +90,7 @@ struct ScopeStack : WithContext void addColumn(ColumnWithTypeAndName column); void addAlias(const std::string & name, std::string alias); void addArrayJoin(const std::string & source_name, std::string result_name); - void addFunction( - const FunctionOverloadResolverPtr & function, - const Names & argument_names, - std::string result_name); + void addFunction(const FunctionOverloadResolverPtr & function, const Names & argument_names, std::string result_name); ActionsDAGPtr popLevel(); diff --git a/src/Interpreters/CatBoostModel.cpp b/src/Interpreters/CatBoostModel.cpp index 1b6e30a0959..cffaa81c4f0 100644 --- a/src/Interpreters/CatBoostModel.cpp +++ b/src/Interpreters/CatBoostModel.cpp @@ -26,10 +26,10 @@ extern const int CANNOT_LOAD_CATBOOST_MODEL; extern const int CANNOT_APPLY_CATBOOST_MODEL; } - /// CatBoost wrapper interface functions. -struct CatBoostWrapperAPI +class CatBoostWrapperAPI { +public: using ModelCalcerHandle = void; ModelCalcerHandle * (* ModelCalcerCreate)(); // NOLINT @@ -68,9 +68,6 @@ struct CatBoostWrapperAPI }; -namespace -{ - class CatBoostModelHolder { private: @@ -84,7 +81,61 @@ public: }; -class CatBoostModelImpl : public ICatBoostModel +/// Holds CatBoost wrapper library and provides wrapper interface. 
+class CatBoostLibHolder +{ +public: + explicit CatBoostLibHolder(std::string lib_path_) : lib_path(std::move(lib_path_)), lib(lib_path) { initAPI(); } + + const CatBoostWrapperAPI & getAPI() const { return api; } + const std::string & getCurrentPath() const { return lib_path; } + +private: + CatBoostWrapperAPI api; + std::string lib_path; + SharedLibrary lib; + + void initAPI() + { + load(api.ModelCalcerCreate, "ModelCalcerCreate"); + load(api.ModelCalcerDelete, "ModelCalcerDelete"); + load(api.GetErrorString, "GetErrorString"); + load(api.LoadFullModelFromFile, "LoadFullModelFromFile"); + load(api.CalcModelPredictionFlat, "CalcModelPredictionFlat"); + load(api.CalcModelPrediction, "CalcModelPrediction"); + load(api.CalcModelPredictionWithHashedCatFeatures, "CalcModelPredictionWithHashedCatFeatures"); + load(api.GetStringCatFeatureHash, "GetStringCatFeatureHash"); + load(api.GetIntegerCatFeatureHash, "GetIntegerCatFeatureHash"); + load(api.GetFloatFeaturesCount, "GetFloatFeaturesCount"); + load(api.GetCatFeaturesCount, "GetCatFeaturesCount"); + tryLoad(api.CheckModelMetadataHasKey, "CheckModelMetadataHasKey"); + tryLoad(api.GetModelInfoValueSize, "GetModelInfoValueSize"); + tryLoad(api.GetModelInfoValue, "GetModelInfoValue"); + tryLoad(api.GetTreeCount, "GetTreeCount"); + tryLoad(api.GetDimensionsCount, "GetDimensionsCount"); + } + + template + void load(T& func, const std::string & name) { func = lib.get(name); } + + template + void tryLoad(T& func, const std::string & name) { func = lib.tryGet(name); } +}; + +std::shared_ptr getCatBoostWrapperHolder(const std::string & lib_path) +{ + static std::shared_ptr ptr; + static std::mutex mutex; + + std::lock_guard lock(mutex); + + if (!ptr || ptr->getCurrentPath() != lib_path) + ptr = std::make_shared(lib_path); + + return ptr; +} + +class CatBoostModelImpl { public: CatBoostModelImpl(const CatBoostWrapperAPI * api_, const std::string & model_path) : api(api_) @@ -92,13 +143,15 @@ public: handle = 
std::make_unique(api); if (!handle) { - std::string msg = "Cannot create CatBoost model: "; - throw Exception(msg + api->GetErrorString(), ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL); + throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL, + "Cannot create CatBoost model: {}", + api->GetErrorString()); } if (!api->LoadFullModelFromFile(handle->get(), model_path.c_str())) { - std::string msg = "Cannot load CatBoost model: "; - throw Exception(msg + api->GetErrorString(), ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL); + throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL, + "Cannot load CatBoost model: {}", + api->GetErrorString()); } float_features_count = api->GetFloatFeaturesCount(handle->get()); @@ -108,32 +161,22 @@ public: tree_count = api->GetDimensionsCount(handle->get()); } - ColumnPtr evaluate(const ColumnRawPtrs & columns) const override + ColumnPtr evaluate(const ColumnRawPtrs & columns) const { if (columns.empty()) - throw Exception("Got empty columns list for CatBoost model.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got empty columns list for CatBoost model."); if (columns.size() != float_features_count + cat_features_count) - { - std::string msg; - { - WriteBufferFromString buffer(msg); - buffer << "Number of columns is different with number of features: "; - buffer << columns.size() << " vs " << float_features_count << " + " << cat_features_count; - } - throw Exception(msg, ErrorCodes::BAD_ARGUMENTS); - } + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Number of columns is different with number of features: columns size {} float features size {} + cat features size {}", + columns.size(), float_features_count, + cat_features_count); for (size_t i = 0; i < float_features_count; ++i) { if (!columns[i]->isNumeric()) { - std::string msg; - { - WriteBufferFromString buffer(msg); - buffer << "Column " << i << " should be numeric to make float feature."; - } - throw Exception(msg, ErrorCodes::BAD_ARGUMENTS); + throw
Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric to make float feature.", i); } } @@ -142,16 +185,13 @@ public: { const auto * column = columns[i]; if (column->isNumeric()) + { cat_features_are_strings = false; + } else if (!(typeid_cast(column) || typeid_cast(column))) { - std::string msg; - { - WriteBufferFromString buffer(msg); - buffer << "Column " << i << " should be numeric or string."; - } - throw Exception(msg, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric or string.", i); } } @@ -187,9 +227,9 @@ public: return ColumnTuple::create(std::move(mutable_columns)); } - size_t getFloatFeaturesCount() const override { return float_features_count; } - size_t getCatFeaturesCount() const override { return cat_features_count; } - size_t getTreeCount() const override { return tree_count; } + size_t getFloatFeaturesCount() const { return float_features_count; } + size_t getCatFeaturesCount() const { return cat_features_count; } + size_t getTreeCount() const { return tree_count; } private: std::unique_ptr handle; @@ -435,66 +475,6 @@ private: } }; - -/// Holds CatBoost wrapper library and provides wrapper interface. 
-class CatBoostLibHolder: public CatBoostWrapperAPIProvider -{ -public: - explicit CatBoostLibHolder(std::string lib_path_) : lib_path(std::move(lib_path_)), lib(lib_path) { initAPI(); } - - const CatBoostWrapperAPI & getAPI() const override { return api; } - const std::string & getCurrentPath() const { return lib_path; } - -private: - CatBoostWrapperAPI api; - std::string lib_path; - SharedLibrary lib; - - void initAPI(); - - template - void load(T& func, const std::string & name) { func = lib.get(name); } - - template - void tryLoad(T& func, const std::string & name) { func = lib.tryGet(name); } -}; - -void CatBoostLibHolder::initAPI() -{ - load(api.ModelCalcerCreate, "ModelCalcerCreate"); - load(api.ModelCalcerDelete, "ModelCalcerDelete"); - load(api.GetErrorString, "GetErrorString"); - load(api.LoadFullModelFromFile, "LoadFullModelFromFile"); - load(api.CalcModelPredictionFlat, "CalcModelPredictionFlat"); - load(api.CalcModelPrediction, "CalcModelPrediction"); - load(api.CalcModelPredictionWithHashedCatFeatures, "CalcModelPredictionWithHashedCatFeatures"); - load(api.GetStringCatFeatureHash, "GetStringCatFeatureHash"); - load(api.GetIntegerCatFeatureHash, "GetIntegerCatFeatureHash"); - load(api.GetFloatFeaturesCount, "GetFloatFeaturesCount"); - load(api.GetCatFeaturesCount, "GetCatFeaturesCount"); - tryLoad(api.CheckModelMetadataHasKey, "CheckModelMetadataHasKey"); - tryLoad(api.GetModelInfoValueSize, "GetModelInfoValueSize"); - tryLoad(api.GetModelInfoValue, "GetModelInfoValue"); - tryLoad(api.GetTreeCount, "GetTreeCount"); - tryLoad(api.GetDimensionsCount, "GetDimensionsCount"); -} - -std::shared_ptr getCatBoostWrapperHolder(const std::string & lib_path) -{ - static std::shared_ptr ptr; - static std::mutex mutex; - - std::lock_guard lock(mutex); - - if (!ptr || ptr->getCurrentPath() != lib_path) - ptr = std::make_shared(lib_path); - - return ptr; -} - -} - - CatBoostModel::CatBoostModel(std::string name_, std::string model_path_, std::string lib_path_, const 
ExternalLoadableLifetime & lifetime_) : name(std::move(name_)), model_path(std::move(model_path_)), lib_path(std::move(lib_path_)), lifetime(lifetime_) @@ -502,43 +482,28 @@ CatBoostModel::CatBoostModel(std::string name_, std::string model_path_, std::st api_provider = getCatBoostWrapperHolder(lib_path); api = &api_provider->getAPI(); model = std::make_unique(api, model_path); - float_features_count = model->getFloatFeaturesCount(); - cat_features_count = model->getCatFeaturesCount(); - tree_count = model->getTreeCount(); } -const ExternalLoadableLifetime & CatBoostModel::getLifetime() const -{ - return lifetime; -} - -bool CatBoostModel::isModified() const -{ - return true; -} - -std::shared_ptr CatBoostModel::clone() const -{ - return std::make_shared(name, model_path, lib_path, lifetime); -} +CatBoostModel::~CatBoostModel() = default; size_t CatBoostModel::getFloatFeaturesCount() const { - return float_features_count; + return model->getFloatFeaturesCount(); } size_t CatBoostModel::getCatFeaturesCount() const { - return cat_features_count; + return model->getCatFeaturesCount(); } size_t CatBoostModel::getTreeCount() const { - return tree_count; + return model->getTreeCount(); } DataTypePtr CatBoostModel::getReturnType() const { + size_t tree_count = getTreeCount(); auto type = std::make_shared(); if (tree_count == 1) return type; @@ -552,6 +517,7 @@ ColumnPtr CatBoostModel::evaluate(const ColumnRawPtrs & columns) const { if (!model) throw Exception("CatBoost model was not loaded.", ErrorCodes::LOGICAL_ERROR); + return model->evaluate(columns); } diff --git a/src/Interpreters/CatBoostModel.h b/src/Interpreters/CatBoostModel.h index 51bf0ba94f5..7bb1df92b67 100644 --- a/src/Interpreters/CatBoostModel.h +++ b/src/Interpreters/CatBoostModel.h @@ -8,47 +8,32 @@ namespace DB { -/// CatBoost wrapper interface functions. 
-struct CatBoostWrapperAPI; -class CatBoostWrapperAPIProvider -{ -public: - virtual ~CatBoostWrapperAPIProvider() = default; - virtual const CatBoostWrapperAPI & getAPI() const = 0; -}; - -/// CatBoost model interface. -class ICatBoostModel -{ -public: - virtual ~ICatBoostModel() = default; - /// Evaluate model. Use first `float_features_count` columns as float features, - /// the others `cat_features_count` as categorical features. - virtual ColumnPtr evaluate(const ColumnRawPtrs & columns) const = 0; - - virtual size_t getFloatFeaturesCount() const = 0; - virtual size_t getCatFeaturesCount() const = 0; - virtual size_t getTreeCount() const = 0; -}; +class CatBoostLibHolder; +class CatBoostWrapperAPI; +class CatBoostModelImpl; class IDataType; using DataTypePtr = std::shared_ptr; /// General ML model evaluator interface. -class IModel : public IExternalLoadable +class IMLModel : public IExternalLoadable { public: + IMLModel() = default; virtual ColumnPtr evaluate(const ColumnRawPtrs & columns) const = 0; virtual std::string getTypeName() const = 0; virtual DataTypePtr getReturnType() const = 0; + virtual ~IMLModel() override = default; }; -class CatBoostModel : public IModel +class CatBoostModel : public IMLModel { public: CatBoostModel(std::string name, std::string model_path, std::string lib_path, const ExternalLoadableLifetime & lifetime); + ~CatBoostModel() override; + ColumnPtr evaluate(const ColumnRawPtrs & columns) const override; std::string getTypeName() const override { return "catboost"; } @@ -59,29 +44,28 @@ public: /// IExternalLoadable interface. 
- const ExternalLoadableLifetime & getLifetime() const override; + const ExternalLoadableLifetime & getLifetime() const override { return lifetime; } std::string getLoadableName() const override { return name; } bool supportUpdates() const override { return true; } - bool isModified() const override; + bool isModified() const override { return true; } - std::shared_ptr clone() const override; + std::shared_ptr clone() const override + { + return std::make_shared(name, model_path, lib_path, lifetime); + } private: const std::string name; std::string model_path; std::string lib_path; ExternalLoadableLifetime lifetime; - std::shared_ptr api_provider; + std::shared_ptr api_provider; const CatBoostWrapperAPI * api; - std::unique_ptr model; - - size_t float_features_count; - size_t cat_features_count; - size_t tree_count; + std::unique_ptr model; void init(); }; diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index d558d1cfd67..1039fac6883 100644 --- a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -132,7 +132,9 @@ Cluster::Address::Address( bool secure_, Int64 priority_, UInt32 shard_index_, - UInt32 replica_index_) + UInt32 replica_index_, + String cluster_name_, + String cluster_secret_) : user(user_), password(password_) { bool can_be_local = true; @@ -164,6 +166,8 @@ Cluster::Address::Address( is_local = can_be_local && isLocal(clickhouse_port); shard_index = shard_index_; replica_index = replica_index_; + cluster = cluster_name_; + cluster_secret = cluster_secret_; } @@ -537,10 +541,14 @@ Cluster::Cluster( bool treat_local_as_remote, bool treat_local_port_as_remote, bool secure, - Int64 priority) + Int64 priority, + String cluster_name, + String cluster_secret) { UInt32 current_shard_num = 1; + secret = cluster_secret; + for (const auto & shard : names) { Addresses current; @@ -554,7 +562,9 @@ Cluster::Cluster( secure, priority, current_shard_num, - current.size() + 1); + current.size() + 1, + cluster_name, + 
cluster_secret); addresses_with_failover.emplace_back(current); @@ -690,6 +700,9 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti } } + secret = from.secret; + name = from.name; + initMisc(); } @@ -704,6 +717,9 @@ Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector addresses_with_failover.emplace_back(from.addresses_with_failover.at(index)); } + secret = from.secret; + name = from.name; + initMisc(); } diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index e9f26c21089..13f19f7c0ed 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -55,7 +55,9 @@ public: bool treat_local_as_remote, bool treat_local_port_as_remote, bool secure = false, - Int64 priority = 1); + Int64 priority = 1, + String cluster_name = "", + String cluster_secret = ""); Cluster(const Cluster &)= delete; Cluster & operator=(const Cluster &) = delete; @@ -127,7 +129,9 @@ public: bool secure_ = false, Int64 priority_ = 1, UInt32 shard_index_ = 0, - UInt32 replica_index_ = 0); + UInt32 replica_index_ = 0, + String cluster_name = "", + String cluster_secret_ = ""); /// Returns 'escaped_host_name:port' String toString() const; diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index 3e414d5b6de..4d2cdf7dd2c 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -350,6 +350,12 @@ void DDLWorker::scheduleTasks(bool reinitialized) bool maybe_concurrently_deleting = task && !zookeeper->exists(fs::path(task->entry_path) / "active"); return task && !maybe_concurrently_deleting && !maybe_currently_processing; } + else if (last_skipped_entry_name.has_value() && !queue_fully_loaded_after_initialization_debug_helper) + { + /// If connection was lost during queue loading + /// we may start processing from finished task (because we don't know yet that it's finished) and it's ok. + return false; + } else { /// Return true if entry should not be scheduled. 
@@ -365,7 +371,11 @@ void DDLWorker::scheduleTasks(bool reinitialized) String reason; auto task = initAndCheckTask(entry_name, reason, zookeeper); - if (!task) + if (task) + { + queue_fully_loaded_after_initialization_debug_helper = true; + } + else { LOG_DEBUG(log, "Will not execute task {}: {}", entry_name, reason); updateMaxDDLEntryID(entry_name); diff --git a/src/Interpreters/DDLWorker.h b/src/Interpreters/DDLWorker.h index dbdf0e94f06..7cdbf880a2b 100644 --- a/src/Interpreters/DDLWorker.h +++ b/src/Interpreters/DDLWorker.h @@ -131,6 +131,9 @@ protected: std::optional first_failed_task_name; std::list current_tasks; + /// This flag is needed for debug assertions only + bool queue_fully_loaded_after_initialization_debug_helper = false; + Coordination::Stat queue_node_stat; std::shared_ptr queue_updated_event = std::make_shared(); std::shared_ptr cleanup_event = std::make_shared(); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 7e150f59694..841d7bc567f 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -259,7 +259,7 @@ NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & a if (!array_join_expression_list) return src_columns; - getRootActionsNoMakeSet(array_join_expression_list, true, actions, false); + getRootActionsNoMakeSet(array_join_expression_list, actions, false); auto array_join = addMultipleArrayJoinAction(actions, is_array_join_left); auto sample_columns = actions->getResultColumns(); @@ -294,7 +294,7 @@ NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const const ASTTablesInSelectQueryElement * join = select_query->join(); if (join) { - getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), true, actions, false); + getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), actions, false); auto sample_columns = actions->getNamesAndTypesList(); 
syntax->analyzed_join->addJoinedColumnsAndCorrectTypes(sample_columns, true); actions = std::make_shared(sample_columns); @@ -332,14 +332,14 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) { NameSet unique_keys; ASTs & group_asts = group_by_ast->children; - for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i) + for (ssize_t i = 0; i < static_cast(group_asts.size()); ++i) { ssize_t size = group_asts.size(); if (getContext()->getSettingsRef().enable_positional_arguments) replaceForPositionalArguments(group_asts[i], select_query, ASTSelectQuery::Expression::GROUP_BY); - getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false); + getRootActionsNoMakeSet(group_asts[i], temp_actions, false); const auto & column_name = group_asts[i]->getColumnName(); @@ -405,8 +405,8 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global) { if (do_global) { - GlobalSubqueriesVisitor::Data subqueries_data(getContext(), subquery_depth, isRemoteStorage(), - external_tables, subqueries_for_sets, has_global_subqueries); + GlobalSubqueriesVisitor::Data subqueries_data( + getContext(), subquery_depth, isRemoteStorage(), external_tables, subqueries_for_sets, has_global_subqueries); GlobalSubqueriesVisitor(subqueries_data).visit(query); } } @@ -416,7 +416,7 @@ void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_ { auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name); - if (prepared_sets.count(set_key)) + if (prepared_sets.contains(set_key)) return; /// Already prepared. 
if (auto set_ptr_from_storage_set = isPlainStorageSetInSubquery(subquery_or_table_name)) @@ -509,33 +509,62 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) } -void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) { LogAST log; - ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth, - sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets, - no_subqueries, false, only_consts, !isRemoteStorage()); + ActionsVisitor::Data visitor_data( + getContext(), + settings.size_limits_for_set, + subquery_depth, + sourceColumns(), + std::move(actions), + prepared_sets, + subqueries_for_sets, + no_makeset_for_subqueries, + false /* no_makeset */, + only_consts, + !isRemoteStorage() /* create_source_for_in */); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } -void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts) { LogAST log; - ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth, - sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets, - no_subqueries, true, only_consts, !isRemoteStorage()); + ActionsVisitor::Data visitor_data( + getContext(), + settings.size_limits_for_set, + subquery_depth, + sourceColumns(), + std::move(actions), + prepared_sets, + subqueries_for_sets, + true /* no_makeset_for_subqueries, no_makeset implies no_makeset_for_subqueries */, + true /* no_makeset */, + only_consts, + !isRemoteStorage() /* create_source_for_in */); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions 
= visitor_data.getActions(); } -void ExpressionAnalyzer::getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts) + +void ExpressionAnalyzer::getRootActionsForHaving( + const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) { LogAST log; - ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth, - sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets, - no_subqueries, false, only_consts, true); + ActionsVisitor::Data visitor_data( + getContext(), + settings.size_limits_for_set, + subquery_depth, + sourceColumns(), + std::move(actions), + prepared_sets, + subqueries_for_sets, + no_makeset_for_subqueries, + false /* no_makeset */, + only_consts, + true /* create_source_for_in */); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -547,7 +576,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr { AggregateDescription aggregate; if (node->arguments) - getRootActionsNoMakeSet(node->arguments, true, actions); + getRootActionsNoMakeSet(node->arguments, actions); aggregate.column_name = node->getColumnName(); @@ -746,8 +775,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) // Requiring a constant reference to a shared pointer to non-const AST // doesn't really look sane, but the visitor does indeed require it. // Hence we clone the node (not very sane either, I know). 
- getRootActionsNoMakeSet(window_function.function_node->clone(), - true, actions); + getRootActionsNoMakeSet(window_function.function_node->clone(), actions); const ASTs & arguments = window_function.function_node->arguments->children; @@ -867,8 +895,7 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi auto array_join = addMultipleArrayJoinAction(step.actions(), is_array_join_left); before_array_join = chain.getLastActions(); - chain.steps.push_back(std::make_unique( - array_join, step.getResultColumns())); + chain.steps.push_back(std::make_unique(array_join, step.getResultColumns())); chain.addStep(); @@ -1099,8 +1126,8 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( } } - chain.steps.emplace_back(std::make_unique( - std::make_shared(std::move(columns)))); + chain.steps.emplace_back( + std::make_unique(std::make_shared(std::move(columns)))); chain.steps.back()->additional_input = std::move(unused_source_columns); chain.getLastActions(); chain.addStep(); @@ -1210,8 +1237,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments( // recursively together with (1b) as ASTFunction::window_definition. if (getSelectQuery()->window()) { - getRootActionsNoMakeSet(getSelectQuery()->window(), - true /* no_subqueries */, step.actions()); + getRootActionsNoMakeSet(getSelectQuery()->window(), step.actions()); } for (const auto & [_, w] : window_descriptions) @@ -1222,8 +1248,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments( // definitions (1a). // Requiring a constant reference to a shared pointer to non-const AST // doesn't really look sane, but the visitor does indeed require it. - getRootActionsNoMakeSet(f.function_node->clone(), - true /* no_subqueries */, step.actions()); + getRootActionsNoMakeSet(f.function_node->clone(), step.actions()); // (2b) Required function argument columns. 
for (const auto & a : f.function_node->arguments->children) @@ -1456,7 +1481,7 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r alias = name; result_columns.emplace_back(name, alias); result_names.push_back(alias); - getRootActions(ast, false, actions_dag); + getRootActions(ast, false /* no_makeset_for_subqueries */, actions_dag); } if (add_aliases) @@ -1496,7 +1521,7 @@ ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const ColumnsWithTypeAn { auto actions = std::make_shared(constant_inputs); - getRootActions(query, true, actions, true); + getRootActions(query, true /* no_makeset_for_subqueries */, actions, true /* only_consts */); return std::make_shared(actions, ExpressionActionsSettings::fromContext(getContext())); } @@ -1513,13 +1538,13 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::simpleSelectActions() } ExpressionAnalysisResult::ExpressionAnalysisResult( - SelectQueryExpressionAnalyzer & query_analyzer, - const StorageMetadataPtr & metadata_snapshot, - bool first_stage_, - bool second_stage_, - bool only_types, - const FilterDAGInfoPtr & filter_info_, - const Block & source_header) + SelectQueryExpressionAnalyzer & query_analyzer, + const StorageMetadataPtr & metadata_snapshot, + bool first_stage_, + bool second_stage_, + bool only_types, + const FilterDAGInfoPtr & filter_info_, + const Block & source_header) : first_stage(first_stage_) , second_stage(second_stage_) , need_aggregate(query_analyzer.hasAggregation()) diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index b6bb3c5fad5..5dcbdc2486b 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -172,15 +172,15 @@ protected: ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const; - void getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActions(const ASTPtr & ast, bool 
no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the * prepared sets would not be applicable for MergeTree index optimization. */ - void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts = false); - void getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActionsForHaving(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions, * Create a set of columns aggregated_columns resulting after the aggregation, if any, diff --git a/src/Interpreters/ExternalModelsLoader.h b/src/Interpreters/ExternalModelsLoader.h index 18e1f1123f6..042906bee9e 100644 --- a/src/Interpreters/ExternalModelsLoader.h +++ b/src/Interpreters/ExternalModelsLoader.h @@ -15,14 +15,14 @@ namespace DB class ExternalModelsLoader : public ExternalLoader, WithContext { public: - using ModelPtr = std::shared_ptr; + using ModelPtr = std::shared_ptr; /// Models will be loaded immediately and then will be updated in separate thread, each 'reload_period' seconds. 
explicit ExternalModelsLoader(ContextPtr context_); ModelPtr getModel(const std::string & model_name) const { - return std::static_pointer_cast(load(model_name)); + return std::static_pointer_cast(load(model_name)); } void reloadModel(const std::string & model_name) const diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 5d2df583b9e..50ce7977534 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -17,7 +18,11 @@ #include #include #include +#include #include +#include +#include +#include namespace DB { @@ -34,7 +39,6 @@ public: { size_t subquery_depth; bool is_remote; - size_t external_table_id; TemporaryTablesMapping & external_tables; SubqueriesForSets & subqueries_for_sets; bool & has_global_subqueries; @@ -49,7 +53,6 @@ public: : WithContext(context_) , subquery_depth(subquery_depth_) , is_remote(is_remote_) - , external_table_id(1) , external_tables(tables) , subqueries_for_sets(subqueries_for_sets_) , has_global_subqueries(has_global_subqueries_) @@ -92,48 +95,33 @@ public: { /// If this is already an external table, you do not need to add anything. Just remember its presence. auto temporary_table_name = getIdentifierName(subquery_or_table_name); - bool exists_in_local_map = external_tables.end() != external_tables.find(temporary_table_name); + bool exists_in_local_map = external_tables.contains(temporary_table_name); bool exists_in_context = static_cast(getContext()->tryResolveStorageID( StorageID("", temporary_table_name), Context::ResolveExternal)); if (exists_in_local_map || exists_in_context) return; } - String external_table_name = subquery_or_table_name->tryGetAlias(); - if (external_table_name.empty()) + String alias = subquery_or_table_name->tryGetAlias(); + String external_table_name; + if (alias.empty()) { - /// Generate the name for the external table. 
- external_table_name = "_data" + toString(external_table_id); - while (external_tables.count(external_table_name)) - { - ++external_table_id; - external_table_name = "_data" + toString(external_table_id); - } + auto hash = subquery_or_table_name->getTreeHash(); + external_table_name = fmt::format("_data_{}_{}", hash.first, hash.second); } - - auto interpreter = interpretSubquery(subquery_or_table_name, getContext(), subquery_depth, {}); - - Block sample = interpreter->getSampleBlock(); - NamesAndTypesList columns = sample.getNamesAndTypesList(); - - auto external_storage_holder = std::make_shared( - getContext(), - ColumnsDescription{columns}, - ConstraintsDescription{}, - nullptr, - /*create_for_global_subquery*/ true); - StoragePtr external_storage = external_storage_holder->getTable(); + else + external_table_name = alias; /** We replace the subquery with the name of the temporary table. * It is in this form, the request will go to the remote server. * This temporary table will go to the remote server, and on its side, * instead of doing a subquery, you just need to read it. 
+ * TODO We can do better than using alias to name external tables */ auto database_and_table_name = std::make_shared(external_table_name); if (set_alias) { - String alias = subquery_or_table_name->tryGetAlias(); if (auto * table_name = subquery_or_table_name->as()) if (alias.empty()) alias = table_name->shortName(); @@ -151,8 +139,27 @@ public: else ast = database_and_table_name; - external_tables[external_table_name] = external_storage_holder; + if (external_tables.contains(external_table_name)) + return; + auto interpreter = interpretSubquery(subquery_or_table_name, getContext(), subquery_depth, {}); + + Block sample = interpreter->getSampleBlock(); + NamesAndTypesList columns = sample.getNamesAndTypesList(); + + auto external_storage_holder = std::make_shared( + getContext(), + ColumnsDescription{columns}, + ConstraintsDescription{}, + nullptr, + /*create_for_global_subquery*/ true); + StoragePtr external_storage = external_storage_holder->getTable(); + + external_tables.emplace(external_table_name, external_storage_holder); + + /// We need to materialize external tables immediately because reading from distributed + /// tables might generate local plans which can refer to external tables during index + /// analysis. It's too late to populate the external table via CreatingSetsTransform. 
if (getContext()->getSettingsRef().use_index_for_in_with_subqueries) { auto external_table = external_storage_holder->getTable(); diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index c56529b3214..e81db1427ef 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -1237,16 +1237,16 @@ NO_INLINE IColumn::Filter joinRightColumns( { const IColumn & left_asof_key = added_columns.leftAsofKey(); - auto [block, row_num] = mapped->findAsof(left_asof_key, i); - if (block) + auto row_ref = mapped->findAsof(left_asof_key, i); + if (row_ref.block) { setUsed(filter, i); if constexpr (multiple_disjuncts) - used_flags.template setUsed(block, row_num, 0); + used_flags.template setUsed(row_ref.block, row_ref.row_num, 0); else used_flags.template setUsed(find_result); - added_columns.appendFromBlock(*block, row_num); + added_columns.appendFromBlock(*row_ref.block, row_ref.row_num); } else addNotFoundRow(added_columns, current_offset); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 042cc41ef54..27ed8438fc8 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1242,10 +1242,6 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

hasGlobalSubqueries() && !subqueries_for_sets.empty()) - executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets); } if (expressions.second_stage || from_aggregation_stage) @@ -1428,7 +1424,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

hasGlobalSubqueries())) + if (!subqueries_for_sets.empty()) executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets); } @@ -1892,7 +1888,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc && limit_length <= std::numeric_limits::max() - limit_offset && limit_length + limit_offset < max_block_size) { - max_block_size = std::max(UInt64(1), limit_length + limit_offset); + max_block_size = std::max(UInt64{1}, limit_length + limit_offset); max_threads_execute_query = max_streams = 1; } @@ -2578,11 +2574,11 @@ void InterpreterSelectQuery::executeExtremes(QueryPlan & query_plan) void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_plan, SubqueriesForSets & subqueries_for_sets) { - const auto & input_order_info = query_info.input_order_info - ? query_info.input_order_info - : (query_info.projection ? query_info.projection->input_order_info : nullptr); - if (input_order_info) - executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins"); + // const auto & input_order_info = query_info.input_order_info + // ? query_info.input_order_info + // : (query_info.projection ? query_info.projection->input_order_info : nullptr); + // if (input_order_info) + // executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins"); const Settings & settings = context->getSettingsRef(); diff --git a/src/Interpreters/RowRefs.cpp b/src/Interpreters/RowRefs.cpp index 39fc7965eb2..2b791f5a189 100644 --- a/src/Interpreters/RowRefs.cpp +++ b/src/Interpreters/RowRefs.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -44,38 +45,52 @@ class SortedLookupVector : public SortedLookupVectorBase { struct Entry { - /// We don't store a RowRef and instead keep it's members separately (and return a tuple) to reduce the memory usage. 
- /// For example, for sizeof(T) == 4 => sizeof(Entry) == 16 (while before it would be 20). Then when you put it into a vector, the effect is even greater - decltype(RowRef::block) block; - decltype(RowRef::row_num) row_num; - TKey asof_value; + TKey value; + uint32_t row_ref_index; Entry() = delete; - Entry(TKey v, const Block * b, size_t r) : block(b), row_num(r), asof_value(v) { } + Entry(TKey value_, uint32_t row_ref_index_) + : value(value_) + , row_ref_index(row_ref_index_) + { } - bool operator<(const Entry & other) const { return asof_value < other.asof_value; } + }; + + struct LessEntryOperator + { + ALWAYS_INLINE bool operator()(const Entry & lhs, const Entry & rhs) const + { + return lhs.value < rhs.value; + } }; struct GreaterEntryOperator { - bool operator()(Entry const & a, Entry const & b) const { return a.asof_value > b.asof_value; } + ALWAYS_INLINE bool operator()(const Entry & lhs, const Entry & rhs) const + { + return lhs.value > rhs.value; + } }; public: - using Base = std::vector; using Keys = std::vector; - static constexpr bool isDescending = (inequality == ASOF::Inequality::Greater || inequality == ASOF::Inequality::GreaterOrEquals); - static constexpr bool isStrict = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::Greater); + using Entries = PaddedPODArray; + using RowRefs = PaddedPODArray; + + static constexpr bool is_descending = (inequality == ASOF::Inequality::Greater || inequality == ASOF::Inequality::GreaterOrEquals); + static constexpr bool is_strict = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::Greater); void insert(const IColumn & asof_column, const Block * block, size_t row_num) override { using ColumnType = ColumnVectorOrDecimal; const auto & column = assert_cast(asof_column); - TKey k = column.getElement(row_num); + TKey key = column.getElement(row_num); assert(!sorted.load(std::memory_order_acquire)); - array.emplace_back(k, block, row_num); + + entries.emplace_back(key, 
row_refs.size()); + row_refs.emplace_back(RowRef(block, row_num)); } /// Unrolled version of upper_bound and lower_bound @@ -84,30 +99,30 @@ public: /// at https://en.algorithmica.org/hpc/data-structures/s-tree/ size_t boundSearch(TKey value) { - size_t size = array.size(); + size_t size = entries.size(); size_t low = 0; /// This is a single binary search iteration as a macro to unroll. Takes into account the inequality: - /// isStrict -> Equal values are not requested - /// isDescending -> The vector is sorted in reverse (for greater or greaterOrEquals) + /// is_strict -> Equal values are not requested + /// is_descending -> The vector is sorted in reverse (for greater or greaterOrEquals) #define BOUND_ITERATION \ { \ size_t half = size / 2; \ size_t other_half = size - half; \ size_t probe = low + half; \ size_t other_low = low + other_half; \ - TKey v = array[probe].asof_value; \ + TKey & v = entries[probe].value; \ size = half; \ - if constexpr (isDescending) \ + if constexpr (is_descending) \ { \ - if constexpr (isStrict) \ + if constexpr (is_strict) \ low = value <= v ? other_low : low; \ else \ low = value < v ? other_low : low; \ } \ else \ { \ - if constexpr (isStrict) \ + if constexpr (is_strict) \ low = value >= v ? other_low : low; \ else \ low = value > v ? 
other_low : low; \ @@ -130,7 +145,7 @@ public: return low; } - std::tuple findAsof(const IColumn & asof_column, size_t row_num) override + RowRef findAsof(const IColumn & asof_column, size_t row_num) override { sort(); @@ -139,8 +154,11 @@ public: TKey k = column.getElement(row_num); size_t pos = boundSearch(k); - if (pos != array.size()) - return std::make_tuple(array[pos].block, array[pos].row_num); + if (pos != entries.size()) + { + size_t row_ref_index = entries[pos].row_ref_index; + return row_refs[row_ref_index]; + } return {nullptr, 0}; } @@ -148,7 +166,8 @@ public: private: std::atomic sorted = false; mutable std::mutex lock; - Base array; + Entries entries; + RowRefs row_refs; // Double checked locking with SC atomics works in C++ // https://preshing.com/20130930/double-checked-locking-is-fixed-in-cpp11/ @@ -160,12 +179,37 @@ private: if (!sorted.load(std::memory_order_acquire)) { std::lock_guard l(lock); + if (!sorted.load(std::memory_order_relaxed)) { - if constexpr (isDescending) - ::sort(array.begin(), array.end(), GreaterEntryOperator()); + if constexpr (std::is_arithmetic_v && !std::is_floating_point_v) + { + if (likely(entries.size() > 256)) + { + struct RadixSortTraits : RadixSortNumTraits + { + using Element = Entry; + using Result = Element; + + static TKey & extractKey(Element & elem) { return elem.value; } + static Element extractResult(Element & elem) { return elem; } + }; + + if constexpr (is_descending) + RadixSort::executeLSD(entries.data(), entries.size(), true); + else + RadixSort::executeLSD(entries.data(), entries.size(), false); + + sorted.store(true, std::memory_order_release); + return; + } + } + + if constexpr (is_descending) + ::sort(entries.begin(), entries.end(), GreaterEntryOperator()); else - ::sort(array.begin(), array.end()); + ::sort(entries.begin(), entries.end(), LessEntryOperator()); + sorted.store(true, std::memory_order_release); } } diff --git a/src/Interpreters/RowRefs.h b/src/Interpreters/RowRefs.h index 
02462833050..fa5ce867613 100644 --- a/src/Interpreters/RowRefs.h +++ b/src/Interpreters/RowRefs.h @@ -146,7 +146,7 @@ private: struct SortedLookupVectorBase { SortedLookupVectorBase() = default; - virtual ~SortedLookupVectorBase() { } + virtual ~SortedLookupVectorBase() = default; static std::optional getTypeSize(const IColumn & asof_column, size_t & type_size); @@ -154,7 +154,7 @@ struct SortedLookupVectorBase virtual void insert(const IColumn &, const Block *, size_t) = 0; // This needs to be synchronized internally - virtual std::tuple findAsof(const IColumn &, size_t) = 0; + virtual RowRef findAsof(const IColumn &, size_t) = 0; }; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index 59545d4314d..3b4d665e41b 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -41,6 +41,57 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; +} + +namespace +{ + class StorageWithComment : public IAST + { + public: + ASTPtr storage; + ASTPtr comment; + + String getID(char) const override { return "Storage with comment definition"; } + + ASTPtr clone() const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method clone is not supported"); + } + + void formatImpl(const FormatSettings &, FormatState &, FormatStateStacked) const override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported"); + } + }; + + class ParserStorageWithComment : public IParserBase + { + protected: + const char * getName() const override { return "storage definition with comment"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + ParserStorage storage_p; + ASTPtr storage; + + if (!storage_p.parse(pos, storage, expected)) + return false; + + ParserKeyword s_comment("COMMENT"); + ParserStringLiteral string_literal_parser; + ASTPtr comment; + + if (s_comment.ignore(pos, expected)) + 
string_literal_parser.parse(pos, comment, expected); + + auto storage_with_comment = std::make_shared(); + storage_with_comment->storage = std::move(storage); + storage_with_comment->comment = std::move(comment); + + node = storage_with_comment; + return true; + } + }; } namespace @@ -102,8 +153,9 @@ std::shared_ptr createSystemLog( engine += " TTL " + ttl; engine += " ORDER BY (event_date, event_time)"; } + // Validate engine definition grammatically to prevent some configuration errors - ParserStorage storage_parser; + ParserStorageWithComment storage_parser; parseQuery(storage_parser, engine.data(), engine.data() + engine.size(), "Storage to create table for " + config_prefix, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); @@ -450,7 +502,6 @@ void SystemLog::prepareTable() is_prepared = true; } - template ASTPtr SystemLog::getCreateTableQuery() { @@ -465,11 +516,16 @@ ASTPtr SystemLog::getCreateTableQuery() new_columns_list->set(new_columns_list->columns, InterpreterCreateQuery::formatColumns(ordinary_columns, alias_columns)); create->set(create->columns_list, new_columns_list); - ParserStorage storage_parser; - ASTPtr storage_ast = parseQuery( + ParserStorageWithComment storage_parser; + + ASTPtr storage_with_comment_ast = parseQuery( storage_parser, storage_def.data(), storage_def.data() + storage_def.size(), "Storage to create table for " + LogElement::name(), 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - create->set(create->storage, storage_ast); + + StorageWithComment & storage_with_comment = storage_with_comment_ast->as(); + + create->set(create->storage, storage_with_comment.storage); + create->set(create->comment, storage_with_comment.comment); /// Write additional (default) settings for MergeTree engine to make it make it possible to compare ASTs /// and recreate tables on settings changes. 
diff --git a/src/Interpreters/threadPoolCallbackRunner.cpp b/src/Interpreters/threadPoolCallbackRunner.cpp new file mode 100644 index 00000000000..fadad235039 --- /dev/null +++ b/src/Interpreters/threadPoolCallbackRunner.cpp @@ -0,0 +1,39 @@ +#include "threadPoolCallbackRunner.h" + +#include + +#include + +namespace DB +{ + +CallbackRunner threadPoolCallbackRunner(ThreadPool & pool) +{ + return [pool = &pool, thread_group = CurrentThread::getGroup()](auto callback) + { + pool->scheduleOrThrow([callback = std::move(callback), thread_group]() + { + if (thread_group) + CurrentThread::attachTo(thread_group); + + SCOPE_EXIT_SAFE({ + if (thread_group) + CurrentThread::detachQueryIfNotDetached(); + + /// After we detached from the thread_group, parent for memory_tracker inside ThreadStatus will be reset to it's parent. + /// Typically, it may be changes from Process to User. + /// Usually it could be ok, because thread pool task is executed before user-level memory tracker is destroyed. + /// However, thread could stay alive inside the thread pool, and it's ThreadStatus as well. + /// When, finally, we destroy the thread (and the ThreadStatus), + /// it can use memory tracker in the ~ThreadStatus in order to alloc/free untracked_memory,\ + /// and by this time user-level memory tracker may be already destroyed. + /// + /// As a work-around, reset memory tracker to total, which is always alive. 
+ CurrentThread::get().memory_tracker.setParent(&total_memory_tracker); + }); + callback(); + }); + }; +} + +} diff --git a/src/Interpreters/threadPoolCallbackRunner.h b/src/Interpreters/threadPoolCallbackRunner.h new file mode 100644 index 00000000000..59d06f2f1bc --- /dev/null +++ b/src/Interpreters/threadPoolCallbackRunner.h @@ -0,0 +1,15 @@ +#pragma once + +#include + + +namespace DB +{ + +/// High-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously +using CallbackRunner = std::function)>; + +/// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()' +CallbackRunner threadPoolCallbackRunner(ThreadPool & pool); + +} diff --git a/src/Parsers/parseIntervalKind.cpp b/src/Parsers/parseIntervalKind.cpp index 7d36133e81c..0704aa107ca 100644 --- a/src/Parsers/parseIntervalKind.cpp +++ b/src/Parsers/parseIntervalKind.cpp @@ -7,6 +7,27 @@ namespace DB { bool parseIntervalKind(IParser::Pos & pos, Expected & expected, IntervalKind & result) { + if (ParserKeyword("NANOSECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_NANOSECOND").ignore(pos, expected) + || ParserKeyword("NS").ignore(pos, expected)) + { + result = IntervalKind::Nanosecond; + return true; + } + + if (ParserKeyword("MICROSECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_MICROSECOND").ignore(pos, expected) + || ParserKeyword("MCS").ignore(pos, expected)) + { + result = IntervalKind::Microsecond; + return true; + } + + if (ParserKeyword("MILLISECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_MILLISECOND").ignore(pos, expected) + || ParserKeyword("MS").ignore(pos, expected)) + { + result = IntervalKind::Millisecond; + return true; + } + if (ParserKeyword("SECOND").ignore(pos, expected) || ParserKeyword("SQL_TSI_SECOND").ignore(pos, expected) || ParserKeyword("SS").ignore(pos, expected) || ParserKeyword("S").ignore(pos, expected)) { diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp 
b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 42c68e4654b..792ebd09392 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -140,7 +140,11 @@ void ArrowBlockInputFormat::prepareReader() } arrow_column_to_ch_column = std::make_unique( - getPort().getHeader(), "Arrow", format_settings.arrow.import_nested, format_settings.arrow.allow_missing_columns); + getPort().getHeader(), + "Arrow", + format_settings.arrow.import_nested, + format_settings.arrow.allow_missing_columns, + format_settings.arrow.case_insensitive_column_matching); missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); if (stream) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 8f5bb205bef..c17c86d51ea 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -34,6 +34,7 @@ #include #include #include +#include /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn. 
#define FOR_ARROW_NUMERIC_TYPES(M) \ @@ -524,19 +525,22 @@ static void checkStatus(const arrow::Status & status, const String & column_name } -Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, const std::string & format_name, bool skip_columns_with_unsupported_types, const Block * hint_header) +Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( + const arrow::Schema & schema, const std::string & format_name, bool skip_columns_with_unsupported_types, const Block * hint_header, bool ignore_case) { ColumnsWithTypeAndName sample_columns; std::unordered_set nested_table_names; if (hint_header) - nested_table_names = Nested::getAllTableNames(*hint_header); + nested_table_names = Nested::getAllTableNames(*hint_header, ignore_case); + for (const auto & field : schema.fields()) { - if (hint_header && !hint_header->has(field->name()) && !nested_table_names.contains(field->name())) + if (hint_header && !hint_header->has(field->name(), ignore_case) + && !nested_table_names.contains(ignore_case ? boost::to_lower_copy(field->name()) : field->name())) continue; /// Create empty arrow column by it's type and convert it to ClickHouse column. 
- arrow::MemoryPool* pool = arrow::default_memory_pool(); + arrow::MemoryPool * pool = arrow::default_memory_pool(); std::unique_ptr array_builder; arrow::Status status = MakeBuilder(pool, field->type(), &array_builder); checkStatus(status, field->name(), format_name); @@ -557,20 +561,31 @@ Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(const arrow::Schema & schema, } ArrowColumnToCHColumn::ArrowColumnToCHColumn( - const Block & header_, const std::string & format_name_, bool import_nested_, bool allow_missing_columns_) - : header(header_), format_name(format_name_), import_nested(import_nested_), allow_missing_columns(allow_missing_columns_) + const Block & header_, + const std::string & format_name_, + bool import_nested_, + bool allow_missing_columns_, + bool case_insensitive_matching_) + : header(header_) + , format_name(format_name_) + , import_nested(import_nested_) + , allow_missing_columns(allow_missing_columns_) + , case_insensitive_matching(case_insensitive_matching_) { } void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr & table) { NameToColumnPtr name_to_column_ptr; - for (const auto & column_name : table->ColumnNames()) + for (auto column_name : table->ColumnNames()) { std::shared_ptr arrow_column = table->GetColumnByName(column_name); if (!arrow_column) throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column '{}' is duplicated", column_name); - name_to_column_ptr[column_name] = arrow_column; + + if (case_insensitive_matching) + boost::to_lower(column_name); + name_to_column_ptr[std::move(column_name)] = arrow_column; } arrowColumnsToCHChunk(res, name_to_column_ptr); @@ -590,22 +605,31 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & { const ColumnWithTypeAndName & header_column = header.getByPosition(column_i); + auto search_column_name = header_column.name; + if (case_insensitive_matching) + boost::to_lower(search_column_name); + bool read_from_nested = false; String nested_table_name = 
Nested::extractTableName(header_column.name); - if (!name_to_column_ptr.contains(header_column.name)) + String search_nested_table_name = nested_table_name; + if (case_insensitive_matching) + boost::to_lower(search_nested_table_name); + + if (!name_to_column_ptr.contains(search_column_name)) { /// Check if it's a column from nested table. - if (import_nested && name_to_column_ptr.contains(nested_table_name)) + if (import_nested && name_to_column_ptr.contains(search_nested_table_name)) { - if (!nested_tables.contains(nested_table_name)) + if (!nested_tables.contains(search_nested_table_name)) { - std::shared_ptr arrow_column = name_to_column_ptr[nested_table_name]; - ColumnsWithTypeAndName cols = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true, true, false, skipped)}; + std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; + ColumnsWithTypeAndName cols + = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true, true, false, skipped)}; Block block(cols); - nested_tables[nested_table_name] = std::make_shared(Nested::flatten(block)); + nested_tables[search_nested_table_name] = std::make_shared(Nested::flatten(block)); } - read_from_nested = nested_tables[nested_table_name]->has(header_column.name); + read_from_nested = nested_tables[search_nested_table_name]->has(header_column.name, case_insensitive_matching); } if (!read_from_nested) @@ -622,13 +646,19 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & } } - std::shared_ptr arrow_column = name_to_column_ptr[header_column.name]; ColumnWithTypeAndName column; if (read_from_nested) - column = nested_tables[nested_table_name]->getByName(header_column.name); + { + column = nested_tables[search_nested_table_name]->getByName(header_column.name, case_insensitive_matching); + if (case_insensitive_matching) + column.name = header_column.name; + } else + { + auto 
arrow_column = name_to_column_ptr[search_column_name]; column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values, true, true, false, skipped); + } try { @@ -636,8 +666,11 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & } catch (Exception & e) { - e.addMessage(fmt::format("while converting column {} from type {} to type {}", - backQuote(header_column.name), column.type->getName(), header_column.type->getName())); + e.addMessage(fmt::format( + "while converting column {} from type {} to type {}", + backQuote(header_column.name), + column.type->getName(), + header_column.type->getName())); throw; } @@ -651,22 +684,23 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & std::vector ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema & schema) const { std::vector missing_columns; - auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, &header); + auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, &header, case_insensitive_matching); auto flatten_block_from_arrow = Nested::flatten(block_from_arrow); + for (size_t i = 0, columns = header.columns(); i < columns; ++i) { - const auto & column = header.getByPosition(i); + const auto & header_column = header.getByPosition(i); bool read_from_nested = false; - String nested_table_name = Nested::extractTableName(column.name); - if (!block_from_arrow.has(column.name)) + String nested_table_name = Nested::extractTableName(header_column.name); + if (!block_from_arrow.has(header_column.name, case_insensitive_matching)) { - if (import_nested && block_from_arrow.has(nested_table_name)) - read_from_nested = flatten_block_from_arrow.has(column.name); + if (import_nested && block_from_arrow.has(nested_table_name, case_insensitive_matching)) + read_from_nested = flatten_block_from_arrow.has(header_column.name, case_insensitive_matching); if (!read_from_nested) { if 
(!allow_missing_columns) - throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", column.name}; + throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", header_column.name}; missing_columns.push_back(i); } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index cc5852691e0..695e14b7bba 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -25,7 +25,8 @@ public: const Block & header_, const std::string & format_name_, bool import_nested_, - bool allow_missing_columns_); + bool allow_missing_columns_, + bool case_insensitive_matching_ = false); void arrowTableToCHChunk(Chunk & res, std::shared_ptr & table); @@ -40,7 +41,8 @@ public: const arrow::Schema & schema, const std::string & format_name, bool skip_columns_with_unsupported_types = false, - const Block * hint_header = nullptr); + const Block * hint_header = nullptr, + bool ignore_case = false); private: const Block & header; @@ -48,6 +50,7 @@ private: bool import_nested; /// If false, throw exception if some columns in header not exists in arrow table. bool allow_missing_columns; + bool case_insensitive_matching; /// Map {column name : dictionary column}. 
/// To avoid converting dictionary from Arrow Dictionary diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index e93897edfbe..c9c831a5e44 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -54,9 +54,6 @@ Chunk ORCBlockInputFormat::generate() if (!table || !table->num_rows()) return res; - if (format_settings.use_lowercase_column_name) - table = *table->RenameColumns(include_column_names); - arrow_column_to_ch_column->arrowTableToCHChunk(res, table); /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. @@ -74,7 +71,6 @@ void ORCBlockInputFormat::resetParser() file_reader.reset(); include_indices.clear(); - include_column_names.clear(); block_missing_values.clear(); } @@ -126,20 +122,6 @@ static void getFileReaderAndSchema( if (!read_schema_result.ok()) throw Exception(read_schema_result.status().ToString(), ErrorCodes::BAD_ARGUMENTS); schema = std::move(read_schema_result).ValueOrDie(); - - if (format_settings.use_lowercase_column_name) - { - std::vector> fields; - fields.reserve(schema->num_fields()); - for (int i = 0; i < schema->num_fields(); ++i) - { - const auto& field = schema->field(i); - auto name = field->name(); - boost::to_lower(name); - fields.push_back(field->WithName(name)); - } - schema = arrow::schema(fields, schema->metadata()); - } } void ORCBlockInputFormat::prepareReader() @@ -150,12 +132,17 @@ void ORCBlockInputFormat::prepareReader() return; arrow_column_to_ch_column = std::make_unique( - getPort().getHeader(), "ORC", format_settings.orc.import_nested, format_settings.orc.allow_missing_columns); + getPort().getHeader(), + "ORC", + format_settings.orc.import_nested, + format_settings.orc.allow_missing_columns, + format_settings.orc.case_insensitive_column_matching); 
missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); + const bool ignore_case = format_settings.orc.case_insensitive_column_matching; std::unordered_set nested_table_names; if (format_settings.orc.import_nested) - nested_table_names = Nested::getAllTableNames(getPort().getHeader()); + nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case); /// In ReadStripe column indices should be started from 1, /// because 0 indicates to select all columns. @@ -166,19 +153,18 @@ void ORCBlockInputFormat::prepareReader() /// so we should recursively count the number of indices we need for this type. int indexes_count = countIndicesForType(schema->field(i)->type()); const auto & name = schema->field(i)->name(); - if (getPort().getHeader().has(name) || nested_table_names.contains(name)) + if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? boost::to_lower_copy(name) : name)) { for (int j = 0; j != indexes_count; ++j) - { include_indices.push_back(index + j); - include_column_names.push_back(name); - } } + index += indexes_count; } } -ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : ISchemaReader(in_), format_settings(format_settings_) +ORCSchemaReader::ORCSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) + : ISchemaReader(in_), format_settings(format_settings_) { } diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h index bd2151d78ff..b7a771730ea 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -47,7 +47,6 @@ private: // indices of columns to read from ORC file std::vector include_indices; - std::vector include_column_names; std::vector missing_columns; BlockMissingValues block_missing_values; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp 
b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index f3d81822297..07b0665cddc 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -54,11 +54,7 @@ Chunk ParquetBlockInputFormat::generate() std::shared_ptr table; arrow::Status read_status = file_reader->ReadRowGroup(row_group_current, column_indices, &table); if (!read_status.ok()) - throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(), - ErrorCodes::CANNOT_READ_ALL_DATA}; - - if (format_settings.use_lowercase_column_name) - table = *table->RenameColumns(column_names); + throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA}; ++row_group_current; @@ -79,7 +75,6 @@ void ParquetBlockInputFormat::resetParser() file_reader.reset(); column_indices.clear(); - column_names.clear(); row_group_current = 0; block_missing_values.clear(); } @@ -124,20 +119,6 @@ static void getFileReaderAndSchema( return; THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(std::move(arrow_file), arrow::default_memory_pool(), &file_reader)); THROW_ARROW_NOT_OK(file_reader->GetSchema(&schema)); - - if (format_settings.use_lowercase_column_name) - { - std::vector> fields; - fields.reserve(schema->num_fields()); - for (int i = 0; i < schema->num_fields(); ++i) - { - const auto& field = schema->field(i); - auto name = field->name(); - boost::to_lower(name); - fields.push_back(field->WithName(name)); - } - schema = arrow::schema(fields, schema->metadata()); - } } void ParquetBlockInputFormat::prepareReader() @@ -150,12 +131,18 @@ void ParquetBlockInputFormat::prepareReader() row_group_total = file_reader->num_row_groups(); row_group_current = 0; - arrow_column_to_ch_column = std::make_unique(getPort().getHeader(), "Parquet", format_settings.parquet.import_nested, format_settings.parquet.allow_missing_columns); + arrow_column_to_ch_column = std::make_unique( + 
getPort().getHeader(), + "Parquet", + format_settings.parquet.import_nested, + format_settings.parquet.allow_missing_columns, + format_settings.parquet.case_insensitive_column_matching); missing_columns = arrow_column_to_ch_column->getMissingColumns(*schema); + const bool ignore_case = format_settings.parquet.case_insensitive_column_matching; std::unordered_set nested_table_names; if (format_settings.parquet.import_nested) - nested_table_names = Nested::getAllTableNames(getPort().getHeader()); + nested_table_names = Nested::getAllTableNames(getPort().getHeader(), ignore_case); int index = 0; for (int i = 0; i < schema->num_fields(); ++i) @@ -165,19 +152,19 @@ void ParquetBlockInputFormat::prepareReader() /// count the number of indices we need for this type. int indexes_count = countIndicesForType(schema->field(i)->type()); const auto & name = schema->field(i)->name(); - if (getPort().getHeader().has(name) || nested_table_names.contains(name)) + + if (getPort().getHeader().has(name, ignore_case) || nested_table_names.contains(ignore_case ? 
boost::to_lower_copy(name) : name)) { for (int j = 0; j != indexes_count; ++j) - { column_indices.push_back(index + j); - column_names.push_back(name); - } } + index += indexes_count; } } -ParquetSchemaReader::ParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : ISchemaReader(in_), format_settings(format_settings_) +ParquetSchemaReader::ParquetSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) + : ISchemaReader(in_), format_settings(format_settings_) { } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index eba9aac29f2..1faadaa3d21 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -40,7 +40,6 @@ private: int row_group_total = 0; // indices of columns to read from Parquet file std::vector column_indices; - std::vector column_names; std::unique_ptr arrow_column_to_ch_column; int row_group_current = 0; std::vector missing_columns; diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 4c50e4d9b03..35846fdad07 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -4,7 +4,6 @@ #include #include #include -#include namespace DB @@ -242,15 +241,16 @@ std::unordered_map TSKVSchemaReader::readRowAndGetNamesAndD std::unordered_map names_and_types; StringRef name_ref; - String name_tmp; + String name_buf; String value; do { - bool has_value = readName(in, name_ref, name_tmp); + bool has_value = readName(in, name_ref, name_buf); + String name = String(name_ref); if (has_value) { readEscapedString(value, in); - names_and_types[String(name_ref)] = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped); + names_and_types[std::move(name)] = determineDataTypeByEscapingRule(value, format_settings, 
FormatSettings::EscapingRule::Escaped); } else { diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 1276157cc91..ae97a769b23 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -34,16 +34,16 @@ Block FillingTransform::transformHeader(Block header, const SortDescription & so template static FillColumnDescription::StepFunction getStepFunction( - IntervalKind kind, Int64 step, const DateLUTImpl & date_lut) + IntervalKind kind, Int64 step, const DateLUTImpl & date_lut, UInt16 scale = DataTypeDateTime64::default_scale) { switch (kind) { - #define DECLARE_CASE(NAME) \ +#define DECLARE_CASE(NAME) \ case IntervalKind::NAME: \ - return [step, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get(field), step, date_lut); }; + return [step, scale, &date_lut](Field & field) { field = Add##NAME##sImpl::execute(get(field), step, date_lut, scale); }; FOR_EACH_INTERVAL_KIND(DECLARE_CASE) - #undef DECLARE_CASE +#undef DECLARE_CASE } __builtin_unreachable(); } @@ -92,7 +92,7 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & Int64 avg_seconds = get(descr.fill_step) * descr.step_kind->toAvgSeconds(); if (avg_seconds < 86400) throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, - "Value of step is to low ({} seconds). Must be >= 1 day", avg_seconds); + "Value of step is to low ({} seconds). 
Must be >= 1 day", avg_seconds); } if (which.isDate()) @@ -108,25 +108,23 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & switch (*descr.step_kind) { - #define DECLARE_CASE(NAME) \ +#define DECLARE_CASE(NAME) \ case IntervalKind::NAME: \ descr.step_func = [step, &time_zone = date_time64->getTimeZone()](Field & field) \ { \ auto field_decimal = get>(field); \ - auto components = DecimalUtils::splitWithScaleMultiplier(field_decimal.getValue(), field_decimal.getScaleMultiplier()); \ - auto res = Add##NAME##sImpl::execute(components, step, time_zone); \ - auto res_decimal = decimalFromComponentsWithMultiplier(res, field_decimal.getScaleMultiplier()); \ - field = DecimalField(res_decimal, field_decimal.getScale()); \ + auto res = Add##NAME##sImpl::execute(field_decimal.getValue(), step, time_zone, field_decimal.getScale()); \ + field = DecimalField(res, field_decimal.getScale()); \ }; \ break; FOR_EACH_INTERVAL_KIND(DECLARE_CASE) - #undef DECLARE_CASE +#undef DECLARE_CASE } } else throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, - "STEP of Interval type can be used only with Date/DateTime types, but got {}", type->getName()); + "STEP of Interval type can be used only with Date/DateTime types, but got {}", type->getName()); } else { @@ -140,12 +138,12 @@ static bool tryConvertFields(FillColumnDescription & descr, const DataTypePtr & } FillingTransform::FillingTransform( - const Block & header_, const SortDescription & sort_description_, bool on_totals_) - : ISimpleTransform(header_, transformHeader(header_, sort_description_), true) - , sort_description(sort_description_) - , on_totals(on_totals_) - , filling_row(sort_description_) - , next_row(sort_description_) + const Block & header_, const SortDescription & sort_description_, bool on_totals_) + : ISimpleTransform(header_, transformHeader(header_, sort_description_), true) + , sort_description(sort_description_) + , on_totals(on_totals_) + , filling_row(sort_description_) + 
, next_row(sort_description_) { if (on_totals) return; @@ -162,14 +160,14 @@ FillingTransform::FillingTransform( if (!tryConvertFields(descr, type)) throw Exception("Incompatible types of WITH FILL expression values with column type " - + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION); if (type->isValueRepresentedByUnsignedInteger() && ((!descr.fill_from.isNull() && less(descr.fill_from, Field{0}, 1)) || - (!descr.fill_to.isNull() && less(descr.fill_to, Field{0}, 1)))) + (!descr.fill_to.isNull() && less(descr.fill_to, Field{0}, 1)))) { throw Exception("WITH FILL bound values cannot be negative for unsigned type " - + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION); } } @@ -214,7 +212,7 @@ void FillingTransform::transform(Chunk & chunk) MutableColumns res_other_columns; auto init_columns_by_positions = [](const Columns & old_columns, Columns & new_columns, - MutableColumns & new_mutable_columns, const Positions & positions) + MutableColumns & new_mutable_columns, const Positions & positions) { for (size_t pos : positions) { diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 7a85791d172..737e0c9d4b7 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -399,6 +399,7 @@ bool MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr, bool is_final) c return true; /// disallow GLOBAL IN, GLOBAL NOT IN + /// TODO why? 
if ("globalIn" == function_ptr->name || "globalNotIn" == function_ptr->name) return true; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 93cceadaf93..761b4ecdeb1 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -179,8 +179,9 @@ std::unique_ptr createReadBuffer( method = chooseCompressionMethod(current_path, compression_method); } - /// For clickhouse-local add progress callback to display progress bar. - if (context->getApplicationType() == Context::ApplicationType::LOCAL) + /// For clickhouse-local and clickhouse-client add progress callback to display progress bar. + if (context->getApplicationType() == Context::ApplicationType::LOCAL + || context->getApplicationType() == Context::ApplicationType::CLIENT) { auto & in = static_cast(*nested_buffer); in.setProgressCallback(context); @@ -643,7 +644,9 @@ Pipe StorageFile::read( /// Set total number of bytes to process. For progress bar. auto progress_callback = context->getFileProgressCallback(); - if (context->getApplicationType() == Context::ApplicationType::LOCAL && progress_callback) + if ((context->getApplicationType() == Context::ApplicationType::LOCAL + || context->getApplicationType() == Context::ApplicationType::CLIENT) + && progress_callback) progress_callback(FileProgress(0, total_bytes_to_read)); for (size_t i = 0; i < num_streams; ++i) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index f319bd1097b..ce31308fdd7 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -20,6 +21,7 @@ #include #include #include +#include #include #include @@ -374,6 +376,16 @@ static bool checkIfObjectExists(const std::shared_ptr & clien return false; } +// TODO: common thread pool for IO must be used instead after PR #35150 +static ThreadPool & getThreadPoolStorageS3() +{ + constexpr size_t pool_size = 100; + constexpr size_t queue_size = 1000000; + 
static ThreadPool pool(pool_size, pool_size, queue_size); + return pool; +} + + class StorageS3Sink : public SinkToStorage { public: @@ -398,7 +410,7 @@ public: std::make_unique( client, bucket, key, min_upload_part_size, upload_part_size_multiply_factor, upload_part_size_multiply_parts_count_threshold, - max_single_part_upload_size), compression_method, 3); + max_single_part_upload_size, std::nullopt, DBMS_DEFAULT_BUFFER_SIZE, threadPoolCallbackRunner(getThreadPoolStorageS3())), compression_method, 3); writer = FormatFactory::instance().getOutputFormatParallelIfPossible(format, *write_buf, sample_block, context, {}, format_settings); } diff --git a/src/Storages/System/StorageSystemModels.cpp b/src/Storages/System/StorageSystemModels.cpp index 3df48e830bb..4a4dbbc69df 100644 --- a/src/Storages/System/StorageSystemModels.cpp +++ b/src/Storages/System/StorageSystemModels.cpp @@ -38,7 +38,7 @@ void StorageSystemModels::fillData(MutableColumns & res_columns, ContextPtr cont if (load_result.object) { - const auto model_ptr = std::static_pointer_cast(load_result.object); + const auto model_ptr = std::static_pointer_cast(load_result.object); res_columns[3]->insert(model_ptr->getTypeName()); } else diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index a329b01e9f2..644ab5d57c2 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -57,6 +57,7 @@ namespace ErrorCodes { extern const int ARGUMENT_OUT_OF_BOUND; extern const int BAD_ARGUMENTS; + extern const int SYNTAX_ERROR; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int INCORRECT_QUERY; @@ -262,7 +263,13 @@ namespace IntervalKind strToIntervalKind(const String& interval_str) { - if (interval_str == "Second") + if (interval_str == "Nanosecond") + return IntervalKind::Nanosecond; + else if (interval_str == "Microsecond") + return IntervalKind::Microsecond; + else if 
(interval_str == "Millisecond") + return IntervalKind::Millisecond; + else if (interval_str == "Second") return IntervalKind::Second; else if (interval_str == "Minute") return IntervalKind::Minute; @@ -307,6 +314,12 @@ namespace { switch (kind) { + case IntervalKind::Nanosecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Microsecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Millisecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); #define CASE_WINDOW_KIND(KIND) \ case IntervalKind::KIND: { \ return AddTime::execute(time_sec, num_units, time_zone); \ @@ -738,6 +751,12 @@ UInt32 StorageWindowView::getWindowLowerBound(UInt32 time_sec) switch (window_interval_kind) { + case IntervalKind::Nanosecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Microsecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Millisecond: + throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); #define CASE_WINDOW_KIND(KIND) \ case IntervalKind::KIND: \ { \ @@ -773,6 +792,13 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) switch (window_interval_kind) { + case IntervalKind::Nanosecond: + throw Exception("Fractional seconds are not supported by window view yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Microsecond: + throw Exception("Fractional seconds are not supported by window view yet", ErrorCodes::SYNTAX_ERROR); + case IntervalKind::Millisecond: + throw Exception("Fractional seconds are not supported by window view yet", ErrorCodes::SYNTAX_ERROR); + #define CASE_WINDOW_KIND(KIND) \ case IntervalKind::KIND: \ { \ diff --git a/tests/ci/ast_fuzzer_check.py 
b/tests/ci/ast_fuzzer_check.py index c330d1c725b..94f5eff51d7 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -9,11 +9,10 @@ from github import Github from env_helper import ( GITHUB_REPOSITORY, - TEMP_PATH, - REPO_COPY, + GITHUB_RUN_URL, REPORTS_PATH, - GITHUB_SERVER_URL, - GITHUB_RUN_ID, + REPO_COPY, + TEMP_PATH, ) from s3_helper import S3Helper from get_robot_token import get_best_robot_token @@ -126,7 +125,7 @@ if __name__ == "__main__": logging.info("Exception uploading file %s text %s", f, ex) paths[f] = "" - report_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" + report_url = GITHUB_RUN_URL if paths["runlog.log"]: report_url = paths["runlog.log"] if paths["main.log"]: diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 1cee5fd42de..5afe2991073 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -11,7 +11,7 @@ from env_helper import ( TEMP_PATH, GITHUB_REPOSITORY, GITHUB_SERVER_URL, - GITHUB_RUN_ID, + GITHUB_RUN_URL, ) from report import create_build_html_report from s3_helper import S3Helper @@ -180,9 +180,7 @@ if __name__ == "__main__": branch_name = "PR #{}".format(pr_info.number) branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_info.number}" commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{pr_info.sha}" - task_url = ( - f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID or '0'}" - ) + task_url = GITHUB_RUN_URL report = create_build_html_report( build_check_name, build_results, diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 818478f6430..3d0cc468aec 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -11,7 +11,7 @@ from typing import Dict, List, Optional, Set, Tuple, Union from github import Github -from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP +from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, 
GITHUB_RUN_URL from s3_helper import S3Helper from pr_info import PRInfo from get_robot_token import get_best_robot_token, get_parameter_from_ssm @@ -234,6 +234,7 @@ def build_and_push_one_image( with open(build_log, "wb") as bl: cmd = ( "docker buildx build --builder default " + f"--label build-url={GITHUB_RUN_URL} " f"{from_tag_arg}" f"--build-arg BUILDKIT_INLINE_CACHE=1 " f"--tag {image.repo}:{version_string} " diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index 27bfe07db53..2b864b6b94c 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -4,6 +4,7 @@ import os import unittest from unittest.mock import patch +from env_helper import GITHUB_RUN_URL from pr_info import PRInfo import docker_images_check as di @@ -117,7 +118,8 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "docker buildx build --builder default --build-arg FROM_TAG=version " + f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " + "--build-arg FROM_TAG=version " "--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version --cache-from " "type=registry,ref=name:version --push --progress plain path", mock_popen.call_args.args, @@ -133,7 +135,8 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "docker buildx build --builder default --build-arg FROM_TAG=version2 " + f"docker buildx build --builder default --label build-url={GITHUB_RUN_URL} " + "--build-arg FROM_TAG=version2 " "--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version2 --cache-from " "type=registry,ref=name:version2 --progress plain path", mock_popen.call_args.args, @@ -149,7 +152,7 @@ class TestDockerImageCheck(unittest.TestCase): mock_popen.assert_called_once() mock_machine.assert_not_called() self.assertIn( - "docker buildx build --builder default " + f"docker buildx build --builder default --label 
build-url={GITHUB_RUN_URL} " "--build-arg BUILDKIT_INLINE_CACHE=1 --tag name:version2 --cache-from " "type=registry,ref=name:version2 --progress plain path", mock_popen.call_args.args, diff --git a/tests/ci/env_helper.py b/tests/ci/env_helper.py index 90178e5c56a..c34162ba51a 100644 --- a/tests/ci/env_helper.py +++ b/tests/ci/env_helper.py @@ -7,9 +7,10 @@ CACHES_PATH = os.getenv("CACHES_PATH", TEMP_PATH) CLOUDFLARE_TOKEN = os.getenv("CLOUDFLARE_TOKEN") GITHUB_EVENT_PATH = os.getenv("GITHUB_EVENT_PATH") GITHUB_REPOSITORY = os.getenv("GITHUB_REPOSITORY", "ClickHouse/ClickHouse") -GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID") +GITHUB_RUN_ID = os.getenv("GITHUB_RUN_ID", "0") GITHUB_SERVER_URL = os.getenv("GITHUB_SERVER_URL", "https://github.com") GITHUB_WORKSPACE = os.getenv("GITHUB_WORKSPACE", os.path.abspath("../../")) +GITHUB_RUN_URL = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" IMAGES_PATH = os.getenv("IMAGES_PATH") REPORTS_PATH = os.getenv("REPORTS_PATH", "./reports") REPO_COPY = os.getenv("REPO_COPY", os.path.abspath("../../")) diff --git a/tests/ci/finish_check.py b/tests/ci/finish_check.py index 79cea83b1c8..289e32406ef 100644 --- a/tests/ci/finish_check.py +++ b/tests/ci/finish_check.py @@ -2,7 +2,7 @@ import logging from github import Github -from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_ID +from env_helper import GITHUB_RUN_URL from pr_info import PRInfo from get_robot_token import get_best_robot_token from commit_status_helper import get_commit @@ -33,7 +33,7 @@ if __name__ == "__main__": gh = Github(get_best_robot_token()) commit = get_commit(gh, pr_info.sha) - url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" + url = GITHUB_RUN_URL statuses = filter_statuses(list(commit.get_statuses())) if NAME in statuses and statuses[NAME].state == "pending": commit.create_status( diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py index 
761b1ac9257..c6ce86b2ce1 100644 --- a/tests/ci/performance_comparison_check.py +++ b/tests/ci/performance_comparison_check.py @@ -11,6 +11,7 @@ import re from github import Github +from env_helper import GITHUB_RUN_URL from pr_info import PRInfo from s3_helper import S3Helper from get_robot_token import get_best_robot_token @@ -88,9 +89,9 @@ if __name__ == "__main__": else: pr_link = f"https://github.com/ClickHouse/ClickHouse/pull/{pr_info.number}" - task_url = f"https://github.com/ClickHouse/ClickHouse/actions/runs/{os.getenv('GITHUB_RUN_ID')}" - docker_env += ' -e CHPC_ADD_REPORT_LINKS="Job (actions) Tested commit"'.format( - task_url, pr_link + docker_env += ( + f' -e CHPC_ADD_REPORT_LINKS="' + f'Job (actions) Tested commit"' ) if "RUN_BY_HASH_TOTAL" in os.environ: @@ -199,7 +200,7 @@ if __name__ == "__main__": status = "failure" message = "No message in report." - report_url = task_url + report_url = GITHUB_RUN_URL if paths["runlog.log"]: report_url = paths["runlog.log"] diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 64e22712059..ee4399792ae 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -8,7 +8,7 @@ from build_download_helper import get_with_retries from env_helper import ( GITHUB_REPOSITORY, GITHUB_SERVER_URL, - GITHUB_RUN_ID, + GITHUB_RUN_URL, GITHUB_EVENT_PATH, ) @@ -111,7 +111,7 @@ class PRInfo: self.sha = github_event["pull_request"]["head"]["sha"] repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" - self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}" + self.task_url = GITHUB_RUN_URL self.repo_full_name = GITHUB_REPOSITORY self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" @@ -142,7 +142,7 @@ class PRInfo: self.sha = github_event["after"] pull_request = get_pr_for_commit(self.sha, github_event["ref"]) repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" - self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}" + self.task_url = GITHUB_RUN_URL self.commit_html_url = 
f"{repo_prefix}/commits/{self.sha}" self.repo_full_name = GITHUB_REPOSITORY if pull_request is None or pull_request["state"] == "closed": @@ -180,7 +180,7 @@ class PRInfo: self.number = 0 self.labels = {} repo_prefix = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}" - self.task_url = f"{repo_prefix}/actions/runs/{GITHUB_RUN_ID or '0'}" + self.task_url = GITHUB_RUN_URL self.commit_html_url = f"{repo_prefix}/commits/{self.sha}" self.repo_full_name = GITHUB_REPOSITORY self.pr_html_url = f"{repo_prefix}/commits/{ref}" diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 5b89082532d..9c7ba13f8e4 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -5,7 +5,7 @@ import re from typing import Tuple from github import Github -from env_helper import GITHUB_RUN_ID, GITHUB_REPOSITORY, GITHUB_SERVER_URL +from env_helper import GITHUB_RUN_URL, GITHUB_REPOSITORY, GITHUB_SERVER_URL from pr_info import PRInfo from get_robot_token import get_best_robot_token from commit_status_helper import get_commit @@ -231,7 +231,7 @@ if __name__ == "__main__": ) sys.exit(1) - url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" + url = GITHUB_RUN_URL if not can_run: print("::notice ::Cannot run") commit.create_status( diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index f7b74e8d5dd..289fc4b3184 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -2,7 +2,7 @@ import os import logging import ast -from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_ID +from env_helper import GITHUB_SERVER_URL, GITHUB_REPOSITORY, GITHUB_RUN_URL from report import ReportColorTheme, create_test_html_report @@ -66,7 +66,7 @@ def upload_results( branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_number}" commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{commit_sha}" - task_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/actions/runs/{GITHUB_RUN_ID}" + task_url = 
GITHUB_RUN_URL if additional_urls: raw_log_url = additional_urls[0] diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 02e22ee0c4d..3bb547333e7 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -238,7 +238,7 @@ def _update_dockerfile(repo_path: str, version: ClickHouseVersion): def update_version_local(repo_path, version, version_type="testing"): update_contributors() version.with_description(version_type) - update_cmake_version(version, version_type) + update_cmake_version(version) _update_changelog(repo_path, version) _update_dockerfile(repo_path, version) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 9dd05cacce4..2c830e6ea40 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -373,6 +373,11 @@ class SettingsRandomizer: "priority": lambda: int(abs(random.gauss(0, 2))), "output_format_parallel_formatting": lambda: random.randint(0, 1), "input_format_parallel_parsing": lambda: random.randint(0, 1), + "min_chunk_bytes_for_parallel_parsing": lambda: max(1024, int(random.gauss(10 * 1024 * 1024, 5 * 1000 * 1000))), + "max_read_buffer_size": lambda: random.randint(1, 20) if random.random() < 0.1 else random.randint(500000, 1048576), + "prefer_localhost_replica": lambda: random.randint(0, 1), + "max_block_size": lambda: random.randint(8000, 100000), + "max_threads": lambda: random.randint(1, 64), } @staticmethod diff --git a/tests/config/config.d/zookeeper.xml b/tests/config/config.d/zookeeper.xml index 4fa529a6180..63057224ef9 100644 --- a/tests/config/config.d/zookeeper.xml +++ b/tests/config/config.d/zookeeper.xml @@ -1,5 +1,7 @@ + + random localhost 9181 diff --git a/tests/fuzz/all.dict b/tests/fuzz/all.dict index bf25f1fa484..1863cd20bdd 100644 --- a/tests/fuzz/all.dict +++ b/tests/fuzz/all.dict @@ -1459,7 +1459,7 @@ "xor" "xxHash32" "xxHash64" -"yandexConsistentHash" +"kostikConsistentHash" "YEAR" "yearweek" "yesterday" diff --git a/tests/fuzz/dictionaries/functions.dict 
b/tests/fuzz/dictionaries/functions.dict index 722e931dc09..3f393aa6846 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -26,7 +26,7 @@ "toUnixTimestamp64Nano" "toUnixTimestamp64Micro" "jumpConsistentHash" -"yandexConsistentHash" +"kostikConsistentHash" "addressToSymbol" "toJSONString" "JSON_VALUE" diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index d0b5e892f5b..dffd09ae849 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -16,21 +16,28 @@ import traceback import urllib.parse import shlex import urllib3 - -from cassandra.policies import RoundRobinPolicy -import cassandra.cluster -import psycopg2 -import pymongo -import pymysql import requests -from confluent_kafka.avro.cached_schema_registry_client import ( - CachedSchemaRegistryClient, -) + +try: + # Please, add modules that required for specific tests only here. + # So contributors will be able to run most tests locally + # without installing tons of unneeded packages that may be not so easy to install. 
+ from cassandra.policies import RoundRobinPolicy + import cassandra.cluster + import psycopg2 + from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT + import pymongo + import pymysql + from confluent_kafka.avro.cached_schema_registry_client import ( + CachedSchemaRegistryClient, + ) +except Exception as e: + logging.warning(f"Cannot import some modules, some tests may not work: {e}") + from dict2xml import dict2xml from kazoo.client import KazooClient from kazoo.exceptions import KazooException from minio import Minio -from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT from helpers.test_tools import assert_eq_with_retry, exec_query_with_retry from helpers import pytest_xdist_logging_to_separate_files diff --git a/tests/integration/test_distributed_queries_stress/test.py b/tests/integration/test_distributed_queries_stress/test.py index a5df8562676..fce42b4e58b 100644 --- a/tests/integration/test_distributed_queries_stress/test.py +++ b/tests/integration/test_distributed_queries_stress/test.py @@ -67,10 +67,10 @@ def started_cluster(): insert into data (key) select * from numbers(10); create table if not exists dist_one as data engine=Distributed(one_shard, currentDatabase(), data, key); - create table if not exists dist_one_over_dist as data engine=Distributed(one_shard, currentDatabase(), dist_one, yandexConsistentHash(key, 2)); + create table if not exists dist_one_over_dist as data engine=Distributed(one_shard, currentDatabase(), dist_one, kostikConsistentHash(key, 2)); create table if not exists dist_two as data engine=Distributed(two_shards, currentDatabase(), data, key); - create table if not exists dist_two_over_dist as data engine=Distributed(two_shards, currentDatabase(), dist_two, yandexConsistentHash(key, 2)); + create table if not exists dist_two_over_dist as data engine=Distributed(two_shards, currentDatabase(), dist_two, kostikConsistentHash(key, 2)); """ ) yield cluster diff --git 
a/tests/integration/test_s3_zero_copy_replication/test.py b/tests/integration/test_s3_zero_copy_replication/test.py index d7aa4feb1d2..1ce1047ebec 100644 --- a/tests/integration/test_s3_zero_copy_replication/test.py +++ b/tests/integration/test_s3_zero_copy_replication/test.py @@ -361,6 +361,8 @@ def test_s3_zero_copy_with_ttl_delete(cluster, large_data, iterations): ) node1.query("OPTIMIZE TABLE ttl_delete_test FINAL") + + node1.query("SYSTEM SYNC REPLICA ttl_delete_test") node2.query("SYSTEM SYNC REPLICA ttl_delete_test") if large_data: diff --git a/tests/integration/test_system_logs_comment/__init__.py b/tests/integration/test_system_logs_comment/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_system_logs_comment/test.py b/tests/integration/test_system_logs_comment/test.py new file mode 100644 index 00000000000..0659a2689a0 --- /dev/null +++ b/tests/integration/test_system_logs_comment/test.py @@ -0,0 +1,49 @@ +# pylint: disable=line-too-long +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name + +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node_default", stay_alive=True) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_system_logs_comment(): + node.exec_in_container( + [ + "bash", + "-c", + f"""echo " + + + ENGINE = MergeTree + PARTITION BY (event_date) + ORDER BY (event_time) + TTL event_date + INTERVAL 14 DAY DELETE + SETTINGS ttl_only_drop_parts=1 + COMMENT 'test_comment' + + + + + " > /etc/clickhouse-server/config.d/yyy-override-query_log.xml + """, + ] + ) + node.restart_clickhouse() + + node.query("select 1") + node.query("system flush logs") + + comment = node.query("SELECT comment FROM system.tables WHERE name = 'query_log'") + assert comment == "test_comment\n" diff --git 
a/tests/integration/test_zookeeper_config_load_balancing/__init__.py b/tests/integration/test_zookeeper_config_load_balancing/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_load_balancing.xml b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_load_balancing.xml new file mode 100644 index 00000000000..5416e5e82de --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/configs/zookeeper_load_balancing.xml @@ -0,0 +1,19 @@ + + + + random + + zoo1 + 2181 + + + zoo2 + 2181 + + + zoo3 + 2181 + + 3000 + + diff --git a/tests/integration/test_zookeeper_config_load_balancing/test.py b/tests/integration/test_zookeeper_config_load_balancing/test.py new file mode 100644 index 00000000000..56af7513389 --- /dev/null +++ b/tests/integration/test_zookeeper_config_load_balancing/test.py @@ -0,0 +1,427 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.network import PartitionManager + +cluster = ClickHouseCluster( + __file__, zookeeper_config_path="configs/zookeeper_load_balancing.xml" +) + +# use 3-letter hostnames, so getHostNameDifference("nod1", "zoo1") will work as expected +node1 = cluster.add_instance( + "nod1", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"] +) +node2 = cluster.add_instance( + "nod2", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"] +) +node3 = cluster.add_instance( + "nod3", with_zookeeper=True, main_configs=["configs/zookeeper_load_balancing.xml"] +) + + +def change_balancing(old, new, reload=True): + line = "{}<" + old_line = line.format(old) + new_line = line.format(new) + for node in [node1, node2, node3]: + node.replace_in_config( + "/etc/clickhouse-server/config.d/zookeeper_load_balancing.xml", + old_line, + new_line, + ) + if reload: + node.query("select '{}', '{}'".format(old, new)) + node.query("system reload config") + + 
+@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_first_or_random(started_cluster): + try: + change_balancing("random", "first_or_random") + print( + str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + finally: + change_balancing("first_or_random", "random", reload=False) + + +def test_in_order(started_cluster): + try: + change_balancing("random", "in_order") + print( + str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" 
+ == str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + finally: + change_balancing("in_order", "random", reload=False) + + +def test_nearest_hostname(started_cluster): + try: + change_balancing("random", "nearest_hostname") + print( + str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo1_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + 
privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo3_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + finally: + change_balancing("nearest_hostname", "random", reload=False) + + +def test_round_robin(started_cluster): + pm = PartitionManager() + try: + pm._add_rule( + { + "source": node1.ip_address, + "destination": cluster.get_instance_ip("zoo1"), + "action": "REJECT --reject-with tcp-reset", + } + ) + pm._add_rule( + { + "source": node2.ip_address, + "destination": cluster.get_instance_ip("zoo1"), + "action": "REJECT --reject-with tcp-reset", + } + ) + pm._add_rule( + { + "source": node3.ip_address, + "destination": cluster.get_instance_ip("zoo1"), + "action": "REJECT --reject-with tcp-reset", + } + ) + change_balancing("random", "round_robin") + + print( + str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node1.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node2.exec_in_container( + [ + "bash", + "-c", + 
"lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node2.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + print( + str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep ':2181' | grep ESTABLISHED", + ], + privileged=True, + user="root", + ) + ) + ) + assert ( + "1" + == str( + node3.exec_in_container( + [ + "bash", + "-c", + "lsof -a -i4 -i6 -itcp -w | grep 'testzookeeperconfigloadbalancing_zoo2_1.*testzookeeperconfigloadbalancing_default:2181' | grep ESTABLISHED | wc -l", + ], + privileged=True, + user="root", + ) + ).strip() + ) + + finally: + pm.heal_all() + change_balancing("round_robin", "random", reload=False) diff --git a/tests/performance/consistent_hashes.xml b/tests/performance/consistent_hashes.xml index 3610579f545..c65a1151536 100644 --- a/tests/performance/consistent_hashes.xml +++ b/tests/performance/consistent_hashes.xml @@ -3,7 +3,7 @@ hash_func - yandexConsistentHash + kostikConsistentHash jumpConsistentHash diff --git a/tests/queries/0_stateless/00153_transform.reference b/tests/queries/0_stateless/00153_transform.reference index eea4fa0e1a8..8a38f4f8172 100644 --- a/tests/queries/0_stateless/00153_transform.reference +++ b/tests/queries/0_stateless/00153_transform.reference @@ -99,6 +99,6 @@ abc 1 1 Остальные -Яндекс +Bigmir)net Google Остальные diff --git a/tests/queries/0_stateless/00153_transform.sql b/tests/queries/0_stateless/00153_transform.sql index a5e531d36a4..78ec3cd4d1c 100644 --- a/tests/queries/0_stateless/00153_transform.sql +++ b/tests/queries/0_stateless/00153_transform.sql @@ -8,10 +8,10 @@ SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], 0) FROM sys SELECT 
transform(toString(number), ['3', '5', '7'], [111, 222, 333], -1) FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], -1.1) FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], [111, 222.2, 333], 1) FROM system.numbers LIMIT 10; -SELECT transform(1, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; -SELECT transform(2, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; -SELECT transform(3, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; -SELECT transform(4, [2, 3], ['Яндекс', 'Google'], 'Остальные') AS title; +SELECT transform(1, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title; +SELECT transform(2, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title; +SELECT transform(3, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title; +SELECT transform(4, [2, 3], ['Bigmir)net', 'Google'], 'Остальные') AS title; SELECT transform('hello', 'wrong', 1); -- { serverError 43 } SELECT transform('hello', ['wrong'], 1); -- { serverError 43 } SELECT transform('hello', ['wrong'], [1]); -- { serverError 43 } diff --git a/tests/queries/0_stateless/00165_transform_non_const_default.reference b/tests/queries/0_stateless/00165_transform_non_const_default.reference index d66471d9741..01890b91309 100644 --- a/tests/queries/0_stateless/00165_transform_non_const_default.reference +++ b/tests/queries/0_stateless/00165_transform_non_const_default.reference @@ -79,6 +79,6 @@ abc 1 1 Остальные -Яндекс +Meta.ua Google Остальные diff --git a/tests/queries/0_stateless/00165_transform_non_const_default.sql b/tests/queries/0_stateless/00165_transform_non_const_default.sql index f68327f7700..ef3b7c1f1c5 100644 --- a/tests/queries/0_stateless/00165_transform_non_const_default.sql +++ b/tests/queries/0_stateless/00165_transform_non_const_default.sql @@ -6,7 +6,7 @@ SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], materialize SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 
333], materialize(-1)) FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], [111, 222, 333], materialize(-1.1)) FROM system.numbers LIMIT 10; SELECT transform(toString(number), ['3', '5', '7'], [111, 222.2, 333], materialize(1)) FROM system.numbers LIMIT 10; -SELECT transform(1, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title; -SELECT transform(2, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title; -SELECT transform(3, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title; -SELECT transform(4, [2, 3], ['Яндекс', 'Google'], materialize('Остальные')) AS title; +SELECT transform(1, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title; +SELECT transform(2, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title; +SELECT transform(3, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title; +SELECT transform(4, [2, 3], ['Meta.ua', 'Google'], materialize('Остальные')) AS title; diff --git a/tests/queries/0_stateless/00255_array_concat_string.reference b/tests/queries/0_stateless/00255_array_concat_string.reference index 4ffac8e5de0..edd1101beb6 100644 --- a/tests/queries/0_stateless/00255_array_concat_string.reference +++ b/tests/queries/0_stateless/00255_array_concat_string.reference @@ -34,25 +34,25 @@ Hello, World 0,1,2,3,4,5,6,7 0,1,2,3,4,5,6,7,8 -yandex -yandex google -yandex google test -yandex google test 123 -yandex google test 123 -yandex google test 123 hello -yandex google test 123 hello world -yandex google test 123 hello world goodbye -yandex google test 123 hello world goodbye xyz -yandex google test 123 hello world goodbye xyz yandex -yandex google test 123 hello world goodbye xyz yandex google -yandex google test 123 hello world goodbye xyz yandex google test -yandex google test 123 hello world goodbye xyz yandex google test 123 -yandex google test 123 hello world goodbye xyz yandex google test 123 -yandex google test 123 hello world goodbye xyz yandex 
google test 123 hello -yandex google test 123 hello world goodbye xyz yandex google test 123 hello world -yandex google test 123 hello world goodbye xyz yandex google test 123 hello world goodbye -yandex google test 123 hello world goodbye xyz yandex google test 123 hello world goodbye xyz -yandex google test 123 hello world goodbye xyz yandex google test 123 hello world goodbye xyz yandex +meta.ua +meta.ua google +meta.ua google test +meta.ua google test 123 +meta.ua google test 123 +meta.ua google test 123 hello +meta.ua google test 123 hello world +meta.ua google test 123 hello world goodbye +meta.ua google test 123 hello world goodbye xyz +meta.ua google test 123 hello world goodbye xyz meta.ua +meta.ua google test 123 hello world goodbye xyz meta.ua google +meta.ua google test 123 hello world goodbye xyz meta.ua google test +meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 +meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 +meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello +meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world +meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world goodbye +meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world goodbye xyz +meta.ua google test 123 hello world goodbye xyz meta.ua google test 123 hello world goodbye xyz meta.ua 0 01 diff --git a/tests/queries/0_stateless/00255_array_concat_string.sql b/tests/queries/0_stateless/00255_array_concat_string.sql index f4f95956a16..a18d349bac8 100644 --- a/tests/queries/0_stateless/00255_array_concat_string.sql +++ b/tests/queries/0_stateless/00255_array_concat_string.sql @@ -6,7 +6,7 @@ SELECT arrayStringConcat(emptyArrayString()); SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number))) FROM system.numbers LIMIT 10; SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number)), '') FROM system.numbers LIMIT 
10; SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number)), ',') FROM system.numbers LIMIT 10; -SELECT arrayStringConcat(arrayMap(x -> transform(x, [0, 1, 2, 3, 4, 5, 6, 7, 8], ['yandex', 'google', 'test', '123', '', 'hello', 'world', 'goodbye', 'xyz'], ''), arrayMap(x -> x % 9, range(number))), ' ') FROM system.numbers LIMIT 20; +SELECT arrayStringConcat(arrayMap(x -> transform(x, [0, 1, 2, 3, 4, 5, 6, 7, 8], ['meta.ua', 'google', 'test', '123', '', 'hello', 'world', 'goodbye', 'xyz'], ''), arrayMap(x -> x % 9, range(number))), ' ') FROM system.numbers LIMIT 20; SELECT arrayStringConcat(arrayMap(x -> toString(x), range(number % 4))) FROM system.numbers LIMIT 10; SELECT arrayStringConcat([Null, 'hello', Null, 'world', Null, 'xyz', 'def', Null], ';'); SELECT arrayStringConcat([Null::Nullable(String), Null::Nullable(String)], ';'); diff --git a/tests/queries/0_stateless/00296_url_parameters.reference b/tests/queries/0_stateless/00296_url_parameters.reference index 91a7fe8d488..348651d3f7e 100644 --- a/tests/queries/0_stateless/00296_url_parameters.reference +++ b/tests/queries/0_stateless/00296_url_parameters.reference @@ -1,8 +1,8 @@ ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] b d f d f h b d d h f h b d f d f h b d d h f h -http://yandex.ru/?c=d http://yandex.ru/?a=b http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h 
http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f //yandex.ru/?c=d //yandex.ru/?a=b //yandex.ru/?a=b&c=d# //yandex.ru/?a&c=d#e=f //yandex.ru/?a#e=f //yandex.ru/?a&c=d# //yandex.ru/?a=b&c=d#e=f //yandex.ru/?c=d#e //yandex.ru/?a=b#e //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b#e&g=h //yandex.ru/?a=b&c=d#e&g=h //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b&c=d#test?e=f&g=h //yandex.ru/?a=b&c=d#test?g=h //yandex.ru/?a=b&c=d#test?e=f +http://bigmir.net/?c=d http://bigmir.net/?a=b http://bigmir.net/?a=b&c=d# http://bigmir.net/?a&c=d#e=f http://bigmir.net/?a#e=f http://bigmir.net/?a&c=d# http://bigmir.net/?a=b&c=d#e=f http://bigmir.net/?c=d#e http://bigmir.net/?a=b#e http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b#e&g=h http://bigmir.net/?a=b&c=d#e&g=h http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b&c=d#test?e=f&g=h http://bigmir.net/?a=b&c=d#test?g=h http://bigmir.net/?a=b&c=d#test?e=f //bigmir.net/?c=d //bigmir.net/?a=b //bigmir.net/?a=b&c=d# //bigmir.net/?a&c=d#e=f //bigmir.net/?a#e=f //bigmir.net/?a&c=d# //bigmir.net/?a=b&c=d#e=f //bigmir.net/?c=d#e //bigmir.net/?a=b#e //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b#e&g=h //bigmir.net/?a=b&c=d#e&g=h //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b&c=d#test?e=f&g=h //bigmir.net/?a=b&c=d#test?g=h //bigmir.net/?a=b&c=d#test?e=f ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e=f'] ['a','c=d','e=f'] ['a=b','c=d','e=f','g=h'] ['a=b','c=d'] ['a=b','c=d','e','g=h'] ['a=b','c=d','e=f','g=h'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] ['a','c'] ['a','c','e'] ['a','c','e'] ['a','c','e','g'] ['a','c'] ['a','c','e','g'] ['a','c','e','g'] b d f d f h b d d h f h b d f d f h b d d h f h -http://yandex.ru/?c=d http://yandex.ru/?a=b 
http://yandex.ru/?a=b&c=d# http://yandex.ru/?a&c=d#e=f http://yandex.ru/?a#e=f http://yandex.ru/?a&c=d# http://yandex.ru/?a=b&c=d#e=f http://yandex.ru/?c=d#e http://yandex.ru/?a=b#e http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b#e&g=h http://yandex.ru/?a=b&c=d#e&g=h http://yandex.ru/?a=b&c=d#e http://yandex.ru/?a=b&c=d#test?e=f&g=h http://yandex.ru/?a=b&c=d#test?g=h http://yandex.ru/?a=b&c=d#test?e=f //yandex.ru/?c=d //yandex.ru/?a=b //yandex.ru/?a=b&c=d# //yandex.ru/?a&c=d#e=f //yandex.ru/?a#e=f //yandex.ru/?a&c=d# //yandex.ru/?a=b&c=d#e=f //yandex.ru/?c=d#e //yandex.ru/?a=b#e //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b#e&g=h //yandex.ru/?a=b&c=d#e&g=h //yandex.ru/?a=b&c=d#e //yandex.ru/?a=b&c=d#test?e=f&g=h //yandex.ru/?a=b&c=d#test?g=h //yandex.ru/?a=b&c=d#test?e=f +http://bigmir.net/?c=d http://bigmir.net/?a=b http://bigmir.net/?a=b&c=d# http://bigmir.net/?a&c=d#e=f http://bigmir.net/?a#e=f http://bigmir.net/?a&c=d# http://bigmir.net/?a=b&c=d#e=f http://bigmir.net/?c=d#e http://bigmir.net/?a=b#e http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b#e&g=h http://bigmir.net/?a=b&c=d#e&g=h http://bigmir.net/?a=b&c=d#e http://bigmir.net/?a=b&c=d#test?e=f&g=h http://bigmir.net/?a=b&c=d#test?g=h http://bigmir.net/?a=b&c=d#test?e=f //bigmir.net/?c=d //bigmir.net/?a=b //bigmir.net/?a=b&c=d# //bigmir.net/?a&c=d#e=f //bigmir.net/?a#e=f //bigmir.net/?a&c=d# //bigmir.net/?a=b&c=d#e=f //bigmir.net/?c=d#e //bigmir.net/?a=b#e //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b#e&g=h //bigmir.net/?a=b&c=d#e&g=h //bigmir.net/?a=b&c=d#e //bigmir.net/?a=b&c=d#test?e=f&g=h //bigmir.net/?a=b&c=d#test?g=h //bigmir.net/?a=b&c=d#test?e=f diff --git a/tests/queries/0_stateless/00296_url_parameters.sql b/tests/queries/0_stateless/00296_url_parameters.sql index f6dad306319..8a96e3888fe 100644 --- a/tests/queries/0_stateless/00296_url_parameters.sql +++ b/tests/queries/0_stateless/00296_url_parameters.sql @@ -1,200 +1,200 @@ SELECT - extractURLParameters('http://yandex.ru/?a=b&c=d'), - 
extractURLParameters('http://yandex.ru/?a=b&c=d#e=f'), - extractURLParameters('http://yandex.ru/?a&c=d#e=f'), - extractURLParameters('http://yandex.ru/?a=b&c=d#e=f&g=h'), - extractURLParameters('http://yandex.ru/?a=b&c=d#e'), - extractURLParameters('http://yandex.ru/?a=b&c=d#e&g=h'), - extractURLParameters('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), - extractURLParameters('//yandex.ru/?a=b&c=d'), - extractURLParameters('//yandex.ru/?a=b&c=d#e=f'), - extractURLParameters('//yandex.ru/?a&c=d#e=f'), - extractURLParameters('//yandex.ru/?a=b&c=d#e=f&g=h'), - extractURLParameters('//yandex.ru/?a=b&c=d#e'), - extractURLParameters('//yandex.ru/?a=b&c=d#e&g=h'), - extractURLParameters('//yandex.ru/?a=b&c=d#test?e=f&g=h'); + extractURLParameters('http://bigmir.net/?a=b&c=d'), + extractURLParameters('http://bigmir.net/?a=b&c=d#e=f'), + extractURLParameters('http://bigmir.net/?a&c=d#e=f'), + extractURLParameters('http://bigmir.net/?a=b&c=d#e=f&g=h'), + extractURLParameters('http://bigmir.net/?a=b&c=d#e'), + extractURLParameters('http://bigmir.net/?a=b&c=d#e&g=h'), + extractURLParameters('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), + extractURLParameters('//bigmir.net/?a=b&c=d'), + extractURLParameters('//bigmir.net/?a=b&c=d#e=f'), + extractURLParameters('//bigmir.net/?a&c=d#e=f'), + extractURLParameters('//bigmir.net/?a=b&c=d#e=f&g=h'), + extractURLParameters('//bigmir.net/?a=b&c=d#e'), + extractURLParameters('//bigmir.net/?a=b&c=d#e&g=h'), + extractURLParameters('//bigmir.net/?a=b&c=d#test?e=f&g=h'); SELECT - extractURLParameterNames('http://yandex.ru/?a=b&c=d'), - extractURLParameterNames('http://yandex.ru/?a=b&c=d#e=f'), - extractURLParameterNames('http://yandex.ru/?a&c=d#e=f'), - extractURLParameterNames('http://yandex.ru/?a=b&c=d#e=f&g=h'), - extractURLParameterNames('http://yandex.ru/?a=b&c=d#e'), - extractURLParameterNames('http://yandex.ru/?a=b&c=d#e&g=h'), - extractURLParameterNames('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), - 
extractURLParameterNames('//yandex.ru/?a=b&c=d'), - extractURLParameterNames('//yandex.ru/?a=b&c=d#e=f'), - extractURLParameterNames('//yandex.ru/?a&c=d#e=f'), - extractURLParameterNames('//yandex.ru/?a=b&c=d#e=f&g=h'), - extractURLParameterNames('//yandex.ru/?a=b&c=d#e'), - extractURLParameterNames('//yandex.ru/?a=b&c=d#e&g=h'), - extractURLParameterNames('//yandex.ru/?a=b&c=d#test?e=f&g=h'); + extractURLParameterNames('http://bigmir.net/?a=b&c=d'), + extractURLParameterNames('http://bigmir.net/?a=b&c=d#e=f'), + extractURLParameterNames('http://bigmir.net/?a&c=d#e=f'), + extractURLParameterNames('http://bigmir.net/?a=b&c=d#e=f&g=h'), + extractURLParameterNames('http://bigmir.net/?a=b&c=d#e'), + extractURLParameterNames('http://bigmir.net/?a=b&c=d#e&g=h'), + extractURLParameterNames('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), + extractURLParameterNames('//bigmir.net/?a=b&c=d'), + extractURLParameterNames('//bigmir.net/?a=b&c=d#e=f'), + extractURLParameterNames('//bigmir.net/?a&c=d#e=f'), + extractURLParameterNames('//bigmir.net/?a=b&c=d#e=f&g=h'), + extractURLParameterNames('//bigmir.net/?a=b&c=d#e'), + extractURLParameterNames('//bigmir.net/?a=b&c=d#e&g=h'), + extractURLParameterNames('//bigmir.net/?a=b&c=d#test?e=f&g=h'); SELECT - extractURLParameter('http://yandex.ru/?a=b&c=d', 'a'), - extractURLParameter('http://yandex.ru/?a=b&c=d', 'c'), - extractURLParameter('http://yandex.ru/?a=b&c=d#e=f', 'e'), - extractURLParameter('http://yandex.ru/?a&c=d#e=f', 'a'), - extractURLParameter('http://yandex.ru/?a&c=d#e=f', 'c'), - extractURLParameter('http://yandex.ru/?a&c=d#e=f', 'e'), - extractURLParameter('http://yandex.ru/?a=b&c=d#e=f&g=h', 'g'), - extractURLParameter('http://yandex.ru/?a=b&c=d#e', 'a'), - extractURLParameter('http://yandex.ru/?a=b&c=d#e', 'c'), - extractURLParameter('http://yandex.ru/?a=b&c=d#e', 'e'), - extractURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'c'), - extractURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'e'), - 
extractURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'g'), - extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), - extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), - extractURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'), - extractURLParameter('//yandex.ru/?a=b&c=d', 'a'), - extractURLParameter('//yandex.ru/?a=b&c=d', 'c'), - extractURLParameter('//yandex.ru/?a=b&c=d#e=f', 'e'), - extractURLParameter('//yandex.ru/?a&c=d#e=f', 'a'), - extractURLParameter('//yandex.ru/?a&c=d#e=f', 'c'), - extractURLParameter('//yandex.ru/?a&c=d#e=f', 'e'), - extractURLParameter('//yandex.ru/?a=b&c=d#e=f&g=h', 'g'), - extractURLParameter('//yandex.ru/?a=b&c=d#e', 'a'), - extractURLParameter('//yandex.ru/?a=b&c=d#e', 'c'), - extractURLParameter('//yandex.ru/?a=b&c=d#e', 'e'), - extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'c'), - extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'e'), - extractURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'g'), - extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), - extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), - extractURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'); + extractURLParameter('http://bigmir.net/?a=b&c=d', 'a'), + extractURLParameter('http://bigmir.net/?a=b&c=d', 'c'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e=f', 'e'), + extractURLParameter('http://bigmir.net/?a&c=d#e=f', 'a'), + extractURLParameter('http://bigmir.net/?a&c=d#e=f', 'c'), + extractURLParameter('http://bigmir.net/?a&c=d#e=f', 'e'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e=f&g=h', 'g'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e', 'a'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e', 'c'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e', 'e'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'c'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'e'), + extractURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'g'), + 
extractURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'), + extractURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'), + extractURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'g'), + extractURLParameter('//bigmir.net/?a=b&c=d', 'a'), + extractURLParameter('//bigmir.net/?a=b&c=d', 'c'), + extractURLParameter('//bigmir.net/?a=b&c=d#e=f', 'e'), + extractURLParameter('//bigmir.net/?a&c=d#e=f', 'a'), + extractURLParameter('//bigmir.net/?a&c=d#e=f', 'c'), + extractURLParameter('//bigmir.net/?a&c=d#e=f', 'e'), + extractURLParameter('//bigmir.net/?a=b&c=d#e=f&g=h', 'g'), + extractURLParameter('//bigmir.net/?a=b&c=d#e', 'a'), + extractURLParameter('//bigmir.net/?a=b&c=d#e', 'c'), + extractURLParameter('//bigmir.net/?a=b&c=d#e', 'e'), + extractURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'c'), + extractURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'e'), + extractURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'g'), + extractURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'), + extractURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'), + extractURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'g'); SELECT - cutURLParameter('http://yandex.ru/?a=b&c=d', 'a'), - cutURLParameter('http://yandex.ru/?a=b&c=d', 'c'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e=f', 'e'), - cutURLParameter('http://yandex.ru/?a&c=d#e=f', 'a'), - cutURLParameter('http://yandex.ru/?a&c=d#e=f', 'c'), - cutURLParameter('http://yandex.ru/?a&c=d#e=f', 'e'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e=f&g=h', 'g'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e', 'a'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e', 'c'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e', 'e'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'c'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'e'), - cutURLParameter('http://yandex.ru/?a=b&c=d#e&g=h', 'g'), - cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), - 
cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), - cutURLParameter('http://yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'), - cutURLParameter('//yandex.ru/?a=b&c=d', 'a'), - cutURLParameter('//yandex.ru/?a=b&c=d', 'c'), - cutURLParameter('//yandex.ru/?a=b&c=d#e=f', 'e'), - cutURLParameter('//yandex.ru/?a&c=d#e=f', 'a'), - cutURLParameter('//yandex.ru/?a&c=d#e=f', 'c'), - cutURLParameter('//yandex.ru/?a&c=d#e=f', 'e'), - cutURLParameter('//yandex.ru/?a=b&c=d#e=f&g=h', 'g'), - cutURLParameter('//yandex.ru/?a=b&c=d#e', 'a'), - cutURLParameter('//yandex.ru/?a=b&c=d#e', 'c'), - cutURLParameter('//yandex.ru/?a=b&c=d#e', 'e'), - cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'c'), - cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'e'), - cutURLParameter('//yandex.ru/?a=b&c=d#e&g=h', 'g'), - cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'test'), - cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'e'), - cutURLParameter('//yandex.ru/?a=b&c=d#test?e=f&g=h', 'g'); + cutURLParameter('http://bigmir.net/?a=b&c=d', 'a'), + cutURLParameter('http://bigmir.net/?a=b&c=d', 'c'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e=f', 'e'), + cutURLParameter('http://bigmir.net/?a&c=d#e=f', 'a'), + cutURLParameter('http://bigmir.net/?a&c=d#e=f', 'c'), + cutURLParameter('http://bigmir.net/?a&c=d#e=f', 'e'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e=f&g=h', 'g'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e', 'a'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e', 'c'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e', 'e'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'c'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'e'), + cutURLParameter('http://bigmir.net/?a=b&c=d#e&g=h', 'g'), + cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'), + cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'), + cutURLParameter('http://bigmir.net/?a=b&c=d#test?e=f&g=h', 'g'), + cutURLParameter('//bigmir.net/?a=b&c=d', 'a'), + 
cutURLParameter('//bigmir.net/?a=b&c=d', 'c'), + cutURLParameter('//bigmir.net/?a=b&c=d#e=f', 'e'), + cutURLParameter('//bigmir.net/?a&c=d#e=f', 'a'), + cutURLParameter('//bigmir.net/?a&c=d#e=f', 'c'), + cutURLParameter('//bigmir.net/?a&c=d#e=f', 'e'), + cutURLParameter('//bigmir.net/?a=b&c=d#e=f&g=h', 'g'), + cutURLParameter('//bigmir.net/?a=b&c=d#e', 'a'), + cutURLParameter('//bigmir.net/?a=b&c=d#e', 'c'), + cutURLParameter('//bigmir.net/?a=b&c=d#e', 'e'), + cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'c'), + cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'e'), + cutURLParameter('//bigmir.net/?a=b&c=d#e&g=h', 'g'), + cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'test'), + cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'e'), + cutURLParameter('//bigmir.net/?a=b&c=d#test?e=f&g=h', 'g'); SELECT - extractURLParameters(materialize('http://yandex.ru/?a=b&c=d')), - extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e=f')), - extractURLParameters(materialize('http://yandex.ru/?a&c=d#e=f')), - extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h')), - extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e')), - extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#e&g=h')), - extractURLParameters(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')), - extractURLParameters(materialize('//yandex.ru/?a=b&c=d')), - extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e=f')), - extractURLParameters(materialize('//yandex.ru/?a&c=d#e=f')), - extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e=f&g=h')), - extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e')), - extractURLParameters(materialize('//yandex.ru/?a=b&c=d#e&g=h')), - extractURLParameters(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h')); + extractURLParameters(materialize('http://bigmir.net/?a=b&c=d')), + extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e=f')), + extractURLParameters(materialize('http://bigmir.net/?a&c=d#e=f')), + 
extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h')), + extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e')), + extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#e&g=h')), + extractURLParameters(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h')), + extractURLParameters(materialize('//bigmir.net/?a=b&c=d')), + extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e=f')), + extractURLParameters(materialize('//bigmir.net/?a&c=d#e=f')), + extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e=f&g=h')), + extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e')), + extractURLParameters(materialize('//bigmir.net/?a=b&c=d#e&g=h')), + extractURLParameters(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h')); SELECT - extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d')), - extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e=f')), - extractURLParameterNames(materialize('http://yandex.ru/?a&c=d#e=f')), - extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h')), - extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e')), - extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#e&g=h')), - extractURLParameterNames(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h')), - extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d')), - extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e=f')), - extractURLParameterNames(materialize('//yandex.ru/?a&c=d#e=f')), - extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e=f&g=h')), - extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e')), - extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#e&g=h')), - extractURLParameterNames(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h')); + extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d')), + extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e=f')), + 
extractURLParameterNames(materialize('http://bigmir.net/?a&c=d#e=f')), + extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h')), + extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e')), + extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#e&g=h')), + extractURLParameterNames(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h')), + extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d')), + extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e=f')), + extractURLParameterNames(materialize('//bigmir.net/?a&c=d#e=f')), + extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e=f&g=h')), + extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e')), + extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#e&g=h')), + extractURLParameterNames(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h')); SELECT - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'a'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'c'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f'), 'e'), - extractURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'a'), - extractURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'c'), - extractURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'e'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h'), 'g'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'a'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'c'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'e'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'c'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'e'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'g'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), - 
extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), - extractURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'a'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'c'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f'), 'e'), - extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'a'), - extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'c'), - extractURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'e'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f&g=h'), 'g'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'a'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'c'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'e'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'c'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'e'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'g'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), - extractURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'); + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'a'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'c'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f'), 'e'), + extractURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'a'), + extractURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'c'), + extractURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'e'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h'), 'g'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'a'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'c'), + 
extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'e'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'c'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'e'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'g'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'), + extractURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'a'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'c'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f'), 'e'), + extractURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'a'), + extractURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'c'), + extractURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'e'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f&g=h'), 'g'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'a'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'c'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'e'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'c'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'e'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'g'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'), + extractURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g'); SELECT - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'a'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d'), 'c'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f'), 'e'), - cutURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'a'), - 
cutURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'c'), - cutURLParameter(materialize('http://yandex.ru/?a&c=d#e=f'), 'e'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e=f&g=h'), 'g'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'a'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'c'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e'), 'e'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'c'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'e'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#e&g=h'), 'g'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), - cutURLParameter(materialize('http://yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'a'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d'), 'c'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f'), 'e'), - cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'a'), - cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'c'), - cutURLParameter(materialize('//yandex.ru/?a&c=d#e=f'), 'e'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e=f&g=h'), 'g'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'a'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'c'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e'), 'e'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'c'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'e'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#e&g=h'), 'g'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'test'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'e'), - cutURLParameter(materialize('//yandex.ru/?a=b&c=d#test?e=f&g=h'), 'g'); + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'a'), + 
cutURLParameter(materialize('http://bigmir.net/?a=b&c=d'), 'c'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f'), 'e'), + cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'a'), + cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'c'), + cutURLParameter(materialize('http://bigmir.net/?a&c=d#e=f'), 'e'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e=f&g=h'), 'g'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'a'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'c'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e'), 'e'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'c'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'e'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#e&g=h'), 'g'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'), + cutURLParameter(materialize('http://bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'a'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d'), 'c'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f'), 'e'), + cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'a'), + cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'c'), + cutURLParameter(materialize('//bigmir.net/?a&c=d#e=f'), 'e'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e=f&g=h'), 'g'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'a'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'c'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e'), 'e'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'c'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'e'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#e&g=h'), 'g'), + 
cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'test'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'e'), + cutURLParameter(materialize('//bigmir.net/?a=b&c=d#test?e=f&g=h'), 'g'); diff --git a/tests/queries/0_stateless/00381_first_significant_subdomain.reference b/tests/queries/0_stateless/00381_first_significant_subdomain.reference index 1f1230a2104..086f3b0f9ce 100644 --- a/tests/queries/0_stateless/00381_first_significant_subdomain.reference +++ b/tests/queries/0_stateless/00381_first_significant_subdomain.reference @@ -1,3 +1,3 @@ canada congo net-domena -yandex yandex yandex яндекс yandex +meta bigmir yahoo гугл meta canada hello hello canada diff --git a/tests/queries/0_stateless/00381_first_significant_subdomain.sql b/tests/queries/0_stateless/00381_first_significant_subdomain.sql index 5badd14f200..5d8c53afc9c 100644 --- a/tests/queries/0_stateless/00381_first_significant_subdomain.sql +++ b/tests/queries/0_stateless/00381_first_significant_subdomain.sql @@ -4,12 +4,12 @@ SELECT firstSignificantSubdomain('http://pochemu.net-domena.ru') AS why; SELECT - firstSignificantSubdomain('ftp://www.yandex.com.tr/news.html'), - firstSignificantSubdomain('https://www.yandex.ua/news.html'), - firstSignificantSubdomain('magnet:yandex.abc'), - firstSignificantSubdomain('ftp://www.yandex.co.uk/news.html'), - firstSignificantSubdomain('https://api.www3.static.dev.ввв.яндекс.рф'), - firstSignificantSubdomain('//www.yandex.com.tr/news.html'); + firstSignificantSubdomain('ftp://www.meta.com.ua/news.html'), + firstSignificantSubdomain('https://www.bigmir.net/news.html'), + firstSignificantSubdomain('magnet:ukr.abc'), + firstSignificantSubdomain('ftp://www.yahoo.co.jp/news.html'), + firstSignificantSubdomain('https://api.www3.static.dev.ввв.гугл.ком'), + firstSignificantSubdomain('//www.meta.com.ua/news.html'); SELECT firstSignificantSubdomain('http://hello.canada.c'), diff --git a/tests/queries/0_stateless/00505_secure.sh 
b/tests/queries/0_stateless/00505_secure.sh index e69515253ed..b7c12911b90 100755 --- a/tests/queries/0_stateless/00505_secure.sh +++ b/tests/queries/0_stateless/00505_secure.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-random-settings # set -x diff --git a/tests/queries/0_stateless/00580_consistent_hashing_functions.sql b/tests/queries/0_stateless/00580_consistent_hashing_functions.sql index 08e785929c7..f470642d391 100644 --- a/tests/queries/0_stateless/00580_consistent_hashing_functions.sql +++ b/tests/queries/0_stateless/00580_consistent_hashing_functions.sql @@ -1,6 +1,6 @@ -- Tags: no-fasttest SELECT jumpConsistentHash(1, 1), jumpConsistentHash(42, 57), jumpConsistentHash(256, 1024), jumpConsistentHash(3735883980, 1), jumpConsistentHash(3735883980, 666), jumpConsistentHash(16045690984833335023, 255); -SELECT yandexConsistentHash(16045690984833335023, 1), yandexConsistentHash(16045690984833335023, 2), yandexConsistentHash(16045690984833335023, 3), yandexConsistentHash(16045690984833335023, 4), yandexConsistentHash(16045690984833335023, 173), yandexConsistentHash(16045690984833335023, 255); +SELECT kostikConsistentHash(16045690984833335023, 1), kostikConsistentHash(16045690984833335023, 2), kostikConsistentHash(16045690984833335023, 3), kostikConsistentHash(16045690984833335023, 4), kostikConsistentHash(16045690984833335023, 173), kostikConsistentHash(16045690984833335023, 255); SELECT jumpConsistentHash(intHash64(number), 787) FROM system.numbers LIMIT 1000000, 2; -SELECT yandexConsistentHash(16045690984833335023+number-number, 120) FROM system.numbers LIMIT 1000000, 2; +SELECT kostikConsistentHash(16045690984833335023+number-number, 120) FROM system.numbers LIMIT 1000000, 2; diff --git a/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference b/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference index a0265bdb7ed..2b1089c6840 100644 --- 
a/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference +++ b/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference @@ -1,23 +1,23 @@ {"total":"1","domain":"baidu.com"} {"total":"2","domain":"facebook.com"} {"total":"1","domain":"google.com"} -{"total":"2","domain":"yandex.ru"} +{"total":"2","domain":"meta.ua"} {"total":"1","domain":"baidu.com"} {"total":"2","domain":"facebook.com"} {"total":"1","domain":"google.com"} -{"total":"2","domain":"yandex.ru"} +{"total":"2","domain":"meta.ua"} 1 baidu.com 2 facebook.com 1 google.com -2 yandex.ru +2 meta.ua 1 baidu.com 2 facebook.com 1 google.com -2 yandex.ru +2 meta.ua 1 baidu.com 1 google.com 2 facebook.com -2 yandex.ru +2 meta.ua 1 1 2 @@ -25,4 +25,4 @@ baidu.com google.com facebook.com -yandex.ru +meta.ua diff --git a/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql b/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql index 49975daaa7e..07d39e8d54a 100644 --- a/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql +++ b/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.sql @@ -4,8 +4,8 @@ DROP TABLE IF EXISTS transactions; CREATE TABLE clicks (domain String) ENGINE = Memory; CREATE TABLE transactions (domain String) ENGINE = Memory; -INSERT INTO clicks VALUES ('facebook.com'), ('yandex.ru'), ('google.com'); -INSERT INTO transactions VALUES ('facebook.com'), ('yandex.ru'), ('baidu.com'); +INSERT INTO clicks VALUES ('facebook.com'), ('meta.ua'), ('google.com'); +INSERT INTO transactions VALUES ('facebook.com'), ('meta.ua'), ('baidu.com'); SELECT diff --git a/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh b/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh index f49aeb93184..00a7e3c5232 100755 --- a/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh +++ 
b/tests/queries/0_stateless/00653_verification_monotonic_data_load.sh @@ -25,7 +25,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE string_test_table (val String) ENGINE ${CLICKHOUSE_CLIENT} --query="CREATE TABLE fixed_string_test_table (val FixedString(1)) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE signed_integer_test_table (val Int32) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE unsigned_integer_test_table (val UInt32) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE enum_test_table (val Enum16('hello' = 1, 'world' = 2, 'yandex' = 256, 'clickhouse' = 257)) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE enum_test_table (val Enum16('hello' = 1, 'world' = 2, 'youtube' = 256, 'clickhouse' = 257)) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE date_test_table (val Date) ENGINE = MergeTree ORDER BY val SETTINGS index_granularity = 1, index_granularity_bytes = 0, min_bytes_for_wide_part = 0;" ${CLICKHOUSE_CLIENT} --query="SYSTEM STOP MERGES string_test_table;" @@ -40,7 +40,7 @@ ${CLICKHOUSE_CLIENT} --query="INSERT INTO fixed_string_test_table VALUES ('0'), # 131072 -> 17 bit is 1 ${CLICKHOUSE_CLIENT} --query="INSERT INTO signed_integer_test_table VALUES (-2), (0), (2), (2), (131072), (131073), (131073);" ${CLICKHOUSE_CLIENT} --query="INSERT INTO unsigned_integer_test_table VALUES (0), (2), (2), (131072), (131073), (131073);" -${CLICKHOUSE_CLIENT} --query="INSERT INTO 
enum_test_table VALUES ('hello'), ('world'), ('world'), ('yandex'), ('clickhouse'), ('clickhouse');" +${CLICKHOUSE_CLIENT} --query="INSERT INTO enum_test_table VALUES ('hello'), ('world'), ('world'), ('youtube'), ('clickhouse'), ('clickhouse');" ${CLICKHOUSE_CLIENT} --query="INSERT INTO date_test_table VALUES (1), (2), (2), (256), (257), (257);" CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=debug/g') diff --git a/tests/queries/0_stateless/00900_long_parquet_load.reference b/tests/queries/0_stateless/00900_long_parquet_load.reference index 6ecff505b2e..b295a226853 100644 --- a/tests/queries/0_stateless/00900_long_parquet_load.reference +++ b/tests/queries/0_stateless/00900_long_parquet_load.reference @@ -88,6 +88,9 @@ idx10 ['This','is','a','test'] 22 23 24 +=== Try load data from case_insensitive_column_matching.parquet +123 1 +456 2 === Try load data from datapage_v2.snappy.parquet Code: 33. DB::ParsingEx---tion: Error while reading Parquet data: IOError: Unknown encoding type.: While executing ParquetBlockInputFormat: data for INSERT was parsed from stdin: (in query: INSERT INTO parquet_load FORMAT Parquet). (CANNOT_READ_ALL_DATA) @@ -339,9 +342,6 @@ Code: 33. 
DB::ParsingEx---tion: Error while reading Parquet data: IOError: Unkno (NULL) === Try load data from single_nan.parquet \N -=== Try load data from test_setting_input_format_use_lowercase_column_name.parquet -123 1 -456 2 === Try load data from userdata1.parquet 1454486129 1 Amanda Jordan ajordan0@com.com Female 1.197.201.2 6759521864920116 Indonesia 3/8/1971 49756.53 Internal Auditor 1E+02 1454519043 2 Albert Freeman afreeman1@is.gd Male 218.111.175.34 Canada 1/16/1968 150280.17 Accountant IV diff --git a/tests/queries/0_stateless/00909_ngram_distance.reference b/tests/queries/0_stateless/00909_ngram_distance.reference index 290e24faac5..4323fa86151 100644 --- a/tests/queries/0_stateless/00909_ngram_distance.reference +++ b/tests/queries/0_stateless/00909_ngram_distance.reference @@ -113,112 +113,112 @@ 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 0 привет как дела?... 
Херсон 600 -пап привет как дела - Яндекс.Видео 684 -привет как дела клип - Яндекс.Видео 692 -привет братан как дела - Яндекс.Видео 707 +пап привет как дела - TUT.BY 625 +привет как дела клип - TUT.BY 636 +привет братан как дела - TUT.BY 657 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 привет 1000 0 http://metric.ru/ 765 http://metris.ru/ 765 http://metrika.ru/ 778 http://autometric.ru/ 810 -http://metrica.yandex.com/ 846 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 привет как дела?... Херсон 297 -пап привет как дела - Яндекс.Видео 422 -привет как дела клип - Яндекс.Видео 435 -привет братан как дела - Яндекс.Видео 500 +пап привет как дела - TUT.BY 333 +привет как дела клип - TUT.BY 350 +привет братан как дела - TUT.BY 429 привет 529 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 436 привет как дела?... 
Херсон 459 -пап привет как дела - Яндекс.Видео 511 +привет как дела клип - TUT.BY 500 +привет братан как дела - TUT.BY 524 привет 529 -привет как дела клип - Яндекс.Видео 565 -привет братан как дела - Яндекс.Видео 583 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 http://metrika.ru/ 524 http://metric.ru/ 700 http://metris.ru/ 700 http://autometric.ru/ 750 -http://metrica.yandex.com/ 793 +http://top.bigmir.net/ 920 1000 -пап привет как дела - Яндекс.Видео 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metric.ru/ 600 -http://metrica.yandex.com/ 655 http://autometric.ru/ 667 http://metris.ru/ 700 http://metrika.ru/ 714 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metrika.ru/ 619 http://metric.ru/ 700 http://metris.ru/ 700 http://autometric.ru/ 750 -http://metrica.yandex.com/ 793 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... 
Херсон 1000 http://metric.ru/ 600 http://autometric.ru/ 667 http://metris.ru/ 700 http://metrika.ru/ 714 -http://metrica.yandex.com/ 724 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 -http://metrica.yandex.com/ 714 +http://top.bigmir.net/ 667 +http://metrika.ru/ 900 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 0 @@ -335,135 +335,135 @@ http://metris.ru/ 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 0 привет как дела?... 
Херсон 600 -пап привет как дела - Яндекс.Видео 684 -привет как дела клип - Яндекс.Видео 692 -привет братан как дела - Яндекс.Видео 707 +пап привет как дела - TUT.BY 625 +привет как дела клип - TUT.BY 636 +привет братан как дела - TUT.BY 657 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 привет 1000 0 http://metric.ru/ 765 http://metris.ru/ 765 http://metrika.ru/ 778 http://autometric.ru/ 810 -http://metrica.yandex.com/ 846 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 привет как дела?... Херсон 297 -пап привет как дела - Яндекс.Видео 422 -привет как дела клип - Яндекс.Видео 435 -привет братан как дела - Яндекс.Видео 500 +пап привет как дела - TUT.BY 333 +привет как дела клип - TUT.BY 350 +привет братан как дела - TUT.BY 429 привет 529 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 436 привет как дела?... 
Херсон 459 -пап привет как дела - Яндекс.Видео 511 +привет как дела клип - TUT.BY 500 +привет братан как дела - TUT.BY 524 привет 529 -привет как дела клип - Яндекс.Видео 565 -привет братан как дела - Яндекс.Видео 583 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 http://metrika.ru/ 524 http://metric.ru/ 700 http://metris.ru/ 700 http://autometric.ru/ 750 -http://metrica.yandex.com/ 793 +http://top.bigmir.net/ 920 1000 -пап привет как дела - Яндекс.Видео 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metrika.ru/ 524 http://metric.ru/ 700 http://metris.ru/ 700 http://autometric.ru/ 750 -http://metrica.yandex.com/ 793 +http://top.bigmir.net/ 920 1000 -пап привет как дела - Яндекс.Видео 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metric.ru/ 600 -http://metrica.yandex.com/ 655 http://autometric.ru/ 667 http://metris.ru/ 700 http://metrika.ru/ 714 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... 
Херсон 1000 http://metrika.ru/ 619 http://metric.ru/ 700 http://metris.ru/ 700 http://autometric.ru/ 750 -http://metrica.yandex.com/ 793 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metric.ru/ 600 http://autometric.ru/ 667 http://metris.ru/ 700 http://metrika.ru/ 714 -http://metrica.yandex.com/ 724 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 -http://metrica.yandex.com/ 714 +http://top.bigmir.net/ 667 +http://metrika.ru/ 900 + 1000 +http://autometric.ru/ 1000 +http://metric.ru/ 1000 +http://metris.ru/ 1000 +пап привет как дела - TUT.BY 1000 +привет 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 +привет как дела?... Херсон 1000 +привет как дела клип - TUT.BY 0 +пап привет как дела - TUT.BY 208 +привет братан как дела - TUT.BY 286 +привет как дела?... Херсон 490 +привет 742 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 -привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 -привет как дела?... Херсон 1000 -привет как дела клип - Яндекс.Видео 0 -пап привет как дела - Яндекс.Видео 169 -привет братан как дела - Яндекс.Видео 235 -привет как дела?... 
Херсон 544 -привет 784 - 1000 -http://autometric.ru/ 1000 -http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 -http://metrika.ru/ 1000 -http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 0 0 0 @@ -576,82 +576,82 @@ http://metris.ru/ 1000 111 429 1000 +пап привет как дела - TUT.BY 242 привет как дела?... Херсон 254 -пап привет как дела - Яндекс.Видео 398 -привет как дела клип - Яндекс.Видео 412 -привет братан как дела - Яндекс.Видео 461 +привет как дела клип - TUT.BY 265 +привет братан как дела - TUT.BY 333 привет 471 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 303 привет как дела?... Херсон 343 -пап привет как дела - Яндекс.Видео 446 +привет как дела клип - TUT.BY 353 +привет братан как дела - TUT.BY 389 привет 471 -привет как дела клип - Яндекс.Видео 482 -привет братан как дела - Яндекс.Видео 506 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 http://metrika.ru/ 579 http://metric.ru/ 778 http://metris.ru/ 778 http://autometric.ru/ 818 -http://metrica.yandex.com/ 852 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metric.ru/ 667 -http://metrica.yandex.com/ 704 http://autometric.ru/ 727 http://metris.ru/ 778 http://metrika.ru/ 789 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... 
Херсон 1000 http://metrika.ru/ 684 http://metric.ru/ 778 http://metris.ru/ 778 http://autometric.ru/ 818 -http://metrica.yandex.com/ 852 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metric.ru/ 667 http://autometric.ru/ 727 -http://metrica.yandex.com/ 778 http://metris.ru/ 778 http://metrika.ru/ 789 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 -http://metrica.yandex.com/ 769 +http://top.bigmir.net/ 727 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 0 @@ -765,91 +765,91 @@ http://metris.ru/ 1000 111 600 1000 +пап привет как дела - TUT.BY 909 привет как дела?... Херсон 910 -пап привет как дела - Яндекс.Видео 928 -привет как дела клип - Яндекс.Видео 929 -привет братан как дела - Яндекс.Видео 955 +привет как дела клип - TUT.BY 912 +привет братан как дела - TUT.BY 944 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 привет 1000 +пап привет как дела - TUT.BY 667 привет как дела?... 
Херсон 672 -пап привет как дела - Яндекс.Видео 735 -привет как дела клип - Яндекс.Видео 741 -привет братан как дела - Яндекс.Видео 753 +привет как дела клип - TUT.BY 676 +привет братан как дела - TUT.BY 694 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 +http://top.bigmir.net/ 1000 привет 1000 http://metrika.ru/ 579 http://metric.ru/ 778 http://metris.ru/ 778 http://autometric.ru/ 818 -http://metrica.yandex.com/ 852 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metrika.ru/ 579 http://metric.ru/ 778 http://metris.ru/ 778 http://autometric.ru/ 818 -http://metrica.yandex.com/ 852 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metric.ru/ 667 -http://metrica.yandex.com/ 704 http://autometric.ru/ 727 http://metris.ru/ 778 http://metrika.ru/ 789 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... 
Херсон 1000 http://metrika.ru/ 684 http://metric.ru/ 778 http://metris.ru/ 778 http://autometric.ru/ 818 -http://metrica.yandex.com/ 852 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 http://metric.ru/ 667 http://autometric.ru/ 727 -http://metrica.yandex.com/ 778 http://metris.ru/ 778 http://metrika.ru/ 789 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 -http://metrica.yandex.com/ 769 +http://top.bigmir.net/ 727 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 diff --git a/tests/queries/0_stateless/00909_ngram_distance.sql b/tests/queries/0_stateless/00909_ngram_distance.sql index b2f403c415a..28aff50d22e 100644 --- a/tests/queries/0_stateless/00909_ngram_distance.sql +++ b/tests/queries/0_stateless/00909_ngram_distance.sql @@ -32,7 +32,7 @@ select round(1000 * ngramDistanceUTF8('абвгдеёжз', 'ёёёёёёёё')) drop table if exists test_distance; create table test_distance (Title String) engine = Memory; -insert into test_distance values ('привет как дела?... 
Херсон'), ('привет как дела клип - Яндекс.Видео'), ('привет'), ('пап привет как дела - Яндекс.Видео'), ('привет братан как дела - Яндекс.Видео'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://metrica.yandex.com/'), ('http://metris.ru/'), ('http://metrika.ru/'), (''); +insert into test_distance values ('привет как дела?... Херсон'), ('привет как дела клип - TUT.BY'), ('привет'), ('пап привет как дела - TUT.BY'), ('привет братан как дела - TUT.BY'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://top.bigmir.net/'), ('http://metris.ru/'), ('http://metrika.ru/'), (''); SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, Title) as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, extract(Title, 'как дела')) as distance, Title; @@ -44,7 +44,7 @@ SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUT SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metriks') as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'yandex') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceUTF8(Title, 'bigmir') as distance, Title; select round(1000 * ngramDistanceCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5; @@ -91,8 +91,8 @@ SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCa SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'mEtrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 
'metriKS') as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'YanDEX') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - яндеКс.видео') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'BigMIR') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - TuT.by') as distance, Title; select round(1000 * ngramDistance(materialize(''), '')) from system.numbers limit 5; @@ -134,7 +134,7 @@ SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(T SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metriks') as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'yandex') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistance(Title, 'bigmir') as distance, Title; select round(1000 * ngramDistanceCaseInsensitive(materialize(''), '')) from system.numbers limit 5; select round(1000 * ngramDistanceCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5; @@ -175,6 +175,6 @@ SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCa SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'mEtrica') as distance, Title; SELECT Title, round(1000 * 
distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'metriKS') as distance, Title; SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'YanDEX') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_distance ORDER BY ngramDistanceCaseInsensitive(Title, 'BigMIR') as distance, Title; drop table if exists test_distance; diff --git a/tests/queries/0_stateless/00926_multimatch.reference b/tests/queries/0_stateless/00926_multimatch.reference index 4a2320de57b..7ff32ecd73b 100644 --- a/tests/queries/0_stateless/00926_multimatch.reference +++ b/tests/queries/0_stateless/00926_multimatch.reference @@ -600,16 +600,16 @@ 1 1 1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 1 1 1 diff --git a/tests/queries/0_stateless/00926_multimatch.sql b/tests/queries/0_stateless/00926_multimatch.sql index 90cc289b5a5..b9843a1b4ba 100644 --- a/tests/queries/0_stateless/00926_multimatch.sql +++ b/tests/queries/0_stateless/00926_multimatch.sql @@ -75,18 +75,18 @@ select 1 = multiMatchAny(materialize('abcdef'), ['a......', 'a.....']) from syst select 0 = multiMatchAny(materialize('aaaa'), ['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}']) from system.numbers limit 10; select 1 = multiMatchAny(materialize('abc'), ['a\0d']) from system.numbers limit 10; -select 1 = multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['google', 'yandex1']) from system.numbers limit 10; -select 2 = multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['google1', 'yandex']) from system.numbers limit 10; -select 0 != multiMatchAnyIndex(materialize('gogleuedeyandexgoogle'), ['.*goo.*', '.*yan.*']) from system.numbers limit 10; +select 1 = multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), ['google', 'unian1']) from system.numbers limit 10; +select 2 = 
multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), ['google1', 'unian']) from system.numbers limit 10; +select 0 != multiMatchAnyIndex(materialize('gogleuedeuniangoogle'), ['.*goo.*', '.*yan.*']) from system.numbers limit 10; select 5 = multiMatchAnyIndex(materialize('vladizlvav dabe don\'t heart me no more'), ['what', 'is', 'love', 'baby', 'no mo??', 'dont', 'h.rt me']) from system.numbers limit 10; SELECT multiMatchAny(materialize('/odezhda-dlya-bega/'), ['/odezhda-dlya-bega/', 'kurtki-i-vetrovki-dlya-bega', 'futbolki-i-mayki-dlya-bega']); SELECT 1 = multiMatchAny('фабрикант', ['f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]']); -- All indices tests -SELECT [1, 2] = arraySort(multiMatchAllIndices(materialize('gogleuedeyandexgoogle'), ['.*goo.*', '.*yan.*'])) from system.numbers limit 5; -SELECT [1, 3] = arraySort(multiMatchAllIndices(materialize('gogleuedeyandexgoogle'), ['.*goo.*', 'neverexisted', '.*yan.*'])) from system.numbers limit 5; -SELECT [] = multiMatchAllIndices(materialize('gogleuedeyandexgoogle'), ['neverexisted', 'anotherone', 'andanotherone']) from system.numbers limit 5; +SELECT [1, 2] = arraySort(multiMatchAllIndices(materialize('gogleuedeuniangoogle'), ['.*goo.*', '.*yan.*'])) from system.numbers limit 5; +SELECT [1, 3] = arraySort(multiMatchAllIndices(materialize('gogleuedeuniangoogle'), ['.*goo.*', 'neverexisted', '.*yan.*'])) from system.numbers limit 5; +SELECT [] = multiMatchAllIndices(materialize('gogleuedeuniangoogle'), ['neverexisted', 'anotherone', 'andanotherone']) from system.numbers limit 5; SELECT [1, 2, 3, 11] = arraySort(multiMatchAllIndices('фабрикант', ['', 'рикан', 'а', 'f[ae]b[ei]rl', 'ф[иаэе]б[еэи][рпл]', 'афиукд', 'a[ft],th', '^ф[аиеэ]?б?[еэи]?$', 'берлик', 'fab', 'фа[беьв]+е?[рлко]'])); SELECT [1] = multiMatchAllIndices(materialize('/odezhda-dlya-bega/'), ['/odezhda-dlya-bega/', 'kurtki-i-vetrovki-dlya-bega', 'futbolki-i-mayki-dlya-bega']); SELECT [] 
= multiMatchAllIndices(materialize('aaaa'), ['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}']); diff --git a/tests/queries/0_stateless/00951_ngram_search.reference b/tests/queries/0_stateless/00951_ngram_search.reference index a98f63a198a..ece03fc649e 100644 --- a/tests/queries/0_stateless/00951_ngram_search.reference +++ b/tests/queries/0_stateless/00951_ngram_search.reference @@ -113,113 +113,113 @@ 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... 
Херсон 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 308 -привет братан как дела - Яндекс.Видео 923 -пап привет как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 923 +пап привет как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 308 -привет братан как дела - Яндекс.Видео 769 -привет как дела клип - Яндекс.Видео 769 +привет братан как дела - TUT.BY 769 +привет как дела клип - TUT.BY 769 привет как дела?... Херсон 769 -пап привет как дела - Яндекс.Видео 846 +пап привет как дела - TUT.BY 846 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 +http://top.bigmir.net/ 200 http://autometric.ru/ 600 http://metric.ru/ 600 -http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 600 http://metris.ru/ 600 http://autometric.ru/ 800 http://metric.ru/ 800 -http://metrica.yandex.com/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... 
Херсон 0 http://autometric.ru/ 600 http://metric.ru/ 600 -http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 800 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 600 http://metris.ru/ 600 http://autometric.ru/ 800 http://metric.ru/ 800 -http://metrica.yandex.com/ 800 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 -http://metrica.yandex.com/ 1000 +http://metrika.ru/ 250 +http://top.bigmir.net/ 1000 1000 1000 1000 @@ -335,135 +335,135 @@ http://metrica.yandex.com/ 1000 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... 
Херсон 1000 1000 http://autometric.ru/ 1000 http://metric.ru/ 1000 -http://metrica.yandex.com/ 1000 http://metrika.ru/ 1000 http://metris.ru/ 1000 -пап привет как дела - Яндекс.Видео 1000 +http://top.bigmir.net/ 1000 +пап привет как дела - TUT.BY 1000 привет 1000 -привет братан как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 308 -привет братан как дела - Яндекс.Видео 923 -пап привет как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 923 +пап привет как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 308 -привет братан как дела - Яндекс.Видео 769 -привет как дела клип - Яндекс.Видео 769 +привет братан как дела - TUT.BY 769 +привет как дела клип - TUT.BY 769 привет как дела?... Херсон 769 -пап привет как дела - Яндекс.Видео 846 +пап привет как дела - TUT.BY 846 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 +http://top.bigmir.net/ 200 http://autometric.ru/ 600 http://metric.ru/ 600 -http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... 
Херсон 0 +http://top.bigmir.net/ 200 http://autometric.ru/ 600 http://metric.ru/ 600 -http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 600 http://metris.ru/ 600 http://autometric.ru/ 800 http://metric.ru/ 800 -http://metrica.yandex.com/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 600 http://metric.ru/ 600 -http://metrica.yandex.com/ 600 http://metris.ru/ 600 http://metrika.ru/ 800 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 600 http://metris.ru/ 600 http://autometric.ru/ 800 http://metric.ru/ 800 -http://metrica.yandex.com/ 800 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 -http://metrica.yandex.com/ 1000 +http://metrika.ru/ 250 +http://top.bigmir.net/ 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 -http://metrika.ru/ 0 http://metris.ru/ 0 -привет 121 -привет как дела?... 
Херсон 394 -привет братан как дела - Яндекс.Видео 788 -пап привет как дела - Яндекс.Видео 818 -привет как дела клип - Яндекс.Видео 1000 +http://metrika.ru/ 32 +привет 129 +http://top.bigmir.net/ 258 +привет как дела?... Херсон 419 +привет братан как дела - TUT.BY 452 +пап привет как дела - TUT.BY 484 +привет как дела клип - TUT.BY 677 1000 1000 1000 @@ -579,80 +579,80 @@ http://metris.ru/ 0 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 360 -привет братан как дела - Яндекс.Видео 960 -пап привет как дела - Яндекс.Видео 1000 -привет как дела клип - Яндекс.Видео 1000 +привет братан как дела - TUT.BY 960 +пап привет как дела - TUT.BY 1000 +привет как дела клип - TUT.BY 1000 привет как дела?... Херсон 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 360 -привет братан как дела - Яндекс.Видео 880 -привет как дела клип - Яндекс.Видео 880 +привет братан как дела - TUT.BY 880 +привет как дела клип - TUT.BY 880 привет как дела?... Херсон 880 -пап привет как дела - Яндекс.Видео 920 +пап привет как дела - TUT.BY 920 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 500 http://metric.ru/ 500 -http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... 
Херсон 0 http://metrika.ru/ 500 http://metris.ru/ 500 http://autometric.ru/ 750 http://metric.ru/ 750 -http://metrica.yandex.com/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 500 http://metric.ru/ 500 -http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 750 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 500 http://metris.ru/ 500 http://autometric.ru/ 750 http://metric.ru/ 750 -http://metrica.yandex.com/ 750 0 http://autometric.ru/ 0 http://metric.ru/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 -http://metrica.yandex.com/ 1000 +http://top.bigmir.net/ 1000 1000 1000 1000 @@ -768,88 +768,88 @@ http://metrica.yandex.com/ 1000 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 0 -привет братан как дела - Яндекс.Видео 80 -пап привет как дела - Яндекс.Видео 120 -привет как дела клип - Яндекс.Видео 120 +привет братан как дела - TUT.BY 80 +пап привет как дела - TUT.BY 120 +привет как дела клип - TUT.BY 120 привет как дела?... 
Херсон 120 0 http://autometric.ru/ 0 http://metric.ru/ 0 -http://metrica.yandex.com/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 +http://top.bigmir.net/ 0 привет 0 -пап привет как дела - Яндекс.Видео 440 -привет братан как дела - Яндекс.Видео 440 -привет как дела клип - Яндекс.Видео 440 +пап привет как дела - TUT.BY 440 +привет братан как дела - TUT.BY 440 +привет как дела клип - TUT.BY 440 привет как дела?... Херсон 440 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 500 http://metric.ru/ 500 -http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://autometric.ru/ 500 http://metric.ru/ 500 -http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 500 http://metris.ru/ 500 http://autometric.ru/ 750 http://metric.ru/ 750 -http://metrica.yandex.com/ 1000 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... 
Херсон 0 http://autometric.ru/ 500 http://metric.ru/ 500 -http://metrica.yandex.com/ 500 http://metris.ru/ 500 http://metrika.ru/ 750 0 -пап привет как дела - Яндекс.Видео 0 +http://top.bigmir.net/ 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 http://metrika.ru/ 500 http://metris.ru/ 500 http://autometric.ru/ 750 http://metric.ru/ 750 -http://metrica.yandex.com/ 750 0 http://autometric.ru/ 0 http://metric.ru/ 0 http://metrika.ru/ 0 http://metris.ru/ 0 -пап привет как дела - Яндекс.Видео 0 +пап привет как дела - TUT.BY 0 привет 0 -привет братан как дела - Яндекс.Видео 0 -привет как дела клип - Яндекс.Видео 0 +привет братан как дела - TUT.BY 0 +привет как дела клип - TUT.BY 0 привет как дела?... Херсон 0 -http://metrica.yandex.com/ 1000 +http://top.bigmir.net/ 1000 diff --git a/tests/queries/0_stateless/00951_ngram_search.sql b/tests/queries/0_stateless/00951_ngram_search.sql index f1a37605ebc..77525d86013 100644 --- a/tests/queries/0_stateless/00951_ngram_search.sql +++ b/tests/queries/0_stateless/00951_ngram_search.sql @@ -32,7 +32,7 @@ select round(1000 * ngramSearchUTF8('абвгдеёжз', 'ёёёёёёёё')); drop table if exists test_entry_distance; create table test_entry_distance (Title String) engine = Memory; -insert into test_entry_distance values ('привет как дела?... Херсон'), ('привет как дела клип - Яндекс.Видео'), ('привет'), ('пап привет как дела - Яндекс.Видео'), ('привет братан как дела - Яндекс.Видео'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://metrica.yandex.com/'), ('http://metris.ru/'), ('http://metrika.ru/'), (''); +insert into test_entry_distance values ('привет как дела?... 
Херсон'), ('привет как дела клип - TUT.BY'), ('привет'), ('пап привет как дела - TUT.BY'), ('привет братан как дела - TUT.BY'), ('http://metric.ru/'), ('http://autometric.ru/'), ('http://top.bigmir.net/'), ('http://metris.ru/'), ('http://metrika.ru/'), (''); SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, Title) as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, extract(Title, 'как дела')) as distance, Title; @@ -44,7 +44,7 @@ SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSear SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metriks') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'yandex') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchUTF8(Title, 'bigmir') as distance, Title; select round(1000 * ngramSearchCaseInsensitiveUTF8(materialize(''), '')) from system.numbers limit 5; @@ -91,8 +91,8 @@ SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSear SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'mEtrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'metriKS') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'YanDEX') as distance, 
Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - яндеКс.видео') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'BigMIR') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitiveUTF8(Title, 'приВЕТ КАк ДеЛа КлИп - bigMir.Net') as distance, Title; select round(1000 * ngramSearch(materialize(''), '')) from system.numbers limit 5; @@ -134,7 +134,7 @@ SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSear SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metriks') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'metrics') as distance, Title; -SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'yandex') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearch(Title, 'bigmir') as distance, Title; select round(1000 * ngramSearchCaseInsensitive(materialize(''), '')) from system.numbers limit 5; select round(1000 * ngramSearchCaseInsensitive(materialize('abc'), '')) from system.numbers limit 5; @@ -175,6 +175,6 @@ SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSear SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'mEtrica') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'metriKS') as distance, Title; SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'metrics') as distance, Title; -SELECT Title, 
round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'YanDEX') as distance, Title; +SELECT Title, round(1000 * distance) FROM test_entry_distance ORDER BY ngramSearchCaseInsensitive(Title, 'BigMIR') as distance, Title; drop table if exists test_entry_distance; diff --git a/tests/queries/0_stateless/00979_yandex_consistent_hash_fpe.sql b/tests/queries/0_stateless/00979_yandex_consistent_hash_fpe.sql index 79fabeae7ef..3da52f2cb96 100644 --- a/tests/queries/0_stateless/00979_yandex_consistent_hash_fpe.sql +++ b/tests/queries/0_stateless/00979_yandex_consistent_hash_fpe.sql @@ -1 +1 @@ -SELECT yandexConsistentHash(-1, 40000); -- { serverError 36 } +SELECT kostikConsistentHash(-1, 40000); -- { serverError 36 } diff --git a/tests/queries/0_stateless/00998_constraints_all_tables.reference b/tests/queries/0_stateless/00998_constraints_all_tables.reference index 0ec8b004ae4..90a2380df1e 100644 --- a/tests/queries/0_stateless/00998_constraints_all_tables.reference +++ b/tests/queries/0_stateless/00998_constraints_all_tables.reference @@ -10,5 +10,5 @@ 0 0 3 -CREATE TABLE default.constrained\n(\n `URL` String,\n CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = \'yandex.ru\',\n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log -CREATE TABLE default.constrained2\n(\n `URL` String,\n CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = \'yandex.ru\',\n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log +CREATE TABLE default.constrained\n(\n `URL` String,\n CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = \'censor.net\',\n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log +CREATE TABLE default.constrained2\n(\n `URL` String,\n CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = \'censor.net\',\n CONSTRAINT is_utf8 CHECK isValidUTF8(URL)\n)\nENGINE = Log diff --git a/tests/queries/0_stateless/00998_constraints_all_tables.sql b/tests/queries/0_stateless/00998_constraints_all_tables.sql index e47b7eaf83c..bb0d6933a01 
100644 --- a/tests/queries/0_stateless/00998_constraints_all_tables.sql +++ b/tests/queries/0_stateless/00998_constraints_all_tables.sql @@ -1,53 +1,53 @@ DROP TABLE IF EXISTS constrained; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Null; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), ('https://yandex.ru/te\xFFst'); -- { serverError 469 } -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), (toValidUTF8('https://yandex.ru/te\xFFst')); +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Null; +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), ('https://censor.net/te\xFFst'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), (toValidUTF8('https://censor.net/te\xFFst')); DROP TABLE constrained; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Memory; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Memory; +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES 
('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), ('https://yandex.ru/te\xFFst'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), ('https://censor.net/te\xFFst'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), (toValidUTF8('https://yandex.ru/te\xFFst')); +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), (toValidUTF8('https://censor.net/te\xFFst')); SELECT count() FROM constrained; DROP TABLE constrained; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = StripeLog; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = StripeLog; +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), ('https://yandex.ru/te\xFFst'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), ('https://censor.net/te\xFFst'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), (toValidUTF8('https://yandex.ru/te\xFFst')); +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), (toValidUTF8('https://censor.net/te\xFFst')); SELECT count() FROM constrained; DROP TABLE constrained; -CREATE TABLE constrained (URL String, CONSTRAINT 
is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = TinyLog; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = TinyLog; +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), ('https://yandex.ru/te\xFFst'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), ('https://censor.net/te\xFFst'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), (toValidUTF8('https://yandex.ru/te\xFFst')); +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), (toValidUTF8('https://censor.net/te\xFFst')); SELECT count() FROM constrained; DROP TABLE constrained; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Log; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Log; +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), ('https://yandex.ru/te\xFFst'); -- { serverError 469 } +INSERT INTO constrained 
VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), ('https://censor.net/te\xFFst'); -- { serverError 469 } SELECT count() FROM constrained; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('ftp://yandex.ru/Hello'), (toValidUTF8('https://yandex.ru/te\xFFst')); +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('ftp://censor.net/Hello'), (toValidUTF8('https://censor.net/te\xFFst')); SELECT count() FROM constrained; DROP TABLE constrained; DROP TABLE IF EXISTS constrained2; -CREATE TABLE constrained (URL String, CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Log; +CREATE TABLE constrained (URL String, CONSTRAINT is_censor CHECK domainWithoutWWW(URL) = 'censor.net', CONSTRAINT is_utf8 CHECK isValidUTF8(URL)) ENGINE = Log; CREATE TABLE constrained2 AS constrained; SHOW CREATE TABLE constrained; SHOW CREATE TABLE constrained2; -INSERT INTO constrained VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } -INSERT INTO constrained2 VALUES ('https://www.yandex.ru/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +INSERT INTO constrained VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } +INSERT INTO constrained2 VALUES ('https://www.censor.net/?q=upyachka'), ('Hello'), ('test'); -- { serverError 469 } DROP TABLE constrained; DROP TABLE constrained2; diff --git a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql index 6d1c7fd5ef6..86c84427297 100644 --- a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql +++ b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql @@ -5,7 +5,7 @@ set allow_experimental_parallel_reading_from_replicas=0; drop table if exists test_max_parallel_replicas_lr; -- If you 
wonder why the table is named with "_lr" suffix in this test. --- No reason. Actually it is the name of the table in Yandex.Market and they provided this test case for us. +-- No reason. Actually it is the name of the table in our customer and they provided this test case for us. CREATE TABLE test_max_parallel_replicas_lr (timestamp UInt64) ENGINE = MergeTree ORDER BY (intHash32(timestamp)) SAMPLE BY intHash32(timestamp); INSERT INTO test_max_parallel_replicas_lr select number as timestamp from system.numbers limit 100; diff --git a/tests/queries/0_stateless/01059_storage_file_compression.sh b/tests/queries/0_stateless/01059_storage_file_compression.sh index fbee5070d8d..ab56caee3fe 100755 --- a/tests/queries/0_stateless/01059_storage_file_compression.sh +++ b/tests/queries/0_stateless/01059_storage_file_compression.sh @@ -22,3 +22,4 @@ for m in gz br xz zst lz4 bz2 do ${CLICKHOUSE_CLIENT} --query "SELECT count() < 4000000, max(x) FROM file('${CLICKHOUSE_DATABASE}/${m}.tsv.${m}', RowBinary, 'x UInt8', 'none')" done + diff --git a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql index dd39277ee31..b162fdb21fd 100644 --- a/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql +++ b/tests/queries/0_stateless/01083_expressions_in_engine_arguments.sql @@ -1,5 +1,7 @@ -- Tags: no-parallel, no-fasttest +SET prefer_localhost_replica=1; + DROP TABLE IF EXISTS file; DROP TABLE IF EXISTS url; DROP TABLE IF EXISTS view; diff --git a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql index de93166d891..aa924218360 100644 --- a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql +++ b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql @@ -3,6 +3,7 @@ -- set insert_distributed_sync = 1; -- see https://github.com/ClickHouse/ClickHouse/issues/18971 
SET allow_experimental_parallel_reading_from_replicas = 0; -- see https://github.com/ClickHouse/ClickHouse/issues/34525 +SET prefer_localhost_replica = 1; DROP TABLE IF EXISTS local_01099_a; DROP TABLE IF EXISTS local_01099_b; diff --git a/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.sql b/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.sql index 4aacecc9734..3822f22d353 100644 --- a/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.sql +++ b/tests/queries/0_stateless/01236_distributed_over_live_view_over_distributed.sql @@ -1,4 +1,4 @@ --- Tags: distributed, no-replicated-database, no-parallel, no-fasttest +-- Tags: distributed, no-replicated-database, no-parallel, no-fasttest, no-random-settings SET allow_experimental_live_view = 1; diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index c9c01455e31..1807da6948a 100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan +# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan, no-random-settings # Tag no-fasttest: max_memory_usage_for_user can interfere another queries running concurrently # Regression for MemoryTracker that had been incorrectly accounted diff --git a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh index 9a4df37d206..34fa822b6ea 100755 --- a/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh +++ b/tests/queries/0_stateless/01318_long_unsuccessful_mutation_zookeeper.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel +# Tags: long, zookeeper, no-parallel, 
no-backward-compatibility-check CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference index 994e3f24aaf..7cf545176e3 100644 --- a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference +++ b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference @@ -2,13 +2,13 @@ other other google other -yandex +censor.net other yahoo other other other -SELECT transform(number, [2, 4, 6], _CAST([\'google\', \'yandex\', \'yahoo\'], \'Array(Enum8(\\\'google\\\' = 1, \\\'other\\\' = 2, \\\'yahoo\\\' = 3, \\\'yandex\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'google\\\' = 1, \\\'other\\\' = 2, \\\'yahoo\\\' = 3, \\\'yandex\\\' = 4)\')) +SELECT transform(number, [2, 4, 6], _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\')) FROM system.numbers LIMIT 10 google @@ -17,24 +17,24 @@ google google google google -yandex -yandex -yandex -yandex -SELECT if(number > 5, \'yandex\', \'google\') +censor.net +censor.net +censor.net +censor.net +SELECT if(number > 5, \'censor.net\', \'google\') FROM system.numbers LIMIT 10 other other google other -yandex +censor.net other yahoo other other other -SELECT transform(number, [2, 4, 6], [\'google\', \'yandex\', \'yahoo\'], \'other\') +SELECT transform(number, [2, 4, 6], [\'google\', \'censor.net\', \'yahoo\'], \'other\') FROM system.numbers LIMIT 10 google @@ -43,10 +43,10 @@ google google google google -yandex -yandex -yandex -yandex -SELECT if(number > 5, \'yandex\', \'google\') +censor.net +censor.net +censor.net +censor.net +SELECT if(number > 5, \'censor.net\', \'google\') FROM system.numbers LIMIT 10 diff --git 
a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.sql b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.sql index dcb082c650a..ee2f48a53da 100644 --- a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.sql +++ b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.sql @@ -1,13 +1,13 @@ set optimize_if_transform_strings_to_enum = 1; -SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -SELECT number > 5 ? 'yandex' : 'google' FROM system.numbers LIMIT 10; -EXPLAIN SYNTAX SELECT number > 5 ? 'yandex' : 'google' FROM system.numbers LIMIT 10; +SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; set optimize_if_transform_strings_to_enum = 0; -SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'yandex', 'yahoo'], 'other') FROM system.numbers LIMIT 10; -SELECT number > 5 ? 'yandex' : 'google' FROM system.numbers LIMIT 10; -EXPLAIN SYNTAX SELECT number > 5 ? 'yandex' : 'google' FROM system.numbers LIMIT 10; +SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT number > 5 ? 
'censor.net' : 'google' FROM system.numbers LIMIT 10; diff --git a/tests/queries/0_stateless/01358_constexpr_constraint.sql b/tests/queries/0_stateless/01358_constexpr_constraint.sql index 799f6f32259..4560ac47c42 100644 --- a/tests/queries/0_stateless/01358_constexpr_constraint.sql +++ b/tests/queries/0_stateless/01358_constexpr_constraint.sql @@ -1,7 +1,7 @@ CREATE TEMPORARY TABLE constrained ( `URL` String, - CONSTRAINT is_yandex CHECK domainWithoutWWW(URL) = domainWithoutWWW(URL), + CONSTRAINT identity CHECK domainWithoutWWW(URL) = domainWithoutWWW(URL), CONSTRAINT is_utf8 CHECK isValidUTF8(URL) ); diff --git a/tests/queries/0_stateless/01407_lambda_arrayJoin.sql b/tests/queries/0_stateless/01407_lambda_arrayJoin.sql index 4f34bb59527..363b1d92dbb 100644 --- a/tests/queries/0_stateless/01407_lambda_arrayJoin.sql +++ b/tests/queries/0_stateless/01407_lambda_arrayJoin.sql @@ -1,6 +1,5 @@ SELECT arrayFilter((a) -> ((a, arrayJoin([])) IN (Null, [Null])), []); SELECT arrayFilter((a) -> ((a, arrayJoin([[]])) IN (Null, [Null])), []); --- simplified from the https://clickhouse-test-reports.s3.yandex.net/10373/6c4748a63e7acde2cc3283d96ffec590aae1e724/fuzzer/fuzzer.log#fail1 SELECT * FROM system.one ARRAY JOIN arrayFilter((a) -> ((a, arrayJoin([])) IN (NULL)), []) AS arr_x; -- { serverError 43; } SELECT * FROM numbers(1) LEFT ARRAY JOIN arrayFilter((x_0, x_1) -> (arrayJoin([]) IN (NULL)), [], []) AS arr_x; diff --git a/tests/queries/0_stateless/01456_ast_optimizations_over_distributed.sql b/tests/queries/0_stateless/01456_ast_optimizations_over_distributed.sql index ee144264193..1e1d87a5ad5 100644 --- a/tests/queries/0_stateless/01456_ast_optimizations_over_distributed.sql +++ b/tests/queries/0_stateless/01456_ast_optimizations_over_distributed.sql @@ -8,8 +8,8 @@ SET optimize_if_transform_strings_to_enum = 1; SELECT any(number + 1) FROM numbers(1); SELECT uniq(bitNot(number)) FROM numbers(1); SELECT sum(number + 1) FROM numbers(1); -SELECT transform(number, [1, 2], 
['google', 'yandex'], 'other') FROM numbers(1); -SELECT number > 0 ? 'yandex' : 'google' FROM numbers(1); +SELECT transform(number, [1, 2], ['google', 'censor.net'], 'other') FROM numbers(1); +SELECT number > 0 ? 'censor.net' : 'google' FROM numbers(1); DROP TABLE IF EXISTS local_table; @@ -23,8 +23,8 @@ INSERT INTO local_table SELECT number FROM numbers(1); SELECT any(number + 1) FROM dist; SELECT uniq(bitNot(number)) FROM dist; SELECT sum(number + 1) FROM dist; -SELECT transform(number, [1, 2], ['google', 'yandex'], 'other') FROM dist; -SELECT number > 0 ? 'yandex' : 'google' FROM dist; +SELECT transform(number, [1, 2], ['google', 'censor.net'], 'other') FROM dist; +SELECT number > 0 ? 'censor.net' : 'google' FROM dist; DROP TABLE local_table; DROP TABLE dist; diff --git a/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql b/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql index f9c227942ac..0595e67f2b0 100644 --- a/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql +++ b/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + DROP TABLE IF EXISTS buf_dest; DROP TABLE IF EXISTS buf; diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql index ca9f296b6bf..90975b0d9c4 100644 --- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql @@ -1,6 +1,7 @@ DROP TABLE IF EXISTS select_final; SET do_not_merge_across_partitions_select_final = 1; +SET max_threads = 0; CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t); diff --git a/tests/queries/0_stateless/01598_memory_limit_zeros.sql 
b/tests/queries/0_stateless/01598_memory_limit_zeros.sql index 5b321687e43..cc2a75e023e 100644 --- a/tests/queries/0_stateless/01598_memory_limit_zeros.sql +++ b/tests/queries/0_stateless/01598_memory_limit_zeros.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-parallel, no-fasttest, no-random-settings SET max_memory_usage = 1, max_untracked_memory = 1000000, max_threads=40; select 'test', count(*) from zeros_mt(1000000) where not ignore(zero); -- { serverError 241 } diff --git a/tests/queries/0_stateless/01622_constraints_simple_optimization.sql b/tests/queries/0_stateless/01622_constraints_simple_optimization.sql index e1922975a2a..7ec9e1a3158 100644 --- a/tests/queries/0_stateless/01622_constraints_simple_optimization.sql +++ b/tests/queries/0_stateless/01622_constraints_simple_optimization.sql @@ -8,23 +8,23 @@ SET optimize_move_to_prewhere = 1; SET optimize_substitute_columns = 1; SET optimize_append_index = 1; -CREATE TABLE constraint_test_assumption (URL String, a Int32, CONSTRAINT c1 ASSUME domainWithoutWWW(URL) = 'yandex.ru', CONSTRAINT c2 ASSUME URL > 'zzz' AND startsWith(URL, 'test') = True) ENGINE = TinyLog; +CREATE TABLE constraint_test_assumption (URL String, a Int32, CONSTRAINT c1 ASSUME domainWithoutWWW(URL) = 'bigmir.net', CONSTRAINT c2 ASSUME URL > 'zzz' AND startsWith(URL, 'test') = True) ENGINE = TinyLog; --- Add wrong rows in order to check optimization INSERT INTO constraint_test_assumption (URL, a) VALUES ('1', 1); INSERT INTO constraint_test_assumption (URL, a) VALUES ('2', 2); -INSERT INTO constraint_test_assumption (URL, a) VALUES ('yandex.ru', 3); +INSERT INTO constraint_test_assumption (URL, a) VALUES ('bigmir.net', 3); INSERT INTO constraint_test_assumption (URL, a) VALUES ('3', 4); -SELECT count() FROM constraint_test_assumption WHERE domainWithoutWWW(URL) = 'yandex.ru'; --- assumption -> 4 -SELECT count() FROM constraint_test_assumption WHERE NOT (domainWithoutWWW(URL) = 'yandex.ru'); --- assumption -> 0 -SELECT 
count() FROM constraint_test_assumption WHERE domainWithoutWWW(URL) != 'yandex.ru'; --- assumption -> 0 +SELECT count() FROM constraint_test_assumption WHERE domainWithoutWWW(URL) = 'bigmir.net'; --- assumption -> 4 +SELECT count() FROM constraint_test_assumption WHERE NOT (domainWithoutWWW(URL) = 'bigmir.net'); --- assumption -> 0 +SELECT count() FROM constraint_test_assumption WHERE domainWithoutWWW(URL) != 'bigmir.net'; --- assumption -> 0 SELECT count() FROM constraint_test_assumption WHERE domainWithoutWWW(URL) = 'nothing'; --- not optimized -> 0 -SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'yandex.ru' AND URL > 'zzz'); ---> assumption -> 4 -SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'yandex.ru' AND NOT URL <= 'zzz'); ---> assumption -> 4 -SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'yandex.ru' AND URL > 'zzz') OR (a = 10 AND a + 5 < 100); ---> assumption -> 4 -SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'yandex.ru' AND URL = '111'); ---> assumption & no assumption -> 0 +SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'bigmir.net' AND URL > 'zzz'); ---> assumption -> 4 +SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'bigmir.net' AND NOT URL <= 'zzz'); ---> assumption -> 4 +SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'bigmir.net' AND URL > 'zzz') OR (a = 10 AND a + 5 < 100); ---> assumption -> 4 +SELECT count() FROM constraint_test_assumption WHERE (domainWithoutWWW(URL) = 'bigmir.net' AND URL = '111'); ---> assumption & no assumption -> 0 SELECT count() FROM constraint_test_assumption WHERE (startsWith(URL, 'test') = True); ---> assumption -> 4 DROP TABLE constraint_test_assumption; diff --git a/tests/queries/0_stateless/01661_referer.reference b/tests/queries/0_stateless/01661_referer.reference index 49d29c80f17..d247c53413e 100644 
--- a/tests/queries/0_stateless/01661_referer.reference +++ b/tests/queries/0_stateless/01661_referer.reference @@ -1,2 +1,2 @@ 1 -https://yandex.ru/ +https://meta.ua/ diff --git a/tests/queries/0_stateless/01661_referer.sh b/tests/queries/0_stateless/01661_referer.sh index 8123c925454..0299ee063ea 100755 --- a/tests/queries/0_stateless/01661_referer.sh +++ b/tests/queries/0_stateless/01661_referer.sh @@ -4,6 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT 1' --referer 'https://yandex.ru/' +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d 'SELECT 1' --referer 'https://meta.ua/' ${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS" -${CLICKHOUSE_CLIENT} --query "SELECT http_referer FROM system.query_log WHERE current_database = currentDatabase() AND http_referer LIKE '%yandex%' LIMIT 1" +${CLICKHOUSE_CLIENT} --query "SELECT http_referer FROM system.query_log WHERE current_database = currentDatabase() AND http_referer LIKE '%meta%' LIMIT 1" diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.reference b/tests/queries/0_stateless/01675_data_type_coroutine.reference index 7326d960397..541dab48def 100644 --- a/tests/queries/0_stateless/01675_data_type_coroutine.reference +++ b/tests/queries/0_stateless/01675_data_type_coroutine.reference @@ -1 +1,2 @@ Ok +Ok diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.sh b/tests/queries/0_stateless/01675_data_type_coroutine.sh index 8e80d722a4c..9f7d5401bd2 100755 --- a/tests/queries/0_stateless/01675_data_type_coroutine.sh +++ b/tests/queries/0_stateless/01675_data_type_coroutine.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -6,6 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) counter=0 retries=60 + I=0 while [[ $counter -lt $retries ]]; do I=$((I + 1)) @@ -14,5 
+16,16 @@ while [[ $counter -lt $retries ]]; do ((++counter)) done +echo 'Ok' + +counter=0 +I=0 +while [[ $counter -lt $retries ]]; do + I=$((I + 1)) + TYPE=$(perl -e "print 'Array(' x $I; print 'UInt8'; print ')' x $I") + ${CLICKHOUSE_CLIENT} --prefer_localhost_replica=0 --max_parser_depth 1000000 --query "SELECT * FROM remote('127.0.0.{1,2}', generateRandom('x $TYPE', 1, 1, 1)) LIMIT 1 FORMAT Null" 2>&1 | grep -q -F 'Maximum parse depth' && break; + ((++counter)) +done + #echo "I = ${I}" echo 'Ok' diff --git a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh index edc4f6916ff..0fe04fb95fd 100755 --- a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh +++ b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh @@ -6,6 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} -q "create table insert_big_json(a String, b String) engine=MergeTree() order by tuple()"; -python3 -c "[print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000)) for i in range(10)]; [print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000)) for i in range(10)]" 2>/dev/null | ${CLICKHOUSE_CLIENT} --input_format_parallel_parsing=1 --max_memory_usage=0 -q "insert into insert_big_json FORMAT JSONEachRow" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." 
|| echo "FAIL" ||: +python3 -c "[print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000)) for i in range(10)]; [print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000)) for i in range(10)]" 2>/dev/null | ${CLICKHOUSE_CLIENT} --min_chunk_bytes_for_parallel_parsing=10485760 --max_threads=0 --input_format_parallel_parsing=1 --max_memory_usage=0 -q "insert into insert_big_json FORMAT JSONEachRow" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||: ${CLICKHOUSE_CLIENT} -q "drop table insert_big_json" diff --git a/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh index 7295537a2d2..03f7893eb04 100755 --- a/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh +++ b/tests/queries/0_stateless/01722_long_brotli_http_compression_json_format.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: br' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON" | brotli -d | tail -n30 | head -n23 +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: br' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON SETTINGS max_block_size=65505" | brotli -d | tail -n30 | head -n23 diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql index 7f36bcb6c8a..62b578c21d6 100644 --- a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -1,4 +1,4 @@ --- Tags: long, distributed +-- Tags: long, distributed, no-random-settings drop table if exists data_01730; diff --git a/tests/queries/0_stateless/01733_transform_ubsan.sql b/tests/queries/0_stateless/01733_transform_ubsan.sql index 256603e9087..7c3d8ef653a 100644 --- a/tests/queries/0_stateless/01733_transform_ubsan.sql +++ b/tests/queries/0_stateless/01733_transform_ubsan.sql @@ -1,4 +1,4 @@ -SELECT arrayStringConcat(arrayMap(x -> transform(x, [1025, -9223372036854775808, 65537, 257, 1048576, 10, 7, 1048575, 65536], ['yandex', 'googlegooglegooglegoogle', 'test', '', '', 'hello', 'world', '', 'xyz'], ''), arrayMap(x -> (x % -inf), range(number))), '') +SELECT arrayStringConcat(arrayMap(x -> transform(x, [1025, -9223372036854775808, 65537, 257, 1048576, 10, 7, 1048575, 65536], ['censor.net', 
'googlegooglegooglegoogle', 'test', '', '', 'hello', 'world', '', 'xyz'], ''), arrayMap(x -> (x % -inf), range(number))), '') FROM system.numbers LIMIT 1025 FORMAT Null; diff --git a/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh index feaf4bcf623..71b003d2533 100755 --- a/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh +++ b/tests/queries/0_stateless/01746_long_zlib_http_compression_json_format.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip' "${CLICKHOUSE_URL}&enable_http_compression=1&http_zlib_compression_level=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(100000) FORMAT JSON" | gzip -d | tail -n30 | head -n23 +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: gzip' "${CLICKHOUSE_URL}&enable_http_compression=1&http_zlib_compression_level=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(100000) FORMAT JSON SETTINGS max_block_size=65505" | gzip -d | tail -n30 | head -n23 diff --git a/tests/queries/0_stateless/01889_clickhouse_client_config_format.sh b/tests/queries/0_stateless/01889_clickhouse_client_config_format.sh index aa162911399..4e970f17d3a 100755 --- a/tests/queries/0_stateless/01889_clickhouse_client_config_format.sh +++ b/tests/queries/0_stateless/01889_clickhouse_client_config_format.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh 
diff --git a/tests/queries/0_stateless/01889_tokenize.reference b/tests/queries/0_stateless/01889_tokenize.reference index 4dd6f323929..2861a183c33 100644 --- a/tests/queries/0_stateless/01889_tokenize.reference +++ b/tests/queries/0_stateless/01889_tokenize.reference @@ -1,8 +1,8 @@ ['It','is','quite','a','wonderful','day','isn','t','it'] ['There','is','so','much','to','learn'] -['22','00','email','yandex','ru'] +['22','00','email','tut','by'] ['Токенизация','каких','либо','других','языков'] ['It','is','quite','a','wonderful','day,','isn\'t','it?'] ['There','is....','so','much','to','learn!'] -['22:00','email@yandex.ru'] +['22:00','email@tut.by'] ['Токенизация','каких-либо','других','языков?'] diff --git a/tests/queries/0_stateless/01889_tokenize.sql b/tests/queries/0_stateless/01889_tokenize.sql index c9d29a8632b..287e439d2ce 100644 --- a/tests/queries/0_stateless/01889_tokenize.sql +++ b/tests/queries/0_stateless/01889_tokenize.sql @@ -2,10 +2,10 @@ SET allow_experimental_nlp_functions = 1; SELECT splitByNonAlpha('It is quite a wonderful day, isn\'t it?'); SELECT splitByNonAlpha('There is.... so much to learn!'); -SELECT splitByNonAlpha('22:00 email@yandex.ru'); +SELECT splitByNonAlpha('22:00 email@tut.by'); SELECT splitByNonAlpha('Токенизация каких-либо других языков?'); SELECT splitByWhitespace('It is quite a wonderful day, isn\'t it?'); SELECT splitByWhitespace('There is.... 
so much to learn!'); -SELECT splitByWhitespace('22:00 email@yandex.ru'); +SELECT splitByWhitespace('22:00 email@tut.by'); SELECT splitByWhitespace('Токенизация каких-либо других языков?'); diff --git a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh index b6721d4a798..47cf6e06b48 100755 --- a/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh +++ b/tests/queries/0_stateless/01946_test_zstd_decompression_with_escape_sequence_at_the_end_of_buffer.sh @@ -12,6 +12,7 @@ mkdir -p ${user_files_path}/ cp $CUR_DIR/data_zstd/test_01946.zstd ${user_files_path}/ ${CLICKHOUSE_CLIENT} --multiline --multiquery --query " +set min_chunk_bytes_for_parallel_parsing=10485760; set max_read_buffer_size = 65536; set input_format_parallel_parsing = 0; select * from file('test_01946.zstd', 'JSONEachRow', 'foo String') limit 30 format Null; diff --git a/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh b/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh index e7871454192..7740ffcce7b 100755 --- a/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh +++ b/tests/queries/0_stateless/01955_clickhouse_benchmark_connection_hang.sh @@ -66,8 +66,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # frame #11: 0x000000000fffdfc4 clickhouse`main(argc_=, argv_=) + 356 at main.cpp:366 # frame #12: 0x00007ffff7de6d0a libc.so.6`__libc_start_main(main=(clickhouse`main at main.cpp:339), argc=7, argv=0x00007fffffffe1e8, init=, fini=, rtld_fini=, stack_end=0x00007fffffffe1d8) + 234 at libc-start.c:308 # frame #13: 0x000000000ffdc0aa clickhouse`_start + 42 -# -# [1]: https://clickhouse-test-reports.s3.yandex.net/26656/f17ca450ac991603e6400c7caef49c493ac69739/functional_stateless_tests_(ubsan).html#fail1 # Limit number of files to 50, and 
we will get EMFILE for some of socket() prlimit --nofile=50 $CLICKHOUSE_BENCHMARK --iterations 1 --concurrency 50 --query 'select 1' 2>&1 diff --git a/tests/queries/0_stateless/02015_async_inserts_2.sh b/tests/queries/0_stateless/02015_async_inserts_2.sh index 90f5584d84e..fd20f846897 100755 --- a/tests/queries/0_stateless/02015_async_inserts_2.sh +++ b/tests/queries/0_stateless/02015_async_inserts_2.sh @@ -1,13 +1,14 @@ #!/usr/bin/env bash +# Tags: no-random-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1" +url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=1&max_insert_threads=0&group_by_two_level_threshold=100000&group_by_two_level_threshold_bytes=50000000&distributed_aggregation_memory_efficient=1&fsync_metadata=1&priority=1&output_format_parallel_formatting=0&input_format_parallel_parsing=0&min_chunk_bytes_for_parallel_parsing=4031398&max_read_buffer_size=554729&prefer_localhost_replica=0&max_block_size=51672&max_threads=20" -${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS async_inserts" -${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" +${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "DROP TABLE IF EXISTS async_inserts" +${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 
--input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "CREATE TABLE async_inserts (id UInt32, s String) ENGINE = MergeTree ORDER BY id" ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV 1,"a" @@ -22,7 +23,7 @@ ${CLICKHOUSE_CURL} -sS "$url" -d 'INSERT INTO async_inserts FORMAT CSV wait -${CLICKHOUSE_CLIENT} -q "SELECT * FROM async_inserts ORDER BY id" -${CLICKHOUSE_CLIENT} -q "SELECT name, rows, level FROM system.parts WHERE table = 'async_inserts' AND database = '$CLICKHOUSE_DATABASE' ORDER BY name" +${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "SELECT * FROM async_inserts ORDER BY id" +${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 --input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "SELECT name, rows, level FROM system.parts WHERE table = 'async_inserts' AND database = '$CLICKHOUSE_DATABASE' ORDER BY name" -${CLICKHOUSE_CLIENT} -q "DROP TABLE async_inserts" +${CLICKHOUSE_CLIENT} --max_insert_threads=0 --group_by_two_level_threshold=100000 --group_by_two_level_threshold_bytes=50000000 --distributed_aggregation_memory_efficient=1 --fsync_metadata=1 --priority=1 --output_format_parallel_formatting=0 
--input_format_parallel_parsing=0 --min_chunk_bytes_for_parallel_parsing=4031398 --max_read_buffer_size=554729 --prefer_localhost_replica=0 --max_block_size=51672 --max_threads=20 -q "DROP TABLE async_inserts" diff --git a/tests/queries/0_stateless/02118_deserialize_whole_text.sh b/tests/queries/0_stateless/02118_deserialize_whole_text.sh index fe9256df329..e9f35582f15 100755 --- a/tests/queries/0_stateless/02118_deserialize_whole_text.sh +++ b/tests/queries/0_stateless/02118_deserialize_whole_text.sh @@ -41,16 +41,16 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv4')" 2>&1 | echo "[\"255.255.255.255trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' -echo "[\"2a02:6ba8:2da1:40cd:31db:f9f1:fc3d:80b1trash\"]" > $DATA_FILE +echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' -echo "2a02:6ba8:2da1:40cd:31db:f9f1:fc3d:80b1trash" > $DATA_FILE +echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' -echo "2a02:6ba8:2da1:40cd:31db:f9f1:fc3d:80b1trash" > $DATA_FILE +echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' -echo "[\"2a02:6ba8:2da1:40cd:31db:f9f1:fc3d:80b1trash\"]" > $DATA_FILE +echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv6')" 2>&1 | 
grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "[\"{1:2, 2:3}trash\"]" > $DATA_FILE diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.reference b/tests/queries/0_stateless/02207_subseconds_intervals.reference new file mode 100644 index 00000000000..f7b91ff48b8 --- /dev/null +++ b/tests/queries/0_stateless/02207_subseconds_intervals.reference @@ -0,0 +1,62 @@ +test intervals +- test nanoseconds +1980-12-12 12:12:12.123456789 +1980-12-12 12:12:12.123456700 +1980-12-12 12:12:12.123456789 +1930-12-12 12:12:12.123456789 +1930-12-12 12:12:12.123456700 +2220-12-12 12:12:12.123456789 +2220-12-12 12:12:12.123456700 +- test microseconds +1980-12-12 12:12:12.123456 +1980-12-12 12:12:12.123400 +1980-12-12 12:12:12.123456 +1980-12-12 12:12:12.123456 +1930-12-12 12:12:12.123456 +1930-12-12 12:12:12.123400 +1930-12-12 12:12:12.123457 +2220-12-12 12:12:12.123456 +2220-12-12 12:12:12.123400 +2220-12-12 12:12:12.123456 +- test milliseconds +1980-12-12 12:12:12.123 +1980-12-12 12:12:12.120 +1980-12-12 12:12:12.123 +1980-12-12 12:12:12.123 +1930-12-12 12:12:12.123 +1930-12-12 12:12:12.120 +1930-12-12 12:12:12.124 +2220-12-12 12:12:12.123 +2220-12-12 12:12:12.120 +2220-12-12 12:12:12.123 +test add[...]seconds() +- test nanoseconds +1980-12-12 12:12:12.123456790 +1980-12-12 12:12:12.123456701 +1980-12-12 12:12:12.123456790 +1930-12-12 12:12:12.123456788 +1930-12-12 12:12:12.123456699 +2220-12-12 12:12:12.123456790 +2220-12-12 12:12:12.123456701 +- test microseconds +1980-12-12 12:12:12.123457 +1980-12-12 12:12:12.123401 +1980-12-12 12:12:12.12345778 +1980-12-12 12:12:12.123457 +1930-12-12 12:12:12.123455 +1930-12-12 12:12:12.123399 +1930-12-12 12:12:12.12345578 +2220-12-12 12:12:12.123457 +2220-12-12 12:12:12.123401 +2220-12-12 12:12:12.12345778 +- test milliseconds +1980-12-12 12:12:12.124 +1980-12-12 12:12:12.121 +1980-12-12 12:12:12.124456 +1980-12-12 12:12:12.124 +1930-12-12 12:12:12.122 +1930-12-12 12:12:12.119 +1930-12-12 
12:12:12.122456 +2220-12-12 12:12:12.124 +2220-12-12 12:12:12.121 +2220-12-12 12:12:12.124456 diff --git a/tests/queries/0_stateless/02207_subseconds_intervals.sql b/tests/queries/0_stateless/02207_subseconds_intervals.sql new file mode 100644 index 00000000000..a7ce03d9330 --- /dev/null +++ b/tests/queries/0_stateless/02207_subseconds_intervals.sql @@ -0,0 +1,94 @@ +SELECT 'test intervals'; + +SELECT '- test nanoseconds'; +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456789', 9), INTERVAL 1 NANOSECOND); -- In normal range, source scale matches result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.1234567', 7), INTERVAL 1 NANOSECOND); -- In normal range, source scale less than result + +select toStartOfInterval(a, INTERVAL 1 NANOSECOND) from ( select toDateTime64('1980-12-12 12:12:12.123456789', 9) AS a ); -- Non-constant argument + +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456789', 9), INTERVAL 1 NANOSECOND); -- Below normal range, source scale matches result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.1234567', 7), INTERVAL 1 NANOSECOND); -- Below normal range, source scale less than result + +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123456789', 9), INTERVAL 1 NANOSECOND); -- Above normal range, source scale matches result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.1234567', 7), INTERVAL 1 NANOSECOND); -- Above normal range, source scale less than result + + +SELECT '- test microseconds'; +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456', 6), INTERVAL 1 MICROSECOND); -- In normal range, source scale matches result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.1234', 4), INTERVAL 1 MICROSECOND); -- In normal range, source scale less than result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.12345678', 8), INTERVAL 1 MICROSECOND); -- In normal range, source scale greater than result + +select toStartOfInterval(a, 
INTERVAL 1 MICROSECOND) from ( select toDateTime64('1980-12-12 12:12:12.12345678', 8) AS a ); -- Non-constant argument + +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123456', 6), INTERVAL 1 MICROSECOND); -- Below normal range, source scale matches result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.1234', 4), INTERVAL 1 MICROSECOND); -- Below normal range, source scale less than result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.12345678', 8), INTERVAL 1 MICROSECOND); -- Below normal range, source scale greater than result + + +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123456', 6), INTERVAL 1 MICROSECOND); -- Above normal range, source scale matches result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.1234', 4), INTERVAL 1 MICROSECOND); -- Above normal range, source scale less than result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.12345678', 8), INTERVAL 1 MICROSECOND); -- Above normal range, source scale greater than result + + +SELECT '- test milliseconds'; +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123', 3), INTERVAL 1 MILLISECOND); -- In normal range, source scale matches result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.12', 2), INTERVAL 1 MILLISECOND); -- In normal range, source scale less than result +select toStartOfInterval(toDateTime64('1980-12-12 12:12:12.123456', 6), INTERVAL 1 MILLISECOND); -- In normal range, source scale greater than result + +select toStartOfInterval(a, INTERVAL 1 MILLISECOND) from ( select toDateTime64('1980-12-12 12:12:12.123456', 6) AS a ); -- Non-constant argument + +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.123', 3), INTERVAL 1 MILLISECOND); -- Below normal range, source scale matches result +select toStartOfInterval(toDateTime64('1930-12-12 12:12:12.12', 2), INTERVAL 1 MILLISECOND); -- Below normal range, source scale less than result +select toStartOfInterval(toDateTime64('1930-12-12 
12:12:12.123456', 6), INTERVAL 1 MILLISECOND); -- Below normal range, source scale greater than result + +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123', 3), INTERVAL 1 MILLISECOND); -- Above normal range, source scale matches result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.12', 2), INTERVAL 1 MILLISECOND); -- Above normal range, source scale less than result +select toStartOfInterval(toDateTime64('2220-12-12 12:12:12.123456', 6), INTERVAL 1 MILLISECOND); -- Above normal range, source scale greater than result + + +SELECT 'test add[...]seconds()'; + + +SELECT '- test nanoseconds'; +select addNanoseconds(toDateTime64('1980-12-12 12:12:12.123456789', 9), 1); -- In normal range, source scale matches result +select addNanoseconds(toDateTime64('1980-12-12 12:12:12.1234567', 7), 1); -- In normal range, source scale less than result + +select addNanoseconds(a, 1) from ( select toDateTime64('1980-12-12 12:12:12.123456789', 9) AS a ); -- Non-constant argument + +select addNanoseconds(toDateTime64('1930-12-12 12:12:12.123456789', 9), 1); -- Below normal range, source scale matches result +select addNanoseconds(toDateTime64('1930-12-12 12:12:12.1234567', 7), 1); -- Below normal range, source scale less than result + +select addNanoseconds(toDateTime64('2220-12-12 12:12:12.123456789', 9), 1); -- Above normal range, source scale matches result +select addNanoseconds(toDateTime64('2220-12-12 12:12:12.1234567', 7), 1); -- Above normal range, source scale less than result + + +SELECT '- test microseconds'; +select addMicroseconds(toDateTime64('1980-12-12 12:12:12.123456', 6), 1); -- In normal range, source scale matches result +select addMicroseconds(toDateTime64('1980-12-12 12:12:12.1234', 4), 1); -- In normal range, source scale less than result +select addMicroseconds(toDateTime64('1980-12-12 12:12:12.12345678', 8), 1); -- In normal range, source scale greater than result + +select addMicroseconds(a, 1) from ( select toDateTime64('1980-12-12 
12:12:12.123456', 6) AS a ); -- Non-constant argument + +select addMicroseconds(toDateTime64('1930-12-12 12:12:12.123456', 6), 1); -- Below normal range, source scale matches result +select addMicroseconds(toDateTime64('1930-12-12 12:12:12.1234', 4), 1); -- Below normal range, source scale less than result +select addMicroseconds(toDateTime64('1930-12-12 12:12:12.12345678', 8), 1); -- Below normal range, source scale greater than result + +select addMicroseconds(toDateTime64('2220-12-12 12:12:12.123456', 6), 1); -- Above normal range, source scale matches result +select addMicroseconds(toDateTime64('2220-12-12 12:12:12.1234', 4), 1); -- Above normal range, source scale less than result +select addMicroseconds(toDateTime64('2220-12-12 12:12:12.12345678', 8), 1); -- Above normal range, source scale greater than result + + +SELECT '- test milliseconds'; +select addMilliseconds(toDateTime64('1980-12-12 12:12:12.123', 3), 1); -- In normal range, source scale matches result +select addMilliseconds(toDateTime64('1980-12-12 12:12:12.12', 2), 1); -- In normal range, source scale less than result +select addMilliseconds(toDateTime64('1980-12-12 12:12:12.123456', 6), 1); -- In normal range, source scale greater than result + +select addMilliseconds(a, 1) from ( select toDateTime64('1980-12-12 12:12:12.123', 3) AS a ); -- Non-constant argument + +select addMilliseconds(toDateTime64('1930-12-12 12:12:12.123', 3), 1); -- Below normal range, source scale matches result +select addMilliseconds(toDateTime64('1930-12-12 12:12:12.12', 2), 1); -- Below normal range, source scale less than result +select addMilliseconds(toDateTime64('1930-12-12 12:12:12.123456', 6), 1); -- Below normal range, source scale greater than result + +select addMilliseconds(toDateTime64('2220-12-12 12:12:12.123', 3), 1); -- Above normal range, source scale matches result +select addMilliseconds(toDateTime64('2220-12-12 12:12:12.12', 2), 1); -- Above normal range, source scale less than result +select 
addMilliseconds(toDateTime64('2220-12-12 12:12:12.123456', 6), 1); -- Above normal range, source scale greater than result diff --git a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh index 842c32cf243..261c389c9f2 100755 --- a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh +++ b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh @@ -29,5 +29,5 @@ $CLICKHOUSE_CLIENT -q "create table test_dist engine=Distributed('test_shard_loc $CLICKHOUSE_CLIENT -q "detach table test_dist" $CLICKHOUSE_CLIENT -q "drop table test" $CLICKHOUSE_CLIENT -q "attach table test_dist" -$CLICKHOUSE_CLIENT -q "select * from test_dist" 2>&1 | grep -q "UNKNOWN_TABLE" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT --prefer_localhost_replica=1 -q "select * from test_dist" 2>&1 | grep -q "UNKNOWN_TABLE" && echo "OK" || echo "FAIL" diff --git a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh b/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh deleted file mode 100755 index b946addd01c..00000000000 --- a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash -# Tags: no-ubsan, no-fasttest - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. 
"$CUR_DIR"/../shell_config.sh - -echo "Parquet" -DATA_FILE=$CUR_DIR/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet -${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (id String, score Int32) ENGINE = Memory" -cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet SETTINGS input_format_use_lowercase_column_name=true" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" -${CLICKHOUSE_CLIENT} --query="drop table parquet_load" - -echo "ORC" -DATA_FILE=$CUR_DIR/data_orc/test_setting_input_format_use_lowercase_column_name.orc -${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS orc_load" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE orc_load (id String, score Int32) ENGINE = Memory" -cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO orc_load FORMAT ORC SETTINGS input_format_use_lowercase_column_name=true" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM orc_load" -${CLICKHOUSE_CLIENT} --query="drop table orc_load" diff --git a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference new file mode 100644 index 00000000000..a8abc33648e --- /dev/null +++ b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.reference @@ -0,0 +1,8 @@ +b Nullable(String) +c Nullable(String) +a Nullable(String) +s1 \N 1 +} [2] 2 +\N \N \N +\N \N \N +\N [3] \N diff --git a/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh new file mode 100755 index 00000000000..8655ffd1e1f --- /dev/null +++ b/tests/queries/0_stateless/02240_tskv_schema_inference_bug.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +FILE_NAME=test_02240.data +DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME + +touch $DATA_FILE + +echo -e 'a=1\tb=s1\tc=\N +c=[2]\ta=2\tb=\N} + +a=\N +c=[3]\ta=\N' > $DATA_FILE +$CLICKHOUSE_CLIENT --max_read_buffer_size=4 -q "desc file('$FILE_NAME', 'TSKV')" +$CLICKHOUSE_CLIENT --max_read_buffer_size=4 -q "select * from file('$FILE_NAME', 'TSKV')" + diff --git a/tests/queries/0_stateless/02241_parquet_bad_column.reference b/tests/queries/0_stateless/02241_parquet_bad_column.reference index f599e28b8ab..b2f7f08c170 100644 --- a/tests/queries/0_stateless/02241_parquet_bad_column.reference +++ b/tests/queries/0_stateless/02241_parquet_bad_column.reference @@ -1 +1,2 @@ 10 +10 diff --git a/tests/queries/0_stateless/02241_parquet_bad_column.sh b/tests/queries/0_stateless/02241_parquet_bad_column.sh index a160671a088..9efd11cbbe1 100755 --- a/tests/queries/0_stateless/02241_parquet_bad_column.sh +++ b/tests/queries/0_stateless/02241_parquet_bad_column.sh @@ -5,23 +5,25 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "drop table if exists test_02241" -$CLICKHOUSE_CLIENT -q "create table test_02241 (image_path Nullable(String), - caption Nullable(String), - NSFW Nullable(String), - similarity Nullable(Float64), - LICENSE Nullable(String), - url Nullable(String), - key Nullable(UInt64), - shard_id Nullable(UInt64), - status Nullable(String), - width Nullable(UInt32), - height Nullable(UInt32), - exif Nullable(String), - original_width Nullable(UInt32), - original_height Nullable(UInt32)) engine=Memory" +for case_insensitive in "true" "false"; do + $CLICKHOUSE_CLIENT -q "drop table if exists test_02241" + $CLICKHOUSE_CLIENT -q "create table test_02241 (image_path Nullable(String), + caption Nullable(String), + NSFW Nullable(String), + similarity Nullable(Float64), + LICENSE Nullable(String), + url Nullable(String), + key Nullable(UInt64), + shard_id Nullable(UInt64), + status Nullable(String), + width Nullable(UInt32), + height Nullable(UInt32), + exif Nullable(String), + original_width Nullable(UInt32), + original_height Nullable(UInt32)) engine=Memory" -cat $CUR_DIR/data_parquet_bad_column/metadata_0.parquet | $CLICKHOUSE_CLIENT -q "insert into test_02241 format Parquet" + cat $CUR_DIR/data_parquet_bad_column/metadata_0.parquet | $CLICKHOUSE_CLIENT -q "insert into test_02241 format Parquet SETTINGS input_format_parquet_case_insensitive_column_matching=$case_insensitive" -$CLICKHOUSE_CLIENT -q "select count() from test_02241" -$CLICKHOUSE_CLIENT -q "drop table test_02241" + $CLICKHOUSE_CLIENT -q "select count() from test_02241" + $CLICKHOUSE_CLIENT -q "drop table test_02241" +done diff --git a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference b/tests/queries/0_stateless/02242_case_insensitive_column_matching.reference similarity index 66% rename from tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference rename to 
tests/queries/0_stateless/02242_case_insensitive_column_matching.reference index 5c383cb3035..9732211a286 100644 --- a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference +++ b/tests/queries/0_stateless/02242_case_insensitive_column_matching.reference @@ -4,3 +4,6 @@ Parquet ORC 123 1 456 2 +Arrow +123 1 +456 2 diff --git a/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh b/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh new file mode 100755 index 00000000000..8ebf2952ab3 --- /dev/null +++ b/tests/queries/0_stateless/02242_case_insensitive_column_matching.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# Tags: no-ubsan, no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo "Parquet" +DATA_FILE=$CUR_DIR/data_parquet/case_insensitive_column_matching.parquet +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (iD String, scOre Int32) ENGINE = Memory" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet SETTINGS input_format_parquet_case_insensitive_column_matching=true" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" +${CLICKHOUSE_CLIENT} --query="drop table parquet_load" + +echo "ORC" +DATA_FILE=$CUR_DIR/data_orc/case_insensitive_column_matching.orc +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS orc_load" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE orc_load (iD String, sCorE Int32) ENGINE = Memory" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO orc_load FORMAT ORC SETTINGS input_format_orc_case_insensitive_column_matching=true" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM orc_load" +${CLICKHOUSE_CLIENT} --query="drop table orc_load" + +echo "Arrow" +DATA_FILE=$CUR_DIR/data_arrow/case_insensitive_column_matching.arrow +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS 
arrow_load" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_load (iD String, sCorE Int32) ENGINE = Memory" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO arrow_load FORMAT Arrow SETTINGS input_format_arrow_case_insensitive_column_matching=true" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_load" +${CLICKHOUSE_CLIENT} --query="drop table arrow_load" diff --git a/tests/queries/0_stateless/02242_case_insensitive_nested.reference b/tests/queries/0_stateless/02242_case_insensitive_nested.reference new file mode 100644 index 00000000000..58d66d3230a --- /dev/null +++ b/tests/queries/0_stateless/02242_case_insensitive_nested.reference @@ -0,0 +1,12 @@ +Arrow +[1,2,3] ['123','456','789'] [9.8,10.12,11.14] +[4,5,6] ['101112','131415','161718'] [123.8,10.2,11.414] +[7,8,9] ['101','415','118'] [13.08,1.12,0.414] +Parquet +[1,2,3] ['123','456','789'] [9.8,10.12,11.14] +[4,5,6] ['101112','131415','161718'] [123.8,10.2,11.414] +[7,8,9] ['101','415','118'] [13.08,1.12,0.414] +ORC +[1,2,3] ['123','456','789'] [9.8,10.12,11.14] +[4,5,6] ['101112','131415','161718'] [123.8,10.2,11.414] +[7,8,9] ['101','415','118'] [13.08,1.12,0.414] diff --git a/tests/queries/0_stateless/02242_case_insensitive_nested.sh b/tests/queries/0_stateless/02242_case_insensitive_nested.sh new file mode 100755 index 00000000000..c22f5695dc3 --- /dev/null +++ b/tests/queries/0_stateless/02242_case_insensitive_nested.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS nested_table" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS nested_nested_table" + +${CLICKHOUSE_CLIENT} --query="CREATE TABLE nested_table (table Nested(eLeM1 Int32, elEm2 String, ELEM3 Float32)) engine=Memory" + +formats=('Arrow' 'Parquet' 'ORC') +format_files=('arrow' 'parquet' 'orc') + +for ((i = 0; i < 3; i++)) do + echo ${formats[i]} + + ${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE nested_table" + cat $CUR_DIR/data_orc_arrow_parquet_nested/nested_table.${format_files[i]} | ${CLICKHOUSE_CLIENT} -q "INSERT INTO nested_table FORMAT ${formats[i]} SETTINGS input_format_${format_files[i]}_import_nested = 1, input_format_${format_files[i]}_case_insensitive_column_matching = true" + + ${CLICKHOUSE_CLIENT} --query="SELECT * FROM nested_table" + +done + +${CLICKHOUSE_CLIENT} --query="DROP TABLE nested_table" diff --git a/tests/queries/0_stateless/02243_in_ip_address.reference b/tests/queries/0_stateless/02243_in_ip_address.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/02243_in_ip_address.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/02243_in_ip_address.sql b/tests/queries/0_stateless/02243_in_ip_address.sql new file mode 100644 index 00000000000..a2c8c37e585 --- /dev/null +++ b/tests/queries/0_stateless/02243_in_ip_address.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table (id UInt64, value_ipv4 IPv4, value_ipv6 IPv6) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, '127.0.0.1', '127.0.0.1'); + +SELECT id FROM test_table WHERE value_ipv4 IN (SELECT value_ipv4 FROM test_table); +SELECT id FROM test_table WHERE value_ipv6 IN (SELECT value_ipv6 FROM test_table); + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02243_ipv6_long_parsing.reference b/tests/queries/0_stateless/02243_ipv6_long_parsing.reference new file mode 100644 index 
00000000000..c09bfebe9d5 --- /dev/null +++ b/tests/queries/0_stateless/02243_ipv6_long_parsing.reference @@ -0,0 +1,3 @@ +0 ::ffff:1.12.12.12 +1 ::ffff:123.123.123.123 +2 ::ffff:192.168.100.228 diff --git a/tests/queries/0_stateless/02243_ipv6_long_parsing.sql b/tests/queries/0_stateless/02243_ipv6_long_parsing.sql new file mode 100644 index 00000000000..25225ee0fa8 --- /dev/null +++ b/tests/queries/0_stateless/02243_ipv6_long_parsing.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table (id UInt64, value IPv6) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, '0000:0000:0000:0000:0000:ffff:1.12.12.12'); +INSERT INTO test_table VALUES (1, '0000:0000:0000:0000:0000:ffff:123.123.123.123'); +INSERT INTO test_table VALUES (2, '0000:0000:0000:0000:0000:ffff:192.168.100.228'); + +SELECT * FROM test_table ORDER BY id; + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/data_arrow/case_insensitive_column_matching.arrow b/tests/queries/0_stateless/data_arrow/case_insensitive_column_matching.arrow new file mode 100644 index 00000000000..4350d5c3e49 Binary files /dev/null and b/tests/queries/0_stateless/data_arrow/case_insensitive_column_matching.arrow differ diff --git a/tests/queries/0_stateless/data_orc/case_insensitive_column_matching.orc b/tests/queries/0_stateless/data_orc/case_insensitive_column_matching.orc new file mode 100644 index 00000000000..136f9980064 Binary files /dev/null and b/tests/queries/0_stateless/data_orc/case_insensitive_column_matching.orc differ diff --git a/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet b/tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet similarity index 100% rename from tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet rename to tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet diff --git 
a/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet.columns b/tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet.columns similarity index 100% rename from tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet.columns rename to tests/queries/0_stateless/data_parquet/case_insensitive_column_matching.parquet.columns diff --git a/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql b/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql index 63a833af114..c7a34c493c9 100644 --- a/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql +++ b/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql @@ -1,4 +1,4 @@ --- Tags: replica, distributed +-- Tags: replica, distributed, no-random-settings SET max_parallel_replicas = 2; SELECT EventTime::DateTime('Asia/Dubai') FROM remote('127.0.0.{1|2}', test, hits) ORDER BY EventTime DESC LIMIT 10 diff --git a/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh b/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh index a1136a47319..d14a174d3a0 100755 --- a/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh +++ b/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh @@ -13,9 +13,9 @@ do $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()" echo "$format, false"; - $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ + $CLICKHOUSE_CLIENT --max_block_size=65505 --output_format_parallel_formatting=false -q \ "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \ - $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names FORMAT $format" + 
$CLICKHOUSE_CLIENT --max_block_size=65505 --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names FORMAT $format" $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" @@ -23,9 +23,9 @@ do $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()" echo "$format, true"; - $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ + $CLICKHOUSE_CLIENT --max_block_size=65505 --output_format_parallel_formatting=false -q \ "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \ - $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names FORMAT $format" + $CLICKHOUSE_CLIENT --max_block_size=65505 --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names FORMAT $format" $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" diff --git a/website/js/base.js b/website/js/base.js index 6704231c69d..9389028f1ef 100644 --- a/website/js/base.js +++ b/website/js/base.js @@ -70,15 +70,6 @@ (function (d, w, c) { (w[c] = w[c] || []).push(function() { var is_single_page = $('html').attr('data-single-page') === 'true'; - try { - w.yaCounter18343495 = new Ya.Metrika2({ - id: 18343495, - clickmap: !is_single_page, - trackLinks: !is_single_page, - accurateTrackBounce: !is_single_page, - webvisor: !is_single_page - }); - } catch(e) { } if (!is_single_page) { $('head').each(function(_, element) { @@ -91,21 +82,7 @@ }); } }); - - var n = d.getElementsByTagName("script")[0], - s = d.createElement("script"), - f = function () { n.parentNode.insertBefore(s, n); }; - s.type = 
"text/javascript"; - s.async = true; - s.src = "/js/metrika.js"; - if (window.location.hostname.endsWith('clickhouse.com')) { - if (w.opera == "[object Opera]") { - d.addEventListener("DOMContentLoaded", f, false); - } else { - f(); - } - } - })(document, window, "yandex_metrika_callbacks2"); + })(document, window, ""); var beforePrint = function() { var details = document.getElementsByTagName("details");