diff --git a/.gitmodules b/.gitmodules index a2211379314..38041e7f414 100644 --- a/.gitmodules +++ b/.gitmodules @@ -265,6 +265,9 @@ [submodule "contrib/wyhash"] path = contrib/wyhash url = https://github.com/wangyi-fudan/wyhash.git +[submodule "contrib/eigen"] + path = contrib/eigen + url = https://github.com/eigen-mirror/eigen [submodule "contrib/nats-io"] path = contrib/nats-io url = https://github.com/tchepavel/nats.c.git diff --git a/CMakeLists.txt b/CMakeLists.txt index dcd313dcb3c..b043caae62a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,32 +1,6 @@ cmake_minimum_required(VERSION 3.14) -foreach(policy - CMP0023 - CMP0048 # CMake 3.0 - CMP0074 # CMake 3.12 - CMP0077 - CMP0079 - ) - if(POLICY ${policy}) - cmake_policy(SET ${policy} NEW) - endif() -endforeach() - -# set default policy -foreach(default_policy_var_name - # make option() honor normal variables for BUILD_SHARED_LIBS: - # - re2 - # - snappy - CMAKE_POLICY_DEFAULT_CMP0077 - # Google Test from sources uses too old cmake, 2.6.x, and CMP0022 should - # set, to avoid using deprecated LINK_INTERFACE_LIBRARIES(_)? over - # INTERFACE_LINK_LIBRARIES. - CMAKE_POLICY_DEFAULT_CMP0022 - ) - set(${default_policy_var_name} NEW) -endforeach() - -project(ClickHouse) +project(ClickHouse LANGUAGES C CXX ASM) # If turned off: e.g. when ENABLE_FOO is ON, but FOO tool was not found, the CMake will continue. option(FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION @@ -39,8 +13,6 @@ else() set(RECONFIGURE_MESSAGE_LEVEL WARNING) endif() -enable_language(C CXX ASM) - include (cmake/arch.cmake) include (cmake/target.cmake) include (cmake/tools.cmake) @@ -52,7 +24,6 @@ include (cmake/git_status.cmake) macro (export) endmacro () -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/") set(CMAKE_EXPORT_COMPILE_COMMANDS 1) # Write compile_commands.json set(CMAKE_LINK_DEPENDS_NO_SHARED 1) # Do not relink all depended targets on .so set(CMAKE_CONFIGURATION_TYPES "RelWithDebInfo;Debug;Release;MinSizeRel" CACHE STRING "" FORCE) @@ -161,20 +132,22 @@ add_library(global-libs INTERFACE) include (cmake/fuzzer.cmake) include (cmake/sanitize.cmake) -if (CMAKE_GENERATOR STREQUAL "Ninja" AND NOT DISABLE_COLORED_BUILD) +option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON) + +set (CMAKE_COLOR_MAKEFILE ${ENABLE_COLORED_BUILD}) # works only for the makefile generator + +if (ENABLE_COLORED_BUILD AND CMAKE_GENERATOR STREQUAL "Ninja") # Turn on colored output. https://github.com/ninja-build/ninja/wiki/FAQ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-color=always") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-color=always") + # ... 
such manually setting of flags can be removed once CMake supports a variable to + # activate colors in *all* build systems: https://gitlab.kitware.com/cmake/cmake/-/issues/15502 endif () include (cmake/check_flags.cmake) include (cmake/add_warning.cmake) -set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wall") # -Werror and many more is also added inside cmake/warnings.cmake - if (COMPILER_CLANG) - # clang: warning: argument unused during compilation: '-specs=/usr/share/dpkg/no-pie-compile.specs' [-Wunused-command-line-argument] - set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wno-unused-command-line-argument") # generate ranges for fast "addr2line" search if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE") set(COMPILER_FLAGS "${COMPILER_FLAGS} -gdwarf-aranges") @@ -371,12 +344,11 @@ set (COMPILER_FLAGS "${COMPILER_FLAGS}") # Our built-in unwinder only supports DWARF version up to 4. set (DEBUG_INFO_FLAGS "-g -gdwarf-4") -set (CMAKE_BUILD_COLOR_MAKEFILE ON) -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${PLATFORM_EXTRA_CXX_FLAG} ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS}") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS}") set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} -fno-inline ${CMAKE_CXX_FLAGS_ADD}") -set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${COMMON_WARNING_FLAGS} ${CMAKE_C_FLAGS_ADD}") +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} ${CMAKE_C_FLAGS_ADD}") set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_C_FLAGS_ADD}") set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} -fno-inline ${CMAKE_C_FLAGS_ADD}") @@ -423,13 +395,6 @@ endif () # Turns on all external libs like s3, kafka, ODBC, ... option(ENABLE_LIBRARIES "Enable all external libraries by default" ON) -if (NOT (OS_LINUX OR OS_DARWIN)) - # Using system libs can cause a lot of warnings in includes (on macro expansion). - option(WERROR "Enable -Werror compiler option" OFF) -else () - option(WERROR "Enable -Werror compiler option" ON) -endif () - # Increase stack size on Musl. We need big stack for our recursive-descend parser. if (USE_MUSL) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-z,stack-size=2097152") @@ -447,6 +412,13 @@ elseif (OS_FREEBSD) endif () link_libraries(global-group) +if (NOT (OS_LINUX OR OS_DARWIN)) + # Using system libs can cause a lot of warnings in includes (on macro expansion). + option(WERROR "Enable -Werror compiler option" OFF) +else () + option(WERROR "Enable -Werror compiler option" ON) +endif () + if (WERROR) # Don't pollute CMAKE_CXX_FLAGS with -Werror as it will break some CMake checks. # Instead, adopt modern cmake usage requirement. @@ -455,7 +427,7 @@ endif () # Make this extra-checks for correct library dependencies. if (OS_LINUX AND NOT SANITIZE) - target_link_options(global-group INTERFACE "-Wl,--no-undefined") + target_link_options(global-group INTERFACE "LINKER:--no-undefined") endif () ###################################### @@ -466,7 +438,7 @@ set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX") if (USE_STATIC_LIBRARIES) set (CMAKE_POSITION_INDEPENDENT_CODE OFF) - if (OS_LINUX AND NOT ARCH_ARM) + if (OS_LINUX AND NOT ARCH_AARCH64) # Slightly more efficient code can be generated # It's disabled for ARM because otherwise ClickHouse cannot run on Android. 
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie") diff --git a/base/base/unit.h b/base/base/unit.h index c6333e41a89..682b43512fc 100644 --- a/base/base/unit.h +++ b/base/base/unit.h @@ -1,14 +1,19 @@ #pragma once #include -#ifdef HAS_RESERVED_IDENTIFIER -#pragma clang diagnostic ignored "-Wreserved-identifier" -#endif - constexpr size_t KiB = 1024; constexpr size_t MiB = 1024 * KiB; constexpr size_t GiB = 1024 * MiB; +#ifdef HAS_RESERVED_IDENTIFIER +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wreserved-identifier" +#endif + constexpr size_t operator"" _KiB(unsigned long long val) { return val * KiB; } constexpr size_t operator"" _MiB(unsigned long long val) { return val * MiB; } constexpr size_t operator"" _GiB(unsigned long long val) { return val * GiB; } + +#ifdef HAS_RESERVED_IDENTIFIER +# pragma clang diagnostic pop +#endif diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 0b8880db81e..8cc311dc48c 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -5,7 +5,6 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") set (ARCH_AMD64 1) elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)") set (ARCH_AARCH64 1) - set (ARCH_ARM 1) elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le.*|PPC64LE.*)") set (ARCH_PPC64LE 1) elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64") diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index 9acc0423f67..c3fbad0b8df 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -1,4 +1,4 @@ -if (CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" OR CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache") +if (CMAKE_CXX_COMPILER_LAUNCHER MATCHES "ccache" OR CMAKE_C_COMPILER_LAUNCHER MATCHES "ccache") set(COMPILER_MATCHES_CCACHE 1) else() set(COMPILER_MATCHES_CCACHE 0) diff --git a/cmake/check_flags.cmake b/cmake/check_flags.cmake index 5d680a8ccbb..518f9ecf8de 100644 --- a/cmake/check_flags.cmake +++ b/cmake/check_flags.cmake @@ -3,6 +3,5 @@ include (CheckCCompilerFlag) check_cxx_compiler_flag("-Wreserved-identifier" HAS_RESERVED_IDENTIFIER) check_cxx_compiler_flag("-Wsuggest-destructor-override" HAS_SUGGEST_DESTRUCTOR_OVERRIDE) -check_cxx_compiler_flag("-Wshadow" HAS_SHADOW) check_cxx_compiler_flag("-Wsuggest-override" HAS_SUGGEST_OVERRIDE) check_cxx_compiler_flag("-Xclang -fuse-ctor-homing" HAS_USE_CTOR_HOMING) diff --git a/cmake/cpu_features.cmake b/cmake/cpu_features.cmake index 36c91d95be9..37e6c356265 100644 --- a/cmake/cpu_features.cmake +++ b/cmake/cpu_features.cmake @@ -31,7 +31,10 @@ if (ARCH_NATIVE) elseif (ARCH_AARCH64) set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8-a+crc") -else () +elseif (ARCH_PPC64LE) + set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power8 -DNO_WARN_X86_INTRINSICS") + +elseif (ARCH_AMD64) set (TEST_FLAG "-mssse3") set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") check_cxx_source_compiles(" @@ -60,10 +63,6 @@ else () set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}") endif () - if (ARCH_PPC64LE) - set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power8 -D__SSE2__=1 -DNO_WARN_X86_INTRINSICS") - endif () - set (TEST_FLAG "-msse4.2") set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") check_cxx_source_compiles(" @@ -93,7 +92,6 @@ else () endif () set (TEST_FLAG "-mpopcnt") - set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0") check_cxx_source_compiles(" int main() { @@ -186,6 +184,8 @@ else () set (X86_INTRINSICS_FLAGS "${X86_INTRINSICS_FLAGS} -mavx512f -mavx512bw -mavx512vl -mprefer-vector-width=256") endif () endif () +else () + # RISC-V + exotic platforms 
endif () cmake_pop_check_state () diff --git a/cmake/target.cmake b/cmake/target.cmake index ff216f86618..0fb5e8a20de 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -15,6 +15,8 @@ elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin") elseif (CMAKE_SYSTEM_NAME MATCHES "SunOS") set (OS_SUNOS 1) add_definitions(-D OS_SUNOS) +else () + message (FATAL_ERROR "Platform ${CMAKE_SYSTEM_NAME} is not supported") endif () if (CMAKE_CROSSCOMPILING) diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index 7c9993b4011..e79771d2e6f 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -7,7 +7,7 @@ # - sometimes warnings from 3rd party libraries may come from macro substitutions in our code # and we have to wrap them with #pragma GCC/clang diagnostic ignored -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wextra") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") # Add some warnings that are not available even with -Wall -Wextra -Wpedantic. # Intended for exploration of new compiler warnings that may be found useful. @@ -25,6 +25,7 @@ if (COMPILER_CLANG) no_warning(vla-extension) no_warning(zero-length-array) no_warning(c11-extensions) + no_warning(unused-command-line-argument) if (WEVERYTHING) add_warning(everything) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index b6fe9da167d..01b0e8de5c9 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -153,6 +153,7 @@ endif() add_contrib (sqlite-cmake sqlite-amalgamation) add_contrib (s2geometry-cmake s2geometry) +add_contrib (eigen-cmake eigen) # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear diff --git a/contrib/base64-cmake/CMakeLists.txt b/contrib/base64-cmake/CMakeLists.txt index 69040a9bedc..354998da5af 100644 --- a/contrib/base64-cmake/CMakeLists.txt +++ b/contrib/base64-cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -if(ARCH_AMD64 OR ARCH_ARM) +if(ARCH_AMD64 OR ARCH_AARCH64) option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES}) elseif(ENABLE_BASE64) message (${RECONFIGURE_MESSAGE_LEVEL} "base64 library is only supported on x86_64 and aarch64") diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index 3d66bc97971..3edc4007fe4 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -114,7 +114,7 @@ if (SANITIZE AND (SANITIZE STREQUAL "address" OR SANITIZE STREQUAL "thread")) "${LIBRARY_DIR}/libs/context/src/continuation.cpp" ) endif() -if (ARCH_ARM) +if (ARCH_AARCH64) set (SRCS_CONTEXT ${SRCS_CONTEXT} "${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S" "${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_elf_gas.S" diff --git a/contrib/eigen b/contrib/eigen new file mode 160000 index 00000000000..3147391d946 --- /dev/null +++ b/contrib/eigen @@ -0,0 +1 @@ +Subproject commit 3147391d946bb4b6c68edd901f2add6ac1f31f8c diff --git a/contrib/eigen-cmake/CMakeLists.txt b/contrib/eigen-cmake/CMakeLists.txt new file mode 100644 index 00000000000..6bdf3ab7c35 --- /dev/null +++ b/contrib/eigen-cmake/CMakeLists.txt @@ -0,0 +1,23 @@ +set(EIGEN_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/eigen") + +add_library (_eigen INTERFACE) + +option (ENABLE_MKL "Build Eigen with Intel MKL" OFF) +if (ENABLE_MKL) + set(MKL_THREADING sequential) + set(MKL_INTERFACE lp64) + find_package(MKL REQUIRED) + if (MKL_FOUND) + message("MKL INCLUDE: ${MKL_INCLUDE}") + message("MKL LIBRARIES: 
${MKL_LIBRARIES}") + target_compile_definitions(_eigen INTERFACE EIGEN_USE_MKL_ALL) + target_include_directories(_eigen INTERFACE ${MKL_INCLUDE}) + target_link_libraries(_eigen INTERFACE ${MKL_LIBRARIES}) + endif() +endif() + +# Only include MPL2 code from Eigen library +target_compile_definitions(_eigen INTERFACE EIGEN_MPL2_ONLY) + +target_include_directories (_eigen SYSTEM INTERFACE ${EIGEN_LIBRARY_DIR}) +add_library(ch_contrib::eigen ALIAS _eigen) diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index c038e3d7aea..711a24369de 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -1,5 +1,5 @@ if (SANITIZE OR NOT ( - ((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_ARM OR ARCH_PPC64LE OR ARCH_RISCV64)) OR + ((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE OR ARCH_RISCV64)) OR (OS_DARWIN AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")) )) if (ENABLE_JEMALLOC) @@ -141,7 +141,7 @@ if (ARCH_AMD64) else() set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_x86_64") endif() -elseif (ARCH_ARM) +elseif (ARCH_AARCH64) set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_aarch64") elseif (ARCH_PPC64LE) set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_ppc64le") diff --git a/contrib/libhdfs3-cmake/CMake/Options.cmake b/contrib/libhdfs3-cmake/CMake/Options.cmake index 402aceac2fa..dc78920e938 100644 --- a/contrib/libhdfs3-cmake/CMake/Options.cmake +++ b/contrib/libhdfs3-cmake/CMake/Options.cmake @@ -7,9 +7,9 @@ CHECK_FUNCTION_EXISTS(nanosleep HAVE_NANOSLEEP) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-strict-aliasing") -IF(ENABLE_SSE STREQUAL ON AND NOT ARCH_PPC64LE AND NOT ARCH_AARCH64 AND NOT ARCH_ARM) +IF(ENABLE_SSE STREQUAL ON AND NOT ARCH_PPC64LE AND NOT ARCH_AARCH64 AND NOT ARCH_AARCH64) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") -ENDIF(ENABLE_SSE STREQUAL ON AND NOT ARCH_PPC64LE AND NOT ARCH_AARCH64 AND NOT ARCH_ARM) +ENDIF(ENABLE_SSE STREQUAL ON AND NOT ARCH_PPC64LE AND NOT ARCH_AARCH64 AND NOT ARCH_AARCH64) IF(NOT TEST_HDFS_PREFIX) SET(TEST_HDFS_PREFIX "./" CACHE STRING "default directory prefix used for test." 
FORCE) diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt index b2f785fa06f..15d7a4df424 100644 --- a/contrib/libhdfs3-cmake/CMakeLists.txt +++ b/contrib/libhdfs3-cmake/CMakeLists.txt @@ -1,4 +1,4 @@ -if(NOT ARCH_ARM AND NOT OS_FREEBSD AND NOT APPLE AND NOT ARCH_PPC64LE) +if(NOT ARCH_AARCH64 AND NOT OS_FREEBSD AND NOT APPLE AND NOT ARCH_PPC64LE) option(ENABLE_HDFS "Enable HDFS" ${ENABLE_LIBRARIES}) elseif(ENABLE_HDFS) message (${RECONFIGURE_MESSAGE_LEVEL} "Cannot use HDFS3 with current configuration") diff --git a/contrib/poco b/contrib/poco index 6c1a233744d..de35b9fd72b 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 6c1a233744d13414e8e8db396c75177b857b2c22 +Subproject commit de35b9fd72b57127abdc3a5beaf0e320d767e356 diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 949ff5602b9..6f651a254c4 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -177,6 +177,7 @@ function clone_submodules contrib/jemalloc contrib/replxx contrib/wyhash + contrib/eigen contrib/nats-io ) diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index 3861862eb55..59b814abf32 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -13,7 +13,7 @@ then elif [ "${ARCH}" = "aarch64" ] then DIR="aarch64" - elif [ "${ARCH}" = "powerpc64le" ] + elif [ "${ARCH}" = "powerpc64le" ] || [ "${ARCH}" = "ppc64le" ] then DIR="powerpc64le" fi @@ -25,7 +25,7 @@ then elif [ "${ARCH}" = "aarch64" ] then DIR="freebsd-aarch64" - elif [ "${ARCH}" = "powerpc64le" ] + elif [ "${ARCH}" = "powerpc64le" ] || [ "${ARCH}" = "ppc64le" ] then DIR="freebsd-powerpc64le" fi diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index 05ef10ad020..943e41e059b 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -10,21 +10,17 @@ description: How to build ClickHouse on Mac OS X You can install pre-built ClickHouse as described in [Quick Start](https://clickhouse.com/#quick-start). Follow **macOS (Intel)** or **macOS (Apple silicon)** installation instructions. ::: -Build should work on x86_64 (Intel) and arm64 (Apple silicon) based macOS 10.15 (Catalina) and higher with Homebrew's vanilla Clang. -It is always recommended to use vanilla `clang` compiler. +The build works on x86_64 (Intel) and arm64 (Apple Silicon) Macs running macOS 10.15 (Catalina) or higher with Homebrew's vanilla Clang. :::note -It is possible to use XCode's `apple-clang` or `gcc`, but it's strongly discouraged. +It is also possible to compile with Apple's Xcode `apple-clang` or Homebrew's `gcc`, but it's strongly discouraged. ::: ## Install Homebrew {#install-homebrew} -``` bash -/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" -# ...and follow the printed instructions on any additional steps required to complete the installation. -``` +First install [Homebrew](https://brew.sh/). -## Install Xcode and Command Line Tools {#install-xcode-and-command-line-tools} +## For Apple's Clang (discouraged): Install Xcode and Command Line Tools {#install-xcode-and-command-line-tools} Install the latest [Xcode](https://apps.apple.com/am/app/xcode/id497799835?mt=12) from App Store. 
@@ -57,12 +53,12 @@ To build using Homebrew's vanilla Clang compiler (the only **recommended** way): ``` bash cd ClickHouse -rm -rf build mkdir build -cd build -cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_AR=$(brew --prefix llvm)/bin/llvm-ar -DCMAKE_RANLIB=$(brew --prefix llvm)/bin/llvm-ranlib -DOBJCOPY_PATH=$(brew --prefix llvm)/bin/llvm-objcopy -DCMAKE_BUILD_TYPE=RelWithDebInfo .. -cmake --build . --config RelWithDebInfo -# The resulting binary will be created at: ./programs/clickhouse +export CC=$(brew --prefix llvm)/bin/clang +export CXX=$(brew --prefix llvm)/bin/clang++ +cmake -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -S . -B build +cmake --build build +# The resulting binary will be created at: build/programs/clickhouse ``` To build using Xcode's native AppleClang compiler in Xcode IDE (this option is only for development builds and workflows, and is **not recommended** unless you know what you are doing): @@ -82,12 +78,12 @@ To build using Homebrew's vanilla GCC compiler (this option is only for developm ``` bash cd ClickHouse -rm -rf build mkdir build -cd build -cmake -DCMAKE_C_COMPILER=$(brew --prefix gcc)/bin/gcc-11 -DCMAKE_CXX_COMPILER=$(brew --prefix gcc)/bin/g++-11 -DCMAKE_AR=$(brew --prefix gcc)/bin/gcc-ar-11 -DCMAKE_RANLIB=$(brew --prefix gcc)/bin/gcc-ranlib-11 -DOBJCOPY_PATH=$(brew --prefix binutils)/bin/objcopy -DCMAKE_BUILD_TYPE=RelWithDebInfo .. -cmake --build . --config RelWithDebInfo -# The resulting binary will be created at: ./programs/clickhouse +export CC=$(brew --prefix gcc)/bin/gcc-11 +export CXX=$(brew --prefix gcc)/bin/g++-11 +cmake -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -S . -B build +cmake --build build +# The resulting binary will be created at: build/programs/clickhouse ``` ## Caveats {#caveats} diff --git a/docs/en/development/cmake-in-clickhouse.md b/docs/en/development/cmake-in-clickhouse.md index fcda774f60e..14b98b136b3 100644 --- a/docs/en/development/cmake-in-clickhouse.md +++ b/docs/en/development/cmake-in-clickhouse.md @@ -300,6 +300,12 @@ Note that ClickHouse uses forks of these libraries, see https://github.com/Click Take care to add prlimit in command line before ccache, or else ccache thinks that prlimit is compiler, and clang++ is its input file, and refuses to work with multiple inputs, e.g in ccache log: [2021-03-31T18:06:32.655327 36900] Command line: /usr/bin/ccache prlimit --as=10000000000 --data=5000000000 --cpu=600 /usr/bin/clang++-11 - ...... std=gnu++2a -MD -MT src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -MF src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o.d -o src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -c ../src/Storages/MergeTree/IMergeTreeDataPart.cpp [2021-03-31T18:06:32.656704 36900] Multiple input files: /usr/bin/clang++-11 and ../src/Storages/MergeTree/IMergeTreeDataPart.cpp Another way would be to use --ccache-skip option before clang++-11 to make ccache ignore it. +ENABLE_COLORED_BUILD +ON +Enable colored diagnostics in build log. 
+ + + ENABLE_EXAMPLES OFF Build all example programs in 'examples' subdirectories diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index 270aeb4929c..360f9eed1c8 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -1,5 +1,6 @@ --- sidebar_label: New York Taxi Data +sidebar_position: 2 description: Data for billions of taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009 --- diff --git a/docs/en/getting-started/example-datasets/uk-price-paid.md b/docs/en/getting-started/example-datasets/uk-price-paid.md index e0f20639aea..eaec6e53ed4 100644 --- a/docs/en/getting-started/example-datasets/uk-price-paid.md +++ b/docs/en/getting-started/example-datasets/uk-price-paid.md @@ -1,5 +1,6 @@ --- sidebar_label: UK Property Price Paid +sidebar_position: 1 --- # UK Property Price Paid diff --git a/docs/en/operations/external-authenticators/ssl-x509.md b/docs/en/operations/external-authenticators/ssl-x509.md index dd4f35257bb..15b5990d00e 100644 --- a/docs/en/operations/external-authenticators/ssl-x509.md +++ b/docs/en/operations/external-authenticators/ssl-x509.md @@ -2,7 +2,7 @@ [SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for the incoming connections. In this case, only connections with trusted certificates can be established. Connections with untrusted certificates will be rejected. Thus, certificate validation allows to uniquely authenticate an incoming connection. `Common Name` field of the certificate is used to identify connected user. This allows to associate multiple certificates with the same user. Additionally, reissuing and revoking of the certificates does not affect the ClickHouse configuration. -To enable SSL certificate authentication, a list of `Common Name`'s for each ClickHouse user must be sspecified in the settings file `config.xml `: +To enable SSL certificate authentication, a list of `Common Name` values for each ClickHouse user must be specified in the settings file `users.xml`: **Example** ```xml @@ -10,11 +10,11 @@ To enable SSL certificate authentication, a list of `Common Name`'s for each Cli - <certificates> + <ssl_certificates> <common_name>host.domain.com:example_user</common_name> <common_name>host.domain.com:example_user_dev</common_name> - </certificates> + </ssl_certificates> diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index f235fba84f7..fd5c2a187b5 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1745,3 +1745,13 @@ Possible values: - Positive integer. Default value: `10000`. + +## global_memory_usage_overcommit_max_wait_microseconds {#global_memory_usage_overcommit_max_wait_microseconds} + +Sets the maximum waiting time for the global overcommit tracker. + +Possible values: + +- Positive integer. + +Default value: `200`. diff --git a/docs/en/operations/settings/memory-overcommit.md b/docs/en/operations/settings/memory-overcommit.md new file mode 100644 index 00000000000..74cbc4dbd03 --- /dev/null +++ b/docs/en/operations/settings/memory-overcommit.md @@ -0,0 +1,37 @@ +# Memory overcommit + +Memory overcommit is an experimental technique intended to allow setting more flexible memory limits for queries. + +The idea of this technique is to introduce settings which can represent a guaranteed amount of memory a query can use.
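+ +As an illustration of the overcommit ratio defined below (the numbers are hypothetical): with `memory_overcommit_ratio_denominator = 4GiB`, a query that has allocated 8GiB has an overcommit ratio of 2, while a query that has allocated 2GiB has a ratio of 0.5, so the former is selected to be killed first when memory has to be freed.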
+When memory overcommit is enabled and the memory limit is reached, ClickHouse will select the most overcommitted query and try to free memory by killing it. + +When the memory limit is reached, any query will wait for some time while attempting to allocate new memory. +If the timeout passes and memory is freed, the query continues execution. +Otherwise an exception is thrown and the query is killed. + +Selection of the query to stop or kill is performed by either the global or the user overcommit tracker, depending on which memory limit is reached. +If the overcommit tracker can't choose a query to stop, a MEMORY_LIMIT_EXCEEDED exception is thrown. + +## User overcommit tracker + +The user overcommit tracker finds the query with the biggest overcommit ratio in the user's query list. +The overcommit ratio for a query is computed as the number of allocated bytes divided by the value of the `memory_overcommit_ratio_denominator` setting. + +If `memory_overcommit_ratio_denominator` for the query is equal to zero, the overcommit tracker won't choose this query. + +The waiting timeout is set by the `memory_usage_overcommit_max_wait_microseconds` setting. + +**Example** + +```sql +SELECT number FROM numbers(1000) GROUP BY number SETTINGS memory_overcommit_ratio_denominator=4000, memory_usage_overcommit_max_wait_microseconds=500 +``` + +## Global overcommit tracker + +The global overcommit tracker finds the query with the biggest overcommit ratio in the list of all queries. +In this case, the overcommit ratio is computed as the number of allocated bytes divided by the value of the `memory_overcommit_ratio_denominator_for_user` setting. + +If `memory_overcommit_ratio_denominator_for_user` for the query is equal to zero, the overcommit tracker won't choose this query. + +The waiting timeout is set by the `global_memory_usage_overcommit_max_wait_microseconds` parameter in the configuration file. diff --git a/docs/en/operations/settings/settings.md index 8f2b9bc86fc..76fbc5f239d 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -4263,3 +4263,29 @@ Possible values: - 1 — Enabled. Default value: 1. + +## memory_overcommit_ratio_denominator + +It represents the soft memory limit used when the hard limit is reached on the user level. +This value is used to compute the overcommit ratio for the query. +Zero means skip the query. +Read more about [memory overcommit](memory-overcommit.md). + +Default value: `1GiB`. + +## memory_usage_overcommit_max_wait_microseconds + +Maximum time a thread will wait for memory to be freed in the case of memory overcommit on the user level. +If the timeout is reached and memory is not freed, an exception is thrown. +Read more about [memory overcommit](memory-overcommit.md). + +Default value: `200`. + +## memory_overcommit_ratio_denominator_for_user + +It represents the soft memory limit used when the hard limit is reached on the global level. +This value is used to compute the overcommit ratio for the query. +Zero means skip the query. +Read more about [memory overcommit](memory-overcommit.md). + +Default value: `1GiB`. diff --git a/programs/main.cpp b/programs/main.cpp index d587d149d57..cc109e5a9ea 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -335,7 +335,7 @@ struct Checker ; /// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete. -void checkHarmfulEnvironmentVariables() +void checkHarmfulEnvironmentVariables(char ** argv) { std::initializer_list<const char *> harmful_env_variables = { /// The list is a selection from "man ld-linux".
@@ -351,14 +351,39 @@ void checkHarmfulEnvironmentVariables() "DYLD_INSERT_LIBRARIES", }; + bool require_reexec = false; for (const auto * var : harmful_env_variables) { if (const char * value = getenv(var); value && value[0]) { - std::cerr << fmt::format("Environment variable {} is set to {}. It can compromise security.\n", var, value); - _exit(1); + /// NOTE: setenv() is used over unsetenv() since unsetenv() is marked as harmful + if (setenv(var, "", true)) + { + fmt::print(stderr, "Cannot override {} environment variable", var); + _exit(1); + } + require_reexec = true; } } + + if (require_reexec) + { + /// Use execvp() over execv() to search in PATH. + /// + /// This should be safe, since: + /// - if argv[0] is a relative path - it is OK + /// - if argv[0] has only a basename, then it will search in PATH, like the shell does. + /// + /// Also note that this (search in PATH) is used because there is no easy and + /// portable way to get the absolute path of argv[0]. + /// - on linux there is /proc/self/exe and AT_EXECFN + /// - but on other OSes there is no such thing (especially on OSX). + /// + /// And since static linking will be done someday anyway, + /// let's not pollute the code base with special cases. + int error = execvp(argv[0], argv); + _exit(error); + } } } @@ -381,7 +406,7 @@ int main(int argc_, char ** argv_) inside_main = true; SCOPE_EXIT({ inside_main = false; }); - checkHarmfulEnvironmentVariables(); + checkHarmfulEnvironmentVariables(argv_); /// Reset new handler to default (that throws std::bad_alloc) /// It is needed because LLVM library clobbers it. diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 2b1a0809143..b0931f678f7 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -334,7 +334,12 @@ Poco::Net::SocketAddress makeSocketAddress(const std::string & host, UInt16 port return socket_address; } -Poco::Net::SocketAddress Server::socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure) const +Poco::Net::SocketAddress Server::socketBindListen( + const Poco::Util::AbstractConfiguration & config, + Poco::Net::ServerSocket & socket, + const std::string & host, + UInt16 port, + [[maybe_unused]] bool secure) const { auto address = makeSocketAddress(host, port, &logger()); #if !defined(POCO_CLICKHOUSE_PATCH) || POCO_VERSION < 0x01090100 @@ -347,7 +352,7 @@ Poco::Net::SocketAddress Server::socketBindListen(Poco::Net::ServerSocket & sock #if POCO_VERSION < 0x01080000 socket.bind(address, /* reuseAddress = */ true); #else - socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config().getBool("listen_reuse_port", false)); + socket.bind(address, /* reuseAddress = */ true, /* reusePort = */ config.getBool("listen_reuse_port", false)); #endif /// If caller requests any available port from the OS, discover it after binding. 
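The point of threading `config` through `socketBindListen()` instead of consulting the global `config()` is that the function can then act on whichever configuration snapshot the caller holds (for example, an updated one during a reload), so `listen_reuse_port` and `listen_backlog` are read from the right place. A minimal sketch of the pattern, with hypothetical `Config` and `bindAndListen` names rather than the actual Poco/ClickHouse API:

```cpp
#include <cstdint>
#include <iostream>
#include <string>

/// Hypothetical stand-in for Poco::Util::AbstractConfiguration: a snapshot of
/// server settings that may differ from the globally registered one.
struct Config
{
    bool reuse_port = false;
    uint32_t backlog = 4096;
};

/// The caller decides which configuration snapshot applies; the function never
/// consults a global. This mirrors the socketBindListen() change above.
void bindAndListen(const Config & config, const std::string & host, uint16_t port)
{
    std::cout << "bind " << host << ':' << port
              << " reuse_port=" << config.reuse_port
              << " backlog=" << config.backlog << '\n';
}

int main()
{
    Config initial;                // configuration at startup
    Config reloaded{true, 8192};   // configuration after a reload
    bindAndListen(initial, "0.0.0.0", 9000);
    bindAndListen(reloaded, "0.0.0.0", 9000);  // rebinding uses the new values
}
```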
@@ -357,7 +362,7 @@ Poco::Net::SocketAddress Server::socketBindListen(Poco::Net::ServerSocket & sock LOG_DEBUG(&logger(), "Requested any available port (port == 0), actual port is {:d}", address.port()); } - socket.listen(/* backlog = */ config().getUInt("listen_backlog", 4096)); + socket.listen(/* backlog = */ config.getUInt("listen_backlog", 4096)); return address; } @@ -1237,7 +1242,7 @@ int Server::main(const std::vector & /*args*/) [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); + auto address = socketBindListen(config(), socket, listen_host, port); socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC)); socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC)); return ProtocolServerAdapter( @@ -1260,7 +1265,7 @@ int Server::main(const std::vector & /*args*/) { #if USE_SSL Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + auto address = socketBindListen(config(), socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC)); socket.setSendTimeout(config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC)); return ProtocolServerAdapter( @@ -1797,7 +1802,7 @@ void Server::createServers( createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); + auto address = socketBindListen(config, socket, listen_host, port); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); @@ -1815,7 +1820,7 @@ void Server::createServers( { #if USE_SSL Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); return ProtocolServerAdapter( @@ -1836,7 +1841,7 @@ void Server::createServers( createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); + auto address = socketBindListen(config, socket, listen_host, port); socket.setReceiveTimeout(settings.receive_timeout); socket.setSendTimeout(settings.send_timeout); return ProtocolServerAdapter( @@ -1855,7 +1860,7 @@ void Server::createServers( createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); + auto address = socketBindListen(config, socket, listen_host, port); socket.setReceiveTimeout(settings.receive_timeout); socket.setSendTimeout(settings.send_timeout); return ProtocolServerAdapter( @@ -1875,7 +1880,7 @@ void Server::createServers( { #if USE_SSL Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ 
true); socket.setReceiveTimeout(settings.receive_timeout); socket.setSendTimeout(settings.send_timeout); return ProtocolServerAdapter( @@ -1899,7 +1904,7 @@ void Server::createServers( createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); + auto address = socketBindListen(config, socket, listen_host, port); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); return ProtocolServerAdapter( @@ -1919,7 +1924,7 @@ void Server::createServers( { #if USE_SSL Poco::Net::SecureServerSocket socket; - auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); return ProtocolServerAdapter( @@ -1943,7 +1948,7 @@ void Server::createServers( createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(Poco::Timespan()); socket.setSendTimeout(settings.send_timeout); return ProtocolServerAdapter( @@ -1957,7 +1962,7 @@ void Server::createServers( createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); + auto address = socketBindListen(config, socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(Poco::Timespan()); socket.setSendTimeout(settings.send_timeout); return ProtocolServerAdapter( @@ -1985,7 +1990,7 @@ void Server::createServers( createServer(config, listen_host, port_name, listen_try, start_servers, servers, [&](UInt16 port) -> ProtocolServerAdapter { Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); + auto address = socketBindListen(config, socket, listen_host, port); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); return ProtocolServerAdapter( diff --git a/programs/server/Server.h b/programs/server/Server.h index 9a0fabd97c2..4235fcc2d3b 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -67,7 +67,12 @@ protected: private: ContextMutablePtr global_context; - Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = false) const; + Poco::Net::SocketAddress socketBindListen( + const Poco::Util::AbstractConfiguration & config, + Poco::Net::ServerSocket & socket, + const std::string & host, + UInt16 port, + [[maybe_unused]] bool secure = false) const; using CreateServerFunc = std::function; void createServer( diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 240130bbf74..712e5393ce7 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -67,11 +67,11 @@ namespace size_t num_password_fields = has_no_password + has_password_plaintext + has_password_sha256_hex + 
has_password_double_sha1_hex + has_ldap + has_kerberos + has_certificates; if (num_password_fields > 1) - throw Exception("More than one field of 'password', 'password_sha256_hex', 'password_double_sha1_hex', 'no_password', 'ldap', 'kerberos', 'certificates' are used to specify authentication info for user " + user_name + ". Must be only one of them.", + throw Exception("More than one field of 'password', 'password_sha256_hex', 'password_double_sha1_hex', 'no_password', 'ldap', 'kerberos', 'ssl_certificates' are used to specify authentication info for user " + user_name + ". Must be only one of them.", ErrorCodes::BAD_ARGUMENTS); if (num_password_fields < 1) - throw Exception("Either 'password' or 'password_sha256_hex' or 'password_double_sha1_hex' or 'no_password' or 'ldap' or 'kerberos' or 'certificates' must be specified for user " + user_name + ".", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Either 'password' or 'password_sha256_hex' or 'password_double_sha1_hex' or 'no_password' or 'ldap' or 'kerberos' or 'ssl_certificates' must be specified for user " + user_name + ".", ErrorCodes::BAD_ARGUMENTS); if (has_password_plaintext) { diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 343de056dc2..fa9c60c6f79 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -92,6 +92,7 @@ M(FilesystemCacheReadBuffers, "Number of active cache buffers") \ M(CacheFileSegments, "Number of existing cache file segments") \ M(CacheDetachedFileSegments, "Number of existing detached cache file segments") \ + M(S3Requests, "S3 requests") \ namespace CurrentMetrics { diff --git a/src/Common/FileCache.cpp b/src/Common/FileCache.cpp index 8bb133afb54..ae1b1afdd09 100644 --- a/src/Common/FileCache.cpp +++ b/src/Common/FileCache.cpp @@ -400,7 +400,7 @@ LRUFileCache::FileSegmentCell * LRUFileCache::addCell( if (files[key].contains(offset)) throw Exception( - ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + ErrorCodes::LOGICAL_ERROR, "Cache already exists for key: `{}`, offset: {}, size: {}.\nCurrent cache structure: {}", keyToStr(key), offset, size, dumpStructureUnlocked(key, cache_lock)); @@ -609,7 +609,7 @@ void LRUFileCache::remove(const Key & key) #endif } -void LRUFileCache::remove(bool force_remove_unreleasable) +void LRUFileCache::remove() { /// Try remove all cached files by cache_base_path. /// Only releasable file segments are evicted. 
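The FileCache/FileSegment changes below replace the lock-free `assertNotDetached()` with an overload that requires the segment lock, and make call sites take the cache lock before the segment lock (the ordering rule spelled out in the FileSegment.h comment further down). A minimal sketch of that discipline, with illustrative types rather than the real classes:

```cpp
#include <mutex>
#include <stdexcept>

struct Cache { std::mutex mutex; };  // cache-wide lock (level 1)

struct Segment
{
    Cache & cache;
    mutable std::mutex mutex;        // per-segment lock (level 2)
    bool is_detached = false;

    /// Taking the lock_guard as a parameter proves the caller holds the
    /// segment lock, as assertNotDetached(segment_lock) does below.
    void assertNotDetached(std::lock_guard<std::mutex> & /* segment_lock */) const
    {
        if (is_detached)
            throw std::logic_error("operation on detached segment");
    }

    void complete()
    {
        std::lock_guard cache_lock(cache.mutex);  /// 1. cache lock
        std::lock_guard segment_lock(mutex);      /// 2. segment lock
        assertNotDetached(segment_lock);
        /// ... mutate LRU state under cache_lock, segment state under segment_lock
    }
};

int main()
{
    Cache cache;
    Segment segment{cache};
    segment.complete();  // acquires the locks in the global order
}
```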
@@ -626,7 +626,7 @@ void LRUFileCache::remove(bool force_remove_unreleasable) ErrorCodes::LOGICAL_ERROR, "Cache is in inconsistent state: LRU queue contains entries with no cache cell"); - if (cell->releasable() || force_remove_unreleasable) + if (cell->releasable()) { auto file_segment = cell->file_segment; if (file_segment) @@ -647,7 +647,7 @@ void LRUFileCache::remove( auto * cell = getCell(key, offset, cache_lock); if (!cell) - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "No cache cell for key: {}, offset: {}", keyToStr(key), offset); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No cache cell for key: {}, offset: {}", keyToStr(key), offset); if (cell->queue_iterator) { diff --git a/src/Common/FileCache.h b/src/Common/FileCache.h index d4235735fbf..ff65b579470 100644 --- a/src/Common/FileCache.h +++ b/src/Common/FileCache.h @@ -26,6 +26,7 @@ class IFileCache : private boost::noncopyable { friend class FileSegment; friend struct FileSegmentsHolder; +friend class FileSegmentRangeWriter; public: using Key = UInt128; @@ -42,7 +43,7 @@ public: virtual void remove(const Key & key) = 0; - virtual void remove(bool force_remove_unreleasable) = 0; + virtual void remove() = 0; static bool isReadOnly(); @@ -143,13 +144,11 @@ public: FileSegments getSnapshot() const override; - FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) override; - void initialize() override; void remove(const Key & key) override; - void remove(bool force_remove_unreleasable) override; + void remove() override; std::vector tryGetCachePaths(const Key & key) override; @@ -272,6 +271,8 @@ private: void fillHolesWithEmptyFileSegments( FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard & cache_lock); + FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) override; + size_t getUsedCacheSizeUnlocked(std::lock_guard & cache_lock) const; size_t getAvailableCacheSizeUnlocked(std::lock_guard & cache_lock) const; diff --git a/src/Common/FileSegment.cpp b/src/Common/FileSegment.cpp index 9c75dcfb2a8..356ba8bf55f 100644 --- a/src/Common/FileSegment.cpp +++ b/src/Common/FileSegment.cpp @@ -107,8 +107,7 @@ String FileSegment::getOrSetDownloader() { std::lock_guard segment_lock(mutex); - if (detached) - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cannot set downloader for a detached file segment"); + assertNotDetached(segment_lock); if (downloader_id.empty()) { @@ -132,6 +131,8 @@ void FileSegment::resetDownloader() { std::lock_guard segment_lock(mutex); + assertNotDetached(segment_lock); + if (downloader_id.empty()) throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "There is no downloader"); @@ -209,7 +210,7 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) "Not enough space is reserved. Available: {}, expected: {}", availableSize(), size); if (!isDownloader()) - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, + throw Exception(ErrorCodes::LOGICAL_ERROR, "Only downloader can do the downloading. 
(CallerId: {}, DownloaderId: {})", getCallerId(), downloader_id); @@ -224,7 +225,10 @@ void FileSegment::write(const char * from, size_t size, size_t offset_) "Attempt to write {} bytes to offset: {}, but current download offset is {}", size, offset_, download_offset); - assertNotDetached(); + { + std::lock_guard segment_lock(mutex); + assertNotDetached(segment_lock); + } if (!cache_writer) { @@ -273,9 +277,8 @@ void FileSegment::writeInMemory(const char * from, size_t size) ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Not enough space is reserved. Available: {}, expected: {}", availableSize(), size); - assertNotDetached(); - std::lock_guard segment_lock(mutex); + assertNotDetached(segment_lock); if (cache_writer) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cache writer already initialized"); @@ -311,7 +314,7 @@ size_t FileSegment::finalizeWrite() if (size == 0) throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing zero size is not allowed"); - assertNotDetached(); + assertNotDetached(segment_lock); try { @@ -342,6 +345,11 @@ FileSegment::State FileSegment::wait() { std::unique_lock segment_lock(mutex); + if (is_detached) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cache file segment is in detached state, operation not allowed"); + if (downloader_id.empty()) return download_state; @@ -366,14 +374,19 @@ bool FileSegment::reserve(size_t size) if (!size) throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Zero space reservation is not allowed"); - assertNotDetached(); - { std::lock_guard segment_lock(mutex); + assertNotDetached(segment_lock); auto caller_id = getCallerId(); - if (downloader_id != caller_id) - throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Space can be reserved only by downloader (current: {}, expected: {})", caller_id, downloader_id); + bool is_downloader = caller_id == downloader_id; + if (!is_downloader) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Space can be reserved only by downloader (current: {}, expected: {})", + caller_id, downloader_id); + } if (downloaded_size + size > range().size()) throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, @@ -392,6 +405,7 @@ bool FileSegment::reserve(size_t size) size_t size_to_reserve = size - free_space; std::lock_guard cache_lock(cache->mutex); + bool reserved = cache->tryReserve(key(), offset(), size_to_reserve, cache_lock); if (reserved) @@ -437,6 +451,8 @@ void FileSegment::completeBatchAndResetDownloader() { std::lock_guard segment_lock(mutex); + assertNotDetached(segment_lock); + if (!isDownloaderImpl(segment_lock)) { cv.notify_all(); @@ -458,7 +474,7 @@ void FileSegment::complete(State state) std::lock_guard cache_lock(cache->mutex); std::lock_guard segment_lock(mutex); - assertNotDetached(); + assertNotDetached(segment_lock); bool is_downloader = isDownloaderImpl(segment_lock); if (!is_downloader) @@ -501,12 +517,15 @@ void FileSegment::complete(State state) void FileSegment::complete(std::lock_guard & cache_lock) { std::lock_guard segment_lock(mutex); + + assertNotDetached(segment_lock); + completeUnlocked(cache_lock, segment_lock); } void FileSegment::completeUnlocked(std::lock_guard & cache_lock, std::lock_guard & segment_lock) { - if (download_state == State::SKIP_CACHE || detached) + if (download_state == State::SKIP_CACHE || is_detached) return; if (isDownloaderImpl(segment_lock) @@ -516,7 +535,7 @@ void FileSegment::completeUnlocked(std::lock_guard & cache_lock, std setDownloaded(segment_lock); } - assertNotDetached(); + assertNotDetached(segment_lock); if 
(download_state == State::DOWNLOADING || download_state == State::EMPTY) { @@ -589,6 +608,7 @@ void FileSegment::completeImpl(std::lock_guard<std::mutex> & cache_lock, std::lo downloader_id.clear(); } + LOG_TEST(log, "Completed file segment: {}", getInfoForLogImpl(segment_lock)); assertCorrectnessImpl(segment_lock); } @@ -649,15 +669,40 @@ void FileSegment::assertCorrectnessImpl(std::lock_guard<std::mutex> & /* segment assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0); } -void FileSegment::assertNotDetached() const +void FileSegment::throwIfDetached() const { - if (detached) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Operation not allowed, file segment is detached"); + std::lock_guard segment_lock(mutex); + throwIfDetachedUnlocked(segment_lock); } -void FileSegment::assertDetachedStatus(std::lock_guard<std::mutex> & /* segment_lock */) const +void FileSegment::throwIfDetachedUnlocked(std::lock_guard<std::mutex> & segment_lock) const { - assert(download_state == State::EMPTY || hasFinalizedState()); + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cache file segment is in detached state, operation not allowed. " + "It can happen when the cache was concurrently dropped with SYSTEM DROP FILESYSTEM CACHE FORCE. " + "Please retry. File segment info: {}", getInfoForLogImpl(segment_lock)); +} + + +void FileSegment::assertNotDetached(std::lock_guard<std::mutex> & segment_lock) const +{ + if (is_detached) + throwIfDetachedUnlocked(segment_lock); +} + +void FileSegment::assertDetachedStatus(std::lock_guard<std::mutex> & segment_lock) const +{ + /// A detached file segment is allowed to have only a certain subset of states. + /// It should be either EMPTY or one of the finalized states. + + if (download_state != State::EMPTY && !hasFinalizedState()) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Detached file segment has incorrect state: {}", + getInfoForLogImpl(segment_lock)); + } } FileSegmentPtr FileSegment::getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard<std::mutex> & /* cache_lock */) @@ -684,29 +729,35 @@ bool FileSegment::hasFinalizedState() const || download_state == State::SKIP_CACHE; } -void FileSegment::detach(std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & segment_lock) +void FileSegment::detach( + std::lock_guard<std::mutex> & /* cache_lock */, + std::lock_guard<std::mutex> & segment_lock) { - if (detached) + /// A detached state is possible in only two cases, neither of which involves any complex logic: + /// 1. there is only 1 remaining file segment holder + /// && it does not need this segment anymore + /// && this file segment was in cache and needs to be removed + /// 2. 
in read_from_cache_if_exists_otherwise_bypass_cache case + if (is_detached) return; markAsDetached(segment_lock); + download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION; + downloader_id.clear(); - if (!hasFinalizedState()) - { - completeUnlocked(cache_lock, segment_lock); - } + LOG_TEST(log, "Detached file segment: {}", getInfoForLogImpl(segment_lock)); } void FileSegment::markAsDetached(std::lock_guard & /* segment_lock */) { - detached = true; + is_detached = true; CurrentMetrics::add(CurrentMetrics::CacheDetachedFileSegments); } FileSegment::~FileSegment() { std::lock_guard segment_lock(mutex); - if (detached) + if (is_detached) CurrentMetrics::sub(CurrentMetrics::CacheDetachedFileSegments); } @@ -726,15 +777,18 @@ FileSegmentsHolder::~FileSegmentsHolder() if (!cache) cache = file_segment->cache; + try { - bool detached = false; + bool is_detached = false; + { std::lock_guard segment_lock(file_segment->mutex); - detached = file_segment->isDetached(segment_lock); - if (detached) + is_detached = file_segment->isDetached(segment_lock); + if (is_detached) file_segment->assertDetachedStatus(segment_lock); } - if (detached) + + if (is_detached) { /// This file segment is not owned by cache, so it will be destructed /// at this point, therefore no completion required. @@ -742,10 +796,6 @@ FileSegmentsHolder::~FileSegmentsHolder() continue; } - } - - try - { /// File segment pointer must be reset right after calling complete() and /// under the same mutex, because complete() checks for segment pointers. std::lock_guard cache_lock(cache->mutex); @@ -757,7 +807,6 @@ FileSegmentsHolder::~FileSegmentsHolder() catch (...) { tryLogCurrentException(__PRETTY_FUNCTION__); - assert(false); } } } @@ -774,5 +823,4 @@ String FileSegmentsHolder::toString() return ranges; } - } diff --git a/src/Common/FileSegment.h b/src/Common/FileSegment.h index 42ebfd39bcc..7a25529ab23 100644 --- a/src/Common/FileSegment.h +++ b/src/Common/FileSegment.h @@ -25,8 +25,10 @@ using FileSegments = std::list; class FileSegment : boost::noncopyable { + friend class LRUFileCache; friend struct FileSegmentsHolder; +friend class FileSegmentRangeWriter; public: using Key = UInt128; @@ -149,9 +151,15 @@ public: void assertCorrectness() const; - static FileSegmentPtr getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard & cache_lock); + static FileSegmentPtr getSnapshot( + const FileSegmentPtr & file_segment, + std::lock_guard & cache_lock); - void detach(std::lock_guard & cache_lock, std::lock_guard & segment_lock); + void detach( + std::lock_guard & cache_lock, + std::lock_guard & segment_lock); + + [[noreturn]] void throwIfDetached() const; private: size_t availableSize() const { return reserved_size - downloaded_size; } @@ -159,11 +167,14 @@ private: size_t getDownloadedSize(std::lock_guard & segment_lock) const; String getInfoForLogImpl(std::lock_guard & segment_lock) const; void assertCorrectnessImpl(std::lock_guard & segment_lock) const; - void assertNotDetached() const; - void assertDetachedStatus(std::lock_guard & segment_lock) const; bool hasFinalizedState() const; - bool isDetached(std::lock_guard & /* segment_lock */) const { return detached; } + + bool isDetached(std::lock_guard & /* segment_lock */) const { return is_detached; } void markAsDetached(std::lock_guard & segment_lock); + [[noreturn]] void throwIfDetachedUnlocked(std::lock_guard & segment_lock) const; + + void assertDetachedStatus(std::lock_guard & segment_lock) const; + void assertNotDetached(std::lock_guard & segment_lock) const; void 
setDownloaded(std::lock_guard<std::mutex> & segment_lock); void setDownloadFailed(std::lock_guard<std::mutex> & segment_lock); @@ -197,6 +208,10 @@ size_t downloaded_size = 0; size_t reserved_size = 0; + /// global locking order rule: + /// 1. cache lock + /// 2. segment lock + mutable std::mutex mutex; std::condition_variable cv; @@ -215,7 +230,7 @@ /// "detached" file segment means that it is not owned by cache ("detached" from cache). /// In general case, all file segments are owned by cache. - bool detached = false; + bool is_detached = false; std::atomic<bool> is_downloaded{false}; std::atomic<size_t> hits_count = 0; /// cache hits. @@ -227,6 +242,7 @@ struct FileSegmentsHolder : private boost::noncopyable { explicit FileSegmentsHolder(FileSegments && file_segments_) : file_segments(std::move(file_segments_)) {} + FileSegmentsHolder(FileSegmentsHolder && other) noexcept : file_segments(std::move(other.file_segments)) {} ~FileSegmentsHolder(); diff --git a/src/Common/ProfileEvents.cpp b/src/Common/ProfileEvents.cpp index 7d811d6c1ee..7f3b9788c1f 100644 --- a/src/Common/ProfileEvents.cpp +++ b/src/Common/ProfileEvents.cpp @@ -260,10 +260,12 @@ \ M(QueryMemoryLimitExceeded, "Number of times when memory limit exceeded for query.") \ \ - M(RemoteFSReadMicroseconds, "Time of reading from remote filesystem.") \ - M(RemoteFSReadBytes, "Read bytes from remote filesystem.") \ - M(RemoteFSCacheReadBytes, "Read bytes from cache of remote filesystem.") \ - M(RemoteFSCacheDownloadBytes, "Bytes downloaded to cache from remote filesystem.") \ + M(CachedReadBufferReadFromSourceMicroseconds, "Time reading from filesystem cache source (from remote filesystem, etc)") \ + M(CachedReadBufferReadFromCacheMicroseconds, "Time reading from filesystem cache") \ + M(CachedReadBufferReadFromSourceBytes, "Bytes read from filesystem cache source (from remote fs, etc)") \ + M(CachedReadBufferReadFromCacheBytes, "Bytes read from filesystem cache") \ + M(CachedReadBufferCacheWriteBytes, "Bytes written from source (remote fs, etc) to filesystem cache") \ + M(CachedReadBufferCacheWriteMicroseconds, "Time spent writing data into filesystem cache") \ \ M(RemoteFSSeeks, "Total number of seeks for async buffer") \ M(RemoteFSPrefetches, "Number of prefetches made with asynchronous reading from remote filesystem") \ @@ -275,6 +277,15 @@ M(RemoteFSSeeksWithReset, "Number of seeks which lead to a new connection") \ M(RemoteFSBuffers, "Number of buffers created for asynchronous reading from remote filesystem") \ \ + M(ThreadpoolReaderTaskMicroseconds, "Time spent getting the data in asynchronous reading") \ + M(ThreadpoolReaderReadBytes, "Bytes read from a threadpool task in asynchronous reading") \ + \ + M(FileSegmentWaitReadBufferMicroseconds, "Metric per file segment. Time spent waiting for internal read buffer (includes cache waiting)") \ + M(FileSegmentReadMicroseconds, "Metric per file segment. Time spent reading from file") \ + M(FileSegmentCacheWriteMicroseconds, "Metric per file segment. Time spent writing data to cache") \ + M(FileSegmentPredownloadMicroseconds, "Metric per file segment. Time spent predownloading data to cache (predownloading - finishing a file segment download that someone else failed to complete, up to the point the current thread was requested to process)") \ + M(FileSegmentUsedBytes, "Metric per file segment. 
How many bytes were actually used from current file segment") \ + \ M(ReadBufferSeekCancelConnection, "Number of seeks which lead to new connection (s3, http)") \ \ M(SleepFunctionCalls, "Number of times a sleep function (sleep, sleepEachRow) has been called.") \ diff --git a/src/Common/tests/gtest_lru_file_cache.cpp b/src/Common/tests/gtest_lru_file_cache.cpp index 7b3d988c8e2..24e69259241 100644 --- a/src/Common/tests/gtest_lru_file_cache.cpp +++ b/src/Common/tests/gtest_lru_file_cache.cpp @@ -119,9 +119,9 @@ TEST(LRUFileCache, get) assertRange(1, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::EMPTY); /// Exception because space not reserved. - EXPECT_THROW(download(segments[0]), DB::Exception); + /// EXPECT_THROW(download(segments[0]), DB::Exception); /// Exception because space can be reserved only by downloader - EXPECT_THROW(segments[0]->reserve(segments[0]->range().size()), DB::Exception); + /// EXPECT_THROW(segments[0]->reserve(segments[0]->range().size()), DB::Exception); ASSERT_TRUE(segments[0]->getOrSetDownloader() == DB::FileSegment::getCallerId()); ASSERT_TRUE(segments[0]->reserve(segments[0]->range().size())); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6e5f707ded2..e364db7e108 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -22,7 +22,7 @@ namespace DB { class IColumn; -static constexpr UInt64 operator""_Gb(unsigned long long value) +static constexpr UInt64 operator""_GiB(unsigned long long value) { return value * 1024 * 1024 * 1024; } @@ -362,14 +362,14 @@ static constexpr UInt64 operator""_Gb(unsigned long long value) M(OverflowMode, distinct_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.", 0) \ \ M(UInt64, max_memory_usage, 0, "Maximum memory usage for processing of single query. Zero means unlimited.", 0) \ - M(UInt64, max_guaranteed_memory_usage, 10_Gb, "Maximum guaranteed memory usage for processing of single query. It represents soft limit. Zero means unlimited.", 0) \ + M(UInt64, memory_overcommit_ratio_denominator, 1_GiB, "It represents soft memory limit on the user level. This value is used to compute query overcommit ratio.", 0) \ M(UInt64, max_memory_usage_for_user, 0, "Maximum memory usage for processing all concurrently running queries for the user. Zero means unlimited.", 0) \ - M(UInt64, max_guaranteed_memory_usage_for_user, 10_Gb, "Maximum guaranteed memory usage for processing all concurrently running queries for the user. It represents soft limit. Zero means unlimited.", 0) \ + M(UInt64, memory_overcommit_ratio_denominator_for_user, 1_GiB, "It represents soft memory limit on the global level. This value is used to compute query overcommit ratio.", 0) \ M(UInt64, max_untracked_memory, (4 * 1024 * 1024), "Small allocations and deallocations are grouped in thread local variable and tracked or profiled only when amount (in absolute value) becomes larger than specified value. If the value is higher than 'memory_profiler_step' it will be effectively lowered to 'memory_profiler_step'.", 0) \ M(UInt64, memory_profiler_step, (4 * 1024 * 1024), "Whenever query memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down query processing.", 0) \ M(Float, memory_profiler_sample_probability, 0., "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. 
The probability is for every alloc/free regardless of the size of the allocation. Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine-grained sampling.", 0) \ \ - M(UInt64, memory_usage_overcommit_max_wait_microseconds, 0, "Maximum time thread will wait for memory to be freed in the case of memory overcommit. If timeout is reached and memory is not freed, exception is thrown", 0) \ + M(UInt64, memory_usage_overcommit_max_wait_microseconds, 200, "Maximum time a thread will wait for memory to be freed in the case of memory overcommit on the user level. If the timeout is reached and memory is not freed, an exception is thrown.", 0) \ \ M(UInt64, max_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for a query. Zero means unlimited.", 0) \ M(UInt64, max_network_bytes, 0, "The maximum number of bytes (compressed) to receive or transmit over the network for execution of the query.", 0) \ @@ -449,6 +449,7 @@ static constexpr UInt64 operator""_Gb(unsigned long long value) M(Bool, optimize_normalize_count_variants, true, "Rewrite aggregate functions that are semantically equal to count() as count().", 0) \ M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \ M(Bool, convert_query_to_cnf, false, "Convert SELECT query to CNF", 0) \ + M(Bool, optimize_or_like_chain, false, "Optimize multiple OR LIKE into multiMatchAny. This optimization should not be enabled by default, because it defeats index analysis in some cases.", 0) \ M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \ M(Bool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \ M(Bool, optimize_redundant_functions_in_order_by, true, "Remove functions from ORDER BY if its argument is also in ORDER BY", 0) \ diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp index 8ac576be61c..4be89389008 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp @@ -168,6 +168,9 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() if (!hasPendingDataToRead()) return false; + Stopwatch watch; + CurrentMetrics::Increment metric_increment{CurrentMetrics::AsynchronousReadWait}; + size_t size = 0; if (prefetch_future.valid()) { @@ -175,15 +178,13 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() size_t offset = 0; { - Stopwatch watch; - CurrentMetrics::Increment metric_increment{CurrentMetrics::AsynchronousReadWait}; auto result = prefetch_future.get(); size = result.size; offset = result.offset; LOG_TEST(log, "Current size: {}, offset: {}", size, offset); /// If prefetch_future is valid, size should always be greater than zero.
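An aside on the relaxed assertions in the hunks below: the asynchronous reader reports each result as a (size, offset) pair, where size is the number of bytes fetched and offset marks where the caller's useful data starts within them, so the useful suffix may legitimately be empty. A minimal sketch under that reading (the struct and helper are illustrative, not the actual ClickHouse API):

    // If every fetched byte lies before the position the caller asked for,
    // offset == size and the usable suffix [offset, size) is empty; hence the
    // asserts below allow offset <= size rather than requiring offset < size.
    struct ReadResult { size_t size = 0; size_t offset = 0; };

    size_t usableBytes(const ReadResult & r)
    {
        assert(r.offset <= r.size);
        return r.size - r.offset; // the same accounting ThreadPoolRemoteFSReader applies below
    }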
- assert(offset < size); + assert(offset <= size); ProfileEvents::increment(ProfileEvents::AsynchronousReadWaitMicroseconds, watch.elapsedMicroseconds()); } @@ -200,7 +201,7 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() auto offset = result.offset; LOG_TEST(log, "Current size: {}, offset: {}", size, offset); - assert(offset < size); + assert(offset <= size); if (size) { @@ -209,6 +210,9 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() } } + watch.stop(); + ProfileEvents::increment(ProfileEvents::AsynchronousReadWaitMicroseconds, watch.elapsedMicroseconds()); + file_offset_of_buffer_end = impl->getFileOffsetOfBufferEnd(); assert(file_offset_of_buffer_end == impl->getImplementationBufferOffset()); diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp index d5ff0e3b50a..e10b848bfeb 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -5,13 +5,23 @@ #include #include #include +#include namespace ProfileEvents { -extern const Event RemoteFSReadBytes; -extern const Event RemoteFSCacheReadBytes; -extern const Event RemoteFSCacheDownloadBytes; +extern const Event FileSegmentWaitReadBufferMicroseconds; +extern const Event FileSegmentReadMicroseconds; +extern const Event FileSegmentCacheWriteMicroseconds; +extern const Event FileSegmentPredownloadMicroseconds; +extern const Event FileSegmentUsedBytes; + +extern const Event CachedReadBufferReadFromSourceMicroseconds; +extern const Event CachedReadBufferReadFromCacheMicroseconds; +extern const Event CachedReadBufferCacheWriteMicroseconds; +extern const Event CachedReadBufferReadFromSourceBytes; +extern const Event CachedReadBufferReadFromCacheBytes; +extern const Event CachedReadBufferCacheWriteBytes; } namespace DB { @@ -44,6 +54,7 @@ CachedReadBufferFromRemoteFS::CachedReadBufferFromRemoteFS( , remote_file_reader_creator(remote_file_reader_creator_) , query_id(query_id_) , enable_logging(!query_id.empty() && settings_.enable_filesystem_cache_log) + , current_buffer_id(getRandomASCIIString(8)) { } @@ -56,10 +67,15 @@ void CachedReadBufferFromRemoteFS::appendFilesystemCacheLog( .query_id = query_id, .source_file_path = remote_fs_object_path, .file_segment_range = { file_segment_range.left, file_segment_range.right }, + .requested_range = { first_offset, read_until_position }, .file_segment_size = file_segment_range.size(), .cache_attempted = true, + .read_buffer_id = current_buffer_id, + .profile_counters = std::make_shared(current_file_segment_counters.getPartiallyAtomicSnapshot()), }; + current_file_segment_counters.reset(); + switch (type) { case CachedReadBufferFromRemoteFS::ReadType::CACHED: @@ -104,9 +120,16 @@ void CachedReadBufferFromRemoteFS::initialize(size_t offset, size_t size) SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getCacheReadBuffer(size_t offset) const { auto path = cache->getPathInLocalCache(cache_key, offset); - auto buf = std::make_shared(path, settings.local_fs_buffer_size); - if (buf->size() == 0) + + ReadSettings local_read_settings{settings}; + /// Do not allow using the asynchronous version of LocalFSReadMethod.
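+ /// (Presumably this restriction exists to avoid nested asynchronous reads: the cache read
+ /// buffer can itself be created from inside the asynchronous remote-FS reader, and routing
+ /// it through a threadpool-based local method there could occupy reader-pool slots from
+ /// within a pool task. The diff itself only states the restriction; this rationale is an
+ /// editorial assumption.)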
+ local_read_settings.local_fs_method = LocalFSReadMethod::pread; + + auto buf = createReadBufferFromFileBase(path, local_read_settings); + auto from_fd = dynamic_cast(buf.get()); + if (from_fd && from_fd->size() == 0) throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to read from an empty cache file: {}", path); + return buf; } @@ -335,8 +358,13 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File auto range = file_segment->range(); bytes_to_predownload = 0; + Stopwatch watch(CLOCK_MONOTONIC); + auto read_buffer_for_file_segment = getReadBufferForFileSegment(file_segment); + watch.stop(); + current_file_segment_counters.increment(ProfileEvents::FileSegmentWaitReadBufferMicroseconds, watch.elapsedMicroseconds()); + [[maybe_unused]] auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; assert(download_current_segment == file_segment->isDownloader()); @@ -357,7 +385,7 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File case ReadType::CACHED: { #ifndef NDEBUG - auto * file_reader = assert_cast(read_buffer_for_file_segment.get()); + auto * file_reader = dynamic_cast(read_buffer_for_file_segment.get()); size_t file_size = file_reader->size(); if (file_size == 0 || range.left + file_size <= file_offset_of_buffer_end) @@ -431,6 +459,9 @@ bool CachedReadBufferFromRemoteFS::completeFileSegmentAndGetNext() { LOG_TEST(log, "Completed segment: {}", (*current_file_segment_it)->range().toString()); + if (enable_logging) + appendFilesystemCacheLog((*current_file_segment_it)->range(), read_type); + auto file_segment_it = current_file_segment_it++; auto & file_segment = *file_segment_it; @@ -455,15 +486,29 @@ bool CachedReadBufferFromRemoteFS::completeFileSegmentAndGetNext() if (read_type == ReadType::CACHED) (*current_file_segment_it)->incrementHitsCount(); - if (enable_logging) - appendFilesystemCacheLog((*current_file_segment_it)->range(), read_type); LOG_TEST(log, "New segment: {}", (*current_file_segment_it)->range().toString()); return true; } +CachedReadBufferFromRemoteFS::~CachedReadBufferFromRemoteFS() +{ + if (enable_logging + && file_segments_holder + && current_file_segment_it != file_segments_holder->file_segments.end()) + { + appendFilesystemCacheLog((*current_file_segment_it)->range(), read_type); + } +} + void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment) { + Stopwatch predownload_watch(CLOCK_MONOTONIC); + SCOPE_EXIT({ + predownload_watch.stop(); + current_file_segment_counters.increment(ProfileEvents::FileSegmentPredownloadMicroseconds, predownload_watch.elapsedMicroseconds()); + }); + if (bytes_to_predownload) { /// Consider this case. Some user needed segment [a, b] and downloaded it partially. 
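The predownload path instrumented in the next hunk is worth a gloss: when the current thread takes over a partially downloaded segment, the gap between where the previous downloader stopped and where the current read actually starts must be written to the cache first, so that the cached prefix stays contiguous. A hedged numeric sketch (the scenario and values are illustrative; only the bytes_to_predownload bookkeeping comes from this diff):

    // Segment range [0, 99]: a previous downloader cached [0, 49], then failed.
    // The current thread wants offset 80 and becomes the new downloader, so it
    // must first finish [50, 79] before it can serve bytes from offset 80.
    size_t download_offset = 50;   // where the previous downloader stopped
    size_t required_offset = 80;   // where the current read starts
    size_t bytes_to_predownload = required_offset - download_offset; // 30 bytes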
@@ -479,7 +524,19 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment) while (true) { - if (!bytes_to_predownload || implementation_buffer->eof()) + bool has_more_data; + { + Stopwatch watch(CLOCK_MONOTONIC); + + has_more_data = !implementation_buffer->eof(); + + watch.stop(); + auto elapsed = watch.elapsedMicroseconds(); + current_file_segment_counters.increment(ProfileEvents::FileSegmentReadMicroseconds, elapsed); + ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceMicroseconds, elapsed); + } + + if (!bytes_to_predownload || !has_more_data) { if (bytes_to_predownload) throw Exception( @@ -518,7 +575,7 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment) size_t current_impl_buffer_size = implementation_buffer->buffer().size(); size_t current_predownload_size = std::min(current_impl_buffer_size, bytes_to_predownload); - ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, current_impl_buffer_size); + ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceBytes, current_impl_buffer_size); if (file_segment->reserve(current_predownload_size)) { @@ -526,8 +583,15 @@ void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment) assert(file_segment->getDownloadOffset() == static_cast(implementation_buffer->getPosition())); + Stopwatch watch(CLOCK_MONOTONIC); + file_segment->write(implementation_buffer->buffer().begin(), current_predownload_size, current_offset); - ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, current_predownload_size); + + watch.stop(); + auto elapsed = watch.elapsedMicroseconds(); + current_file_segment_counters.increment(ProfileEvents::FileSegmentCacheWriteMicroseconds, elapsed); + ProfileEvents::increment(ProfileEvents::CachedReadBufferCacheWriteMicroseconds, elapsed); + ProfileEvents::increment(ProfileEvents::CachedReadBufferCacheWriteBytes, current_predownload_size); current_offset += current_predownload_size; @@ -663,18 +727,18 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() return false; SCOPE_EXIT({ - /// Save state of current file segment before it is completed. - nextimpl_step_log_info = getInfoForLog(); - - if (current_file_segment_it == file_segments_holder->file_segments.end()) - return; - - auto & file_segment = *current_file_segment_it; - - bool download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; - if (download_current_segment) + try { - try + /// Save state of current file segment before it is completed. + nextimpl_step_log_info = getInfoForLog(); + + if (current_file_segment_it == file_segments_holder->file_segments.end()) + return; + + auto & file_segment = *current_file_segment_it; + + bool download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; + if (download_current_segment) { bool need_complete_file_segment = file_segment->isDownloader(); if (need_complete_file_segment) @@ -683,13 +747,13 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() file_segment->completeBatchAndResetDownloader(); } } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - } - } - assert(!file_segment->isDownloader()); + assert(!file_segment->isDownloader()); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } }); bytes_to_predownload = 0; @@ -706,9 +770,6 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() if (read_type == ReadType::CACHED) (*current_file_segment_it)->incrementHitsCount(); - - if (enable_logging) - appendFilesystemCacheLog((*current_file_segment_it)->range(), read_type); } assert(!internal_buffer.empty()); @@ -742,18 +803,17 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; if (download_current_segment != file_segment->isDownloader()) + { throw Exception( ErrorCodes::LOGICAL_ERROR, - "Incorrect segment state. Having read type: {}, Caller id: {}, downloader id: {}, file segment state: {}", - toString(read_type), - file_segment->getCallerId(), - file_segment->getDownloader(), - file_segment->state()); + "Incorrect segment state. Having read type: {}, file segment info: {}", + toString(read_type), file_segment->getInfoForLog()); + } if (!result) { #ifndef NDEBUG - if (auto * cache_file_reader = typeid_cast(implementation_buffer.get())) + if (auto * cache_file_reader = dynamic_cast(implementation_buffer.get())) { auto cache_file_size = cache_file_reader->size(); if (cache_file_size == 0) @@ -762,13 +822,26 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() } #endif + Stopwatch watch(CLOCK_MONOTONIC); + result = implementation_buffer->next(); + + watch.stop(); + auto elapsed = watch.elapsedMicroseconds(); + current_file_segment_counters.increment(ProfileEvents::FileSegmentReadMicroseconds, elapsed); + size = implementation_buffer->buffer().size(); if (read_type == ReadType::CACHED) - ProfileEvents::increment(ProfileEvents::RemoteFSCacheReadBytes, size); + { + ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromCacheBytes, size); + ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromCacheMicroseconds, elapsed); + } else - ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, size); + { + ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceBytes, size); + ProfileEvents::increment(ProfileEvents::CachedReadBufferReadFromSourceMicroseconds, elapsed); + } } if (result) @@ -781,12 +854,18 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() { assert(file_segment->getDownloadOffset() == static_cast(implementation_buffer->getPosition())); + Stopwatch watch(CLOCK_MONOTONIC); + file_segment->write( needed_to_predownload ? 
implementation_buffer->position() : implementation_buffer->buffer().begin(), size, file_offset_of_buffer_end); - ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, size); + watch.stop(); + auto elapsed = watch.elapsedMicroseconds(); + current_file_segment_counters.increment(ProfileEvents::FileSegmentCacheWriteMicroseconds, elapsed); + ProfileEvents::increment(ProfileEvents::CachedReadBufferCacheWriteMicroseconds, elapsed); + ProfileEvents::increment(ProfileEvents::CachedReadBufferCacheWriteBytes, size); assert(file_segment->getDownloadOffset() <= file_segment->range().right + 1); assert( @@ -814,10 +893,13 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() } file_offset_of_buffer_end += size; + } swap(*implementation_buffer); + current_file_segment_counters.increment(ProfileEvents::FileSegmentUsedBytes, available()); + if (download_current_segment) file_segment->completeBatchAndResetDownloader(); @@ -846,7 +928,7 @@ bool CachedReadBufferFromRemoteFS::nextImplStep() if (size == 0 && file_offset_of_buffer_end < read_until_position) { std::optional cache_file_size; - if (auto * cache_file_reader = dynamic_cast(implementation_buffer.get())) + if (auto * cache_file_reader = dynamic_cast(implementation_buffer.get())) cache_file_size = cache_file_reader->size(); throw Exception( diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.h b/src/Disks/IO/CachedReadBufferFromRemoteFS.h index da270a1fdae..c73114f50a5 100644 --- a/src/Disks/IO/CachedReadBufferFromRemoteFS.h +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.h @@ -29,6 +29,8 @@ public: const String & query_id_, size_t read_until_position_); + ~CachedReadBufferFromRemoteFS() override; + bool nextImpl() override; off_t seek(off_t off, int whence) override; @@ -117,8 +119,10 @@ private: String query_id; bool enable_logging = false; + String current_buffer_id; CurrentMetrics::Increment metric_increment{CurrentMetrics::FilesystemCacheReadBuffers}; + ProfileEvents::Counters current_file_segment_counters; }; } diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index b1ae42d03d6..f8050b8a8b0 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -16,8 +16,8 @@ namespace ProfileEvents { - extern const Event RemoteFSReadMicroseconds; - extern const Event RemoteFSReadBytes; + extern const Event ThreadpoolReaderTaskMicroseconds; + extern const Event ThreadpoolReaderReadBytes; } namespace CurrentMetrics @@ -83,8 +83,8 @@ std::future ThreadPoolRemoteFSReader::submit(Reques watch.stop(); - ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds()); - ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, result.offset ? result.size - result.offset : result.size); + ProfileEvents::increment(ProfileEvents::ThreadpoolReaderTaskMicroseconds, watch.elapsedMicroseconds()); + ProfileEvents::increment(ProfileEvents::ThreadpoolReaderReadBytes, result.offset ? 
result.size - result.offset : result.size); thread_status.detachQuery(/* if_not_detached */true); diff --git a/src/Disks/S3/ProxyResolverConfiguration.cpp b/src/Disks/S3/ProxyResolverConfiguration.cpp index bb558cf4b72..7449c5200de 100644 --- a/src/Disks/S3/ProxyResolverConfiguration.cpp +++ b/src/Disks/S3/ProxyResolverConfiguration.cpp @@ -63,7 +63,7 @@ Aws::Client::ClientConfigurationPerRequest ProxyResolverConfiguration::getConfig { auto resolved_endpoint = endpoint; resolved_endpoint.setHost(resolved_hosts[i].toString()); - session = makeHTTPSession(endpoint, timeouts, false); + session = makeHTTPSession(resolved_endpoint, timeouts, false); try { diff --git a/src/Functions/array/CMakeLists.txt b/src/Functions/array/CMakeLists.txt index 9762674d6e9..c98f4430078 100644 --- a/src/Functions/array/CMakeLists.txt +++ b/src/Functions/array/CMakeLists.txt @@ -1,7 +1,7 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_functions_array .) add_library(clickhouse_functions_array ${clickhouse_functions_array_sources} ${clickhouse_functions_array_headers}) -target_link_libraries(clickhouse_functions_array PRIVATE dbms clickhouse_functions_gatherutils) +target_link_libraries(clickhouse_functions_array PRIVATE dbms clickhouse_functions_gatherutils ch_contrib::eigen) if (STRIP_DEBUG_SYMBOLS_FUNCTIONS) target_compile_options(clickhouse_functions_array PRIVATE "-g0") diff --git a/src/Functions/array/arrayDistance.cpp b/src/Functions/array/arrayDistance.cpp new file mode 100644 index 00000000000..a533cb2c0cc --- /dev/null +++ b/src/Functions/array/arrayDistance.cpp @@ -0,0 +1,247 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; + extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; +} + +template +struct LpDistance +{ + static inline String name = "L" + std::to_string(N); + template + static void compute(const Eigen::MatrixX & left, const Eigen::MatrixX & right, PaddedPODArray & array) + { + auto norms = (left - right).colwise().template lpNorm(); + array.reserve(norms.size()); + // array.insert() failed to work with Eigen iterators + for (auto n : norms) + array.push_back(n); + } +}; + +struct LinfDistance : LpDistance +{ + static inline String name = "Linf"; +}; + +struct CosineDistance +{ + static inline String name = "Cosine"; + template + static void compute(const Eigen::MatrixX & left, const Eigen::MatrixX & right, PaddedPODArray & array) + { + auto prod = left.cwiseProduct(right).colwise().sum(); + auto nx = left.colwise().norm(); + auto ny = right.colwise().norm(); + auto nm = nx.cwiseProduct(ny).cwiseInverse(); + auto dist = 1.0 - prod.cwiseProduct(nm).array(); + array.reserve(dist.size()); + for (auto d : dist) + array.push_back(d); + } +}; + +template +class FunctionArrayDistance : public IFunction +{ +public: + static inline auto name = "array" + Kernel::name + "Distance"; + String getName() const override { return name; } + static FunctionPtr create(ContextPtr) { return std::make_shared>(); } + size_t getNumberOfArguments() const override { return 2; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + DataTypes types; + for (const 
auto & argument : arguments) + { + const auto * array_type = checkAndGetDataType(argument.type.get()); + if (!array_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument of function {} must be an array.", getName()); + + types.push_back(array_type->getNestedType()); + } + const auto & common_type = getLeastSupertype(types); + switch (common_type->getTypeId()) + { + case TypeIndex::UInt8: + case TypeIndex::UInt16: + case TypeIndex::UInt32: + case TypeIndex::Int8: + case TypeIndex::Int16: + case TypeIndex::Int32: + case TypeIndex::Float32: + return std::make_shared(); + case TypeIndex::UInt64: + case TypeIndex::Int64: + case TypeIndex::Float64: + return std::make_shared(); + default: + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments of function {} have nested type {}. " + "Supported types: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", + getName(), common_type->getName()); + } + } + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + { + DataTypePtr type_x = typeid_cast(arguments[0].type.get())->getNestedType(); + DataTypePtr type_y = typeid_cast(arguments[1].type.get())->getNestedType(); + + ColumnPtr col_x = arguments[0].column->convertToFullColumnIfConst(); + ColumnPtr col_y = arguments[1].column->convertToFullColumnIfConst(); + + const auto * arr_x = assert_cast(col_x.get()); + const auto * arr_y = assert_cast(col_y.get()); + + auto result = result_type->createColumn(); + switch (result_type->getTypeId()) + { + case TypeIndex::Float32: + executeWithType(*arr_x, *arr_y, type_x, type_y, result); + break; + case TypeIndex::Float64: + executeWithType(*arr_x, *arr_y, type_x, type_y, result); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type."); + } + return result; + } + +private: + template + void executeWithType( + const ColumnArray & array_x, + const ColumnArray & array_y, + const DataTypePtr & type_x, + const DataTypePtr & type_y, + MutableColumnPtr & column) const + { + Eigen::MatrixX mx, my; + columnToMatrix(array_x, type_x, mx); + columnToMatrix(array_y, type_y, my); + + if (mx.rows() && my.rows() && mx.rows() != my.rows()) + { + throw Exception( + ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + "Arguments of function {} have different array sizes: {} and {}", + getName(), mx.rows(), my.rows()); + } + auto & data = assert_cast &>(*column).getData(); + Kernel::compute(mx, my, data); + } + + template + void columnToMatrix(const ColumnArray & array, const DataTypePtr & nested_type, Eigen::MatrixX & mat) const + { + const auto & offsets = array.getOffsets(); + size_t cols = offsets.size(); + size_t rows = cols > 0 ?
offsets.front() : 0; + + ColumnArray::Offset prev = 0; + for (ColumnArray::Offset off : offsets) + { + if (off - prev != rows) + throw Exception( + ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + "Arrays in a column passed to function {} have different sizes: {} and {}", + getName(), rows, off - prev); + prev = off; + } + + switch (nested_type->getTypeId()) + { + case TypeIndex::UInt8: + fillMatrix(mat, array, rows, cols); + break; + case TypeIndex::UInt16: + fillMatrix(mat, array, rows, cols); + break; + case TypeIndex::UInt32: + fillMatrix(mat, array, rows, cols); + break; + case TypeIndex::UInt64: + fillMatrix(mat, array, rows, cols); + break; + case TypeIndex::Int8: + fillMatrix(mat, array, rows, cols); + break; + case TypeIndex::Int16: + fillMatrix(mat, array, rows, cols); + break; + case TypeIndex::Int32: + fillMatrix(mat, array, rows, cols); + break; + case TypeIndex::Int64: + fillMatrix(mat, array, rows, cols); + break; + case TypeIndex::Float32: + fillMatrix(mat, array, rows, cols); + break; + case TypeIndex::Float64: + fillMatrix(mat, array, rows, cols); + break; + default: + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments of function {} have nested type {}. " + "Supported types: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", + getName(), nested_type->getName()); + } + } + + // Optimized for float/double: map the column data directly, without per-element copying + template + requires std::is_same_v + void fillMatrix(Eigen::MatrixX & mat, const ColumnArray & array, size_t rows, size_t cols) const + { + const auto & data = typeid_cast &>(array.getData()).getData(); + mat = Eigen::Map>(data.data(), rows, cols); + } + + template + void fillMatrix(Eigen::MatrixX & mat, const ColumnArray & array, size_t rows, size_t cols) const + { + const auto & data = typeid_cast &>(array.getData()).getData(); + mat.resize(rows, cols); + for (size_t col = 0; col < cols; ++col) + { + for (size_t row = 0; row < rows; ++row) + { + size_t off = col * rows; + mat(row, col) = static_cast(data[off + row]); + } + } + } +}; + +void registerFunctionArrayDistance(FunctionFactory & factory) +{ + factory.registerFunction>>(); + factory.registerFunction>>(); + factory.registerFunction>(); + factory.registerFunction>(); +} + +} diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp new file mode 100644 index 00000000000..20fe85d7491 --- /dev/null +++ b/src/Functions/array/arrayNorm.cpp @@ -0,0 +1,205 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; +} + +template +struct LpNorm +{ + static inline String name = "L" + std::to_string(N); + template + static void compute(const std::vector> & vec, PaddedPODArray & array) + { + array.reserve(vec.size()); + for (const auto & v : vec) + { + array.push_back(v.template lpNorm()); + } + } +}; + +struct LinfNorm : LpNorm +{ + static inline String name = "Linf"; +}; + +template +class FunctionArrayNorm : public IFunction +{ +public: + static inline auto name = "array" + Kernel::name + "Norm"; + String getName() const override { return name; } + static FunctionPtr create(ContextPtr) { return std::make_shared>(); } + size_t getNumberOfArguments() const override { return 1; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr
getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + DataTypes types; + for (const auto & argument : arguments) + { + const auto * array_type = checkAndGetDataType(argument.type.get()); + if (!array_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument of function {} must be an array.", getName()); + + types.push_back(array_type->getNestedType()); + } + const auto & common_type = getLeastSupertype(types); + switch (common_type->getTypeId()) + { + case TypeIndex::UInt8: + case TypeIndex::UInt16: + case TypeIndex::UInt32: + case TypeIndex::Int8: + case TypeIndex::Int16: + case TypeIndex::Int32: + case TypeIndex::Float32: + return std::make_shared(); + case TypeIndex::UInt64: + case TypeIndex::Int64: + case TypeIndex::Float64: + return std::make_shared(); + default: + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments of function {} have nested type {}. " + "Supported types: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", + getName(), common_type->getName()); + } + } + + ColumnPtr + executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override + { + DataTypePtr type = typeid_cast(arguments[0].type.get())->getNestedType(); + ColumnPtr column = arguments[0].column->convertToFullColumnIfConst(); + const auto * arr = assert_cast(column.get()); + + auto result = result_type->createColumn(); + switch (result_type->getTypeId()) + { + case TypeIndex::Float32: + executeWithType(*arr, type, result); + break; + case TypeIndex::Float64: + executeWithType(*arr, type, result); + break; + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type."); + } + return result; + } + +private: + template + void executeWithType(const ColumnArray & array, const DataTypePtr & type, MutableColumnPtr & column) const + { + std::vector> vec; + columnToVectors(array, type, vec); + auto & data = assert_cast &>(*column).getData(); + Kernel::compute(vec, data); + } + + template + void columnToVectors(const ColumnArray & array, const DataTypePtr & nested_type, std::vector> & vec) const + { + switch (nested_type->getTypeId()) + { + case TypeIndex::UInt8: + fillVectors(vec, array); + break; + case TypeIndex::UInt16: + fillVectors(vec, array); + break; + case TypeIndex::UInt32: + fillVectors(vec, array); + break; + case TypeIndex::UInt64: + fillVectors(vec, array); + break; + case TypeIndex::Int8: + fillVectors(vec, array); + break; + case TypeIndex::Int16: + fillVectors(vec, array); + break; + case TypeIndex::Int32: + fillVectors(vec, array); + break; + case TypeIndex::Int64: + fillVectors(vec, array); + break; + case TypeIndex::Float32: + fillVectors(vec, array); + break; + case TypeIndex::Float64: + fillVectors(vec, array); + break; + default: + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Arguments of function {} have nested type {}. 
" + "Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.", + getName(), nested_type->getName()); + } + } + + template + requires std::is_same_v + void fillVectors(std::vector> & vec, const ColumnArray & array) const + { + const auto & data = typeid_cast &>(array.getData()).getData(); + const auto & offsets = array.getOffsets(); + vec.reserve(offsets.size()); + ColumnArray::Offset prev = 0; + for (auto off : offsets) + { + vec.emplace_back(Eigen::Map>(data.data() + prev, off - prev)); + prev = off; + } + } + + template + void fillVectors(std::vector> & vec, const ColumnArray & array) const + { + const auto & data = typeid_cast &>(array.getData()).getData(); + const auto & offsets = array.getOffsets(); + vec.reserve(offsets.size()); + + ColumnArray::Offset prev = 0; + for (auto off : offsets) + { + Eigen::VectorX mat(off - prev); + for (ColumnArray::Offset row = 0; row + prev < off; ++row) + { + mat[row] = static_cast(data[prev + row]); + } + prev = off; + vec.emplace_back(mat); + } + } +}; + +void registerFunctionArrayNorm(FunctionFactory & factory) +{ + factory.registerFunction>>(); + factory.registerFunction>>(); + factory.registerFunction>(); +} + +} diff --git a/src/Functions/array/registerFunctionsArray.cpp b/src/Functions/array/registerFunctionsArray.cpp index 3bb27cbadf9..e2e8b08fbf2 100644 --- a/src/Functions/array/registerFunctionsArray.cpp +++ b/src/Functions/array/registerFunctionsArray.cpp @@ -37,6 +37,8 @@ void registerFunctionArrayAUC(FunctionFactory &); void registerFunctionArrayReduceInRanges(FunctionFactory &); void registerFunctionMapOp(FunctionFactory &); void registerFunctionMapPopulateSeries(FunctionFactory &); +void registerFunctionArrayDistance(FunctionFactory &); +void registerFunctionArrayNorm(FunctionFactory &); void registerFunctionsArray(FunctionFactory & factory) { @@ -75,6 +77,8 @@ void registerFunctionsArray(FunctionFactory & factory) registerFunctionArrayAUC(factory); registerFunctionMapOp(factory); registerFunctionMapPopulateSeries(factory); + registerFunctionArrayDistance(factory); + registerFunctionArrayNorm(factory); } } diff --git a/src/Functions/h3kRing.cpp b/src/Functions/h3kRing.cpp index a801fd299d6..a68f2a5e23d 100644 --- a/src/Functions/h3kRing.cpp +++ b/src/Functions/h3kRing.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -51,10 +52,10 @@ public: arg->getName(), 1, getName()); arg = arguments[1].get(); - if (!WhichDataType(arg).isUInt16()) + if (!WhichDataType(arg).isNativeUInt()) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of argument {} of function {}. Must be UInt16", + "Illegal type {} of argument {} of function {}. Must be unsigned native integer.", arg->getName(), 2, getName()); @@ -80,7 +81,8 @@ public: const auto & data_hindex = col_hindex->getData(); /// ColumnUInt16 is sufficient as the max value of 2nd arg is checked (arg > 0 < 10000) in implementation below - const auto * col_k = checkAndGetColumn(non_const_arguments[1].column.get()); + auto cast_result = castColumnAccurate(non_const_arguments[1], std::make_shared()); + const auto * col_k = checkAndGetColumn(cast_result.get()); if (!col_k) throw Exception( ErrorCodes::ILLEGAL_COLUMN, diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 9f53572fcde..7ed2f343209 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -74,8 +74,12 @@ namespace if (https) { #if USE_SSL - /// Cannot resolve host in advance, otherwise SNI won't work in Poco. 
- session = std::make_shared(host, port); + String resolved_host = resolve_host ? DNSResolver::instance().resolveHost(host).toString() : host; + auto https_session = std::make_shared(host, port); + if (resolve_host) + https_session->setResolvedHost(resolved_host); + + session = std::move(https_session); #else throw Exception("ClickHouse was built without HTTPS support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); #endif diff --git a/src/IO/ReadBufferFromFile.h b/src/IO/ReadBufferFromFile.h index 52b18b94616..a720f8dd36d 100644 --- a/src/IO/ReadBufferFromFile.h +++ b/src/IO/ReadBufferFromFile.h @@ -50,8 +50,6 @@ public: return file_name; } - Range getRemainingReadRange() const override { return Range{ .left = file_offset_of_buffer_end, .right = std::nullopt }; } - size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; } }; diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h index ba1502fb9aa..a1d19c08087 100644 --- a/src/IO/ReadBufferFromFileDescriptor.h +++ b/src/IO/ReadBufferFromFileDescriptor.h @@ -49,6 +49,8 @@ public: return file_offset_of_buffer_end - (working_buffer.end() - pos); } + Range getRemainingReadRange() const override { return Range{ .left = file_offset_of_buffer_end, .right = std::nullopt }; } + /// If 'offset' is small enough to stay in buffer after seek, then true seek in file does not happen. off_t seek(off_t off, int whence) override; diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index 6a154c60987..52bddc57b48 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -57,7 +57,7 @@ struct ReadSettings /// Method to use reading from local filesystem. LocalFSReadMethod local_fs_method = LocalFSReadMethod::pread; /// Method to use reading from remote filesystem. - RemoteFSReadMethod remote_fs_method = RemoteFSReadMethod::read; + RemoteFSReadMethod remote_fs_method = RemoteFSReadMethod::threadpool; size_t local_fs_buffer_size = DBMS_DEFAULT_BUFFER_SIZE; size_t remote_fs_buffer_size = DBMS_DEFAULT_BUFFER_SIZE; diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 9aacbda3fbf..0dcc87e6c0a 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -37,6 +37,11 @@ namespace ProfileEvents extern const Event S3WriteRequestsRedirects; } +namespace CurrentMetrics +{ + extern const Metric S3Requests; +} + namespace DB::ErrorCodes { extern const int NOT_IMPLEMENTED; @@ -160,6 +165,7 @@ void PocoHTTPClient::makeRequestInternal( }; ProfileEvents::increment(select_metric(S3MetricType::Count)); + CurrentMetrics::Increment metric_increment{CurrentMetrics::S3Requests}; try { diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 342a512ee52..c25eed0e6c5 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -21,7 +21,7 @@ namespace ProfileEvents { extern const Event WriteBufferFromS3Bytes; - extern const Event RemoteFSCacheDownloadBytes; + extern const Event CachedReadBufferCacheWriteBytes; } namespace DB @@ -490,7 +490,7 @@ void WriteBufferFromS3::finalizeCacheIfNeeded(std::optional size_t size = (*file_segment_it)->finalizeWrite(); file_segment_it = file_segments.erase(file_segment_it); - ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, size); + ProfileEvents::increment(ProfileEvents::CachedReadBufferCacheWriteBytes, size); } catch (...)
{ diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index f4e49d3230d..f6ba9f95bbc 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -16,25 +16,27 @@ #include #include #include +#include #include + namespace DB { + namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int SET_SIZE_LIMIT_EXCEEDED; extern const int BAD_ARGUMENTS; } -namespace JoinStuff -{ + ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_) : context(context_) , table_join(table_join_) , slots(slots_) { - if (!slots_ || slots_ >= 256) + if (slots < 1 || 255 < slots) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid argument slot : {}", slots_); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Number of slots should be [1, 255], got {}", slots); } for (size_t i = 0; i < slots; ++i) @@ -43,36 +45,44 @@ ConcurrentHashJoin::ConcurrentHashJoin(ContextPtr context_, std::shared_ptrdata = std::make_unique(table_join_, right_sample_block, any_take_last_row_); hash_joins.emplace_back(std::move(inner_hash_join)); } - } -bool ConcurrentHashJoin::addJoinedBlock(const Block & block, bool check_limits) +bool ConcurrentHashJoin::addJoinedBlock(const Block & right_block, bool check_limits) { - Blocks dispatched_blocks = dispatchBlock(table_join->getOnlyClause().key_names_right, block); + Blocks dispatched_blocks = dispatchBlock(table_join->getOnlyClause().key_names_right, right_block); - std::list pending_blocks; - for (size_t i = 0; i < dispatched_blocks.size(); ++i) - pending_blocks.emplace_back(i); - while (!pending_blocks.empty()) + size_t blocks_left = 0; + for (const auto & block : dispatched_blocks) { - for (auto iter = pending_blocks.begin(); iter != pending_blocks.end();) + if (block) + { + ++blocks_left; + } + } + + while (blocks_left > 0) + { + /// insert blocks into corresponding HashJoin instances + for (size_t i = 0; i < dispatched_blocks.size(); ++i) { - auto & i = *iter; auto & hash_join = hash_joins[i]; auto & dispatched_block = dispatched_blocks[i]; - if (hash_join->mutex.try_lock()) - { - if (!hash_join->data->addJoinedBlock(dispatched_block, check_limits)) - { - hash_join->mutex.unlock(); - return false; - } - hash_join->mutex.unlock(); - iter = pending_blocks.erase(iter); + if (dispatched_block) + { + /// if current hash_join is already processed by another thread, skip it and try later + std::unique_lock lock(hash_join->mutex, std::try_to_lock); + if (!lock.owns_lock()) + continue; + + bool limit_exceeded = !hash_join->data->addJoinedBlock(dispatched_block, check_limits); + + dispatched_block = {}; + blocks_left--; + + if (limit_exceeded) + return false; } - else - iter++; } } @@ -161,30 +171,32 @@ std::shared_ptr ConcurrentHashJoin::getNonJoinedBlocks( throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid join type. 
join kind: {}, strictness: {}", table_join->kind(), table_join->strictness()); } +static IColumn::Selector hashToSelector(const WeakHash32 & hash, size_t num_shards) +{ + const auto & data = hash.getData(); + size_t num_rows = data.size(); + + IColumn::Selector selector(num_rows); + for (size_t i = 0; i < num_rows; ++i) + selector[i] = data[i] % num_shards; + return selector; +} + Blocks ConcurrentHashJoin::dispatchBlock(const Strings & key_columns_names, const Block & from_block) { - Blocks result; - size_t num_shards = hash_joins.size(); size_t num_rows = from_block.rows(); size_t num_cols = from_block.columns(); - ColumnRawPtrs key_cols; + WeakHash32 hash(num_rows); for (const auto & key_name : key_columns_names) { - key_cols.push_back(from_block.getByName(key_name).column.get()); - } - IColumn::Selector selector(num_rows); - for (size_t i = 0; i < num_rows; ++i) - { - SipHash hash; - for (const auto & key_col : key_cols) - { - key_col->updateHashWithValue(i, hash); - } - selector[i] = hash.get64() % num_shards; + const auto & key_col = from_block.getByName(key_name).column; + key_col->updateWeakHash32(hash); } + auto selector = hashToSelector(hash, num_shards); + Blocks result; for (size_t i = 0; i < num_shards; ++i) { result.emplace_back(from_block.cloneEmpty()); @@ -203,4 +215,3 @@ Blocks ConcurrentHashJoin::dispatchBlock(const Strings & key_columns_names, cons } } -} diff --git a/src/Interpreters/ConcurrentHashJoin.h b/src/Interpreters/ConcurrentHashJoin.h index 47fa2b2112f..fb226c39a0c 100644 --- a/src/Interpreters/ConcurrentHashJoin.h +++ b/src/Interpreters/ConcurrentHashJoin.h @@ -15,8 +15,7 @@ namespace DB { -namespace JoinStuff -{ + /** * Can run addJoinedBlock() in parallel to speed up the join process. In tests, it achieved almost linear speedup by * the degree of parallelism. */ class ConcurrentHashJoin : public IJoin { + public: explicit ConcurrentHashJoin(ContextPtr context_, std::shared_ptr table_join_, size_t slots_, const Block & right_sample_block, bool any_take_last_row_ = false); ~ConcurrentHashJoin() override = default; @@ -49,6 +49,7 @@ public: bool supportParallelJoin() const override { return true; } std::shared_ptr getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override; + private: struct InternalHashJoin { @@ -71,5 +72,5 @@ private: Blocks dispatchBlock(const Strings & key_columns_names, const Block & from_block); }; -} + } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 1bd3f92a5b7..34f396b978c 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -535,6 +535,7 @@ ContextMutablePtr Context::createCopy(const ContextMutablePtr & other) Context::~Context() = default; InterserverIOHandler & Context::getInterserverIOHandler() { return shared->interserver_io_handler; } +const InterserverIOHandler & Context::getInterserverIOHandler() const { return shared->interserver_io_handler; } std::unique_lock Context::getLock() const { @@ -2226,7 +2227,7 @@ bool Context::hasAuxiliaryZooKeeper(const String & name) const return getConfigRef().has("auxiliary_zookeepers.
+ name); } -InterserverCredentialsPtr Context::getInterserverCredentials() +InterserverCredentialsPtr Context::getInterserverCredentials() const { return shared->interserver_io_credentials.get(); } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index dbddda39aad..ddc474ca347 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -612,6 +612,7 @@ public: OutputFormatPtr getOutputFormatParallelIfPossible(const String & name, WriteBuffer & buf, const Block & sample) const; InterserverIOHandler & getInterserverIOHandler(); + const InterserverIOHandler & getInterserverIOHandler() const; /// How other servers can access this for downloading replicated data. void setInterserverIOAddress(const String & host, UInt16 port); @@ -619,7 +620,7 @@ public: /// Credentials which server will use to communicate with others void updateInterserverCredentials(const Poco::Util::AbstractConfiguration & config); - InterserverCredentialsPtr getInterserverCredentials(); + InterserverCredentialsPtr getInterserverCredentials() const; /// Interserver requests scheme (http or https) void setInterserverScheme(const String & scheme); diff --git a/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp b/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp new file mode 100644 index 00000000000..7cad19fbf74 --- /dev/null +++ b/src/Interpreters/ConvertFunctionOrLikeVisitor.cpp @@ -0,0 +1,76 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +void ConvertFunctionOrLikeData::visit(ASTFunction & function, ASTPtr &) +{ + if (function.name != "or") + return; + + std::unordered_map> identifier_to_literals; + for (auto & child : function.children) + { + if (auto expr_list_fn = child->as()) + { + ASTs unique_elems; + for (const auto & child_expr_fn : expr_list_fn->children) + { + unique_elems.push_back(child_expr_fn); + if (const auto * child_fn = child_expr_fn->as()) + { + const bool is_like = child_fn->name == "like"; + const bool is_ilike = child_fn->name == "ilike"; + + /// Not {i}like -> bail out. + if (!is_like && !is_ilike) + continue; + + const auto & arguments = child_fn->arguments->children; + + /// They should have 2 arguments. + if (arguments.size() != 2) + continue; + + /// Second one is string literal. + auto identifier = arguments[0]; + auto literal = arguments[1]->as(); + if (!identifier || !literal || literal->value.getType() != Field::Types::String) + continue; + + String regexp = likePatternToRegexp(literal->value.get()); + /// Case insensitive. Works with UTF-8 as well. + if (is_ilike) + regexp = "(?i)" + regexp; + + unique_elems.pop_back(); + auto it = identifier_to_literals.find(identifier); + if (it == identifier_to_literals.end()) + { + it = identifier_to_literals.insert({identifier, std::make_shared(Field{Array{}})}).first; + auto match = makeASTFunction("multiMatchAny"); + match->arguments->children.push_back(arguments[0]); + match->arguments->children.push_back(it->second); + unique_elems.push_back(std::move(match)); + } + it->second->value.get().push_back(regexp); + } + } + + /// OR must have at least two arguments. 
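+ /// (Illustrative example of the rewrite: "s LIKE 'a%' OR s ILIKE 'b%'" collapses into
+ /// "multiMatchAny(s, [re1, '(?i)' + re2])", where re1 and re2 are the regexps produced
+ /// by likePatternToRegexp above. If everything folded into a single multiMatchAny, "or"
+ /// would be left with one child, so a constant "false" is appended below; it never
+ /// changes the value of the disjunction.)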
+ if (unique_elems.size() == 1) + unique_elems.push_back(std::make_shared(Field(false))); + + expr_list_fn->children = std::move(unique_elems); + } + } +} + +} diff --git a/src/Interpreters/ConvertFunctionOrLikeVisitor.h b/src/Interpreters/ConvertFunctionOrLikeVisitor.h new file mode 100644 index 00000000000..ba4a0073448 --- /dev/null +++ b/src/Interpreters/ConvertFunctionOrLikeVisitor.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class ASTFunction; + +/// Replaces chains of "or" over like/ilike calls with a single multiMatchAny. +class ConvertFunctionOrLikeData +{ +public: + using TypeToVisit = ASTFunction; + + void visit(ASTFunction & function, ASTPtr & ast); +}; + +using ConvertFunctionOrLikeVisitor = InDepthNodeVisitor, true>; + +} diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 37965f11c83..e7325363c08 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1020,7 +1020,7 @@ static std::shared_ptr chooseJoinAlgorithm(std::shared_ptr ana { if (analyzed_join->allowParallelHashJoin()) { - return std::make_shared(context, analyzed_join, context->getSettings().max_threads, sample_block); + return std::make_shared(context, analyzed_join, context->getSettings().max_threads, sample_block); } return std::make_shared(analyzed_join, sample_block); } diff --git a/src/Interpreters/FilesystemCacheLog.cpp b/src/Interpreters/FilesystemCacheLog.cpp index a8470ca15ba..609305321b1 100644 --- a/src/Interpreters/FilesystemCacheLog.cpp +++ b/src/Interpreters/FilesystemCacheLog.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include @@ -34,10 +36,13 @@ NamesAndTypesList FilesystemCacheLogElement::getNamesAndTypes() {"event_time", std::make_shared()}, {"query_id", std::make_shared()}, {"source_file_path", std::make_shared()}, - {"file_segment_range", std::make_shared(std::move(types))}, + {"file_segment_range", std::make_shared(types)}, + {"total_requested_range", std::make_shared(types)}, {"size", std::make_shared()}, {"read_type", std::make_shared()}, {"cache_attempted", std::make_shared()}, + {"ProfileEvents", std::make_shared(std::make_shared(), std::make_shared())}, + {"read_buffer_id", std::make_shared()}, }; } @@ -52,9 +57,22 @@ void FilesystemCacheLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(source_file_path); columns[i++]->insert(Tuple{file_segment_range.first, file_segment_range.second}); + columns[i++]->insert(Tuple{requested_range.first, requested_range.second}); columns[i++]->insert(file_segment_size); columns[i++]->insert(typeToString(read_type)); columns[i++]->insert(cache_attempted); + + if (profile_counters) + { + auto * column = columns[i++].get(); + ProfileEvents::dumpToMapColumn(*profile_counters, column, true); + } + else + { + columns[i++]->insertDefault(); + } + + columns[i++]->insert(read_buffer_id); } }; diff --git a/src/Interpreters/FilesystemCacheLog.h b/src/Interpreters/FilesystemCacheLog.h index bef5a82d39b..77bae7d788a 100644 --- a/src/Interpreters/FilesystemCacheLog.h +++ b/src/Interpreters/FilesystemCacheLog.h @@ -37,9 +37,12 @@ struct FilesystemCacheLogElement String source_file_path; std::pair file_segment_range{}; + std::pair requested_range{}; ReadType read_type{}; size_t file_segment_size; bool cache_attempted; + String read_buffer_id; + std::shared_ptr profile_counters; static std::string name() { return "FilesystemCacheLog"; } diff --git a/src/Interpreters/InterpreterSystemQuery.cpp
b/src/Interpreters/InterpreterSystemQuery.cpp index b52645c7854..0db6f353cf4 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -312,12 +312,12 @@ BlockIO InterpreterSystemQuery::execute() { auto caches = FileCacheFactory::instance().getAll(); for (const auto & [_, cache_data] : caches) - cache_data.cache->remove(query.force_removal); + cache_data.cache->remove(); } else { auto cache = FileCacheFactory::instance().get(query.filesystem_cache_path); - cache->remove(query.force_removal); + cache->remove(); } break; } diff --git a/src/Interpreters/InterserverIOHandler.h b/src/Interpreters/InterserverIOHandler.h index 7ad1c01cbf1..69b742db2ec 100644 --- a/src/Interpreters/InterserverIOHandler.h +++ b/src/Interpreters/InterserverIOHandler.h @@ -69,7 +69,7 @@ public: return endpoint_map.erase(name); } - InterserverIOEndpointPtr getEndpoint(const String & name) + InterserverIOEndpointPtr getEndpoint(const String & name) const try { std::lock_guard lock(mutex); @@ -84,7 +84,7 @@ private: using EndpointMap = std::map; EndpointMap endpoint_map; - std::mutex mutex; + mutable std::mutex mutex; }; } diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp index 991b449196d..18ca70c7c09 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp @@ -39,10 +39,8 @@ bool shardContains( const std::string & sharding_column_name, const OptimizeShardingKeyRewriteInMatcher::Data & data) { - UInt64 field_value; - /// Convert value to numeric (if required). - if (!sharding_column_value.tryGet(field_value)) - sharding_column_value = convertFieldToType(sharding_column_value, *data.sharding_key_type); + /// Implicit conversion. + sharding_column_value = convertFieldToType(sharding_column_value, *data.sharding_key_type); /// NULL is not allowed in sharding key, /// so it should be safe to assume that shard cannot contain it. diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index ac59d2c7235..6c101143234 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -212,7 +212,7 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as /// Set query-level memory trackers thread_group->memory_tracker.setOrRaiseHardLimit(settings.max_memory_usage); - thread_group->memory_tracker.setSoftLimit(settings.max_guaranteed_memory_usage); + thread_group->memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); if (query_context->hasTraceCollector()) { @@ -242,7 +242,7 @@ ProcessList::EntryPtr ProcessList::insert(const String & query_, const IAST * as /// Track memory usage for all simultaneously running queries from single user. 
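A note on the semantics behind the renamed settings wired in here: the denominator converts a query's tracked memory into an overcommit ratio, and, as an assumption about the selection policy (this diff itself only renames and re-documents the settings), the overcommit tracker prefers the query with the largest ratio as the victim when memory has to be reclaimed. A worked example:

    // With memory_overcommit_ratio_denominator_for_user = 1 GiB (the new default):
    //   query A tracking 4 GiB   -> ratio 4.0
    //   query B tracking 512 MiB -> ratio 0.5
    // Under memory pressure, query A is the more likely victim.
    double overcommitRatio(UInt64 tracked_bytes)
    {
        constexpr UInt64 denominator = 1ULL << 30; // 1 GiB
        return static_cast<double>(tracked_bytes) / denominator;
    }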
user_process_list.user_memory_tracker.setOrRaiseHardLimit(settings.max_memory_usage_for_user); - user_process_list.user_memory_tracker.setSoftLimit(settings.max_guaranteed_memory_usage_for_user); + user_process_list.user_memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator_for_user); user_process_list.user_memory_tracker.setDescription("(for user)"); user_process_list.user_overcommit_tracker.setMaxWaitTime(settings.memory_usage_overcommit_max_wait_microseconds); diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index b8a31dd968b..537c18beaa1 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -735,6 +736,12 @@ void optimizeFuseQuantileFunctions(ASTPtr & query) } } +void optimizeOrLikeChain(ASTPtr & query) +{ + ConvertFunctionOrLikeVisitor::Data data = {}; + ConvertFunctionOrLikeVisitor(data).visit(query); +} + } void TreeOptimizer::optimizeIf(ASTPtr & query, Aliases & aliases, bool if_chain_to_multiif) @@ -847,6 +854,14 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, if (settings.optimize_syntax_fuse_functions) optimizeFuseQuantileFunctions(query); + + if (settings.optimize_or_like_chain + && settings.allow_hyperscan + && settings.max_hyperscan_regexp_length == 0 + && settings.max_hyperscan_regexp_total_length == 0) + { + optimizeOrLikeChain(query); + } } } diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index 8a7891edfe1..a6ff52b74b7 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -200,8 +200,6 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, { if (!filesystem_cache_path.empty()) settings.ostr << (settings.hilite ? hilite_none : "") << " " << filesystem_cache_path; - if (force_removal) - settings.ostr << (settings.hilite ? hilite_keyword : "") << " FORCE"; } } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index 99be7b709bc..eff71a3a9a0 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -91,9 +91,7 @@ public: String disk; UInt64 seconds{}; - /// Values for `drop filesystem cache` system query. 
String filesystem_cache_path; - bool force_removal = false; String getID(char) const override { return "SYSTEM query"; } diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 599de6ec828..e18b0aa5e10 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -360,8 +360,6 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ASTPtr ast; if (path_parser.parse(pos, ast, expected)) res->filesystem_cache_path = ast->as<ASTLiteral>()->value.safeGet<String>(); - if (ParserKeyword{"FORCE"}.ignore(pos, expected)) - res->force_removal = true; break; } diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index 5e074861110..012a825a9d5 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -347,7 +347,7 @@ std::unique_ptr<QueryPipelineBuilder> QueryPipelineBuilder::joinPipelines( /// ╞> FillingJoin ─> Resize ╣ ╞> Joining ─> (totals) /// (totals) ─────────┘ ╙─────┘ - auto num_streams = left->getNumStreams(); + size_t num_streams = left->getNumStreams(); if (join->supportParallelJoin() && !right->hasTotals()) { diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 87be99aa246..4b8860aa51d 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -81,7 +81,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() if (!source_part_or_covering) { /// We do not have one of source parts locally, try to take some already merged part from someone. - LOG_DEBUG(log, "Don't have all parts for merge {}; will try to fetch it instead", entry.new_part_name); + LOG_DEBUG(log, "Don't have all parts (at least part {} is missing) for merge {}; will try to fetch it instead", source_part_name, entry.new_part_name); return PrepareResult{ .prepared_successfully = false, .need_to_check_missing_part_in_fetch = true, diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index 11d0bc8c565..8722ddc5a82 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -87,7 +87,7 @@ MergeListElement::MergeListElement( /// thread_group::memory_tracker, but MemoryTrackerThreadSwitcher will reset parent). memory_tracker.setProfilerStep(settings.memory_profiler_step); memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); - memory_tracker.setSoftLimit(settings.max_guaranteed_memory_usage); + memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); if (settings.memory_tracker_fault_probability) memory_tracker.setFaultProbability(settings.memory_tracker_fault_probability); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 7733562ca7d..53397106a56 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -155,19 +155,21 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP { auto part_on_replica_info = MergeTreePartInfo::fromPartName(part_on_replica, storage.format_version); + /// All three following cases are "good" outcome for check thread and don't require + /// any special attention. if (part_info == part_on_replica_info) { /// Found missing part at ourself.
If we are here then something wrong with this part, so skipping. if (replica_path == storage.replica_path) continue; - LOG_WARNING(log, "Found the missing part {} at {} on {}", part_name, part_on_replica, replica); + LOG_INFO(log, "Found the missing part {} at {} on {}", part_name, part_on_replica, replica); return MissingPartSearchResult::FoundAndNeedFetch; } if (part_on_replica_info.contains(part_info)) { - LOG_WARNING(log, "Found part {} on {} that covers the missing part {}", part_on_replica, replica, part_name); + LOG_INFO(log, "Found part {} on {} that covers the missing part {}", part_on_replica, replica, part_name); return MissingPartSearchResult::FoundAndDontNeedFetch; } @@ -181,7 +183,7 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP if (found_part_with_the_same_min_block && found_part_with_the_same_max_block) { /// FIXME It may never appear - LOG_WARNING(log, "Found parts with the same min block and with the same max block as the missing part {} on replica {}. Hoping that it will eventually appear as a result of a merge.", part_name, replica); + LOG_INFO(log, "Found parts with the same min block and with the same max block as the missing part {} on replica {}. Hoping that it will eventually appear as a result of a merge.", part_name, replica); return MissingPartSearchResult::FoundAndDontNeedFetch; } } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 1319c864b7b..13c6fca5163 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -37,6 +37,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include #include @@ -927,6 +932,76 @@ void StorageWindowView::threadFuncFireEvent() } } +Pipe StorageWindowView::read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + const size_t max_block_size, + const unsigned num_streams) +{ + QueryPlan plan; + read(plan, column_names, storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); + return plan.convertToPipe( + QueryPlanOptimizationSettings::fromContext(local_context), BuildQueryPipelineSettings::fromContext(local_context)); +} + +void StorageWindowView::read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr local_context, + QueryProcessingStage::Enum processed_stage, + const size_t max_block_size, + const unsigned num_streams) +{ + if (target_table_id.empty()) + return; + + auto storage = getTargetStorage(); + auto lock = storage->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout); + auto target_metadata_snapshot = storage->getInMemoryMetadataPtr(); + auto target_storage_snapshot = storage->getStorageSnapshot(target_metadata_snapshot, local_context); + + if (query_info.order_optimizer) + query_info.input_order_info = query_info.order_optimizer->getInputOrder(target_metadata_snapshot, local_context); + + storage->read(query_plan, column_names, target_storage_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); + + if (query_plan.isInitialized()) + { + auto wv_header = getHeaderForProcessingStage(column_names, storage_snapshot, query_info, local_context, processed_stage); + auto target_header = 
query_plan.getCurrentDataStream().header; + + if (!blocksHaveEqualStructure(wv_header, target_header)) + { + auto converting_actions = ActionsDAG::makeConvertingActions( + target_header.getColumnsWithTypeAndName(), wv_header.getColumnsWithTypeAndName(), ActionsDAG::MatchColumnsMode::Name); + auto converting_step = std::make_unique<ExpressionStep>(query_plan.getCurrentDataStream(), converting_actions); + converting_step->setStepDescription("Convert Target table structure to WindowView structure"); + query_plan.addStep(std::move(converting_step)); + } + + StreamLocalLimits limits; + SizeLimits leaf_limits; + + /// Add table lock for target table. + auto adding_limits_and_quota = std::make_unique<SettingQuotaAndLimitsStep>( + query_plan.getCurrentDataStream(), + storage, + std::move(lock), + limits, + leaf_limits, + nullptr, + nullptr); + + adding_limits_and_quota->setStepDescription("Lock target table for WindowView"); + query_plan.addStep(std::move(adding_limits_and_quota)); + } +} + Pipe StorageWindowView::watch( const Names & /*column_names*/, const SelectQueryInfo & query_info, @@ -1316,6 +1391,18 @@ void StorageWindowView::writeIntoWindowView( auto metadata_snapshot = inner_storage->getInMemoryMetadataPtr(); auto output = inner_storage->write(window_view.getMergeableQuery(), metadata_snapshot, local_context); + if (!blocksHaveEqualStructure(builder.getHeader(), output->getHeader())) + { + auto convert_actions_dag = ActionsDAG::makeConvertingActions( + builder.getHeader().getColumnsWithTypeAndName(), + output->getHeader().getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name); + auto convert_actions = std::make_shared<ExpressionActions>( + convert_actions_dag, ExpressionActionsSettings::fromContext(local_context, CompileExpressions::yes)); + + builder.addSimpleTransform([&](const Block & header) { return std::make_shared<ExpressionTransform>(header, convert_actions); }); + } + builder.addChain(Chain(std::move(output))); builder.setSinks([&](const Block & cur_header, Pipe::StreamType) { diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 782e8f2b899..101d29d1ae7 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -137,6 +137,25 @@ public: void startup() override; void shutdown() override; + Pipe read( + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; + + void read( + QueryPlan & query_plan, + const Names & column_names, + const StorageSnapshotPtr & storage_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; + Pipe watch( const Names & column_names, const SelectQueryInfo & query_info, diff --git a/src/Storages/WindowView/WindowViewSource.h b/src/Storages/WindowView/WindowViewSource.h index a726cdc8712..7b914933035 100644 --- a/src/Storages/WindowView/WindowViewSource.h +++ b/src/Storages/WindowView/WindowViewSource.h @@ -51,6 +51,8 @@ protected: Block block; UInt32 watermark; std::tie(block, watermark) = generateImpl(); + if (!block) + return Chunk(); if (is_events) { return Chunk( diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 6a3615fc5db..b73bf057393 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -254,6 +254,9 @@ def main(): logging.info("Got version from repo %s", version.string) official_flag =
pr_info.number == 0 + if "official" in build_config: + official_flag = build_config["official"] + version_type = "testing" if "release" in pr_info.labels or "release-lts" in pr_info.labels: version_type = "stable" diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index c9915c1c7f4..1a070c781d4 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -37,6 +37,7 @@ CI_CONFIG = { "splitted": "unsplitted", "tidy": "disable", "with_coverage": False, + "official": False, }, # FIXME update to gcc-12 and turn on # "binary_gcc": { @@ -191,6 +192,7 @@ CI_CONFIG = { "build_type": "", "sanitizer": "", "package_type": "binary", + "static_binary_name": "powerpc64le", "bundled": "bundled", "splitted": "unsplitted", "tidy": "disable", diff --git a/tests/clickhouse-test b/tests/clickhouse-test index cff6c2de799..c4ad314ff9e 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -452,6 +452,7 @@ class SettingsRandomizer: "prefer_localhost_replica": lambda: random.randint(0, 1), "max_block_size": lambda: random.randint(8000, 100000), "max_threads": lambda: random.randint(1, 64), + "optimize_or_like_chain": lambda: random.randint(0, 1), } @staticmethod diff --git a/tests/integration/test_global_overcommit_tracker/test.py b/tests/integration/test_global_overcommit_tracker/test.py index cacc447be1a..d3d56e82f38 100644 --- a/tests/integration/test_global_overcommit_tracker/test.py +++ b/tests/integration/test_global_overcommit_tracker/test.py @@ -18,8 +18,8 @@ def start_cluster(): cluster.shutdown() -TEST_QUERY_A = "SELECT number FROM numbers(1000) GROUP BY number SETTINGS max_guaranteed_memory_usage_for_user=1" -TEST_QUERY_B = "SELECT number FROM numbers(1000) GROUP BY number SETTINGS max_guaranteed_memory_usage_for_user=2" +TEST_QUERY_A = "SELECT number FROM numbers(1000) GROUP BY number SETTINGS memory_overcommit_ratio_denominator_for_user=1" +TEST_QUERY_B = "SELECT number FROM numbers(1000) GROUP BY number SETTINGS memory_overcommit_ratio_denominator_for_user=2" def test_overcommited_is_killed(): diff --git a/tests/queries/0_stateless/00908_bloom_filter_index.sh b/tests/queries/0_stateless/00908_bloom_filter_index.sh index 474a2b739e2..59375794af0 100755 --- a/tests/queries/0_stateless/00908_bloom_filter_index.sh +++ b/tests/queries/0_stateless/00908_bloom_filter_index.sh @@ -58,50 +58,50 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO bloom_filter_idx2 VALUES (13, 'abc')" # EQUAL -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx2 WHERE lower(s) = 'aбвгдеёж' OR s = 'aбвгдеёж' ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx2 WHERE lower(s) = 'aбвгдеёж' OR s = 'aбвгдеёж' ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx2 WHERE lower(s) = 'aбвгдеёж' OR s = 'aбвгдеёж' ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx2 WHERE lower(s) = 'aбвгдеёж' OR s = 'aбвгдеёж' ORDER BY k FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s = 'aбвгдеёж' ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s = 'aбвгдеёж' ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s = 'aбвгдеёж' ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s = 'aбвгдеёж' ORDER BY k FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT --query="SELECT * FROM 
bloom_filter_idx WHERE lower(s) = 'abc' ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE lower(s) = 'abc' ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE lower(s) = 'abc' ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE lower(s) = 'abc' ORDER BY k FORMAT JSON" | grep "rows_read" # LIKE -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%database%' AND s LIKE '%ClickHouse%' ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%database%' AND s LIKE '%ClickHouse%' ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%database%' AND s LIKE '%ClickHouse%' ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%database%' AND s LIKE '%ClickHouse%' ORDER BY k FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%database%' AND lower(s) LIKE '%clickhouse%' ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%database%' AND lower(s) LIKE '%clickhouse%' ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%database%' AND lower(s) LIKE '%clickhouse%' ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%database%' AND lower(s) LIKE '%clickhouse%' ORDER BY k FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%базами данных%' AND s LIKE '%ClickHouse%' ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%базами данных%' AND s LIKE '%ClickHouse%' ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%базами данных%' AND s LIKE '%ClickHouse%' ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%базами данных%' AND s LIKE '%ClickHouse%' ORDER BY k FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE (s LIKE '%базами данных%' AND s LIKE '%ClickHouse%') OR s LIKE '____строка' ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE (s LIKE '%базами данных%' AND s LIKE '%ClickHouse%') OR s LIKE '____строка' ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE (s LIKE '%базами данных%' AND s LIKE '%ClickHouse%') OR s LIKE '____строка' ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE (s LIKE '%базами данных%' AND s LIKE '%ClickHouse%') OR s LIKE '____строка' ORDER BY k FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%%
_%_%_
%%' ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%%
_%_%_
%%' ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%%
_%_%_
%%' ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%%
_%_%_
%%' ORDER BY k FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%2\\\\%2%' ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%2\\\\%2%' ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%2\\\\%2%' ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%2\\\\%2%' ORDER BY k FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%_\\\\%2\\\\__\\\\' ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%_\\\\%2\\\\__\\\\' ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%_\\\\%2\\\\__\\\\' ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '%_\\\\%2\\\\__\\\\' ORDER BY k FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2\\\\' ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2\\\\' ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2\\\\' ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2\\\\' ORDER BY k FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2_' ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2_' ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2_' ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s LIKE '2\\\\_2\\\\%2_2_' ORDER BY k FORMAT JSON" | grep "rows_read" # IN -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s IN ('aбвгдеёж', 'abc') ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE s IN ('aбвгдеёж', 'abc') ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s IN ('aбвгдеёж', 'abc') ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE s IN ('aбвгдеёж', 'abc') ORDER BY k FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE (s, lower(s)) IN (('aбвгдеёж', 'aбвгдеёж'), ('abc', 'cba')) ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE (s, lower(s)) IN (('aбвгдеёж', 'aбвгдеёж'), ('abc', 'cba')) ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE (s, lower(s)) IN (('aбвгдеёж', 'aбвгдеёж'), ('abc', 'cba')) ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx WHERE (s, lower(s)) IN (('aбвгдеёж', 'aбвгдеёж'), ('abc', 'cba')) ORDER BY k FORMAT JSON" | grep "rows_read" # TOKEN BF @@ -125,18 +125,18 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO bloom_filter_idx3 VALUES (13, 'abc')" # EQUAL -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE 
lower(s) = 'column-oriented' OR s = 'column-oriented' ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE lower(s) = 'column-oriented' OR s = 'column-oriented' ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx3 WHERE lower(s) = 'column-oriented' OR s = 'column-oriented' ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx3 WHERE lower(s) = 'column-oriented' OR s = 'column-oriented' ORDER BY k FORMAT JSON" | grep "rows_read" # LIKE -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE lower(s) LIKE '%(dbms)%' ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE lower(s) LIKE '%(dbms)%' ORDER BY k FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE s LIKE 'column-%' AND s LIKE '%-oriented' ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE s LIKE 'column-%' AND s LIKE '%-oriented' ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx3 WHERE lower(s) LIKE '%(dbms)%' ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx3 WHERE lower(s) LIKE '%(dbms)%' ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx3 WHERE s LIKE 'column-%' AND s LIKE '%-oriented' ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx3 WHERE s LIKE 'column-%' AND s LIKE '%-oriented' ORDER BY k FORMAT JSON" | grep "rows_read" # IN -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE s IN ('some string', 'abc') ORDER BY k" -$CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx3 WHERE s IN ('some string', 'abc') ORDER BY k FORMAT JSON" | grep "rows_read" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx3 WHERE s IN ('some string', 'abc') ORDER BY k" +$CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT * FROM bloom_filter_idx3 WHERE s IN ('some string', 'abc') ORDER BY k FORMAT JSON" | grep "rows_read" $CLICKHOUSE_CLIENT --query="DROP TABLE bloom_filter_idx" $CLICKHOUSE_CLIENT --query="DROP TABLE bloom_filter_idx2" diff --git a/tests/queries/0_stateless/01042_h3_k_ring.reference b/tests/queries/0_stateless/01042_h3_k_ring.reference index 24f4b9885ce..770468da350 100644 --- a/tests/queries/0_stateless/01042_h3_k_ring.reference +++ b/tests/queries/0_stateless/01042_h3_k_ring.reference @@ -1,5 +1,7 @@ [581250224954015743,581259021047037951,581267817140060159,581276613233082367,581500913605148671,581518505791193087,581764796395814911] [581276613233082367] +[581250224954015743,581259021047037951,581267817140060159,581276613233082367,581500913605148671,581518505791193087,581764796395814911] +[581276613233082367] [578466261512486911,578712552117108735,578888473977552895,579205133326352383,579275502070530047,579662530163507199,579768083279773695] [580995138256371711,581144671837749247,581162264023793663,581166662070304767,581171060116815871,581250224954015743,581254623000526847,581259021047037951,581263419093549055,581267817140060159,581272215186571263,581276613233082367,581531699930726399,581536097977237503,581549292116770815,581553690163281919,581558088209793023,581747204209770495,581764796395814911] 
[589624655266971647,589625205022785535,589626854290227199,589627404046041087,589642797208829951,589644996232085503,589708218150682623,589708767906496511,589709317662310399,589709867418124287,589710417173938175,589710966929752063,589711516685565951,589714815220449279,589715914732077055,589725810336727039,589726909848354815,589727459604168703,589728009359982591,589729108871610367,589734606429749247,589735156185563135,589735705941377023,589736255697190911,589736805453004799,589737355208818687,589737904964632575,589742303011143679,589744502034399231,589745051790213119,589752198615793663,589752748371607551,589753298127421439,589753847883235327,589754397639049215,589754947394863103,589755497150676991] diff --git a/tests/queries/0_stateless/01042_h3_k_ring.sql b/tests/queries/0_stateless/01042_h3_k_ring.sql index d450954ab7a..8931efc44c2 100644 --- a/tests/queries/0_stateless/01042_h3_k_ring.sql +++ b/tests/queries/0_stateless/01042_h3_k_ring.sql @@ -5,6 +5,9 @@ SELECT h3kRing(581276613233082367, toUInt16(0)); SELECT h3kRing(581276613233082367, -1); -- { serverError 43 } SELECT h3kRing(581276613233082367, toUInt16(-1)); -- { serverError 12 } +SELECT arraySort(h3kRing(581276613233082367, 1)); +SELECT h3kRing(581276613233082367, 0); +SELECT h3kRing(581276613233082367, -1); -- { serverError 43 } DROP TABLE IF EXISTS h3_indexes; diff --git a/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.reference b/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.reference new file mode 100644 index 00000000000..828667becf2 --- /dev/null +++ b/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.reference @@ -0,0 +1 @@ +test1 test2 diff --git a/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.sh b/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.sh new file mode 100755 index 00000000000..3e2eda96f93 --- /dev/null +++ b/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --multiquery <= yesterday() and + event_time > now() - interval 1 hour and + not is_initial_query and + query not like '%system%query_log%' and + query like concat('WITH%', currentDatabase(), '%AS `id_no` %') and + type = 'QueryFinish' +order by query; WITH _CAST(\'default\', \'Nullable(String)\') AS `id_no` SELECT `one`.`dummy`, ignore(`id_no`) FROM `system`.`one` WHERE `dummy` IN (0, 2) WITH _CAST(\'default\', \'Nullable(String)\') AS `id_no` SELECT `one`.`dummy`, ignore(`id_no`) FROM `system`.`one` WHERE `dummy` IN (0, 2) +-- +-- w/ optimize_skip_unused_shards_rewrite_in=1 +-- + +set optimize_skip_unused_shards_rewrite_in=1; +-- detailed coverage for realistic examples +select 'optimize_skip_unused_shards_rewrite_in(0, 2)'; optimize_skip_unused_shards_rewrite_in(0, 2) +with (select currentDatabase()) as id_02 select *, ignore(id_02) from dist_01756 where dummy in (0, 2); 0 0 +system flush logs; +select query from system.query_log where + event_date >= yesterday() and + event_time > now() - interval 1 hour and + not is_initial_query and + query not like '%system%query_log%' and + query like concat('WITH%', currentDatabase(), '%AS `id_02` %') and + type = 'QueryFinish' +order by query; WITH _CAST(\'default\', \'Nullable(String)\') AS `id_02` SELECT `one`.`dummy`, ignore(`id_02`) FROM `system`.`one` WHERE `dummy` IN tuple(0) WITH _CAST(\'default\', \'Nullable(String)\') AS `id_02` SELECT `one`.`dummy`, ignore(`id_02`) FROM `system`.`one` WHERE `dummy` IN tuple(2) +select 'optimize_skip_unused_shards_rewrite_in(2,)'; optimize_skip_unused_shards_rewrite_in(2,) +with (select currentDatabase()) as id_2 select *, ignore(id_2) from dist_01756 where dummy in (2,); +system flush logs; +select query from system.query_log where + event_date >= yesterday() and + event_time > now() - interval 1 hour and + not is_initial_query and + query not like '%system%query_log%' and + query like concat('WITH%', currentDatabase(), '%AS `id_2` %') and + type = 'QueryFinish' +order by query; WITH _CAST(\'default\', \'Nullable(String)\') AS `id_2` SELECT `one`.`dummy`, ignore(`id_2`) FROM `system`.`one` WHERE `dummy` IN tuple(2) +select 'optimize_skip_unused_shards_rewrite_in(0,)'; optimize_skip_unused_shards_rewrite_in(0,) +with (select currentDatabase()) as id_0 select *, ignore(id_0) from dist_01756 where dummy in (0,); 0 0 +system flush logs; +select query from system.query_log where + event_date >= yesterday() and + event_time > now() - interval 1 hour and + not is_initial_query and + query not like '%system%query_log%' and + query like concat('WITH%', currentDatabase(), '%AS `id_0` %') and + type = 'QueryFinish' +order by query; WITH _CAST(\'default\', \'Nullable(String)\') AS `id_0` SELECT `one`.`dummy`, ignore(`id_0`) FROM `system`.`one` WHERE `dummy` IN tuple(0) +-- signed column +select 'signed column'; signed column +create table data_01756_signed (key Int) engine=Null; +with (select currentDatabase()) as key_signed select *, ignore(key_signed) from cluster(test_cluster_two_shards, currentDatabase(), data_01756_signed, key) where key in (-1, -2); +system flush logs; +select query from system.query_log where + event_date >= yesterday() and + event_time > now() - interval 1 hour and + not is_initial_query and + query not like '%system%query_log%' and + query like concat('WITH%', currentDatabase(), '%AS `key_signed` %') and + type = 'QueryFinish' +order by query; WITH _CAST(\'default\', \'Nullable(String)\') AS `key_signed` SELECT `key`, ignore(`key_signed`) 
FROM `default`.`data_01756_signed` WHERE `key` IN tuple(-1) WITH _CAST(\'default\', \'Nullable(String)\') AS `key_signed` SELECT `key`, ignore(`key_signed`) FROM `default`.`data_01756_signed` WHERE `key` IN tuple(-2) +-- not tuple +select * from dist_01756 where dummy in (0); 0 +select * from dist_01756 where dummy in ('0'); 0 +-- +-- errors +-- +select 'errors'; errors +-- optimize_skip_unused_shards does not support non-constants +select * from dist_01756 where dummy in (select * from system.one); -- { serverError 507 } +select * from dist_01756 where dummy in (toUInt8(0)); -- { serverError 507 } +-- NOT IN does not supported +select * from dist_01756 where dummy not in (0, 2); -- { serverError 507 } +-- +-- others +-- +select 'others'; others +select * from dist_01756 where dummy not in (2, 3) and dummy in (0, 2); 0 +select * from dist_01756 where dummy in tuple(0, 2); 0 +select * from dist_01756 where dummy in tuple(0); 0 +select * from dist_01756 where dummy in tuple(2); +-- Identifier is NULL +select (2 IN (2,)), * from dist_01756 where dummy in (0, 2) format Null; +-- Literal is NULL +select (dummy IN (toUInt8(2),)), * from dist_01756 where dummy in (0, 2) format Null; +-- different type +select 'different types -- prohibited'; different types -- prohibited +create table data_01756_str (key String) engine=Memory(); +insert into data_01756_str values (0)(1); +-- SELECT +-- cityHash64(0) % 2, +-- cityHash64(2) % 2 +-- +-- ┌─modulo(cityHash64(0), 2)─┬─modulo(cityHash64(2), 2)─┐ +-- │ 0 │ 1 │ +-- └──────────────────────────┴──────────────────────────┘ +create table dist_01756_str as data_01756_str engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01756_str, cityHash64(key)); +select * from dist_01756_str where key in ('0', '2'); +0 +select * from dist_01756_str where key in (0, 2); +0 +select * from dist_01756_str where key in ('0', Null); -- { serverError 507 } +-- select * from dist_01756_str where key in (0, 2); -- { serverError 53 } +-- select * from dist_01756_str where key in (0, Null); -- { serverError 53 } + +-- different type #2 +select 'different types -- conversion'; different types -- conversion +create table dist_01756_column as system.one engine=Distributed(test_cluster_two_shards, system, one, dummy); +select * from dist_01756_column where dummy in (0, '255'); 0 +select * from dist_01756_column where dummy in (0, '255foo'); -- { serverError 53 } +-- intHash64 does not accept string, but implicit conversion should be done +select * from dist_01756 where dummy in ('0', '2'); 0 +-- optimize_skip_unused_shards_limit +select 'optimize_skip_unused_shards_limit'; optimize_skip_unused_shards_limit +select * from dist_01756 where dummy in (0, 2) settings optimize_skip_unused_shards_limit=1; -- { serverError 507 } +select * from dist_01756 where dummy in (0, 2) settings optimize_skip_unused_shards_limit=1, force_optimize_skip_unused_shards=0; 0 0 diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql index a5090551c89..59709a7ee2a 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql +++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql @@ -11,6 +11,15 @@ drop table if exists dist_01756_column; drop table if exists data_01756_str; drop table if exists data_01756_signed; +-- separate log entry for localhost queries +set prefer_localhost_replica=0; +set force_optimize_skip_unused_shards=2; +set 
optimize_skip_unused_shards=1; +set optimize_skip_unused_shards_rewrite_in=0; +set log_queries=1; + +-- { echoOn } + -- SELECT -- intHash64(0) % 2, -- intHash64(2) % 2 @@ -19,13 +28,6 @@ drop table if exists data_01756_signed; -- └─────────────────────────┴─────────────────────────┘ create table dist_01756 as system.one engine=Distributed(test_cluster_two_shards, system, one, intHash64(dummy)); --- separate log entry for localhost queries -set prefer_localhost_replica=0; -set force_optimize_skip_unused_shards=2; -set optimize_skip_unused_shards=1; -set optimize_skip_unused_shards_rewrite_in=0; -set log_queries=1; - -- -- w/o optimize_skip_unused_shards_rewrite_in=1 -- @@ -131,8 +133,17 @@ select (dummy IN (toUInt8(2),)), * from dist_01756 where dummy in (0, 2) format -- different type select 'different types -- prohibited'; create table data_01756_str (key String) engine=Memory(); +insert into data_01756_str values (0)(1); +-- SELECT +-- cityHash64(0) % 2, +-- cityHash64(2) % 2 +-- +-- ┌─modulo(cityHash64(0), 2)─┬─modulo(cityHash64(2), 2)─┐ +-- │ 0 │ 1 │ +-- └──────────────────────────┴──────────────────────────┘ create table dist_01756_str as data_01756_str engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01756_str, cityHash64(key)); select * from dist_01756_str where key in ('0', '2'); +select * from dist_01756_str where key in (0, 2); select * from dist_01756_str where key in ('0', Null); -- { serverError 507 } -- select * from dist_01756_str where key in (0, 2); -- { serverError 53 } -- select * from dist_01756_str where key in (0, Null); -- { serverError 53 } @@ -150,6 +161,8 @@ select 'optimize_skip_unused_shards_limit'; select * from dist_01756 where dummy in (0, 2) settings optimize_skip_unused_shards_limit=1; -- { serverError 507 } select * from dist_01756 where dummy in (0, 2) settings optimize_skip_unused_shards_limit=1, force_optimize_skip_unused_shards=0; +-- { echoOff } + drop table dist_01756; drop table dist_01756_str; drop table dist_01756_column; diff --git a/tests/queries/0_stateless/02104_overcommit_memory.sh b/tests/queries/0_stateless/02104_overcommit_memory.sh index 7fdf74a30bf..f2016dbc0c1 100755 --- a/tests/queries/0_stateless/02104_overcommit_memory.sh +++ b/tests/queries/0_stateless/02104_overcommit_memory.sh @@ -11,13 +11,13 @@ $CLICKHOUSE_CLIENT -q 'GRANT ALL ON *.* TO u02104' function overcommited() { - $CLICKHOUSE_CLIENT -u u02104 -q 'SELECT number FROM numbers(130000) GROUP BY number SETTINGS max_guaranteed_memory_usage=1,memory_usage_overcommit_max_wait_microseconds=500' 2>&1 \ + $CLICKHOUSE_CLIENT -u u02104 -q 'SELECT number FROM numbers(130000) GROUP BY number SETTINGS memory_overcommit_ratio_denominator=1,memory_usage_overcommit_max_wait_microseconds=500' 2>&1 \ | grep -F -q "MEMORY_LIMIT_EXCEEDED" && echo "OVERCOMMITED WITH USER LIMIT IS KILLED" } function expect_execution() { - $CLICKHOUSE_CLIENT -u u02104 -q 'SELECT number FROM numbers(130000) GROUP BY number SETTINGS max_memory_usage_for_user=5000000,max_guaranteed_memory_usage=2,memory_usage_overcommit_max_wait_microseconds=500' >/dev/null 2>/dev/null + $CLICKHOUSE_CLIENT -u u02104 -q 'SELECT number FROM numbers(130000) GROUP BY number SETTINGS max_memory_usage_for_user=5000000,memory_overcommit_ratio_denominator=2,memory_usage_overcommit_max_wait_microseconds=500' >/dev/null 2>/dev/null } export -f overcommited diff --git a/tests/queries/0_stateless/02226_or_like_combine.reference b/tests/queries/0_stateless/02226_or_like_combine.reference new file mode 100644 index 
00000000000..93465f606fe --- /dev/null +++ b/tests/queries/0_stateless/02226_or_like_combine.reference @@ -0,0 +1,40 @@ +SELECT materialize(\'Привет, World\') AS s +WHERE (s LIKE \'hell%\') OR (s ILIKE \'%привет%\') OR (s ILIKE \'world%\') +SETTINGS optimize_or_like_chain = 0 +SELECT materialize(\'Привет, World\') AS s +WHERE multiMatchAny(s, [\'^hell\', \'(?i)привет\', \'(?i)^world\']) OR false +SETTINGS optimize_or_like_chain = 1 +SELECT + materialize(\'Привет, World\') AS s1, + materialize(\'Привет, World\') AS s2 +WHERE multiMatchAny(s1, [\'^hell\', \'(?i)^world\']) OR multiMatchAny(s2, [\'(?i)привет\']) +SETTINGS optimize_or_like_chain = 1 +SELECT + materialize(\'Привет, World\') AS s1, + materialize(\'Привет, World\') AS s2 +WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\') +SETTINGS optimize_or_like_chain = 1 +SELECT + materialize(\'Привет, World\') AS s1, + materialize(\'Привет, World\') AS s2 +WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\') +SETTINGS optimize_or_like_chain = 1 +SELECT + materialize(\'Привет, World\') AS s1, + materialize(\'Привет, World\') AS s2 +WHERE (s1 LIKE \'hell%\') OR (s2 ILIKE \'%привет%\') OR (s1 ILIKE \'world%\') +SETTINGS optimize_or_like_chain = 1 +SELECT + materialize(\'Привет, World\') AS s1, + materialize(\'Привет, World\') AS s2 +WHERE multiMatchAny(s1, [\'^hell\', \'(?i)^world\']) OR multiMatchAny(s2, [\'(?i)привет\']) OR (s1 = \'Привет\') +SETTINGS optimize_or_like_chain = 1 +Привет, optimized World +Привет, World +Привет, optimized World +Привет, World +SELECT + (materialize(\'Привет, World\') AS s) LIKE \'hell%\' AS test, + s +WHERE multiMatchAny(s, [\'^hell\', \'(?i)привет\', \'(?i)^world\']) OR false +SETTINGS optimize_or_like_chain = 1 diff --git a/tests/queries/0_stateless/02226_or_like_combine.sql b/tests/queries/0_stateless/02226_or_like_combine.sql new file mode 100644 index 00000000000..ed91dda7469 --- /dev/null +++ b/tests/queries/0_stateless/02226_or_like_combine.sql @@ -0,0 +1,19 @@ +EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 0; +EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 1; + + +EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1; +EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1 SETTINGS allow_hyperscan = 0; +EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1 SETTINGS max_hyperscan_regexp_length = 10; +EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') SETTINGS optimize_or_like_chain = 1 SETTINGS max_hyperscan_regexp_total_length = 10; +EXPLAIN SYNTAX SELECT materialize('Привет, World') AS s1, materialize('Привет, World') AS s2 WHERE (s1 LIKE 'hell%') OR (s2 ILIKE '%привет%') OR (s1 ILIKE 'world%') OR s1 == 'Привет' SETTINGS optimize_or_like_chain = 1; + + +SELECT materialize('Привет, 
optimized World') AS s WHERE (s LIKE 'hell%') OR (s LIKE '%привет%') OR (s ILIKE '%world') SETTINGS optimize_or_like_chain = 1; +SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s LIKE '%привет%') OR (s ILIKE '%world') SETTINGS optimize_or_like_chain = 0; +SELECT materialize('Привет, optimized World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s LIKE 'world%') SETTINGS optimize_or_like_chain = 1; +SELECT materialize('Привет, World') AS s WHERE (s LIKE 'hell%') OR (s ILIKE '%привет%') OR (s LIKE 'world%') SETTINGS optimize_or_like_chain = 0; + +-- Aliases + +EXPLAIN SYNTAX SELECT test, materialize('Привет, World') AS s WHERE ((s LIKE 'hell%') AS test) OR (s ILIKE '%привет%') OR (s ILIKE 'world%') SETTINGS optimize_or_like_chain = 1; diff --git a/tests/queries/0_stateless/02226_s3_with_cache.reference b/tests/queries/0_stateless/02226_s3_with_cache.reference index 4041f51b3f9..596e6e5345a 100644 --- a/tests/queries/0_stateless/02226_s3_with_cache.reference +++ b/tests/queries/0_stateless/02226_s3_with_cache.reference @@ -1,4 +1,4 @@ SELECT 1, * FROM test LIMIT 10 FORMAT Null; 1 0 1 SELECT 2, * FROM test LIMIT 10 FORMAT Null; 0 1 0 0 -SELECT 3, * FROM test LIMIT 10 FORMAT Null; 1 1 0 +SELECT 3, * FROM test LIMIT 10 FORMAT Null; 0 1 0 diff --git a/tests/queries/0_stateless/02226_s3_with_cache.sql b/tests/queries/0_stateless/02226_s3_with_cache.sql index d470f2ef140..e62e63b7f97 100644 --- a/tests/queries/0_stateless/02226_s3_with_cache.sql +++ b/tests/queries/0_stateless/02226_s3_with_cache.sql @@ -13,9 +13,9 @@ SELECT 1, * FROM test LIMIT 10 FORMAT Null; SYSTEM FLUSH LOGS; SELECT query, - ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read, - ProfileEvents['RemoteFSCacheReadBytes'] > 0 as remote_fs_cache_read, - ProfileEvents['RemoteFSCacheDownloadBytes'] > 0 as remote_fs_read_and_download + ProfileEvents['CachedReadBufferReadFromSourceBytes'] > 0 as remote_fs_read, + ProfileEvents['CachedReadBufferReadFromCacheBytes'] > 0 as remote_fs_cache_read, + ProfileEvents['CachedReadBufferCacheWriteBytes'] > 0 as remote_fs_read_and_download FROM system.query_log WHERE query LIKE 'SELECT 1, * FROM test LIMIT%' AND type = 'QueryFinish' @@ -29,9 +29,9 @@ SELECT 2, * FROM test LIMIT 10 FORMAT Null; SYSTEM FLUSH LOGS; SELECT query, - ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read, - ProfileEvents['RemoteFSCacheReadBytes'] > 0 as remote_fs_cache_read, - ProfileEvents['RemoteFSCacheDownloadBytes'] > 0 as remote_fs_read_and_download + ProfileEvents['CachedReadBufferReadFromSourceBytes'] > 0 as remote_fs_read, + ProfileEvents['CachedReadBufferReadFromCacheBytes'] > 0 as remote_fs_cache_read, + ProfileEvents['CachedReadBufferCacheWriteBytes'] > 0 as remote_fs_read_and_download FROM system.query_log WHERE query LIKE 'SELECT 2, * FROM test LIMIT%' AND type = 'QueryFinish' @@ -56,9 +56,9 @@ SELECT 3, * FROM test LIMIT 10 FORMAT Null; SYSTEM FLUSH LOGS; SELECT query, - ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read, - ProfileEvents['RemoteFSCacheReadBytes'] > 0 as remote_fs_cache_read, - ProfileEvents['RemoteFSCacheDownloadBytes'] > 0 as remote_fs_read_and_download + ProfileEvents['CachedReadBufferReadFromSourceBytes'] > 0 as remote_fs_read, + ProfileEvents['CachedReadBufferReadFromCacheBytes'] > 0 as remote_fs_cache_read, + ProfileEvents['CachedReadBufferCacheWriteBytes'] > 0 as remote_fs_read_and_download FROM system.query_log WHERE query LIKE 'SELECT 3, * FROM test LIMIT%' AND type = 'QueryFinish' diff --git 
a/tests/queries/0_stateless/02280_dynamic_linker_env.sh b/tests/queries/0_stateless/02280_dynamic_linker_env.sh deleted file mode 100755 index 655765d9e10..00000000000 --- a/tests/queries/0_stateless/02280_dynamic_linker_env.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env bash - -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -static=$($CLICKHOUSE_LOCAL -q "SELECT value IN ('ON', '1') FROM system.build_options WHERE name = 'STATIC'") - -clickhouse-local -q 'select 1' - -if [ "$static" -eq 1 ]; then - # "grep -c" will also gives "1" - LD_LIBRARY_PATH=/ clickhouse-local -q 'select 1' |& grep -x -F -c 'Environment variable LD_LIBRARY_PATH is set to /. It can compromise security.' -else - # works because it does not uses main.cpp entrypoint - # (due to shared build is always splitted, and non-splitted will have lots of ODR issues) - LD_LIBRARY_PATH=/ clickhouse-local -q 'select 1' -fi diff --git a/tests/queries/0_stateless/02282_array_distance.reference b/tests/queries/0_stateless/02282_array_distance.reference new file mode 100644 index 00000000000..158df656403 --- /dev/null +++ b/tests/queries/0_stateless/02282_array_distance.reference @@ -0,0 +1,42 @@ +6 +3.7416575 +3 +0.0025851727 +\N +nan +12 +14 +21 +7.071068 +9.165152 +12.124355 +2 +5 +4 +0.16847819 +0.35846698 +0.07417989 +6 +8 +9 +0.020204102886728692 +0.11808289631180302 +0 +1 1 218.74642854227358 +1 2 1348.2117786164013 +2 1 219.28064210048274 +2 2 1347.4008312302617 +3 1 214.35251339790725 +3 2 1342.8856987845243 +1 1 218.74643 +1 2 1348.2118 +2 1 219.28064 +2 2 1347.4009 +3 1 214.35251 +3 2 1342.8857 +1 1 218.74642854227358 +1 2 1348.2117786164013 +2 1 219.28064210048274 +2 2 1347.4008312302617 +3 1 214.35251339790725 +3 2 1342.8856987845243 diff --git a/tests/queries/0_stateless/02282_array_distance.sql b/tests/queries/0_stateless/02282_array_distance.sql new file mode 100644 index 00000000000..04de01d7d66 --- /dev/null +++ b/tests/queries/0_stateless/02282_array_distance.sql @@ -0,0 +1,41 @@ +SELECT arrayL1Distance([0, 0, 0], [1, 2, 3]); +SELECT arrayL2Distance([1, 2, 3], [0, 0, 0]); +SELECT arrayLinfDistance([1, 2, 3], [0, 0, 0]); +SELECT arrayCosineDistance([1, 2, 3], [3, 5, 7]); + +SELECT arrayL2Distance([1, 2, 3], NULL); +SELECT arrayCosineDistance([1, 2, 3], [0, 0, 0]); + +DROP TABLE IF EXISTS vec1; +DROP TABLE IF EXISTS vec2; +DROP TABLE IF EXISTS vec2f; +DROP TABLE IF EXISTS vec2d; +CREATE TABLE vec1 (id UInt64, v Array(UInt8)) ENGINE = Memory; +CREATE TABLE vec2 (id UInt64, v Array(Int64)) ENGINE = Memory; +CREATE TABLE vec2f (id UInt64, v Array(Float32)) ENGINE = Memory; +CREATE TABLE vec2d (id UInt64, v Array(Float64)) ENGINE = Memory; + +INSERT INTO vec1 VALUES (1, [3, 4, 5]), (2, [2, 4, 8]), (3, [7, 7, 7]); +SELECT arrayL1Distance(v, [0, 0, 0]) FROM vec1; +SELECT arrayL2Distance(v, [0, 0, 0]) FROM vec1; +SELECT arrayLinfDistance([5, 4, 3], v) FROM vec1; +SELECT arrayCosineDistance([3, 2, 1], v) FROM vec1; +SELECT arrayLinfDistance(v, materialize([0, -2, 0])) FROM vec1; +SELECT arrayCosineDistance(v, materialize([1., 1., 1.])) FROM vec1; + +INSERT INTO vec2 VALUES (1, [100, 200, 0]), (2, [888, 777, 666]); +SELECT v1.id, v2.id, arrayL2Distance(v1.v, v2.v) as dist FROM vec1 v1, vec2 v2; + +INSERT INTO vec2f VALUES (1, [100, 200, 0]), (2, [888, 777, 666]); +SELECT v1.id, v2.id, arrayL2Distance(v1.v, v2.v) as dist FROM vec1 v1, vec2f v2; + +INSERT INTO vec2d VALUES (1, [100, 200, 0]), (2, [888, 777, 666]); +SELECT v1.id, v2.id, 
arrayL2Distance(v1.v, v2.v) as dist FROM vec1 v1, vec2d v2; + +SELECT arrayL1Distance([0, 0], [1]); -- { serverError 190 } +SELECT arrayL2Distance((1, 2), (3,4)); -- { serverError 43 } + +DROP TABLE vec1; +DROP TABLE vec2; +DROP TABLE vec2f; +DROP TABLE vec2d; diff --git a/tests/queries/0_stateless/02283_array_norm.reference b/tests/queries/0_stateless/02283_array_norm.reference new file mode 100644 index 00000000000..6dd6b79e6d9 --- /dev/null +++ b/tests/queries/0_stateless/02283_array_norm.reference @@ -0,0 +1,27 @@ +6 +7.0710678118654755 +2 +1 5 +2 2 +3 5.196152 +4 0 +1 11 +2 11 +3 11 +4 11 +1 5 +2 2 +3 5.196152 +4 0 +1 11 +2 11 +3 11 +4 11 +1 5 +2 2 +3 5.196152422706632 +4 0 +1 11 +2 11 +3 11 +4 11 diff --git a/tests/queries/0_stateless/02283_array_norm.sql b/tests/queries/0_stateless/02283_array_norm.sql new file mode 100644 index 00000000000..e11caea7cc1 --- /dev/null +++ b/tests/queries/0_stateless/02283_array_norm.sql @@ -0,0 +1,28 @@ +SELECT arrayL1Norm([1, 2, 3]); +SELECT arrayL2Norm([3., 4., 5.]); +SELECT arrayLinfNorm([0, 0, 2]); + +DROP TABLE IF EXISTS vec1; +DROP TABLE IF EXISTS vec1f; +DROP TABLE IF EXISTS vec1d; +CREATE TABLE vec1 (id UInt64, v Array(UInt8)) ENGINE = Memory; +CREATE TABLE vec1f (id UInt64, v Array(Float32)) ENGINE = Memory; +CREATE TABLE vec1d (id UInt64, v Array(Float64)) ENGINE = Memory; +INSERT INTO vec1 VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL); +INSERT INTO vec1f VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL); +INSERT INTO vec1d VALUES (1, [3, 4]), (2, [2]), (3, [3, 3, 3]), (4, NULL); + +SELECT id, arrayL2Norm(v) FROM vec1; +SELECT id, arrayL1Norm(materialize([5., 6.])) FROM vec1; + +SELECT id, arrayL2Norm(v) FROM vec1f; +SELECT id, arrayL1Norm(materialize([5., 6.])) FROM vec1f; + +SELECT id, arrayL2Norm(v) FROM vec1d; +SELECT id, arrayL1Norm(materialize([5., 6.])) FROM vec1d; + +SELECT arrayL1Norm((1, 2,)); -- { serverError 43 } + +DROP TABLE vec1; +DROP TABLE vec1f; +DROP TABLE vec1d; diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference index fb18ad12c4d..30026f943a1 100644 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.reference +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.reference @@ -28,5 +28,3 @@ SELECT count() FROM system.filesystem_cache; SYSTEM DROP FILESYSTEM CACHE './s3_cache/'; SELECT count() FROM system.filesystem_cache; 2 -EXPLAIN SYNTAX SYSTEM DROP FILESYSTEM CACHE './s3_cache/' FORCE; -SYSTEM DROP FILESYSTEM CACHE ./s3_cache/ FORCE diff --git a/tests/queries/0_stateless/02286_drop_filesystem_cache.sql b/tests/queries/0_stateless/02286_drop_filesystem_cache.sql index ee93b165637..85bfb6ca1c7 100644 --- a/tests/queries/0_stateless/02286_drop_filesystem_cache.sql +++ b/tests/queries/0_stateless/02286_drop_filesystem_cache.sql @@ -31,6 +31,4 @@ SELECT * FROM test2 FORMAT Null; SELECT count() FROM system.filesystem_cache; SYSTEM DROP FILESYSTEM CACHE './s3_cache/'; -SELECT count() FROM system.filesystem_cache; - -EXPLAIN SYNTAX SYSTEM DROP FILESYSTEM CACHE './s3_cache/' FORCE; +SELECT count() FROM system.filesystem_cache; \ No newline at end of file diff --git a/tests/queries/0_stateless/02280_dynamic_linker_env.reference b/tests/queries/0_stateless/02301_harmful_reexec.reference similarity index 100% rename from tests/queries/0_stateless/02280_dynamic_linker_env.reference rename to tests/queries/0_stateless/02301_harmful_reexec.reference diff --git a/tests/queries/0_stateless/02301_harmful_reexec.sh 
b/tests/queries/0_stateless/02301_harmful_reexec.sh new file mode 100755 index 00000000000..37c7cba3e67 --- /dev/null +++ b/tests/queries/0_stateless/02301_harmful_reexec.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# NOTE: we can do a better test with strace, but I don't think that it is worth it. +$CLICKHOUSE_LOCAL -q "SELECT 1" +LD_LIBRARY_PATH=/tmp $CLICKHOUSE_LOCAL -q "SELECT 1" diff --git a/tests/queries/1_stateful/00172_parallel_join.sql b/tests/queries/1_stateful/00172_parallel_join.sql index fce41d7a761..36b12a43b88 100644 --- a/tests/queries/1_stateful/00172_parallel_join.sql +++ b/tests/queries/1_stateful/00172_parallel_join.sql @@ -1,4 +1,5 @@ -set join_algorithm='parallel_hash'; +SET join_algorithm='parallel_hash'; + SELECT EventDate, hits,