diff --git a/base/common/logger_useful.h b/base/common/logger_useful.h index c89ec63ef22..ad56ef7e857 100644 --- a/base/common/logger_useful.h +++ b/base/common/logger_useful.h @@ -15,15 +15,27 @@ using Poco::Message; using DB::LogsLevel; using DB::CurrentThread; -/// Logs a message to a specified logger with that level. -#define LOG_IMPL(logger, priority, PRIORITY, ...) do \ +namespace +{ + template <typename... Ts> constexpr size_t numArgs(Ts &&...) { return sizeof...(Ts); } + template <typename T, typename... Ts> constexpr auto firstArg(T && x, Ts &&...) { return std::forward<T>(x); } +} + + +/// Logs a message to a specified logger with that level. +/// If more than one argument is provided, +/// the first argument is interpreted as a template with {}-substitutions +/// and the remaining arguments are treated as values to substitute. +/// If only one argument is provided, it is treated as a message without substitutions. + +#define LOG_IMPL(logger, priority, PRIORITY, ...) do \ { \ const bool is_clients_log = (CurrentThread::getGroup() != nullptr) && \ (CurrentThread::getGroup()->client_logs_level >= (priority)); \ if ((logger)->is((PRIORITY)) || is_clients_log) \ { \ - std::string formatted_message = fmt::format(__VA_ARGS__); \ + std::string formatted_message = numArgs(__VA_ARGS__) > 1 ? fmt::format(__VA_ARGS__) : firstArg(__VA_ARGS__); \ if (auto channel = (logger)->getChannel()) \ { \ std::string file_function; \ diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt new file mode 100644 index 00000000000..72959d3c1d8 --- /dev/null +++ b/cmake/autogenerated_versions.txt @@ -0,0 +1,9 @@ +# This strings autochanged from release_lib.sh: +SET(VERSION_REVISION 54435) +SET(VERSION_MAJOR 20) +SET(VERSION_MINOR 5) +SET(VERSION_PATCH 1) +SET(VERSION_GITHASH 91df18a906dcffdbee6816e5389df6c65f86e35f) +SET(VERSION_DESCRIBE v20.5.1.1-prestable) +SET(VERSION_STRING 20.5.1.1) +# end of autochange diff --git a/cmake/find/grpc.cmake b/cmake/find/grpc.cmake index e19cac5cf5f..0019dbd5eed 100644 --- a/cmake/find/grpc.cmake +++ b/cmake/find/grpc.cmake @@ -1,57 +1,26 @@ -set(_PROTOBUF_PROTOC $<TARGET_FILE:protoc>) -set(_GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:grpc_cpp_plugin>) +option (ENABLE_GRPC "Use gRPC" ${ENABLE_LIBRARIES}) -function(PROTOBUF_GENERATE_GRPC_CPP SRCS HDRS) - if(NOT ARGN) - message(SEND_ERROR "Error: PROTOBUF_GENERATE_GRPC_CPP() called without any proto files") - return() - endif() +if (ENABLE_GRPC) + option (USE_INTERNAL_GRPC_LIBRARY "Set to FALSE to use system gRPC library instead of bundled" ${NOT_UNBUNDLED}) - if(PROTOBUF_GENERATE_CPP_APPEND_PATH) - foreach(FIL ${ARGN}) - get_filename_component(ABS_FIL ${FIL} ABSOLUTE) - get_filename_component(ABS_PATH ${ABS_FIL} PATH) - list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) - if(${_contains_already} EQUAL -1) - list(APPEND _protobuf_include_path -I ${ABS_PATH}) - endif() - endforeach() - else() - set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR}) - endif() + if (USE_INTERNAL_GRPC_LIBRARY) + if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/grpc/include/grpc++/grpc++.h") + message(WARNING "submodule contrib/grpc is missing. 
To fix try run: \n git submodule update --init --recursive") + set (USE_INTERNAL_GRPC_LIBRARY OFF) + elseif (NOT USE_PROTOBUF) + message(WARNING "gRPC requires protobuf which is disabled") + set (USE_INTERNAL_GRPC_LIBRARY OFF) + else() + set (GRPC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/grpc/include") + set (GRPC_LIBRARY "libgrpc++") + set (USE_GRPC ON) + endif() + else() + find_package(grpc) + if (GRPC_INCLUDE_DIR AND GRPC_LIBRARY) + set (USE_GRPC ON) + endif() + endif() +endif() - if(DEFINED PROTOBUF_IMPORT_DIRS) - foreach(DIR ${Protobuf_IMPORT_DIRS}) - get_filename_component(ABS_PATH ${DIR} ABSOLUTE) - list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) - if(${_contains_already} EQUAL -1) - list(APPEND _protobuf_include_path -I ${ABS_PATH}) - endif() - endforeach() - endif() - - set(${SRCS}) - set(${HDRS}) - foreach(FIL ${ARGN}) - get_filename_component(ABS_FIL ${FIL} ABSOLUTE) - get_filename_component(FIL_WE ${FIL} NAME_WE) - - list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.cc") - list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.h") - - add_custom_command( - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.cc" - "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.h" - COMMAND ${_PROTOBUF_PROTOC} - ARGS --grpc_out=${CMAKE_CURRENT_BINARY_DIR} - --plugin=protoc-gen-grpc=${_GRPC_CPP_PLUGIN_EXECUTABLE} - ${_protobuf_include_path} ${ABS_FIL} - DEPENDS ${ABS_FIL} - COMMENT "Running gRPC C++ protocol buffer compiler on ${FIL}" - VERBATIM) - endforeach() - - set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) - set(${SRCS} ${${SRCS}} PARENT_SCOPE) - set(${HDRS} ${${HDRS}} PARENT_SCOPE) -endfunction() +message(STATUS "Using gRPC=${USE_GRPC}: ${GRPC_INCLUDE_DIR} : ${GRPC_LIBRARY}") diff --git a/cmake/find/protobuf.cmake b/cmake/find/protobuf.cmake index 5f686cfd96e..51110d3b6a3 100644 --- a/cmake/find/protobuf.cmake +++ b/cmake/find/protobuf.cmake @@ -28,68 +28,6 @@ elseif(NOT MISSING_INTERNAL_PROTOBUF_LIBRARY) set(Protobuf_LITE_LIBRARY libprotobuf-lite) set(Protobuf_PROTOC_EXECUTABLE "$<TARGET_FILE:protoc>") - - if(NOT DEFINED PROTOBUF_GENERATE_CPP_APPEND_PATH) - set(PROTOBUF_GENERATE_CPP_APPEND_PATH TRUE) - endif() - - function(PROTOBUF_GENERATE_CPP SRCS HDRS) - if(NOT ARGN) - message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP() called without any proto files") - return() - endif() - - if(PROTOBUF_GENERATE_CPP_APPEND_PATH) - # Create an include path for each file specified - foreach(FIL ${ARGN}) - get_filename_component(ABS_FIL ${FIL} ABSOLUTE) - get_filename_component(ABS_PATH ${ABS_FIL} PATH) - list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) - if(${_contains_already} EQUAL -1) - list(APPEND _protobuf_include_path -I ${ABS_PATH}) - endif() - endforeach() - else() - set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR}) - endif() - - if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS) - set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}") - endif() - - if(DEFINED Protobuf_IMPORT_DIRS) - foreach(DIR ${Protobuf_IMPORT_DIRS}) - get_filename_component(ABS_PATH ${DIR} ABSOLUTE) - list(FIND _protobuf_include_path ${ABS_PATH} _contains_already) - if(${_contains_already} EQUAL -1) - list(APPEND _protobuf_include_path -I ${ABS_PATH}) - endif() - endforeach() - endif() - - set(${SRCS}) - set(${HDRS}) - foreach(FIL ${ARGN}) - get_filename_component(ABS_FIL ${FIL} ABSOLUTE) - get_filename_component(FIL_WE ${FIL} NAME_WE) - - list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc") - list(APPEND ${HDRS} 
"${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h") - - add_custom_command( - OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc" - "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h" - COMMAND ${Protobuf_PROTOC_EXECUTABLE} - ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} ${_protobuf_include_path} ${ABS_FIL} - DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE} - COMMENT "Running C++ protocol buffer compiler on ${FIL}" - VERBATIM ) - endforeach() - - set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE) - set(${SRCS} ${${SRCS}} PARENT_SCOPE) - set(${HDRS} ${${HDRS}} PARENT_SCOPE) - endfunction() endif() if(OS_FREEBSD AND SANITIZE STREQUAL "address") @@ -102,6 +40,7 @@ if(OS_FREEBSD AND SANITIZE STREQUAL "address") endif() endif() +include (${ClickHouse_SOURCE_DIR}/cmake/protobuf_generate_cpp.cmake) endif() message(STATUS "Using protobuf=${USE_PROTOBUF}: ${Protobuf_INCLUDE_DIR} : ${Protobuf_LIBRARY} : ${Protobuf_PROTOC_EXECUTABLE}") diff --git a/cmake/protobuf_generate_cpp.cmake b/cmake/protobuf_generate_cpp.cmake new file mode 100644 index 00000000000..7ee9e8d7c81 --- /dev/null +++ b/cmake/protobuf_generate_cpp.cmake @@ -0,0 +1,171 @@ +# This file declares functions adding custom commands for generating C++ files from *.proto files: +# function (protobuf_generate_cpp SRCS HDRS) +# function (protobuf_generate_grpc_cpp SRCS HDRS) + +if (NOT USE_PROTOBUF) + message (WARNING "Could not use protobuf_generate_cpp() without the protobuf library") + return() +endif() + +if (NOT DEFINED PROTOBUF_PROTOC_EXECUTABLE) + set (PROTOBUF_PROTOC_EXECUTABLE "$<TARGET_FILE:protoc>") +endif() + +if (NOT DEFINED GRPC_CPP_PLUGIN_EXECUTABLE) + set (GRPC_CPP_PLUGIN_EXECUTABLE $<TARGET_FILE:grpc_cpp_plugin>) +endif() + +if (NOT DEFINED PROTOBUF_GENERATE_CPP_APPEND_PATH) + set (PROTOBUF_GENERATE_CPP_APPEND_PATH TRUE) +endif() + + +function(protobuf_generate_cpp_impl SRCS HDRS MODES OUTPUT_FILE_EXTS PLUGIN) + if(NOT ARGN) + message(SEND_ERROR "Error: protobuf_generate_cpp() called without any proto files") + return() + endif() + + if(PROTOBUF_GENERATE_CPP_APPEND_PATH) + # Create an include path for each file specified + foreach(FIL ${ARGN}) + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(ABS_PATH ${ABS_FIL} PATH) + list(FIND protobuf_include_path ${ABS_PATH} _contains_already) + if(${_contains_already} EQUAL -1) + list(APPEND protobuf_include_path -I ${ABS_PATH}) + endif() + endforeach() + else() + set(protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR}) + endif() + + if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS) + set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}") + endif() + + if(DEFINED Protobuf_IMPORT_DIRS) + foreach(DIR ${Protobuf_IMPORT_DIRS}) + get_filename_component(ABS_PATH ${DIR} ABSOLUTE) + list(FIND protobuf_include_path ${ABS_PATH} _contains_already) + if(${_contains_already} EQUAL -1) + list(APPEND protobuf_include_path -I ${ABS_PATH}) + endif() + endforeach() + endif() + + set (intermediate_dir ${CMAKE_CURRENT_BINARY_DIR}/intermediate) + + set (protoc_args) + foreach (mode ${MODES}) + list (APPEND protoc_args "--${mode}_out" ${intermediate_dir}) + endforeach() + if (PLUGIN) + list (APPEND protoc_args "--plugin=${PLUGIN}") + endif() + + set(srcs) + set(hdrs) + set(all_intermediate_outputs) + + foreach(input_name ${ARGN}) + get_filename_component(abs_name ${input_name} ABSOLUTE) + get_filename_component(name ${input_name} NAME_WE) + + set (intermediate_outputs) + foreach (ext ${OUTPUT_FILE_EXTS}) + set (filename "${name}${ext}") + set (output "${CMAKE_CURRENT_BINARY_DIR}/${filename}") + set 
(intermediate_output "${intermediate_dir}/${filename}") + list (APPEND intermediate_outputs "${intermediate_output}") + list (APPEND all_intermediate_outputs "${intermediate_output}") + + if (${ext} MATCHES ".*\\.h") + list(APPEND hdrs "${output}") + else() + list(APPEND srcs "${output}") + endif() + + add_custom_command( + OUTPUT ${output} + COMMAND ${CMAKE_COMMAND} -DPROTOBUF_GENERATE_CPP_SCRIPT_MODE=1 -DUSE_PROTOBUF=1 -DDIR=${CMAKE_CURRENT_BINARY_DIR} -DFILENAME=${filename} -DCOMPILER_ID=${CMAKE_CXX_COMPILER_ID} -P ${ClickHouse_SOURCE_DIR}/cmake/protobuf_generate_cpp.cmake + DEPENDS ${intermediate_output}) + endforeach() + + add_custom_command( + OUTPUT ${intermediate_outputs} + COMMAND ${Protobuf_PROTOC_EXECUTABLE} + ARGS ${protobuf_include_path} ${protoc_args} ${abs_name} + DEPENDS ${abs_name} ${Protobuf_PROTOC_EXECUTABLE} ${PLUGIN} + COMMENT "Running C++ protocol buffer compiler on ${name}" + VERBATIM ) + endforeach() + + set_source_files_properties(${srcs} ${hdrs} ${all_intermediate_outputs} PROPERTIES GENERATED TRUE) + set(${SRCS} ${srcs} PARENT_SCOPE) + set(${HDRS} ${hdrs} PARENT_SCOPE) +endfunction() + + +if (PROTOBUF_GENERATE_CPP_SCRIPT_MODE) + set (output "${DIR}/${FILENAME}") + set (intermediate_dir ${DIR}/intermediate) + set (intermediate_output "${intermediate_dir}/${FILENAME}") + + if (COMPILER_ID STREQUAL "Clang") + set (pragma_push "#pragma clang diagnostic push\n") + set (pragma_pop "#pragma clang diagnostic pop\n") + set (pragma_disable_warnings "#pragma clang diagnostic ignored \"-Weverything\"\n") + elseif (COMPILER_ID MATCHES "GNU") + set (pragma_push "#pragma GCC diagnostic push\n") + set (pragma_pop "#pragma GCC diagnostic pop\n") + set (pragma_disable_warnings "#pragma GCC diagnostic ignored \"-Wall\"\n" + "#pragma GCC diagnostic ignored \"-Wextra\"\n" + "#pragma GCC diagnostic ignored \"-Warray-bounds\"\n" + "#pragma GCC diagnostic ignored \"-Wold-style-cast\"\n" + "#pragma GCC diagnostic ignored \"-Wshadow\"\n" + "#pragma GCC diagnostic ignored \"-Wsuggest-override\"\n" + "#pragma GCC diagnostic ignored \"-Wcast-qual\"\n" + "#pragma GCC diagnostic ignored \"-Wunused-parameter\"\n") + endif() + + if (${FILENAME} MATCHES ".*\\.h") + file(WRITE "${output}" + "#pragma once\n" + ${pragma_push} + ${pragma_disable_warnings} + "#include \"${intermediate_output}\"\n" + ${pragma_pop} + ) + else() + file(WRITE "${output}" + ${pragma_disable_warnings} + "#include \"${intermediate_output}\"\n" + ) + endif() + return() +endif() + + +function(protobuf_generate_cpp SRCS HDRS) + set (modes cpp) + set (output_file_exts ".pb.cc" ".pb.h") + set (plugin) + + protobuf_generate_cpp_impl(srcs hdrs "${modes}" "${output_file_exts}" "${plugin}" ${ARGN}) + + set(${SRCS} ${srcs} PARENT_SCOPE) + set(${HDRS} ${hdrs} PARENT_SCOPE) +endfunction() + + +function(protobuf_generate_grpc_cpp SRCS HDRS) + set (modes cpp grpc) + set (output_file_exts ".pb.cc" ".pb.h" ".grpc.pb.cc" ".grpc.pb.h") + set (plugin "protoc-gen-grpc=${GRPC_CPP_PLUGIN_EXECUTABLE}") + + protobuf_generate_cpp_impl(srcs hdrs "${modes}" "${output_file_exts}" "${plugin}" ${ARGN}) + + set(${SRCS} ${srcs} PARENT_SCOPE) + set(${HDRS} ${hdrs} PARENT_SCOPE) +endfunction() diff --git a/cmake/version.cmake b/cmake/version.cmake index 3f51f59cf0f..eea17f68c47 100644 --- a/cmake/version.cmake +++ b/cmake/version.cmake @@ -1,12 +1,4 @@ -# This strings autochanged from release_lib.sh: -set(VERSION_REVISION 54435) -set(VERSION_MAJOR 20) -set(VERSION_MINOR 5) -set(VERSION_PATCH 1) -set(VERSION_GITHASH 91df18a906dcffdbee6816e5389df6c65f86e35f) 
-set(VERSION_DESCRIBE v20.5.1.1-prestable) -set(VERSION_STRING 20.5.1.1) -# end of autochange +include(${CMAKE_SOURCE_DIR}/cmake/autogenerated_versions.txt) set(VERSION_EXTRA "" CACHE STRING "") set(VERSION_TWEAK "" CACHE STRING "") diff --git a/cmake/yandex/ya.make.versions.inc b/cmake/yandex/ya.make.versions.inc new file mode 100644 index 00000000000..3ac401cb108 --- /dev/null +++ b/cmake/yandex/ya.make.versions.inc @@ -0,0 +1,25 @@ +INCLUDE(${ARCADIA_ROOT}/clickhouse/cmake/autogenerated_versions.txt) + +# TODO: not sure if this is customizable per-binary +SET(VERSION_NAME "ClickHouse") + +# TODO: not quite sure how to replace dash with space in ya.make +SET(VERSION_FULL "${VERSION_NAME}-${VERSION_STRING}") + +CFLAGS (GLOBAL -DDBMS_NAME=\"ClickHouse\") +CFLAGS (GLOBAL -DDBMS_VERSION_MAJOR=${VERSION_MAJOR}) +CFLAGS (GLOBAL -DDBMS_VERSION_MINOR=${VERSION_MINOR}) +CFLAGS (GLOBAL -DDBMS_VERSION_PATCH=${VERSION_PATCH}) +CFLAGS (GLOBAL -DVERSION_FULL=\"\\\"${VERSION_FULL}\\\"\") +CFLAGS (GLOBAL -DVERSION_MAJOR=${VERSION_MAJOR}) +CFLAGS (GLOBAL -DVERSION_MINOR=${VERSION_MINOR}) +CFLAGS (GLOBAL -DVERSION_PATCH=${VERSION_PATCH}) + +# TODO: not supported yet, not sure if ya.make supports arithmetics. +CFLAGS (GLOBAL -DVERSION_INTEGER=0) + +CFLAGS (GLOBAL -DVERSION_NAME=\"\\\"${VERSION_NAME}\\\"\") +CFLAGS (GLOBAL -DVERSION_OFFICIAL=\"-arcadia\") +CFLAGS (GLOBAL -DVERSION_REVISION=${VERSION_REVISION}) +CFLAGS (GLOBAL -DVERSION_STRING=\"\\\"${VERSION_STRING}\\\"\") + diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 1d1d7756de2..ea13969db16 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -21,7 +21,6 @@ add_subdirectory (consistent-hashing-sumbur) add_subdirectory (consistent-hashing) add_subdirectory (croaring) add_subdirectory (FastMemcpy) -add_subdirectory (grpc-cmake) add_subdirectory (jemalloc-cmake) add_subdirectory (libcpuid-cmake) add_subdirectory (murmurhash) @@ -260,20 +259,17 @@ if (USE_INTERNAL_BROTLI_LIBRARY) endif () if (USE_INTERNAL_PROTOBUF_LIBRARY) - if (MAKE_STATIC_LIBRARIES) - set(protobuf_BUILD_SHARED_LIBS OFF CACHE INTERNAL "" FORCE) - else () - set(protobuf_BUILD_SHARED_LIBS ON CACHE INTERNAL "" FORCE) - endif () - set(protobuf_WITH_ZLIB 0 CACHE INTERNAL "" FORCE) # actually will use zlib, but skip find - set(protobuf_BUILD_TESTS OFF CACHE INTERNAL "" FORCE) - add_subdirectory(protobuf/cmake) + add_subdirectory(protobuf-cmake) endif () if (USE_INTERNAL_HDFS3_LIBRARY) add_subdirectory(libhdfs3-cmake) endif () +if (USE_INTERNAL_GRPC_LIBRARY) + add_subdirectory(grpc-cmake) +endif () + if (USE_INTERNAL_AWS_S3_LIBRARY OR USE_SENTRY) set (save_CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) set (save_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}) diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt index df9fd6aa61c..0837a366f20 100644 --- a/contrib/cctz-cmake/CMakeLists.txt +++ b/contrib/cctz-cmake/CMakeLists.txt @@ -618,7 +618,12 @@ if (USE_INTERNAL_CCTZ) add_library(tzdata STATIC ${TZ_OBJS}) set_target_properties(tzdata PROPERTIES LINKER_LANGUAGE C) - target_link_libraries(cctz -Wl,--whole-archive tzdata -Wl,--no-whole-archive) # whole-archive prevents symbols from being discarded + # whole-archive prevents symbols from being discarded for unknown reason + # CMake can shuffle each of target_link_libraries arguments with other + # libraries in linker command. To avoid this we hardcode whole-archive + # library into single string. 
+ add_dependencies(cctz tzdata) + target_link_libraries(cctz INTERFACE "-Wl,--whole-archive $<TARGET_FILE:tzdata> -Wl,--no-whole-archive") endif () else () diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index 0180c0c1d31..a70c155da53 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -4,6 +4,8 @@ cmake_minimum_required(VERSION 3.5.1) set(GRPC_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/grpc) set(GRPC_INCLUDE_DIR ${GRPC_SOURCE_DIR}/include/) set(GRPC_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/grpc) + + if(UNIX) if(${CMAKE_SYSTEM_NAME} MATCHES "Linux") set(_gRPC_PLATFORM_LINUX ON) @@ -14,72 +16,28 @@ if(UNIX) endif() endif() -set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) - -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${_gRPC_C_CXX_FLAGS} -w") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_gRPC_C_CXX_FLAGS} -w") - -set(_gRPC_PROTOBUF_LIBRARY_NAME "libprotobuf") - -if(gRPC_BACKWARDS_COMPATIBILITY_MODE) - add_definitions(-DGPR_BACKWARDS_COMPATIBILITY_MODE) - if (_gRPC_PLATFORM_MAC) - # some C++11 constructs not supported before OS X 10.9 - set(CMAKE_OSX_DEPLOYMENT_TARGET 10.9) - endif() +if(_gRPC_PLATFORM_MAC) + set(_gRPC_ALLTARGETS_LIBRARIES ${CMAKE_DL_LIBS} m pthread) +elseif(UNIX) + set(_gRPC_ALLTARGETS_LIBRARIES ${CMAKE_DL_LIBS} rt m pthread) endif() -if (_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC) - set(_gRPC_CORE_NOSTDCXX_FLAGS -fno-exceptions -fno-rtti) -else() - set(_gRPC_CORE_NOSTDCXX_FLAGS "") -endif() # address_sorting.cmake -include(${GRPC_SOURCE_DIR}/cmake/address_sorting.cmake) set(_gRPC_ADDRESS_SORTING_INCLUDE_DIR "${GRPC_SOURCE_DIR}/third_party/address_sorting/include") set(_gRPC_ADDRESS_SORTING_LIBRARIES address_sorting) # cares.cmake set(CARES_ROOT_DIR ${GRPC_SOURCE_DIR}/third_party/cares/cares) set(CARES_BINARY_DIR ${GRPC_BINARY_DIR}/third_party/cares/cares) -set(CARES_SHARED OFF CACHE BOOL "disable shared library") -set(CARES_STATIC ON CACHE BOOL "link cares statically") -if(gRPC_BACKWARDS_COMPATIBILITY_MODE) - # See https://github.com/grpc/grpc/issues/17255 - set(HAVE_LIBNSL OFF CACHE BOOL "avoid cares dependency on libnsl") +set(CARES_SHARED ${BUILD_SHARED_LIBS} CACHE BOOL "" FORCE) +if(BUILD_SHARED_LIBS) + set(CARES_STATIC OFF CACHE BOOL "" FORCE) +else() + set(CARES_STATIC ON CACHE BOOL "" FORCE) endif() +set(_gRPC_CARES_LIBRARIES c-ares) add_subdirectory(${CARES_ROOT_DIR} ${CARES_BINARY_DIR}) -if(TARGET c-ares) - set(_gRPC_CARES_LIBRARIES c-ares) -endif() - -# protobuf.cmake -set(PROTOBUF_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../protobuf) - -set(protobuf_BUILD_TESTS OFF CACHE BOOL "Build protobuf tests") -if(NOT protobuf_WITH_ZLIB) - set(protobuf_WITH_ZLIB OFF CACHE BOOL "Build protobuf with zlib.") -endif() -set(protobuf_MSVC_STATIC_RUNTIME OFF CACHE BOOL "Link static runtime libraries") - -set(_gRPC_PROTOBUF_LIBRARIES libprotobuf) -set(_gRPC_PROTOBUF_PROTOC_LIBRARIES libprotoc) -set(_gRPC_PROTOBUF_PROTOC protoc) -set(_gRPC_PROTOBUF_PROTOC_EXECUTABLE $<TARGET_FILE:protoc>) -set(_gRPC_PROTOBUF_INCLUDE_DIR "${PROTOBUF_ROOT_DIR}/src") - -if(gRPC_INSTALL) - message(WARNING "gRPC_INSTALL will be forced to FALSE because gRPC_PROTOBUF_PROVIDER is \"module\"") - set(gRPC_INSTALL FALSE) -endif() - -# ssl.cmake -set(BORINGSSL_ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../ssl) -if(TARGET ssl) - set(_gRPC_SSL_LIBRARIES ssl) - set(_gRPC_SSL_INCLUDE_DIR ${BORINGSSL_ROOT_DIR}/include) -endif() # upb.cmake set(UPB_ROOT_DIR ${GRPC_SOURCE_DIR}/third_party/upb) @@ -87,23 +45,20 @@ set(_gRPC_UPB_INCLUDE_DIR "${UPB_ROOT_DIR}") set(_gRPC_UPB_GRPC_GENERATED_DIR 
"${GRPC_SOURCE_DIR}/src/core/ext/upb-generated") set(_gRPC_UPB_LIBRARIES upb) +# protobuf.cmake +set(_gRPC_PROTOBUF_INCLUDE_DIR ${Protobuf_INCLUDE_DIR}) +set(_gRPC_PROTOBUF_LIBRARIES ${Protobuf_LIBRARY}) +set(_gRPC_PROTOBUF_PROTOC_LIBRARIES ${Protobuf_PROTOC_LIBRARY}) +set(_gRPC_PROTOBUF_PROTOC_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE}) + +# ssl.cmake +set(_gRPC_SSL_INCLUDE_DIR ${OPENSSL_INCLUDE_DIR}) +set(_gRPC_SSL_LIBRARIES ${OPENSSL_SSL_LIBRARY}) + # zlib.cmake -set(ZLIB_ROOT_DIR ${GRPC_SOURCE_DIR}/../zlib-ng) -include_directories("${ZLIB_ROOT_DIR}") -## add_subdirectory(${ZLIB_ROOT_DIR} ${ZLIB_ROOT_DIR}) -if(TARGET zlibstatic) - set(_gRPC_ZLIB_LIBRARIES zlibstatic) - set(_gRPC_ZLIB_INCLUDE_DIR "${ZLIB_ROOT_DIR}" "${GRPC_SOURCE_DIR}/third_party/zlib") -endif() +set(_gRPC_ZLIB_INCLUDE_DIR ${ZLIB_INCLUDE_DIR}) +set(_gRPC_ZLIB_LIBRARIES ${ZLIB_LIBRARIES}) -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") - -if(_gRPC_PLATFORM_MAC) - set(_gRPC_ALLTARGETS_LIBRARIES ${CMAKE_DL_LIBS} m pthread) -elseif(UNIX) - set(_gRPC_ALLTARGETS_LIBRARIES ${CMAKE_DL_LIBS} rt m pthread) -endif() add_library(address_sorting ${GRPC_SOURCE_DIR}/third_party/address_sorting/address_sorting.c @@ -112,7 +67,6 @@ add_library(address_sorting ) target_include_directories(address_sorting - PUBLIC $ $ PRIVATE ${GRPC_SOURCE_DIR} PRIVATE ${_gRPC_ADDRESS_SORTING_INCLUDE_DIR} PRIVATE ${_gRPC_CARES_INCLUDE_DIR} @@ -124,11 +78,11 @@ target_include_directories(address_sorting PRIVATE ${_gRPC_ZLIB_INCLUDE_DIR} ) target_link_libraries(address_sorting - ${_gRPC_BASELIB_LIBRARIES} ${_gRPC_PROTOBUF_LIBRARIES} ${_gRPC_ALLTARGETS_LIBRARIES} ) + add_library(gpr ${GRPC_SOURCE_DIR}/src/core/lib/gpr/alloc.cc ${GRPC_SOURCE_DIR}/src/core/lib/gpr/atm.cc @@ -184,6 +138,7 @@ target_include_directories(gpr PRIVATE ${_gRPC_UPB_INCLUDE_DIR} PRIVATE ${_gRPC_ZLIB_INCLUDE_DIR} ) + target_link_libraries(gpr ${_gRPC_ALLTARGETS_LIBRARIES} ${_gRPC_PROTOBUF_LIBRARIES} @@ -569,7 +524,6 @@ add_library(grpc ${GRPC_SOURCE_DIR}/src/core/plugin_registry/grpc_plugin_registry.cc ) -target_compile_options(grpc PUBLIC -fpermissive) target_include_directories(grpc PUBLIC ${GRPC_INCLUDE_DIR} @@ -583,8 +537,8 @@ target_include_directories(grpc PRIVATE ${_gRPC_UPB_INCLUDE_DIR} PRIVATE ${_gRPC_ZLIB_INCLUDE_DIR} ) + target_link_libraries(grpc - ${_gRPC_BASELIB_LIBRARIES} ${_gRPC_SSL_LIBRARIES} ${_gRPC_ZLIB_LIBRARIES} ${_gRPC_CARES_LIBRARIES} @@ -597,352 +551,6 @@ if (_gRPC_PLATFORM_MAC) target_link_libraries(grpc "-framework CoreFoundation") endif() -add_library(grpc_cronet - ${GRPC_SOURCE_DIR}/src/core/ext/transport/cronet/plugin_registry/grpc_cronet_plugin_registry.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/init.cc - ${GRPC_SOURCE_DIR}/src/core/lib/avl/avl.cc - ${GRPC_SOURCE_DIR}/src/core/lib/backoff/backoff.cc - ${GRPC_SOURCE_DIR}/src/core/lib/channel/channel_args.cc - ${GRPC_SOURCE_DIR}/src/core/lib/channel/channel_stack.cc - ${GRPC_SOURCE_DIR}/src/core/lib/channel/channel_stack_builder.cc - ${GRPC_SOURCE_DIR}/src/core/lib/channel/channel_trace.cc - ${GRPC_SOURCE_DIR}/src/core/lib/channel/channelz.cc - ${GRPC_SOURCE_DIR}/src/core/lib/channel/channelz_registry.cc - ${GRPC_SOURCE_DIR}/src/core/lib/channel/connected_channel.cc - ${GRPC_SOURCE_DIR}/src/core/lib/channel/handshaker.cc - ${GRPC_SOURCE_DIR}/src/core/lib/channel/handshaker_registry.cc - ${GRPC_SOURCE_DIR}/src/core/lib/channel/status_util.cc - ${GRPC_SOURCE_DIR}/src/core/lib/compression/compression.cc - 
${GRPC_SOURCE_DIR}/src/core/lib/compression/compression_args.cc - ${GRPC_SOURCE_DIR}/src/core/lib/compression/compression_internal.cc - ${GRPC_SOURCE_DIR}/src/core/lib/compression/message_compress.cc - ${GRPC_SOURCE_DIR}/src/core/lib/compression/stream_compression.cc - ${GRPC_SOURCE_DIR}/src/core/lib/compression/stream_compression_gzip.cc - ${GRPC_SOURCE_DIR}/src/core/lib/compression/stream_compression_identity.cc - ${GRPC_SOURCE_DIR}/src/core/lib/debug/stats.cc - ${GRPC_SOURCE_DIR}/src/core/lib/debug/stats_data.cc - ${GRPC_SOURCE_DIR}/src/core/lib/http/format_request.cc - ${GRPC_SOURCE_DIR}/src/core/lib/http/httpcli.cc - ${GRPC_SOURCE_DIR}/src/core/lib/http/parser.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/buffer_list.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/call_combiner.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/cfstream_handle.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/combiner.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/endpoint.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/endpoint_cfstream.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/endpoint_pair_posix.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/endpoint_pair_uv.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/endpoint_pair_windows.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/error.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/error_cfstream.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/ev_epoll1_linux.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/ev_epollex_linux.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/ev_poll_posix.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/ev_posix.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/ev_windows.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/exec_ctx.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/executor.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/executor/mpmcqueue.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/executor/threadpool.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/fork_posix.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/fork_windows.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/gethostname_fallback.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/gethostname_host_name_max.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/gethostname_sysconf.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/grpc_if_nametoindex_posix.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/grpc_if_nametoindex_unsupported.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/internal_errqueue.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/iocp_windows.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/iomgr.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/iomgr_custom.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/iomgr_internal.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/iomgr_posix.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/iomgr_posix_cfstream.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/iomgr_uv.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/iomgr_windows.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/is_epollexclusive_available.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/load_file.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/lockfree_event.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/polling_entity.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/pollset.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/pollset_custom.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/pollset_set.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/pollset_set_custom.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/pollset_set_windows.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/pollset_uv.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/pollset_windows.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/resolve_address.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/resolve_address_custom.cc - 
${GRPC_SOURCE_DIR}/src/core/lib/iomgr/resolve_address_posix.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/resolve_address_windows.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/resource_quota.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/sockaddr_utils.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/socket_factory_posix.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/socket_mutator.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/socket_utils_common_posix.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/socket_utils_linux.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/socket_utils_posix.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/socket_utils_uv.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/socket_utils_windows.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/socket_windows.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_client.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_client_cfstream.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_client_custom.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_client_posix.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_client_windows.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_custom.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_posix.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_server.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_server_custom.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_server_posix.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_server_utils_posix_common.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_server_utils_posix_ifaddrs.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_server_utils_posix_noifaddrs.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_server_windows.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_uv.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/tcp_windows.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/time_averaged_stats.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/timer.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/timer_custom.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/timer_generic.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/timer_heap.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/timer_manager.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/timer_uv.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/udp_server.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/unix_sockets_posix.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/unix_sockets_posix_noop.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/wakeup_fd_eventfd.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/wakeup_fd_nospecial.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/wakeup_fd_pipe.cc - ${GRPC_SOURCE_DIR}/src/core/lib/iomgr/wakeup_fd_posix.cc - ${GRPC_SOURCE_DIR}/src/core/lib/json/json.cc - ${GRPC_SOURCE_DIR}/src/core/lib/json/json_reader.cc - ${GRPC_SOURCE_DIR}/src/core/lib/json/json_string.cc - ${GRPC_SOURCE_DIR}/src/core/lib/json/json_writer.cc - ${GRPC_SOURCE_DIR}/src/core/lib/slice/b64.cc - ${GRPC_SOURCE_DIR}/src/core/lib/slice/percent_encoding.cc - ${GRPC_SOURCE_DIR}/src/core/lib/slice/slice.cc - ${GRPC_SOURCE_DIR}/src/core/lib/slice/slice_buffer.cc - ${GRPC_SOURCE_DIR}/src/core/lib/slice/slice_intern.cc - ${GRPC_SOURCE_DIR}/src/core/lib/slice/slice_string_helpers.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/api_trace.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/byte_buffer.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/byte_buffer_reader.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/call.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/call_details.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/call_log_batch.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/channel.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/channel_init.cc - 
${GRPC_SOURCE_DIR}/src/core/lib/surface/channel_ping.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/channel_stack_type.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/completion_queue.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/completion_queue_factory.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/event_string.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/lame_client.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/metadata_array.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/server.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/validate_metadata.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/version.cc - ${GRPC_SOURCE_DIR}/src/core/lib/transport/bdp_estimator.cc - ${GRPC_SOURCE_DIR}/src/core/lib/transport/byte_stream.cc - ${GRPC_SOURCE_DIR}/src/core/lib/transport/connectivity_state.cc - ${GRPC_SOURCE_DIR}/src/core/lib/transport/error_utils.cc - ${GRPC_SOURCE_DIR}/src/core/lib/transport/metadata.cc - ${GRPC_SOURCE_DIR}/src/core/lib/transport/metadata_batch.cc - ${GRPC_SOURCE_DIR}/src/core/lib/transport/pid_controller.cc - ${GRPC_SOURCE_DIR}/src/core/lib/transport/static_metadata.cc - ${GRPC_SOURCE_DIR}/src/core/lib/transport/status_conversion.cc - ${GRPC_SOURCE_DIR}/src/core/lib/transport/status_metadata.cc - ${GRPC_SOURCE_DIR}/src/core/lib/transport/timeout_encoding.cc - ${GRPC_SOURCE_DIR}/src/core/lib/transport/transport.cc - ${GRPC_SOURCE_DIR}/src/core/lib/transport/transport_op_string.cc - ${GRPC_SOURCE_DIR}/src/core/lib/uri/uri_parser.cc - ${GRPC_SOURCE_DIR}/src/core/lib/debug/trace.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/cronet/client/secure/cronet_channel_create.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/cronet/transport/cronet_api_dummy.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/cronet/transport/cronet_transport.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/client/secure/secure_channel_create.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/bin_decoder.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/bin_encoder.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/chttp2_plugin.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/chttp2_transport.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/context_list.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/flow_control.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/frame_data.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/frame_goaway.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/frame_ping.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/frame_rst_stream.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/frame_settings.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/frame_window_update.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/hpack_encoder.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/hpack_parser.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/hpack_table.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/http2_settings.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/huffsyms.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/incoming_metadata.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/parsing.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/stream_lists.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/stream_map.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/varint.cc - 
${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/transport/writing.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/alpn/alpn.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/http/client/http_client_filter.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/http/http_filters_plugin.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/http/message_compress/message_compress_filter.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/http/server/http_server_filter.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/backend_metric.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/backup_poller.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/channel_connectivity.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/client_channel.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/client_channel_channelz.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/client_channel_factory.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/client_channel_plugin.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/connector.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/global_subchannel_pool.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/health/health_check_client.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/http_connect_handshaker.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/http_proxy.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/lb_policy.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/lb_policy_registry.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/local_subchannel_pool.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/parse_address.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/proxy_mapper.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/proxy_mapper_registry.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/resolver.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/resolver_registry.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/resolver_result_parsing.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/resolving_lb_policy.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/retry_throttle.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/server_address.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/service_config.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/subchannel.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/client_channel/subchannel_pool_interface.cc - ${GRPC_SOURCE_DIR}/src/core/ext/filters/deadline/deadline_filter.cc - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/src/proto/grpc/health/v1/health.upb.c - ${GRPC_SOURCE_DIR}/third_party/upb/upb/decode.c - ${GRPC_SOURCE_DIR}/third_party/upb/upb/encode.c - ${GRPC_SOURCE_DIR}/third_party/upb/upb/msg.c - ${GRPC_SOURCE_DIR}/third_party/upb/upb/port.c - ${GRPC_SOURCE_DIR}/third_party/upb/upb/table.c - ${GRPC_SOURCE_DIR}/third_party/upb/upb/upb.c - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/udpa/data/orca/v1/orca_load_report.upb.c - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/gogoproto/gogo.upb.c - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/validate/validate.upb.c - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/google/api/annotations.upb.c - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/google/api/http.upb.c - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/google/protobuf/any.upb.c - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/google/protobuf/descriptor.upb.c - 
${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/google/protobuf/duration.upb.c - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/google/protobuf/empty.upb.c - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/google/protobuf/struct.upb.c - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/google/protobuf/timestamp.upb.c - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/google/protobuf/wrappers.upb.c - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/google/rpc/status.upb.c - ${GRPC_SOURCE_DIR}/src/core/lib/http/httpcli_security_connector.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/context/security_context.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/alts/alts_credentials.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/composite/composite_credentials.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/credentials.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/credentials_metadata.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/fake/fake_credentials.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/google_default/credentials_generic.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/google_default/google_default_credentials.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/iam/iam_credentials.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/jwt/json_token.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/jwt/jwt_credentials.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/jwt/jwt_verifier.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/local/local_credentials.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/oauth2/oauth2_credentials.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/plugin/plugin_credentials.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/ssl/ssl_credentials.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/tls/grpc_tls_credentials_options.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/tls/spiffe_credentials.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/security_connector/alts/alts_security_connector.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/security_connector/fake/fake_security_connector.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/security_connector/load_system_roots_fallback.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/security_connector/load_system_roots_linux.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/security_connector/local/local_security_connector.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/security_connector/security_connector.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/security_connector/ssl/ssl_security_connector.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/security_connector/ssl_utils.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/security_connector/ssl_utils_config.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/security_connector/tls/spiffe_security_connector.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/transport/client_auth_filter.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/transport/secure_endpoint.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/transport/security_handshaker.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/transport/server_auth_filter.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/transport/target_authority_table.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/transport/tsi_error.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/util/json_util.cc - ${GRPC_SOURCE_DIR}/src/core/lib/surface/init_secure.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/crypt/aes_gcm.cc - 
${GRPC_SOURCE_DIR}/src/core/tsi/alts/crypt/gsec.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/frame_protector/alts_counter.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/frame_protector/alts_crypter.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/frame_protector/alts_frame_protector.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/frame_protector/alts_record_protocol_crypter_common.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/frame_protector/alts_seal_privacy_integrity_crypter.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/frame_protector/alts_unseal_privacy_integrity_crypter.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/frame_protector/frame_handler.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/handshaker/alts_handshaker_client.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/handshaker/alts_shared_resource.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/handshaker/alts_tsi_handshaker.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/zero_copy_frame_protector/alts_grpc_integrity_only_record_protocol.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/zero_copy_frame_protector/alts_grpc_privacy_integrity_record_protocol.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/zero_copy_frame_protector/alts_grpc_record_protocol_common.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/zero_copy_frame_protector/alts_iovec_record_protocol.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/zero_copy_frame_protector/alts_zero_copy_grpc_protector.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/alts/check_gcp_environment.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/alts/check_gcp_environment_linux.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/alts/check_gcp_environment_no_op.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/alts/check_gcp_environment_windows.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/alts/grpc_alts_credentials_client_options.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/alts/grpc_alts_credentials_options.cc - ${GRPC_SOURCE_DIR}/src/core/lib/security/credentials/alts/grpc_alts_credentials_server_options.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/handshaker/alts_tsi_utils.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/alts/handshaker/transport_security_common_api.cc - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/src/proto/grpc/gcp/altscontext.upb.c - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/src/proto/grpc/gcp/handshaker.upb.c - ${GRPC_SOURCE_DIR}/src/core/ext/upb-generated/src/proto/grpc/gcp/transport_security_common.upb.c - ${GRPC_SOURCE_DIR}/src/core/tsi/transport_security.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/client/insecure/channel_create.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/client/insecure/channel_create_posix.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/client/authority.cc - ${GRPC_SOURCE_DIR}/src/core/ext/transport/chttp2/client/chttp2_connector.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/fake_transport_security.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/local_transport_security.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/ssl/session_cache/ssl_session_boringssl.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/ssl/session_cache/ssl_session_cache.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/ssl/session_cache/ssl_session_openssl.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/ssl_transport_security.cc - ${GRPC_SOURCE_DIR}/src/core/tsi/transport_security_grpc.cc -) - -target_include_directories(grpc_cronet - PUBLIC ${GRPC_INCLUDE_DIR} - PRIVATE ${GRPC_SOURCE_DIR} - PRIVATE ${_gRPC_ADDRESS_SORTING_INCLUDE_DIR} - PRIVATE ${_gRPC_CARES_INCLUDE_DIR} - PRIVATE ${_gRPC_PROTOBUF_INCLUDE_DIR} - PRIVATE ${_gRPC_SSL_INCLUDE_DIR} - 
PRIVATE ${_gRPC_UPB_GENERATED_DIR} - PRIVATE ${_gRPC_UPB_GRPC_GENERATED_DIR} - PRIVATE ${_gRPC_UPB_INCLUDE_DIR} - PRIVATE ${_gRPC_ZLIB_INCLUDE_DIR} -) -target_link_libraries(grpc_cronet - ${_gRPC_BASELIB_LIBRARIES} - ${_gRPC_SSL_LIBRARIES} - ${_gRPC_ZLIB_LIBRARIES} - ${_gRPC_CARES_LIBRARIES} - ${_gRPC_ADDRESS_SORTING_LIBRARIES} - ${_gRPC_ALLTARGETS_LIBRARIES} - ${_gRPC_PROTOBUF_LIBRARIES} - gpr -) -if (_gRPC_PLATFORM_MAC) - target_link_libraries(grpc_cronet "-framework CoreFoundation") -endif() add_library(grpc_unsecure ${GRPC_SOURCE_DIR}/src/core/lib/surface/init.cc @@ -1249,19 +857,18 @@ add_library(grpc_unsecure ${GRPC_SOURCE_DIR}/src/core/plugin_registry/grpc_unsecure_plugin_registry.cc ) - target_include_directories(grpc_unsecure PUBLIC ${GRPC_INCLUDE_DIR} PRIVATE ${GRPC_SOURCE_DIR} PRIVATE ${_gRPC_ADDRESS_SORTING_INCLUDE_DIR} PRIVATE ${_gRPC_CARES_INCLUDE_DIR} PRIVATE ${_gRPC_PROTOBUF_INCLUDE_DIR} - PRIVATE ${_gRPC_SSL_INCLUDE_DIR} PRIVATE ${_gRPC_UPB_GENERATED_DIR} PRIVATE ${_gRPC_UPB_GRPC_GENERATED_DIR} PRIVATE ${_gRPC_UPB_INCLUDE_DIR} PRIVATE ${_gRPC_ZLIB_INCLUDE_DIR} ) + target_link_libraries(grpc_unsecure ${_gRPC_BASELIB_LIBRARIES} ${_gRPC_ZLIB_LIBRARIES} @@ -1271,10 +878,12 @@ target_link_libraries(grpc_unsecure ${_gRPC_PROTOBUF_LIBRARIES} gpr ) + if (_gRPC_PLATFORM_MAC) target_link_libraries(grpc_unsecure "-framework CoreFoundation") endif() + add_library(grpc++ ${GRPC_SOURCE_DIR}/src/cpp/client/insecure_credentials.cc ${GRPC_SOURCE_DIR}/src/cpp/client/secure_credentials.cc @@ -1331,8 +940,6 @@ add_library(grpc++ ${GRPC_SOURCE_DIR}/src/cpp/codegen/codegen_init.cc ) -target_compile_options(grpc++ PUBLIC -w) - target_include_directories(grpc++ PUBLIC ${GRPC_INCLUDE_DIR} PRIVATE ${GRPC_SOURCE_DIR} @@ -1344,10 +951,9 @@ target_include_directories(grpc++ PRIVATE ${_gRPC_UPB_GRPC_GENERATED_DIR} PRIVATE ${_gRPC_UPB_INCLUDE_DIR} PRIVATE ${_gRPC_ZLIB_INCLUDE_DIR} - PRIVATE ${_gRPC_PROTO_GENS_DIR} ) + target_link_libraries(grpc++ - ${_gRPC_BASELIB_LIBRARIES} ${_gRPC_SSL_LIBRARIES} ${_gRPC_PROTOBUF_LIBRARIES} ${_gRPC_ALLTARGETS_LIBRARIES} @@ -1355,6 +961,7 @@ target_link_libraries(grpc++ gpr ) + add_library(grpc++_unsecure ${GRPC_SOURCE_DIR}/src/cpp/client/insecure_credentials.cc ${GRPC_SOURCE_DIR}/src/cpp/common/insecure_create_auth_context.cc @@ -1404,21 +1011,19 @@ add_library(grpc++_unsecure ${GRPC_SOURCE_DIR}/src/cpp/codegen/codegen_init.cc ) -target_compile_options(grpc++_unsecure PUBLIC -w) - target_include_directories(grpc++_unsecure PUBLIC ${GRPC_INCLUDE_DIR} PRIVATE ${GRPC_SOURCE_DIR} PRIVATE ${_gRPC_ADDRESS_SORTING_INCLUDE_DIR} PRIVATE ${_gRPC_CARES_INCLUDE_DIR} PRIVATE ${_gRPC_PROTOBUF_INCLUDE_DIR} - PRIVATE ${_gRPC_SSL_INCLUDE_DIR} PRIVATE ${_gRPC_UPB_GENERATED_DIR} PRIVATE ${_gRPC_UPB_GRPC_GENERATED_DIR} PRIVATE ${_gRPC_UPB_INCLUDE_DIR} PRIVATE ${_gRPC_ZLIB_INCLUDE_DIR} PRIVATE ${_gRPC_PROTO_GENS_DIR} ) + target_link_libraries(grpc++_unsecure ${_gRPC_BASELIB_LIBRARIES} ${_gRPC_PROTOBUF_LIBRARIES} @@ -1427,6 +1032,16 @@ target_link_libraries(grpc++_unsecure grpc_unsecure ) + +if (_gRPC_SSL_INCLUDE_DIR AND _gRPC_SSL_LIBRARIES) + add_library(libgrpc ALIAS grpc) + add_library(libgrpc++ ALIAS grpc++) +else() + add_library(libgrpc ALIAS grpc_unsecure) + add_library(libgrpc++ ALIAS grpc++_unsecure) +endif() + + add_library(grpc_plugin_support ${GRPC_SOURCE_DIR}/src/compiler/cpp_generator.cc ) @@ -1436,23 +1051,22 @@ target_include_directories(grpc_plugin_support PUBLIC ${GRPC_INCLUDE_DIR} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} PRIVATE ${_gRPC_ADDRESS_SORTING_INCLUDE_DIR} - PRIVATE 
${_gRPC_BENCHMARK_INCLUDE_DIR} PRIVATE ${_gRPC_CARES_INCLUDE_DIR} - PRIVATE ${_gRPC_GFLAGS_INCLUDE_DIR} PRIVATE ${_gRPC_PROTOBUF_INCLUDE_DIR} PRIVATE ${_gRPC_SSL_INCLUDE_DIR} PRIVATE ${_gRPC_UPB_GENERATED_DIR} PRIVATE ${_gRPC_UPB_GRPC_GENERATED_DIR} PRIVATE ${_gRPC_UPB_INCLUDE_DIR} PRIVATE ${_gRPC_ZLIB_INCLUDE_DIR} - PRIVATE ${_gRPC_PROTO_GENS_DIR} ) + target_link_libraries(grpc_plugin_support ${_gRPC_PROTOBUF_PROTOC_LIBRARIES} ${_gRPC_PROTOBUF_LIBRARIES} ${_gRPC_ALLTARGETS_LIBRARIES} ) + add_executable(grpc_cpp_plugin ${GRPC_SOURCE_DIR}/src/compiler/cpp_plugin.cc ) @@ -1461,16 +1075,13 @@ target_include_directories(grpc_cpp_plugin PRIVATE ${GRPC_SOURCE_DIR} PUBLIC ${GRPC_INCLUDE_DIR} PRIVATE ${_gRPC_ADDRESS_SORTING_INCLUDE_DIR} - PRIVATE ${_gRPC_BENCHMARK_INCLUDE_DIR} PRIVATE ${_gRPC_CARES_INCLUDE_DIR} - PRIVATE ${_gRPC_GFLAGS_INCLUDE_DIR} PRIVATE ${_gRPC_PROTOBUF_INCLUDE_DIR} PRIVATE ${_gRPC_SSL_INCLUDE_DIR} PRIVATE ${_gRPC_UPB_GENERATED_DIR} PRIVATE ${_gRPC_UPB_GRPC_GENERATED_DIR} PRIVATE ${_gRPC_UPB_INCLUDE_DIR} PRIVATE ${_gRPC_ZLIB_INCLUDE_DIR} - PRIVATE ${_gRPC_PROTO_GENS_DIR} ) target_link_libraries(grpc_cpp_plugin @@ -1479,4 +1090,3 @@ target_link_libraries(grpc_cpp_plugin ${_gRPC_ALLTARGETS_LIBRARIES} grpc_plugin_support ) - diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 9081de593b8..63c5a5e66ea 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -17,7 +17,13 @@ if (ENABLE_JEMALLOC) # # By enabling percpu_arena number of arenas limited to number of CPUs and hence # this problem should go away. - set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu" CACHE STRING "Change default configuration string of JEMalloc" ) + set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0") + # CACHE variable is empty, to allow changing defaults without necessity + # to purge cache + set (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE "" CACHE STRING "Change default configuration string of JEMalloc" ) + if (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE) + set (JEMALLOC_CONFIG_MALLOC_CONF "${JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE}") + endif() message (STATUS "jemalloc malloc_conf: ${JEMALLOC_CONFIG_MALLOC_CONF}") set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/jemalloc") @@ -55,6 +61,7 @@ if (ENABLE_JEMALLOC) ${LIBRARY_DIR}/src/ticker.c ${LIBRARY_DIR}/src/tsd.c ${LIBRARY_DIR}/src/witness.c + ${LIBRARY_DIR}/src/safety_check.c ) if (OS_DARWIN) list(APPEND SRCS ${LIBRARY_DIR}/src/zone.c) @@ -89,6 +96,8 @@ if (ENABLE_JEMALLOC) endif () target_compile_options(jemalloc PRIVATE -Wno-redundant-decls) + # for RTLD_NEXT + target_compile_options(jemalloc PRIVATE -D_GNU_SOURCE) else () find_library(LIBRARY_JEMALLOC jemalloc) find_path(INCLUDE_JEMALLOC jemalloc/jemalloc.h) diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_defs.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h similarity index 84% rename from contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_defs.h rename to contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h index d1389237a77..6a03a231a0e 100644 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_defs.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_defs.h @@ -5,6 +5,12 @@ /* Defined if alloc_size attribute is supported. */ #define JEMALLOC_HAVE_ATTR_ALLOC_SIZE +/* Defined if format_arg(...) attribute is supported. */ +#define JEMALLOC_HAVE_ATTR_FORMAT_ARG + +/* Defined if format(gnu_printf, ...) attribute is supported. 
*/ +#define JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF + /* Defined if format(printf, ...) attribute is supported. */ #define JEMALLOC_HAVE_ATTR_FORMAT_PRINTF diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_macros.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_macros.h similarity index 87% rename from contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_macros.h rename to contrib/jemalloc-cmake/include/jemalloc/jemalloc_macros.h index 7432f1cda53..34235894285 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_macros.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_macros.h @@ -4,12 +4,13 @@ #include #include -#define JEMALLOC_VERSION "5.1.0-56-g41b7372eadee941b9164751b8d4963f915d3ceae" +#define JEMALLOC_VERSION "5.2.1-0-gea6b3e973b477b8061e0076bb257dbd7f3faa756" #define JEMALLOC_VERSION_MAJOR 5 -#define JEMALLOC_VERSION_MINOR 1 -#define JEMALLOC_VERSION_BUGFIX 0 -#define JEMALLOC_VERSION_NREV 56 -#define JEMALLOC_VERSION_GID "41b7372eadee941b9164751b8d4963f915d3ceae" +#define JEMALLOC_VERSION_MINOR 2 +#define JEMALLOC_VERSION_BUGFIX 1 +#define JEMALLOC_VERSION_NREV 0 +#define JEMALLOC_VERSION_GID "ea6b3e973b477b8061e0076bb257dbd7f3faa756" +#define JEMALLOC_VERSION_GID_IDENT ea6b3e973b477b8061e0076bb257dbd7f3faa756 #define MALLOCX_LG_ALIGN(la) ((int)(la)) #if LG_SIZEOF_PTR == 2 @@ -68,6 +69,7 @@ # define JEMALLOC_EXPORT __declspec(dllimport) # endif # endif +# define JEMALLOC_FORMAT_ARG(i) # define JEMALLOC_FORMAT_PRINTF(s, i) # define JEMALLOC_NOINLINE __declspec(noinline) # ifdef __cplusplus @@ -95,6 +97,11 @@ # ifndef JEMALLOC_EXPORT # define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) # endif +# ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG +# define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(3)) +# else +# define JEMALLOC_FORMAT_ARG(i) +# endif # ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF # define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) # elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_protos.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h similarity index 100% rename from contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_protos.h rename to contrib/jemalloc-cmake/include/jemalloc/jemalloc_protos.h diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_rename.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_rename.h index a2ea2dd3533..2e94f7a0cc3 100644 --- a/contrib/jemalloc-cmake/include/jemalloc/jemalloc_rename.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_rename.h @@ -17,6 +17,7 @@ # define je_malloc_stats_print malloc_stats_print # define je_malloc_usable_size malloc_usable_size # define je_mallocx mallocx +# define je_smallocx_ea6b3e973b477b8061e0076bb257dbd7f3faa756 smallocx_ea6b3e973b477b8061e0076bb257dbd7f3faa756 # define je_nallocx nallocx # define je_posix_memalign posix_memalign # define je_rallocx rallocx diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_typedefs.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_typedefs.h similarity index 88% rename from contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_typedefs.h rename to contrib/jemalloc-cmake/include/jemalloc/jemalloc_typedefs.h index 1a58874306e..5f94f16f937 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_typedefs.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc_typedefs.h @@ -65,13 +65,13 @@ typedef bool 
(extent_merge_t)(extent_hooks_t *, void *, size_t, void *, size_t, bool, unsigned); struct extent_hooks_s { - extent_alloc_t *alloc; - extent_dalloc_t *dalloc; - extent_destroy_t *destroy; - extent_commit_t *commit; - extent_decommit_t *decommit; - extent_purge_t *purge_lazy; - extent_purge_t *purge_forced; - extent_split_t *split; - extent_merge_t *merge; + extent_alloc_t *alloc; + extent_dalloc_t *dalloc; + extent_destroy_t *destroy; + extent_commit_t *commit; + extent_decommit_t *decommit; + extent_purge_t *purge_lazy; + extent_purge_t *purge_forced; + extent_split_t *split; + extent_merge_t *merge; }; diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in index 089f1a773aa..cbd2740e1f1 100644 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -1,12 +1,6 @@ /* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ #ifndef JEMALLOC_INTERNAL_DEFS_H_ #define JEMALLOC_INTERNAL_DEFS_H_ - - -#ifndef _GNU_SOURCE - #define _GNU_SOURCE -#endif - /* * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all * public APIs to be prefixed. This makes it possible, with some care, to use @@ -25,7 +19,7 @@ #define JEMALLOC_OVERRIDE___LIBC_MEMALIGN #define JEMALLOC_OVERRIDE___LIBC_REALLOC #define JEMALLOC_OVERRIDE___LIBC_VALLOC -#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ /* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. @@ -41,7 +35,7 @@ */ #define CPU_SPINWAIT /* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ -#define HAVE_CPU_SPINWAIT 0 +#define HAVE_CPU_SPINWAIT 9 /* * Number of significant bits in virtual addresses. This may be less than the @@ -55,25 +49,13 @@ /* Defined if GCC __atomic atomics are available. */ #define JEMALLOC_GCC_ATOMIC_ATOMICS 1 +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1 /* Defined if GCC __sync atomics are available. */ #define JEMALLOC_GCC_SYNC_ATOMICS 1 - -/* - * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and - * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the - * functions are defined in libgcc instead of being inlines). - */ -/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 */ - -/* - * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and - * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the - * functions are defined in libgcc instead of being inlines). - */ -/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 */ +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1 /* * Defined if __builtin_clz() and __builtin_clzl() are available. @@ -85,19 +67,13 @@ */ /* #undef JEMALLOC_OS_UNFAIR_LOCK */ -/* - * Defined if OSSpin*() functions are available, as provided by Darwin, and - * documented in the spinlock(3) manual page. - */ -/* #undef JEMALLOC_OSSPIN */ - /* Defined if syscall(2) is usable. */ #define JEMALLOC_USE_SYSCALL /* * Defined if secure_getenv(3) is available. */ -#define JEMALLOC_HAVE_SECURE_GETENV +// #define JEMALLOC_HAVE_SECURE_GETENV /* * Defined if issetugid(2) is available. 
@@ -243,6 +219,12 @@ #define JEMALLOC_INTERNAL_FFSL __builtin_ffsl #define JEMALLOC_INTERNAL_FFS __builtin_ffs +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + /* * If defined, explicitly attempt to more uniformly distribute large allocation * pointer alignments across all cache indices. @@ -297,7 +279,7 @@ * MADV_FREE, though typically with higher * system overhead. */ -// #define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_FREE #define JEMALLOC_PURGE_MADVISE_DONTNEED #define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS @@ -379,4 +361,7 @@ */ #define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h index c150785fb4a..e5e34925b55 100644 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h +++ b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/internal/jemalloc_preamble.h @@ -21,7 +21,7 @@ # include "jemalloc/jemalloc.h" #endif -#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) +#if defined(JEMALLOC_OSATOMIC) #include #endif @@ -161,7 +161,26 @@ static const bool config_log = false #endif ; -#ifdef JEMALLOC_HAVE_SCHED_GETCPU +/* + * Are extra safety checks enabled; things like checking the size of sized + * deallocations, double-frees, etc. + */ +static const bool config_opt_safety_checks = +#ifdef JEMALLOC_OPT_SAFETY_CHECKS + true +#elif defined(JEMALLOC_DEBUG) + /* + * This lets us only guard safety checks by one flag instead of two; fast + * checks can guard solely by config_opt_safety_checks and run in debug mode + * too. + */ + true +#else + false +#endif + ; + +#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU) /* Currently percpu_arena depends on sched_getcpu. */ #define JEMALLOC_PERCPU_ARENA #endif diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h deleted file mode 100644 index 79b13337fbb..00000000000 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_macros.h +++ /dev/null @@ -1,123 +0,0 @@ -#include -#include -#include -#include -#include - -#define JEMALLOC_VERSION "5.1.0-97-gcd2931ad9bbd78208565716ab102e86d858c2fff" -#define JEMALLOC_VERSION_MAJOR 5 -#define JEMALLOC_VERSION_MINOR 1 -#define JEMALLOC_VERSION_BUGFIX 0 -#define JEMALLOC_VERSION_NREV 97 -#define JEMALLOC_VERSION_GID "cd2931ad9bbd78208565716ab102e86d858c2fff" -#define JEMALLOC_VERSION_GID_IDENT cd2931ad9bbd78208565716ab102e86d858c2fff - -#define MALLOCX_LG_ALIGN(la) ((int)(la)) -#if LG_SIZEOF_PTR == 2 -# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) -#else -# define MALLOCX_ALIGN(a) \ - ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ - ffs((int)(((size_t)(a))>>32))+31)) -#endif -#define MALLOCX_ZERO ((int)0x40) -/* - * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 - * encodes MALLOCX_TCACHE_NONE. - */ -#define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) -#define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) -/* - * Bias arena index bits so that 0 encodes "use an automatically chosen arena". 
- */ -#define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) - -/* - * Use as arena index in "arena..{purge,decay,dss}" and - * "stats.arenas..*" mallctl interfaces to select all arenas. This - * definition is intentionally specified in raw decimal format to support - * cpp-based string concatenation, e.g. - * - * #define STRINGIFY_HELPER(x) #x - * #define STRINGIFY(x) STRINGIFY_HELPER(x) - * - * mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, - * 0); - */ -#define MALLCTL_ARENAS_ALL 4096 -/* - * Use as arena index in "stats.arenas..*" mallctl interfaces to select - * destroyed arenas. - */ -#define MALLCTL_ARENAS_DESTROYED 4097 - -#if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) -# define JEMALLOC_CXX_THROW throw() -#else -# define JEMALLOC_CXX_THROW -#endif - -#if defined(_MSC_VER) -# define JEMALLOC_ATTR(s) -# define JEMALLOC_ALIGNED(s) __declspec(align(s)) -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# ifndef JEMALLOC_EXPORT -# ifdef DLLEXPORT -# define JEMALLOC_EXPORT __declspec(dllexport) -# else -# define JEMALLOC_EXPORT __declspec(dllimport) -# endif -# endif -# define JEMALLOC_FORMAT_PRINTF(s, i) -# define JEMALLOC_NOINLINE __declspec(noinline) -# ifdef __cplusplus -# define JEMALLOC_NOTHROW __declspec(nothrow) -# else -# define JEMALLOC_NOTHROW -# endif -# define JEMALLOC_SECTION(s) __declspec(allocate(s)) -# define JEMALLOC_RESTRICT_RETURN __declspec(restrict) -# if _MSC_VER >= 1900 && !defined(__EDG__) -# define JEMALLOC_ALLOCATOR __declspec(allocator) -# else -# define JEMALLOC_ALLOCATOR -# endif -#elif defined(JEMALLOC_HAVE_ATTR) -# define JEMALLOC_ATTR(s) __attribute__((s)) -# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) -# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE -# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) -# else -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# endif -# ifndef JEMALLOC_EXPORT -# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) -# endif -# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) -# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) -# else -# define JEMALLOC_FORMAT_PRINTF(s, i) -# endif -# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) -# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) -# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) -# define JEMALLOC_RESTRICT_RETURN -# define JEMALLOC_ALLOCATOR -#else -# define JEMALLOC_ATTR(s) -# define JEMALLOC_ALIGNED(s) -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# define JEMALLOC_EXPORT -# define JEMALLOC_FORMAT_PRINTF(s, i) -# define JEMALLOC_NOINLINE -# define JEMALLOC_NOTHROW -# define JEMALLOC_SECTION(s) -# define JEMALLOC_RESTRICT_RETURN -# define JEMALLOC_ALLOCATOR -#endif diff --git a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_typedefs.h b/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_typedefs.h deleted file mode 100644 index 1a58874306e..00000000000 --- a/contrib/jemalloc-cmake/include_linux_aarch64/jemalloc/jemalloc_typedefs.h +++ /dev/null @@ -1,77 +0,0 @@ -typedef struct extent_hooks_s extent_hooks_t; - -/* - * void * - * extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, - * size_t alignment, bool *zero, bool *commit, unsigned arena_ind); - */ -typedef void 
*(extent_alloc_t)(extent_hooks_t *, void *, size_t, size_t, bool *, - bool *, unsigned); - -/* - * bool - * extent_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, - * bool committed, unsigned arena_ind); - */ -typedef bool (extent_dalloc_t)(extent_hooks_t *, void *, size_t, bool, - unsigned); - -/* - * void - * extent_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, - * bool committed, unsigned arena_ind); - */ -typedef void (extent_destroy_t)(extent_hooks_t *, void *, size_t, bool, - unsigned); - -/* - * bool - * extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, - * size_t offset, size_t length, unsigned arena_ind); - */ -typedef bool (extent_commit_t)(extent_hooks_t *, void *, size_t, size_t, size_t, - unsigned); - -/* - * bool - * extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, - * size_t offset, size_t length, unsigned arena_ind); - */ -typedef bool (extent_decommit_t)(extent_hooks_t *, void *, size_t, size_t, - size_t, unsigned); - -/* - * bool - * extent_purge(extent_hooks_t *extent_hooks, void *addr, size_t size, - * size_t offset, size_t length, unsigned arena_ind); - */ -typedef bool (extent_purge_t)(extent_hooks_t *, void *, size_t, size_t, size_t, - unsigned); - -/* - * bool - * extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, - * size_t size_a, size_t size_b, bool committed, unsigned arena_ind); - */ -typedef bool (extent_split_t)(extent_hooks_t *, void *, size_t, size_t, size_t, - bool, unsigned); - -/* - * bool - * extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, - * void *addr_b, size_t size_b, bool committed, unsigned arena_ind); - */ -typedef bool (extent_merge_t)(extent_hooks_t *, void *, size_t, void *, size_t, - bool, unsigned); - -struct extent_hooks_s { - extent_alloc_t *alloc; - extent_dalloc_t *dalloc; - extent_destroy_t *destroy; - extent_commit_t *commit; - extent_decommit_t *decommit; - extent_purge_t *purge_lazy; - extent_purge_t *purge_forced; - extent_split_t *split; - extent_merge_t *merge; -}; diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in index 63f7f765023..d5cf0e719ef 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -1,11 +1,6 @@ /* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */ #ifndef JEMALLOC_INTERNAL_DEFS_H_ #define JEMALLOC_INTERNAL_DEFS_H_ - -#ifndef _GNU_SOURCE - #define _GNU_SOURCE -#endif - /* * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all * public APIs to be prefixed. This makes it possible, with some care, to use @@ -24,7 +19,7 @@ #define JEMALLOC_OVERRIDE___LIBC_MEMALIGN #define JEMALLOC_OVERRIDE___LIBC_REALLOC #define JEMALLOC_OVERRIDE___LIBC_VALLOC -#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ /* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. @@ -54,25 +49,13 @@ /* Defined if GCC __atomic atomics are available. */ #define JEMALLOC_GCC_ATOMIC_ATOMICS 1 +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1 /* Defined if GCC __sync atomics are available. 
*/ #define JEMALLOC_GCC_SYNC_ATOMICS 1 - -/* - * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and - * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the - * functions are defined in libgcc instead of being inlines). - */ -/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 */ - -/* - * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and - * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the - * functions are defined in libgcc instead of being inlines). - */ -/* #undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 */ +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1 /* * Defined if __builtin_clz() and __builtin_clzl() are available. @@ -84,20 +67,13 @@ */ /* #undef JEMALLOC_OS_UNFAIR_LOCK */ -/* - * Defined if OSSpin*() functions are available, as provided by Darwin, and - * documented in the spinlock(3) manual page. - */ -/* #undef JEMALLOC_OSSPIN */ - /* Defined if syscall(2) is usable. */ #define JEMALLOC_USE_SYSCALL /* * Defined if secure_getenv(3) is available. */ -// Don't want dependency on newer GLIBC -//#define JEMALLOC_HAVE_SECURE_GETENV +// #define JEMALLOC_HAVE_SECURE_GETENV /* * Defined if issetugid(2) is available. @@ -160,6 +136,9 @@ /* JEMALLOC_STATS enables statistics calculation. */ #define JEMALLOC_STATS +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + /* JEMALLOC_PROF enables allocation profiling. */ /* #undef JEMALLOC_PROF */ @@ -240,6 +219,12 @@ #define JEMALLOC_INTERNAL_FFSL __builtin_ffsl #define JEMALLOC_INTERNAL_FFS __builtin_ffs +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + /* * If defined, explicitly attempt to more uniformly distribute large allocation * pointer alignments across all cache indices. @@ -252,6 +237,12 @@ */ /* #undef JEMALLOC_LOG */ +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + /* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. */ @@ -288,7 +279,7 @@ * MADV_FREE, though typically with higher * system overhead. */ -//#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_FREE #define JEMALLOC_PURGE_MADVISE_DONTNEED #define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS @@ -370,4 +361,7 @@ */ #define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE +/* Performs additional safety checks when defined. 
*/ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h index c150785fb4a..e5e34925b55 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_preamble.h @@ -21,7 +21,7 @@ # include "jemalloc/jemalloc.h" #endif -#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) +#if defined(JEMALLOC_OSATOMIC) #include #endif @@ -161,7 +161,26 @@ static const bool config_log = false #endif ; -#ifdef JEMALLOC_HAVE_SCHED_GETCPU +/* + * Are extra safety checks enabled; things like checking the size of sized + * deallocations, double-frees, etc. + */ +static const bool config_opt_safety_checks = +#ifdef JEMALLOC_OPT_SAFETY_CHECKS + true +#elif defined(JEMALLOC_DEBUG) + /* + * This lets us only guard safety checks by one flag instead of two; fast + * checks can guard solely by config_opt_safety_checks and run in debug mode + * too. + */ + true +#else + false +#endif + ; + +#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU) /* Currently percpu_arena depends on sched_getcpu. */ #define JEMALLOC_PERCPU_ARENA #endif diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_defs.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_defs.h deleted file mode 100644 index d1389237a77..00000000000 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_defs.h +++ /dev/null @@ -1,43 +0,0 @@ -/* include/jemalloc/jemalloc_defs.h. Generated from jemalloc_defs.h.in by configure. */ -/* Defined if __attribute__((...)) syntax is supported. */ -#define JEMALLOC_HAVE_ATTR - -/* Defined if alloc_size attribute is supported. */ -#define JEMALLOC_HAVE_ATTR_ALLOC_SIZE - -/* Defined if format(printf, ...) attribute is supported. */ -#define JEMALLOC_HAVE_ATTR_FORMAT_PRINTF - -/* - * Define overrides for non-standard allocator-related functions if they are - * present on the system. - */ -#define JEMALLOC_OVERRIDE_MEMALIGN -#define JEMALLOC_OVERRIDE_VALLOC - -/* - * At least Linux omits the "const" in: - * - * size_t malloc_usable_size(const void *ptr); - * - * Match the operating system's prototype. - */ -#define JEMALLOC_USABLE_SIZE_CONST - -/* - * If defined, specify throw() for the public function prototypes when compiling - * with C++. The only justification for this is to match the prototypes that - * glibc defines. - */ -#define JEMALLOC_USE_CXX_THROW - -#ifdef _MSC_VER -# ifdef _WIN64 -# define LG_SIZEOF_PTR_WIN 3 -# else -# define LG_SIZEOF_PTR_WIN 2 -# endif -#endif - -/* sizeof(void *) == 2^LG_SIZEOF_PTR. */ -#define LG_SIZEOF_PTR 3 diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_protos.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_protos.h deleted file mode 100644 index ff025e30fa7..00000000000 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/jemalloc_protos.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * The je_ prefix on the following public symbol declarations is an artifact - * of namespace management, and should be omitted in application code unless - * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle.h). 
- */ -extern JEMALLOC_EXPORT const char *je_malloc_conf; -extern JEMALLOC_EXPORT void (*je_malloc_message)(void *cbopaque, - const char *s); - -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_malloc(size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_calloc(size_t num, size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_posix_memalign(void **memptr, - size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(nonnull(1)); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_aligned_alloc(size_t alignment, - size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) - JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_realloc(void *ptr, size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) - JEMALLOC_CXX_THROW; - -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_mallocx(size_t size, int flags) - JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_rallocx(void *ptr, size_t size, - int flags) JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_xallocx(void *ptr, size_t size, - size_t extra, int flags); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_sallocx(const void *ptr, - int flags) JEMALLOC_ATTR(pure); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_sdallocx(void *ptr, size_t size, - int flags); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_nallocx(size_t size, int flags) - JEMALLOC_ATTR(pure); - -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char *name, - void *oldp, size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, - size_t *mibp, size_t *miblenp); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print( - void (*write_cb)(void *, const char *), void *je_cbopaque, - const char *opts); -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size( - JEMALLOC_USABLE_SIZE_CONST void *ptr) JEMALLOC_CXX_THROW; - -#ifdef JEMALLOC_OVERRIDE_MEMALIGN -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_memalign(size_t alignment, size_t size) - JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); -#endif - -#ifdef JEMALLOC_OVERRIDE_VALLOC -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *je_valloc(size_t size) JEMALLOC_CXX_THROW - JEMALLOC_ATTR(malloc); -#endif diff --git a/contrib/protobuf-cmake/CMakeLists.txt b/contrib/protobuf-cmake/CMakeLists.txt new file mode 100644 index 00000000000..3cb9053d647 --- /dev/null +++ b/contrib/protobuf-cmake/CMakeLists.txt @@ -0,0 +1,13 @@ +set(protobuf_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/protobuf) +set(protobuf_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/protobuf) + +set(protobuf_WITH_ZLIB 0 CACHE INTERNAL "" FORCE) # actually will use zlib, but skip find +set(protobuf_BUILD_TESTS OFF CACHE INTERNAL "" FORCE) + +if (MAKE_STATIC_LIBRARIES) + set(protobuf_BUILD_SHARED_LIBS OFF CACHE 
INTERNAL "" FORCE) +else () + set(protobuf_BUILD_SHARED_LIBS ON CACHE INTERNAL "" FORCE) +endif () + +add_subdirectory(${protobuf_SOURCE_DIR}/cmake ${protobuf_BINARY_DIR}) diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 3ad20a9479c..93f192c3f3c 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -21,7 +21,7 @@ RUN apt-get update \ locales \ ca-certificates \ wget \ - tzata \ + tzdata \ && rm -rf \ /var/lib/apt/lists/* \ /var/cache/debconf \ diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 53f41905f10..9c1fe66cf7b 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -36,7 +36,7 @@ RUN apt-get update \ ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN pip install urllib3==1.23 pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2==2.7.5 pymongo tzlocal kafka-python protobuf redis aerospike pytest-timeout minio rpm-confluent-schemaregistry grpcio grpcio-tools +RUN pip install urllib3==1.23 pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2==2.7.5 pymongo tzlocal kafka-python protobuf redis aerospike pytest-timeout minio rpm-confluent-schemaregistry grpcio grpcio-tools cassandra-driver ENV DOCKER_CHANNEL stable ENV DOCKER_VERSION 17.09.1-ce diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index bf4599acb9a..209b36f59af 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -317,7 +317,7 @@ function report rm -r report ||: mkdir report report/tmp ||: -rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv ||: +rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv run-errors.tsv ||: build_log_column_definitions @@ -434,7 +434,7 @@ create table wall_clock engine Memory as select * from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float'); create table slow_on_client_tsv engine File(TSV, 'report/slow-on-client.tsv') as - select client, server, floor(client/server, 3) p, query_display_name + select client, server, floor(client/server, 3) p, test, query_display_name from query_time left join query_display_names using (test, query_index) where p > 1.02 order by p desc; diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index ac6a5e03296..ac506d046b1 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -163,6 +163,8 @@ for query_index, q in enumerate(test_queries): prewarm_id = f'{query_prefix}.prewarm0' res = c.execute(q, query_id = prewarm_id) print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}') + except KeyboardInterrupt: + raise except: # If prewarm fails for some query -- skip it, and try to test the others. 
# This might happen if the new test introduces some function that the diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index b171603700d..866e78da098 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -189,7 +189,7 @@ if args.report == 'main': slow_on_client_rows = tsvRows('report/slow-on-client.tsv') error_tests += len(slow_on_client_rows) printSimpleTable('Slow on client', - ['Client time, s', 'Server time, s', 'Ratio', 'Query'], + ['Client time, s', 'Server time, s', 'Ratio', 'Test', 'Query'], slow_on_client_rows) def print_changes(): diff --git a/docker/test/test_runner.sh b/docker/test/test_runner.sh index 76e142e61f9..561117492b0 100755 --- a/docker/test/test_runner.sh +++ b/docker/test/test_runner.sh @@ -12,7 +12,7 @@ readonly CLICKHOUSE_PACKAGES_ARG="${2}" CLICKHOUSE_SERVER_IMAGE="${3}" if [ ${CLICKHOUSE_PACKAGES_ARG} != ${NO_REBUILD_FLAG} ]; then - readonly CLICKHOUSE_PACKAGES_DIR="$(realpath ${2})" # or --no-rebuild + readonly CLICKHOUSE_PACKAGES_DIR="$(realpath ${2})" # or --no-rebuild fi @@ -26,19 +26,19 @@ fi # TODO: optionally mount most recent clickhouse-test and queries directory from local machine if [ ${CLICKHOUSE_PACKAGES_ARG} != ${NO_REBUILD_FLAG} ]; then - docker build \ - -f "${CLICKHOUSE_DOCKER_DIR}/test/stateless/clickhouse-statelest-test-runner.Dockerfile" \ - --target clickhouse-test-runner-base \ - -t clickhouse-test-runner-base:preinstall \ - "${CLICKHOUSE_DOCKER_DIR}/test/stateless" + docker build \ + -f "${CLICKHOUSE_DOCKER_DIR}/test/stateless/clickhouse-statelest-test-runner.Dockerfile" \ + --target clickhouse-test-runner-base \ + -t clickhouse-test-runner-base:preinstall \ + "${CLICKHOUSE_DOCKER_DIR}/test/stateless" - docker rm -f clickhouse-test-runner-installing-packages || true - docker run \ - -v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \ - --name clickhouse-test-runner-installing-packages \ - clickhouse-test-runner-base:preinstall - docker commit clickhouse-test-runner-installing-packages clickhouse-statelest-test-runner:local - docker rm -f clickhouse-test-runner-installing-packages || true + docker rm -f clickhouse-test-runner-installing-packages || true + docker run \ + -v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \ + --name clickhouse-test-runner-installing-packages \ + clickhouse-test-runner-base:preinstall + docker commit clickhouse-test-runner-installing-packages clickhouse-statelest-test-runner:local + docker rm -f clickhouse-test-runner-installing-packages || true fi # # Create a bind-volume to the clickhouse-test script file @@ -47,38 +47,38 @@ fi # Build server image (optional) from local packages if [ -z "${CLICKHOUSE_SERVER_IMAGE}" ]; then - CLICKHOUSE_SERVER_IMAGE="yandex/clickhouse-server:local" + CLICKHOUSE_SERVER_IMAGE="yandex/clickhouse-server:local" - if [ ${CLICKHOUSE_PACKAGES_ARG} != ${NO_REBUILD_FLAG} ]; then - docker build \ - -f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \ - --target clickhouse-server-base \ - -t clickhouse-server-base:preinstall \ - "${CLICKHOUSE_DOCKER_DIR}/server" + if [ ${CLICKHOUSE_PACKAGES_ARG} != ${NO_REBUILD_FLAG} ]; then + docker build \ + -f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \ + --target clickhouse-server-base \ + -t clickhouse-server-base:preinstall \ + "${CLICKHOUSE_DOCKER_DIR}/server" - docker rm -f clickhouse_server_base_installing_server || true - docker run -v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \ - --name clickhouse_server_base_installing_server \ - 
clickhouse-server-base:preinstall - docker commit clickhouse_server_base_installing_server clickhouse-server-base:postinstall + docker rm -f clickhouse_server_base_installing_server || true + docker run -v "${CLICKHOUSE_PACKAGES_DIR}:/packages" \ + --name clickhouse_server_base_installing_server \ + clickhouse-server-base:preinstall + docker commit clickhouse_server_base_installing_server clickhouse-server-base:postinstall - docker build \ - -f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \ - --target clickhouse-server \ - -t "${CLICKHOUSE_SERVER_IMAGE}" \ - "${CLICKHOUSE_DOCKER_DIR}/server" - fi + docker build \ + -f "${CLICKHOUSE_DOCKER_DIR}/server/local.Dockerfile" \ + --target clickhouse-server \ + -t "${CLICKHOUSE_SERVER_IMAGE}" \ + "${CLICKHOUSE_DOCKER_DIR}/server" + fi fi docker rm -f test-runner || true docker-compose down CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \ - docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \ - create \ - --build --force-recreate + docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \ + create \ + --build --force-recreate CLICKHOUSE_SERVER_IMAGE="${CLICKHOUSE_SERVER_IMAGE}" \ - docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \ - run \ - --name test-runner \ - test-runner + docker-compose -f "${CLICKHOUSE_DOCKER_DIR}/test/test_runner_docker_compose.yaml" \ + run \ + --name test-runner \ + test-runner diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index 2ff06e3a686..71a65bf8cc5 100644 --- a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -1,6 +1,6 @@ --- toc_priority: 62 -toc_title: Overview of ClickHouse Architecture +toc_title: Architecture Overview --- # Overview of ClickHouse Architecture {#overview-of-clickhouse-architecture} diff --git a/docs/en/development/browse-code.md b/docs/en/development/browse-code.md index 3e7b259bb39..b4f755ca5d2 100644 --- a/docs/en/development/browse-code.md +++ b/docs/en/development/browse-code.md @@ -1,6 +1,6 @@ --- -toc_priority: 63 -toc_title: Browse Source Code +toc_priority: 71 +toc_title: Source Code --- # Browse ClickHouse Source Code {#browse-clickhouse-source-code} diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index abd93e83116..3a6774037c1 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -1,19 +1,17 @@ --- toc_priority: 61 -toc_title: The Beginner ClickHouse Developer Instruction +toc_title: For Beginners --- +# The Beginner ClickHouse Developer Instruction + Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X. -# If You Use Windows {#if-you-use-windows} - If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T. -# If You Use a 32-bit System {#if-you-use-a-32-bit-system} - ClickHouse cannot work or build on a 32-bit system. You should acquire access to a 64-bit system and you can continue reading. 
-# Creating a Repository on GitHub {#creating-a-repository-on-github} +## Creating a Repository on GitHub {#creating-a-repository-on-github} To start working with ClickHouse repository you will need a GitHub account. @@ -33,7 +31,7 @@ To do that in Ubuntu you would run in the command line terminal: A brief manual on using Git can be found here: https://services.github.com/on-demand/downloads/github-git-cheat-sheet.pdf. For a detailed manual on Git see https://git-scm.com/book/en/v2. -# Cloning a Repository to Your Development Machine {#cloning-a-repository-to-your-development-machine} +## Cloning a Repository to Your Development Machine {#cloning-a-repository-to-your-development-machine} Next, you need to download the source files onto your working machine. This is called “to clone a repository” because it creates a local copy of the repository on your working machine. @@ -77,7 +75,7 @@ You can also add original ClickHouse repo’s address to your local repository t After successfully running this command you will be able to pull updates from the main ClickHouse repo by running `git pull upstream master`. -## Working with Submodules {#working-with-submodules} +### Working with Submodules {#working-with-submodules} Working with submodules in git could be painful. Next commands will help to manage it: @@ -107,7 +105,7 @@ The next commands would help you to reset all submodules to the initial state (! git submodule foreach git submodule foreach git reset --hard git submodule foreach git submodule foreach git clean -xfd -# Build System {#build-system} +## Build System {#build-system} ClickHouse uses CMake and Ninja for building. @@ -127,11 +125,11 @@ For installing CMake and Ninja on Mac OS X first install Homebrew and then insta Next, check the version of CMake: `cmake --version`. If it is below 3.3, you should install a newer version from the website: https://cmake.org/download/. -# Optional External Libraries {#optional-external-libraries} +## Optional External Libraries {#optional-external-libraries} ClickHouse uses several external libraries for building. All of them do not need to be installed separately as they are built together with ClickHouse from the sources located in the submodules. You can check the list in `contrib`. -# C++ Compiler {#c-compiler} +## C++ Compiler {#c-compiler} Compilers GCC starting from version 9 and Clang version 8 or above are supported for building ClickHouse. @@ -145,7 +143,7 @@ Mac OS X build is supported only for Clang. Just run `brew install llvm` If you decide to use Clang, you can also install `libc++` and `lld`, if you know what it is. Using `ccache` is also recommended. 
-# The Building Process {#the-building-process} +## The Building Process {#the-building-process} Now that you are ready to build ClickHouse we recommend you to create a separate directory `build` inside `ClickHouse` that will contain all of the build artefacts: @@ -202,7 +200,7 @@ Upon successful build you get an executable file `ClickHouse//program ls -l programs/clickhouse -# Running the Built Executable of ClickHouse {#running-the-built-executable-of-clickhouse} +## Running the Built Executable of ClickHouse {#running-the-built-executable-of-clickhouse} To run the server under the current user you need to navigate to `ClickHouse/programs/server/` (located outside of `build`) and run: @@ -229,7 +227,7 @@ You can also run your custom-built ClickHouse binary with the config file from t sudo service clickhouse-server stop sudo -u clickhouse ClickHouse/build/programs/clickhouse server --config-file /etc/clickhouse-server/config.xml -# IDE (Integrated Development Environment) {#ide-integrated-development-environment} +## IDE (Integrated Development Environment) {#ide-integrated-development-environment} If you do not know which IDE to use, we recommend that you use CLion. CLion is commercial software, but it offers 30 days free trial period. It is also free of charge for students. CLion can be used both on Linux and on Mac OS X. @@ -239,7 +237,7 @@ As simple code editors, you can use Sublime Text or Visual Studio Code, or Kate Just in case, it is worth mentioning that CLion creates `build` path on its own, it also on its own selects `debug` for build type, for configuration it uses a version of CMake that is defined in CLion and not the one installed by you, and finally, CLion will use `make` to run build tasks instead of `ninja`. This is normal behaviour, just keep that in mind to avoid confusion. -# Writing Code {#writing-code} +## Writing Code {#writing-code} The description of ClickHouse architecture can be found here: https://clickhouse.tech/docs/en/development/architecture/ @@ -249,7 +247,7 @@ Writing tests: https://clickhouse.tech/docs/en/development/tests/ List of tasks: https://github.com/ClickHouse/ClickHouse/blob/master/testsructions/easy\_tasks\_sorted\_en.md -# Test Data {#test-data} +## Test Data {#test-data} Developing ClickHouse often requires loading realistic datasets. It is particularly important for performance testing. We have a specially prepared set of anonymized data from Yandex.Metrica. It requires additionally some 3GB of free disk space. Note that this data is not required to accomplish most of the development tasks. @@ -272,7 +270,7 @@ Developing ClickHouse often requires loading realistic datasets. It is particula clickhouse-client --max_insert_block_size 100000 --query "INSERT INTO test.hits FORMAT TSV" < hits_v1.tsv clickhouse-client --max_insert_block_size 100000 --query "INSERT INTO test.visits FORMAT TSV" < visits_v1.tsv -# Creating Pull Request {#creating-pull-request} +## Creating Pull Request {#creating-pull-request} Navigate to your fork repository in GitHub’s UI. If you have been developing in a branch, you need to select that branch. There will be a “Pull request” button located on the screen. In essence, this means “create a request for accepting my changes into the main repository”. 
diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 4697fbca7e1..a08f2db7149 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -37,6 +37,8 @@ The supported formats are: | [Avro](#data-format-avro) | ✔ | ✔ | | [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | | [Parquet](#data-format-parquet) | ✔ | ✔ | +| [Arrow](#data-format-arrow) | ✔ | ✔ | +| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | | [ORC](#data-format-orc) | ✔ | ✗ | | [RowBinary](#rowbinary) | ✔ | ✔ | | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | @@ -985,9 +987,9 @@ See also [how to read/write length-delimited protobuf messages in popular langua ## Avro {#data-format-avro} -[Apache Avro](http://avro.apache.org/) is a row-oriented data serialization framework developed within Apache’s Hadoop project. +[Apache Avro](https://avro.apache.org/) is a row-oriented data serialization framework developed within Apache’s Hadoop project. -ClickHouse Avro format supports reading and writing [Avro data files](http://avro.apache.org/docs/current/spec.html#Object+Container+Files). +ClickHouse Avro format supports reading and writing [Avro data files](https://avro.apache.org/docs/current/spec.html#Object+Container+Files). ### Data Types Matching {#data_types-matching} @@ -1009,7 +1011,7 @@ The table below shows supported data types and how they match ClickHouse [data t | `long (timestamp-millis)` \* | [DateTime64(3)](../sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* | | `long (timestamp-micros)` \* | [DateTime64(6)](../sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* | -\* [Avro logical types](http://avro.apache.org/docs/current/spec.html#Logical+Types) +\* [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types) Unsupported Avro data types: `record` (non-root), `map` @@ -1095,7 +1097,7 @@ SELECT * FROM topic1_stream; ## Parquet {#data-format-parquet} -[Apache Parquet](http://parquet.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. ClickHouse supports read and write operations for this format. +[Apache Parquet](https://parquet.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. ClickHouse supports read and write operations for this format. ### Data Types Matching {#data_types-matching-2} @@ -1141,6 +1143,16 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_ To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-engines/integrations/hdfs.md). +## Arrow {#data-format-arrow} + +[Apache Arrow](https://arrow.apache.org/) comes with two built-in columnar storage formats. ClickHouse supports read and write operations for these formats. + +`Arrow` is Apache Arrow's "file mode" format. It is designed for in-memory random access. + +## ArrowStream {#data-format-arrow-stream} + +`ArrowStream` is Apache Arrow's "stream mode" format. It is designed for in-memory stream processing. + ## ORC {#data-format-orc} [Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. You can only insert data in this format to ClickHouse. 
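A minimal sketch of how the `Arrow` and `ArrowStream` formats described above might be exercised from SQL; the table name `arrow_demo` and its contents are hypothetical, and the queries assume a server built with Arrow support:

```sql
-- Hypothetical round trip: create a small table, then read it back
-- in each of the two Arrow-based output formats.
CREATE TABLE arrow_demo (id UInt32, name String) ENGINE = Memory;
INSERT INTO arrow_demo VALUES (1, 'a'), (2, 'b');

SELECT * FROM arrow_demo FORMAT Arrow;        -- "file mode", designed for in-memory random access
SELECT * FROM arrow_demo FORMAT ArrowStream;  -- "stream mode", designed for stream processing
```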
diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 5e9e8f841bf..716e774871b 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -25,6 +25,7 @@ toc_title: Integrations - Message queues - [Kafka](https://kafka.apache.org) - [clickhouse\_sinker](https://github.com/housepower/clickhouse_sinker) (uses [Go client](https://github.com/ClickHouse/clickhouse-go/)) + - [stream-loader-clickhouse](https://github.com/adform/stream-loader) - Stream processing - [Flink](https://flink.apache.org) - [flink-clickhouse-sink](https://github.com/ivi-ru/flink-clickhouse-sink) diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 8868522e977..4daadf32be6 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -41,6 +41,7 @@ toc_title: Adopters | [Integros](https://integros.com){.favicon} | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | | [Kodiak Data](https://www.kodiakdata.com/){.favicon} | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | | [Kontur](https://kontur.ru){.favicon} | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | +| [Lawrence Berkeley National Laboratory](https://www.lbl.gov){.favicon} | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) | | [LifeStreet](https://lifestreet.com/){.favicon} | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | | [Mail.ru Cloud Solutions](https://mcs.mail.ru/){.favicon} | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | | [MessageBird](https://www.messagebird.com){.favicon} | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 93acf3cae7a..5961c701283 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -586,11 +586,11 @@ If the table doesn’t exist, ClickHouse will create it. If the structure of the ``` -## query\_thread\_log {#server_configuration_parameters-query-thread-log} +## query\_thread\_log {#server_configuration_parameters-query_thread_log} Setting for logging threads of queries received with the [log\_query\_threads=1](../settings/settings.md#settings-log-query-threads) setting. -Queries are logged in the [system.query\_thread\_log](../../operations/system-tables.md#system_tables-query-thread-log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below). +Queries are logged in the [system.query\_thread\_log](../../operations/system-tables.md#system_tables-query_thread_log) table, not in a separate file. You can change the name of the table in the `table` parameter (see below). 
Use the following parameters to configure logging: diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index e6e692315f0..880f0ffedb1 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -404,6 +404,65 @@ Possible values: Default value: 0. +## partial_merge_join_optimizations {#partial_merge_join_optimizations} + +Disables optimizations in partial merge join algorithm for [JOIN](../../sql-reference/statements/select/join.md) queries. + +By default, this setting enables improvements that could lead to wrong results. If you see suspicious results in your queries, disable optimizations by this setting. Optimizations can be different in different versions of the ClickHouse server. + +Possible values: + +- 0 — Optimizations disabled. +- 1 — Optimizations enabled. + +Default value: 1. + +## partial_merge_join_rows_in_right_blocks {#partial_merge_join_rows_in_right_blocks} + +Limits sizes of right-hand join data blocks in partial merge join algorithm for [JOIN](../../sql-reference/statements/select/join.md) queries. + +ClickHouse server: + +1. Splits right-hand join data into blocks with up to the specified number of rows. +2. Indexes each block with their minimum and maximum values +3. Unloads prepared blocks to disk if possible. + +Possible values: + +- Any positive integer. Recommended range of values: [1000, 100000]. + +Default value: 65536. + +## any_join_distinct_right_table_keys {#any_join_distinct_right_table_keys} + +Enables legacy ClickHouse server behavior in `ANY INNER|LEFT JOIN` operations. + +!!! note "Warning" + Use this setting only for the purpose of backward compatibility if your use cases depend on legacy `JOIN` behavior. + +When the legacy behavior enabled: + +- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping. +- Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do. + +When the legacy behavior disabled: + +- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations. +- Results of `ANY INNER JOIN` operations contain one row per key from both left and right tables. + +Possible values: + +- 0 — Legacy behavior is disabled. +- 1 — Legacy behavior is enabled. + + +Default value: 0. + +See also: + +- [JOIN strictness](../../sql-reference/statements/select/join.md#select-join-strictness) + + ## max\_block\_size {#setting-max_block_size} In ClickHouse, data is processed by blocks (sets of column parts). The internal processing cycles for a single block are efficient enough, but there are noticeable expenditures on each block. The `max_block_size` setting is a recommendation for what size of the block (in a count of rows) to load from tables. The block size shouldn’t be too small, so that the expenditures on each block are still noticeable, but not too large so that the query with LIMIT that is completed after the first block is processed quickly. The goal is to avoid consuming too much memory when extracting a large number of columns in multiple threads and to preserve at least some cache locality. @@ -539,7 +598,7 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING' Setting up query threads logging. 
-Queries’ threads runned by ClickHouse with this setup are logged according to the rules in the [query\_thread\_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query-thread-log) server configuration parameter. +Queries’ threads runned by ClickHouse with this setup are logged according to the rules in the [query\_thread\_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter. Example: @@ -1265,4 +1324,63 @@ Possible values: Default value: 16. +## low_cardinality_max_dictionary_size {#low_cardinality_max_dictionary_size} + +Sets a maximum size in rows of a shared global dictionary for the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type that can be written to a storage file system. This setting prevents issues with RAM in case of unlimited dictionary growth. All the data that can't be encoded due to maximum dictionary size limitation ClickHouse writes in an ordinary method. + +Possible values: + +- Any positive integer. + +Default value: 8192. + +## low_cardinality_use_single_dictionary_for_part {#low_cardinality_use_single_dictionary_for_part} + +Turns on or turns off using of single dictionary for the data part. + +By default, ClickHouse server monitors the size of dictionaries and if a dictionary overflows then the server starts to write the next one. To prohibit creating several dictionaries set `low_cardinality_use_single_dictionary_for_part = 1`. + +Possible values: + +- 1 — Creating several dictionaries for the data part is prohibited. +- 0 — Creating several dictionaries for the data part is not prohibited. + +Default value: 0. + +## low_cardinality_allow_in_native_format {#low_cardinality_allow_in_native_format} + +Allows or restricts using the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type with the [Native](../../interfaces/formats.md#native) format. + +If usage of `LowCardinality` is restricted, ClickHouse server converts `LowCardinality`-columns to ordinary ones for `SELECT` queries, and convert ordinary columns to `LowCardinality`-columns for `INSERT` queries. + +This setting is required mainly for third-party clients which don't support `LowCardinality` data type. + +Possible values: + +- 1 — Usage of `LowCardinality` is not restricted. +- 0 — Usage of `LowCardinality` is restricted. + +Default value: 1. + + +## allow_suspicious_low_cardinality_types {#allow_suspicious_low_cardinality_types} + +Allows or restricts using [LowCardinality](../../sql-reference/data-types/lowcardinality.md) with data types with fixed size of 8 bytes or less: numeric data types and `FixedString(8_bytes_or_less)`. + +For small fixed values using of `LowCardinality` is usually inefficient, because ClickHouse stores a numeric index for each row. As a result: + +- Disk space usage can rise. +- RAM consumption can be higher, depending on a dictionary size. +- Some functions can work slower due to extra coding/encoding operations. + +Merge times in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)-engine tables can grow due to all the reasons described above. + +Possible values: + +- 1 — Usage of `LowCardinality` is not restricted. +- 0 — Usage of `LowCardinality` is restricted. + +Default value: 0. 
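A minimal sketch, assuming a fresh client session, of how the LowCardinality-related settings described above might be combined; the table `lc_demo` and the generated values are hypothetical:

```sql
-- Settings documented above; defaults are 0, 8192 and 0 respectively.
SET allow_suspicious_low_cardinality_types = 1;          -- permit LowCardinality over small fixed-size types
SET low_cardinality_max_dictionary_size = 8192;          -- cap the shared dictionary written to storage
SET low_cardinality_use_single_dictionary_for_part = 0;  -- allow several dictionaries per data part

CREATE TABLE lc_demo
(
    id  LowCardinality(UInt8),   -- accepted only because of the first setting above
    tag LowCardinality(String)
)
ENGINE = MergeTree() ORDER BY tuple();

INSERT INTO lc_demo
SELECT toUInt8(number % 10), concat('tag_', toString(number % 100))
FROM numbers(1000);
```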
+ + [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) diff --git a/docs/en/operations/system-tables.md b/docs/en/operations/system-tables.md index cc5456f5324..f364d4e3068 100644 --- a/docs/en/operations/system-tables.md +++ b/docs/en/operations/system-tables.md @@ -5,7 +5,7 @@ toc_title: System Tables # System Tables {#system-tables} -## Introduction +## Introduction {#system-tables-introduction} System tables provide information about: @@ -18,9 +18,12 @@ System tables: - Available only for reading data. - Can't be dropped or altered, but can be detached. -The `metric_log`, `query_log`, `query_thread_log`, `trace_log` system tables store data in a storage filesystem. Other system tables store their data in RAM. ClickHouse server creates such system tables at the start. +Most of system tables store their data in RAM. ClickHouse server creates such system tables at the start. -### Sources of System Metrics +The [metric_log](#system_tables-metric_log), [query_log](#system_tables-query_log), [query_thread_log](#system_tables-query_thread_log), [trace_log](#system_tables-trace_log) system tables store data in a storage filesystem. You can alter them or remove from a disk manually. If you remove one of that tables from a disk, the ClickHouse server creates the table again at the time of the next recording. A storage period for these tables is not limited, and ClickHouse server doesn't delete their data automatically. You need to organize removing of outdated logs by yourself. For example, you can use [TTL](../sql-reference/statements/alter.md#manipulations-with-table-ttl) settings for removing outdated log records. + + +### Sources of System Metrics {#system-tables-sources-of-system-metrics} For collecting system metrics ClickHouse server uses: @@ -587,97 +590,150 @@ Columns: - `source_file` (LowCardinality(String)) — Source file from which the logging was done. - `source_line` (UInt64) — Source line from which the logging was done. -## system.query\_log {#system_tables-query_log} +## system.query_log {#system_tables-query_log} -Contains information about execution of queries. For each query, you can see processing start time, duration of processing, error messages and other information. +Contains information about executed queries, for example, start time, duration of processing, error messages. !!! note "Note" The table doesn’t contain input data for `INSERT` queries. -ClickHouse creates this table only if the [query\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server parameter is specified. This parameter sets the logging rules, such as the logging interval or the name of the table the queries will be logged in. +You can change settings of queries logging in the [query_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) section of the server configuration. -To enable query logging, set the [log\_queries](settings/settings.md#settings-log-queries) parameter to 1. For details, see the [Settings](settings/settings.md) section. +You can disable queries logging by setting [log_queries = 0](settings/settings.md#settings-log-queries). We don't recommend to turn off logging because information in this table is important for solving issues. + +The flushing period of logs is set in `flush_interval_milliseconds` parameter of the [query_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server settings section. 
To force flushing logs, use the [SYSTEM FLUSH LOGS](../sql-reference/statements/system.md#query_language-system-flush_logs) query. + +ClickHouse doesn't delete logs from the table automatically. See [Introduction](#system-tables-introduction) for more details. The `system.query_log` table registers two kinds of queries: 1. Initial queries that were run directly by the client. 2. Child queries that were initiated by other queries (for distributed query execution). For these types of queries, information about the parent queries is shown in the `initial_*` columns. +Each query creates one or two rows in the `query_log` table, depending on the status (see the `type` column) of the query: + +1. If the query execution was successful, two rows with the `QueryStart` and `QueryFinish` types are created . +2. If an error occurred during query processing, two events with the `QueryStart` and `ExceptionWhileProcessing` types are created . +3. If an error occurred before launching the query, a single event with the `ExceptionBeforeStart` type is created. + Columns: -- `type` (`Enum8`) — Type of event that occurred when executing the query. Values: +- `type` ([Enum8](../sql-reference/data-types/enum.md)) — Type of an event that occurred when executing the query. Values: - `'QueryStart' = 1` — Successful start of query execution. - `'QueryFinish' = 2` — Successful end of query execution. - `'ExceptionBeforeStart' = 3` — Exception before the start of query execution. - `'ExceptionWhileProcessing' = 4` — Exception during the query execution. -- `event_date` (Date) — Query starting date. -- `event_time` (DateTime) — Query starting time. -- `query_start_time` (DateTime) — Start time of query execution. -- `query_duration_ms` (UInt64) — Duration of query execution. -- `read_rows` (UInt64) — Number of read rows. -- `read_bytes` (UInt64) — Number of read bytes. -- `written_rows` (UInt64) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. -- `written_bytes` (UInt64) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. -- `result_rows` (UInt64) — Number of rows in the result. -- `result_bytes` (UInt64) — Number of bytes in the result. -- `memory_usage` (UInt64) — Memory consumption by the query. -- `query` (String) — Query string. -- `exception` (String) — Exception message. -- `stack_trace` (String) — Stack trace (a list of methods called before the error occurred). An empty string, if the query is completed successfully. -- `is_initial_query` (UInt8) — Query type. Possible values: +- `event_date` ([Date](../sql-reference/data-types/date.md)) — Query starting date. +- `event_time` ([DateTime](../sql-reference/data-types/datetime.md)) — Query starting time. +- `query_start_time` ([DateTime](../sql-reference/data-types/datetime.md)) — Start time of query execution. +- `query_duration_ms` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution in milliseconds. +- `read_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number or rows read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends it's `read_rows` value, and the server-initiator of the query summarize all received and local values. The cache volumes doesn't affect this value. 
+- `read_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number or bytes read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_bytes` includes the total number of rows read at all replicas. Each replica sends it's `read_bytes` value, and the server-initiator of the query summarize all received and local values. The cache volumes doesn't affect this value. +- `written_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. +- `written_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. +- `result_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in a result of the `SELECT` query, or a number of rows in the `INSERT` query. +- `result_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result. +- `memory_usage` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query. +- `query` ([String](../sql-reference/data-types/string.md)) — Query string. +- `exception` ([String](../sql-reference/data-types/string.md)) — Exception message. +- `exception_code` ([Int32](../sql-reference/data-types/int-uint.md)) — Code of an exception. +- `stack_trace` ([String](../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string, if the query was completed successfully. +- `is_initial_query` ([UInt8](../sql-reference/data-types/int-uint.md)) — Query type. Possible values: - 1 — Query was initiated by the client. - - 0 — Query was initiated by another query for distributed query execution. -- `user` (String) — Name of the user who initiated the current query. -- `query_id` (String) — ID of the query. -- `address` (IPv6) — IP address that was used to make the query. -- `port` (UInt16) — The client port that was used to make the query. -- `initial_user` (String) — Name of the user who ran the initial query (for distributed query execution). -- `initial_query_id` (String) — ID of the initial query (for distributed query execution). -- `initial_address` (IPv6) — IP address that the parent query was launched from. -- `initial_port` (UInt16) — The client port that was used to make the parent query. -- `interface` (UInt8) — Interface that the query was initiated from. Possible values: + - 0 — Query was initiated by another query as part of distributed query execution. +- `user` ([String](../sql-reference/data-types/string.md)) — Name of the user who initiated the current query. +- `query_id` ([String](../sql-reference/data-types/string.md)) — ID of the query. +- `address` ([IPv6](../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query. +- `port` ([UInt16](../sql-reference/data-types/int-uint.md)) — The client port that was used to make the query. +- `initial_user` ([String](../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution). +- `initial_query_id` ([String](../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). 
+- `initial_address` ([IPv6](../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from. +- `initial_port` ([UInt16](../sql-reference/data-types/int-uint.md)) — The client port that was used to make the parent query. +- `interface` ([UInt8](../sql-reference/data-types/int-uint.md)) — Interface that the query was initiated from. Possible values: - 1 — TCP. - 2 — HTTP. -- `os_user` (String) — OS’s username who runs [clickhouse-client](../interfaces/cli.md). -- `client_hostname` (String) — Hostname of the client machine where the [clickhouse-client](../interfaces/cli.md) or another TCP client is run. -- `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name. -- `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client. -- `client_version_major` (UInt32) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. -- `client_version_minor` (UInt32) — Minor version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. -- `client_version_patch` (UInt32) — Patch component of the [clickhouse-client](../interfaces/cli.md) or another TCP client version. +- `os_user` ([String](../sql-reference/data-types/string.md)) — Operating system username who runs [clickhouse-client](../interfaces/cli.md). +- `client_hostname` ([String](../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../interfaces/cli.md) or another TCP client is run. +- `client_name` ([String](../sql-reference/data-types/string.md)) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name. +- `client_revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client. +- `client_version_major` ([UInt32](../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. +- `client_version_minor` ([UInt32](../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. +- `client_version_patch` ([UInt32](../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../interfaces/cli.md) or another TCP client version. - `http_method` (UInt8) — HTTP method that initiated the query. Possible values: - 0 — The query was launched from the TCP interface. - 1 — `GET` method was used. - 2 — `POST` method was used. -- `http_user_agent` (String) — The `UserAgent` header passed in the HTTP request. -- `quota_key` (String) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`). -- `revision` (UInt32) — ClickHouse revision. -- `thread_numbers` (Array(UInt32)) — Number of threads that are participating in query execution. -- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics. The description of them could be found in the table [system.events](#system_tables-events) -- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics that are listed in the `ProfileEvents.Names` column. -- `Settings.Names` (Array(String)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1. -- `Settings.Values` (Array(String)) — Values of settings that are listed in the `Settings.Names` column. 
+- `http_user_agent` ([String](../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request. +- `quota_key` ([String](../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](quotas.md) setting (see `keyed`). +- `revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — ClickHouse revision. +- `thread_numbers` ([Array(UInt32)](../sql-reference/data-types/array.md)) — Number of threads that are participating in query execution. +- `ProfileEvents.Names` ([Array(String)](../sql-reference/data-types/array.md)) — Counters that measure different metrics. The description of them could be found in the table [system.events](#system_tables-events) +- `ProfileEvents.Values` ([Array(UInt64)](../sql-reference/data-types/array.md)) — Values of metrics that are listed in the `ProfileEvents.Names` column. +- `Settings.Names` ([Array(String)](../sql-reference/data-types/array.md)) — Names of settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1. +- `Settings.Values` ([Array(String)](../sql-reference/data-types/array.md)) — Values of settings that are listed in the `Settings.Names` column. -Each query creates one or two rows in the `query_log` table, depending on the status of the query: +**Example** -1. If the query execution is successful, two events with types 1 and 2 are created (see the `type` column). -2. If an error occurred during query processing, two events with types 1 and 4 are created. -3. If an error occurred before launching the query, a single event with type 3 is created. +``` sql +SELECT * FROM system.query_log LIMIT 1 FORMAT Vertical; +``` -By default, logs are added to the table at intervals of 7.5 seconds. You can set this interval in the [query\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server setting (see the `flush_interval_milliseconds` parameter). To flush the logs forcibly from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query. +``` text +Row 1: +────── +type: QueryStart +event_date: 2020-05-13 +event_time: 2020-05-13 14:02:28 +query_start_time: 2020-05-13 14:02:28 +query_duration_ms: 0 +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +memory_usage: 0 +query: SELECT 1 +exception_code: 0 +exception: +stack_trace: +is_initial_query: 1 +user: default +query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +address: ::ffff:127.0.0.1 +port: 57720 +initial_user: default +initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +initial_address: ::ffff:127.0.0.1 +initial_port: 57720 +interface: 1 +os_user: bayonet +client_hostname: clickhouse.ru-central1.internal +client_name: ClickHouse client +client_revision: 54434 +client_version_major: 20 +client_version_minor: 4 +client_version_patch: 1 +http_method: 0 +http_user_agent: +quota_key: +revision: 54434 +thread_ids: [] +ProfileEvents.Names: [] +ProfileEvents.Values: [] +Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage'] +Settings.Values: ['0','random','1','10000000000'] -When the table is deleted manually, it will be automatically created on the fly. Note that all the previous logs will be deleted. +``` +**See Also** -!!! note "Note" - The storage period for logs is unlimited. Logs aren’t automatically deleted from the table. You need to organize the removal of outdated logs yourself. 
+- [system.query_thread_log](#system_tables-query_thread_log) — This table contains information about each query execution thread. -You can specify an arbitrary partitioning key for the `system.query_log` table in the [query\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server setting (see the `partition_by` parameter). - -## system.query\_thread\_log {#system_tables-query-thread-log} +## system.query_thread_log {#system_tables-query_thread_log} The table contains information about each query execution thread. -ClickHouse creates this table only if the [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-thread-log) server parameter is specified. This parameter sets the logging rules, such as the logging interval or the name of the table the queries will be logged in. +ClickHouse creates this table only if the [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server parameter is specified. This parameter sets the logging rules, such as the logging interval or the name of the table the queries will be logged in. To enable query logging, set the [log\_query\_threads](settings/settings.md#settings-log-query-threads) parameter to 1. For details, see the [Settings](settings/settings.md) section. @@ -729,14 +785,14 @@ Columns: - `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics for this thread. The description of them could be found in the table [system.events](#system_tables-events) - `ProfileEvents.Values` (Array(UInt64)) — Values of metrics for this thread that are listed in the `ProfileEvents.Names` column. -By default, logs are added to the table at intervals of 7.5 seconds. You can set this interval in the [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-thread-log) server setting (see the `flush_interval_milliseconds` parameter). To flush the logs forcibly from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query. +By default, logs are added to the table at intervals of 7.5 seconds. You can set this interval in the [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server setting (see the `flush_interval_milliseconds` parameter). To flush the logs forcibly from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query. When the table is deleted manually, it will be automatically created on the fly. Note that all the previous logs will be deleted. !!! note "Note" The storage period for logs is unlimited. Logs aren’t automatically deleted from the table. You need to organize the removal of outdated logs yourself. -You can specify an arbitrary partitioning key for the `system.query_thread_log` table in the [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-thread-log) server setting (see the `partition_by` parameter). +You can specify an arbitrary partitioning key for the `system.query_thread_log` table in the [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server setting (see the `partition_by` parameter). 
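+
+For example, a minimal check that thread-level records are reaching the table is to flush the log buffers and read the newest rows. This is only a sketch: it assumes that [log\_query\_threads](settings/settings.md#settings-log-query-threads) is enabled, and the selected columns are just an illustrative subset.
+
+``` sql
+-- Flush the in-memory log buffers into the system tables,
+-- then inspect the most recent thread entries.
+SYSTEM FLUSH LOGS;
+
+SELECT event_time, query_id, query
+FROM system.query_thread_log
+ORDER BY event_time DESC
+LIMIT 5;
+```
+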
## system.trace\_log {#system_tables-trace_log} diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/troubleshooting.md index cb7c81fffa8..1a33aa3f689 100644 --- a/docs/en/operations/troubleshooting.md +++ b/docs/en/operations/troubleshooting.md @@ -116,7 +116,7 @@ Check: Check: - The [tcp\_port\_secure](server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) setting. - - Settings for [SSL sertificates](server-configuration-parameters/settings.md#server_configuration_parameters-openssl). + - Settings for [SSL certificates](server-configuration-parameters/settings.md#server_configuration_parameters-openssl). Use proper parameters while connecting. For example, use the `port_secure` parameter with `clickhouse_client`. diff --git a/docs/en/sql-reference/data-types/aggregatefunction.md b/docs/en/sql-reference/data-types/aggregatefunction.md index 842b5a87578..f214db20ea7 100644 --- a/docs/en/sql-reference/data-types/aggregatefunction.md +++ b/docs/en/sql-reference/data-types/aggregatefunction.md @@ -1,5 +1,5 @@ --- -toc_priority: 52 +toc_priority: 53 toc_title: AggregateFunction --- diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md index 70596525ca0..98c36d228e3 100644 --- a/docs/en/sql-reference/data-types/array.md +++ b/docs/en/sql-reference/data-types/array.md @@ -1,5 +1,5 @@ --- -toc_priority: 51 +toc_priority: 52 toc_title: Array(T) --- diff --git a/docs/en/sql-reference/data-types/lowcardinality.md b/docs/en/sql-reference/data-types/lowcardinality.md new file mode 100644 index 00000000000..74eac6b54cd --- /dev/null +++ b/docs/en/sql-reference/data-types/lowcardinality.md @@ -0,0 +1,59 @@ +--- +toc_priority: 51 +toc_title: LowCardinality +--- + +# LowCardinality Data Type {#lowcardinality-data-type} + +Changes the internal representation of other data types to be dictionary-encoded. + +## Syntax {#lowcardinality-syntax} + +```sql +LowCardinality(data_type) +``` + +**Parameters** + +- `data_type` — [String](string.md), [FixedString](fixedstring.md), [Date](date.md), [DateTime](datetime.md), and numbers excepting [Decimal](decimal.md). `LowCardinality` is not efficient for some data types, see the [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) setting description. + +## Description {#lowcardinality-dscr} + +`LowCardinality` is a superstructure that changes a data storage method and rules of data processing. ClickHouse applies [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) to `LowCardinality`-columns. Operating with dictionary encoded data significantly increases performance of [SELECT](../statements/select/index.md) queries for many applications. + +The efficiency of using `LowCarditality` data type depends on data diversity. If a dictionary contains less than 10,000 distinct values, then ClickHouse mostly shows higher efficiency of data reading and storing. If a dictionary contains more than 100,000 distinct values, then ClickHouse can perform worse in comparison with using ordinary data types. + +Consider using `LowCardinality` instead of [Enum](enum.md) when working with strings. `LowCardinality` provides more flexibility in use and often reveals the same or higher efficiency. 
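+
+For instance, an existing `String` column can usually be re-encoded in place. The statement below is only a sketch: the table name `hits` and the column name `browser` are hypothetical, and the creation example follows in the next section.
+
+```sql
+-- Switch an existing String column to dictionary encoding.
+ALTER TABLE hits MODIFY COLUMN browser LowCardinality(String);
+```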
+ +## Example + +Create a table with a `LowCardinality`-column: + +```sql +CREATE TABLE lc_t +( + `id` UInt16, + `strings` LowCardinality(String) +) +ENGINE = MergeTree() +ORDER BY id +``` + +## Related Settings and Functions + +Settings: + +- [low_cardinality_max_dictionary_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size) +- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part) +- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format) +- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) + +Functions: + +- [toLowCardinality](../functions/type-conversion-functions.md#tolowcardinality) + +## See Also + +- [A Magical Mystery Tour of the LowCardinality Data Type](https://www.altinity.com/blog/2019/3/27/low-cardinality). +- [Reducing Clickhouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/). +- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/yandex/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf). \ No newline at end of file diff --git a/docs/en/sql-reference/data-types/nullable.md b/docs/en/sql-reference/data-types/nullable.md index 02e25060be3..dfa2d8a3b35 100644 --- a/docs/en/sql-reference/data-types/nullable.md +++ b/docs/en/sql-reference/data-types/nullable.md @@ -1,5 +1,5 @@ --- -toc_priority: 54 +toc_priority: 55 toc_title: Nullable --- diff --git a/docs/en/sql-reference/data-types/tuple.md b/docs/en/sql-reference/data-types/tuple.md index dce267fb781..66908e2c530 100644 --- a/docs/en/sql-reference/data-types/tuple.md +++ b/docs/en/sql-reference/data-types/tuple.md @@ -1,5 +1,5 @@ --- -toc_priority: 53 +toc_priority: 54 toc_title: Tuple(T1, T2, ...) --- diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index cf9e3251486..5a3bb264a84 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -516,7 +516,7 @@ Result: **See Also** -- \[ISO 8601 announcement by @xkcd\](https://xkcd.com/1179/) +- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/) - [RFC 1123](https://tools.ietf.org/html/rfc1123) - [toDate](#todate) - [toDateTime](#todatetime) @@ -529,4 +529,129 @@ Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it r Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it returns zero date or zero date time when it encounters a date format that cannot be processed. +## toLowCardinality {#tolowcardinality} + +Converts input parameter to the [LowCardianlity](../data-types/lowcardinality.md) version of same data type. + +To convert data from the `LowCardinality` data type use the [CAST](#type_conversion_function-cast) function. For example, `CAST(x as String)`. + +**Syntax** + +```sql +toLowCardinality(expr) +``` + +**Parameters** + +- `expr` — [Expression](../syntax.md#syntax-expressions) resulting in one of the [supported data types](../data-types/index.md#data_types). + + +**Returned values** + +- Result of `expr`. 
+ +Type: `LowCardinality(expr_result_type)` + +**Example** + +Query: + +```sql +SELECT toLowCardinality('1') +``` + +Result: + +```text +┌─toLowCardinality('1')─┐ +│ 1 │ +└───────────────────────┘ +``` + + +## toUnixTimestamp64Milli +## toUnixTimestamp64Micro +## toUnixTimestamp64Nano + +Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Input value is scaled up or down appropriately depending on it precision. Please note that output value is a timestamp in UTC, not in timezone of `DateTime64`. + +**Syntax** + +``` sql +toUnixTimestamp64Milli(value) +``` + +**Parameters** + +- `value` — DateTime64 value with any precision. + +**Returned value** + +- `value` converted to the `Int64` data type. + +**Examples** + +Query: + +``` sql +WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 +SELECT toUnixTimestamp64Milli(dt64) +``` + +Result: + +``` text +┌─toUnixTimestamp64Milli(dt64)─┐ +│ 1568650812345 │ +└──────────────────────────────┘ +``` + +``` sql +WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 +SELECT toUnixTimestamp64Nano(dt64) +``` + +Result: + +``` text +┌─toUnixTimestamp64Nano(dt64)─┐ +│ 1568650812345678000 │ +└─────────────────────────────┘ +``` + +## fromUnixTimestamp64Milli +## fromUnixTimestamp64Micro +## fromUnixTimestamp64Nano + +Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and optional timezone. Input value is scaled up or down appropriately depending on it's precision. Please note that input value is treated as UTC timestamp, not timestamp at given (or implicit) timezone. + +**Syntax** + +``` sql +fromUnixTimestamp64Milli(value [, ti]) +``` + +**Parameters** + +- `value` — `Int64` value with any precision. +- `timezone` — `String` (optional) timezone name of the result. + +**Returned value** + +- `value` converted to the `DateTime64` data type. + +**Examples** + +``` sql +WITH CAST(1234567891011, 'Int64') AS i64 +SELECT fromUnixTimestamp64Milli(i64, 'UTC') +``` + +``` text +┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐ +│ 2009-02-13 23:31:31.011 │ +└──────────────────────────────────────┘ +``` + + [Original article](https://clickhouse.tech/docs/en/query_language/functions/type_conversion_functions/) diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index 143bd42b08e..34477529649 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -117,6 +117,10 @@ Returns the part of the domain that includes top-level subdomains up to the “f For example, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`. +### port(URL[, default_port = 0]) {#port} + +Returns the port or `default_port` if there is no port in the URL (or in case of validation error). + ### path {#path} Returns the path. Example: `/top/news.html` The path does not include the query string. diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index c636e1dab8b..5ac3f4a0e25 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -44,6 +44,8 @@ Modifies how matching by "join keys" is performed !!! note "Note" The default strictness value can be overriden using [join\_default\_strictness](../../../operations/settings/settings.md#settings-join_default_strictness) setting. 
+ Also the behavior of ClickHouse server for `ANY JOIN` operations depends on the [any_join_distinct_right_table_keys](../../../operations/settings/settings.md#any_join_distinct_right_table_keys) setting. + ### ASOF JOIN Usage diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index c664580c659..ccde7a945ac 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -574,11 +574,11 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## query\_thread\_log {#server_configuration_parameters-query-thread-log} +## query\_thread\_log {#server_configuration_parameters-query_thread_log} Настройка логирования потоков выполнения запросов, принятых с настройкой [log\_query\_threads=1](../settings/settings.md#settings-log-query-threads). -Запросы логируются не в отдельный файл, а в системную таблицу [system.query\_thread\_log](../../operations/server-configuration-parameters/settings.md#system_tables-query-thread-log). Вы можете изменить название этой таблицы в параметре `table` (см. ниже). +Запросы логируются не в отдельный файл, а в системную таблицу [system.query\_thread\_log](../../operations/server-configuration-parameters/settings.md#system_tables-query_thread_log). Вы можете изменить название этой таблицы в параметре `table` (см. ниже). При настройке логирования используются следующие параметры: diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 56c3042bfa3..4dd43e9607b 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -536,7 +536,7 @@ log_queries=1 Установка логирования информации о потоках выполнения запроса. -Лог информации о потоках выполнения запросов, переданных в ClickHouse с этой установкой, записывается согласно правилам конфигурационного параметра сервера [query\_thread\_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query-thread-log). +Лог информации о потоках выполнения запросов, переданных в ClickHouse с этой установкой, записывается согласно правилам конфигурационного параметра сервера [query\_thread\_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log). Пример: diff --git a/docs/ru/operations/system-tables.md b/docs/ru/operations/system-tables.md index ae0e67a4515..8954a484560 100644 --- a/docs/ru/operations/system-tables.md +++ b/docs/ru/operations/system-tables.md @@ -1,4 +1,7 @@ -# Системные таблицы {#sistemnye-tablitsy} +# Системные таблицы {#system-tables} + + +## Введение {#system-tables-introduction} Системные таблицы используются для реализации части функциональности системы, а также предоставляют доступ к информации о работе системы. Вы не можете удалить системную таблицу (хотя можете сделать DETACH). @@ -544,182 +547,156 @@ CurrentMetric_ReplicatedChecks: 0 - `source_file` (LowCardinality(String)) — Исходный файл, из которого была сделана запись. - `source_line` (UInt64) — Исходная строка, из которой была сделана запись. -## system.query\_log {#system_tables-query_log} +## system.query_log {#system_tables-query_log} -Содержит информацию о выполнении запросов. Для каждого запроса вы можете увидеть время начала обработки, продолжительность обработки, сообщения об ошибках и другую информацию. 
+Содержит информацию о выполняемых запросах, например, время начала обработки, продолжительность обработки, сообщения об ошибках. !!! note "Внимание" Таблица не содержит входных данных для запросов `INSERT`. -ClickHouse создаёт таблицу только в том случае, когда установлен конфигурационный параметр сервера [query\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log). Параметр задаёт правила ведения лога, такие как интервал логирования или имя таблицы, в которую будут логгироваться запросы. +Настойки логгирования можно изменить в секции серверной конфигурации [query_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log). -Чтобы включить логирование, задайте значение параметра [log\_queries](settings/settings.md#settings-log-queries) равным 1. Подробности смотрите в разделе [Настройки](settings/settings.md#settings). +Можно отключить логгирование настройкой [log_queries = 0](settings/settings.md#settings-log-queries). По-возможности, не отключайте логгирование, поскольку информация из таблицы важна при решении проблем. + +Период сброса логов в таблицу задаётся параметром `flush_interval_milliseconds` в конфигурационной секции [query_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log). Чтобы принудительно записать логи из буффера памяти в таблицу, используйте запрос [SYSTEM FLUSH LOGS](../sql-reference/statements/system.md#query_language-system-flush_logs). + +ClickHouse не удаляет логи из таблица автоматически. Смотрите [Введение](#system-tables-introduction). + +Можно указать произвольный ключ партиционирования для таблицы `system.query_log` в конфигурации [query\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) (параметр `partition_by`). + + + +Если таблицу удалить вручную, она создается заново автоматически «на лету». При этом все логи на момент удаления таблицы будут убраны. Таблица `system.query_log` содержит информацию о двух видах запросов: 1. Первоначальные запросы, которые были выполнены непосредственно клиентом. 2. Дочерние запросы, инициированные другими запросами (для выполнения распределенных запросов). Для дочерних запросов информация о первоначальном запросе содержится в столбцах `initial_*`. +В зависимости от статуса (столбец `type`) каждый запрос создаёт одну или две строки в таблице `query_log`: + +1. Если запрос выполнен успешно, создаются два события типа `QueryStart` и `QueryFinish`. +2. Если во время обработки запроса возникла ошибка, создаются два события с типами `QueryStart` и `ExceptionWhileProcessing`. +3. Если ошибка произошла ещё до запуска запроса, создается одно событие с типом `ExceptionBeforeStart`. + Столбцы: -- `type` (`Enum8`) — тип события, произошедшего при выполнении запроса. Значения: +- `type` ([Enum8](../sql-reference/data-types/enum.md)) — тип события, произошедшего при выполнении запроса. Значения: - `'QueryStart' = 1` — успешное начало выполнения запроса. - `'QueryFinish' = 2` — успешное завершение выполнения запроса. - `'ExceptionBeforeStart' = 3` — исключение перед началом обработки запроса. - `'ExceptionWhileProcessing' = 4` — исключение во время обработки запроса. -- `event_date` (Date) — дата начала запроса. -- `event_time` (DateTime) — время начала запроса. -- `query_start_time` (DateTime) — время начала обработки запроса. -- `query_duration_ms` (UInt64) — длительность обработки запроса. -- `read_rows` (UInt64) — количество прочитанных строк. 
-- `read_bytes` (UInt64) — количество прочитанных байтов. -- `written_rows` (UInt64) — количество записанных строк для запросов `INSERT`. Для других запросов, значение столбца 0. -- `written_bytes` (UInt64) — объём записанных данных в байтах для запросов `INSERT`. Для других запросов, значение столбца 0. -- `result_rows` (UInt64) — количество строк в результате. -- `result_bytes` (UInt64) — объём результата в байтах. -- `memory_usage` (UInt64) — потребление RAM запросом. -- `query` (String) — текст запроса. -- `exception` (String) — сообщение исключения, если запрос завершился по исключению. -- `stack_trace` (String) — трассировка (список функций, последовательно вызванных перед ошибкой). Пустая строка, если запрос успешно завершен. -- `is_initial_query` (UInt8) — вид запроса. Возможные значения: +- `event_date` ([Date](../sql-reference/data-types/date.md)) — дата начала запроса. +- `event_time` ([DateTime](../sql-reference/data-types/datetime.md)) — время начала запроса. +- `query_start_time` ([DateTime](../sql-reference/data-types/datetime.md)) — время начала обработки запроса. +- `query_duration_ms` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — длительность выполнения запроса в миллисекундах. +- `read_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Общее количество строк, считанных из всех таблиц и табличных функций, участвующих в запросе. Включает в себя обычные подзапросы, подзапросы для `IN` и `JOIN`. Для распределенных запросов `read_rows` включает в себя общее количество строк, прочитанных на всех репликах. Каждая реплика передает собственное значение `read_rows`, а сервер-инициатор запроса суммирует все полученные и локальные значения. Объемы кэша не учитываюся. +- `read_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — Общее количество байтов, считанных из всех таблиц и табличных функций, участвующих в запросе. Включает в себя обычные подзапросы, подзапросы для `IN` и `JOIN`. Для распределенных запросов `read_bytes` включает в себя общее количество байтов, прочитанных на всех репликах. Каждая реплика передает собственное значение `read_bytes`, а сервер-инициатор запроса суммирует все полученные и локальные значения. Объемы кэша не учитываюся. +- `written_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — количество записанных строк для запросов `INSERT`. Для других запросов, значение столбца 0. +- `written_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — объём записанных данных в байтах для запросов `INSERT`. Для других запросов, значение столбца 0. +- `result_rows` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — количество строк в результате запроса `SELECT` или количество строк в запросе `INSERT`. +- `result_bytes` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — объём RAM в байтах, использованный для хранения результата запроса. +- `memory_usage` ([UInt64](../sql-reference/data-types/int-uint.md#uint-ranges)) — потребление RAM запросом. +- `query` ([String](../sql-reference/data-types/string.md)) — текст запроса. +- `exception` ([String](../sql-reference/data-types/string.md)) — сообщение исключения, если запрос завершился по исключению. +- `exception_code` ([Int32](../sql-reference/data-types/int-uint.md)) — код исключения. +- `stack_trace` ([String](../sql-reference/data-types/string.md)) — [stack trace](https://en.wikipedia.org/wiki/Stack_trace). Пустая строка, если запрос успешно завершен. 
+- `is_initial_query` ([UInt8](../sql-reference/data-types/int-uint.md)) — вид запроса. Возможные значения: - 1 — запрос был инициирован клиентом. - - 0 — запрос был инициирован другим запросом при распределенном запросе. -- `user` (String) — пользователь, запустивший текущий запрос. -- `query_id` (String) — ID запроса. -- `address` (IPv6) — IP адрес, с которого пришел запрос. -- `port` (UInt16) — порт, с которого клиент сделал запрос -- `initial_user` (String) — пользователь, запустивший первоначальный запрос (для распределенных запросов). -- `initial_query_id` (String) — ID родительского запроса. -- `initial_address` (IPv6) — IP адрес, с которого пришел родительский запрос. -- `initial_port` (UInt16) — порт, с которого клиент сделал родительский запрос. -- `interface` (UInt8) — интерфейс, с которого ушёл запрос. Возможные значения: + - 0 — запрос был инициирован другим запросом при выполнении распределенного запроса. +- `user` ([String](../sql-reference/data-types/string.md)) — пользователь, запустивший текущий запрос. +- `query_id` ([String](../sql-reference/data-types/string.md)) — ID запроса. +- `address` ([IPv6](../sql-reference/data-types/domains/ipv6.md)) — IP адрес, с которого пришел запрос. +- `port` ([UInt16](../sql-reference/data-types/int-uint.md)) — порт, с которого клиент сделал запрос +- `initial_user` ([String](../sql-reference/data-types/string.md)) — пользователь, запустивший первоначальный запрос (для распределенных запросов). +- `initial_query_id` ([String](../sql-reference/data-types/string.md)) — ID родительского запроса. +- `initial_address` ([IPv6](../sql-reference/data-types/domains/ipv6.md)) — IP адрес, с которого пришел родительский запрос. +- `initial_port` ([UInt16](../sql-reference/data-types/int-uint.md)) — порт, с которого клиент сделал родительский запрос. +- `interface` ([UInt8](../sql-reference/data-types/int-uint.md)) — интерфейс, с которого ушёл запрос. Возможные значения: - 1 — TCP. - 2 — HTTP. -- `os_user` (String) — имя пользователя в OS, который запустил [clickhouse-client](../interfaces/cli.md). -- `client_hostname` (String) — имя сервера, с которого присоединился [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. -- `client_name` (String) — [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. -- `client_revision` (UInt32) — ревизия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. -- `client_version_major` (UInt32) — старшая версия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. -- `client_version_minor` (UInt32) — младшая версия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. -- `client_version_patch` (UInt32) — патч [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. -- `http_method` (UInt8) — HTTP метод, инициировавший запрос. Возможные значения: +- `os_user` ([String](../sql-reference/data-types/string.md)) — имя пользователя операционной системы, который запустил [clickhouse-client](../interfaces/cli.md). +- `client_hostname` ([String](../sql-reference/data-types/string.md)) — имя сервера, с которого присоединился [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. +- `client_name` ([String](../sql-reference/data-types/string.md)) — [clickhouse-client](../interfaces/cli.md) или другой TCP клиент. +- `client_revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — ревизия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. 
+- `client_version_major` ([UInt32](../sql-reference/data-types/int-uint.md)) — старшая версия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. +- `client_version_minor` ([UInt32](../sql-reference/data-types/int-uint.md)) — младшая версия [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. +- `client_version_patch` ([UInt32](../sql-reference/data-types/int-uint.md)) — патч [clickhouse-client](../interfaces/cli.md) или другого TCP клиента. +- `http_method` ([UInt8](../sql-reference/data-types/int-uint.md)) — HTTP метод, инициировавший запрос. Возможные значения: - 0 — запрос запущен с интерфейса TCP. - 1 — `GET`. - 2 — `POST`. -- `http_user_agent` (String) — HTTP заголовок `UserAgent`. -- `quota_key` (String) — «ключ квоты» из настроек [квот](quotas.md) (см. `keyed`). -- `revision` (UInt32) — ревизия ClickHouse. -- `thread_numbers` (Array(UInt32)) — количество потоков, участвующих в обработке запросов. -- `ProfileEvents.Names` (Array(String)) — Счетчики для изменения различных метрик. Описание метрик можно получить из таблицы [system.events](#system_tables-events)(\#system\_tables-events -- `ProfileEvents.Values` (Array(UInt64)) — метрики, перечисленные в столбце `ProfileEvents.Names`. -- `Settings.Names` (Array(String)) — имена настроек, которые меняются, когда клиент выполняет запрос. Чтобы разрешить логирование изменений настроек, установите параметр `log_query_settings` равным 1. -- `Settings.Values` (Array(String)) — Значения настроек, которые перечислены в столбце `Settings.Names`. +- `http_user_agent` ([String](../sql-reference/data-types/string.md)) — HTTP заголовок `UserAgent`. +- `quota_key` ([String](../sql-reference/data-types/string.md)) — «ключ квоты» из настроек [квот](quotas.md) (см. `keyed`). +- `revision` ([UInt32](../sql-reference/data-types/int-uint.md)) — ревизия ClickHouse. +- `thread_numbers` ([Array(UInt32)](../sql-reference/data-types/array.md)) — количество потоков, участвующих в обработке запросов. +- `ProfileEvents.Names` ([Array(String)](../sql-reference/data-types/array.md)) — Счетчики для изменения различных метрик. Описание метрик можно получить из таблицы [system.events](#system_tables-events)(\#system\_tables-events +- `ProfileEvents.Values` ([Array(UInt64)](../sql-reference/data-types/array.md)) — метрики, перечисленные в столбце `ProfileEvents.Names`. +- `Settings.Names` ([Array(String)](../sql-reference/data-types/array.md)) — имена настроек, которые меняются, когда клиент выполняет запрос. Чтобы разрешить логирование изменений настроек, установите параметр `log_query_settings` равным 1. +- `Settings.Values` ([Array(String)](../sql-reference/data-types/array.md)) — Значения настроек, которые перечислены в столбце `Settings.Names`. -Каждый запрос создаёт одну или две строки в таблице `query_log`, в зависимости от статуса запроса: +**Пример** -1. Если запрос выполнен успешно, создаются два события типа 1 и 2 (смотрите столбец `type`). -2. Если во время обработки запроса произошла ошибка, создаются два события с типами 1 и 4. -3. Если ошибка произошла до запуска запроса, создается одно событие с типом 3. +``` sql +SELECT * FROM system.query_log LIMIT 1 FORMAT Vertical; +``` -По умолчанию, строки добавляются в таблицу логирования с интервалом в 7,5 секунд. Можно задать интервал в конфигурационном параметре сервера [query\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) (смотрите параметр `flush_interval_milliseconds`). 
Чтобы принудительно записать логи из буффера памяти в таблицу, используйте запрос `SYSTEM FLUSH LOGS`. +``` text +Row 1: +────── +type: QueryStart +event_date: 2020-05-13 +event_time: 2020-05-13 14:02:28 +query_start_time: 2020-05-13 14:02:28 +query_duration_ms: 0 +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +memory_usage: 0 +query: SELECT 1 +exception_code: 0 +exception: +stack_trace: +is_initial_query: 1 +user: default +query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +address: ::ffff:127.0.0.1 +port: 57720 +initial_user: default +initial_query_id: 5e834082-6f6d-4e34-b47b-cd1934f4002a +initial_address: ::ffff:127.0.0.1 +initial_port: 57720 +interface: 1 +os_user: bayonet +client_hostname: clickhouse.ru-central1.internal +client_name: ClickHouse client +client_revision: 54434 +client_version_major: 20 +client_version_minor: 4 +client_version_patch: 1 +http_method: 0 +http_user_agent: +quota_key: +revision: 54434 +thread_ids: [] +ProfileEvents.Names: [] +ProfileEvents.Values: [] +Settings.Names: ['use_uncompressed_cache','load_balancing','log_queries','max_memory_usage'] +Settings.Values: ['0','random','1','10000000000'] -Если таблицу удалить вручную, она пересоздастся автоматически «на лету». При этом все логи на момент удаления таблицы будут удалены. +``` +**Смотрите также** -!!! note "Примечание" - Срок хранения логов не ограничен. Логи не удаляются из таблицы автоматически. Вам необходимо самостоятельно организовать удаление устаревших логов. +- [system.query_thread_log](#system_tables-query_thread_log) — в этой таблице содержится информация о цепочке каждого выполненного запроса. -Можно указать произвольный ключ партиционирования для таблицы `system.query_log` в конфигурации [query\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) (параметр `partition_by`). - -## system.query\_log {#system_tables-query_log} - -Contains information about execution of queries. For each query, you can see processing start time, duration of processing, error messages and other information. - -!!! note "Note" - The table doesn’t contain input data for `INSERT` queries. - -ClickHouse creates this table only if the [query\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server parameter is specified. This parameter sets the logging rules, such as the logging interval or the name of the table the queries will be logged in. - -To enable query logging, set the [log\_queries](settings/settings.md#settings-log-queries) parameter to 1. For details, see the [Settings](settings/settings.md) section. - -The `system.query_log` table registers two kinds of queries: - -1. Initial queries that were run directly by the client. -2. Child queries that were initiated by other queries (for distributed query execution). For these types of queries, information about the parent queries is shown in the `initial_*` columns. - -Columns: - -- `type` (`Enum8`) — Type of event that occurred when executing the query. Values: - - `'QueryStart' = 1` — Successful start of query execution. - - `'QueryFinish' = 2` — Successful end of query execution. - - `'ExceptionBeforeStart' = 3` — Exception before the start of query execution. - - `'ExceptionWhileProcessing' = 4` — Exception during the query execution. -- `event_date` (Date) — Query starting date. -- `event_time` (DateTime) — Query starting time. -- `query_start_time` (DateTime) — Start time of query execution. 
-- `query_duration_ms` (UInt64) — Duration of query execution. -- `read_rows` (UInt64) — Number of read rows. -- `read_bytes` (UInt64) — Number of read bytes. -- `written_rows` (UInt64) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. -- `written_bytes` (UInt64) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. -- `result_rows` (UInt64) — Number of rows in the result. -- `result_bytes` (UInt64) — Number of bytes in the result. -- `memory_usage` (UInt64) — Memory consumption by the query. -- `query` (String) — Query string. -- `exception` (String) — Exception message. -- `stack_trace` (String) — Stack trace (a list of methods called before the error occurred). An empty string, if the query is completed successfully. -- `is_initial_query` (UInt8) — Query type. Possible values: - - 1 — Query was initiated by the client. - - 0 — Query was initiated by another query for distributed query execution. -- `user` (String) — Name of the user who initiated the current query. -- `query_id` (String) — ID of the query. -- `address` (IPv6) — IP address that was used to make the query. -- `port` (UInt16) — The client port that was used to make the query. -- `initial_user` (String) — Name of the user who ran the initial query (for distributed query execution). -- `initial_query_id` (String) — ID of the initial query (for distributed query execution). -- `initial_address` (IPv6) — IP address that the parent query was launched from. -- `initial_port` (UInt16) — The client port that was used to make the parent query. -- `interface` (UInt8) — Interface that the query was initiated from. Possible values: - - 1 — TCP. - - 2 — HTTP. -- `os_user` (String) — OS’s username who runs [clickhouse-client](../interfaces/cli.md). -- `client_hostname` (String) — Hostname of the client machine where the [clickhouse-client](../interfaces/cli.md) or another TCP client is run. -- `client_name` (String) — The [clickhouse-client](../interfaces/cli.md) or another TCP client name. -- `client_revision` (UInt32) — Revision of the [clickhouse-client](../interfaces/cli.md) or another TCP client. -- `client_version_major` (UInt32) — Major version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. -- `client_version_minor` (UInt32) — Minor version of the [clickhouse-client](../interfaces/cli.md) or another TCP client. -- `client_version_patch` (UInt32) — Patch component of the [clickhouse-client](../interfaces/cli.md) or another TCP client version. -- `http_method` (UInt8) — HTTP method that initiated the query. Possible values: - - 0 — The query was launched from the TCP interface. - - 1 — `GET` method was used. - - 2 — `POST` method was used. -- `http_user_agent` (String) — The `UserAgent` header passed in the HTTP request. -- `quota_key` (String) — The «quota key» specified in the [quotas](quotas.md) setting (see `keyed`). -- `revision` (UInt32) — ClickHouse revision. -- `thread_numbers` (Array(UInt32)) — Number of threads that are participating in query execution. -- `ProfileEvents.Names` (Array(String)) — Counters that measure different metrics. The description of them could be found in the table [system.events](#system_tables-events) -- `ProfileEvents.Values` (Array(UInt64)) — Values of metrics that are listed in the `ProfileEvents.Names` column. -- `Settings.Names` (Array(String)) — Names of settings that were changed when the client ran the query. 
To enable logging changes to settings, set the `log_query_settings` parameter to 1. -- `Settings.Values` (Array(String)) — Values of settings that are listed in the `Settings.Names` column. - -Each query creates one or two rows in the `query_log` table, depending on the status of the query: - -1. If the query execution is successful, two events with types 1 and 2 are created (see the `type` column). -2. If an error occurred during query processing, two events with types 1 and 4 are created. -3. If an error occurred before launching the query, a single event with type 3 is created. - -By default, logs are added to the table at intervals of 7.5 seconds. You can set this interval in the [query\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server setting (see the `flush_interval_milliseconds` parameter). To flush the logs forcibly from the memory buffer into the table, use the `SYSTEM FLUSH LOGS` query. - -When the table is deleted manually, it will be automatically created on the fly. Note that all the previous logs will be deleted. - -!!! note "Note" - The storage period for logs is unlimited. Logs aren’t automatically deleted from the table. You need to organize the removal of outdated logs yourself. - -You can specify an arbitrary partitioning key for the `system.query_log` table in the [query\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server setting (see the `partition_by` parameter). -\#\# system.query\_thread\_log {\#system\_tables-query-thread-log} +## system.query_thread_log {#system_tables-query_thread_log} Содержит информацию о каждом потоке выполняемых запросов. -ClickHouse создаёт таблицу только в том случае, когда установлен конфигурационный параметр сервера [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-thread-log). Параметр задаёт правила ведения лога, такие как интервал логирования или имя таблицы, в которую будут логгироваться запросы. +ClickHouse создаёт таблицу только в том случае, когда установлен конфигурационный параметр сервера [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log). Параметр задаёт правила ведения лога, такие как интервал логирования или имя таблицы, в которую будут логгироваться запросы. Чтобы включить логирование, задайте значение параметра [log\_query\_threads](settings/settings.md#settings-log-query-threads) равным 1. Подробности смотрите в разделе [Настройки](settings/settings.md#settings). @@ -770,16 +747,16 @@ ClickHouse создаёт таблицу только в том случае, к - `ProfileEvents.Names` (Array(String)) — Счетчики для изменения различных метрик для данного потока. Описание метрик можно получить из таблицы [system.events](#system_tables-events)(\#system\_tables-events - `ProfileEvents.Values` (Array(UInt64)) — метрики для данного потока, перечисленные в столбце `ProfileEvents.Names`. -По умолчанию, строки добавляются в таблицу логирования с интервалом в 7,5 секунд. Можно задать интервал в конфигурационном параметре сервера [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-thread-log) (смотрите параметр `flush_interval_milliseconds`). Чтобы принудительно записать логи из буффера памяти в таблицу, используйте запрос `SYSTEM FLUSH LOGS`. +По умолчанию, строки добавляются в таблицу логирования с интервалом в 7,5 секунд. 
Можно задать интервал в конфигурационном параметре сервера [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) (смотрите параметр `flush_interval_milliseconds`). Чтобы принудительно записать логи из буффера памяти в таблицу, используйте запрос `SYSTEM FLUSH LOGS`. Если таблицу удалить вручную, она пересоздастся автоматически «на лету». При этом все логи на момент удаления таблицы будут удалены. !!! note "Примечание" Срок хранения логов не ограничен. Логи не удаляются из таблицы автоматически. Вам необходимо самостоятельно организовать удаление устаревших логов. -Можно указать произвольный ключ партиционирования для таблицы `system.query_log` в конфигурации [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query-thread-log) (параметр `partition_by`). +Можно указать произвольный ключ партиционирования для таблицы `system.query_log` в конфигурации [query\_thread\_log](server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) (параметр `partition_by`). -## system.query_thread_log {#system_tables-query-thread-log} +## system.query_thread_log {#system_tables-query_thread_log} Содержит информацию о каждом потоке исполнения запроса. diff --git a/docs/ru/whats-new/extended-roadmap.md b/docs/ru/whats-new/extended-roadmap.md index b7756f7fb79..20ebe28fe5b 100644 --- a/docs/ru/whats-new/extended-roadmap.md +++ b/docs/ru/whats-new/extended-roadmap.md @@ -73,10 +73,10 @@ Upd. Включено для системных таблиц. Q1. Закоммичено, но есть технический долг, который исправляется сейчас. Готово. Нет, не готово - там всё ещё технический долг. -### 1.9. Использование TTL для прореживания данных {#ispolzovanie-ttl-dlia-prorezhivaniia-dannykh} +### 1.9. + Использование TTL для прореживания данных {#ispolzovanie-ttl-dlia-prorezhivaniia-dannykh} Будет делать Сорокин Николай, ВШЭ и Яндекс. -Upd. Есть pull request. +Upd. Есть pull request. Upd. Сделано. Сейчас пользователь может задать в таблице выражение, которое определяет, сколько времени хранятся данные. Обычно это выражение задаётся относительно значения столбца с датой - например: удалять данные через три месяца. https://clickhouse.tech/docs/ru/operations/table_engines/mergetree/\#table_engine-mergetree-ttl @@ -124,7 +124,7 @@ Q2. Upd. Олег будет делать только часть про HDFS. Upd. Реализация поверх S3 является рабочей на уровне PoC. -### 1.13. Ускорение запросов с FINAL {#uskorenie-zaprosov-s-final} +### 1.13. + Ускорение запросов с FINAL {#uskorenie-zaprosov-s-final} Требует 2.1. Делает [Николай Кочетов](https://github.com/KochetovNicolai). Нужно для Яндекс.Метрики. Q2. Upd: PR [#10463](https://github.com/ClickHouse/ClickHouse/pull/10463) @@ -203,10 +203,11 @@ Upd. SharedContext вынесен из Context. Upd. В очереди. Иван Лежанкин. -### 2.9. Логгировние в format-стиле {#loggirovnie-v-format-stile} +### 2.9. + Логгировние в format-стиле {#loggirovnie-v-format-stile} -Делает [Иван Лежанкин](https://github.com/abyss7). Низкий приоритет. -[\#6049](https://github.com/ClickHouse/ClickHouse/issues/6049#issuecomment-570836998) +[#6049](https://github.com/ClickHouse/ClickHouse/issues/6049#issuecomment-570836998) + +Сделано. ### 2.10. Запрашивать у таблиц не столбцы, а срезы {#zaprashivat-u-tablits-ne-stolbtsy-a-srezy} @@ -282,24 +283,20 @@ Upd. Сейчас обсуждается, как сделать другую з ### 4.3. Ограничение числа одновременных скачиваний с реплик {#ogranichenie-chisla-odnovremennykh-skachivanii-s-replik} -Дмитрий Григорьев, ВШЭ. 
Изначально делал Олег Алексеенков, но пока решение не готово, хотя там не так уж много доделывать. ### 4.4. Ограничение сетевой полосы при репликации {#ogranichenie-setevoi-polosy-pri-replikatsii} -Дмитрий Григорьев, ВШЭ. Нужно для Метрики. +Нужно для Метрики. ### 4.5. Возможность продолжить передачу куска данных при репликации после сбоя {#vozmozhnost-prodolzhit-peredachu-kuska-dannykh-pri-replikatsii-posle-sboia} -Дмитрий Григорьев, ВШЭ. - ### 4.6. p2p передача для GLOBAL подзапросов {#p2p-peredacha-dlia-global-podzaprosov} ### 4.7. Ленивая загрузка множеств для IN и JOIN с помощью k/v запросов {#lenivaia-zagruzka-mnozhestv-dlia-in-i-join-s-pomoshchiu-kv-zaprosov} ### 4.8. Разделить background pool для fetch и merge {#razdelit-background-pool-dlia-fetch-i-merge} -Дмитрий Григорьев, ВШЭ. В очереди. Исправить проблему, что восстанавливающаяся реплика перестаёт мержить. Частично компенсируется 4.3. @@ -329,6 +326,7 @@ Upd. Сделано. Эффективность работы под вопрос Метрика, БК, Маркет, Altinity уже используют более свежие версии чем LTS. Upd. Появилась вторая версия LTS - 20.3. + ## 6. Инструментирование {#instrumentirovanie} ### 6.1. + Исправления сэмплирующего профайлера запросов {#ispravleniia-sempliruiushchego-profailera-zaprosov} @@ -425,11 +423,11 @@ Upd. Рассмотрели все проверки подряд. UBSan включен в функциональных тестах, но не включен в интеграционных тестах. Требует 7.7. -### 7.11. Включение \*San в unit тестах {#vkliuchenie-san-v-unit-testakh} +### 7.11. + Включение \*San в unit тестах {#vkliuchenie-san-v-unit-testakh} У нас мало unit тестов по сравнению с функциональными тестами и их использование не обязательно. Но они всё-равно важны и нет причин не запускать их под всеми видами sanitizers. -Илья Яцишин. +Илья Яцишин. Сделано. ### 7.12. Показывать тестовое покрытие нового кода в PR {#pokazyvat-testovoe-pokrytie-novogo-koda-v-pr} @@ -528,6 +526,8 @@ Upd. Есть сборки, [пример](https://clickhouse-builds.s3.yandex.n Дарья Петрова, УрФУ. +Рабочий прототип: https://pulls-dashboard-demo.herokuapp.com/dashboard/ClickHouse/ClickHouse + Над ClickHouse одновременно работает большое количество разработчиков, которые оформляют свои изменения в виде pull requests. Когда непомерженных pull requests много, то возникает сложность с организацией работы - непонятно, на какой pull request смотреть в первую очередь. Предлагается реализовать простое одностраничное веб-приложение, в котором отображается список pull requests со следующей информацией: @@ -627,6 +627,7 @@ Upd. Готово (все директории кроме contrib). ### 7.32. Обфускация продакшен запросов {#obfuskatsiia-prodakshen-zaprosov} Роман Ильговский. Нужно для Яндекс.Метрики. +Есть pull request, почти готово: https://github.com/ClickHouse/ClickHouse/pull/10973 Имея SQL запрос, требуется вывести структуру таблиц, на которых этот запрос будет выполнен, и заполнить эти таблицы случайными данными, такими, что результат этого запроса зависит от выбора подмножества данных. @@ -1397,11 +1398,11 @@ Constraints позволяют задать выражение, истиннос Василий Морозов, Арслан Гумеров, Альберт Кидрачев, ВШЭ. В прошлом году задачу начинал делать другой человек, но не добился достаточного прогресса. -1. Оптимизация top sort. ++ 1. Оптимизация top sort. В ClickHouse используется неоптимальный вариант top sort. Суть его в том, что из каждого блока достаётся top N записей, а затем, все блоки мержатся. Но доставание top N записей у каждого следующего блока бессмысленно, если мы знаем, что из них в глобальный top N войдёт меньше. 
Конечно нужно реализовать вариацию на тему priority queue (heap) с быстрым пропуском целых блоков, если ни одна строка не попадёт в накопленный top. -2. Рекурсивный вариант сортировки по кортежам. ++ 2. Рекурсивный вариант сортировки по кортежам. Для сортировки по кортежам используется обычная сортировка с компаратором, который в цикле по элементам кортежа делает виртуальные вызовы `IColumn::compareAt`. Это неоптимально - как из-за короткого цикла по неизвестному в compile-time количеству элементов, так и из-за виртуальных вызовов. Чтобы обойтись без виртуальных вызовов, есть метод `IColumn::getPermutation`. Он используется в случае сортировки по одному столбцу. Есть вариант, что в случае сортировки по кортежу, что-то похожее тоже можно применить… например, сделать метод `updatePermutation`, принимающий аргументы offset и limit, и допереставляющий перестановку в диапазоне значений, в которых предыдущий столбец имел равные значения. diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index 4d4e9f98780..d9ea19ff389 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -21,7 +21,7 @@ mkdocs-htmlproofer-plugin==0.0.3 mkdocs-macros-plugin==0.4.9 nltk==3.5 nose==1.3.7 -protobuf==3.12.1 +protobuf==3.12.2 numpy==1.18.4 Pygments==2.5.2 pymdown-extensions==7.1 diff --git a/docs/zh/operations/access-rights.md b/docs/zh/operations/access-rights.md index f596e16c269..97841699d09 100644 --- a/docs/zh/operations/access-rights.md +++ b/docs/zh/operations/access-rights.md @@ -1,101 +1,146 @@ -# 访问权限 {#access-rights} +--- +toc_priority: 48 +toc_title: "访问权限和账户管理" +--- -用户和访问权限在用户配置中设置。 这通常是 `users.xml`. +# 访问权限和账户管理 {#access-rights} +ClickHouse支持基于[RBAC](https://en.wikipedia.org/wiki/Role-based_access_control)的访问控制管理。 -用户被记录在 `users` 科。 这里是一个片段 `users.xml` 文件: +ClickHouse权限实体包括: +- [用户账户](#user-account-management) +- [角色](#role-management) +- [行策略](#row-policy-management) +- [设置描述](#settings-profiles-management) +- [配额](#quotas-management) -``` xml - - - - - - +- 服务端[配置文件](configuration-files.md) `users.xml` 和 `config.xml`. - - +!!! note "警告" + 你无法同时使用两个配置的方式来管理同一个权限实体。 - - default - - default - +## 用法 {#access-control-usage} - - - - - web - default - - test - - -``` +默认ClickHouse提供了一个 `default` 账号,这个账号有所有的权限,但是不能使用SQL驱动方式的访问权限和账户管理。`default`主要用在用户名还未设置的情况,比如从客户端登录或者执行分布式查询。在分布式查询中如果服务端或者集群没有指定[用户名密码](../engines/table-engines/special/distributed.md)那默认的账户就会被使用。 -您可以看到两个用户的声明: `default`和`web`. 我们添加了 `web` 用户分开。 +如果你刚开始使用ClickHouse,考虑如下场景: -该 `default` 在用户名未通过的情况下选择用户。 该 `default` 如果服务器或群集的配置没有指定分布式查询处理,则user也用于分布式查询处理 `user` 和 `password` (见上的部分 [分布](../engines/table-engines/special/distributed.md) 发动机)。 +1. 为 `default` 用户[开启SQL驱动方式的访问权限和账户管理](#enabling-access-control) . +2. 使用 `default` 用户登录并且创建所需要的所有用户。 不要忘记创建管理员账户 (`GRANT ALL ON *.* WITH GRANT OPTION TO admin_user_account`)。 +3. [限制](settings/permissions-for-queries.md#permissions_for_queries) `default` 用户的权限并且禁用SQL驱动方式的访问权限和账户管理。 -The user that is used for exchanging information between servers combined in a cluster must not have substantial restrictions or quotas – otherwise, distributed queries will fail. 
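
Returning to the tuple-sorting discussion above: the idea of an `updatePermutation`-style method is to re-sort only the ranges in which all previously processed key columns had equal values. The actual implementations added for `ColumnArray`, `ColumnDecimal`, `ColumnFixedString` and `ColumnLowCardinality` appear later in this diff; the sketch below is a simplified, free-standing analogue with hypothetical names (`updatePermutationByColumn`, `sortByTuple`) and plain vectors instead of the `IColumn` interface.

```cpp
#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

using Permutation = std::vector<size_t>;
using EqualRange = std::pair<size_t, size_t>;   /// half-open [first, last)
using EqualRanges = std::vector<EqualRange>;

/// Re-sorts `perm` only inside the ranges where all previous columns were equal,
/// then narrows the ranges to groups that are still equal in this column.
void updatePermutationByColumn(const std::vector<int> & column, Permutation & perm, EqualRanges & ranges)
{
    EqualRanges new_ranges;
    for (const auto & [first, last] : ranges)
    {
        std::sort(perm.begin() + first, perm.begin() + last,
                  [&](size_t a, size_t b) { return column[a] < column[b]; });

        size_t range_start = first;
        for (size_t i = first + 1; i < last; ++i)
        {
            if (column[perm[i]] != column[perm[range_start]])
            {
                if (i - range_start > 1)
                    new_ranges.emplace_back(range_start, i);
                range_start = i;
            }
        }
        if (last - range_start > 1)
            new_ranges.emplace_back(range_start, last);
    }
    ranges = std::move(new_ranges);
}

/// Lexicographic sort by a tuple of columns without per-row virtual compare calls:
/// the first column produces the initial permutation, later columns only refine
/// the ranges that are still tied.
Permutation sortByTuple(const std::vector<std::vector<int>> & columns, size_t rows)
{
    Permutation perm(rows);
    for (size_t i = 0; i < rows; ++i)
        perm[i] = i;

    EqualRanges ranges{{0, rows}};
    for (const auto & column : columns)
    {
        if (ranges.empty())
            break;
        updatePermutationByColumn(column, perm, ranges);
    }
    return perm;
}
```

The point of keeping only ranges longer than one element is that rows already uniquely ordered by earlier columns never need to be touched again, which is also how the implementations later in this diff avoid the short-loop-plus-virtual-call pattern of `IColumn::compareAt`.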
+### 当前解决方案的特性 {#access-control-properties} -密码以明文(不推荐)或SHA-256形式指定。 哈希没有腌制。 在这方面,您不应将这些密码视为提供了针对潜在恶意攻击的安全性。 相反,他们是必要的保护员工。 +- 你甚至可以在数据库和表不存在的时候授予权限。 +- 如果表被删除,和这张表关联的特权不会被删除。这意味着如果你创建一张同名的表,所有的特权仍旧有效。如果想删除这张表关联的特权,你可以执行 `REVOKE ALL PRIVILEGES ON db.table FROM ALL` 查询。 +- 特权没有生命周期。 -指定允许访问的网络列表。 在此示例中,将从单独的文件加载两个用户的网络列表 (`/etc/metrika.xml`)包含 `networks` 替代。 这里是它的一个片段: +## 用户账户 {#user-account-management} -``` xml - - ... - - ::/64 - 203.0.113.0/24 - 2001:DB8::/32 - ... - - -``` +用户账户是权限实体,用来授权操作ClickHouse,用户账户包含: -您可以直接在以下内容中定义此网络列表 `users.xml`,或在文件中 `users.d` directory (for more information, see the section «[配置文件](configuration-files.md#configuration_files)»). +- 标识符信息。 +- [特权](../sql-reference/statements/grant.md#grant-privileges)用来定义用户可以执行的查询的范围。 +- 可以连接到ClickHouse的主机。 +- 指定或者默认的角色。 +- 用户登录的时候默认的限制设置。 +- 指定的设置描述。 -该配置包括解释如何从任何地方打开访问的注释。 +特权可以通过[GRANT](../sql-reference/statements/grant.md)查询授权给用户或者通过[角色](#role-management)授予。如果想撤销特权,可以使用[REVOKE](../sql-reference/statements/revoke.md)查询。查询用户所有的特权,使用[SHOW GRANTS](../sql-reference/statements/show.md#show-grants-statement)语句。 -对于在生产中使用,仅指定 `ip` 元素(IP地址及其掩码),因为使用 `host` 和 `hoost_regexp` 可能会导致额外的延迟。 +查询管理: -Next the user settings profile is specified (see the section «[设置配置文件](settings/settings-profiles.md)»). You can specify the default profile, `default'`. 配置文件可以有任何名称。 您可以为不同的用户指定相同的配置文件。 您可以在设置配置文件中编写的最重要的事情是 `readonly=1`,这确保只读访问。 -Then specify the quota to be used (see the section «[配额](quotas.md#quotas)»). You can specify the default quota: `default`. It is set in the config by default to only count resource usage, without restricting it. The quota can have any name. You can specify the same quota for different users – in this case, resource usage is calculated for each user individually. +- [CREATE USER](../sql-reference/statements/create.md#create-user-statement) +- [ALTER USER](../sql-reference/statements/alter.md#alter-user-statement) +- [DROP USER](../sql-reference/statements/misc.md#drop-user-statement) +- [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement) -在可选 `` 您还可以指定用户可以访问的数据库列表。 默认情况下,所有数据库都可供用户使用。 您可以指定 `default` 数据库。 在这种情况下,默认情况下,用户将接收对数据库的访问权限。 +### 设置应用规则 {#access-control-settings-applying} -访问 `system` 始终允许数据库(因为此数据库用于处理查询)。 +对于一个用户账户来说,设置可以通过多种方式配置:通过角色扮演和设置描述。对于一个登陆的账号来说,如果一个设置对应了多个不同的权限实体,这些设置的应用规则如下(优先权从高到底): -用户可以通过以下方式获取其中所有数据库和表的列表 `SHOW` 查询或系统表,即使不允许访问单个数据库。 +1. 用户账户设置。 +2. 用户账号默认的角色设置。如果这个设置配置了多个角色,那设置的应用是没有规定的顺序。 +3. 从设置描述分批给用户或者角色的设置。如果这个设置配置了多个角色,那设置的应用是没有规定的顺序。 +4. 
对所有服务器有效的默认或者[default profile](server-configuration-parameters/settings.md#default-profile)的设置。 -数据库访问是不相关的 [只读](settings/permissions-for-queries.md#settings_readonly) 设置。 您不能授予对一个数据库的完全访问权限,并 `readonly` 进入另一个。 -[原始文章](https://clickhouse.tech/docs/en/operations/access_rights/) +## 角色 {#role-management} + +角色是权限实体的集合,可以被授予用户账号。 + +角色包括: + +- [特权](../sql-reference/statements/grant.md#grant-privileges) +- 设置和限制 +- 分配的角色列表 + +查询管理: + +- [CREATE ROLE](../sql-reference/statements/create.md#create-role-statement) +- [ALTER ROLE](../sql-reference/statements/alter.md#alter-role-statement) +- [DROP ROLE](../sql-reference/statements/misc.md#drop-role-statement) +- [SET ROLE](../sql-reference/statements/misc.md#set-role-statement) +- [SET DEFAULT ROLE](../sql-reference/statements/misc.md#set-default-role-statement) +- [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement) + +使用[GRANT](../sql-reference/statements/grant.md) 查询可以把特权授予给角色。用[REVOKE](../sql-reference/statements/revoke.md)来撤回特权。 + +## 行策略 {#row-policy-management} + +行策略是一个过滤器,用来定义哪些行数据可以被账户或者角色访问。对一个特定的表来说,行策略包括过滤器和使用这个策略的账户和角色。 + +查询管理: + +- [CREATE ROW POLICY](../sql-reference/statements/create.md#create-row-policy-statement) +- [ALTER ROW POLICY](../sql-reference/statements/alter.md#alter-row-policy-statement) +- [DROP ROW POLICY](../sql-reference/statements/misc.md#drop-row-policy-statement) +- [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement) + + +## 设置描述 {#settings-profiles-management} + +设置描述是[设置](settings/index.md)的汇总。设置汇总包括设置和限制,当然也包括这些描述的对象:角色和账户。 + +查询管理: + +- [CREATE SETTINGS PROFILE](../sql-reference/statements/create.md#create-settings-profile-statement) +- [ALTER SETTINGS PROFILE](../sql-reference/statements/alter.md#alter-settings-profile-statement) +- [DROP SETTINGS PROFILE](../sql-reference/statements/misc.md#drop-settings-profile-statement) +- [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement) + + +## 配额 {#quotas-management} + +配额用来限制资源的使用情况。参考[配额](quotas.md). + +配额包括特定时间的限制条件和使用这个配额的账户和角色。 + +Management queries: + +- [CREATE QUOTA](../sql-reference/statements/create.md#create-quota-statement) +- [ALTER QUOTA](../sql-reference/statements/alter.md#alter-quota-statement) +- [DROP QUOTA](../sql-reference/statements/misc.md#drop-quota-statement) +- [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement) + + +## 开启SQL驱动方式的访问权限和账户管理 {#enabling-access-control} + +- 为配置的存储设置一个目录. + + ClickHouse把访问实体的相关配置存储在[访问控制目录](server-configuration-parameters/settings.md#access_control_path),而这个目录可以通过服务端进行配置. + +- 为至少一个账户开启SQL驱动方式的访问权限和账户管理. 
+ + 默认情况,SQL驱动方式的访问权限和账户管理对所有用户都是关闭的。你需要在 `users.xml` 中配置至少一个用户,并且把[权限管理](settings/settings-users.md#access_management-user-setting)的值设置为1。 + + +[Original article](https://clickhouse.tech/docs/en/operations/access_rights/) diff --git a/docs/zh/operations/index.md b/docs/zh/operations/index.md index 2544cfd1c20..f35858279f5 100644 --- a/docs/zh/operations/index.md +++ b/docs/zh/operations/index.md @@ -1,3 +1,13 @@ +--- +toc_priority: 43 +toc_title: "操作" +--- + # 操作 {#operations} +Clickhouse运维手册主要包含下面几部分: + +- 安装要求 + + [原始文章](https://clickhouse.tech/docs/en/operations/) diff --git a/docs/zh/operations/monitoring.md b/docs/zh/operations/monitoring.md index 1db80399c36..0bf8556a870 100644 --- a/docs/zh/operations/monitoring.md +++ b/docs/zh/operations/monitoring.md @@ -1,3 +1,8 @@ +--- +toc_priority: 45 +toc_title: "监控" +--- + # 监控 {#jian-kong} 可以监控到: @@ -13,7 +18,7 @@ ClickHouse 本身不会去监控硬件资源的状态。 - 处理器上的负载和温度。 - 可以使用 [dmesg](https://en.wikipedia.org/wiki/Dmesg), [turbostat](https://www.linux.org/docs/man8/turbostat.html) 或者其他工具。 + 可以使用[dmesg](https://en.wikipedia.org/wiki/Dmesg), [turbostat](https://www.linux.org/docs/man8/turbostat.html)或者其他工具。 - 磁盘存储,RAM和网络的使用率。 @@ -21,17 +26,17 @@ ClickHouse 本身不会去监控硬件资源的状态。 ClickHouse服务本身具有用于自我状态监视指标。 -要跟踪服务器事件,请观察服务器日志。 请参阅配置文件的\[logger\](server\_settings/settings.md\#server\_settings-logger)部分。 +要跟踪服务器事件,请观察服务器日志。 请参阅配置文件的 [logger](server-configuration-parameters/settings.md#server_configuration_parameters-logger)部分。 ClickHouse 收集的指标项: - 服务用于计算的资源占用的各种指标。 - 关于查询处理的常见统计信息。 -可以在 [系统。指标](system-tables.md#system_tables-metrics) ,[系统。活动](system-tables.md#system_tables-events) 以及[系统。asynchronous\_metrics](system-tables.md#system_tables-asynchronous_metrics) 等系统表查看所有的指标项。 +可以在 [系统指标](system-tables.md#system_tables-metrics) ,[系统事件](system-tables.md#system_tables-events) 以及[系统异步指标](system-tables.md#system_tables-asynchronous_metrics) 等系统表查看所有的指标项。 可以配置ClickHouse 往 [石墨](https://github.com/graphite-project)导入指标。 参考 [石墨部分](server-configuration-parameters/settings.md#server_configuration_parameters-graphite) 配置文件。在配置指标导出之前,需要参考Graphite[官方教程](https://graphite.readthedocs.io/en/latest/install.html)搭建服务。 此外,您可以通过HTTP API监视服务器可用性。 将HTTP GET请求发送到 `/ping`。 如果服务器可用,它将以 `200 OK` 响应。 -要监视服务器集群的配置中,应设置[max\_replica\_delay\_for\_distributed\_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回200 OK。 如果副本滞后,请求将返回 `503 HTTP_SERVICE_UNAVAILABLE`,包括有关待办事项大小的信息。 +要监视服务器集群的配置,应设置[max\_replica\_delay\_for\_distributed\_queries](settings/settings.md#settings-max_replica_delay_for_distributed_queries)参数并使用HTTP资源`/replicas_status`。 如果副本可用,并且不延迟在其他副本之后,则对`/replicas_status`的请求将返回200 OK。 如果副本滞后,请求将返回 `503 HTTP_SERVICE_UNAVAILABLE`,包括有关待办事项大小的信息。 diff --git a/docs/zh/operations/requirements.md b/docs/zh/operations/requirements.md index c7b61113926..28ac93de37d 100644 --- a/docs/zh/operations/requirements.md +++ b/docs/zh/operations/requirements.md @@ -1,8 +1,6 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 44 -toc_title: "\u8981\u6C42" +toc_title: "要求" --- # 要求 {#requirements} @@ -13,20 +11,20 @@ toc_title: "\u8981\u6C42" ClickHouse实现并行数据处理并使用所有可用的硬件资源。 在选择处理器时,考虑到ClickHouse在具有大量内核但时钟速率较低的配置中的工作效率要高于具有较少内核和较高时钟速率的配置。 例如,具有2600MHz的16核心优于具有3600MHz的8核心。 -建议使用 **涡轮增压** 和 **超线程** 技术。 它显着提高了典型工作负载的性能。 +建议使用 **睿频加速** 和 **超线程** 技术。 它显着提高了典型工作负载的性能。 ## RAM {#ram} -我们建议使用至少4GB的RAM来执行非平凡的查询。 ClickHouse服务器可以使用少得多的RAM运行,但它需要处理查询的内存。 
+我们建议使用至少4GB的RAM来执行重要的查询。 ClickHouse服务器可以使用少得多的RAM运行,但它需要处理查询的内存。 RAM所需的体积取决于: - 查询的复杂性。 -- 在查询中处理的数据量。 +- 查询中处理的数据量。 要计算所需的RAM体积,您应该估计临时数据的大小 [GROUP BY](../sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](../sql-reference/statements/select/distinct.md#select-distinct), [JOIN](../sql-reference/statements/select/join.md#select-join) 和您使用的其他操作。 -ClickHouse可以使用外部存储器来存储临时数据。 看 [在外部存储器中分组](../sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) 有关详细信息。 +ClickHouse可以使用外部存储器来存储临时数据。看 [在外部存储器中分组](../sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) 有关详细信息。 ## 交换文件 {#swap-file} @@ -42,20 +40,20 @@ ClickHouse可以使用外部存储器来存储临时数据。 看 [在外部存 您可以采取数据的样本并从中获取行的平均大小。 然后将该值乘以计划存储的行数。 -- 的数据压缩系数。 +- 数据压缩系数。 - 要估计数据压缩系数,请将数据的样本加载到ClickHouse中,并将数据的实际大小与存储的表的大小进行比较。 例如,点击流数据通常被压缩6-10次。 + 要估计数据压缩系数,请将数据的样本加载到ClickHouse中,并将数据的实际大小与存储的表的大小进行比较。 例如,点击流数据通常被压缩6-10倍。 -要计算要存储的最终数据量,请将压缩系数应用于估计的数据量。 如果计划将数据存储在多个副本中,则将估计的卷乘以副本数。 +要计算要存储的最终数据量,请将压缩系数应用于估计的数据量。 如果计划将数据存储在多个副本中,则将估计的量乘以副本数。 ## 网络 {#network} 如果可能的话,使用10G或更高级别的网络。 -网络带宽对于处理具有大量中间数据的分布式查询至关重要。 此外,网络速度会影响复制过程。 +网络带宽对于处理具有大量中间结果数据的分布式查询至关重要。 此外,网络速度会影响复制过程。 ## 软件 {#software} -ClickHouse主要是为Linux系列操作系统开发的。 推荐的Linux发行版是Ubuntu。 该 `tzdata` 软件包应安装在系统中。 +ClickHouse主要是为Linux系列操作系统开发的。 推荐的Linux发行版是Ubuntu。 `tzdata` 软件包应安装在系统中。 ClickHouse也可以在其他操作系统系列中工作。 查看详细信息 [开始](../getting-started/index.md) 文档的部分。 diff --git a/docs/zh/operations/troubleshooting.md b/docs/zh/operations/troubleshooting.md index fcece8c3d6d..d9a00717d7b 100644 --- a/docs/zh/operations/troubleshooting.md +++ b/docs/zh/operations/troubleshooting.md @@ -1,23 +1,21 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 46 -toc_title: "\u7591\u96BE\u89E3\u7B54" +toc_title: "常见问题" --- -# 疑难解答 {#troubleshooting} +# 常见问题 {#troubleshooting} -- [安装方式](#troubleshooting-installation-errors) +- [安装](#troubleshooting-installation-errors) - [连接到服务器](#troubleshooting-accepts-no-connections) - [查询处理](#troubleshooting-does-not-process-queries) - [查询处理效率](#troubleshooting-too-slow) -## 安装方式 {#troubleshooting-installation-errors} +## 安装 {#troubleshooting-installation-errors} ### 您无法使用Apt-get从ClickHouse存储库获取Deb软件包 {#you-cannot-get-deb-packages-from-clickhouse-repository-with-apt-get} - 检查防火墙设置。 -- 如果出于任何原因无法访问存储库,请按照以下文件中的描述下载软件包 [开始](../getting-started/index.md) 文章并使用手动安装它们 `sudo dpkg -i ` 指挥部 您还需要 `tzdata` 包。 +- 如果出于任何原因无法访问存储库,请按照[开始](../getting-started/index.md)中的描述下载软件包,并使用命令 `sudo dpkg -i ` 手动安装它们。除此之外你还需要 `tzdata` 包。 ## 连接到服务器 {#troubleshooting-accepts-no-connections} @@ -44,7 +42,7 @@ $ sudo service clickhouse-server start **检查日志** -主日志 `clickhouse-server` 是在 `/var/log/clickhouse-server/clickhouse-server.log` 默认情况下。 +主日志 `clickhouse-server` 默认情况是在 `/var/log/clickhouse-server/clickhouse-server.log` 下。 如果服务器成功启动,您应该看到字符串: @@ -57,13 +55,13 @@ $ sudo service clickhouse-server start 2019.01.11 15:23:25.549505 [ 45 ] {} ExternalDictionaries: Failed reloading 'event2id' external dictionary: Poco::Exception. Code: 1000, e.code() = 111, e.displayText() = Connection refused, e.what() = Connection refused ``` -如果在文件末尾没有看到错误,请从字符串开始查看整个文件: +如果在文件末尾没有看到错误,请从如下字符串开始查看整个文件: ``` text Application: starting up. 
``` -如果您尝试启动第二个实例 `clickhouse-server` 在服务器上,您将看到以下日志: +如果您尝试在服务器上启动第二个实例 `clickhouse-server` ,您将看到以下日志: ``` text 2019.01.11 15:25:11.151730 [ 1 ] {} : Starting ClickHouse 19.1.0 with revision 54413 @@ -79,9 +77,9 @@ Revision: 54413 2019.01.11 15:25:11.156716 [ 2 ] {} BaseDaemon: Stop SignalListener thread ``` -**请参阅系统。d日志** +**查看系统日志** -如果你没有找到任何有用的信息 `clickhouse-server` 日志或没有任何日志,您可以查看 `system.d` 使用命令记录: +如果你在 `clickhouse-server` 没有找到任何有用的信息或根本没有任何日志,您可以使用命令查看 `system.d` : ``` bash $ sudo journalctl -u clickhouse-server @@ -99,9 +97,9 @@ $ sudo -u clickhouse /usr/bin/clickhouse-server --config-file /etc/clickhouse-se 检查: -- 码头工人设置。 +- Docker设置。 - 如果您在IPv6网络中的Docker中运行ClickHouse,请确保 `network=host` 已设置。 + 如果您在IPv6网络中的Docker中运行ClickHouse,请确保 `network=host` 被设置。 - 端点设置。 @@ -117,10 +115,10 @@ $ sudo -u clickhouse /usr/bin/clickhouse-server --config-file /etc/clickhouse-se 检查: - - 该 [tcp\_port\_secure](server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) 设置。 - - 设置 [SSL序列](server-configuration-parameters/settings.md#server_configuration_parameters-openssl). + - [tcp\_port\_secure](server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) 设置。 + - [SSL证书](server-configuration-parameters/settings.md#server_configuration_parameters-openssl) 设置. - 连接时使用正确的参数。 例如,使用 `port_secure` 参数 `clickhouse_client`. + 连接时使用正确的参数。 例如,使用 `clickhouse_client` 的时候使用 `port_secure` 参数 . - 用户设置。 @@ -135,7 +133,7 @@ $ curl 'http://localhost:8123/' --data-binary "SELECT a" Code: 47, e.displayText() = DB::Exception: Unknown identifier: a. Note that there are no tables (FROM clause) in your query, context: required_names: 'a' source_tables: table_aliases: private_aliases: column_aliases: public_columns: 'a' masked_columns: array_join_columns: source_columns: , e.what() = DB::Exception ``` -如果你开始 `clickhouse-client` 与 `stack-trace` 参数,ClickHouse返回包含错误描述的服务器堆栈跟踪。 +如果你使用 `clickhouse-client` 时设置了 `stack-trace` 参数,ClickHouse返回包含错误描述的服务器堆栈跟踪信息。 您可能会看到一条关于连接中断的消息。 在这种情况下,可以重复查询。 如果每次执行查询时连接中断,请检查服务器日志中是否存在错误。 diff --git a/docs/zh/operations/update.md b/docs/zh/operations/update.md index 186b6c5b315..072b3da98a9 100644 --- a/docs/zh/operations/update.md +++ b/docs/zh/operations/update.md @@ -1,11 +1,9 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 47 -toc_title: "\u70B9\u51FB\u66F4\u65B0" +toc_title: "更新" --- -# 点击更新 {#clickhouse-update} +# 更新 {#clickhouse-update} 如果从deb包安装ClickHouse,请在服务器上执行以下命令: @@ -15,6 +13,6 @@ $ sudo apt-get install clickhouse-client clickhouse-server $ sudo service clickhouse-server restart ``` -如果您使用除推荐的deb包之外的其他内容安装ClickHouse,请使用适当的更新方法。 +如果您使用除推荐的deb包之外的其他方式安装ClickHouse,请使用适当的更新方法。 -ClickHouse不支持分布式更新。 该操作应在每个单独的服务器上连续执行。 不要同时更新群集上的所有服务器,否则群集将在一段时间内不可用。 +ClickHouse不支持分布式更新。该操作应在每个单独的服务器上连续执行。不要同时更新群集上的所有服务器,否则群集将在一段时间内不可用。 diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 65742697333..baf8270d1bf 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -207,7 +207,7 @@ if (TARGET clickhouse-server AND TARGET copy-headers) endif () if (ENABLE_TESTS AND USE_GTEST) - set (CLICKHOUSE_ALL_TESTS_TARGETS local_date_time_comparison unit_tests_libcommon unit_tests_dbms hashing_write_buffer hashing_read_buffer in_join_subqueries_preprocessor expression_analyzer) + set (CLICKHOUSE_ALL_TESTS_TARGETS local_date_time_comparison unit_tests_libcommon unit_tests_dbms hashing_write_buffer hashing_read_buffer in_join_subqueries_preprocessor) 
add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_ALL_TESTS_TARGETS}) add_dependencies(clickhouse-bundle clickhouse-tests) endif() diff --git a/programs/client/CMakeLists.txt b/programs/client/CMakeLists.txt index 11ade559a8d..e273123afe0 100644 --- a/programs/client/CMakeLists.txt +++ b/programs/client/CMakeLists.txt @@ -6,14 +6,9 @@ set(CLICKHOUSE_CLIENT_SOURCES set(CLICKHOUSE_CLIENT_LINK PRIVATE clickhouse_common_config clickhouse_functions clickhouse_aggregate_functions clickhouse_common_io clickhouse_parsers string_utils ${Boost_PROGRAM_OPTIONS_LIBRARY}) -include(CheckSymbolExists) -check_symbol_exists(readpassphrase readpassphrase.h HAVE_READPASSPHRASE) -configure_file(config_client.h.in ${ConfigIncludePath}/config_client.h) - -if(NOT HAVE_READPASSPHRASE) - add_subdirectory(readpassphrase) - list(APPEND CLICKHOUSE_CLIENT_LINK PRIVATE readpassphrase) -endif() +# Always use internal readpassphrase +add_subdirectory(readpassphrase) +list(APPEND CLICKHOUSE_CLIENT_LINK PRIVATE readpassphrase) clickhouse_program_add(client) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index d6cac7a7b02..afc8f9a72b1 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -77,6 +76,10 @@ #include #include +#if !defined(ARCADIA_BUILD) +# include +#endif + #ifndef __clang__ #pragma GCC optimize("-fno-var-tracking-assignments") #endif diff --git a/programs/client/ConnectionParameters.cpp b/programs/client/ConnectionParameters.cpp index f0ef3ae5694..d8b4d0f1add 100644 --- a/programs/client/ConnectionParameters.cpp +++ b/programs/client/ConnectionParameters.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include "readpassphrase/readpassphrase.h" namespace DB { diff --git a/programs/client/config_client.h.in b/programs/client/config_client.h.in deleted file mode 100644 index 5ad788ff54c..00000000000 --- a/programs/client/config_client.h.in +++ /dev/null @@ -1,3 +0,0 @@ -#pragma once - -#cmakedefine HAVE_READPASSPHRASE diff --git a/programs/client/readpassphrase/CMakeLists.txt b/programs/client/readpassphrase/CMakeLists.txt index a10b54c377d..dd1bf2c91b9 100644 --- a/programs/client/readpassphrase/CMakeLists.txt +++ b/programs/client/readpassphrase/CMakeLists.txt @@ -1,13 +1,7 @@ - # wget https://raw.githubusercontent.com/openssh/openssh-portable/master/openbsd-compat/readpassphrase.c # wget https://raw.githubusercontent.com/openssh/openssh-portable/master/openbsd-compat/readpassphrase.h -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unused-result -Wno-reserved-id-macro") +add_library(readpassphrase readpassphrase.c) -configure_file(includes.h.in ${CMAKE_CURRENT_BINARY_DIR}/include/includes.h) -add_library(readpassphrase ${CMAKE_CURRENT_SOURCE_DIR}/readpassphrase.c) -set_target_properties(readpassphrase - PROPERTIES LINKER_LANGUAGE C - ) -# . to allow #include -target_include_directories(readpassphrase PUBLIC . 
${CMAKE_CURRENT_BINARY_DIR}/include) +set_target_properties(readpassphrase PROPERTIES LINKER_LANGUAGE C) +target_compile_options(readpassphrase PRIVATE -Wno-unused-result -Wno-reserved-id-macro) diff --git a/programs/client/readpassphrase/includes.h.in b/programs/client/readpassphrase/includes.h similarity index 79% rename from programs/client/readpassphrase/includes.h.in rename to programs/client/readpassphrase/includes.h index 44580d1ed95..3ca5eb2bff8 100644 --- a/programs/client/readpassphrase/includes.h.in +++ b/programs/client/readpassphrase/includes.h @@ -1,6 +1,6 @@ #pragma once -#cmakedefine HAVE_READPASSPHRASE +/* #undef HAVE_READPASSPHRASE */ #if !defined(HAVE_READPASSPHRASE) # ifndef _PATH_TTY diff --git a/programs/client/readpassphrase/readpassphrase.c b/programs/client/readpassphrase/readpassphrase.c index 243701239bf..21b48e7efc3 100644 --- a/programs/client/readpassphrase/readpassphrase.c +++ b/programs/client/readpassphrase/readpassphrase.c @@ -25,13 +25,11 @@ #include "includes.h" -#ifndef HAVE_READPASSPHRASE - #include #include #include #include -#include +#include "readpassphrase.h" #include #include #include @@ -193,19 +191,7 @@ restart: } //DEF_WEAK(readpassphrase); -#if 0 -char * -getpass(const char *prompt) -{ - static char buf[_PASSWORD_LEN + 1]; - - return(readpassphrase(prompt, buf, sizeof(buf), RPP_ECHO_OFF)); -} -#endif - static void handler(int s) { - signo[s] = 1; } -#endif /* HAVE_READPASSPHRASE */ diff --git a/programs/client/readpassphrase/readpassphrase.h b/programs/client/readpassphrase/readpassphrase.h index 0782a1773ea..399eb7b062c 100644 --- a/programs/client/readpassphrase/readpassphrase.h +++ b/programs/client/readpassphrase/readpassphrase.h @@ -23,39 +23,22 @@ /* OPENBSD ORIGINAL: include/readpassphrase.h */ #pragma once -// #ifndef _READPASSPHRASE_H_ -// #define _READPASSPHRASE_H_ -//#include "includes.h" -#include "config_client.h" - -// Should not be included on BSD systems, but if it happen... -#ifdef HAVE_READPASSPHRASE -# include_next +#if defined(__cplusplus) +extern "C" { #endif -#ifndef HAVE_READPASSPHRASE -# ifdef __cplusplus -extern "C" { -# endif - - -# define RPP_ECHO_OFF 0x00 /* Turn off echo (default). */ -# define RPP_ECHO_ON 0x01 /* Leave echo on. */ -# define RPP_REQUIRE_TTY 0x02 /* Fail if there is no tty. */ -# define RPP_FORCELOWER 0x04 /* Force input to lower case. */ -# define RPP_FORCEUPPER 0x08 /* Force input to upper case. */ -# define RPP_SEVENBIT 0x10 /* Strip the high bit from input. */ -# define RPP_STDIN 0x20 /* Read from stdin, not /dev/tty */ +#define RPP_ECHO_OFF 0x00 /* Turn off echo (default). */ +#define RPP_ECHO_ON 0x01 /* Leave echo on. */ +#define RPP_REQUIRE_TTY 0x02 /* Fail if there is no tty. */ +#define RPP_FORCELOWER 0x04 /* Force input to lower case. */ +#define RPP_FORCEUPPER 0x08 /* Force input to upper case. */ +#define RPP_SEVENBIT 0x10 /* Strip the high bit from input. 
*/ +#define RPP_STDIN 0x20 /* Read from stdin, not /dev/tty */ char * readpassphrase(const char *, char *, size_t, int); -# ifdef __cplusplus +#if defined(__cplusplus) } -# endif - - -#endif /* HAVE_READPASSPHRASE */ - -// #endif /* !_READPASSPHRASE_H_ */ +#endif diff --git a/programs/client/readpassphrase/ya.make b/programs/client/readpassphrase/ya.make new file mode 100644 index 00000000000..80ad197e5d4 --- /dev/null +++ b/programs/client/readpassphrase/ya.make @@ -0,0 +1,7 @@ +LIBRARY() + +SRCS( + readpassphrase.c +) + +END() diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp index 9b1393204d4..ce4bf94589e 100644 --- a/programs/copier/ClusterCopierApp.cpp +++ b/programs/copier/ClusterCopierApp.cpp @@ -114,7 +114,7 @@ void ClusterCopierApp::mainImpl() registerDisks(); static const std::string default_database = "_local"; - DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared(default_database)); + DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared(default_database, *context)); context->setCurrentDatabase(default_database); /// Initialize query scope just in case. diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 01acf250b1b..eb78c049825 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -118,13 +118,13 @@ void LocalServer::tryInitPath() } -static void attachSystemTables() +static void attachSystemTables(const Context & context) { DatabasePtr system_database = DatabaseCatalog::instance().tryGetDatabase(DatabaseCatalog::SYSTEM_DATABASE); if (!system_database) { /// TODO: add attachTableDelayed into DatabaseMemory to speedup loading - system_database = std::make_shared(DatabaseCatalog::SYSTEM_DATABASE); + system_database = std::make_shared(DatabaseCatalog::SYSTEM_DATABASE, context); DatabaseCatalog::instance().attachDatabase(DatabaseCatalog::SYSTEM_DATABASE, system_database); } @@ -202,7 +202,7 @@ try * if such tables will not be dropped, clickhouse-server will not be able to load them due to security reasons. 
*/ std::string default_database = config().getString("default_database", "_local"); - DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared(default_database)); + DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared(default_database, *context)); context->setCurrentDatabase(default_database); applyCmdOptions(); @@ -213,14 +213,14 @@ try LOG_DEBUG(log, "Loading metadata from {}", context->getPath()); loadMetadataSystem(*context); - attachSystemTables(); + attachSystemTables(*context); loadMetadata(*context); DatabaseCatalog::instance().loadDatabases(); LOG_DEBUG(log, "Loaded metadata."); } else { - attachSystemTables(); + attachSystemTables(*context); } processQueries(); diff --git a/programs/main.cpp b/programs/main.cpp index 2eb226d3c00..382a104b798 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -8,11 +8,8 @@ #include #include /// pair -#if __has_include("config_tools.h") -#include "config_tools.h" -#endif -#if __has_include("config_core.h") -#include "config_core.h" +#if !defined(ARCADIA_BUILD) +# include "config_tools.h" #endif #include @@ -22,31 +19,31 @@ /// Universal executable for various clickhouse applications -#if ENABLE_CLICKHOUSE_SERVER || !defined(ENABLE_CLICKHOUSE_SERVER) +#if ENABLE_CLICKHOUSE_SERVER int mainEntryClickHouseServer(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_CLIENT || !defined(ENABLE_CLICKHOUSE_CLIENT) +#if ENABLE_CLICKHOUSE_CLIENT int mainEntryClickHouseClient(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_LOCAL || !defined(ENABLE_CLICKHOUSE_LOCAL) +#if ENABLE_CLICKHOUSE_LOCAL int mainEntryClickHouseLocal(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_BENCHMARK || !defined(ENABLE_CLICKHOUSE_BENCHMARK) +#if ENABLE_CLICKHOUSE_BENCHMARK int mainEntryClickHouseBenchmark(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG || !defined(ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG) +#if ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG int mainEntryClickHouseExtractFromConfig(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_COMPRESSOR || !defined(ENABLE_CLICKHOUSE_COMPRESSOR) +#if ENABLE_CLICKHOUSE_COMPRESSOR int mainEntryClickHouseCompressor(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_FORMAT || !defined(ENABLE_CLICKHOUSE_FORMAT) +#if ENABLE_CLICKHOUSE_FORMAT int mainEntryClickHouseFormat(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_COPIER || !defined(ENABLE_CLICKHOUSE_COPIER) +#if ENABLE_CLICKHOUSE_COPIER int mainEntryClickHouseClusterCopier(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_OBFUSCATOR || !defined(ENABLE_CLICKHOUSE_OBFUSCATOR) +#if ENABLE_CLICKHOUSE_OBFUSCATOR int mainEntryClickHouseObfuscator(int argc, char ** argv); #endif @@ -60,31 +57,31 @@ using MainFunc = int (*)(int, char**); /// Add an item here to register new application std::pair clickhouse_applications[] = { -#if ENABLE_CLICKHOUSE_LOCAL || !defined(ENABLE_CLICKHOUSE_LOCAL) +#if ENABLE_CLICKHOUSE_LOCAL {"local", mainEntryClickHouseLocal}, #endif -#if ENABLE_CLICKHOUSE_CLIENT || !defined(ENABLE_CLICKHOUSE_CLIENT) +#if ENABLE_CLICKHOUSE_CLIENT {"client", mainEntryClickHouseClient}, #endif -#if ENABLE_CLICKHOUSE_BENCHMARK || !defined(ENABLE_CLICKHOUSE_BENCHMARK) +#if ENABLE_CLICKHOUSE_BENCHMARK {"benchmark", mainEntryClickHouseBenchmark}, #endif -#if ENABLE_CLICKHOUSE_SERVER || !defined(ENABLE_CLICKHOUSE_SERVER) +#if ENABLE_CLICKHOUSE_SERVER {"server", mainEntryClickHouseServer}, #endif -#if ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG || 
!defined(ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG) +#if ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG {"extract-from-config", mainEntryClickHouseExtractFromConfig}, #endif -#if ENABLE_CLICKHOUSE_COMPRESSOR || !defined(ENABLE_CLICKHOUSE_COMPRESSOR) +#if ENABLE_CLICKHOUSE_COMPRESSOR {"compressor", mainEntryClickHouseCompressor}, #endif -#if ENABLE_CLICKHOUSE_FORMAT || !defined(ENABLE_CLICKHOUSE_FORMAT) +#if ENABLE_CLICKHOUSE_FORMAT {"format", mainEntryClickHouseFormat}, #endif -#if ENABLE_CLICKHOUSE_COPIER || !defined(ENABLE_CLICKHOUSE_COPIER) +#if ENABLE_CLICKHOUSE_COPIER {"copier", mainEntryClickHouseClusterCopier}, #endif -#if ENABLE_CLICKHOUSE_OBFUSCATOR || !defined(ENABLE_CLICKHOUSE_OBFUSCATOR) +#if ENABLE_CLICKHOUSE_OBFUSCATOR {"obfuscator", mainEntryClickHouseObfuscator}, #endif }; @@ -127,9 +124,10 @@ enum class InstructionFail SSSE3 = 2, SSE4_1 = 3, SSE4_2 = 4, - AVX = 5, - AVX2 = 6, - AVX512 = 7 + POPCNT = 5, + AVX = 6, + AVX2 = 7, + AVX512 = 8 }; const char * instructionFailToString(InstructionFail fail) @@ -146,6 +144,8 @@ const char * instructionFailToString(InstructionFail fail) return "SSE4.1"; case InstructionFail::SSE4_2: return "SSE4.2"; + case InstructionFail::POPCNT: + return "POPCNT"; case InstructionFail::AVX: return "AVX"; case InstructionFail::AVX2: @@ -189,6 +189,16 @@ void checkRequiredInstructionsImpl(volatile InstructionFail & fail) __asm__ volatile ("pcmpgtq %%xmm0, %%xmm0" : : : "xmm0"); #endif + /// Defined by -msse4.2 +#if defined(__POPCNT__) + fail = InstructionFail::POPCNT; + { + uint64_t a = 0; + uint64_t b = 0; + __asm__ volatile ("popcnt %1, %0" : "=r"(a) :"r"(b) :); + } +#endif + #if defined(__AVX__) fail = InstructionFail::AVX; __asm__ volatile ("vaddpd %%ymm0, %%ymm0, %%ymm0" : : : "ymm0"); diff --git a/programs/server/CMakeLists.txt b/programs/server/CMakeLists.txt index 026bb0bfeb2..1563f5ac51e 100644 --- a/programs/server/CMakeLists.txt +++ b/programs/server/CMakeLists.txt @@ -1,21 +1,6 @@ set(CLICKHOUSE_SERVER_SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/HTTPHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/HTTPHandlerFactory.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/InterserverIOHTTPHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/MetricsTransmitter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/NotFoundHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/PrometheusMetricsWriter.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/PrometheusRequestHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/ReplicasStatusHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/StaticRequestHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/Server.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/TCPHandler.cpp -) - -set(CLICKHOUSE_SERVER_SOURCES - ${CLICKHOUSE_SERVER_SOURCES} - ${CMAKE_CURRENT_SOURCE_DIR}/MySQLHandler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/MySQLHandlerFactory.cpp + MetricsTransmitter.cpp + Server.cpp ) set (CLICKHOUSE_SERVER_LINK diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 8383fa2d9bf..77dc5305fa8 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -53,15 +53,19 @@ #include #include #include -#include "HTTPHandlerFactory.h" +#include #include "MetricsTransmitter.h" #include -#include "TCPHandlerFactory.h" +#include #include #include +<<<<<<< HEAD #include "MySQLHandlerFactory.h" #include +======= +#include +>>>>>>> a4e40fb5f209539cfee6af5da7f27c1c96e02eac #if !defined(ARCADIA_BUILD) # include "config_core.h" diff --git a/programs/server/Server.h b/programs/server/Server.h index ffd89df6af4..ad9e51c881c 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -1,6 +1,6 @@ #pragma once -#include "IServer.h" 
+#include #include diff --git a/programs/server/ya.make b/programs/server/ya.make index 2c74c01c7cb..2e13267f715 100644 --- a/programs/server/ya.make +++ b/programs/server/ya.make @@ -11,19 +11,8 @@ PEERDIR( SRCS( clickhouse-server.cpp - HTTPHandler.cpp - HTTPHandlerFactory.cpp - InterserverIOHTTPHandler.cpp MetricsTransmitter.cpp - MySQLHandler.cpp - MySQLHandlerFactory.cpp - NotFoundHandler.cpp - PrometheusMetricsWriter.cpp - PrometheusRequestHandler.cpp - ReplicasStatusHandler.cpp - StaticRequestHandler.cpp Server.cpp - TCPHandler.cpp ) END() diff --git a/programs/ya.make b/programs/ya.make index 6c773c312b8..f4a61850212 100644 --- a/programs/ya.make +++ b/programs/ya.make @@ -1,3 +1,27 @@ -RECURSE( - server +PROGRAM(clickhouse) + +CFLAGS( + -DENABLE_CLICKHOUSE_CLIENT + -DENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG + -DENABLE_CLICKHOUSE_SERVER ) + +PEERDIR( + clickhouse/base/daemon + clickhouse/base/loggers + clickhouse/programs/client/readpassphrase + clickhouse/src +) + +SRCS( + main.cpp + + client/Client.cpp + client/ConnectionParameters.cpp + client/Suggest.cpp + extract-from-config/ExtractFromConfig.cpp + server/Server.cpp + server/MetricsTransmitter.cpp +) + +END() diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index f435d6e6336..cb2c9e7a256 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -7,8 +7,8 @@ namespace DB { namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; + extern const int NOT_IMPLEMENTED; } @@ -36,8 +36,11 @@ Authentication::Digest Authentication::getPasswordDoubleSHA1() const case DOUBLE_SHA1_PASSWORD: return password_hash; + + case MAX_TYPE: + break; } - throw Exception("Unknown authentication type: " + std::to_string(static_cast(type)), ErrorCodes::LOGICAL_ERROR); + throw Exception("getPasswordDoubleSHA1(): authentication type " + toString(type) + " not supported", ErrorCodes::NOT_IMPLEMENTED); } @@ -71,8 +74,11 @@ bool Authentication::isCorrectPassword(const String & password_) const return encodeSHA1(first_sha1) == password_hash; } + + case MAX_TYPE: + break; } - throw Exception("Unknown authentication type: " + std::to_string(static_cast(type)), ErrorCodes::LOGICAL_ERROR); + throw Exception("Cannot check if the password is correct for authentication type " + toString(type), ErrorCodes::NOT_IMPLEMENTED); } } diff --git a/src/Access/Authentication.h b/src/Access/Authentication.h index 3f16dc56de3..c410a101cdd 100644 --- a/src/Access/Authentication.h +++ b/src/Access/Authentication.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -14,6 +15,7 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; } @@ -35,6 +37,15 @@ public: /// SHA1(SHA1(password)). /// This kind of hash is used by the `mysql_native_password` authentication plugin. DOUBLE_SHA1_PASSWORD, + + MAX_TYPE, + }; + + struct TypeInfo + { + const char * const raw_name; + const String name; /// Lowercased with underscores, e.g. "sha256_password". 
+ static const TypeInfo & get(Type type_); }; using Digest = std::vector; @@ -85,6 +96,48 @@ private: }; +inline const Authentication::TypeInfo & Authentication::TypeInfo::get(Type type_) +{ + static constexpr auto make_info = [](const char * raw_name_) + { + String init_name = raw_name_; + boost::to_lower(init_name); + return TypeInfo{raw_name_, std::move(init_name)}; + }; + + switch (type_) + { + case NO_PASSWORD: + { + static const auto info = make_info("NO_PASSWORD"); + return info; + } + case PLAINTEXT_PASSWORD: + { + static const auto info = make_info("PLAINTEXT_PASSWORD"); + return info; + } + case SHA256_PASSWORD: + { + static const auto info = make_info("SHA256_PASSWORD"); + return info; + } + case DOUBLE_SHA1_PASSWORD: + { + static const auto info = make_info("DOUBLE_SHA1_PASSWORD"); + return info; + } + case MAX_TYPE: break; + } + throw Exception("Unknown authentication type: " + std::to_string(static_cast(type_)), ErrorCodes::LOGICAL_ERROR); +} + +inline String toString(Authentication::Type type_) +{ + return Authentication::TypeInfo::get(type_).raw_name; +} + + inline Authentication::Digest Authentication::encodeSHA256(const std::string_view & text [[maybe_unused]]) { #if USE_SSL @@ -122,8 +175,10 @@ inline void Authentication::setPassword(const String & password_) case DOUBLE_SHA1_PASSWORD: return setPasswordHashBinary(encodeDoubleSHA1(password_)); + + case MAX_TYPE: break; } - throw Exception("Unknown authentication type: " + std::to_string(static_cast(type)), ErrorCodes::LOGICAL_ERROR); + throw Exception("setPassword(): authentication type " + toString(type) + " not supported", ErrorCodes::NOT_IMPLEMENTED); } @@ -186,8 +241,10 @@ inline void Authentication::setPasswordHashBinary(const Digest & hash) password_hash = hash; return; } + + case MAX_TYPE: break; } - throw Exception("Unknown authentication type: " + std::to_string(static_cast(type)), ErrorCodes::LOGICAL_ERROR); + throw Exception("setPasswordHashBinary(): authentication type " + toString(type) + " not supported", ErrorCodes::NOT_IMPLEMENTED); } } diff --git a/src/Access/ExtendedRoleSet.cpp b/src/Access/ExtendedRoleSet.cpp index a29ee40380c..a8e674b3722 100644 --- a/src/Access/ExtendedRoleSet.cpp +++ b/src/Access/ExtendedRoleSet.cpp @@ -68,15 +68,27 @@ void ExtendedRoleSet::init(const ASTExtendedRoleSet & ast, const AccessControlMa { all = ast.all; - auto name_to_id = [id_mode{ast.id_mode}, manager](const String & name) -> UUID + auto name_to_id = [&ast, manager](const String & name) -> UUID { - if (id_mode) + if (ast.id_mode) return parse(name); assert(manager); - auto id = manager->find(name); - if (id) - return *id; - return manager->getID(name); + if (ast.can_contain_users && ast.can_contain_roles) + { + auto id = manager->find(name); + if (id) + return *id; + return manager->getID(name); + } + else if (ast.can_contain_users) + { + return manager->getID(name); + } + else + { + assert(ast.can_contain_roles); + return manager->getID(name); + } }; if (!ast.names.empty() && !all) diff --git a/src/Access/MultipleAccessStorage.cpp b/src/Access/MultipleAccessStorage.cpp index 0dd1f142f31..9b80c16e487 100644 --- a/src/Access/MultipleAccessStorage.cpp +++ b/src/Access/MultipleAccessStorage.cpp @@ -143,6 +143,14 @@ const IAccessStorage & MultipleAccessStorage::getStorage(const UUID & id) const return const_cast(this)->getStorage(id); } +void MultipleAccessStorage::addStorage(std::unique_ptr nested_storage) +{ + /// Note that IStorage::storage_name is not changed. 
It is ok as this method + /// is considered as a temporary solution allowing third-party Arcadia applications + /// using CH as a library to register their own access storages. Do not remove + /// this method without providing any alternative :) + nested_storages.emplace_back(std::move(nested_storage)); +} AccessEntityPtr MultipleAccessStorage::readImpl(const UUID & id) const { diff --git a/src/Access/MultipleAccessStorage.h b/src/Access/MultipleAccessStorage.h index ec8c8f2a101..06fb3d45c05 100644 --- a/src/Access/MultipleAccessStorage.h +++ b/src/Access/MultipleAccessStorage.h @@ -25,6 +25,8 @@ public: const Storage & getStorage(const UUID & id) const; Storage & getStorage(const UUID & id); + void addStorage(std::unique_ptr nested_storage); + Storage & getStorageByIndex(size_t i) { return *(nested_storages[i]); } const Storage & getStorageByIndex(size_t i) const { return *(nested_storages[i]); } diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index ce33383548f..f5f48a2390e 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -52,18 +52,20 @@ namespace String user_config = "users." + user_name; - bool has_password = config.has(user_config + ".password"); + bool has_no_password = config.has(user_config + ".no_password"); + bool has_password_plaintext = config.has(user_config + ".password"); bool has_password_sha256_hex = config.has(user_config + ".password_sha256_hex"); bool has_password_double_sha1_hex = config.has(user_config + ".password_double_sha1_hex"); - if (has_password + has_password_sha256_hex + has_password_double_sha1_hex > 1) - throw Exception("More than one field of 'password', 'password_sha256_hex', 'password_double_sha1_hex' is used to specify password for user " + user_name + ". Must be only one of them.", + size_t num_password_fields = has_no_password + has_password_plaintext + has_password_sha256_hex + has_password_double_sha1_hex; + if (num_password_fields > 1) + throw Exception("More than one field of 'password', 'password_sha256_hex', 'password_double_sha1_hex', 'no_password' are used to specify password for user " + user_name + ". Must be only one of them.", ErrorCodes::BAD_ARGUMENTS); - if (!has_password && !has_password_sha256_hex && !has_password_double_sha1_hex) - throw Exception("Either 'password' or 'password_sha256_hex' or 'password_double_sha1_hex' must be specified for user " + user_name + ".", ErrorCodes::BAD_ARGUMENTS); + if (num_password_fields < 1) + throw Exception("Either 'password' or 'password_sha256_hex' or 'password_double_sha1_hex' or 'no_password' must be specified for user " + user_name + ".", ErrorCodes::BAD_ARGUMENTS); - if (has_password) + if (has_password_plaintext) { user->authentication = Authentication{Authentication::PLAINTEXT_PASSWORD}; user->authentication.setPassword(config.getString(user_config + ".password")); diff --git a/src/AggregateFunctions/AggregateFunctionEntropy.h b/src/AggregateFunctions/AggregateFunctionEntropy.h index 7586cebd8ec..ff233a5ac93 100644 --- a/src/AggregateFunctions/AggregateFunctionEntropy.h +++ b/src/AggregateFunctions/AggregateFunctionEntropy.h @@ -23,18 +23,10 @@ struct EntropyData { using Weight = UInt64; - using HashingMap = HashMap< - Value, Weight, - HashCRC32, - HashTableGrower<4>, - HashTableAllocatorWithStackMemory) * (1 << 3)>>; + using HashingMap = HashMapWithStackMemory, 4>; /// For the case of pre-hashed values. 
- using TrivialMap = HashMap< - Value, Weight, - UInt128TrivialHash, - HashTableGrower<4>, - HashTableAllocatorWithStackMemory) * (1 << 3)>>; + using TrivialMap = HashMapWithStackMemory; using Map = std::conditional_t, TrivialMap, HashingMap>; diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h index 9dbf2c921c2..88b1c87f526 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -28,12 +28,7 @@ template struct AggregateFunctionGroupUniqArrayData { /// When creating, the hash table must be small. - using Set = HashSet< - T, - DefaultHash, - HashTableGrower<4>, - HashTableAllocatorWithStackMemory - >; + using Set = HashSetWithStackMemory, 4>; Set value; }; @@ -126,9 +121,10 @@ public: /// Generic implementation, it uses serialized representation as object descriptor. struct AggregateFunctionGroupUniqArrayGenericData { - static constexpr size_t INIT_ELEMS = 2; /// adjustable - static constexpr size_t ELEM_SIZE = sizeof(HashSetCellWithSavedHash); - using Set = HashSetWithSavedHash, HashTableAllocatorWithStackMemory>; + static constexpr size_t INITIAL_SIZE_DEGREE = 3; /// adjustable + + using Set = HashSetWithSavedHashWithStackMemory; Set value; }; diff --git a/src/AggregateFunctions/AggregateFunctionTopK.h b/src/AggregateFunctions/AggregateFunctionTopK.h index 9c5e62bb6d7..23eb0e7ff09 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/src/AggregateFunctions/AggregateFunctionTopK.h @@ -23,13 +23,8 @@ namespace DB template struct AggregateFunctionTopKData { - using Set = SpaceSaving - < - T, - HashCRC32, - HashTableGrower<4>, - HashTableAllocatorWithStackMemory - >; + using Set = SpaceSaving>; + Set value; }; @@ -109,13 +104,7 @@ public: /// Generic implementation, it uses serialized representation as object descriptor. struct AggregateFunctionTopKGenericData { - using Set = SpaceSaving - < - StringRef, - StringRefHash, - HashTableGrower<4>, - HashTableAllocatorWithStackMemory - >; + using Set = SpaceSaving; Set value; }; diff --git a/src/AggregateFunctions/QuantileExactWeighted.h b/src/AggregateFunctions/QuantileExactWeighted.h index 6053bddc947..666dbd6b622 100644 --- a/src/AggregateFunctions/QuantileExactWeighted.h +++ b/src/AggregateFunctions/QuantileExactWeighted.h @@ -33,12 +33,7 @@ struct QuantileExactWeighted using Hasher = std::conditional_t, Int128Hash, HashCRC32>; /// When creating, the hash table must be small. 
- using Map = HashMap< - UnderlyingType, Weight, - Hasher, - HashTableGrower<4>, - HashTableAllocatorWithStackMemory) * (1 << 3)> - >; + using Map = HashMapWithStackMemory; Map map; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 222a3e486f9..baa0fbcb883 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -58,6 +58,7 @@ add_subdirectory (TableFunctions) add_subdirectory (Processors) add_subdirectory (Formats) add_subdirectory (Compression) +add_subdirectory (Server) set(dbms_headers) @@ -145,6 +146,7 @@ add_object_library(clickhouse_storages_distributed Storages/Distributed) add_object_library(clickhouse_storages_mergetree Storages/MergeTree) add_object_library(clickhouse_storages_liveview Storages/LiveView) add_object_library(clickhouse_client Client) +add_object_library(clickhouse_server Server) add_object_library(clickhouse_formats Formats) add_object_library(clickhouse_processors Processors) add_object_library(clickhouse_processors_executors Processors/Executors) diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 2f3a766b8f5..3b1f99bc5be 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -549,6 +549,8 @@ void ColumnAggregateFunction::getPermutation(bool /*reverse*/, size_t /*limit*/, res[i] = i; } +void ColumnAggregateFunction::updatePermutation(bool, size_t, int, Permutation &, EqualRanges&) const {} + void ColumnAggregateFunction::gather(ColumnGathererStream & gatherer) { gatherer.gather(*this); diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index f257351a4d0..40f73665ebe 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -193,6 +193,7 @@ public: } void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; + void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; /** More efficient manipulation methods */ Container & getData() diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 7dba8e857cc..604381f0c16 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -737,6 +737,76 @@ void ColumnArray::getPermutation(bool reverse, size_t limit, int nan_direction_h } } +void ColumnArray::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const +{ + if (limit >= size() || limit >= equal_range.back().second) + limit = 0; + + size_t n = equal_range.size(); + + if (limit) + --n; + + EqualRanges new_ranges; + for (size_t i = 0; i < n; ++i) + { + const auto& [first, last] = equal_range[i]; + + if (reverse) + std::sort(res.begin() + first, res.begin() + last, Less(*this, nan_direction_hint)); + else + std::sort(res.begin() + first, res.begin() + last, Less(*this, nan_direction_hint)); + auto new_first = first; + + for (auto j = first + 1; j < last; ++j) + { + if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0) + { + if (j - new_first > 1) + new_ranges.emplace_back(new_first, j); + + new_first = j; + } + } + + if (last - new_first > 1) + new_ranges.emplace_back(new_first, last); + } + + if (limit) + { + const auto& [first, last] = equal_range.back(); + if (reverse) + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, Less(*this, nan_direction_hint)); + else + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() 
+ last, Less(*this, nan_direction_hint)); + auto new_first = first; + for (auto j = first + 1; j < limit; ++j) + { + if (compareAt(res[new_first], res[j], *this, nan_direction_hint) != 0) + { + if (j - new_first > 1) + new_ranges.emplace_back(new_first, j); + + new_first = j; + } + } + auto new_last = limit; + for (auto j = limit; j < last; ++j) + { + if (compareAt(res[new_first], res[j], *this, nan_direction_hint) == 0) + { + std::swap(res[new_last], res[j]); + ++new_last; + } + } + if (new_last - new_first > 1) + { + new_ranges.emplace_back(new_first, new_last); + } + } + equal_range = std::move(new_ranges); +} ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const { diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index 15a1d1bd91a..55935a91cde 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -73,6 +73,7 @@ public: template ColumnPtr indexImpl(const PaddedPODArray & indexes, size_t limit) const; int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; + void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override; void reserve(size_t n) override; size_t byteSize() const override; size_t allocatedBytes() const override; diff --git a/src/Columns/ColumnConst.cpp b/src/Columns/ColumnConst.cpp index 63f520a4c05..545c0b1b300 100644 --- a/src/Columns/ColumnConst.cpp +++ b/src/Columns/ColumnConst.cpp @@ -120,6 +120,8 @@ void ColumnConst::getPermutation(bool /*reverse*/, size_t /*limit*/, int /*nan_d res[i] = i; } +void ColumnConst::updatePermutation(bool, size_t, int, Permutation &, EqualRanges &) const {} + void ColumnConst::updateWeakHash32(WeakHash32 & hash) const { if (hash.getData().size() != s) diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index 560d4d63a10..5fc96b14be8 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -170,6 +170,7 @@ public: ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; + void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override; size_t byteSize() const override { diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index 5396389294a..3e6fb833b56 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -108,6 +108,76 @@ void ColumnDecimal::getPermutation(bool reverse, size_t limit, int , IColumn: permutation(reverse, limit, res); } +template +void ColumnDecimal::updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const +{ + if (limit >= data.size() || limit >= equal_range.back().second) + limit = 0; + + size_t n = equal_range.size(); + if (limit) + --n; + + EqualRanges new_ranges; + for (size_t i = 0; i < n; ++i) + { + const auto& [first, last] = equal_range[i]; + if (reverse) + std::partial_sort(res.begin() + first, res.begin() + last, res.begin() + last, + [this](size_t a, size_t b) { return data[a] > data[b]; }); + else + std::partial_sort(res.begin() + first, res.begin() + last, res.begin() + last, + [this](size_t a, size_t b) { return data[a] < data[b]; }); + auto 
new_first = first; + for (auto j = first + 1; j < last; ++j) + { + if (data[res[new_first]] != data[res[j]]) + { + if (j - new_first > 1) + new_ranges.emplace_back(new_first, j); + + new_first = j; + } + } + if (last - new_first > 1) + new_ranges.emplace_back(new_first, last); + } + + if (limit) + { + const auto& [first, last] = equal_range.back(); + if (reverse) + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, + [this](size_t a, size_t b) { return data[a] > data[b]; }); + else + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, + [this](size_t a, size_t b) { return data[a] < data[b]; }); + auto new_first = first; + for (auto j = first + 1; j < limit; ++j) + { + if (data[res[new_first]] != data[res[j]]) + { + if (j - new_first > 1) + new_ranges.emplace_back(new_first, j); + + new_first = j; + } + } + auto new_last = limit; + for (auto j = limit; j < last; ++j) + { + if (data[res[new_first]] == data[res[j]]) + { + std::swap(res[new_last], res[j]); + ++new_last; + } + } + if (new_last - new_first > 1) + new_ranges.emplace_back(new_first, new_last); + } + equal_range = std::move(new_ranges); +} + template ColumnPtr ColumnDecimal::permute(const IColumn::Permutation & perm, size_t limit) const { diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h index 1f56b7c4242..86357dc8be7 100644 --- a/src/Columns/ColumnDecimal.h +++ b/src/Columns/ColumnDecimal.h @@ -108,6 +108,7 @@ public: void updateWeakHash32(WeakHash32 & hash) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override; + void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges& equal_range) const override; MutableColumnPtr cloneResized(size_t size) const override; @@ -152,6 +153,8 @@ public: const T & getElement(size_t n) const { return data[n]; } T & getElement(size_t n) { return data[n]; } + UInt32 getScale() const {return scale;} + protected: Container data; UInt32 scale; diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp index 57ae4cbdedf..5a4f6f763d7 100644 --- a/src/Columns/ColumnFixedString.cpp +++ b/src/Columns/ColumnFixedString.cpp @@ -162,6 +162,71 @@ void ColumnFixedString::getPermutation(bool reverse, size_t limit, int /*nan_dir } } +void ColumnFixedString::updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const +{ + if (limit >= size() || limit >= equal_range.back().second) + limit = 0; + + size_t k = equal_range.size(); + if (limit) + --k; + + EqualRanges new_ranges; + + for (size_t i = 0; i < k; ++i) + { + const auto& [first, last] = equal_range[i]; + if (reverse) + std::sort(res.begin() + first, res.begin() + last, less(*this)); + else + std::sort(res.begin() + first, res.begin() + last, less(*this)); + auto new_first = first; + for (auto j = first + 1; j < last; ++j) + { + if (memcmpSmallAllowOverflow15(chars.data() + j * n, chars.data() + new_first * n, n) != 0) + { + if (j - new_first > 1) + new_ranges.emplace_back(new_first, j); + + new_first = j; + } + } + if (last - new_first > 1) + new_ranges.emplace_back(new_first, last); + } + if (limit) + { + const auto& [first, last] = equal_range.back(); + if (reverse) + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this)); + else + std::partial_sort(res.begin() + first, 
res.begin() + limit, res.begin() + last, less(*this)); + auto new_first = first; + for (auto j = first + 1; j < limit; ++j) + { + if (memcmpSmallAllowOverflow15(chars.data() + j * n, chars.data() + new_first * n, n) != 0) + { + if (j - new_first > 1) + new_ranges.emplace_back(new_first, j); + + new_first = j; + } + } + auto new_last = limit; + for (auto j = limit; j < last; ++j) + { + if (memcmpSmallAllowOverflow15(chars.data() + j * n, chars.data() + new_first * n, n) == 0) + { + std::swap(res[new_last], res[j]); + ++new_last; + } + } + if (new_last - new_first > 1) + new_ranges.emplace_back(new_first, new_last); + } + equal_range = std::move(new_ranges); +} + void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length) { const ColumnFixedString & src_concrete = assert_cast(src); diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 74c4f3c74f2..996a1f99ef1 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -118,6 +118,8 @@ public: void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; + void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_range) const override; + void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index 1bde48559fe..31cb8708a6e 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -121,6 +121,11 @@ public: throw Exception("getPermutation is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); } + void updatePermutation(bool, size_t, int, Permutation &, EqualRanges &) const override + { + throw Exception("updatePermutation is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + } + void gather(ColumnGathererStream &) override { throw Exception("Method gather is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index d6f0df1d53a..9e979a507ff 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -314,6 +314,76 @@ void ColumnLowCardinality::getPermutation(bool reverse, size_t limit, int nan_di } } +void ColumnLowCardinality::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const +{ + if (limit >= size() || limit >= equal_range.back().second) + limit = 0; + + size_t n = equal_range.size(); + if (limit) + --n; + + EqualRanges new_ranges; + for (size_t i = 0; i < n; ++i) + { + const auto& [first, last] = equal_range[i]; + if (reverse) + std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b) + {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; }); + else + std::sort(res.begin() + first, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b) + {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; }); + + auto new_first = first; + for (auto j = first + 1; j < last; ++j) + { + if (compareAt(new_first, j, *this, nan_direction_hint) != 0) + { + if (j - new_first > 1) + new_ranges.emplace_back(new_first, j); + + new_first = j; + 
} + } + if (last - new_first > 1) + new_ranges.emplace_back(new_first, last); + } + + if (limit) + { + const auto& [first, last] = equal_range.back(); + if (reverse) + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b) + {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) > 0; }); + else + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, [this, nan_direction_hint](size_t a, size_t b) + {return getDictionary().compareAt(getIndexes().getUInt(a), getIndexes().getUInt(b), getDictionary(), nan_direction_hint) < 0; }); + auto new_first = first; + for (auto j = first + 1; j < limit; ++j) + { + if (getDictionary().compareAt(getIndexes().getUInt(new_first), getIndexes().getUInt(j), getDictionary(), nan_direction_hint) != 0) + { + if (j - new_first > 1) + new_ranges.emplace_back(new_first, j); + + new_first = j; + } + } + auto new_last = limit; + for (auto j = limit; j < last; ++j) + { + if (getDictionary().compareAt(getIndexes().getUInt(new_first), getIndexes().getUInt(j), getDictionary(), nan_direction_hint) == 0) + { + std::swap(res[new_last], res[j]); + ++new_last; + } + } + if (new_last - new_first > 1) + new_ranges.emplace_back(new_first, new_last); + } + equal_range = std::move(new_ranges); +} + std::vector ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const { auto columns = getIndexes().scatter(num_columns, selector); diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index e641cc177f3..905d15f8167 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -111,6 +111,8 @@ public: void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; + void updatePermutation(bool reverse, size_t limit, int, IColumn::Permutation & res, EqualRanges & equal_range) const override; + ColumnPtr replicate(const Offsets & offsets) const override { return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets)); diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 55ce1401073..a3c4e77db0d 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -321,6 +321,75 @@ void ColumnNullable::getPermutation(bool reverse, size_t limit, int null_directi } } +void ColumnNullable::updatePermutation(bool reverse, size_t limit, int null_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const +{ + if (limit >= equal_range.back().second || limit >= size()) + limit = 0; + + EqualRanges new_ranges, temp_ranges; + + for (const auto &[first, last] : equal_range) + { + bool direction = ((null_direction_hint > 0) != reverse); + /// Shift all NULL values to the end. + + size_t read_idx = first; + size_t write_idx = first; + while (read_idx < last && (isNullAt(res[read_idx])^direction)) + { + ++read_idx; + ++write_idx; + } + + ++read_idx; + + /// Invariants: + /// write_idx < read_idx + /// write_idx points to NULL + /// read_idx will be incremented to position of next not-NULL + /// there are range of NULLs between write_idx and read_idx - 1, + /// We are moving elements from end to begin of this range, + /// so range will "bubble" towards the end. + /// Relative order of NULL elements could be changed, + /// but relative order of non-NULLs is preserved. 
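The loop that follows maintains exactly the invariants listed in the comment above. For reference, a minimal self-contained sketch of the same read_idx/write_idx idea, assuming a plain std::vector permutation and a null map in place of ColumnNullable (names are illustrative; the real code also chooses which side the NULLs go to from null_direction_hint and reverse):

#include <cstddef>
#include <utility>
#include <vector>

/// Move rows for which is_null(row) is true to the tail of [first, last),
/// keeping the relative order of the remaining rows.
template <typename IsNull>
size_t partitionNullsToEnd(std::vector<size_t> & perm, size_t first, size_t last, IsNull is_null)
{
    size_t write_idx = first;                      /// next slot for a non-NULL row
    for (size_t read_idx = first; read_idx < last; ++read_idx)
    {
        if (!is_null(perm[read_idx]))
        {
            std::swap(perm[write_idx], perm[read_idx]);
            ++write_idx;                           /// non-NULL rows keep their order, NULLs drift to the end
        }
    }
    return write_idx;                              /// NULL rows now occupy [write_idx, last)
}

int main()
{
    std::vector<size_t> perm{0, 1, 2, 3, 4};
    std::vector<char> null_map{0, 1, 0, 1, 0};
    size_t null_start = partitionNullsToEnd(perm, 0, perm.size(),
                                            [&](size_t row) { return null_map[row] != 0; });
    (void) null_start;   /// perm is now {0, 2, 4, 3, 1}; the NULL rows start at index 3
}

As in the patch, only the non-NULL rows keep their relative order; the NULL rows may be reordered among themselves, which is harmless because they compare equal anyway.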
+ + while (read_idx < last && write_idx < last) + { + if (isNullAt(res[read_idx])^direction) + { + std::swap(res[read_idx], res[write_idx]); + ++write_idx; + } + ++read_idx; + } + + if (write_idx - first > 1) + { + if (direction) + temp_ranges.emplace_back(first, write_idx); + else + new_ranges.emplace_back(first, write_idx); + + } + + if (last - write_idx > 1) + { + if (direction) + new_ranges.emplace_back(write_idx, last); + else + temp_ranges.emplace_back(write_idx, last); + } + } + while (!new_ranges.empty() && limit && limit <= new_ranges.back().first) + new_ranges.pop_back(); + + if (!temp_ranges.empty()) + getNestedColumn().updatePermutation(reverse, limit, null_direction_hint, res, temp_ranges); + + equal_range.resize(temp_ranges.size() + new_ranges.size()); + std::merge(temp_ranges.begin(), temp_ranges.end(), new_ranges.begin(), new_ranges.end(), equal_range.begin()); +} + void ColumnNullable::gather(ColumnGathererStream & gatherer) { gatherer.gather(*this); diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 5443d8b0187..2cd8ff9f40f 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -78,6 +78,7 @@ public: ColumnPtr index(const IColumn & indexes, size_t limit) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; void getPermutation(bool reverse, size_t limit, int null_direction_hint, Permutation & res) const override; + void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; void reserve(size_t n) override; size_t byteSize() const override; size_t allocatedBytes() const override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 12f7a5632db..136a30d475a 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -302,6 +302,77 @@ void ColumnString::getPermutation(bool reverse, size_t limit, int /*nan_directio } } +void ColumnString::updatePermutation(bool reverse, size_t limit, int /*nan_direction_hint*/, Permutation & res, EqualRanges & equal_range) const +{ + if (limit >= size() || limit > equal_range.back().second) + limit = 0; + + EqualRanges new_ranges; + auto less_true = less(*this); + auto less_false = less(*this); + size_t n = equal_range.size(); + if (limit) + --n; + + for (size_t i = 0; i < n; ++i) + { + const auto &[first, last] = equal_range[i]; + if (reverse) + std::sort(res.begin() + first, res.begin() + last, less_false); + else + std::sort(res.begin() + first, res.begin() + last, less_true); + size_t new_first = first; + for (size_t j = first + 1; j < last; ++j) + { + if (memcmpSmallAllowOverflow15( + chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1, + chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0) + { + if (j - new_first > 1) + new_ranges.emplace_back(new_first, j); + + new_first = j; + } + } + if (last - new_first > 1) + new_ranges.emplace_back(new_first, last); + } + + if (limit) + { + const auto &[first, last] = equal_range.back(); + if (reverse) + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less_false); + else + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less_true); + size_t new_first = first; + for (size_t j = first + 1; j < limit; ++j) + { + if (memcmpSmallAllowOverflow15( + chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1, + chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) != 0) + { + if (j - new_first > 1) + 
new_ranges.emplace_back(new_first, j); + new_first = j; + } + } + size_t new_last = limit; + for (size_t j = limit; j < last; ++j) + { + if (memcmpSmallAllowOverflow15( + chars.data() + offsetAt(res[j]), sizeAt(res[j]) - 1, + chars.data() + offsetAt(res[new_first]), sizeAt(res[new_first]) - 1) == 0) + { + std::swap(res[j], res[new_last]); + ++new_last; + } + } + if (new_last - new_first > 1) + new_ranges.emplace_back(new_first, new_last); + } + equal_range = std::move(new_ranges); +} ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const { @@ -440,6 +511,77 @@ void ColumnString::getPermutationWithCollation(const Collator & collator, bool r } } +void ColumnString::updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation &res, EqualRanges &equal_range) const +{ + if (limit >= size() || limit >= equal_range.back().second) + limit = 0; + + size_t n = equal_range.size(); + if (limit) + --n; + + EqualRanges new_ranges; + for (size_t i = 0; i < n; ++i) + { + const auto& [first, last] = equal_range[i]; + if (reverse) + std::sort(res.begin() + first, res.begin() + last, lessWithCollation(*this, collator)); + else + std::sort(res.begin() + first, res.begin() + last, lessWithCollation(*this, collator)); + auto new_first = first; + for (auto j = first + 1; j < last; ++j) + { + if (collator.compare( + reinterpret_cast(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]), + reinterpret_cast(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0) + { + if (j - new_first > 1) + new_ranges.emplace_back(new_first, j); + + new_first = j; + } + } + if (last - new_first > 1) + new_ranges.emplace_back(new_first, last); + + } + + if (limit) + { + const auto& [first, last] = equal_range.back(); + if (reverse) + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation(*this, collator)); + else + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, lessWithCollation(*this, collator)); + auto new_first = first; + for (auto j = first + 1; j < limit; ++j) + { + if (collator.compare( + reinterpret_cast(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]), + reinterpret_cast(&chars[offsetAt(res[j])]), sizeAt(res[j])) != 0) + { + if (j - new_first > 1) + new_ranges.emplace_back(new_first, j); + + new_first = j; + } + } + auto new_last = limit; + for (auto j = limit; j < last; ++j) + { + if (collator.compare( + reinterpret_cast(&chars[offsetAt(res[new_first])]), sizeAt(res[new_first]), + reinterpret_cast(&chars[offsetAt(res[j])]), sizeAt(res[j])) == 0) + { + std::swap(res[new_last], res[j]); + ++new_last; + } + } + if (new_last - new_first > 1) + new_ranges.emplace_back(new_first, new_last); + } + equal_range = std::move(new_ranges); +} void ColumnString::protect() { diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 32116880014..a0b3d259b67 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -225,9 +225,13 @@ public: void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; + void updatePermutation(bool reverse, size_t limit, int, Permutation & res, EqualRanges & equal_range) const override; + /// Sorting with respect of collation. 
void getPermutationWithCollation(const Collator & collator, bool reverse, size_t limit, Permutation & res) const; + void updatePermutationWithCollation(const Collator & collator, bool reverse, size_t limit, int, Permutation & res, EqualRanges& equal_range) const; + ColumnPtr replicate(const Offsets & replicate_offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 59552c67f14..78117b8e310 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -329,6 +329,19 @@ void ColumnTuple::getPermutation(bool reverse, size_t limit, int nan_direction_h } } +void ColumnTuple::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const +{ + for (const auto& column : columns) + { + column->updatePermutation(reverse, limit, nan_direction_hint, res, equal_range); + while (limit && limit <= equal_range.back().first) + equal_range.pop_back(); + + if (equal_range.empty()) + break; + } +} + void ColumnTuple::gather(ColumnGathererStream & gatherer) { gatherer.gather(*this); diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index 3533b602a1b..69b18e2fc0f 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -72,6 +72,7 @@ public: int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; void getExtremes(Field & min, Field & max) const override; void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const override; + void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override; void reserve(size_t n) override; size_t byteSize() const override; size_t allocatedBytes() const override; diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index da96e4a5ea2..5bbac6baf4d 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -77,6 +77,7 @@ public: } int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override; + void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const override; void getExtremes(Field & min, Field & max) const override { column_holder->getExtremes(min, max); } bool valuesHaveFixedSize() const override { return column_holder->valuesHaveFixedSize(); } @@ -374,6 +375,39 @@ int ColumnUnique::compareAt(size_t n, size_t m, const IColumn & rhs, return getNestedColumn()->compareAt(n, m, *column_unique.getNestedColumn(), nan_direction_hint); } +template +void ColumnUnique::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const +{ + bool found_null_value_index = false; + for (size_t i = 0; i < equal_range.size() && !found_null_value_index; ++i) + { + auto& [first, last] = equal_range[i]; + for (auto j = first; j < last; ++j) + { + if (res[j] == getNullValueIndex()) + { + if ((nan_direction_hint > 0) != reverse) + { + std::swap(res[j], res[last - 1]); + --last; + } + else + { + std::swap(res[j], res[first]); + ++first; + } + if (last - first <= 1) + { + equal_range.erase(equal_range.begin() + i); + } + found_null_value_index = true; + break; + } + } + } + getNestedColumn()->updatePermutation(reverse, limit, nan_direction_hint, res, equal_range); +} + template static void 
checkIndexes(const ColumnVector & indexes, size_t max_dictionary_size) { diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 41ed17d6d82..c4f2c6d8705 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -219,6 +219,76 @@ void ColumnVector::getPermutation(bool reverse, size_t limit, int nan_directi } } +template +void ColumnVector::updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges & equal_range) const +{ + if (limit >= data.size() || limit >= equal_range.back().second) + limit = 0; + + EqualRanges new_ranges; + + for (size_t i = 0; i < equal_range.size() - bool(limit); ++i) + { + const auto & [first, last] = equal_range[i]; + if (reverse) + pdqsort(res.begin() + first, res.begin() + last, greater(*this, nan_direction_hint)); + else + pdqsort(res.begin() + first, res.begin() + last, less(*this, nan_direction_hint)); + size_t new_first = first; + for (size_t j = first + 1; j < last; ++j) + { + if (less(*this, nan_direction_hint)(res[j], res[new_first]) || greater(*this, nan_direction_hint)(res[j], res[new_first])) + { + if (j - new_first > 1) + { + new_ranges.emplace_back(new_first, j); + } + new_first = j; + } + } + if (last - new_first > 1) + { + new_ranges.emplace_back(new_first, last); + } + } + if (limit) + { + const auto & [first, last] = equal_range.back(); + if (reverse) + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, greater(*this, nan_direction_hint)); + else + std::partial_sort(res.begin() + first, res.begin() + limit, res.begin() + last, less(*this, nan_direction_hint)); + + size_t new_first = first; + for (size_t j = first + 1; j < limit; ++j) + { + if (less(*this, nan_direction_hint)(res[j], res[new_first]) || greater(*this, nan_direction_hint)(res[j], res[new_first])) + { + if (j - new_first > 1) + { + new_ranges.emplace_back(new_first, j); + } + new_first = j; + } + } + + size_t new_last = limit; + for (size_t j = limit; j < last; ++j) + { + if (!less(*this, nan_direction_hint)(res[j], res[new_first]) && !greater(*this, nan_direction_hint)(res[j], res[new_first])) + { + std::swap(res[j], res[new_last]); + ++new_last; + } + } + if (new_last - new_first > 1) + { + new_ranges.emplace_back(new_first, new_last); + } + } + equal_range = std::move(new_ranges); +} + template const char * ColumnVector::getFamilyName() const diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 43b7c607f64..a6105034f1a 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -192,6 +192,8 @@ public: void getSpecialPermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, IColumn::SpecialSort) const override; + void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges& equal_range) const override; + void reserve(size_t n) override { data.reserve(n); diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 2a38fd5365b..1d92ed1c3ab 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -25,6 +25,13 @@ class ColumnGathererStream; class Field; class WeakHash32; + +/* + * Represents a set of equal ranges in previous column to perform sorting in current column. + * Used in sorting by tuples. + * */ +using EqualRanges = std::vector >; + /// Declares interface to store columns in memory. 
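The EqualRanges alias just introduced is the backbone of the new sorting-by-tuples path: getPermutation orders rows by the first sort column, and every later column only re-sorts the ranges of rows that are still tied. A minimal self-contained sketch of that contract on plain std::vector columns (illustrative only; it ignores reverse, limit and NaN/NULL handling, which the real implementations above also deal with):

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

using Permutation = std::vector<size_t>;
using EqualRanges = std::vector<std::pair<size_t, size_t>>;

/// Refine `perm` (already ordered by the previous columns) by `column`:
/// sort every tied range by this column and keep only the ranges that are still tied.
void updatePermutationSketch(const std::vector<int> & column, Permutation & perm, EqualRanges & ranges)
{
    EqualRanges new_ranges;
    for (const auto & [first, last] : ranges)
    {
        std::sort(perm.begin() + first, perm.begin() + last,
                  [&](size_t a, size_t b) { return column[a] < column[b]; });

        size_t new_first = first;
        for (size_t j = first + 1; j < last; ++j)
        {
            if (column[perm[j]] != column[perm[new_first]])
            {
                if (j - new_first > 1)
                    new_ranges.emplace_back(new_first, j);
                new_first = j;
            }
        }
        if (last - new_first > 1)
            new_ranges.emplace_back(new_first, last);
    }
    ranges = std::move(new_ranges);
}

int main()
{
    /// Sort rows by (a, b): order by `a` first, then refine the ties by `b`.
    std::vector<int> a{1, 0, 1, 0};
    std::vector<int> b{5, 7, 3, 6};
    Permutation perm{0, 1, 2, 3};

    std::sort(perm.begin(), perm.end(), [&](size_t x, size_t y) { return a[x] < a[y]; });
    EqualRanges ranges{{0, 2}, {2, 4}};    /// rows {1, 3} tie on a == 0, rows {0, 2} tie on a == 1

    updatePermutationSketch(b, perm, ranges);
    /// perm is now {3, 1, 2, 0} and ranges is empty: no rows tie on (a, b).
}

ColumnTuple::updatePermutation later in this patch applies this refinement column by column, dropping ranges once they can no longer affect the first `limit` positions.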
class IColumn : public COW { @@ -256,6 +263,16 @@ public: getPermutation(reverse, limit, nan_direction_hint, res); } + /*in updatePermutation we pass the current permutation and the intervals at which it should be sorted + * Then for each interval separately (except for the last one, if there is a limit) + * We sort it based on data about the current column, and find all the intervals within this + * interval that had the same values in this column. we can't tell about these values in what order they + * should have been, we form a new array with intervals that need to be sorted + * If there is a limit, then for the last interval we do partial sorting and all that is described above, + * but in addition we still find all the elements equal to the largest sorted, they will also need to be sorted. + */ + virtual void updatePermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const = 0; + /** Copies each element according offsets parameter. * (i-th element should be copied offsets[i] - offsets[i - 1] times.) * It is necessary in ARRAY JOIN operation. diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index 00604fb87d0..b0c479c46c7 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -107,6 +107,8 @@ public: res[i] = i; } + void updatePermutation(bool, size_t, int, Permutation &, EqualRanges&) const override {} + ColumnPtr replicate(const Offsets & offsets) const override { if (s != offsets.size()) diff --git a/src/Common/Allocator.h b/src/Common/Allocator.h index 9add9299430..43d7e67c4bb 100644 --- a/src/Common/Allocator.h +++ b/src/Common/Allocator.h @@ -278,13 +278,15 @@ private: /** Allocator with optimization to place small memory ranges in automatic memory. */ -template +template class AllocatorWithStackMemory : private Base { private: - alignas(Alignment) char stack_memory[N]; + alignas(Alignment) char stack_memory[_initial_bytes]; public: + static constexpr size_t initial_bytes = _initial_bytes; + /// Do not use boost::noncopyable to avoid the warning about direct base /// being inaccessible due to ambiguity, when derived classes are also /// noncopiable (-Winaccessible-base). @@ -295,10 +297,10 @@ public: void * alloc(size_t size) { - if (size <= N) + if (size <= initial_bytes) { if constexpr (Base::clear_memory) - memset(stack_memory, 0, N); + memset(stack_memory, 0, initial_bytes); return stack_memory; } @@ -307,18 +309,18 @@ public: void free(void * buf, size_t size) { - if (size > N) + if (size > initial_bytes) Base::free(buf, size); } void * realloc(void * buf, size_t old_size, size_t new_size) { /// Was in stack_memory, will remain there. - if (new_size <= N) + if (new_size <= initial_bytes) return buf; /// Already was big enough to not fit in stack_memory. - if (old_size > N) + if (old_size > initial_bytes) return Base::realloc(buf, old_size, new_size, Alignment); /// Was in stack memory, but now will not fit there. @@ -330,10 +332,20 @@ public: protected: static constexpr size_t getStackThreshold() { - return N; + return initial_bytes; } }; +// A constant that gives the number of initially available bytes in +// the allocator. Used to check that this number is in sync with the +// initial size of array or hash table that uses the allocator. 
+template +constexpr size_t allocatorInitialBytes = 0; + +template +constexpr size_t allocatorInitialBytes> = initial_bytes; + #if !__clang__ #pragma GCC diagnostic pop diff --git a/src/Common/CurrentMetrics.h b/src/Common/CurrentMetrics.h index b87504ef49a..a3bac96a16c 100644 --- a/src/Common/CurrentMetrics.h +++ b/src/Common/CurrentMetrics.h @@ -94,6 +94,12 @@ namespace CurrentMetrics amount = new_amount; } + void sub(Value value = 1) + { + what->fetch_sub(value, std::memory_order_relaxed); + amount -= value; + } + /// Subtract value before destructor. void destroy() { diff --git a/src/Common/HashTable/ClearableHashMap.h b/src/Common/HashTable/ClearableHashMap.h index 4370f6b6dc7..fda01dcf4bc 100644 --- a/src/Common/HashTable/ClearableHashMap.h +++ b/src/Common/HashTable/ClearableHashMap.h @@ -43,3 +43,14 @@ public: this->m_size = 0; } }; + +template +using ClearableHashMapWithStackMemory = ClearableHashMap< + Key, + Mapped, + Hash, + HashTableGrower, + HashTableAllocatorWithStackMemory< + (1ULL << initial_size_degree) + * sizeof(ClearableHashMapCell)>>; diff --git a/src/Common/HashTable/ClearableHashSet.h b/src/Common/HashTable/ClearableHashSet.h index 824ec9d8e5f..dc057afacd8 100644 --- a/src/Common/HashTable/ClearableHashSet.h +++ b/src/Common/HashTable/ClearableHashSet.h @@ -84,3 +84,15 @@ public: this->m_size = 0; } }; + +template +using ClearableHashSetWithStackMemory = ClearableHashSet< + Key, + Hash, + HashTableGrower, + HashTableAllocatorWithStackMemory< + (1ULL << initial_size_degree) + * sizeof( + ClearableHashTableCell< + Key, + HashTableCell>)>>; diff --git a/src/Common/HashTable/HashMap.h b/src/Common/HashTable/HashMap.h index cdc4a003af8..3ecbd9f263c 100644 --- a/src/Common/HashTable/HashMap.h +++ b/src/Common/HashTable/HashMap.h @@ -239,3 +239,14 @@ template < typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator> using HashMapWithSavedHash = HashMapTable, Hash, Grower, Allocator>; + +template +using HashMapWithStackMemory = HashMapTable< + Key, + HashMapCellWithSavedHash, + Hash, + HashTableGrower, + HashTableAllocatorWithStackMemory< + (1ULL << initial_size_degree) + * sizeof(HashMapCellWithSavedHash)>>; diff --git a/src/Common/HashTable/HashSet.h b/src/Common/HashTable/HashSet.h index c1970e898a1..c79e05073fc 100644 --- a/src/Common/HashTable/HashSet.h +++ b/src/Common/HashTable/HashSet.h @@ -93,6 +93,14 @@ template > using HashSet = HashSetTable, Hash, Grower, Allocator>; +template +using HashSetWithStackMemory = HashSet< + Key, + Hash, + HashTableGrower, + HashTableAllocatorWithStackMemory< + (1ULL << initial_size_degree) + * sizeof(HashTableCell)>>; template < @@ -102,3 +110,12 @@ template typename Allocator = HashTableAllocator > using HashSetWithSavedHash = HashSetTable, Hash, Grower, Allocator>; + +template +using HashSetWithSavedHashWithStackMemory = HashSetWithSavedHash< + Key, + Hash, + HashTableGrower, + HashTableAllocatorWithStackMemory< + (1ULL << initial_size_degree) + * sizeof(HashSetCellWithSavedHash)>>; diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h index 58b7cd81901..528e719c05b 100644 --- a/src/Common/HashTable/HashTable.h +++ b/src/Common/HashTable/HashTable.h @@ -208,6 +208,7 @@ struct HashTableGrower /// The state of this structure is enough to get the buffer size of the hash table. UInt8 size_degree = initial_size_degree; + static constexpr auto initial_count = 1ULL << initial_size_degree; /// The size of the hash table in the cells. 
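The *WithStackMemory aliases above all size the allocator's inline buffer as (1ULL << initial_size_degree) * sizeof(Cell), i.e. exactly the number of bytes the table's initial buffer needs, so the first allocation never touches the heap. A toy, self-contained illustration of that sizing contract (not the real ClickHouse classes, just the same arithmetic and the same kind of static_assert that HashTable gains below):

#include <cstddef>
#include <cstdint>

/// Toy allocator with a fixed inline buffer, standing in for AllocatorWithStackMemory.
template <size_t inline_bytes_>
struct ToyInlineAllocator
{
    static constexpr size_t inline_bytes = inline_bytes_;
    alignas(16) char stack_memory[inline_bytes_];
};

/// Toy hash table: its initial buffer is (1 << initial_size_degree) cells.
template <typename Cell, size_t initial_size_degree, typename Allocator>
struct ToyHashTable
{
    static constexpr size_t initial_buffer_bytes = (1ULL << initial_size_degree) * sizeof(Cell);

    /// The inline memory must hold the whole initial buffer,
    /// otherwise the "stack" allocator is pointless for this table.
    static_assert(Allocator::inline_bytes == initial_buffer_bytes);

    Allocator allocator;
};

/// 16 cells of 8 bytes each -> 128 inline bytes, mirroring the
/// (1ULL << initial_size_degree) * sizeof(Cell) expression used by the new aliases.
using ToySet = ToyHashTable<uint64_t, 4, ToyInlineAllocator<(1ULL << 4) * sizeof(uint64_t)>>;

int main()
{
    ToySet set;
    (void) set;
}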
size_t bufSize() const { return 1ULL << size_degree; } @@ -255,6 +256,7 @@ struct HashTableGrower template struct HashTableFixedGrower { + static constexpr auto initial_count = 1ULL << key_bits; size_t bufSize() const { return 1ULL << key_bits; } size_t place(size_t x) const { return x; } /// You could write __builtin_unreachable(), but the compiler does not optimize everything, and it turns out less efficiently. @@ -309,6 +311,7 @@ struct ZeroValueStorage }; +// The HashTable template < typename Key, @@ -324,6 +327,14 @@ class HashTable : protected Cell::State, protected ZeroValueStorage /// empty base optimization { +public: + // If we use an allocator with inline memory, check that the initial + // size of the hash table is in sync with the amount of this memory. + static constexpr size_t initial_buffer_bytes + = Grower::initial_count * sizeof(Cell); + static_assert(allocatorInitialBytes == 0 + || allocatorInitialBytes == initial_buffer_bytes); + protected: friend class const_iterator; friend class iterator; diff --git a/src/Common/HashTable/HashTableAllocator.h b/src/Common/HashTable/HashTableAllocator.h index 99f9c979685..47e3fdfc4b6 100644 --- a/src/Common/HashTable/HashTableAllocator.h +++ b/src/Common/HashTable/HashTableAllocator.h @@ -10,5 +10,5 @@ */ using HashTableAllocator = Allocator; -template -using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory; +template +using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory; diff --git a/src/Common/MemoryStatisticsOS.cpp b/src/Common/MemoryStatisticsOS.cpp index adc5bf5d904..6082d23cbd0 100644 --- a/src/Common/MemoryStatisticsOS.cpp +++ b/src/Common/MemoryStatisticsOS.cpp @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB @@ -19,6 +20,7 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; extern const int CANNOT_OPEN_FILE; extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; + extern const int CANNOT_CLOSE_FILE; } static constexpr auto filename = "/proc/self/statm"; @@ -35,7 +37,18 @@ MemoryStatisticsOS::MemoryStatisticsOS() MemoryStatisticsOS::~MemoryStatisticsOS() { if (0 != ::close(fd)) - tryLogCurrentException(__PRETTY_FUNCTION__); + { + try + { + throwFromErrno( + "File descriptor for \"" + std::string(filename) + "\" could not be closed. " + "Something seems to have gone wrong. Inspect errno.", ErrorCodes::CANNOT_CLOSE_FILE); + } + catch (const ErrnoException &) + { + DB::tryLogCurrentException(__PRETTY_FUNCTION__); + } + } } MemoryStatisticsOS::Data MemoryStatisticsOS::get() const diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index 082b78682a2..16d166c191b 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -49,11 +49,13 @@ MemoryTracker::~MemoryTracker() void MemoryTracker::logPeakMemoryUsage() const { + const auto * description = description_ptr.load(std::memory_order_relaxed); LOG_DEBUG(&Logger::get("MemoryTracker"), "Peak memory usage{}: {}.", (description ? " " + std::string(description) : ""), formatReadableSizeWithBinarySuffix(peak)); } void MemoryTracker::logMemoryUsage(Int64 current) const { + const auto * description = description_ptr.load(std::memory_order_relaxed); LOG_DEBUG(&Logger::get("MemoryTracker"), "Current memory usage{}: {}.", (description ? 
" " + std::string(description) : ""), formatReadableSizeWithBinarySuffix(current)); } @@ -85,7 +87,7 @@ void MemoryTracker::alloc(Int64 size) std::stringstream message; message << "Memory tracker"; - if (description) + if (const auto * description = description_ptr.load(std::memory_order_relaxed)) message << " " << description; message << ": fault injected. Would use " << formatReadableSizeWithBinarySuffix(will_be) << " (attempt to allocate chunk of " << size << " bytes)" @@ -117,7 +119,7 @@ void MemoryTracker::alloc(Int64 size) std::stringstream message; message << "Memory limit"; - if (description) + if (const auto * description = description_ptr.load(std::memory_order_relaxed)) message << " " << description; message << " exceeded: would use " << formatReadableSizeWithBinarySuffix(will_be) << " (attempt to allocate chunk of " << size << " bytes)" diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index 23f32b051b2..8af683ae790 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -35,7 +35,7 @@ private: CurrentMetrics::Metric metric = CurrentMetrics::end(); /// This description will be used as prefix into log messages (if isn't nullptr) - const char * description = nullptr; + std::atomic description_ptr = nullptr; void updatePeak(Int64 will_be); void logMemoryUsage(Int64 current) const; @@ -114,9 +114,9 @@ public: metric = metric_; } - void setDescription(const char * description_) + void setDescription(const char * description) { - description = description_; + description_ptr.store(description, std::memory_order_relaxed); } /// Reset the accumulated data diff --git a/src/Common/PODArray.cpp b/src/Common/PODArray.cpp index 95810551ac4..e0b17c8125c 100644 --- a/src/Common/PODArray.cpp +++ b/src/Common/PODArray.cpp @@ -2,7 +2,8 @@ namespace DB { + /// Used for left padding of PODArray when empty -const char EmptyPODArray[EmptyPODArraySize]{}; +const char empty_pod_array[empty_pod_array_size]{}; } diff --git a/src/Common/PODArray.h b/src/Common/PODArray.h index 8fe1f74484e..9ab1c64d3b9 100644 --- a/src/Common/PODArray.h +++ b/src/Common/PODArray.h @@ -63,8 +63,8 @@ namespace ErrorCodes * TODO Pass alignment to Allocator. * TODO Allow greater alignment than alignof(T). Example: array of char aligned to page size. */ -static constexpr size_t EmptyPODArraySize = 1024; -extern const char EmptyPODArray[EmptyPODArraySize]; +static constexpr size_t empty_pod_array_size = 1024; +extern const char empty_pod_array[empty_pod_array_size]; /** Base class that depend only on size of element, not on element itself. * You can static_cast to this class if you want to insert some data regardless to the actual type T. @@ -81,9 +81,14 @@ protected: /// pad_left is also rounded up to 16 bytes to maintain alignment of allocated memory. static constexpr size_t pad_left = integerRoundUp(integerRoundUp(pad_left_, ELEMENT_SIZE), 16); /// Empty array will point to this static memory as padding. - static constexpr char * null = pad_left ? const_cast(EmptyPODArray) + EmptyPODArraySize : nullptr; + static constexpr char * null = pad_left ? const_cast(empty_pod_array) + empty_pod_array_size : nullptr; - static_assert(pad_left <= EmptyPODArraySize && "Left Padding exceeds EmptyPODArraySize. Is the element size too large?"); + static_assert(pad_left <= empty_pod_array_size && "Left Padding exceeds empty_pod_array_size. Is the element size too large?"); + + // If we are using allocator with inline memory, the minimal size of + // array must be in sync with the size of this memory. 
+ static_assert(allocatorInitialBytes == 0 + || allocatorInitialBytes == initial_bytes); char * c_start = null; /// Does not include pad_left. char * c_end = null; @@ -224,13 +229,19 @@ public: } template - void push_back_raw(const char * ptr, TAllocatorParams &&... allocator_params) + void push_back_raw(const void * ptr, TAllocatorParams &&... allocator_params) + { + push_back_raw_many(1, ptr, std::forward(allocator_params)...); + } + + template + void push_back_raw_many(size_t number_of_items, const void * ptr, TAllocatorParams &&... allocator_params) { if (unlikely(c_end == c_end_of_storage)) - reserveForNextSize(std::forward(allocator_params)...); + reserve(number_of_items, std::forward(allocator_params)...); - memcpy(c_end, ptr, ELEMENT_SIZE); - c_end += byte_size(1); + memcpy(c_end, ptr, ELEMENT_SIZE * number_of_items); + c_end += byte_size(number_of_items); } void protect() diff --git a/src/Common/ProcfsMetricsProvider.cpp b/src/Common/ProcfsMetricsProvider.cpp index 633558a7b67..fcc4124aa85 100644 --- a/src/Common/ProcfsMetricsProvider.cpp +++ b/src/Common/ProcfsMetricsProvider.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -22,6 +23,7 @@ namespace ErrorCodes { extern const int FILE_DOESNT_EXIST; extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_CLOSE_FILE; extern const int CANNOT_READ_FROM_FILE_DESCRIPTOR; } @@ -39,6 +41,20 @@ namespace errno == ENOENT ? ErrorCodes::FILE_DOESNT_EXIST : ErrorCodes::CANNOT_OPEN_FILE); } +inline void emitErrorMsgWithFailedToCloseFile(const std::string & filename) +{ + try + { + throwFromErrno( + "File descriptor for \"" + filename + "\" could not be closed. " + "Something seems to have gone wrong. Inspect errno.", ErrorCodes::CANNOT_CLOSE_FILE); + } + catch (const ErrnoException &) + { + DB::tryLogCurrentException(__PRETTY_FUNCTION__); + } +} + ssize_t readFromFD(const int fd, const char * filename, char * buf, size_t buf_size) { ssize_t res = 0; @@ -100,11 +116,11 @@ ProcfsMetricsProvider::ProcfsMetricsProvider(const pid_t /*tid*/) ProcfsMetricsProvider::~ProcfsMetricsProvider() { if (stats_version >= 3 && 0 != ::close(thread_io_fd)) - tryLogCurrentException(__PRETTY_FUNCTION__); + emitErrorMsgWithFailedToCloseFile(thread_io); if (0 != ::close(thread_stat_fd)) - tryLogCurrentException(__PRETTY_FUNCTION__); + emitErrorMsgWithFailedToCloseFile(thread_stat); if (0 != ::close(thread_schedstat_fd)) - tryLogCurrentException(__PRETTY_FUNCTION__); + emitErrorMsgWithFailedToCloseFile(thread_schedstat); } diff --git a/src/Common/SpaceSaving.h b/src/Common/SpaceSaving.h index 9ad7f6275d6..56063340240 100644 --- a/src/Common/SpaceSaving.h +++ b/src/Common/SpaceSaving.h @@ -67,9 +67,7 @@ private: template < typename TKey, - typename Hash = DefaultHash, - typename Grower = HashTableGrower<>, - typename Allocator = HashTableAllocator + typename Hash = DefaultHash > class SpaceSaving { @@ -380,7 +378,7 @@ private: counter_map[counter->key] = counter; } - using CounterMap = HashMap; + using CounterMap = HashMapWithStackMemory; CounterMap counter_map; std::vector counter_list; diff --git a/src/Common/UInt128.h b/src/Common/UInt128.h index b60de11552a..336edfcc56f 100644 --- a/src/Common/UInt128.h +++ b/src/Common/UInt128.h @@ -72,7 +72,7 @@ template bool inline operator<= (T a, const UInt128 b) { return UIn template bool inline operator< (T a, const UInt128 b) { return UInt128(a) < b; } template <> inline constexpr bool IsNumber = true; -template <> struct TypeName { static const char * get() { return "UInt128"; } }; +template <> 
struct TypeName { static constexpr const char * get() { return "UInt128"; } }; template <> struct TypeId { static constexpr const TypeIndex value = TypeIndex::UInt128; }; struct UInt128Hash diff --git a/src/Common/config.h.in b/src/Common/config.h.in index dd6263c3948..08fa03d659f 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -9,5 +9,6 @@ #cmakedefine01 USE_BROTLI #cmakedefine01 USE_UNWIND #cmakedefine01 USE_OPENCL #cmakedefine01 USE_SENTRY +#cmakedefine01 USE_GRPC #cmakedefine01 CLICKHOUSE_SPLIT_BINARY diff --git a/src/Common/tests/gtest_global_context.h b/src/Common/tests/gtest_global_context.h index 05f60e01774..b6529f09b46 100644 --- a/src/Common/tests/gtest_global_context.h +++ b/src/Common/tests/gtest_global_context.h @@ -11,15 +11,15 @@ struct ContextHolder : shared_context(DB::Context::createShared()) , context(DB::Context::createGlobal(shared_context.get())) { + context.makeGlobalContext(); + context.setPath("./"); } ContextHolder(ContextHolder &&) = default; }; -inline ContextHolder getContext() +inline const ContextHolder & getContext() { - ContextHolder holder; - holder.context.makeGlobalContext(); - holder.context.setPath("./"); + static ContextHolder holder; return holder; } diff --git a/src/Common/tests/gtest_log.cpp b/src/Common/tests/gtest_log.cpp new file mode 100644 index 00000000000..a1e532f92e6 --- /dev/null +++ b/src/Common/tests/gtest_log.cpp @@ -0,0 +1,19 @@ +#include +#include +#include +#include + +#include +#include +#include + + +TEST(Logger, Log) +{ + Poco::Logger::root().setLevel("none"); + Poco::Logger::root().setChannel(Poco::AutoPtr(new Poco::NullChannel())); + Logger * log = &Logger::get("Log"); + + /// This test checks that we don't pass this string to fmtlib, because it is the only argument.
+ EXPECT_NO_THROW(LOG_INFO(log, "Hello {} World")); +} diff --git a/src/Common/tests/pod_array.cpp b/src/Common/tests/pod_array.cpp index de15b485411..6e9634ba3cf 100644 --- a/src/Common/tests/pod_array.cpp +++ b/src/Common/tests/pod_array.cpp @@ -18,9 +18,9 @@ static void test1() { using namespace DB; - static constexpr size_t initial_size = 8; - static constexpr size_t stack_threshold = 32; - using Array = PODArray, stack_threshold>>; + static constexpr size_t initial_bytes = 32; + using Array = PODArray, initial_bytes>>; bool res = true; @@ -139,9 +139,9 @@ static void test2() { using namespace DB; - static constexpr size_t initial_size = 8; - static constexpr size_t stack_threshold = 32; - using Array = PODArray, stack_threshold>>; + static constexpr size_t initial_bytes = 32; + using Array = PODArray, initial_bytes>>; bool res = true; @@ -389,9 +389,9 @@ static void test3() { using namespace DB; - static constexpr size_t initial_size = 8; - static constexpr size_t stack_threshold = 32; - using Array = PODArray, stack_threshold>>; + static constexpr size_t initial_bytes = 32; + using Array = PODArray, initial_bytes>>; bool res = true; diff --git a/src/Common/ya.make b/src/Common/ya.make index c0178f3d310..83a419212bd 100644 --- a/src/Common/ya.make +++ b/src/Common/ya.make @@ -18,17 +18,7 @@ PEERDIR( contrib/restricted/ryu ) -# TODO: stub for config_version.h -CFLAGS (GLOBAL -DDBMS_NAME=\"ClickHouse\") -CFLAGS (GLOBAL -DDBMS_VERSION_MAJOR=0) -CFLAGS (GLOBAL -DDBMS_VERSION_MINOR=0) -CFLAGS (GLOBAL -DDBMS_VERSION_PATCH=0) -CFLAGS (GLOBAL -DVERSION_FULL=\"ClickHouse\") -CFLAGS (GLOBAL -DVERSION_INTEGER=0) -CFLAGS (GLOBAL -DVERSION_NAME=\"ClickHouse\") -CFLAGS (GLOBAL -DVERSION_OFFICIAL=\"\\\(arcadia\\\)\") -CFLAGS (GLOBAL -DVERSION_REVISION=0) -CFLAGS (GLOBAL -DVERSION_STRING=\"Unknown\") +INCLUDE(${ARCADIA_ROOT}/clickhouse/cmake/yandex/ya.make.versions.inc) SRCS( ActionLock.cpp diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index 62a99cea97e..5ec6980dbfa 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -164,7 +164,7 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header, /// Create table NamesAndTypesList columns = sample_block.getNamesAndTypesList(); - auto temporary_table = TemporaryTableHolder(context, ColumnsDescription{columns}); + auto temporary_table = TemporaryTableHolder(context, ColumnsDescription{columns}, {}); auto storage = temporary_table.getTable(); context.addExternalTable(data->table_name, std::move(temporary_table)); BlockOutputStreamPtr output = storage->write(ASTPtr(), context); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index be7c09ec6c2..8cf90fa146a 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -126,7 +126,7 @@ struct Settings : public SettingsCollection M(SettingBool, force_optimize_skip_unused_shards_no_nested, false, "Do not apply force_optimize_skip_unused_shards for nested Distributed tables.", 0) \ \ M(SettingBool, input_format_parallel_parsing, true, "Enable parallel parsing for some data formats.", 0) \ - M(SettingUInt64, min_chunk_bytes_for_parallel_parsing, (1024 * 1024), "The minimum chunk size in bytes, which each thread will parse in parallel.", 0) \ + M(SettingUInt64, min_chunk_bytes_for_parallel_parsing, (10 * 1024 * 1024), "The minimum chunk size in bytes, which each thread will parse in parallel.", 0) \ \ M(SettingUInt64, merge_tree_min_rows_for_concurrent_read, (20 * 8192), "If at least as many lines are read from one file, the reading can be 
parallelized.", 0) \ M(SettingUInt64, merge_tree_min_bytes_for_concurrent_read, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized.", 0) \ @@ -156,7 +156,7 @@ struct Settings : public SettingsCollection M(SettingUInt64, priority, 0, "Priority of the query. 1 - the highest, higher value - lower priority; 0 - do not use priorities.", 0) \ M(SettingInt64, os_thread_priority, 0, "If non zero - set corresponding 'nice' value for query processing threads. Can be used to adjust query priority for OS scheduler.", 0) \ \ - M(SettingBool, log_queries, 0, "Log requests and write the log to the system table.", 0) \ + M(SettingBool, log_queries, 1, "Log requests and write the log to the system table.", 0) \ M(SettingLogQueriesType, log_queries_min_type, QueryLogElementType::QUERY_START, "query_log minimal type to log, possible values (from low to high): QUERY_START, QUERY_FINISH, EXCEPTION_BEFORE_START, EXCEPTION_WHILE_PROCESSING.", 0) \ M(SettingUInt64, log_queries_cut_to_length, 100000, "If query length is greater than specified threshold (in bytes), then cut query when writing to query log. Also limit length of printed query in ordinary text log.", 0) \ \ diff --git a/src/Core/Types.h b/src/Core/Types.h index 208da48797e..2c2293b4658 100644 --- a/src/Core/Types.h +++ b/src/Core/Types.h @@ -85,17 +85,17 @@ template <> inline constexpr bool IsNumber = true; template struct TypeName; -template <> struct TypeName { static const char * get() { return "UInt8"; } }; -template <> struct TypeName { static const char * get() { return "UInt16"; } }; -template <> struct TypeName { static const char * get() { return "UInt32"; } }; -template <> struct TypeName { static const char * get() { return "UInt64"; } }; -template <> struct TypeName { static const char * get() { return "Int8"; } }; -template <> struct TypeName { static const char * get() { return "Int16"; } }; -template <> struct TypeName { static const char * get() { return "Int32"; } }; -template <> struct TypeName { static const char * get() { return "Int64"; } }; -template <> struct TypeName { static const char * get() { return "Float32"; } }; -template <> struct TypeName { static const char * get() { return "Float64"; } }; -template <> struct TypeName { static const char * get() { return "String"; } }; +template <> struct TypeName { static constexpr const char * get() { return "UInt8"; } }; +template <> struct TypeName { static constexpr const char * get() { return "UInt16"; } }; +template <> struct TypeName { static constexpr const char * get() { return "UInt32"; } }; +template <> struct TypeName { static constexpr const char * get() { return "UInt64"; } }; +template <> struct TypeName { static constexpr const char * get() { return "Int8"; } }; +template <> struct TypeName { static constexpr const char * get() { return "Int16"; } }; +template <> struct TypeName { static constexpr const char * get() { return "Int32"; } }; +template <> struct TypeName { static constexpr const char * get() { return "Int64"; } }; +template <> struct TypeName { static constexpr const char * get() { return "Float32"; } }; +template <> struct TypeName { static constexpr const char * get() { return "Float64"; } }; +template <> struct TypeName { static constexpr const char * get() { return "String"; } }; template struct TypeId; template <> struct TypeId { static constexpr const TypeIndex value = TypeIndex::UInt8; }; @@ -115,7 +115,7 @@ using Strings = std::vector; using Int128 = __int128; template <> inline constexpr 
bool IsNumber = true; -template <> struct TypeName { static const char * get() { return "Int128"; } }; +template <> struct TypeName { static constexpr const char * get() { return "Int128"; } }; template <> struct TypeId { static constexpr const TypeIndex value = TypeIndex::Int128; }; /// Own FieldType for Decimal. @@ -161,9 +161,9 @@ using Decimal128 = Decimal; using DateTime64 = Decimal64; -template <> struct TypeName { static const char * get() { return "Decimal32"; } }; -template <> struct TypeName { static const char * get() { return "Decimal64"; } }; -template <> struct TypeName { static const char * get() { return "Decimal128"; } }; +template <> struct TypeName { static constexpr const char * get() { return "Decimal32"; } }; +template <> struct TypeName { static constexpr const char * get() { return "Decimal64"; } }; +template <> struct TypeName { static constexpr const char * get() { return "Decimal128"; } }; template <> struct TypeId { static constexpr const TypeIndex value = TypeIndex::Decimal32; }; template <> struct TypeId { static constexpr const TypeIndex value = TypeIndex::Decimal64; }; @@ -183,7 +183,7 @@ template <> inline Int32 Decimal32::getScaleMultiplier(UInt32 scale) { return co template <> inline Int64 Decimal64::getScaleMultiplier(UInt32 scale) { return common::exp10_i64(scale); } template <> inline Int128 Decimal128::getScaleMultiplier(UInt32 scale) { return common::exp10_i128(scale); } -inline const char * getTypeName(TypeIndex idx) +inline constexpr const char * getTypeName(TypeIndex idx) { switch (idx) { diff --git a/src/DataStreams/InputStreamFromASTInsertQuery.cpp b/src/DataStreams/InputStreamFromASTInsertQuery.cpp index 3b4946e4bc9..47b61294da3 100644 --- a/src/DataStreams/InputStreamFromASTInsertQuery.cpp +++ b/src/DataStreams/InputStreamFromASTInsertQuery.cpp @@ -58,7 +58,7 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery( if (context.getSettingsRef().input_format_defaults_for_omitted_fields && ast_insert_query->table_id && !input_function) { - StoragePtr storage = DatabaseCatalog::instance().getTable(ast_insert_query->table_id); + StoragePtr storage = DatabaseCatalog::instance().getTable(ast_insert_query->table_id, context); auto column_defaults = storage->getColumns().getDefaults(); if (!column_defaults.empty()) res_stream = std::make_shared(res_stream, column_defaults, context); diff --git a/src/DataStreams/NullAndDoCopyBlockInputStream.h b/src/DataStreams/NullAndDoCopyBlockInputStream.h index 8fe05c387a3..8bfb3538f3a 100644 --- a/src/DataStreams/NullAndDoCopyBlockInputStream.h +++ b/src/DataStreams/NullAndDoCopyBlockInputStream.h @@ -21,19 +21,10 @@ class NullAndDoCopyBlockInputStream : public IBlockInputStream { public: NullAndDoCopyBlockInputStream(const BlockInputStreamPtr & input_, BlockOutputStreamPtr output_) + : input(std::move(input_)) + , output(std::move(output_)) { - input_streams.push_back(input_); - output_streams.push_back(output_); - - for (auto & input_stream : input_streams) - children.push_back(input_stream); - } - - NullAndDoCopyBlockInputStream(const BlockInputStreams & input_, BlockOutputStreams & output_) - : input_streams(input_), output_streams(output_) - { - for (auto & input_stream : input_) - children.push_back(input_stream); + children.push_back(input); } /// Suppress readPrefix and readSuffix, because they are called by copyData. @@ -53,16 +44,13 @@ protected: /// If query was cancelled, it will be processed by child streams. /// Part of the data will be processed. 
- if (input_streams.size() == 1 && output_streams.size() == 1) - copyData(*input_streams.at(0), *output_streams.at(0)); - else - copyData(input_streams, output_streams); + copyData(*input, *output); return Block(); } private: - BlockInputStreams input_streams; - BlockOutputStreams output_streams; + BlockInputStreamPtr input; + BlockOutputStreamPtr output; }; } diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 7f730b5fd3f..f68e0d81c03 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -59,7 +59,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( for (const auto & database_table : dependencies) { - auto dependent_table = DatabaseCatalog::instance().getTable(database_table); + auto dependent_table = DatabaseCatalog::instance().getTable(database_table, context); ASTPtr query; BlockOutputStreamPtr out; diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index d2d5d6a92f9..c6542763533 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include namespace DB { @@ -36,7 +38,7 @@ TTLBlockInputStream::TTLBlockInputStream( const auto & column_defaults = storage_columns.getDefaults(); ASTPtr default_expr_list = std::make_shared(); - for (const auto & [name, _] : storage.column_ttl_entries_by_name) + for (const auto & [name, _] : storage.getColumnTTLs()) { auto it = column_defaults.find(name); if (it != column_defaults.end()) @@ -67,6 +69,32 @@ TTLBlockInputStream::TTLBlockInputStream( default_expr_list, storage.getColumns().getAllPhysical()); defaults_expression = ExpressionAnalyzer{default_expr_list, syntax_result, storage.global_context}.getActions(true); } + + if (storage.hasRowsTTL() && storage.getRowsTTL().mode == TTLMode::GROUP_BY) + { + current_key_value.resize(storage.getRowsTTL().group_by_keys.size()); + + ColumnNumbers keys; + for (const auto & key : storage.getRowsTTL().group_by_keys) + keys.push_back(header.getPositionByName(key)); + agg_key_columns.resize(storage.getRowsTTL().group_by_keys.size()); + + AggregateDescriptions aggregates = storage.getRowsTTL().aggregate_descriptions; + for (auto & descr : aggregates) + if (descr.arguments.empty()) + for (const auto & name : descr.argument_names) + descr.arguments.push_back(header.getPositionByName(name)); + agg_aggregate_columns.resize(storage.getRowsTTL().aggregate_descriptions.size()); + + const Settings & settings = storage.global_context.getSettingsRef(); + + Aggregator::Params params(header, keys, aggregates, + false, settings.max_rows_to_group_by, settings.group_by_overflow_mode, + SettingUInt64(0), SettingUInt64(0), + settings.max_bytes_before_external_group_by, settings.empty_result_for_aggregation_by_empty_set, + storage.global_context.getTemporaryVolume(), settings.max_threads, settings.min_free_disk_space_for_temporary_data); + aggregator = std::make_unique(params); + } } bool TTLBlockInputStream::isTTLExpired(time_t ttl) const @@ -77,7 +105,8 @@ bool TTLBlockInputStream::isTTLExpired(time_t ttl) const Block TTLBlockInputStream::readImpl() { /// Skip all data if table ttl is expired for part - if (storage.hasRowsTTL() && isTTLExpired(old_ttl_infos.table_ttl.max)) + if (storage.hasRowsTTL() && !storage.getRowsTTL().where_expression && + storage.getRowsTTL().mode != TTLMode::GROUP_BY && 
isTTLExpired(old_ttl_infos.table_ttl.max)) { rows_removed = data_part->rows_count; return {}; @@ -85,7 +114,16 @@ Block TTLBlockInputStream::readImpl() Block block = children.at(0)->read(); if (!block) + { + if (aggregator && !agg_result.empty()) + { + MutableColumns result_columns = header.cloneEmptyColumns(); + finalizeAggregates(result_columns); + block = header.cloneWithColumns(std::move(result_columns)); + } + return block; + } if (storage.hasRowsTTL() && (force || isTTLExpired(old_ttl_infos.table_ttl.min))) removeRowsWithExpiredTableTTL(block); @@ -113,36 +151,149 @@ void TTLBlockInputStream::readSuffixImpl() void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) { - storage.rows_ttl_entry.expression->execute(block); + const auto & rows_ttl = storage.getRowsTTL(); + + rows_ttl.expression->execute(block); + if (rows_ttl.where_expression) + rows_ttl.where_expression->execute(block); const IColumn * ttl_column = - block.getByName(storage.rows_ttl_entry.result_column).column.get(); + block.getByName(rows_ttl.result_column).column.get(); + + const IColumn * where_result_column = storage.getRowsTTL().where_expression ? + block.getByName(storage.getRowsTTL().where_result_column).column.get() : nullptr; const auto & column_names = header.getNames(); - MutableColumns result_columns; - result_columns.reserve(column_names.size()); - for (auto it = column_names.begin(); it != column_names.end(); ++it) + if (!aggregator) { - const IColumn * values_column = block.getByName(*it).column.get(); - MutableColumnPtr result_column = values_column->cloneEmpty(); - result_column->reserve(block.rows()); + MutableColumns result_columns; + result_columns.reserve(column_names.size()); + for (auto it = column_names.begin(); it != column_names.end(); ++it) + { + const IColumn * values_column = block.getByName(*it).column.get(); + MutableColumnPtr result_column = values_column->cloneEmpty(); + result_column->reserve(block.rows()); + for (size_t i = 0; i < block.rows(); ++i) + { + UInt32 cur_ttl = getTimestampByIndex(ttl_column, i); + bool where_filter_passed = !where_result_column || where_result_column->getBool(i); + if (!isTTLExpired(cur_ttl) || !where_filter_passed) + { + new_ttl_infos.table_ttl.update(cur_ttl); + result_column->insertFrom(*values_column, i); + } + else if (it == column_names.begin()) + ++rows_removed; + } + result_columns.emplace_back(std::move(result_column)); + } + block = header.cloneWithColumns(std::move(result_columns)); + } + else + { + MutableColumns result_columns = header.cloneEmptyColumns(); + MutableColumns aggregate_columns = header.cloneEmptyColumns(); + + size_t rows_aggregated = 0; + size_t current_key_start = 0; + size_t rows_with_current_key = 0; for (size_t i = 0; i < block.rows(); ++i) { UInt32 cur_ttl = getTimestampByIndex(ttl_column, i); - if (!isTTLExpired(cur_ttl)) + bool where_filter_passed = !where_result_column || where_result_column->getBool(i); + bool ttl_expired = isTTLExpired(cur_ttl) && where_filter_passed; + + bool same_as_current = true; + for (size_t j = 0; j < storage.getRowsTTL().group_by_keys.size(); ++j) + { + const String & key_column = storage.getRowsTTL().group_by_keys[j]; + const IColumn * values_column = block.getByName(key_column).column.get(); + if (!same_as_current || (*values_column)[i] != current_key_value[j]) + { + values_column->get(i, current_key_value[j]); + same_as_current = false; + } + } + if (!same_as_current) + { + if (rows_with_current_key) + calculateAggregates(aggregate_columns, current_key_start, 
rows_with_current_key); + finalizeAggregates(result_columns); + + current_key_start = rows_aggregated; + rows_with_current_key = 0; + } + + if (ttl_expired) + { + ++rows_with_current_key; + ++rows_aggregated; + for (const auto & name : column_names) + { + const IColumn * values_column = block.getByName(name).column.get(); + auto & column = aggregate_columns[header.getPositionByName(name)]; + column->insertFrom(*values_column, i); + } + } + else { new_ttl_infos.table_ttl.update(cur_ttl); - result_column->insertFrom(*values_column, i); + for (const auto & name : column_names) + { + const IColumn * values_column = block.getByName(name).column.get(); + auto & column = result_columns[header.getPositionByName(name)]; + column->insertFrom(*values_column, i); + } } - else if (it == column_names.begin()) - ++rows_removed; } - result_columns.emplace_back(std::move(result_column)); - } - block = header.cloneWithColumns(std::move(result_columns)); + if (rows_with_current_key) + calculateAggregates(aggregate_columns, current_key_start, rows_with_current_key); + + block = header.cloneWithColumns(std::move(result_columns)); + } +} + +void TTLBlockInputStream::calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length) +{ + Columns aggregate_chunk; + aggregate_chunk.reserve(aggregate_columns.size()); + for (const auto & name : header.getNames()) + { + const auto & column = aggregate_columns[header.getPositionByName(name)]; + ColumnPtr chunk_column = column->cut(start_pos, length); + aggregate_chunk.emplace_back(std::move(chunk_column)); + } + aggregator->executeOnBlock(aggregate_chunk, length, agg_result, agg_key_columns, + agg_aggregate_columns, agg_no_more_keys); +} + +void TTLBlockInputStream::finalizeAggregates(MutableColumns & result_columns) +{ + if (!agg_result.empty()) + { + auto aggregated_res = aggregator->convertToBlocks(agg_result, true, 1); + for (auto & agg_block : aggregated_res) + { + for (const auto & it : storage.getRowsTTL().set_parts) + it.expression->execute(agg_block); + for (const auto & name : storage.getRowsTTL().group_by_keys) + { + const IColumn * values_column = agg_block.getByName(name).column.get(); + auto & result_column = result_columns[header.getPositionByName(name)]; + result_column->insertRangeFrom(*values_column, 0, agg_block.rows()); + } + for (const auto & it : storage.getRowsTTL().set_parts) + { + const IColumn * values_column = agg_block.getByName(it.expression_result_column_name).column.get(); + auto & result_column = result_columns[header.getPositionByName(it.column_name)]; + result_column->insertRangeFrom(*values_column, 0, agg_block.rows()); + } + } + } + agg_result.invalidate(); } void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) @@ -155,7 +306,7 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) } std::vector columns_to_remove; - for (const auto & [name, ttl_entry] : storage.column_ttl_entries_by_name) + for (const auto & [name, ttl_entry] : storage.getColumnTTLs()) { /// If we read not all table columns. E.g. while mutation. 
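// --- Illustrative aside, not part of the patch ---------------------------------
// A much-simplified model of the TTL GROUP BY path introduced above: rows whose
// table TTL has expired are no longer dropped outright; they are collapsed into
// one row per GROUP BY key (the "SET" part is modelled here by a plain max),
// while unexpired rows pass through unchanged. Row, the key handling and the
// aggregate are simplified stand-ins for the Aggregator-based implementation.
#include <algorithm>
#include <cstdint>
#include <map>
#include <vector>

struct Row
{
    std::uint64_t key;     // value of the GROUP BY key column
    std::int64_t value;    // column recalculated by the SET clause
    std::uint32_t ttl;     // per-row TTL timestamp
};

std::vector<Row> applyGroupByTTL(const std::vector<Row> & rows, std::uint32_t now)
{
    std::vector<Row> result;
    std::map<std::uint64_t, Row> collapsed;     // key -> aggregated row for expired rows

    for (const auto & row : rows)
    {
        if (row.ttl > now)                      // TTL not expired: keep the row as is
        {
            result.push_back(row);
            continue;
        }
        auto [it, inserted] = collapsed.try_emplace(row.key, row);
        if (!inserted)                          // e.g. TTL ... GROUP BY key SET value = max(value)
            it->second.value = std::max(it->second.value, row.value);
    }

    for (const auto & [key, row] : collapsed)   // emit one aggregated row per expired key
        result.push_back(row);

    return result;
}
// --------------------------------------------------------------------------------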
if (!block.has(name)) @@ -216,7 +367,7 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) void TTLBlockInputStream::updateMovesTTL(Block & block) { std::vector columns_to_remove; - for (const auto & ttl_entry : storage.move_ttl_entries) + for (const auto & ttl_entry : storage.getMoveTTLs()) { auto & new_ttl_info = new_ttl_infos.moves_ttl[ttl_entry.result_column]; diff --git a/src/DataStreams/TTLBlockInputStream.h b/src/DataStreams/TTLBlockInputStream.h index 3896e5232f8..c6ffa95cd75 100644 --- a/src/DataStreams/TTLBlockInputStream.h +++ b/src/DataStreams/TTLBlockInputStream.h @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -39,6 +40,13 @@ private: time_t current_time; bool force; + std::unique_ptr aggregator; + std::vector current_key_value; + AggregatedDataVariants agg_result; + ColumnRawPtrs agg_key_columns; + Aggregator::AggregateColumns agg_aggregate_columns; + bool agg_no_more_keys = false; + IMergeTreeDataPart::TTLInfos old_ttl_infos; IMergeTreeDataPart::TTLInfos new_ttl_infos; NameSet empty_columns; @@ -59,6 +67,12 @@ private: /// Removes rows with expired table ttl and computes new ttl_infos for part void removeRowsWithExpiredTableTTL(Block & block); + // Calculate aggregates of aggregate_columns into agg_result + void calculateAggregates(const MutableColumns & aggregate_columns, size_t start_pos, size_t length); + + /// Finalize agg_result into result_columns + void finalizeAggregates(MutableColumns & result_columns); + /// Updates TTL for moves void updateMovesTTL(Block & block); diff --git a/src/DataStreams/copyData.cpp b/src/DataStreams/copyData.cpp index fd4bfab28d8..a0651999034 100644 --- a/src/DataStreams/copyData.cpp +++ b/src/DataStreams/copyData.cpp @@ -1,9 +1,6 @@ -#include #include #include #include -#include -#include #include @@ -55,79 +52,6 @@ void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, TCancelCall inline void doNothing(const Block &) {} -namespace -{ - - -struct ParallelInsertsHandler -{ - using CencellationHook = std::function; - - explicit ParallelInsertsHandler(BlockOutputStreams & output_streams, CencellationHook cancellation_hook_, size_t num_threads) - : outputs(output_streams.size()), cancellation_hook(std::move(cancellation_hook_)) - { - exceptions.resize(num_threads); - - for (auto & output : output_streams) - outputs.push(output.get()); - } - - void onBlock(Block & block, size_t /*thread_num*/) - { - IBlockOutputStream * out = nullptr; - - outputs.pop(out); - out->write(block); - outputs.push(out); - } - - void onFinishThread(size_t /*thread_num*/) {} - void onFinish() {} - - void onException(std::exception_ptr & exception, size_t thread_num) - { - exceptions[thread_num] = exception; - cancellation_hook(); - } - - void rethrowFirstException() - { - for (auto & exception : exceptions) - if (exception) - std::rethrow_exception(exception); - } - - ConcurrentBoundedQueue outputs; - std::vector exceptions; - CencellationHook cancellation_hook; -}; - -} - -static void copyDataImpl(BlockInputStreams & inputs, BlockOutputStreams & outputs) -{ - for (auto & output : outputs) - output->writePrefix(); - - using Processor = ParallelInputsProcessor; - Processor * processor_ptr = nullptr; - - ParallelInsertsHandler handler(outputs, [&processor_ptr]() { processor_ptr->cancel(false); }, inputs.size()); - ParallelInputsProcessor processor(inputs, nullptr, inputs.size(), handler); - processor_ptr = &processor; - - processor.process(); - processor.wait(); - handler.rethrowFirstException(); - - /// readPrefix 
is called in ParallelInputsProcessor. - for (auto & input : inputs) - input->readSuffix(); - - for (auto & output : outputs) - output->writeSuffix(); -} - void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic * is_cancelled) { auto is_cancelled_pred = [is_cancelled] () @@ -138,11 +62,6 @@ void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic & is_cancelled) { copyDataImpl(from, to, is_cancelled, doNothing); diff --git a/src/DataStreams/copyData.h b/src/DataStreams/copyData.h index ae72dbd2421..f2bce8f411b 100644 --- a/src/DataStreams/copyData.h +++ b/src/DataStreams/copyData.h @@ -16,8 +16,6 @@ class Block; */ void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic * is_cancelled = nullptr); -void copyData(BlockInputStreams & inputs, BlockOutputStreams & outputs); - void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & is_cancelled); void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function & is_cancelled, diff --git a/src/DataStreams/tests/union_stream2.cpp b/src/DataStreams/tests/union_stream2.cpp index 6e0997e7f72..5b84d89a435 100644 --- a/src/DataStreams/tests/union_stream2.cpp +++ b/src/DataStreams/tests/union_stream2.cpp @@ -35,7 +35,7 @@ try Names column_names; column_names.push_back("WatchID"); - StoragePtr table = DatabaseCatalog::instance().getTable({"default", "hits6"}); + StoragePtr table = DatabaseCatalog::instance().getTable({"default", "hits6"}, context); QueryProcessingStage::Enum stage = table->getQueryProcessingStage(context); auto pipes = table->read(column_names, {}, context, stage, settings.max_block_size, settings.max_threads); diff --git a/src/DataTypes/DataTypeDateTime64.h b/src/DataTypes/DataTypeDateTime64.h index 85b77d88ef4..b575e9d81c1 100644 --- a/src/DataTypes/DataTypeDateTime64.h +++ b/src/DataTypes/DataTypeDateTime64.h @@ -17,7 +17,9 @@ class DataTypeDateTime64 final : public DataTypeDecimalBase, public { public: static constexpr UInt8 default_scale = 3; + static constexpr auto family_name = "DateTime64"; + static constexpr auto type_id = TypeIndex::DateTime64; explicit DataTypeDateTime64(UInt32 scale_, const std::string & time_zone_name = ""); @@ -26,7 +28,7 @@ public: const char * getFamilyName() const override { return family_name; } std::string doGetName() const override; - TypeIndex getTypeId() const override { return TypeIndex::DateTime64; } + TypeIndex getTypeId() const override { return type_id; } void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; diff --git a/src/DataTypes/DataTypeNumberBase.h b/src/DataTypes/DataTypeNumberBase.h index fb752ad5329..0390da2cb6f 100644 --- a/src/DataTypes/DataTypeNumberBase.h +++ b/src/DataTypes/DataTypeNumberBase.h @@ -19,12 +19,15 @@ class DataTypeNumberBase : public DataTypeWithSimpleSerialization public: static constexpr bool is_parametric = false; + using FieldType = T; + static constexpr auto type_id = TypeId::value; + static constexpr auto family_name = TypeName::get(); using ColumnType = ColumnVector; - const char * getFamilyName() const override { return TypeName::get(); } - TypeIndex getTypeId() const override { return TypeId::value; } + const char * getFamilyName() const override { return family_name; } + TypeIndex getTypeId() const override { return type_id; } void serializeText(const IColumn & column, size_t row_num, 
WriteBuffer & ostr, const FormatSettings &) const override; void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/DataTypeString.h b/src/DataTypes/DataTypeString.h index d34c19f2664..f6db8fe73d4 100644 --- a/src/DataTypes/DataTypeString.h +++ b/src/DataTypes/DataTypeString.h @@ -13,13 +13,14 @@ class DataTypeString final : public IDataType public: using FieldType = String; static constexpr bool is_parametric = false; + static constexpr auto type_id = TypeIndex::String; const char * getFamilyName() const override { return "String"; } - TypeIndex getTypeId() const override { return TypeIndex::String; } + TypeIndex getTypeId() const override { return type_id; } void serializeBinary(const Field & field, WriteBuffer & ostr) const override; void deserializeBinary(Field & field, ReadBuffer & istr) const override; diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 28e4a4a6579..73c966c08c3 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -529,10 +529,15 @@ struct WhichDataType /// IDataType helpers (alternative for IDataType virtual methods with single point of truth) -inline bool isDate(const DataTypePtr & data_type) { return WhichDataType(data_type).isDate(); } -inline bool isDateOrDateTime(const DataTypePtr & data_type) { return WhichDataType(data_type).isDateOrDateTime(); } -inline bool isDateTime(const DataTypePtr & data_type) { return WhichDataType(data_type).isDateTime(); } -inline bool isDateTime64(const DataTypePtr & data_type) { return WhichDataType(data_type).isDateTime64(); } +template +inline bool isDate(const T & data_type) { return WhichDataType(data_type).isDate(); } +template +inline bool isDateOrDateTime(const T & data_type) { return WhichDataType(data_type).isDateOrDateTime(); } +template +inline bool isDateTime(const T & data_type) { return WhichDataType(data_type).isDateTime(); } +template +inline bool isDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTime64(); } + inline bool isEnum(const DataTypePtr & data_type) { return WhichDataType(data_type).isEnum(); } inline bool isDecimal(const DataTypePtr & data_type) { return WhichDataType(data_type).isDecimal(); } inline bool isTuple(const DataTypePtr & data_type) { return WhichDataType(data_type).isTuple(); } @@ -636,6 +641,19 @@ inline bool isCompilableType(const DataTypePtr & data_type) return data_type->isValueRepresentedByNumber() && !isDecimal(data_type); } +template +inline bool isDataType(const DataType & data_type) +{ + WhichDataType which(data_type); + return which.idx == TYPE_IDX; +} + +template +inline bool isDataType(const DataType & data_type) +{ + return isDataType(data_type); +} + template constexpr bool IsDataTypeDecimal = false; template constexpr bool IsDataTypeNumber = false; template constexpr bool IsDataTypeDateOrDateTime = false; diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 27a2441cec6..ed875f5fff4 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -288,15 +288,15 @@ void DatabaseAtomic::assertCanBeDetached(bool cleenup) "because some tables are still in use. 
Retry later.", ErrorCodes::DATABASE_NOT_EMPTY); } -DatabaseTablesIteratorPtr DatabaseAtomic::getTablesIterator(const IDatabase::FilterByNameFunction & filter_by_table_name) +DatabaseTablesIteratorPtr DatabaseAtomic::getTablesIterator(const Context & context, const IDatabase::FilterByNameFunction & filter_by_table_name) { - auto base_iter = DatabaseWithOwnTablesBase::getTablesIterator(filter_by_table_name); + auto base_iter = DatabaseWithOwnTablesBase::getTablesIterator(context, filter_by_table_name); return std::make_unique(std::move(typeid_cast(*base_iter))); } UUID DatabaseAtomic::tryGetTableUUID(const String & table_name) const { - if (auto table = tryGetTable(table_name)) + if (auto table = tryGetTable(table_name, global_context)) return table->getStorageID().uuid; return UUIDHelpers::Nil; } diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index 84297a42843..71428fdb420 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -42,7 +42,7 @@ public: void drop(const Context & /*context*/) override; - DatabaseTablesIteratorPtr getTablesIterator(const FilterByNameFunction & filter_by_table_name) override; + DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) override; void loadStoredObjects(Context & context, bool has_force_restore_data_flag) override; diff --git a/src/Databases/DatabaseDictionary.cpp b/src/Databases/DatabaseDictionary.cpp index 81bcbf7bc6b..c4c74f8e70c 100644 --- a/src/Databases/DatabaseDictionary.cpp +++ b/src/Databases/DatabaseDictionary.cpp @@ -50,18 +50,18 @@ Tables DatabaseDictionary::listTables(const FilterByNameFunction & filter_by_nam return tables; } -bool DatabaseDictionary::isTableExist(const String & table_name) const +bool DatabaseDictionary::isTableExist(const String & table_name, const Context &) const { return global_context.getExternalDictionariesLoader().getCurrentStatus(table_name) != ExternalLoader::Status::NOT_EXIST; } -StoragePtr DatabaseDictionary::tryGetTable(const String & table_name) const +StoragePtr DatabaseDictionary::tryGetTable(const String & table_name, const Context &) const { auto load_result = global_context.getExternalDictionariesLoader().getLoadResult(table_name); return createStorageDictionary(getDatabaseName(), load_result); } -DatabaseTablesIteratorPtr DatabaseDictionary::getTablesIterator(const FilterByNameFunction & filter_by_table_name) +DatabaseTablesIteratorPtr DatabaseDictionary::getTablesIterator(const Context &, const FilterByNameFunction & filter_by_table_name) { return std::make_unique(listTables(filter_by_table_name)); } @@ -71,7 +71,7 @@ bool DatabaseDictionary::empty() const return !global_context.getExternalDictionariesLoader().hasObjects(); } -ASTPtr DatabaseDictionary::getCreateTableQueryImpl(const String & table_name, bool throw_on_error) const +ASTPtr DatabaseDictionary::getCreateTableQueryImpl(const String & table_name, const Context &, bool throw_on_error) const { String query; { diff --git a/src/Databases/DatabaseDictionary.h b/src/Databases/DatabaseDictionary.h index bf10dc134bd..b61c85033e8 100644 --- a/src/Databases/DatabaseDictionary.h +++ b/src/Databases/DatabaseDictionary.h @@ -29,11 +29,11 @@ public: return "Dictionary"; } - bool isTableExist(const String & table_name) const override; + bool isTableExist(const String & table_name, const Context & context) const override; - StoragePtr tryGetTable(const String & table_name) const override; + StoragePtr tryGetTable(const String & 
table_name, const Context & context) const override; - DatabaseTablesIteratorPtr getTablesIterator(const FilterByNameFunction & filter_by_table_name) override; + DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) override; bool empty() const override; @@ -44,7 +44,7 @@ public: void shutdown() override; protected: - ASTPtr getCreateTableQueryImpl(const String & table_name, bool throw_on_error) const override; + ASTPtr getCreateTableQueryImpl(const String & table_name, const Context & context, bool throw_on_error) const override; private: mutable std::mutex mutex; diff --git a/src/Databases/DatabaseFactory.cpp b/src/Databases/DatabaseFactory.cpp index 21feb122da1..f27bc509ebe 100644 --- a/src/Databases/DatabaseFactory.cpp +++ b/src/Databases/DatabaseFactory.cpp @@ -82,7 +82,7 @@ DatabasePtr DatabaseFactory::getImpl( else if (engine_name == "Atomic") return std::make_shared(database_name, metadata_path, context); else if (engine_name == "Memory") - return std::make_shared(database_name); + return std::make_shared(database_name, context); else if (engine_name == "Dictionary") return std::make_shared(database_name, context); diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 51ee664a42a..11e5272110e 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -132,7 +132,7 @@ StoragePtr DatabaseLazy::tryGetTable(const String & table_name) const return loadTable(table_name); } -DatabaseTablesIteratorPtr DatabaseLazy::getTablesIterator(const FilterByNameFunction & filter_by_table_name) +DatabaseTablesIteratorPtr DatabaseLazy::getTablesIterator(const Context &, const FilterByNameFunction & filter_by_table_name) { std::lock_guard lock(mutex); Strings filtered_tables; diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index 4306e61b37b..2e24b687be5 100644 --- a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -51,13 +51,15 @@ public: time_t getObjectMetadataModificationTime(const String & table_name) const override; - bool isTableExist(const String & table_name) const override; + bool isTableExist(const String & table_name, const Context &) const override { return isTableExist(table_name); } + bool isTableExist(const String & table_name) const; - StoragePtr tryGetTable(const String & table_name) const override; + StoragePtr tryGetTable(const String & table_name, const Context &) const override { return tryGetTable(table_name); } + StoragePtr tryGetTable(const String & table_name) const; bool empty() const override; - DatabaseTablesIteratorPtr getTablesIterator(const FilterByNameFunction & filter_by_table_name) override; + DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) override; void attachTable(const String & table_name, const StoragePtr & table, const String & relative_table_path) override; diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index 84fec6bcc22..52b1f889943 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -16,8 +16,8 @@ namespace ErrorCodes extern const int UNKNOWN_TABLE; } -DatabaseMemory::DatabaseMemory(const String & name_) - : DatabaseWithOwnTablesBase(name_, "DatabaseMemory(" + name_ + ")") +DatabaseMemory::DatabaseMemory(const String & name_, const Context & context) + : DatabaseWithOwnTablesBase(name_, "DatabaseMemory(" + name_ + ")", context) , data_path("data/" + 
escapeForFileName(database_name) + "/") {} @@ -64,7 +64,7 @@ ASTPtr DatabaseMemory::getCreateDatabaseQuery() const return create_query; } -ASTPtr DatabaseMemory::getCreateTableQueryImpl(const String & table_name, bool throw_on_error) const +ASTPtr DatabaseMemory::getCreateTableQueryImpl(const String & table_name, const Context &, bool throw_on_error) const { std::lock_guard lock{mutex}; auto it = create_queries.find(table_name); @@ -80,7 +80,7 @@ ASTPtr DatabaseMemory::getCreateTableQueryImpl(const String & table_name, bool t UUID DatabaseMemory::tryGetTableUUID(const String & table_name) const { - if (auto table = tryGetTable(table_name)) + if (auto table = tryGetTable(table_name, global_context)) return table->getStorageID().uuid; return UUIDHelpers::Nil; } diff --git a/src/Databases/DatabaseMemory.h b/src/Databases/DatabaseMemory.h index ad34c4d9097..40cc808e42b 100644 --- a/src/Databases/DatabaseMemory.h +++ b/src/Databases/DatabaseMemory.h @@ -19,7 +19,7 @@ namespace DB class DatabaseMemory final : public DatabaseWithOwnTablesBase { public: - DatabaseMemory(const String & name_); + DatabaseMemory(const String & name_, const Context & context); String getEngineName() const override { return "Memory"; } @@ -34,7 +34,7 @@ public: const String & table_name, bool no_delay) override; - ASTPtr getCreateTableQueryImpl(const String & name, bool throw_on_error) const override; + ASTPtr getCreateTableQueryImpl(const String & name, const Context & context, bool throw_on_error) const override; ASTPtr getCreateDatabaseQuery() const override; /// DatabaseMemory allows to create tables, which store data on disk. diff --git a/src/Databases/DatabaseMySQL.cpp b/src/Databases/DatabaseMySQL.cpp index cbd3261975a..5d4b81014f9 100644 --- a/src/Databases/DatabaseMySQL.cpp +++ b/src/Databases/DatabaseMySQL.cpp @@ -89,7 +89,7 @@ bool DatabaseMySQL::empty() const return true; } -DatabaseTablesIteratorPtr DatabaseMySQL::getTablesIterator(const FilterByNameFunction & filter_by_table_name) +DatabaseTablesIteratorPtr DatabaseMySQL::getTablesIterator(const Context &, const FilterByNameFunction & filter_by_table_name) { Tables tables; std::lock_guard lock(mutex); @@ -103,12 +103,12 @@ DatabaseTablesIteratorPtr DatabaseMySQL::getTablesIterator(const FilterByNameFun return std::make_unique(tables); } -bool DatabaseMySQL::isTableExist(const String & name) const +bool DatabaseMySQL::isTableExist(const String & name, const Context &) const { - return bool(tryGetTable(name)); + return bool(tryGetTable(name, global_context)); } -StoragePtr DatabaseMySQL::tryGetTable(const String & mysql_table_name) const +StoragePtr DatabaseMySQL::tryGetTable(const String & mysql_table_name, const Context &) const { std::lock_guard lock(mutex); @@ -155,7 +155,7 @@ static ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr return create_table_query; } -ASTPtr DatabaseMySQL::getCreateTableQueryImpl(const String & table_name, bool throw_on_error) const +ASTPtr DatabaseMySQL::getCreateTableQueryImpl(const String & table_name, const Context &, bool throw_on_error) const { std::lock_guard lock(mutex); @@ -501,7 +501,7 @@ void DatabaseMySQL::createTable(const Context &, const String & table_name, cons /// XXX: hack /// In order to prevent users from broken the table structure by executing attach table database_name.table_name (...) 
/// we should compare the old and new create_query to make them completely consistent - const auto & origin_create_query = getCreateTableQuery(table_name); + const auto & origin_create_query = getCreateTableQuery(table_name, global_context); origin_create_query->as()->attach = true; if (queryToString(origin_create_query) != queryToString(create_query)) diff --git a/src/Databases/DatabaseMySQL.h b/src/Databases/DatabaseMySQL.h index a43da5d1762..70d8dc81e0f 100644 --- a/src/Databases/DatabaseMySQL.h +++ b/src/Databases/DatabaseMySQL.h @@ -32,13 +32,13 @@ public: bool empty() const override; - DatabaseTablesIteratorPtr getTablesIterator(const FilterByNameFunction & filter_by_table_name) override; + DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) override; ASTPtr getCreateDatabaseQuery() const override; - bool isTableExist(const String & name) const override; + bool isTableExist(const String & name, const Context & context) const override; - StoragePtr tryGetTable(const String & name) const override; + StoragePtr tryGetTable(const String & name, const Context & context) const override; time_t getObjectMetadataModificationTime(const String & name) const override; @@ -59,7 +59,7 @@ public: void attachTable(const String & table_name, const StoragePtr & storage, const String & relative_table_path) override; protected: - ASTPtr getCreateTableQueryImpl(const String & name, bool throw_on_error) const override; + ASTPtr getCreateTableQueryImpl(const String & name, const Context & context, bool throw_on_error) const override; private: const Context & global_context; diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 70f86b19f1f..1b542c7a1ff 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -123,10 +123,9 @@ String getObjectDefinitionFromCreateQuery(const ASTPtr & query) } DatabaseOnDisk::DatabaseOnDisk(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, const Context & context) - : DatabaseWithOwnTablesBase(name, logger) + : DatabaseWithOwnTablesBase(name, logger, context) , metadata_path(metadata_path_) , data_path(data_path_) - , global_context(context.getGlobalContext()) { Poco::File(context.getPath() + data_path).createDirectories(); Poco::File(metadata_path).createDirectories(); @@ -160,7 +159,7 @@ void DatabaseOnDisk::createTable( throw Exception("Dictionary " + backQuote(getDatabaseName()) + "." + backQuote(table_name) + " already exists.", ErrorCodes::DICTIONARY_ALREADY_EXISTS); - if (isTableExist(table_name)) + if (isTableExist(table_name, global_context)) throw Exception("Table " + backQuote(getDatabaseName()) + "." + backQuote(table_name) + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); if (create.attach_short_syntax) @@ -267,7 +266,7 @@ void DatabaseOnDisk::renameTable( String table_metadata_path; ASTPtr attach_query; /// DatabaseLazy::detachTable may return nullptr even if table exists, so we need tryGetTable for this case. 
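// --- Illustrative aside, not part of the patch ---------------------------------
// A minimal sketch of the interface pattern these database changes converge on:
// table lookups (isTableExist / tryGetTable / getTablesIterator / getCreateTableQuery)
// now take the query Context explicitly instead of relying only on a stored
// global context. All names below are simplified stand-ins, not the real
// ClickHouse classes.
#include <map>
#include <memory>
#include <string>

namespace sketch
{
    struct Context {};                                  // stand-in for DB::Context
    struct IStorage {};                                 // stand-in for a table object
    using StoragePtr = std::shared_ptr<IStorage>;

    class Database
    {
    public:
        /// Implementations (Ordinary, Lazy, MySQL, Dictionary, ...) may use the
        /// passed Context; purely in-memory ones can simply ignore it.
        StoragePtr tryGetTable(const std::string & name, const Context &) const
        {
            auto it = tables.find(name);
            return it == tables.end() ? nullptr : it->second;
        }

        bool isTableExist(const std::string & name, const Context & context) const
        {
            return tryGetTable(name, context) != nullptr;
        }

        std::map<std::string, StoragePtr> tables;
    };
}

int main()
{
    sketch::Context context;
    sketch::Database db;
    db.tables["hits"] = std::make_shared<sketch::IStorage>();
    return db.isTableExist("hits", context) ? 0 : 1;    // lookups always carry the context
}
// --------------------------------------------------------------------------------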
- StoragePtr table = tryGetTable(table_name); + StoragePtr table = tryGetTable(table_name, global_context); detachTable(table_name); try { @@ -304,10 +303,10 @@ void DatabaseOnDisk::renameTable( Poco::File(table_metadata_path).remove(); } -ASTPtr DatabaseOnDisk::getCreateTableQueryImpl(const String & table_name, bool throw_on_error) const +ASTPtr DatabaseOnDisk::getCreateTableQueryImpl(const String & table_name, const Context &, bool throw_on_error) const { ASTPtr ast; - bool has_table = tryGetTable(table_name) != nullptr; + bool has_table = tryGetTable(table_name, global_context) != nullptr; auto table_metadata_path = getObjectMetadataPath(table_name); try { diff --git a/src/Databases/DatabaseOnDisk.h b/src/Databases/DatabaseOnDisk.h index 75609e231af..d4fb9b2aa17 100644 --- a/src/Databases/DatabaseOnDisk.h +++ b/src/Databases/DatabaseOnDisk.h @@ -76,6 +76,7 @@ protected: ASTPtr getCreateTableQueryImpl( const String & table_name, + const Context & context, bool throw_on_error) const override; ASTPtr getCreateQueryFromMetadata(const String & metadata_path, bool throw_on_error) const; @@ -85,7 +86,6 @@ protected: const String metadata_path; const String data_path; - const Context & global_context; }; } diff --git a/src/Databases/DatabaseWithDictionaries.cpp b/src/Databases/DatabaseWithDictionaries.cpp index 0d49078bd08..e0f2aa9286b 100644 --- a/src/Databases/DatabaseWithDictionaries.cpp +++ b/src/Databases/DatabaseWithDictionaries.cpp @@ -127,7 +127,7 @@ void DatabaseWithDictionaries::createDictionary(const Context & context, const S "Dictionary " + backQuote(getDatabaseName()) + "." + backQuote(dictionary_name) + " already exists.", ErrorCodes::DICTIONARY_ALREADY_EXISTS); - if (isTableExist(dictionary_name)) + if (isTableExist(dictionary_name, global_context)) throw Exception("Table " + backQuote(getDatabaseName()) + "." 
+ backQuote(dictionary_name) + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 5ad4fa20690..2a2ca1841cf 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -18,18 +19,18 @@ namespace ErrorCodes extern const int UNKNOWN_TABLE; } -DatabaseWithOwnTablesBase::DatabaseWithOwnTablesBase(const String & name_, const String & logger) - : IDatabase(name_), log(&Logger::get(logger)) +DatabaseWithOwnTablesBase::DatabaseWithOwnTablesBase(const String & name_, const String & logger, const Context & context) + : IDatabase(name_), log(&Logger::get(logger)), global_context(context.getGlobalContext()) { } -bool DatabaseWithOwnTablesBase::isTableExist(const String & table_name) const +bool DatabaseWithOwnTablesBase::isTableExist(const String & table_name, const Context &) const { std::lock_guard lock(mutex); return tables.find(table_name) != tables.end(); } -StoragePtr DatabaseWithOwnTablesBase::tryGetTable(const String & table_name) const +StoragePtr DatabaseWithOwnTablesBase::tryGetTable(const String & table_name, const Context &) const { std::lock_guard lock(mutex); auto it = tables.find(table_name); @@ -38,7 +39,7 @@ StoragePtr DatabaseWithOwnTablesBase::tryGetTable(const String & table_name) con return {}; } -DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIterator(const FilterByNameFunction & filter_by_table_name) +DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIterator(const Context &, const FilterByNameFunction & filter_by_table_name) { std::lock_guard lock(mutex); if (!filter_by_table_name) diff --git a/src/Databases/DatabasesCommon.h b/src/Databases/DatabasesCommon.h index e195fc59ce9..4238fd30137 100644 --- a/src/Databases/DatabasesCommon.h +++ b/src/Databases/DatabasesCommon.h @@ -19,9 +19,9 @@ class Context; class DatabaseWithOwnTablesBase : public IDatabase { public: - bool isTableExist(const String & table_name) const override; + bool isTableExist(const String & table_name, const Context & context) const override; - StoragePtr tryGetTable(const String & table_name) const override; + StoragePtr tryGetTable(const String & table_name, const Context & context) const override; bool empty() const override; @@ -29,18 +29,19 @@ public: StoragePtr detachTable(const String & table_name) override; - DatabaseTablesIteratorPtr getTablesIterator(const FilterByNameFunction & filter_by_table_name) override; + DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) override; void shutdown() override; - virtual ~DatabaseWithOwnTablesBase() override; + ~DatabaseWithOwnTablesBase() override; protected: mutable std::mutex mutex; Tables tables; Poco::Logger * log; + const Context & global_context; - DatabaseWithOwnTablesBase(const String & name_, const String & logger); + DatabaseWithOwnTablesBase(const String & name_, const String & logger, const Context & context); void attachTableUnlocked(const String & table_name, const StoragePtr & table, std::unique_lock & lock); StoragePtr detachTableUnlocked(const String & table_name, std::unique_lock & lock); diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 24147a0e7c5..3d108cfd91c 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -130,7 +130,7 @@ public: virtual void loadStoredObjects(Context & /*context*/, bool 
/*has_force_restore_data_flag*/) {} /// Check the existence of the table. - virtual bool isTableExist(const String & name) const = 0; + virtual bool isTableExist(const String & name, const Context & context) const = 0; /// Check the existence of the dictionary virtual bool isDictionaryExist(const String & /*name*/) const @@ -139,7 +139,7 @@ public: } /// Get the table for work. Return nullptr if there is no table. - virtual StoragePtr tryGetTable(const String & name) const = 0; + virtual StoragePtr tryGetTable(const String & name, const Context & context) const = 0; virtual UUID tryGetTableUUID(const String & /*table_name*/) const { return UUIDHelpers::Nil; } @@ -147,7 +147,7 @@ public: /// Get an iterator that allows you to pass through all the tables. /// It is possible to have "hidden" tables that are not visible when passing through, but are visible if you get them by name using the functions above. - virtual DatabaseTablesIteratorPtr getTablesIterator(const FilterByNameFunction & filter_by_table_name = {}) = 0; + virtual DatabaseTablesIteratorPtr getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name = {}) = 0; /// Get an iterator to pass through all the dictionaries. virtual DatabaseDictionariesIteratorPtr getDictionariesIterator([[maybe_unused]] const FilterByNameFunction & filter_by_dictionary_name = {}) @@ -249,14 +249,14 @@ public: } /// Get the CREATE TABLE query for the table. It can also provide information for detached tables for which there is metadata. - ASTPtr tryGetCreateTableQuery(const String & name) const noexcept + ASTPtr tryGetCreateTableQuery(const String & name, const Context & context) const noexcept { - return getCreateTableQueryImpl(name, false); + return getCreateTableQueryImpl(name, context, false); } - ASTPtr getCreateTableQuery(const String & name) const + ASTPtr getCreateTableQuery(const String & name, const Context & context) const { - return getCreateTableQueryImpl(name, true); + return getCreateTableQueryImpl(name, context, true); } /// Get the CREATE DICTIONARY query for the dictionary. Returns nullptr if dictionary doesn't exists. @@ -304,7 +304,7 @@ public: virtual ~IDatabase() {} protected: - virtual ASTPtr getCreateTableQueryImpl(const String & /*name*/, bool throw_on_error) const + virtual ASTPtr getCreateTableQueryImpl(const String & /*name*/, const Context & /*context*/, bool throw_on_error) const { if (throw_on_error) throw Exception("There is no SHOW CREATE TABLE query for Database" + getEngineName(), ErrorCodes::CANNOT_GET_CREATE_TABLE_QUERY); diff --git a/src/Dictionaries/CacheDictionary.inc.h b/src/Dictionaries/CacheDictionary.inc.h index 71f3f1857ce..27064d113e6 100644 --- a/src/Dictionaries/CacheDictionary.inc.h +++ b/src/Dictionaries/CacheDictionary.inc.h @@ -302,37 +302,33 @@ void CacheDictionary::getItemsString( /// Request new values sync. /// We have request both cache_not_found_ids and cache_expired_ids. 
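// --- Illustrative aside, not part of the patch ---------------------------------
// A self-contained model of the id gathering that the hunk below switches to:
// ids from both the "not found in cache" and "expired in cache" buckets are
// merged into one required_ids vector and sent for a synchronous update,
// instead of updating only when cache_not_found_ids is non-empty. The container
// types are simplified stand-ins for the dictionary's internals.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <unordered_map>
#include <vector>

int main()
{
    using Id = std::uint64_t;
    /// id -> rows of the source block that requested this id
    std::unordered_map<Id, std::vector<std::size_t>> cache_not_found_ids{{1, {0}}, {2, {1, 3}}};
    std::unordered_map<Id, std::vector<std::size_t>> cache_expired_ids{{7, {2}}};

    std::vector<Id> required_ids;
    required_ids.reserve(cache_not_found_ids.size() + cache_expired_ids.size());
    std::transform(std::begin(cache_not_found_ids), std::end(cache_not_found_ids),
                   std::back_inserter(required_ids), [](const auto & pair) { return pair.first; });
    std::transform(std::begin(cache_expired_ids), std::end(cache_expired_ids),
                   std::back_inserter(required_ids), [](const auto & pair) { return pair.first; });

    /// required_ids now holds {1, 2, 7} (order unspecified) and would be wrapped
    /// into an update unit and pushed to the update queue.
    return required_ids.size() == 3 ? 0 : 1;
}
// --------------------------------------------------------------------------------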
- if (!cache_not_found_ids.empty()) + std::vector required_ids; + required_ids.reserve(cache_not_found_ids.size() + cache_expired_ids.size()); + std::transform( + std::begin(cache_not_found_ids), std::end(cache_not_found_ids), + std::back_inserter(required_ids), [](auto & pair) { return pair.first; }); + std::transform( + std::begin(cache_expired_ids), std::end(cache_expired_ids), + std::back_inserter(required_ids), [](auto & pair) { return pair.first; }); + + auto on_cell_updated = [&] (const auto id, const auto cell_idx) { - std::vector required_ids; - required_ids.reserve(cache_not_found_ids.size() + cache_expired_ids.size()); - std::transform( - std::begin(cache_not_found_ids), std::end(cache_not_found_ids), - std::back_inserter(required_ids), [](auto & pair) { return pair.first; }); - std::transform( - std::begin(cache_expired_ids), std::end(cache_expired_ids), - std::back_inserter(required_ids), [](auto & pair) { return pair.first; }); + const auto attribute_value = attribute_array[cell_idx]; - auto on_cell_updated = [&] (const auto id, const auto cell_idx) - { - const auto attribute_value = attribute_array[cell_idx]; + map[id] = String{attribute_value}; + total_length += (attribute_value.size + 1) * cache_not_found_ids[id].size(); + }; - map[id] = String{attribute_value}; - total_length += (attribute_value.size + 1) * cache_not_found_ids[id].size(); - }; + auto on_id_not_found = [&] (const auto id, const auto) + { + for (const auto row : cache_not_found_ids[id]) + total_length += get_default(row).size + 1; + }; - auto on_id_not_found = [&] (const auto id, const auto) - { - for (const auto row : cache_not_found_ids[id]) - total_length += get_default(row).size + 1; - }; - - auto update_unit_ptr = std::make_shared(required_ids, on_cell_updated, on_id_not_found); - - tryPushToUpdateQueueOrThrow(update_unit_ptr); - waitForCurrentUpdateFinish(update_unit_ptr); - } + auto update_unit_ptr = std::make_shared(required_ids, on_cell_updated, on_id_not_found); + tryPushToUpdateQueueOrThrow(update_unit_ptr); + waitForCurrentUpdateFinish(update_unit_ptr); out->getChars().reserve(total_length); for (const auto row : ext::range(0, ext::size(ids))) diff --git a/src/Formats/ProtobufReader.cpp b/src/Formats/ProtobufReader.cpp index d8302771002..d76b048c1e9 100644 --- a/src/Formats/ProtobufReader.cpp +++ b/src/Formats/ProtobufReader.cpp @@ -32,11 +32,12 @@ namespace BITS32 = 5, }; - // The following condition must always be true: - // any_cursor_position < min(END_OF_VARINT, END_OF_GROUP) - // This inequation helps to check conditions in SimpleReader. - constexpr UInt64 END_OF_VARINT = static_cast(-1); - constexpr UInt64 END_OF_GROUP = static_cast(-2); + // The following conditions must always be true: + // any_cursor_position > END_OF_VARINT + // any_cursor_position > END_OF_GROUP + // Those inequations helps checking conditions in ProtobufReader::SimpleReader. + constexpr Int64 END_OF_VARINT = -1; + constexpr Int64 END_OF_GROUP = -2; Int64 decodeZigZag(UInt64 n) { return static_cast((n >> 1) ^ (~(n & 1) + 1)); } @@ -77,7 +78,7 @@ void ProtobufReader::SimpleReader::endMessage(bool ignore_errors) if (!current_message_level) return; - UInt64 root_message_end = (current_message_level == 1) ? current_message_end : parent_message_ends.front(); + Int64 root_message_end = (current_message_level == 1) ? 
current_message_end : parent_message_ends.front(); if (cursor != root_message_end) { if (cursor < root_message_end) @@ -95,6 +96,9 @@ void ProtobufReader::SimpleReader::endMessage(bool ignore_errors) void ProtobufReader::SimpleReader::startNestedMessage() { assert(current_message_level >= 1); + if ((cursor > field_end) && (field_end != END_OF_GROUP)) + throwUnknownFormat(); + // Start reading a nested message which is located inside a length-delimited field // of another message. parent_message_ends.emplace_back(current_message_end); @@ -146,7 +150,7 @@ bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number) throwUnknownFormat(); } - if (cursor >= current_message_end) + if ((cursor >= current_message_end) && (current_message_end != END_OF_GROUP)) return false; UInt64 varint = readVarint(); @@ -196,11 +200,17 @@ bool ProtobufReader::SimpleReader::readFieldNumber(UInt32 & field_number) bool ProtobufReader::SimpleReader::readUInt(UInt64 & value) { + if (field_end == END_OF_VARINT) + { + value = readVarint(); + field_end = cursor; + return true; + } + if (unlikely(cursor >= field_end)) return false; + value = readVarint(); - if (field_end == END_OF_VARINT) - field_end = cursor; return true; } @@ -227,6 +237,7 @@ bool ProtobufReader::SimpleReader::readFixed(T & value) { if (unlikely(cursor >= field_end)) return false; + readBinary(&value, sizeof(T)); return true; } diff --git a/src/Formats/ProtobufReader.h b/src/Formats/ProtobufReader.h index a50c2f202f0..7c3c19ba376 100644 --- a/src/Formats/ProtobufReader.h +++ b/src/Formats/ProtobufReader.h @@ -124,12 +124,12 @@ private: void ignoreGroup(); ReadBuffer & in; - UInt64 cursor; + Int64 cursor; size_t current_message_level; - UInt64 current_message_end; - std::vector parent_message_ends; - UInt64 field_end; - UInt64 last_string_pos; + Int64 current_message_end; + std::vector parent_message_ends; + Int64 field_end; + Int64 last_string_pos; }; class IConverter diff --git a/src/Functions/FunctionJoinGet.cpp b/src/Functions/FunctionJoinGet.cpp index d6be2469759..a4569684e7f 100644 --- a/src/Functions/FunctionJoinGet.cpp +++ b/src/Functions/FunctionJoinGet.cpp @@ -43,7 +43,7 @@ static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & co ++dot; } String table_name = join_name.substr(dot); - auto table = DatabaseCatalog::instance().getTable({database_name, table_name}); + auto table = DatabaseCatalog::instance().getTable({database_name, table_name}, context); auto storage_join = std::dynamic_pointer_cast(table); if (!storage_join) throw Exception{"Table " + join_name + " should have engine StorageJoin", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; diff --git a/src/Functions/FunctionUnixTimestamp64.h b/src/Functions/FunctionUnixTimestamp64.h new file mode 100644 index 00000000000..2d3d12a4d0a --- /dev/null +++ b/src/Functions/FunctionUnixTimestamp64.h @@ -0,0 +1,148 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int DECIMAL_OVERFLOW; +} + +/** Casts DateTim64 to or from Int64 representation narrowed down (or scaled up) to any scale value defined in Impl. 
+ */ +template +class FunctionUnixTimestamp64 : public IFunction +{ +public: + static constexpr auto name = Impl::name; + static constexpr auto target_scale = Impl::target_scale; + + using SourceDataType = typename Impl::SourceDataType; + using ResultDataType = typename Impl::ResultDataType; + + static constexpr bool is_result_datetime64 = std::is_same_v; + + static_assert(std::is_same_v || std::is_same_v); + + static auto create(const Context &) + { + return std::make_shared>(); + } + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return is_result_datetime64 ? 2 : 1; } + bool isVariadic() const override { return is_result_datetime64; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if constexpr (is_result_datetime64) + { + validateFunctionArgumentTypes(*this, arguments, + FunctionArgumentDescriptors{{"value", isDataType, nullptr, std::string(SourceDataType::family_name).c_str()}}, + // optional + FunctionArgumentDescriptors{ + // {"precision", isDataType, isColumnConst, ("Precision of the result, default is " + std::to_string(target_scale)).c_str()}, + {"timezone", isStringOrFixedString, isColumnConst, "Timezone of the result"}, + }); + const auto timezone = extractTimeZoneNameFromFunctionArguments(arguments, 1, 0); + return std::make_shared(target_scale, timezone); + } + else + { + validateFunctionArgumentTypes(*this, arguments, + FunctionArgumentDescriptors{{"value", isDataType, nullptr, std::string(SourceDataType::family_name).c_str()}}); + return std::make_shared(); + } + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + using SourceColumnType = typename SourceDataType::ColumnType; + using ResultColumnType = typename ResultDataType::ColumnType; + + const auto & src = block.getByPosition(arguments[0]); + auto & res = block.getByPosition(result); + const auto & col = *src.column; + + const SourceColumnType * source_col_typed = checkAndGetColumn(col); + if (!source_col_typed && !(source_col_typed = checkAndGetColumnConstData(&col))) + throw Exception("Invalid column type" + col.getName() + " expected " + + std::string(SourceDataType::family_name), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + res.column = res.type->createColumn(); + + if (input_rows_count == 0) + return; + + auto & result_data = assert_cast(res.column->assumeMutableRef()).getData(); + result_data.reserve(source_col_typed->size()); + const auto & source_data = source_col_typed->getData(); + + const auto scale_diff = getScaleDiff(*checkAndGetDataType(src.type.get()), *checkAndGetDataType(res.type.get())); + if (scale_diff == 0) + { + static_assert(sizeof(typename SourceColumnType::Container::value_type) == sizeof(typename ResultColumnType::Container::value_type)); + // no conversion necessary + result_data.push_back_raw_many(source_data.size(), source_data.data()); + } + else if (scale_diff < 0) + { + const Int64 scale_multiplier = DecimalUtils::scaleMultiplier(std::abs(scale_diff)); + for (const auto & v : source_data) + { + Int64 result_value = toDestValue(v); + if (common::mulOverflow(result_value, scale_multiplier, result_value)) + throw Exception("Decimal overflow in " + getName(), ErrorCodes::DECIMAL_OVERFLOW); + + result_data.push_back(result_value); + } + } + else + { + const Int64 scale_multiplier = DecimalUtils::scaleMultiplier(scale_diff); + for (const auto & v : source_data) + result_data.push_back(static_cast(toDestValue(v) / 
scale_multiplier)); + } + } + +private: + static Int64 getScaleDiff(const SourceDataType & src, const ResultDataType & dst) + { + Int64 src_scale = target_scale; + if constexpr (std::is_same_v) + { + src_scale = src.getScale(); + } + + Int64 dst_scale = target_scale; + if constexpr (std::is_same_v) + { + dst_scale = dst.getScale(); + } + + return src_scale - dst_scale; + } + + static auto toDestValue(const DateTime64 & v) + { + return Int64{v.value}; + } + + template + static auto toDestValue(const T & v) + { + return Int64{v}; + } +}; + +} diff --git a/src/Functions/FunctionsMiscellaneous.h b/src/Functions/FunctionsMiscellaneous.h index 7c884cc7040..3e2938f386b 100644 --- a/src/Functions/FunctionsMiscellaneous.h +++ b/src/Functions/FunctionsMiscellaneous.h @@ -115,6 +115,7 @@ public: String getName() const override { return "FunctionCapture"; } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { @@ -243,6 +244,7 @@ public: String getName() const override { return name; } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } DataTypePtr getReturnType(const ColumnsWithTypeAndName &) const override { return return_type; } size_t getNumberOfArguments() const override { return capture->captured_types.size(); } diff --git a/src/Functions/URL/port.cpp b/src/Functions/URL/port.cpp new file mode 100644 index 00000000000..e703da2602b --- /dev/null +++ b/src/Functions/URL/port.cpp @@ -0,0 +1,129 @@ +#include +#include +#include +#include +#include +#include +#include +#include "domain.h" + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +struct FunctionPort : public IFunction +{ + static constexpr auto name = "port"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + bool useDefaultImplementationForConstants() const override { return true; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 1 && arguments.size() != 2) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + std::to_string(arguments.size()) + ", should be 1 or 2", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (!WhichDataType(arguments[0].type).isString()) + throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() + ". Must be String.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (arguments.size() == 2 && !WhichDataType(arguments[1].type).isUInt16()) + throw Exception("Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName() + ". 
Must be UInt16.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t) override + { + UInt16 default_port = 0; + if (arguments.size() == 2) + { + const auto * port_column = checkAndGetColumn(block.getByPosition(arguments[1]).column.get()); + if (!port_column) + throw Exception("Second argument for function " + getName() + " must be constant UInt16", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + default_port = port_column->getValue(); + } + + const ColumnPtr url_column = block.getByPosition(arguments[0]).column; + if (const ColumnString * url_strs = checkAndGetColumn(url_column.get())) + { + auto col_res = ColumnVector::create(); + typename ColumnVector::Container & vec_res = col_res->getData(); + vec_res.resize(url_column->size()); + + vector(default_port, url_strs->getChars(), url_strs->getOffsets(), vec_res); + block.getByPosition(result).column = std::move(col_res); + } + else + throw Exception( + "Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of argument of function " + getName(), + ErrorCodes::ILLEGAL_COLUMN); +} + +private: + static void vector(UInt16 default_port, const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + { + size_t size = offsets.size(); + + ColumnString::Offset prev_offset = 0; + for (size_t i = 0; i < size; ++i) + { + res[i] = extractPort(default_port, data, prev_offset, offsets[i] - prev_offset - 1); + prev_offset = offsets[i]; + } +} + + static UInt16 extractPort(UInt16 default_port, const ColumnString::Chars & buf, size_t offset, size_t size) + { + const char * p = reinterpret_cast(&buf[0]) + offset; + const char * end = p + size; + + StringRef host = getURLHost(p, size); + if (!host.size) + return default_port; + if (host.size == size) + return default_port; + + p = host.data + host.size; + if (*p++ != ':') + return default_port; + + Int64 port = default_port; + while (p < end) + { + if (*p == '/') + break; + if (!isNumericASCII(*p)) + return default_port; + + port = (port * 10) + (*p - '0'); + if (port < 0 || port > UInt16(-1)) + return default_port; + ++p; + } + return port; + } +}; + +void registerFunctionPort(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} + diff --git a/src/Functions/URL/registerFunctionsURL.cpp b/src/Functions/URL/registerFunctionsURL.cpp index 66a847185f3..9ba5261f728 100644 --- a/src/Functions/URL/registerFunctionsURL.cpp +++ b/src/Functions/URL/registerFunctionsURL.cpp @@ -8,6 +8,7 @@ void registerFunctionDomain(FunctionFactory & factory); void registerFunctionDomainWithoutWWW(FunctionFactory & factory); void registerFunctionFirstSignificantSubdomain(FunctionFactory & factory); void registerFunctionTopLevelDomain(FunctionFactory & factory); +void registerFunctionPort(FunctionFactory & factory); void registerFunctionPath(FunctionFactory & factory); void registerFunctionPathFull(FunctionFactory & factory); void registerFunctionQueryString(FunctionFactory & factory); @@ -33,6 +34,7 @@ void registerFunctionsURL(FunctionFactory & factory) registerFunctionDomainWithoutWWW(factory); registerFunctionFirstSignificantSubdomain(factory); registerFunctionTopLevelDomain(factory); + registerFunctionPort(factory); registerFunctionPath(factory); registerFunctionPathFull(factory); registerFunctionQueryString(factory); diff --git a/src/Functions/array/arrayDistinct.cpp b/src/Functions/array/arrayDistinct.cpp index d24de638865..bb26567e8a5 100644 --- 
a/src/Functions/array/arrayDistinct.cpp +++ b/src/Functions/array/arrayDistinct.cpp @@ -153,10 +153,8 @@ bool FunctionArrayDistinct::executeNumber( if (nullable_col) src_null_map = &nullable_col->getNullMapData(); - using Set = ClearableHashSet, - HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; + using Set = ClearableHashSetWithStackMemory, + INITIAL_SIZE_DEGREE>; Set set; @@ -201,10 +199,8 @@ bool FunctionArrayDistinct::executeString( ColumnString & res_data_column_string = typeid_cast(res_data_col); - using Set = ClearableHashSet, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; + using Set = ClearableHashSetWithStackMemory; const PaddedPODArray * src_null_map = nullptr; @@ -249,8 +245,8 @@ void FunctionArrayDistinct::executeHashed( ColumnArray::Offsets & res_offsets, const ColumnNullable * nullable_col) { - using Set = ClearableHashSet, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; + using Set = ClearableHashSetWithStackMemory; const PaddedPODArray * src_null_map = nullptr; diff --git a/src/Functions/array/arrayEnumerateExtended.h b/src/Functions/array/arrayEnumerateExtended.h index 69db59e698e..e2f83ae8826 100644 --- a/src/Functions/array/arrayEnumerateExtended.h +++ b/src/Functions/array/arrayEnumerateExtended.h @@ -64,36 +64,41 @@ private: template struct MethodOneNumber { - using Set = ClearableHashMap, HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; + using Set = ClearableHashMapWithStackMemory, + INITIAL_SIZE_DEGREE>; + using Method = ColumnsHashing::HashMethodOneNumber; }; struct MethodString { - using Set = ClearableHashMap, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; + using Set = ClearableHashMapWithStackMemory; + using Method = ColumnsHashing::HashMethodString; }; struct MethodFixedString { - using Set = ClearableHashMap, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; + using Set = ClearableHashMapWithStackMemory; + using Method = ColumnsHashing::HashMethodFixedString; }; struct MethodFixed { - using Set = ClearableHashMap, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; + using Set = ClearableHashMapWithStackMemory; + using Method = ColumnsHashing::HashMethodKeysFixed; }; struct MethodHashed { - using Set = ClearableHashMap, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; + using Set = ClearableHashMapWithStackMemory; + using Method = ColumnsHashing::HashMethodHashed; }; diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index 133f5d7cb81..c021b5fbfd1 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -308,12 +308,9 @@ void FunctionArrayEnumerateRankedExtended::executeMethodImpl( const size_t depth_to_look = arrays_depths.max_array_depth; const auto & offsets = *offsets_by_depth[depth_to_look - 1]; - using Map = ClearableHashMap< - UInt128, - UInt32, - UInt128TrivialHash, - HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; + using Map = ClearableHashMapWithStackMemory; + Map indices; std::vector indices_by_depth(depth_to_look); diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index cccad7c7a03..24db3c0cd08 100644 --- 
a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -418,16 +418,15 @@ void FunctionArrayIntersect::executeImpl(Block & block, const ColumnNumbers & ar TypeListNativeNumbers::forEach(NumberExecutor(arrays, not_nullable_nested_return_type, result_column)); TypeListDecimalNumbers::forEach(DecimalExecutor(arrays, not_nullable_nested_return_type, result_column)); - using DateMap = ClearableHashMap, - HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(DataTypeDate::FieldType)>>; + using DateMap = ClearableHashMapWithStackMemory, INITIAL_SIZE_DEGREE>; - using DateTimeMap = ClearableHashMap, - HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(DataTypeDateTime::FieldType)>>; + using DateTimeMap = ClearableHashMapWithStackMemory< + DataTypeDateTime::FieldType, size_t, + DefaultHash, INITIAL_SIZE_DEGREE>; - using StringMap = ClearableHashMap, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; + using StringMap = ClearableHashMapWithStackMemory; if (!result_column) { @@ -455,8 +454,8 @@ void FunctionArrayIntersect::executeImpl(Block & block, const ColumnNumbers & ar template void FunctionArrayIntersect::NumberExecutor::operator()() { - using Map = ClearableHashMap, HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; + using Map = ClearableHashMapWithStackMemory, + INITIAL_SIZE_DEGREE>; if (!result && typeid_cast *>(data_type.get())) result = execute, true>(arrays, ColumnVector::create()); @@ -465,8 +464,8 @@ void FunctionArrayIntersect::NumberExecutor::operator()() template void FunctionArrayIntersect::DecimalExecutor::operator()() { - using Map = ClearableHashMap, HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; + using Map = ClearableHashMapWithStackMemory, + INITIAL_SIZE_DEGREE>; if (!result) if (auto * decimal = typeid_cast *>(data_type.get())) diff --git a/src/Functions/array/arrayUniq.cpp b/src/Functions/array/arrayUniq.cpp index 02129781c13..d9a44f6d8d3 100644 --- a/src/Functions/array/arrayUniq.cpp +++ b/src/Functions/array/arrayUniq.cpp @@ -66,36 +66,41 @@ private: template struct MethodOneNumber { - using Set = ClearableHashSet, HashTableGrower, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(T)>>; + using Set = ClearableHashSetWithStackMemory, + INITIAL_SIZE_DEGREE>; + using Method = ColumnsHashing::HashMethodOneNumber; }; struct MethodString { - using Set = ClearableHashSet, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; + using Set = ClearableHashSetWithStackMemory; + using Method = ColumnsHashing::HashMethodString; }; struct MethodFixedString { - using Set = ClearableHashSet, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>; + using Set = ClearableHashSetWithStackMemory; + using Method = ColumnsHashing::HashMethodFixedString; }; struct MethodFixed { - using Set = ClearableHashSet, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; + using Set = ClearableHashSetWithStackMemory; + using Method = ColumnsHashing::HashMethodKeysFixed; }; struct MethodHashed { - using Set = ClearableHashSet, - HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(UInt128)>>; + using Set = ClearableHashSetWithStackMemory; + using Method = ColumnsHashing::HashMethodHashed; }; diff --git 
a/src/Functions/fromUnixTimestamp64Micro.cpp b/src/Functions/fromUnixTimestamp64Micro.cpp new file mode 100644 index 00000000000..a4bac327a3a --- /dev/null +++ b/src/Functions/fromUnixTimestamp64Micro.cpp @@ -0,0 +1,20 @@ +#include +#include + +namespace DB +{ + +struct TransformFromMicro +{ + static constexpr auto name = "fromUnixTimestamp64Micro"; + static constexpr auto target_scale = 6; + using SourceDataType = DataTypeInt64; + using ResultDataType = DataTypeDateTime64; +}; + +void registerFromUnixTimestamp64Micro(FunctionFactory & factory) +{ + factory.registerFunction>(); +} + +} diff --git a/src/Functions/fromUnixTimestamp64Milli.cpp b/src/Functions/fromUnixTimestamp64Milli.cpp new file mode 100644 index 00000000000..a4ce79ecebb --- /dev/null +++ b/src/Functions/fromUnixTimestamp64Milli.cpp @@ -0,0 +1,20 @@ +#include +#include + +namespace DB +{ + +struct TransformFromMilli +{ + static constexpr auto name = "fromUnixTimestamp64Milli"; + static constexpr auto target_scale = 3; + using SourceDataType = DataTypeInt64; + using ResultDataType = DataTypeDateTime64; +}; + +void registerFromUnixTimestamp64Milli(FunctionFactory & factory) +{ + factory.registerFunction>(); +} + +} diff --git a/src/Functions/fromUnixTimestamp64Nano.cpp b/src/Functions/fromUnixTimestamp64Nano.cpp new file mode 100644 index 00000000000..cd69aa56a2c --- /dev/null +++ b/src/Functions/fromUnixTimestamp64Nano.cpp @@ -0,0 +1,20 @@ +#include +#include + +namespace DB +{ + +struct TransformFromNano +{ + static constexpr auto name = "fromUnixTimestamp64Nano"; + static constexpr auto target_scale = 9; + using SourceDataType = DataTypeInt64; + using ResultDataType = DataTypeDateTime64; +}; + +void registerFromUnixTimestamp64Nano(FunctionFactory & factory) +{ + factory.registerFunction>(); +} + +} diff --git a/src/Functions/hasColumnInTable.cpp b/src/Functions/hasColumnInTable.cpp index 03e5a4513db..b9ec2b84837 100644 --- a/src/Functions/hasColumnInTable.cpp +++ b/src/Functions/hasColumnInTable.cpp @@ -113,7 +113,7 @@ void FunctionHasColumnInTable::executeImpl(Block & block, const ColumnNumbers & bool has_column; if (host_name.empty()) { - const StoragePtr & table = DatabaseCatalog::instance().getTable({database_name, table_name}); + const StoragePtr & table = DatabaseCatalog::instance().getTable({database_name, table_name}, global_context); has_column = table->getColumns().hasPhysical(column_name); } else diff --git a/src/Functions/registerFunctions.cpp b/src/Functions/registerFunctions.cpp index 74c0c7e4112..762c551de08 100644 --- a/src/Functions/registerFunctions.cpp +++ b/src/Functions/registerFunctions.cpp @@ -37,6 +37,7 @@ void registerFunctionsIntrospection(FunctionFactory &); void registerFunctionsNull(FunctionFactory &); void registerFunctionsJSON(FunctionFactory &); void registerFunctionsConsistentHashing(FunctionFactory & factory); +void registerFunctionsUnixTimestamp64(FunctionFactory & factory); void registerFunctions() @@ -78,6 +79,7 @@ void registerFunctions() registerFunctionsJSON(factory); registerFunctionsIntrospection(factory); registerFunctionsConsistentHashing(factory); + registerFunctionsUnixTimestamp64(factory); } } diff --git a/src/Functions/registerFunctionsUnixTimestamp64.cpp b/src/Functions/registerFunctionsUnixTimestamp64.cpp new file mode 100644 index 00000000000..d7f3a4a4fc2 --- /dev/null +++ b/src/Functions/registerFunctionsUnixTimestamp64.cpp @@ -0,0 +1,25 @@ +namespace DB +{ + +class FunctionFactory; + +void registerToUnixTimestamp64Milli(FunctionFactory &); +void 
registerToUnixTimestamp64Micro(FunctionFactory &); +void registerToUnixTimestamp64Nano(FunctionFactory &); + +void registerFromUnixTimestamp64Milli(FunctionFactory &); +void registerFromUnixTimestamp64Micro(FunctionFactory &); +void registerFromUnixTimestamp64Nano(FunctionFactory &); + +void registerFunctionsUnixTimestamp64(FunctionFactory & factory) +{ + registerToUnixTimestamp64Milli(factory); + registerToUnixTimestamp64Micro(factory); + registerToUnixTimestamp64Nano(factory); + + registerFromUnixTimestamp64Milli(factory); + registerFromUnixTimestamp64Micro(factory); + registerFromUnixTimestamp64Nano(factory); +} + +} diff --git a/src/Functions/toUnixTimestamp64Micro.cpp b/src/Functions/toUnixTimestamp64Micro.cpp new file mode 100644 index 00000000000..845647fc0dd --- /dev/null +++ b/src/Functions/toUnixTimestamp64Micro.cpp @@ -0,0 +1,20 @@ +#include +#include + +namespace DB +{ + +struct TransformToMicro +{ + static constexpr auto name = "toUnixTimestamp64Micro"; + static constexpr auto target_scale = 6; + using SourceDataType = DataTypeDateTime64; + using ResultDataType = DataTypeInt64; +}; + +void registerToUnixTimestamp64Micro(FunctionFactory & factory) +{ + factory.registerFunction>(); +} + +} diff --git a/src/Functions/toUnixTimestamp64Milli.cpp b/src/Functions/toUnixTimestamp64Milli.cpp new file mode 100644 index 00000000000..88baf5c0b61 --- /dev/null +++ b/src/Functions/toUnixTimestamp64Milli.cpp @@ -0,0 +1,20 @@ +#include +#include + +namespace DB +{ + +struct TransformToMilli +{ + static constexpr auto name = "toUnixTimestamp64Milli"; + static constexpr auto target_scale = 3; + using SourceDataType = DataTypeDateTime64; + using ResultDataType = DataTypeInt64; +}; + +void registerToUnixTimestamp64Milli(FunctionFactory & factory) +{ + factory.registerFunction>(); +} + +} diff --git a/src/Functions/toUnixTimestamp64Nano.cpp b/src/Functions/toUnixTimestamp64Nano.cpp new file mode 100644 index 00000000000..5e90a9504cf --- /dev/null +++ b/src/Functions/toUnixTimestamp64Nano.cpp @@ -0,0 +1,20 @@ +#include +#include + +namespace DB +{ + +struct TransformToNano +{ + static constexpr auto name = "toUnixTimestamp64Nano"; + static constexpr auto target_scale = 9; + using SourceDataType = DataTypeDateTime64; + using ResultDataType = DataTypeInt64; +}; + +void registerToUnixTimestamp64Nano(FunctionFactory & factory) +{ + factory.registerFunction>(); +} + +} diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 8e53ffe493d..0b4776853e9 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -151,6 +151,9 @@ SRCS( finalizeAggregation.cpp formatDateTime.cpp formatString.cpp + fromUnixTimestamp64Micro.cpp + fromUnixTimestamp64Milli.cpp + fromUnixTimestamp64Nano.cpp FunctionFactory.cpp FunctionFQDN.cpp FunctionHelpers.cpp @@ -168,6 +171,7 @@ SRCS( FunctionsRound.cpp FunctionsStringArray.cpp FunctionsStringSimilarity.cpp + FunctionUnixTimestamp64.h GatherUtils/concat.cpp GatherUtils/createArraySink.cpp GatherUtils/createArraySource.cpp @@ -305,6 +309,7 @@ SRCS( registerFunctionsStringSearch.cpp registerFunctionsTuple.cpp registerFunctionsVisitParam.cpp + registerFunctionsUnixTimestamp64.cpp reinterpretAsFixedString.cpp reinterpretAsString.cpp reinterpretStringAs.cpp @@ -386,6 +391,9 @@ SRCS( toTimeZone.cpp toTypeName.cpp toValidUTF8.cpp + toUnixTimestamp64Micro.cpp + toUnixTimestamp64Milli.cpp + toUnixTimestamp64Nano.cpp toYear.cpp toYYYYMM.cpp toYYYYMMDD.cpp @@ -417,6 +425,7 @@ SRCS( URL/path.cpp URL/pathFull.cpp URL/protocol.cpp + URL/port.cpp URL/queryStringAndFragment.cpp 
URL/queryString.cpp URL/registerFunctionsURL.cpp diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index dccb413af2c..0797e63cdd9 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -283,7 +283,9 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf) if (buf.eof()) throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE); - if (*buf.position() == 'x') + char char_after_backslash = *buf.position(); + + if (char_after_backslash == 'x') { ++buf.position(); /// escape sequence of the form \xAA @@ -291,7 +293,7 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf) readPODBinary(hex_code, buf); s.push_back(unhex2(hex_code)); } - else if (*buf.position() == 'N') + else if (char_after_backslash == 'N') { /// Support for NULLs: \N sequence must be parsed as empty string. ++buf.position(); @@ -299,7 +301,22 @@ static void parseComplexEscapeSequence(Vector & s, ReadBuffer & buf) else { /// The usual escape sequence of a single character. - s.push_back(parseEscapeSequence(*buf.position())); + char decoded_char = parseEscapeSequence(char_after_backslash); + + /// For convenience using LIKE and regular expressions, + /// we leave backslash when user write something like 'Hello 100\%': + /// it is parsed like Hello 100\% instead of Hello 100% + if (decoded_char != '\\' + && decoded_char != '\'' + && decoded_char != '"' + && decoded_char != '`' /// MySQL style identifiers + && decoded_char != '/' /// JavaScript in HTML + && !isControlASCII(decoded_char)) + { + s.push_back('\\'); + } + + s.push_back(decoded_char); ++buf.position(); } } diff --git a/src/IO/tests/CMakeLists.txt b/src/IO/tests/CMakeLists.txt index 2767ce6e271..dfbbfa77853 100644 --- a/src/IO/tests/CMakeLists.txt +++ b/src/IO/tests/CMakeLists.txt @@ -37,9 +37,6 @@ target_link_libraries (parse_int_perf2 PRIVATE clickhouse_common_io) add_executable (read_write_int read_write_int.cpp) target_link_libraries (read_write_int PRIVATE clickhouse_common_io) -add_executable (mempbrk mempbrk.cpp) -target_link_libraries (mempbrk PRIVATE clickhouse_common_io) - add_executable (o_direct_and_dirty_pages o_direct_and_dirty_pages.cpp) target_link_libraries (o_direct_and_dirty_pages PRIVATE clickhouse_common_io) diff --git a/src/IO/tests/mempbrk.cpp b/src/IO/tests/mempbrk.cpp deleted file mode 100644 index 55097d989af..00000000000 --- a/src/IO/tests/mempbrk.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int CANNOT_PARSE_ESCAPE_SEQUENCE; -} -} - - -namespace test -{ -static void readEscapedString(DB::String & s, DB::ReadBuffer & buf) - { - s = ""; - while (!buf.eof()) - { - const char * next_pos = find_first_symbols<'\b', '\f', '\n', '\r', '\t', '\0', '\\'>(buf.position(), buf.buffer().end()); - - s.append(buf.position(), next_pos - buf.position()); - buf.position() += next_pos - buf.position(); - - if (!buf.hasPendingData()) - continue; - - if (*buf.position() == '\t' || *buf.position() == '\n') - return; - - if (*buf.position() == '\\') - { - ++buf.position(); - if (buf.eof()) - throw DB::Exception("Cannot parse escape sequence", DB::ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE); - s += DB::parseEscapeSequence(*buf.position()); - ++buf.position(); - } - } - } -} - - -int main(int, char **) -{ - try - { - DB::ReadBufferFromFileDescriptor in(STDIN_FILENO); -// DB::WriteBufferFromFileDescriptor out(STDOUT_FILENO); - 
std::string s; - size_t rows = 0; - - Stopwatch watch; - - while (!in.eof()) - { - test::readEscapedString(s, in); - in.ignore(); - - ++rows; - -/* DB::writeEscapedString(s, out); - DB::writeChar('\n', out);*/ - } - - watch.stop(); - std::cerr << std::fixed << std::setprecision(2) - << "Read " << rows << " rows (" << in.count() / 1000000.0 << " MB) in " << watch.elapsedSeconds() << " sec., " - << rows / watch.elapsedSeconds() << " rows/sec. (" << in.count() / watch.elapsedSeconds() / 1000000 << " MB/s.)" - << std::endl; - } - catch (const DB::Exception & e) - { - std::cerr << e.what() << ", " << e.displayText() << std::endl; - return 1; - } - - return 0; -} diff --git a/src/Interpreters/ActionLocksManager.cpp b/src/Interpreters/ActionLocksManager.cpp index fe5b3def658..e8887f44a45 100644 --- a/src/Interpreters/ActionLocksManager.cpp +++ b/src/Interpreters/ActionLocksManager.cpp @@ -19,23 +19,28 @@ namespace ActionLocks } +ActionLocksManager::ActionLocksManager(const Context & context) + : global_context(context.getGlobalContext()) +{ +} + template -inline void forEachTable(F && f) +inline void forEachTable(F && f, const Context & context) { for (auto & elem : DatabaseCatalog::instance().getDatabases()) - for (auto iterator = elem.second->getTablesIterator(); iterator->isValid(); iterator->next()) + for (auto iterator = elem.second->getTablesIterator(context); iterator->isValid(); iterator->next()) f(iterator->table()); } -void ActionLocksManager::add(StorageActionBlockType action_type) +void ActionLocksManager::add(StorageActionBlockType action_type, const Context & context) { - forEachTable([&](const StoragePtr & table) { add(table, action_type); }); + forEachTable([&](const StoragePtr & table) { add(table, action_type); }, context); } void ActionLocksManager::add(const StorageID & table_id, StorageActionBlockType action_type) { - if (auto table = DatabaseCatalog::instance().tryGetTable(table_id)) + if (auto table = DatabaseCatalog::instance().tryGetTable(table_id, global_context)) add(table, action_type); } @@ -60,7 +65,7 @@ void ActionLocksManager::remove(StorageActionBlockType action_type) void ActionLocksManager::remove(const StorageID & table_id, StorageActionBlockType action_type) { - if (auto table = DatabaseCatalog::instance().tryGetTable(table_id)) + if (auto table = DatabaseCatalog::instance().tryGetTable(table_id, global_context)) remove(table, action_type); } diff --git a/src/Interpreters/ActionLocksManager.h b/src/Interpreters/ActionLocksManager.h index 039a95ce218..ea5d96ad8ce 100644 --- a/src/Interpreters/ActionLocksManager.h +++ b/src/Interpreters/ActionLocksManager.h @@ -19,8 +19,10 @@ class Context; class ActionLocksManager { public: + ActionLocksManager(const Context & context); + /// Adds new locks for each table - void add(StorageActionBlockType action_type); + void add(StorageActionBlockType action_type, const Context & context); /// Add new lock for a table if it has not been already added void add(const StorageID & table_id, StorageActionBlockType action_type); void add(const StoragePtr & table, StorageActionBlockType action_type); @@ -41,6 +43,7 @@ private: mutable std::mutex mutex; StorageLocks storage_locks; + const Context & global_context; }; } diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 7c2133e629f..6b227657ba5 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -670,7 +670,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su if 
(identifier) { auto table_id = data.context.resolveStorageID(right_in_operand); - StoragePtr table = DatabaseCatalog::instance().tryGetTable(table_id); + StoragePtr table = DatabaseCatalog::instance().tryGetTable(table_id, data.context); if (table) { diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 59ff01bf972..a4e03d35d42 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -181,7 +181,7 @@ void AsynchronousMetrics::update() /// Lazy database can not contain MergeTree tables if (db.second->getEngineName() == "Lazy") continue; - for (auto iterator = db.second->getTablesIterator(); iterator->isValid(); iterator->next()) + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) { ++total_number_of_tables; const auto & table = iterator->table(); diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 53c61bea60e..d601d460be9 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -191,7 +191,7 @@ void SelectStreamFactory::createForShard( else { auto resolved_id = context.resolveStorageID(main_table); - main_table_storage = DatabaseCatalog::instance().tryGetTable(resolved_id); + main_table_storage = DatabaseCatalog::instance().tryGetTable(resolved_id, context); } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 4605c606594..9c78fe59da1 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -2017,7 +2017,7 @@ std::shared_ptr Context::getActionLocksManager() auto lock = getLock(); if (!shared->action_locks_manager) - shared->action_locks_manager = std::make_shared(); + shared->action_locks_manager = std::make_shared(*this); return shared->action_locks_manager; } diff --git a/src/Interpreters/CrossToInnerJoinVisitor.cpp b/src/Interpreters/CrossToInnerJoinVisitor.cpp index 49b52b0fb7f..b2f3f56be4d 100644 --- a/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -239,6 +239,10 @@ bool getTables(ASTSelectQuery & select, std::vector & joined_tabl size_t num_array_join = 0; size_t num_using = 0; + // For diagnostic messages. 
+ std::vector tables_with_using; + tables_with_using.reserve(num_tables); + for (const auto & child : tables->children) { auto * table_element = child->as(); @@ -257,6 +261,7 @@ bool getTables(ASTSelectQuery & select, std::vector & joined_tabl if (t.hasUsing()) { ++num_using; + tables_with_using.push_back(table_element); continue; } @@ -275,7 +280,11 @@ bool getTables(ASTSelectQuery & select, std::vector & joined_tabl } if (num_using && (num_tables - num_array_join) > 2) - throw Exception("Multiple CROSS/COMMA JOIN do not support USING", ErrorCodes::NOT_IMPLEMENTED); + { + throw Exception("Multiple CROSS/COMMA JOIN do not support USING (while " + "processing '" + IAST::formatForErrorMessage(tables_with_using) + "')", + ErrorCodes::NOT_IMPLEMENTED); + } return !(num_array_join || num_using); } diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index fefeeeed988..ee91d903416 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -634,7 +634,7 @@ void DDLWorker::processTask(DDLTask & task, const ZooKeeperPtr & zookeeper) { /// It's not CREATE DATABASE auto table_id = context.tryResolveStorageID(*query_with_table, Context::ResolveOrdinary); - storage = DatabaseCatalog::instance().tryGetTable(table_id); + storage = DatabaseCatalog::instance().tryGetTable(table_id, context); } /// For some reason we check consistency of cluster definition only diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 7606fdb255a..4871d8d37aa 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -58,13 +58,17 @@ TemporaryTableHolder::TemporaryTableHolder(const Context & context_, } -TemporaryTableHolder::TemporaryTableHolder(const Context & context_, const ColumnsDescription & columns, const ASTPtr & query) +TemporaryTableHolder::TemporaryTableHolder( + const Context & context_, + const ColumnsDescription & columns, + const ConstraintsDescription & constraints, + const ASTPtr & query) : TemporaryTableHolder ( context_, [&](const StorageID & table_id) { - return StorageMemory::create(table_id, ColumnsDescription{columns}, ConstraintsDescription{}); + return StorageMemory::create(table_id, ColumnsDescription{columns}, ConstraintsDescription{constraints}); }, query ) @@ -97,7 +101,7 @@ StorageID TemporaryTableHolder::getGlobalTableID() const StoragePtr TemporaryTableHolder::getTable() const { - auto table = temporary_tables->tryGetTable("_tmp_" + toString(id)); + auto table = temporary_tables->tryGetTable("_tmp_" + toString(id), *global_context); if (!table) throw Exception("Temporary table " + getGlobalTableID().getNameForLogs() + " not found", ErrorCodes::LOGICAL_ERROR); return table; @@ -108,7 +112,7 @@ void DatabaseCatalog::loadDatabases() { drop_delay_sec = global_context->getConfigRef().getInt("database_atomic_delay_before_drop_table_sec", default_drop_delay_sec); - auto db_for_temporary_and_external_tables = std::make_shared(TEMPORARY_DATABASE); + auto db_for_temporary_and_external_tables = std::make_shared(TEMPORARY_DATABASE, *global_context); attachDatabase(TEMPORARY_DATABASE, db_for_temporary_and_external_tables); loadMarkedAsDroppedTables(); @@ -159,6 +163,7 @@ DatabaseAndTable DatabaseCatalog::tryGetByUUID(const UUID & uuid) const DatabaseAndTable DatabaseCatalog::getTableImpl( const StorageID & table_id, + const Context & context, std::optional * exception) const { if (!table_id) @@ -206,7 +211,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( database = it->second; } 
- auto table = database->tryGetTable(table_id.table_name); + auto table = database->tryGetTable(table_id.table_name, context); if (!table && exception) exception->emplace("Table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); @@ -319,7 +324,7 @@ Databases DatabaseCatalog::getDatabases() const return databases; } -bool DatabaseCatalog::isTableExist(const DB::StorageID & table_id) const +bool DatabaseCatalog::isTableExist(const DB::StorageID & table_id, const Context & context) const { if (table_id.hasUUID()) return tryGetByUUID(table_id.uuid).second != nullptr; @@ -331,12 +336,12 @@ bool DatabaseCatalog::isTableExist(const DB::StorageID & table_id) const if (iter != databases.end()) db = iter->second; } - return db && db->isTableExist(table_id.table_name); + return db && db->isTableExist(table_id.table_name, context); } -void DatabaseCatalog::assertTableDoesntExist(const StorageID & table_id) const +void DatabaseCatalog::assertTableDoesntExist(const StorageID & table_id, const Context & context) const { - if (isTableExist(table_id)) + if (isTableExist(table_id, context)) throw Exception("Table " + table_id.getNameForLogs() + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); } @@ -468,32 +473,32 @@ bool DatabaseCatalog::isDictionaryExist(const StorageID & table_id) const return db && db->isDictionaryExist(table_id.getTableName()); } -StoragePtr DatabaseCatalog::getTable(const StorageID & table_id) const +StoragePtr DatabaseCatalog::getTable(const StorageID & table_id, const Context & context) const { std::optional exc; - auto res = getTableImpl(table_id, &exc); + auto res = getTableImpl(table_id, context, &exc); if (!res.second) throw Exception(*exc); return res.second; } -StoragePtr DatabaseCatalog::tryGetTable(const StorageID & table_id) const +StoragePtr DatabaseCatalog::tryGetTable(const StorageID & table_id, const Context & context) const { - return getTableImpl(table_id, nullptr).second; + return getTableImpl(table_id, context, nullptr).second; } -DatabaseAndTable DatabaseCatalog::getDatabaseAndTable(const StorageID & table_id) const +DatabaseAndTable DatabaseCatalog::getDatabaseAndTable(const StorageID & table_id, const Context & context) const { std::optional exc; - auto res = getTableImpl(table_id, &exc); + auto res = getTableImpl(table_id, context, &exc); if (!res.second) throw Exception(*exc); return res; } -DatabaseAndTable DatabaseCatalog::tryGetDatabaseAndTable(const StorageID & table_id) const +DatabaseAndTable DatabaseCatalog::tryGetDatabaseAndTable(const StorageID & table_id, const Context & context) const { - return getTableImpl(table_id, nullptr); + return getTableImpl(table_id, context, nullptr); } void DatabaseCatalog::loadMarkedAsDroppedTables() diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index a481e3d7e5e..aefed0f372d 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -21,6 +21,7 @@ class Context; class IDatabase; class Exception; class ColumnsDescription; +struct ConstraintsDescription; using DatabasePtr = std::shared_ptr; using DatabaseAndTable = std::pair; @@ -71,7 +72,11 @@ struct TemporaryTableHolder : boost::noncopyable TemporaryTableHolder(const Context & context, const Creator & creator, const ASTPtr & query = {}); /// Creates temporary table with Engine=Memory - TemporaryTableHolder(const Context & context, const ColumnsDescription & columns, const ASTPtr & query = {}); + TemporaryTableHolder( + const Context & context, + const 
ColumnsDescription & columns, + const ConstraintsDescription & constraints, + const ASTPtr & query = {}); TemporaryTableHolder(TemporaryTableHolder && rhs); TemporaryTableHolder & operator = (TemporaryTableHolder && rhs); @@ -129,15 +134,17 @@ public: DatabasePtr getDatabase(const String & database_name, const Context & local_context) const; /// For all of the following methods database_name in table_id must be not empty (even for temporary tables). - void assertTableDoesntExist(const StorageID & table_id) const; - bool isTableExist(const StorageID & table_id) const; + void assertTableDoesntExist(const StorageID & table_id, const Context & context) const; + bool isTableExist(const StorageID & table_id, const Context & context) const; bool isDictionaryExist(const StorageID & table_id) const; - StoragePtr getTable(const StorageID & table_id) const; - StoragePtr tryGetTable(const StorageID & table_id) const; - DatabaseAndTable getDatabaseAndTable(const StorageID & table_id) const; - DatabaseAndTable tryGetDatabaseAndTable(const StorageID & table_id) const; - DatabaseAndTable getTableImpl(const StorageID & table_id, std::optional * exception = nullptr) const; + StoragePtr getTable(const StorageID & table_id, const Context & context) const; + StoragePtr tryGetTable(const StorageID & table_id, const Context & context) const; + DatabaseAndTable getDatabaseAndTable(const StorageID & table_id, const Context & context) const; + DatabaseAndTable tryGetDatabaseAndTable(const StorageID & table_id, const Context & context) const; + DatabaseAndTable getTableImpl(const StorageID & table_id, + const Context & context, + std::optional * exception = nullptr) const; void addDependency(const StorageID & from, const StorageID & where); void removeDependency(const StorageID & from, const StorageID & where); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 3341855b8c6..6f846cc0277 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -192,61 +192,65 @@ void ExpressionAnalyzer::analyzeAggregation() if (has_aggregation) { - getSelectQuery(); /// assertSelect() /// Find out aggregation keys. - if (select_query->groupBy()) + if (select_query) { - NameSet unique_keys; - ASTs & group_asts = select_query->groupBy()->children; - for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i) + if (select_query->groupBy()) { - ssize_t size = group_asts.size(); - getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false); - - const auto & column_name = group_asts[i]->getColumnName(); - const auto & block = temp_actions->getSampleBlock(); - - if (!block.has(column_name)) - throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER); - - const auto & col = block.getByName(column_name); - - /// Constant expressions have non-null column pointer at this stage. - if (col.column && isColumnConst(*col.column)) + NameSet unique_keys; + ASTs & group_asts = select_query->groupBy()->children; + for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i) { - /// But don't remove last key column if no aggregate functions, otherwise aggregation will not work. 
- if (!aggregate_descriptions.empty() || size > 1) + ssize_t size = group_asts.size(); + getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false); + + const auto & column_name = group_asts[i]->getColumnName(); + const auto & block = temp_actions->getSampleBlock(); + + if (!block.has(column_name)) + throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER); + + const auto & col = block.getByName(column_name); + + /// Constant expressions have non-null column pointer at this stage. + if (col.column && isColumnConst(*col.column)) { - if (i + 1 < static_cast(size)) - group_asts[i] = std::move(group_asts.back()); + /// But don't remove last key column if no aggregate functions, otherwise aggregation will not work. + if (!aggregate_descriptions.empty() || size > 1) + { + if (i + 1 < static_cast(size)) + group_asts[i] = std::move(group_asts.back()); - group_asts.pop_back(); + group_asts.pop_back(); - --i; - continue; + --i; + continue; + } + } + + NameAndTypePair key{column_name, col.type}; + + /// Aggregation keys are uniqued. + if (!unique_keys.count(key.name)) + { + unique_keys.insert(key.name); + aggregation_keys.push_back(key); + + /// Key is no longer needed, therefore we can save a little by moving it. + aggregated_columns.push_back(std::move(key)); } } - NameAndTypePair key{column_name, col.type}; - - /// Aggregation keys are uniqued. - if (!unique_keys.count(key.name)) + if (group_asts.empty()) { - unique_keys.insert(key.name); - aggregation_keys.push_back(key); - - /// Key is no longer needed, therefore we can save a little by moving it. - aggregated_columns.push_back(std::move(key)); + select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, {}); + has_aggregation = select_query->having() || !aggregate_descriptions.empty(); } } - - if (group_asts.empty()) - { - select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, {}); - has_aggregation = select_query->having() || !aggregate_descriptions.empty(); - } } + else + aggregated_columns = temp_actions->getSampleBlock().getNamesAndTypesList(); for (const auto & desc : aggregate_descriptions) aggregated_columns.emplace_back(desc.column_name, desc.function->getReturnType()); @@ -318,7 +322,7 @@ SetPtr SelectQueryExpressionAnalyzer::isPlainStorageSetInSubquery(const ASTPtr & if (!table) return nullptr; auto table_id = context.resolveStorageID(subquery_or_table_name); - const auto storage = DatabaseCatalog::instance().getTable(table_id); + const auto storage = DatabaseCatalog::instance().getTable(table_id, context); if (storage->getName() != "Set") return nullptr; const auto storage_set = std::dynamic_pointer_cast(storage); @@ -926,7 +930,7 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const ExpressionActionsPtr ExpressionAnalyzer::getActions(bool add_aliases, bool project_result) { - ExpressionActionsPtr actions = std::make_shared(sourceColumns(), context); + ExpressionActionsPtr actions = std::make_shared(aggregated_columns, context); NamesWithAliases result_columns; Names result_names; diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index 37a358c3d28..9e616b04dab 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -103,7 +103,7 @@ public: Block sample = interpreter->getSampleBlock(); NamesAndTypesList columns = sample.getNamesAndTypesList(); - auto external_storage_holder = std::make_shared(context, ColumnsDescription{columns}); + auto 
external_storage_holder = std::make_shared(context, ColumnsDescription{columns}, ConstraintsDescription{}); StoragePtr external_storage = external_storage_holder->getTable(); /** We replace the subquery with the name of the temporary table. diff --git a/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/src/Interpreters/InJoinSubqueriesPreprocessor.cpp index f40e91e7dcd..432a7f40b12 100644 --- a/src/Interpreters/InJoinSubqueriesPreprocessor.cpp +++ b/src/Interpreters/InJoinSubqueriesPreprocessor.cpp @@ -27,7 +27,7 @@ namespace StoragePtr tryGetTable(const ASTPtr & database_and_table, const Context & context) { auto table_id = context.resolveStorageID(database_and_table); - return DatabaseCatalog::instance().tryGetTable(table_id); + return DatabaseCatalog::instance().tryGetTable(table_id, context); } using CheckShardsAndTables = InJoinSubqueriesPreprocessor::CheckShardsAndTables; diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index c2ab8776c25..bd20d78279d 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -42,7 +42,7 @@ BlockIO InterpreterAlterQuery::execute() context.checkAccess(getRequiredAccess()); auto table_id = context.resolveStorageID(alter, Context::ResolveOrdinary); - StoragePtr table = DatabaseCatalog::instance().getTable(table_id); + StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context); /// Add default database to table identifiers that we can encounter in e.g. default expressions, /// mutation expression, etc. @@ -244,12 +244,12 @@ AccessRightsElements InterpreterAlterQuery::getRequiredAccessForCommand(const AS } case ASTAlterCommand::MOVE_PARTITION: { - if ((command.move_destination_type == PartDestinationType::DISK) - || (command.move_destination_type == PartDestinationType::VOLUME)) + if ((command.move_destination_type == DataDestinationType::DISK) + || (command.move_destination_type == DataDestinationType::VOLUME)) { required_access.emplace_back(AccessType::ALTER_MOVE_PARTITION, database, table); } - else if (command.move_destination_type == PartDestinationType::TABLE) + else if (command.move_destination_type == DataDestinationType::TABLE) { required_access.emplace_back(AccessType::SELECT | AccessType::ALTER_DELETE, database, table); required_access.emplace_back(AccessType::INSERT, command.to_database, command.to_table); diff --git a/src/Interpreters/InterpreterCheckQuery.cpp b/src/Interpreters/InterpreterCheckQuery.cpp index b8f7203e607..b3cd807abe5 100644 --- a/src/Interpreters/InterpreterCheckQuery.cpp +++ b/src/Interpreters/InterpreterCheckQuery.cpp @@ -41,7 +41,7 @@ BlockIO InterpreterCheckQuery::execute() auto table_id = context.resolveStorageID(check, Context::ResolveOrdinary); context.checkAccess(AccessType::SHOW_TABLES, table_id); - StoragePtr table = DatabaseCatalog::instance().getTable(table_id); + StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context); auto check_results = table->checkData(query_ptr, context); Block block; diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 147ef7d739b..c9a9a384d8e 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -406,7 +406,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::setProperties(AS else if (!create.as_table.empty()) { String as_database_name = context.resolveDatabase(create.as_database); - StoragePtr as_storage = 
DatabaseCatalog::instance().getTable({as_database_name, create.as_table}); + StoragePtr as_storage = DatabaseCatalog::instance().getTable({as_database_name, create.as_table}, context); /// as_storage->getColumns() and setEngine(...) must be called under structure lock of other_table for CREATE ... AS other_table. as_storage_lock = as_storage->lockStructureForShare( @@ -504,7 +504,7 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const String as_database_name = context.resolveDatabase(create.as_database); String as_table_name = create.as_table; - ASTPtr as_create_ptr = DatabaseCatalog::instance().getDatabase(as_database_name)->getCreateTableQuery(as_table_name); + ASTPtr as_create_ptr = DatabaseCatalog::instance().getDatabase(as_database_name)->getCreateTableQuery(as_table_name, context); const auto & as_create = as_create_ptr->as(); const String qualified_name = backQuoteIfNeed(as_database_name) + "." + backQuoteIfNeed(as_table_name); @@ -546,7 +546,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) bool if_not_exists = create.if_not_exists; // Table SQL definition is available even if the table is detached - auto query = database->getCreateTableQuery(create.table); + auto query = database->getCreateTableQuery(create.table, context); create = query->as(); // Copy the saved create query, but use ATTACH instead of CREATE create.attach = true; create.attach_short_syntax = true; @@ -608,7 +608,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, guard = DatabaseCatalog::instance().getDDLGuard(create.database, table_name); /// Table can be created before or it can be created concurrently in another thread, while we were waiting in DDLGuard. - if (database->isTableExist(table_name)) + if (database->isTableExist(table_name, context)) { /// TODO Check structure of table if (create.if_not_exists) @@ -637,7 +637,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, if (create.if_not_exists && context.tryResolveStorageID({"", table_name}, Context::ResolveExternal)) return false; - auto temporary_table = TemporaryTableHolder(context, properties.columns, query_ptr); + auto temporary_table = TemporaryTableHolder(context, properties.columns, properties.constraints, query_ptr); context.getSessionContext().addExternalTable(table_name, std::move(temporary_table)); return true; } diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 1a6dbbcfc4d..d457fefed6a 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -86,7 +86,7 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() { auto table_id = context.resolveStorageID(table_expression.database_and_table_name); context.checkAccess(AccessType::SHOW_COLUMNS, table_id); - table = DatabaseCatalog::instance().getTable(table_id); + table = DatabaseCatalog::instance().getTable(table_id, context); } auto table_lock = table->lockStructureForShare( diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index ecdad0168f0..5ffce2fc3ec 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -81,8 +81,8 @@ BlockIO InterpreterDropQuery::executeToTable( auto ddl_guard = (!query.no_ddl_lock ? 
DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name) : nullptr); /// If table was already dropped by anyone, an exception will be thrown - auto [database, table] = query.if_exists ? DatabaseCatalog::instance().tryGetDatabaseAndTable(table_id) - : DatabaseCatalog::instance().getDatabaseAndTable(table_id); + auto [database, table] = query.if_exists ? DatabaseCatalog::instance().tryGetDatabaseAndTable(table_id, context) + : DatabaseCatalog::instance().getDatabaseAndTable(table_id, context); if (database && table) { @@ -182,7 +182,7 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(const String & table_name, auto resolved_id = context_handle.tryResolveStorageID(StorageID("", table_name), Context::ResolveExternal); if (resolved_id) { - StoragePtr table = DatabaseCatalog::instance().getTable(resolved_id); + StoragePtr table = DatabaseCatalog::instance().getTable(resolved_id, context); if (kind == ASTDropQuery::Kind::Truncate) { auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); @@ -234,7 +234,7 @@ BlockIO InterpreterDropQuery::executeToDatabase(const String & database_name, AS ASTDropQuery query; query.kind = kind; query.database = database_name; - for (auto iterator = database->getTablesIterator(); iterator->isValid(); iterator->next()) + for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next()) { query.table = iterator->name(); executeToTable({query.database, query.table}, query); diff --git a/src/Interpreters/InterpreterExistsQuery.cpp b/src/Interpreters/InterpreterExistsQuery.cpp index 993b3631e06..8539cb6816e 100644 --- a/src/Interpreters/InterpreterExistsQuery.cpp +++ b/src/Interpreters/InterpreterExistsQuery.cpp @@ -50,7 +50,7 @@ BlockInputStreamPtr InterpreterExistsQuery::executeImpl() { String database = context.resolveDatabase(exists_query->database); context.checkAccess(AccessType::SHOW_TABLES, database, exists_query->table); - result = DatabaseCatalog::instance().isTableExist({database, exists_query->table}); + result = DatabaseCatalog::instance().isTableExist({database, exists_query->table}, context); } } else if ((exists_query = query_ptr->as())) diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 1c1e21fc32c..dacd7ca5f20 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -76,7 +76,7 @@ namespace if (const auto * identifier = expression.database_and_table_name->as()) { auto table_id = data.context.resolveStorageID(*identifier); - const auto & storage = DatabaseCatalog::instance().getTable(table_id); + const auto & storage = DatabaseCatalog::instance().getTable(table_id, data.context); if (auto * storage_view = dynamic_cast(storage.get())) storage_view->getRuntimeViewQuery(&select_query, data.context, true); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index f3b116e490c..dbd327b79cd 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -28,6 +28,11 @@ #include #include #include +#include +#include +#include +#include +#include namespace DB @@ -65,7 +70,7 @@ StoragePtr InterpreterInsertQuery::getTable(ASTInsertQuery & query) } query.table_id = context.resolveStorageID(query.table_id); - return DatabaseCatalog::instance().getTable(query.table_id); + return DatabaseCatalog::instance().getTable(query.table_id, 
context); } Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table) const @@ -117,8 +122,6 @@ BlockIO InterpreterInsertQuery::execute() if (!query.table_function) context.checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); - BlockInputStreams in_streams; - BlockOutputStreams out_streams; bool is_distributed_insert_select = false; if (query.select && table->isRemote() && settings.parallel_distributed_insert_select) @@ -159,6 +162,8 @@ BlockIO InterpreterInsertQuery::execute() const auto & cluster = storage_src->getCluster(); const auto & shards_info = cluster->getShardsInfo(); + std::vector pipelines; + String new_query_str = queryToString(new_query); for (size_t shard_index : ext::range(0, shards_info.size())) { @@ -166,8 +171,7 @@ BlockIO InterpreterInsertQuery::execute() if (shard_info.isLocal()) { InterpreterInsertQuery interpreter(new_query, context); - auto block_io = interpreter.execute(); - in_streams.push_back(block_io.in); + pipelines.emplace_back(interpreter.execute().pipeline); } else { @@ -179,13 +183,20 @@ BlockIO InterpreterInsertQuery::execute() /// INSERT SELECT query returns empty block auto in_stream = std::make_shared(std::move(connections), new_query_str, Block{}, context); - in_streams.push_back(in_stream); + pipelines.emplace_back(); + pipelines.back().init(Pipe(std::make_shared(std::move(in_stream)))); + pipelines.back().setSinks([](const Block & header, QueryPipeline::StreamType) -> ProcessorPtr + { + return std::make_shared(header); + }); } - out_streams.push_back(std::make_shared(Block())); } + + res.pipeline.unitePipelines(std::move(pipelines), {}); } } + BlockOutputStreams out_streams; if (!is_distributed_insert_select || query.watch) { size_t out_streams_size = 1; @@ -193,27 +204,21 @@ BlockIO InterpreterInsertQuery::execute() { /// Passing 1 as subquery_depth will disable limiting size of intermediate result. InterpreterSelectWithUnionQuery interpreter_select{ query.select, context, SelectQueryOptions(QueryProcessingStage::Complete, 1)}; + res.pipeline = interpreter_select.executeWithProcessors(); if (table->supportsParallelInsert() && settings.max_insert_threads > 1) - { - in_streams = interpreter_select.executeWithMultipleStreams(res.pipeline); - out_streams_size = std::min(size_t(settings.max_insert_threads), in_streams.size()); - } + out_streams_size = std::min(size_t(settings.max_insert_threads), res.pipeline.getNumStreams()); + + if (out_streams_size == 1) + res.pipeline.addPipe({std::make_shared(res.pipeline.getHeader(), res.pipeline.getNumStreams())}); else - { - res = interpreter_select.execute(); - in_streams.emplace_back(res.in); - res.in = nullptr; - res.out = nullptr; - } + res.pipeline.resize(out_streams_size); } else if (query.watch) { InterpreterWatchQuery interpreter_watch{ query.watch, context }; res = interpreter_watch.execute(); - in_streams.emplace_back(res.in); - res.in = nullptr; - res.out = nullptr; + res.pipeline.init(Pipe(std::make_shared(std::move(res.in)))); } for (size_t i = 0; i < out_streams_size; i++) @@ -228,6 +233,21 @@ BlockIO InterpreterInsertQuery::execute() else out = std::make_shared(table, context, query_ptr, no_destination); + /// Note that we wrap transforms one on top of another, so we write them in reverse of data processing order. + + /// Checking constraints. It must be done after calculation of all defaults, so we can check them on calculated columns. 
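The InterpreterInsertQuery hunk here reorders the wrappers around the output stream: squashing is now applied outermost so it sees the raw client blocks first, defaults are added next, and constraints are checked last, on fully calculated columns, just before the table sink. Because each stage wraps the previous `out`, the wrappers are written in reverse of the data-processing order, exactly as the comment above notes. A minimal sketch of that wrapping pattern follows; Sink, TableSink, Squash, AddDefaults and CheckConstraints are hypothetical stand-ins, not the real ClickHouse stream classes, and only the wrapping order is meant to mirror this change.

    // Minimal sketch, not ClickHouse code: only the wrapping order mirrors the hunk.
    #include <iostream>
    #include <memory>
    #include <string>

    struct Sink
    {
        virtual ~Sink() = default;
        virtual void write(const std::string & block) = 0;
    };

    struct TableSink : Sink            // final destination
    {
        void write(const std::string & block) override { std::cout << "table <- " << block << '\n'; }
    };

    struct Wrapper : Sink              // base for decorating stages
    {
        explicit Wrapper(std::shared_ptr<Sink> next_) : next(std::move(next_)) {}
        std::shared_ptr<Sink> next;
    };

    struct CheckConstraints : Wrapper
    {
        using Wrapper::Wrapper;
        void write(const std::string & block) override { /* validate calculated columns */ next->write(block); }
    };

    struct AddDefaults : Wrapper
    {
        using Wrapper::Wrapper;
        void write(const std::string & block) override { next->write(block + " + defaults"); }
    };

    struct Squash : Wrapper
    {
        using Wrapper::Wrapper;
        void write(const std::string & block) override { /* accumulate small blocks in practice */ next->write(block); }
    };

    int main()
    {
        std::shared_ptr<Sink> out = std::make_shared<TableSink>();
        out = std::make_shared<CheckConstraints>(out); // applied last, right before the table
        out = std::make_shared<AddDefaults>(out);      // applied second
        out = std::make_shared<Squash>(out);           // applied first, sees raw client blocks
        out->write("block");                           // Squash -> AddDefaults -> CheckConstraints -> TableSink
    }

Writing to the outermost object sends a block through Squash, then AddDefaults, then CheckConstraints, and finally into TableSink, which is the processing order the comments above describe.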
+ if (const auto & constraints = table->getConstraints(); !constraints.empty()) + out = std::make_shared( + query.table_id, out, out->getHeader(), table->getConstraints(), context); + + /// Actually we don't know structure of input blocks from query/table, + /// because some clients break insertion protocol (columns != header) + out = std::make_shared( + out, query_sample_block, out->getHeader(), table->getColumns().getDefaults(), context); + + /// It's important to squash blocks as early as possible (before other transforms), + /// because other transforms may work inefficient if block size is small. + /// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side. /// Client-side bufferization might cause excessive timeouts (especially in case of big blocks). if (!(context.getSettingsRef().insert_distributed_sync && table->isRemote()) && !no_squash && !query.watch) @@ -239,15 +259,6 @@ BlockIO InterpreterInsertQuery::execute() context.getSettingsRef().min_insert_block_size_bytes); } - /// Actually we don't know structure of input blocks from query/table, - /// because some clients break insertion protocol (columns != header) - out = std::make_shared( - out, query_sample_block, out->getHeader(), table->getColumns().getDefaults(), context); - - if (const auto & constraints = table->getConstraints(); !constraints.empty()) - out = std::make_shared( - query.table_id, out, query_sample_block, table->getConstraints(), context); - auto out_wrapper = std::make_shared(out); out_wrapper->setProcessListElement(context.getProcessListElement()); out = std::move(out_wrapper); @@ -256,27 +267,35 @@ BlockIO InterpreterInsertQuery::execute() } /// What type of query: INSERT or INSERT SELECT or INSERT WATCH? - if (query.select || query.watch) + if (is_distributed_insert_select) { - for (auto & in_stream : in_streams) - { - in_stream = std::make_shared( - in_stream, out_streams.at(0)->getHeader(), ConvertingBlockInputStream::MatchColumnsMode::Position); - } + /// Pipeline was already built. + } + else if (query.select || query.watch) + { + const auto & header = out_streams.at(0)->getHeader(); - Block in_header = in_streams.at(0)->getHeader(); - if (in_streams.size() > 1) + res.pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - for (size_t i = 1; i < in_streams.size(); ++i) - assertBlocksHaveEqualStructure(in_streams[i]->getHeader(), in_header, query.select ? 
"INSERT SELECT" : "INSERT WATCH"); - } + return std::make_shared(in_header, header, + ConvertingTransform::MatchColumnsMode::Position); + }); - res.in = std::make_shared(in_streams, out_streams); + res.pipeline.setSinks([&](const Block &, QueryPipeline::StreamType type) -> ProcessorPtr + { + if (type != QueryPipeline::StreamType::Main) + return nullptr; + + auto stream = std::move(out_streams.back()); + out_streams.pop_back(); + + return std::make_shared(std::move(stream)); + }); if (!allow_materialized) { for (const auto & column : table->getColumns()) - if (column.default_desc.kind == ColumnDefaultKind::Materialized && in_header.has(column.name)) + if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) throw Exception("Cannot insert column " + column.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN); } } @@ -288,6 +307,7 @@ BlockIO InterpreterInsertQuery::execute() } else res.out = std::move(out_streams.at(0)); + res.pipeline.addStorageHolder(table); return res; diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 42afd0ef477..82c134aeba6 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -261,7 +261,7 @@ BlockIO InterpreterKillQueryQuery::execute() CancellationCode code = CancellationCode::Unknown; if (!query.test) { - auto storage = DatabaseCatalog::instance().tryGetTable(table_id); + auto storage = DatabaseCatalog::instance().tryGetTable(table_id, context); if (!storage) code = CancellationCode::NotFound; else diff --git a/src/Interpreters/InterpreterOptimizeQuery.cpp b/src/Interpreters/InterpreterOptimizeQuery.cpp index b6c50e59cc3..c47fe1160cf 100644 --- a/src/Interpreters/InterpreterOptimizeQuery.cpp +++ b/src/Interpreters/InterpreterOptimizeQuery.cpp @@ -25,7 +25,7 @@ BlockIO InterpreterOptimizeQuery::execute() context.checkAccess(getRequiredAccess()); auto table_id = context.resolveStorageID(ast, Context::ResolveOrdinary); - StoragePtr table = DatabaseCatalog::instance().getTable(table_id); + StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context); table->optimize(query_ptr, ast.partition, ast.final, ast.deduplicate, context); return {}; } diff --git a/src/Interpreters/InterpreterRenameQuery.cpp b/src/Interpreters/InterpreterRenameQuery.cpp index 78d1c7ee486..de2b6bb0c1c 100644 --- a/src/Interpreters/InterpreterRenameQuery.cpp +++ b/src/Interpreters/InterpreterRenameQuery.cpp @@ -78,7 +78,7 @@ BlockIO InterpreterRenameQuery::execute() for (auto & elem : descriptions) { if (!rename.exchange) - database_catalog.assertTableDoesntExist(StorageID(elem.to_database_name, elem.to_table_name)); + database_catalog.assertTableDoesntExist(StorageID(elem.to_database_name, elem.to_table_name), context); database_catalog.getDatabase(elem.from_database_name)->renameTable( context, diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index c8d842dfa53..4f717eda706 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1533,7 +1533,7 @@ void InterpreterSelectQuery::executeFetchColumns( if constexpr (pipeline_with_processors) { if (streams.size() == 1 || pipes.size() == 1) - pipeline.setMaxThreads(streams.size()); + pipeline.setMaxThreads(1); /// Unify streams. They must have same headers. 
if (streams.size() > 1) diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 378cb943c04..7fe124b31e6 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -271,6 +271,11 @@ QueryPipeline InterpreterSelectWithUnionQuery::executeWithProcessors() { auto common_header = getCommonHeaderForUnion(headers); main_pipeline.unitePipelines(std::move(pipelines), common_header); + + // nested queries can force 1 thread (due to simplicity) + // but in case of union this cannot be done. + UInt64 max_threads = context->getSettingsRef().max_threads; + main_pipeline.setMaxThreads(std::min(nested_interpreters.size(), max_threads)); } main_pipeline.addInterpreterContext(context); diff --git a/src/Interpreters/InterpreterSetRoleQuery.cpp b/src/Interpreters/InterpreterSetRoleQuery.cpp index f8e0167d748..c627061dd51 100644 --- a/src/Interpreters/InterpreterSetRoleQuery.cpp +++ b/src/Interpreters/InterpreterSetRoleQuery.cpp @@ -62,7 +62,7 @@ void InterpreterSetRoleQuery::setRole(const ASTSetRoleQuery & query) void InterpreterSetRoleQuery::setDefaultRole(const ASTSetRoleQuery & query) { - context.checkAccess(AccessType::CREATE_USER | AccessType::DROP_USER); + context.checkAccess(AccessType::ALTER_USER); auto & access_control = context.getAccessControlManager(); std::vector to_users = ExtendedRoleSet{*query.to_users, access_control, context.getUserID()}.getMatchingIDs(access_control); diff --git a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp index 87e1265f793..e37c31aab22 100644 --- a/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateAccessEntityQuery.cpp @@ -56,10 +56,10 @@ namespace query->default_roles = user.default_roles.toASTWithNames(*manager); } - if (attach_mode && (user.authentication.getType() != Authentication::NO_PASSWORD)) + if (user.authentication.getType() != Authentication::NO_PASSWORD) { - /// We don't show password unless it's an ATTACH statement. query->authentication = user.authentication; + query->show_password = attach_mode; /// We don't show password unless it's an ATTACH statement. } if (!user.settings.empty()) diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index 9938910cf1e..30005c7b169 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -50,7 +50,7 @@ BlockInputStreamPtr InterpreterShowCreateQuery::executeImpl() auto resolve_table_type = show_query->temporary ? 
Context::ResolveExternal : Context::ResolveOrdinary; auto table_id = context.resolveStorageID(*show_query, resolve_table_type); context.checkAccess(AccessType::SHOW_COLUMNS, table_id); - create_query = DatabaseCatalog::instance().getDatabase(table_id.database_name)->getCreateTableQuery(table_id.table_name); + create_query = DatabaseCatalog::instance().getDatabase(table_id.database_name)->getCreateTableQuery(table_id.table_name, context); } else if ((show_query = query_ptr->as())) { diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index f3d48651148..fedda7cab50 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -144,7 +144,7 @@ void InterpreterSystemQuery::startStopAction(StorageActionBlockType action_type, auto access = context.getAccess(); for (auto & elem : DatabaseCatalog::instance().getDatabases()) { - for (auto iterator = elem.second->getTablesIterator(); iterator->isValid(); iterator->next()) + for (auto iterator = elem.second->getTablesIterator(context); iterator->isValid(); iterator->next()) { if (!access->isGranted(log, getRequiredAccessType(action_type), elem.first, iterator->name())) continue; @@ -321,7 +321,7 @@ StoragePtr InterpreterSystemQuery::tryRestartReplica(const StorageID & replica, context.checkAccess(AccessType::SYSTEM_RESTART_REPLICA, replica); auto table_ddl_guard = need_ddl_guard ? DatabaseCatalog::instance().getDDLGuard(replica.getDatabaseName(), replica.getTableName()) : nullptr; - auto [database, table] = DatabaseCatalog::instance().tryGetDatabaseAndTable(replica); + auto [database, table] = DatabaseCatalog::instance().tryGetDatabaseAndTable(replica, context); ASTPtr create_ast; /// Detach actions @@ -332,7 +332,7 @@ StoragePtr InterpreterSystemQuery::tryRestartReplica(const StorageID & replica, { /// If table was already dropped by anyone, an exception will be thrown auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - create_ast = database->getCreateTableQuery(replica.table_name); + create_ast = database->getCreateTableQuery(replica.table_name, context); database->detachTable(replica.table_name); } @@ -369,7 +369,7 @@ void InterpreterSystemQuery::restartReplicas(Context & system_context) for (auto & elem : catalog.getDatabases()) { DatabasePtr & database = elem.second; - for (auto iterator = database->getTablesIterator(); iterator->isValid(); iterator->next()) + for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next()) { if (dynamic_cast(iterator->table().get())) replica_names.emplace_back(StorageID{database->getDatabaseName(), iterator->name()}); @@ -394,7 +394,7 @@ void InterpreterSystemQuery::restartReplicas(Context & system_context) void InterpreterSystemQuery::syncReplica(ASTSystemQuery &) { context.checkAccess(AccessType::SYSTEM_SYNC_REPLICA, table_id); - StoragePtr table = DatabaseCatalog::instance().getTable(table_id); + StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context); if (auto * storage_replicated = dynamic_cast(table.get())) { @@ -416,7 +416,7 @@ void InterpreterSystemQuery::flushDistributed(ASTSystemQuery &) { context.checkAccess(AccessType::SYSTEM_FLUSH_DISTRIBUTED, table_id); - if (auto * storage_distributed = dynamic_cast(DatabaseCatalog::instance().getTable(table_id).get())) + if (auto * storage_distributed = dynamic_cast(DatabaseCatalog::instance().getTable(table_id, context).get())) 
storage_distributed->flushClusterNodesAllData(); else throw Exception("Table " + table_id.getNameForLogs() + " is not distributed", ErrorCodes::BAD_ARGUMENTS); diff --git a/src/Interpreters/InterpreterWatchQuery.cpp b/src/Interpreters/InterpreterWatchQuery.cpp index dc1ae6a7cad..489be488b4d 100644 --- a/src/Interpreters/InterpreterWatchQuery.cpp +++ b/src/Interpreters/InterpreterWatchQuery.cpp @@ -40,7 +40,7 @@ BlockIO InterpreterWatchQuery::execute() auto table_id = context.resolveStorageID(query, Context::ResolveOrdinary); /// Get storage - storage = DatabaseCatalog::instance().tryGetTable(table_id); + storage = DatabaseCatalog::instance().tryGetTable(table_id, context); if (!storage) throw Exception("Table " + table_id.getNameForLogs() + " doesn't exist.", diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 67363737670..7450890952a 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -61,19 +61,6 @@ void replaceJoinedTable(const ASTSelectQuery & select_query) } } -template -void checkTablesWithColumns(const std::vector & tables_with_columns, const Context & context) -{ - const auto & settings = context.getSettingsRef(); - if (settings.joined_subquery_requires_alias && tables_with_columns.size() > 1) - { - for (auto & t : tables_with_columns) - if (t.table.table.empty() && t.table.alias.empty()) - throw Exception("No alias for subquery or table function in JOIN (set joined_subquery_requires_alias=0 to disable restriction).", - ErrorCodes::ALIAS_REQUIRED); - } -} - class RenameQualifiedIdentifiersMatcher { public: @@ -194,13 +181,28 @@ StoragePtr JoinedTables::getLeftTableStorage() } /// Read from table. Even without table expression (implicit SELECT ... FROM system.one). - return DatabaseCatalog::instance().getTable(table_id); + return DatabaseCatalog::instance().getTable(table_id, context); } bool JoinedTables::resolveTables() { tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context); - checkTablesWithColumns(tables_with_columns, context); + assert(tables_with_columns.size() == table_expressions.size()); + + const auto & settings = context.getSettingsRef(); + if (settings.joined_subquery_requires_alias && tables_with_columns.size() > 1) + { + for (size_t i = 0; i < tables_with_columns.size(); ++i) + { + const auto & t = tables_with_columns[i]; + if (t.table.table.empty() && t.table.alias.empty()) + { + throw Exception("No alias for subquery or table function in JOIN (set joined_subquery_requires_alias=0 to disable restriction). 
While processing '" + + table_expressions[i]->formatForErrorMessage() + "'", + ErrorCodes::ALIAS_REQUIRED); + } + } + } return !tables_with_columns.empty(); } @@ -259,7 +261,7 @@ std::shared_ptr JoinedTables::makeTableJoin(const ASTSelectQuery & se if (table_to_join.database_and_table_name) { auto joined_table_id = context.resolveStorageID(table_to_join.database_and_table_name); - StoragePtr table = DatabaseCatalog::instance().tryGetTable(joined_table_id); + StoragePtr table = DatabaseCatalog::instance().tryGetTable(joined_table_id, context); if (table) { if (dynamic_cast(table.get()) || diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 5ba6424653c..6962885534c 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -221,14 +221,11 @@ static NameSet getKeyColumns(const StoragePtr & storage) NameSet key_columns; - if (merge_tree_data->partition_key_expr) - for (const String & col : merge_tree_data->partition_key_expr->getRequiredColumns()) - key_columns.insert(col); + for (const String & col : merge_tree_data->getColumnsRequiredForPartitionKey()) + key_columns.insert(col); - auto sorting_key_expr = merge_tree_data->sorting_key_expr; - if (sorting_key_expr) - for (const String & col : sorting_key_expr->getRequiredColumns()) - key_columns.insert(col); + for (const String & col : merge_tree_data->getColumnsRequiredForSortingKey()) + key_columns.insert(col); /// We don't process sample_by_ast separately because it must be among the primary key columns. if (!merge_tree_data->merging_params.sign_column.empty()) diff --git a/src/Interpreters/SyntaxAnalyzer.cpp b/src/Interpreters/SyntaxAnalyzer.cpp index b3d566dbdc8..b5f86b87fdc 100644 --- a/src/Interpreters/SyntaxAnalyzer.cpp +++ b/src/Interpreters/SyntaxAnalyzer.cpp @@ -839,7 +839,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect( return std::make_shared(result); } -SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(ASTPtr & query, const NamesAndTypesList & source_columns, ConstStoragePtr storage) const +SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(ASTPtr & query, const NamesAndTypesList & source_columns, ConstStoragePtr storage, bool allow_aggregations) const { if (query->as()) throw Exception("Not select analyze for select asts.", ErrorCodes::LOGICAL_ERROR); @@ -855,7 +855,20 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(ASTPtr & query, const NamesAndTy optimizeIf(query, result.aliases, settings.optimize_if_chain_to_miltiif); - assertNoAggregates(query, "in wrong place"); + if (allow_aggregations) + { + GetAggregatesVisitor::Data data; + GetAggregatesVisitor(data).visit(query); + + /// There can not be other aggregate functions within the aggregate functions. 
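+        /// (The loop below enforces this: assertNoAggregates() is applied to every argument
+        /// subtree of each collected aggregate, so a nested aggregate call throws.)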
+ for (const ASTFunction * node : data.aggregates) + for (auto & arg : node->arguments->children) + assertNoAggregates(arg, "inside another aggregate function"); + result.aggregates = data.aggregates; + } + else + assertNoAggregates(query, "in wrong place"); + result.collectUsedColumns(query); return std::make_shared(result); } diff --git a/src/Interpreters/SyntaxAnalyzer.h b/src/Interpreters/SyntaxAnalyzer.h index dda0add38db..abacb25ac4d 100644 --- a/src/Interpreters/SyntaxAnalyzer.h +++ b/src/Interpreters/SyntaxAnalyzer.h @@ -86,7 +86,7 @@ public: {} /// Analyze and rewrite not select query - SyntaxAnalyzerResultPtr analyze(ASTPtr & query, const NamesAndTypesList & source_columns_, ConstStoragePtr storage = {}) const; + SyntaxAnalyzerResultPtr analyze(ASTPtr & query, const NamesAndTypesList & source_columns_, ConstStoragePtr storage = {}, bool allow_aggregations = false) const; /// Analyze and rewrite select query SyntaxAnalyzerResultPtr analyzeSelect( diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 3c0d2159ef9..649cfa28e6e 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -431,7 +431,7 @@ void SystemLog::prepareTable() { String description = table_id.getNameForLogs(); - table = DatabaseCatalog::instance().tryGetTable(table_id); + table = DatabaseCatalog::instance().tryGetTable(table_id, context); if (table) { @@ -442,7 +442,7 @@ void SystemLog::prepareTable() { /// Rename the existing table. int suffix = 0; - while (DatabaseCatalog::instance().isTableExist({table_id.database_name, table_id.table_name + "_" + toString(suffix)})) + while (DatabaseCatalog::instance().isTableExist({table_id.database_name, table_id.table_name + "_" + toString(suffix)}, context)) ++suffix; auto rename = std::make_shared(); @@ -483,7 +483,7 @@ void SystemLog::prepareTable() interpreter.setInternal(true); interpreter.execute(); - table = DatabaseCatalog::instance().getTable(table_id); + table = DatabaseCatalog::instance().getTable(table_id, context); } is_prepared = true; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index c69631b2665..2cc6730b90d 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -363,6 +363,9 @@ static std::tuple executeQueryImpl( else res = interpreter->execute(); + if (res.pipeline.initialized()) + use_processors = true; + if (const auto * insert_interpreter = typeid_cast(&*interpreter)) { /// Save insertion table (not table function). TODO: support remote() table function. @@ -390,7 +393,7 @@ static std::tuple executeQueryImpl( /// Limits apply only to the final result. 
pipeline.setProgressCallback(context.getProgressCallback()); pipeline.setProcessListElement(context.getProcessListElement()); - if (stage == QueryProcessingStage::Complete) + if (stage == QueryProcessingStage::Complete && !pipeline.isCompleted()) { pipeline.resize(1); pipeline.addSimpleTransform([&](const Block & header) @@ -771,29 +774,36 @@ void executeQuery( if (ast_query_with_output && ast_query_with_output->settings_ast) InterpreterSetQuery(ast_query_with_output->settings_ast, context).executeForCurrentContext(); - pipeline.addSimpleTransform([](const Block & header) + if (!pipeline.isCompleted()) { - return std::make_shared(header); - }); + pipeline.addSimpleTransform([](const Block & header) + { + return std::make_shared(header); + }); - auto out = context.getOutputFormatProcessor(format_name, *out_buf, pipeline.getHeader()); - out->setAutoFlush(); + auto out = context.getOutputFormatProcessor(format_name, *out_buf, pipeline.getHeader()); + out->setAutoFlush(); - /// Save previous progress callback if any. TODO Do it more conveniently. - auto previous_progress_callback = context.getProgressCallback(); + /// Save previous progress callback if any. TODO Do it more conveniently. + auto previous_progress_callback = context.getProgressCallback(); - /// NOTE Progress callback takes shared ownership of 'out'. - pipeline.setProgressCallback([out, previous_progress_callback] (const Progress & progress) + /// NOTE Progress callback takes shared ownership of 'out'. + pipeline.setProgressCallback([out, previous_progress_callback] (const Progress & progress) + { + if (previous_progress_callback) + previous_progress_callback(progress); + out->onProgress(progress); + }); + + if (set_result_details) + set_result_details(context.getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::instance().getTimeZone()); + + pipeline.setOutputFormat(std::move(out)); + } + else { - if (previous_progress_callback) - previous_progress_callback(progress); - out->onProgress(progress); - }); - - if (set_result_details) - set_result_details(context.getClientInfo().current_query_id, out->getContentType(), format_name, DateLUT::instance().getTimeZone()); - - pipeline.setOutput(std::move(out)); + pipeline.setProgressCallback(context.getProgressCallback()); + } { auto executor = pipeline.execute(); diff --git a/src/Interpreters/getTableExpressions.cpp b/src/Interpreters/getTableExpressions.cpp index b5444f73b35..8467a98685d 100644 --- a/src/Interpreters/getTableExpressions.cpp +++ b/src/Interpreters/getTableExpressions.cpp @@ -96,7 +96,7 @@ static NamesAndTypesList getColumnsFromTableExpression(const ASTTableExpression else if (table_expression.database_and_table_name) { auto table_id = context.resolveStorageID(table_expression.database_and_table_name); - const auto & table = DatabaseCatalog::instance().getTable(table_id); + const auto & table = DatabaseCatalog::instance().getTable(table_id, context); const auto & columns = table->getColumns(); names_and_type_list = columns.getOrdinary(); materialized = columns.getMaterialized(); diff --git a/src/Interpreters/interpretSubquery.cpp b/src/Interpreters/interpretSubquery.cpp index e108db1af30..c94759897f5 100644 --- a/src/Interpreters/interpretSubquery.cpp +++ b/src/Interpreters/interpretSubquery.cpp @@ -96,7 +96,7 @@ std::shared_ptr interpretSubquery( else { auto table_id = context.resolveStorageID(table_expression); - const auto & storage = DatabaseCatalog::instance().getTable(table_id); + const auto & storage = 
DatabaseCatalog::instance().getTable(table_id, context); columns = storage->getColumns().getOrdinary(); select_query->replaceDatabaseAndTable(table_id); } diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index ec0865c2fb5..61a741b5cf4 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -101,6 +101,7 @@ struct PartialSortingLessWithCollation } }; + void sortBlock(Block & block, const SortDescription & description, UInt64 limit) { if (!block) @@ -178,21 +179,47 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) if (need_collation) { - PartialSortingLessWithCollation less_with_collation(columns_with_sort_desc); + EqualRanges ranges; + ranges.emplace_back(0, perm.size()); + for (const auto& column : columns_with_sort_desc) + { + while (!ranges.empty() && limit && limit <= ranges.back().first) + ranges.pop_back(); - if (limit) - std::partial_sort(perm.begin(), perm.begin() + limit, perm.end(), less_with_collation); - else - pdqsort(perm.begin(), perm.end(), less_with_collation); + + if (ranges.empty()) + break; + + + if (isCollationRequired(column.description)) + { + const ColumnString & column_string = assert_cast(*column.column); + column_string.updatePermutationWithCollation(*column.description.collator, column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); + } + else + { + column.column->updatePermutation( + column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); + } + } } else { - PartialSortingLess less(columns_with_sort_desc); - - if (limit) - std::partial_sort(perm.begin(), perm.begin() + limit, perm.end(), less); - else - pdqsort(perm.begin(), perm.end(), less); + EqualRanges ranges; + ranges.emplace_back(0, perm.size()); + for (const auto& column : columns_with_sort_desc) + { + while (!ranges.empty() && limit && limit <= ranges.back().first) + { + ranges.pop_back(); + } + if (ranges.empty()) + { + break; + } + column.column->updatePermutation( + column.description.direction < 0, limit, column.description.nulls_direction, perm, ranges); + } } size_t columns = block.columns(); diff --git a/src/Interpreters/tests/CMakeLists.txt b/src/Interpreters/tests/CMakeLists.txt index 26ebf007e6c..19d302d2b30 100644 --- a/src/Interpreters/tests/CMakeLists.txt +++ b/src/Interpreters/tests/CMakeLists.txt @@ -51,10 +51,6 @@ add_executable (in_join_subqueries_preprocessor in_join_subqueries_preprocessor. 
target_link_libraries (in_join_subqueries_preprocessor PRIVATE dbms clickhouse_parsers) add_check(in_join_subqueries_preprocessor) -add_executable (expression_analyzer expression_analyzer.cpp) -target_link_libraries (expression_analyzer PRIVATE dbms clickhouse_storages_system clickhouse_parsers clickhouse_common_io) -add_check(expression_analyzer) - add_executable (users users.cpp) target_link_libraries (users PRIVATE dbms clickhouse_common_config) diff --git a/src/Interpreters/tests/expression_analyzer.cpp b/src/Interpreters/tests/expression_analyzer.cpp deleted file mode 100644 index 987b5c763e6..00000000000 --- a/src/Interpreters/tests/expression_analyzer.cpp +++ /dev/null @@ -1,128 +0,0 @@ -#include - -#include -#include -#include - -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include - - -using namespace DB; - -namespace DB -{ - namespace ErrorCodes - { - extern const int SYNTAX_ERROR; - } -} - -struct TestEntry -{ - String query; - std::unordered_map expected_aliases; /// alias -> AST.getID() - NamesAndTypesList source_columns = {}; - - bool check(const Context & context) - { - ASTPtr ast = parse(query); - - auto res = SyntaxAnalyzer(context).analyze(ast, source_columns); - return checkAliases(*res); - } - -private: - bool checkAliases(const SyntaxAnalyzerResult & res) - { - for (const auto & alias : res.aliases) - { - const String & alias_name = alias.first; - if (expected_aliases.count(alias_name) == 0 || - expected_aliases[alias_name] != alias.second->getID()) - { - std::cout << "unexpected alias: " << alias_name << ' ' << alias.second->getID() << std::endl; - return false; - } - else - expected_aliases.erase(alias_name); - } - - if (!expected_aliases.empty()) - { - std::cout << "missing aliases: " << expected_aliases.size() << std::endl; - return false; - } - - return true; - } - - static ASTPtr parse(const std::string & query) - { - ParserSelectQuery parser; - std::string message; - const auto * text = query.data(); - if (ASTPtr ast = tryParseQuery(parser, text, text + query.size(), message, false, "", false, 0, 0)) - return ast; - throw Exception(message, ErrorCodes::SYNTAX_ERROR); - } -}; - - -int main() -{ - std::vector queries = - { - { - "SELECT number AS n FROM system.numbers LIMIT 0", - {{"n", "Identifier_number"}}, - { NameAndTypePair("number", std::make_shared()) } - }, - - { - "SELECT number AS n FROM system.numbers LIMIT 0", - {{"n", "Identifier_number"}} - } - }; - - SharedContextHolder shared_context = Context::createShared(); - Context context = Context::createGlobal(shared_context.get()); - context.makeGlobalContext(); - - auto system_database = std::make_shared("system"); - DatabaseCatalog::instance().attachDatabase("system", system_database); - //context.setCurrentDatabase("system"); - system_database->attachTable("one", StorageSystemOne::create("one"), {}); - system_database->attachTable("numbers", StorageSystemNumbers::create(StorageID("system", "numbers"), false), {}); - - size_t success = 0; - for (auto & entry : queries) - { - try - { - if (entry.check(context)) - { - ++success; - std::cout << "[OK] " << entry.query << std::endl; - } - else - std::cout << "[Failed] " << entry.query << std::endl; - } - catch (Exception & e) - { - std::cout << "[Error] " << entry.query << std::endl << e.displayText() << std::endl; - } - } - - return success != queries.size(); -} diff --git a/src/Interpreters/tests/hash_map3.cpp b/src/Interpreters/tests/hash_map3.cpp index 2207edc6cc1..1b4ce8eac77 100644 --- 
a/src/Interpreters/tests/hash_map3.cpp +++ b/src/Interpreters/tests/hash_map3.cpp @@ -66,7 +66,8 @@ int main(int, char **) UInt64, SimpleHash, Grower, - HashTableAllocatorWithStackMemory<4 * 24>>; + HashTableAllocatorWithStackMemory< + 4 * sizeof(HashMapCell)>>; Map map; diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 9ec2fad5768..f323f66be17 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -181,13 +181,13 @@ void ASTAlterCommand::formatImpl( settings.ostr << " TO "; switch (move_destination_type) { - case PartDestinationType::DISK: + case DataDestinationType::DISK: settings.ostr << "DISK "; break; - case PartDestinationType::VOLUME: + case DataDestinationType::VOLUME: settings.ostr << "VOLUME "; break; - case PartDestinationType::TABLE: + case DataDestinationType::TABLE: settings.ostr << "TABLE "; if (!to_database.empty()) { @@ -201,7 +201,7 @@ void ASTAlterCommand::formatImpl( default: break; } - if (move_destination_type != PartDestinationType::TABLE) + if (move_destination_type != DataDestinationType::TABLE) { settings.ostr << quoteString(move_destination_name); } diff --git a/src/Parsers/ASTAlterQuery.h b/src/Parsers/ASTAlterQuery.h index 85e9a4d7552..a9ae06863a9 100644 --- a/src/Parsers/ASTAlterQuery.h +++ b/src/Parsers/ASTAlterQuery.h @@ -136,7 +136,7 @@ public: bool if_exists = false; /// option for DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN - PartDestinationType move_destination_type; /// option for MOVE PART/PARTITION + DataDestinationType move_destination_type; /// option for MOVE PART/PARTITION String move_destination_name; /// option for MOVE PART/PARTITION diff --git a/src/Parsers/ASTCreateUserQuery.cpp b/src/Parsers/ASTCreateUserQuery.cpp index c8e2a76dfa2..e5c1178285b 100644 --- a/src/Parsers/ASTCreateUserQuery.cpp +++ b/src/Parsers/ASTCreateUserQuery.cpp @@ -6,6 +6,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + + namespace { void formatRenameTo(const String & new_name, const IAST::FormatSettings & settings) @@ -15,27 +21,51 @@ namespace } - void formatAuthentication(const Authentication & authentication, const IAST::FormatSettings & settings) + void formatAuthentication(const Authentication & authentication, bool show_password, const IAST::FormatSettings & settings) { - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " IDENTIFIED WITH " << (settings.hilite ? IAST::hilite_none : ""); - switch (authentication.getType()) + auto authentication_type = authentication.getType(); + if (authentication_type == Authentication::NO_PASSWORD) { - case Authentication::Type::NO_PASSWORD: - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "no_password" << (settings.hilite ? IAST::hilite_none : ""); - break; - case Authentication::Type::PLAINTEXT_PASSWORD: - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "plaintext_password BY " << (settings.hilite ? IAST::hilite_none : "") - << quoteString(authentication.getPassword()); - break; - case Authentication::Type::SHA256_PASSWORD: - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "sha256_hash BY " << (settings.hilite ? IAST::hilite_none : "") - << quoteString(authentication.getPasswordHashHex()); - break; - case Authentication::Type::DOUBLE_SHA1_PASSWORD: - settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << "double_sha1_hash BY " << (settings.hilite ? 
IAST::hilite_none : "") - << quoteString(authentication.getPasswordHashHex()); - break; + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " NOT IDENTIFIED" + << (settings.hilite ? IAST::hilite_none : ""); + return; } + + String authentication_type_name = Authentication::TypeInfo::get(authentication_type).name; + std::optional password; + + if (show_password) + { + switch (authentication_type) + { + case Authentication::PLAINTEXT_PASSWORD: + { + password = authentication.getPassword(); + break; + } + case Authentication::SHA256_PASSWORD: + { + authentication_type_name = "sha256_hash"; + password = authentication.getPasswordHashHex(); + break; + } + case Authentication::DOUBLE_SHA1_PASSWORD: + { + authentication_type_name = "double_sha1_hash"; + password = authentication.getPasswordHashHex(); + break; + } + + case Authentication::NO_PASSWORD: [[fallthrough]]; + case Authentication::MAX_TYPE: + throw Exception("AST: Unexpected authentication type " + toString(authentication_type), ErrorCodes::LOGICAL_ERROR); + } + } + + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " IDENTIFIED WITH " << authentication_type_name + << (settings.hilite ? IAST::hilite_none : ""); + if (password) + settings.ostr << (settings.hilite ? IAST::hilite_keyword : "") << " BY " << quoteString(*password); } @@ -190,7 +220,7 @@ void ASTCreateUserQuery::formatImpl(const FormatSettings & format, FormatState & formatRenameTo(new_name, format); if (authentication) - formatAuthentication(*authentication, format); + formatAuthentication(*authentication, show_password, format); if (hosts) formatHosts(nullptr, *hosts, format); diff --git a/src/Parsers/ASTCreateUserQuery.h b/src/Parsers/ASTCreateUserQuery.h index 54dc51d783b..28ef6c059da 100644 --- a/src/Parsers/ASTCreateUserQuery.h +++ b/src/Parsers/ASTCreateUserQuery.h @@ -12,14 +12,14 @@ class ASTExtendedRoleSet; class ASTSettingsProfileElements; /** CREATE USER [IF NOT EXISTS | OR REPLACE] name - * [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}] + * [NOT IDENTIFIED | IDENTIFIED [WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash}] BY {'password'|'hash'}] * [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [DEFAULT ROLE role [,...]] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] * * ALTER USER [IF EXISTS] name * [RENAME TO new_name] - * [IDENTIFIED [WITH {PLAINTEXT_PASSWORD|SHA256_PASSWORD|DOUBLE_SHA1_PASSWORD}] BY {'password'|'hash'}] + * [NOT IDENTIFIED | IDENTIFIED [WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash}] BY {'password'|'hash'}] * [[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] @@ -38,6 +38,7 @@ public: String new_name; std::optional authentication; + bool show_password = true; /// formatImpl() will show the password or hash. 
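+    /// When show_password is false, formatAuthentication() above prints only the type,
+    /// e.g. "IDENTIFIED WITH sha256_password", and omits the trailing "BY '...'" clause.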
std::optional hosts; std::optional add_hosts; diff --git a/src/Parsers/ASTExtendedRoleSet.h b/src/Parsers/ASTExtendedRoleSet.h index 8d619e5d6a0..656f563bd9a 100644 --- a/src/Parsers/ASTExtendedRoleSet.h +++ b/src/Parsers/ASTExtendedRoleSet.h @@ -15,7 +15,10 @@ public: bool all = false; Strings except_names; bool except_current_user = false; - bool id_mode = false; /// If true then `names` and `except_names` keeps UUIDs, not names. + + bool id_mode = false; /// true if `names` and `except_names` keep UUIDs, not names. + bool can_contain_roles = true; /// true if this set can contain names of roles. + bool can_contain_users = true; /// true if this set can contain names of users. bool empty() const { return names.empty() && !current_user && !all; } void replaceCurrentUserTagWithName(const String & current_user_name); diff --git a/src/Parsers/ASTTTLElement.cpp b/src/Parsers/ASTTTLElement.cpp index 7e03a73e36d..1635d376d30 100644 --- a/src/Parsers/ASTTTLElement.cpp +++ b/src/Parsers/ASTTTLElement.cpp @@ -7,21 +7,90 @@ namespace DB { +ASTPtr ASTTTLElement::clone() const +{ + auto clone = std::make_shared(*this); + clone->children.clear(); + clone->ttl_expr_pos = -1; + clone->where_expr_pos = -1; + + clone->setExpression(clone->ttl_expr_pos, getExpression(ttl_expr_pos, true)); + clone->setExpression(clone->where_expr_pos, getExpression(where_expr_pos, true)); + + for (auto & expr : clone->group_by_key) + expr = expr->clone(); + for (auto & [name, expr] : clone->group_by_aggregations) + expr = expr->clone(); + + return clone; +} + void ASTTTLElement::formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const { - children.front()->formatImpl(settings, state, frame); - if (destination_type == PartDestinationType::DISK) + ttl()->formatImpl(settings, state, frame); + if (mode == TTLMode::MOVE && destination_type == DataDestinationType::DISK) { settings.ostr << " TO DISK " << quoteString(destination_name); } - else if (destination_type == PartDestinationType::VOLUME) + else if (mode == TTLMode::MOVE && destination_type == DataDestinationType::VOLUME) { settings.ostr << " TO VOLUME " << quoteString(destination_name); } - else if (destination_type == PartDestinationType::DELETE) + else if (mode == TTLMode::GROUP_BY) + { + settings.ostr << " GROUP BY "; + for (auto it = group_by_key.begin(); it != group_by_key.end(); ++it) + { + if (it != group_by_key.begin()) + settings.ostr << ", "; + (*it)->formatImpl(settings, state, frame); + } + if (!group_by_aggregations.empty()) + { + settings.ostr << " SET "; + for (auto it = group_by_aggregations.begin(); it != group_by_aggregations.end(); ++it) + { + if (it != group_by_aggregations.begin()) + settings.ostr << ", "; + settings.ostr << it->first << " = "; + it->second->formatImpl(settings, state, frame); + } + } + } + else if (mode == TTLMode::DELETE) { /// It would be better to output "DELETE" here but that will break compatibility with earlier versions. } + + if (where()) + { + settings.ostr << " WHERE "; + where()->formatImpl(settings, state, frame); + } +} + +void ASTTTLElement::setExpression(int & pos, ASTPtr && ast) +{ + if (ast) + { + if (pos == -1) + { + pos = children.size(); + children.emplace_back(ast); + } + else + children[pos] = ast; + } + else if (pos != -1) + { + children[pos] = ASTPtr{}; + pos = -1; + } +} + +ASTPtr ASTTTLElement::getExpression(int pos, bool clone) const +{ + return pos != -1 ? (clone ? 
children[pos]->clone() : children[pos]) : ASTPtr{}; } } diff --git a/src/Parsers/ASTTTLElement.h b/src/Parsers/ASTTTLElement.h index 02f70094e04..7ee1f4795ff 100644 --- a/src/Parsers/ASTTTLElement.h +++ b/src/Parsers/ASTTTLElement.h @@ -1,36 +1,54 @@ #pragma once #include -#include +#include +#include namespace DB { + /** Element of TTL expression. */ class ASTTTLElement : public IAST { public: - PartDestinationType destination_type; + TTLMode mode; + DataDestinationType destination_type; String destination_name; - ASTTTLElement(PartDestinationType destination_type_, const String & destination_name_) - : destination_type(destination_type_) + ASTs group_by_key; + std::vector> group_by_aggregations; + + ASTTTLElement(TTLMode mode_, DataDestinationType destination_type_, const String & destination_name_) + : mode(mode_) + , destination_type(destination_type_) , destination_name(destination_name_) + , ttl_expr_pos(-1) + , where_expr_pos(-1) { } String getID(char) const override { return "TTLElement"; } - ASTPtr clone() const override - { - auto clone = std::make_shared(*this); - clone->cloneChildren(); - return clone; - } + ASTPtr clone() const override; + + const ASTPtr ttl() const { return getExpression(ttl_expr_pos); } + const ASTPtr where() const { return getExpression(where_expr_pos); } + + void setTTL(ASTPtr && ast) { setExpression(ttl_expr_pos, std::forward(ast)); } + void setWhere(ASTPtr && ast) { setExpression(where_expr_pos, std::forward(ast)); } protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; + +private: + int ttl_expr_pos; + int where_expr_pos; + +private: + void setExpression(int & pos, ASTPtr && ast); + ASTPtr getExpression(int pos, bool clone = false) const; }; } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 70a8b282a72..785e6b25f98 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1455,23 +1455,50 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_to_disk("TO DISK"); ParserKeyword s_to_volume("TO VOLUME"); ParserKeyword s_delete("DELETE"); + ParserKeyword s_where("WHERE"); + ParserKeyword s_group_by("GROUP BY"); + ParserKeyword s_set("SET"); + ParserToken s_comma(TokenType::Comma); + ParserToken s_eq(TokenType::Equals); + + ParserIdentifier parser_identifier; ParserStringLiteral parser_string_literal; ParserExpression parser_exp; + ParserExpressionList parser_expression_list(false); - ASTPtr expr_elem; - if (!parser_exp.parse(pos, expr_elem, expected)) + ASTPtr ttl_expr; + if (!parser_exp.parse(pos, ttl_expr, expected)) return false; - PartDestinationType destination_type = PartDestinationType::DELETE; + TTLMode mode; + DataDestinationType destination_type = DataDestinationType::DELETE; String destination_name; - if (s_to_disk.ignore(pos)) - destination_type = PartDestinationType::DISK; - else if (s_to_volume.ignore(pos)) - destination_type = PartDestinationType::VOLUME; - else - s_delete.ignore(pos); - if (destination_type == PartDestinationType::DISK || destination_type == PartDestinationType::VOLUME) + if (s_to_disk.ignore(pos)) + { + mode = TTLMode::MOVE; + destination_type = DataDestinationType::DISK; + } + else if (s_to_volume.ignore(pos)) + { + mode = TTLMode::MOVE; + destination_type = DataDestinationType::VOLUME; + } + else if (s_group_by.ignore(pos)) + { + mode = TTLMode::GROUP_BY; + } + else + { + s_delete.ignore(pos); + mode = 
TTLMode::DELETE; + } + + ASTPtr where_expr; + ASTPtr ast_group_by_key; + std::vector> group_by_aggregations; + + if (mode == TTLMode::MOVE) { ASTPtr ast_space_name; if (!parser_string_literal.parse(pos, ast_space_name, expected)) @@ -1479,10 +1506,52 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) destination_name = ast_space_name->as().value.get(); } + else if (mode == TTLMode::GROUP_BY) + { + if (!parser_expression_list.parse(pos, ast_group_by_key, expected)) + return false; - node = std::make_shared(destination_type, destination_name); - node->children.push_back(expr_elem); + if (s_set.ignore(pos)) + { + while (true) + { + if (!group_by_aggregations.empty() && !s_comma.ignore(pos)) + break; + ASTPtr name; + ASTPtr value; + if (!parser_identifier.parse(pos, name, expected)) + return false; + if (!s_eq.ignore(pos)) + return false; + if (!parser_exp.parse(pos, value, expected)) + return false; + + String name_str; + if (!tryGetIdentifierNameInto(name, name_str)) + return false; + group_by_aggregations.emplace_back(name_str, std::move(value)); + } + } + } + else if (mode == TTLMode::DELETE && s_where.ignore(pos)) + { + if (!parser_exp.parse(pos, where_expr, expected)) + return false; + } + + auto ttl_element = std::make_shared(mode, destination_type, destination_name); + ttl_element->setTTL(std::move(ttl_expr)); + if (where_expr) + ttl_element->setWhere(std::move(where_expr)); + + if (mode == TTLMode::GROUP_BY) + { + ttl_element->group_by_key = std::move(ast_group_by_key->children); + ttl_element->group_by_aggregations = std::move(group_by_aggregations); + } + + node = ttl_element; return true; } diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index eb9f1462666..8ee4154541b 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -87,6 +87,12 @@ size_t IAST::checkDepthImpl(size_t max_depth, size_t level) const return res; } +std::string IAST::formatForErrorMessage() const +{ + std::stringstream ss; + format(FormatSettings(ss, true /* one line */)); + return ss.str(); +} void IAST::cloneChildren() { diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index 246b88f8c24..88dedc54d3f 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -9,6 +9,7 @@ #include #include #include +#include class SipHash; @@ -215,6 +216,11 @@ public: throw Exception("Unknown element in AST: " + getID(), ErrorCodes::UNKNOWN_ELEMENT_IN_AST); } + // A simple way to add some user-readable context to an error message. 
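+    // It renders the AST on a single line (see IAST.cpp above); JoinedTables, for example,
+    // uses it to name the offending table expression in the "No alias for subquery" error.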
+ std::string formatForErrorMessage() const; + template + static std::string formatForErrorMessage(const AstArray & array); + void cloneChildren(); public: @@ -231,4 +237,19 @@ private: size_t checkDepthImpl(size_t max_depth, size_t level) const; }; +template +std::string IAST::formatForErrorMessage(const AstArray & array) +{ + std::stringstream ss; + for (size_t i = 0; i < array.size(); ++i) + { + if (i > 0) + { + ss << ", "; + } + array[i]->format(IAST::FormatSettings(ss, true /* one line */)); + } + return ss.str(); +} + } diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index 623bca440bb..f90d010e9de 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -260,19 +260,19 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->part = true; if (s_to_disk.ignore(pos)) - command->move_destination_type = PartDestinationType::DISK; + command->move_destination_type = DataDestinationType::DISK; else if (s_to_volume.ignore(pos)) - command->move_destination_type = PartDestinationType::VOLUME; + command->move_destination_type = DataDestinationType::VOLUME; else if (s_to_table.ignore(pos)) { if (!parseDatabaseAndTableName(pos, expected, command->to_database, command->to_table)) return false; - command->move_destination_type = PartDestinationType::TABLE; + command->move_destination_type = DataDestinationType::TABLE; } else return false; - if (command->move_destination_type != PartDestinationType::TABLE) + if (command->move_destination_type != DataDestinationType::TABLE) { ASTPtr ast_space_name; if (!parser_string_literal.parse(pos, ast_space_name, expected)) @@ -289,19 +289,19 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected command->type = ASTAlterCommand::MOVE_PARTITION; if (s_to_disk.ignore(pos)) - command->move_destination_type = PartDestinationType::DISK; + command->move_destination_type = DataDestinationType::DISK; else if (s_to_volume.ignore(pos)) - command->move_destination_type = PartDestinationType::VOLUME; + command->move_destination_type = DataDestinationType::VOLUME; else if (s_to_table.ignore(pos)) { if (!parseDatabaseAndTableName(pos, expected, command->to_database, command->to_table)) return false; - command->move_destination_type = PartDestinationType::TABLE; + command->move_destination_type = DataDestinationType::TABLE; } else return false; - if (command->move_destination_type != PartDestinationType::TABLE) + if (command->move_destination_type != DataDestinationType::TABLE) { ASTPtr ast_space_name; if (!parser_string_literal.parse(pos, ast_space_name, expected)) diff --git a/src/Parsers/ParserCreateUserQuery.cpp b/src/Parsers/ParserCreateUserQuery.cpp index 76a06a0282f..3bf7e508220 100644 --- a/src/Parsers/ParserCreateUserQuery.cpp +++ b/src/Parsers/ParserCreateUserQuery.cpp @@ -35,100 +35,74 @@ namespace } - bool parseByPassword(IParserBase::Pos & pos, Expected & expected, String & password) - { - return IParserBase::wrapParseImpl(pos, [&] - { - if (!ParserKeyword{"BY"}.ignore(pos, expected)) - return false; - - ASTPtr ast; - if (!ParserStringLiteral{}.parse(pos, ast, expected)) - return false; - - password = ast->as().value.safeGet(); - return true; - }); - } - - bool parseAuthentication(IParserBase::Pos & pos, Expected & expected, std::optional & authentication) { return IParserBase::wrapParseImpl(pos, [&] { + if (ParserKeyword{"NOT IDENTIFIED"}.ignore(pos, expected)) + { + authentication = Authentication{Authentication::NO_PASSWORD}; + return 
true; + } + if (!ParserKeyword{"IDENTIFIED"}.ignore(pos, expected)) return false; - if (!ParserKeyword{"WITH"}.ignore(pos, expected)) + std::optional type; + bool expect_password = false; + bool expect_hash = false; + + if (ParserKeyword{"WITH"}.ignore(pos, expected)) { - String password; - if (!parseByPassword(pos, expected, password)) + for (auto check_type : ext::range(Authentication::MAX_TYPE)) + { + if (ParserKeyword{Authentication::TypeInfo::get(check_type).raw_name}.ignore(pos, expected)) + { + type = check_type; + expect_password = (check_type != Authentication::NO_PASSWORD); + break; + } + } + + if (!type) + { + if (ParserKeyword{"SHA256_HASH"}.ignore(pos, expected)) + { + type = Authentication::SHA256_PASSWORD; + expect_hash = true; + } + else if (ParserKeyword{"DOUBLE_SHA1_HASH"}.ignore(pos, expected)) + { + type = Authentication::DOUBLE_SHA1_PASSWORD; + expect_hash = true; + } + else + return false; + } + } + + if (!type) + { + type = Authentication::SHA256_PASSWORD; + expect_password = true; + } + + String password; + if (expect_password || expect_hash) + { + ASTPtr ast; + if (!ParserKeyword{"BY"}.ignore(pos, expected) || !ParserStringLiteral{}.parse(pos, ast, expected)) return false; - authentication = Authentication{Authentication::SHA256_PASSWORD}; + password = ast->as().value.safeGet(); + } + + authentication = Authentication{*type}; + if (expect_password) authentication->setPassword(password); - return true; - } + else if (expect_hash) + authentication->setPasswordHashHex(password); - if (ParserKeyword{"PLAINTEXT_PASSWORD"}.ignore(pos, expected)) - { - String password; - if (!parseByPassword(pos, expected, password)) - return false; - - authentication = Authentication{Authentication::PLAINTEXT_PASSWORD}; - authentication->setPassword(password); - return true; - } - - if (ParserKeyword{"SHA256_PASSWORD"}.ignore(pos, expected)) - { - String password; - if (!parseByPassword(pos, expected, password)) - return false; - - authentication = Authentication{Authentication::SHA256_PASSWORD}; - authentication->setPassword(password); - return true; - } - - if (ParserKeyword{"SHA256_HASH"}.ignore(pos, expected)) - { - String hash; - if (!parseByPassword(pos, expected, hash)) - return false; - - authentication = Authentication{Authentication::SHA256_PASSWORD}; - authentication->setPasswordHashHex(hash); - return true; - } - - if (ParserKeyword{"DOUBLE_SHA1_PASSWORD"}.ignore(pos, expected)) - { - String password; - if (!parseByPassword(pos, expected, password)) - return false; - - authentication = Authentication{Authentication::DOUBLE_SHA1_PASSWORD}; - authentication->setPassword(password); - return true; - } - - if (ParserKeyword{"DOUBLE_SHA1_HASH"}.ignore(pos, expected)) - { - String hash; - if (!parseByPassword(pos, expected, hash)) - return false; - - authentication = Authentication{Authentication::DOUBLE_SHA1_PASSWORD}; - authentication->setPasswordHashHex(hash); - return true; - } - - if (!ParserKeyword{"NO_PASSWORD"}.ignore(pos, expected)) - return false; - - authentication = Authentication{Authentication::NO_PASSWORD}; return true; }); } @@ -227,6 +201,7 @@ namespace return false; default_roles = typeid_cast>(ast); + default_roles->can_contain_users = false; return true; }); } diff --git a/src/Parsers/ParserCreateUserQuery.h b/src/Parsers/ParserCreateUserQuery.h index d609894a7ec..2a890f41060 100644 --- a/src/Parsers/ParserCreateUserQuery.h +++ b/src/Parsers/ParserCreateUserQuery.h @@ -7,13 +7,13 @@ namespace DB { /** Parses queries like * CREATE USER [IF NOT EXISTS | OR REPLACE] 
name - * [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}] + * [NOT IDENTIFIED | IDENTIFIED [WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash}] BY {'password'|'hash'}] * [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] * * ALTER USER [IF EXISTS] name * [RENAME TO new_name] - * [IDENTIFIED [WITH {PLAINTEXT_PASSWORD|SHA256_PASSWORD|DOUBLE_SHA1_PASSWORD}] BY {'password'|'hash'}] + * [NOT IDENTIFIED | IDENTIFIED [WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash}] BY {'password'|'hash'}] * [[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] */ diff --git a/src/Parsers/ParserSetRoleQuery.cpp b/src/Parsers/ParserSetRoleQuery.cpp index e6ff7893891..a69480f89eb 100644 --- a/src/Parsers/ParserSetRoleQuery.cpp +++ b/src/Parsers/ParserSetRoleQuery.cpp @@ -18,6 +18,7 @@ namespace return false; roles = typeid_cast>(ast); + roles->can_contain_users = false; return true; }); } @@ -34,6 +35,7 @@ namespace return false; to_users = typeid_cast>(ast); + to_users->can_contain_roles = false; return true; }); } diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 38f77f90abb..e61443dcfad 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -565,7 +565,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, size_t num_threads, st { /// First, find any processor to execute. /// Just travers graph and prepare any processor. - while (!finished) + while (!finished && state == nullptr) { { std::unique_lock lock(task_queue_mutex); diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 003508ab86f..49e85fcc6d3 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -14,9 +14,10 @@ struct PullingAsyncPipelineExecutor::Data { PipelineExecutorPtr executor; std::exception_ptr exception; - std::atomic_bool is_executed = false; + std::atomic_bool is_finished = false; std::atomic_bool has_exception = false; ThreadFromGlobalPool thread; + Poco::Event finish_event; ~Data() { @@ -36,8 +37,11 @@ struct PullingAsyncPipelineExecutor::Data PullingAsyncPipelineExecutor::PullingAsyncPipelineExecutor(QueryPipeline & pipeline_) : pipeline(pipeline_) { - lazy_format = std::make_shared(pipeline.getHeader()); - pipeline.setOutput(lazy_format); + if (!pipeline.isCompleted()) + { + lazy_format = std::make_shared(pipeline.getHeader()); + pipeline.setOutputFormat(lazy_format); + } } PullingAsyncPipelineExecutor::~PullingAsyncPipelineExecutor() @@ -54,7 +58,8 @@ PullingAsyncPipelineExecutor::~PullingAsyncPipelineExecutor() const Block & PullingAsyncPipelineExecutor::getHeader() const { - return lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader(); + return lazy_format ? lazy_format->getPort(IOutputFormat::PortKind::Main).getHeader() + : pipeline.getHeader(); /// Empty. 
} static void threadFunction(PullingAsyncPipelineExecutor::Data & data, ThreadGroupStatusPtr thread_group, size_t num_threads) @@ -78,6 +83,9 @@ static void threadFunction(PullingAsyncPipelineExecutor::Data & data, ThreadGrou data.exception = std::current_exception(); data.has_exception = true; } + + data.is_finished = true; + data.finish_event.set(); } @@ -99,20 +107,33 @@ bool PullingAsyncPipelineExecutor::pull(Chunk & chunk, uint64_t milliseconds) if (data->has_exception) { /// Finish lazy format in case of exception. Otherwise thread.join() may hung. - lazy_format->finish(); + if (lazy_format) + lazy_format->finish(); + data->has_exception = false; std::rethrow_exception(std::move(data->exception)); } - if (lazy_format->isFinished()) + bool is_execution_finished = lazy_format ? lazy_format->isFinished() + : data->is_finished.load(); + + if (is_execution_finished) { - data->is_executed = true; + /// If lazy format is finished, we don't cancel pipeline but wait for main thread to be finished. + data->is_finished = true; /// Wait thread ant rethrow exception if any. cancel(); return false; } - chunk = lazy_format->getChunk(milliseconds); + if (lazy_format) + { + chunk = lazy_format->getChunk(milliseconds); + return true; + } + + chunk.clear(); + data->finish_event.tryWait(milliseconds); return true; } @@ -147,11 +168,11 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) void PullingAsyncPipelineExecutor::cancel() { /// Cancel execution if it wasn't finished. - if (data && !data->is_executed && data->executor) + if (data && !data->is_finished && data->executor) data->executor->cancel(); /// Finish lazy format. Otherwise thread.join() may hung. - if (!lazy_format->isFinished()) + if (lazy_format && !lazy_format->isFinished()) lazy_format->finish(); /// Join thread here to wait for possible exception. @@ -165,12 +186,14 @@ void PullingAsyncPipelineExecutor::cancel() Chunk PullingAsyncPipelineExecutor::getTotals() { - return lazy_format->getTotals(); + return lazy_format ? lazy_format->getTotals() + : Chunk(); } Chunk PullingAsyncPipelineExecutor::getExtremes() { - return lazy_format->getExtremes(); + return lazy_format ? lazy_format->getExtremes() + : Chunk(); } Block PullingAsyncPipelineExecutor::getTotalsBlock() @@ -197,7 +220,9 @@ Block PullingAsyncPipelineExecutor::getExtremesBlock() BlockStreamProfileInfo & PullingAsyncPipelineExecutor::getProfileInfo() { - return lazy_format->getProfileInfo(); + static BlockStreamProfileInfo profile_info; + return lazy_format ? 
lazy_format->getProfileInfo() + : profile_info; } } diff --git a/src/Processors/Executors/PullingPipelineExecutor.cpp b/src/Processors/Executors/PullingPipelineExecutor.cpp index 375f6c9ed0e..af061a373cc 100644 --- a/src/Processors/Executors/PullingPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingPipelineExecutor.cpp @@ -9,7 +9,7 @@ namespace DB PullingPipelineExecutor::PullingPipelineExecutor(QueryPipeline & pipeline_) : pipeline(pipeline_) { pulling_format = std::make_shared(pipeline.getHeader(), has_data_flag); - pipeline.setOutput(pulling_format); + pipeline.setOutputFormat(pulling_format); } PullingPipelineExecutor::~PullingPipelineExecutor() diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index df73c93fd55..2873a5417ea 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -16,12 +16,12 @@ namespace DB namespace ErrorCodes { - extern const int BAD_ARGUMENTS; + extern const int UNKNOWN_EXCEPTION; extern const int CANNOT_READ_ALL_DATA; } -ArrowBlockInputFormat::ArrowBlockInputFormat(ReadBuffer & in_, const Block & header_) - : IInputFormat(header_, in_) +ArrowBlockInputFormat::ArrowBlockInputFormat(ReadBuffer & in_, const Block & header_, bool stream_) + : IInputFormat(header_, in_), stream{stream_} { prepareReader(); } @@ -30,12 +30,23 @@ Chunk ArrowBlockInputFormat::generate() { Chunk res; const Block & header = getPort().getHeader(); - - if (record_batch_current >= record_batch_total) - return res; - std::vector> single_batch(1); - arrow::Status read_status = file_reader->ReadRecordBatch(record_batch_current, &single_batch[0]); + arrow::Status read_status; + + if (stream) + { + read_status = stream_reader->ReadNext(&single_batch[0]); + if (!single_batch[0]) + return res; + } + else + { + if (record_batch_current >= record_batch_total) + return res; + + read_status = file_reader->ReadRecordBatch(record_batch_current, &single_batch[0]); + } + if (!read_status.ok()) throw Exception{"Error while reading batch of Arrow data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA}; @@ -57,30 +68,54 @@ void ArrowBlockInputFormat::resetParser() { IInputFormat::resetParser(); - file_reader.reset(); + if (stream) + stream_reader.reset(); + else + file_reader.reset(); prepareReader(); } void ArrowBlockInputFormat::prepareReader() { - arrow::Status open_status = arrow::ipc::RecordBatchFileReader::Open(asArrowFile(in), &file_reader); - if (!open_status.ok()) - throw Exception(open_status.ToString(), ErrorCodes::BAD_ARGUMENTS); - record_batch_total = file_reader->num_record_batches(); + arrow::Status status; + + if (stream) + status = arrow::ipc::RecordBatchStreamReader::Open(asArrowFile(in), &stream_reader); + else + status = arrow::ipc::RecordBatchFileReader::Open(asArrowFile(in), &file_reader); + + if (!status.ok()) + throw Exception{"Error while opening a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION}; + + if (stream) + record_batch_total = -1; + else + record_batch_total = file_reader->num_record_batches(); + record_batch_current = 0; } void registerInputFormatProcessorArrow(FormatFactory &factory) { factory.registerInputFormatProcessor( - "Arrow", - [](ReadBuffer & buf, - const Block & sample, - const RowInputFormatParams & /* params */, - const FormatSettings & /* format_settings */) - { - return std::make_shared(buf, sample); - }); + "Arrow", + [](ReadBuffer & buf, + const Block & sample, + const 
RowInputFormatParams & /* params */, + const FormatSettings & /* format_settings */) + { + return std::make_shared(buf, sample, false); + }); + + factory.registerInputFormatProcessor( + "ArrowStream", + [](ReadBuffer & buf, + const Block & sample, + const RowInputFormatParams & /* params */, + const FormatSettings & /* format_settings */) + { + return std::make_shared(buf, sample, true); + }); } } diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h index 28ca033dd13..5ad112efde9 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h @@ -4,6 +4,7 @@ #include +namespace arrow { class RecordBatchReader; } namespace arrow::ipc { class RecordBatchFileReader; } namespace DB @@ -14,7 +15,7 @@ class ReadBuffer; class ArrowBlockInputFormat : public IInputFormat { public: - ArrowBlockInputFormat(ReadBuffer & in_, const Block & header_); + ArrowBlockInputFormat(ReadBuffer & in_, const Block & header_, bool stream_); void resetParser() override; @@ -24,12 +25,17 @@ protected: Chunk generate() override; private: - void prepareReader(); - -private: + // Whether to use ArrowStream format + bool stream; + // This field is only used for ArrowStream format + std::shared_ptr stream_reader; + // The following fields are used only for Arrow format std::shared_ptr file_reader; + int record_batch_total = 0; int record_batch_current = 0; + + void prepareReader(); }; } diff --git a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp index ed33eb08728..ef6bd315319 100644 --- a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp @@ -15,8 +15,8 @@ namespace ErrorCodes extern const int UNKNOWN_EXCEPTION; } -ArrowBlockOutputFormat::ArrowBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) - : IOutputFormat(header_, out_), format_settings{format_settings_}, arrow_ostream{std::make_shared(out_)} +ArrowBlockOutputFormat::ArrowBlockOutputFormat(WriteBuffer & out_, const Block & header_, bool stream_, const FormatSettings & format_settings_) + : IOutputFormat(header_, out_), stream{stream_}, format_settings{format_settings_}, arrow_ostream{std::make_shared(out_)} { } @@ -29,12 +29,7 @@ void ArrowBlockOutputFormat::consume(Chunk chunk) CHColumnToArrowColumn::chChunkToArrowTable(arrow_table, header, chunk, columns_num, "Arrow"); if (!writer) - { - // TODO: should we use arrow::ipc::IpcOptions::alignment? - auto status = arrow::ipc::RecordBatchFileWriter::Open(arrow_ostream.get(), arrow_table->schema(), &writer); - if (!status.ok()) - throw Exception{"Error while opening a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION}; - } + prepareWriter(arrow_table->schema()); // TODO: calculate row_group_size depending on a number of rows and table size auto status = writer->WriteTable(*arrow_table, format_settings.arrow.row_group_size); @@ -53,6 +48,20 @@ void ArrowBlockOutputFormat::finalize() } } +void ArrowBlockOutputFormat::prepareWriter(const std::shared_ptr & schema) +{ + arrow::Status status; + + // TODO: should we use arrow::ipc::IpcOptions::alignment? 
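+    // In stream mode a RecordBatchStreamWriter is opened (used by the new ArrowStream format);
+    // otherwise the regular RecordBatchFileWriter is used, as below.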
+ if (stream) + status = arrow::ipc::RecordBatchStreamWriter::Open(arrow_ostream.get(), schema, &writer); + else + status = arrow::ipc::RecordBatchFileWriter::Open(arrow_ostream.get(), schema, &writer); + + if (!status.ok()) + throw Exception{"Error while opening a table writer: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION}; +} + void registerOutputFormatProcessorArrow(FormatFactory & factory) { factory.registerOutputFormatProcessor( @@ -62,7 +71,17 @@ void registerOutputFormatProcessorArrow(FormatFactory & factory) FormatFactory::WriteCallback, const FormatSettings & format_settings) { - return std::make_shared(buf, sample, format_settings); + return std::make_shared(buf, sample, false, format_settings); + }); + + factory.registerOutputFormatProcessor( + "ArrowStream", + [](WriteBuffer & buf, + const Block & sample, + FormatFactory::WriteCallback, + const FormatSettings & format_settings) + { + return std::make_shared(buf, sample, true, format_settings); }); } diff --git a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h index 9ea84f83a0d..0cc6804705b 100644 --- a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h @@ -6,6 +6,7 @@ #include #include "ArrowBufferedStreams.h" +namespace arrow { class Schema; } namespace arrow::ipc { class RecordBatchWriter; } namespace DB @@ -14,7 +15,7 @@ namespace DB class ArrowBlockOutputFormat : public IOutputFormat { public: - ArrowBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_); + ArrowBlockOutputFormat(WriteBuffer & out_, const Block & header_, bool stream_, const FormatSettings & format_settings_); String getName() const override { return "ArrowBlockOutputFormat"; } void consume(Chunk) override; @@ -23,9 +24,12 @@ public: String getContentType() const override { return "application/octet-stream"; } private: + bool stream; const FormatSettings format_settings; std::shared_ptr arrow_ostream; std::shared_ptr writer; + + void prepareWriter(const std::shared_ptr & schema); }; } diff --git a/src/Processors/Pipe.h b/src/Processors/Pipe.h index 984fa7605c6..ec5514915a7 100644 --- a/src/Processors/Pipe.h +++ b/src/Processors/Pipe.h @@ -23,7 +23,7 @@ public: /// Will connect pipes outputs with transform inputs automatically. Pipe(Pipes && pipes, ProcessorPtr transform); /// Create pipe from output port. If pipe was created that way, it possibly will not have tree shape. 
- Pipe(OutputPort * port); + explicit Pipe(OutputPort * port); Pipe(const Pipe & other) = delete; Pipe(Pipe && other) = default; diff --git a/src/Processors/QueryPipeline.cpp b/src/Processors/QueryPipeline.cpp index 13787a3fd3b..92c91a81b8a 100644 --- a/src/Processors/QueryPipeline.cpp +++ b/src/Processors/QueryPipeline.cpp @@ -34,6 +34,14 @@ void QueryPipeline::checkInitialized() throw Exception("QueryPipeline wasn't initialized.", ErrorCodes::LOGICAL_ERROR); } +void QueryPipeline::checkInitializedAndNotCompleted() +{ + checkInitialized(); + + if (streams.empty()) + throw Exception("QueryPipeline was already completed.", ErrorCodes::LOGICAL_ERROR); +} + void QueryPipeline::checkSource(const ProcessorPtr & source, bool can_have_totals) { if (!source->getInputs().empty()) @@ -194,11 +202,11 @@ static ProcessorPtr callProcessorGetter( template void QueryPipeline::addSimpleTransformImpl(const TProcessorGetter & getter) { - checkInitialized(); + checkInitializedAndNotCompleted(); Block header; - auto add_transform = [&](OutputPort *& stream, StreamType stream_type, size_t stream_num [[maybe_unused]] = IProcessor::NO_STREAM) + auto add_transform = [&](OutputPort *& stream, StreamType stream_type) { if (!stream) return; @@ -231,17 +239,14 @@ void QueryPipeline::addSimpleTransformImpl(const TProcessorGetter & getter) if (transform) { -// if (stream_type == StreamType::Main) -// transform->setStream(stream_num); - connect(*stream, transform->getInputs().front()); stream = &transform->getOutputs().front(); processors.emplace_back(std::move(transform)); } }; - for (size_t stream_num = 0; stream_num < streams.size(); ++stream_num) - add_transform(streams[stream_num], StreamType::Main, stream_num); + for (auto & stream : streams) + add_transform(stream, StreamType::Main); add_transform(totals_having_port, StreamType::Totals); add_transform(extremes_port, StreamType::Extremes); @@ -259,9 +264,50 @@ void QueryPipeline::addSimpleTransform(const ProcessorGetterWithStreamKind & get addSimpleTransformImpl(getter); } +void QueryPipeline::setSinks(const ProcessorGetterWithStreamKind & getter) +{ + checkInitializedAndNotCompleted(); + + auto add_transform = [&](OutputPort *& stream, StreamType stream_type) + { + if (!stream) + return; + + auto transform = getter(stream->getHeader(), stream_type); + + if (transform) + { + if (transform->getInputs().size() != 1) + throw Exception("Sink for query pipeline transform should have single input, " + "but " + transform->getName() + " has " + + toString(transform->getInputs().size()) + " inputs.", ErrorCodes::LOGICAL_ERROR); + + if (!transform->getOutputs().empty()) + throw Exception("Sink for query pipeline transform should have no outputs, " + "but " + transform->getName() + " has " + + toString(transform->getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR); + } + + if (!transform) + transform = std::make_shared(stream->getHeader()); + + connect(*stream, transform->getInputs().front()); + processors.emplace_back(std::move(transform)); + }; + + for (auto & stream : streams) + add_transform(stream, StreamType::Main); + + add_transform(totals_having_port, StreamType::Totals); + add_transform(extremes_port, StreamType::Extremes); + + streams.clear(); + current_header.clear(); +} + void QueryPipeline::addPipe(Processors pipe) { - checkInitialized(); + checkInitializedAndNotCompleted(); if (pipe.empty()) throw Exception("Can't add empty processors list to QueryPipeline.", ErrorCodes::LOGICAL_ERROR); @@ -298,7 +344,7 @@ void QueryPipeline::addPipe(Processors pipe) 
void QueryPipeline::addDelayedStream(ProcessorPtr source) { - checkInitialized(); + checkInitializedAndNotCompleted(); checkSource(source, false); assertBlocksHaveEqualStructure(current_header, source->getOutputs().front().getHeader(), "QueryPipeline"); @@ -313,7 +359,7 @@ void QueryPipeline::addDelayedStream(ProcessorPtr source) void QueryPipeline::resize(size_t num_streams, bool force, bool strict) { - checkInitialized(); + checkInitializedAndNotCompleted(); if (!force && num_streams == getNumStreams()) return; @@ -347,7 +393,7 @@ void QueryPipeline::enableQuotaForCurrentStreams() void QueryPipeline::addTotalsHavingTransform(ProcessorPtr transform) { - checkInitialized(); + checkInitializedAndNotCompleted(); if (!typeid_cast(transform.get())) throw Exception("TotalsHavingTransform expected for QueryPipeline::addTotalsHavingTransform.", @@ -370,7 +416,7 @@ void QueryPipeline::addTotalsHavingTransform(ProcessorPtr transform) void QueryPipeline::addDefaultTotals() { - checkInitialized(); + checkInitializedAndNotCompleted(); if (totals_having_port) throw Exception("Totals having transform was already added to pipeline.", ErrorCodes::LOGICAL_ERROR); @@ -392,7 +438,7 @@ void QueryPipeline::addDefaultTotals() void QueryPipeline::addTotals(ProcessorPtr source) { - checkInitialized(); + checkInitializedAndNotCompleted(); if (totals_having_port) throw Exception("Totals having transform was already added to pipeline.", ErrorCodes::LOGICAL_ERROR); @@ -423,7 +469,7 @@ void QueryPipeline::dropTotalsAndExtremes() void QueryPipeline::addExtremesTransform() { - checkInitialized(); + checkInitializedAndNotCompleted(); if (extremes_port) throw Exception("Extremes transform was already added to pipeline.", ErrorCodes::LOGICAL_ERROR); @@ -450,7 +496,7 @@ void QueryPipeline::addExtremesTransform() void QueryPipeline::addCreatingSetsTransform(ProcessorPtr transform) { - checkInitialized(); + checkInitializedAndNotCompleted(); if (!typeid_cast(transform.get())) throw Exception("CreatingSetsTransform expected for QueryPipeline::addExtremesTransform.", @@ -467,14 +513,14 @@ void QueryPipeline::addCreatingSetsTransform(ProcessorPtr transform) processors.emplace_back(std::move(concat)); } -void QueryPipeline::setOutput(ProcessorPtr output) +void QueryPipeline::setOutputFormat(ProcessorPtr output) { - checkInitialized(); + checkInitializedAndNotCompleted(); auto * format = dynamic_cast(output.get()); if (!format) - throw Exception("IOutputFormat processor expected for QueryPipeline::setOutput.", ErrorCodes::LOGICAL_ERROR); + throw Exception("IOutputFormat processor expected for QueryPipeline::setOutputFormat.", ErrorCodes::LOGICAL_ERROR); if (output_format) throw Exception("QueryPipeline already has output.", ErrorCodes::LOGICAL_ERROR); @@ -507,19 +553,25 @@ void QueryPipeline::setOutput(ProcessorPtr output) connect(*totals_having_port, totals); connect(*extremes_port, extremes); + streams.clear(); + current_header.clear(); + extremes_port = nullptr; + totals_having_port = nullptr; + initRowsBeforeLimit(); } void QueryPipeline::unitePipelines( std::vector && pipelines, const Block & common_header) { - checkInitialized(); - - addSimpleTransform([&](const Block & header) + if (initialized()) { - return std::make_shared( - header, common_header, ConvertingTransform::MatchColumnsMode::Position); - }); + addSimpleTransform([&](const Block & header) + { + return std::make_shared( + header, common_header, ConvertingTransform::MatchColumnsMode::Position); + }); + } std::vector extremes; std::vector totals; @@ -534,11 
+586,14 @@ void QueryPipeline::unitePipelines( { pipeline.checkInitialized(); - pipeline.addSimpleTransform([&](const Block & header) + if (!pipeline.isCompleted()) { - return std::make_shared( - header, common_header, ConvertingTransform::MatchColumnsMode::Position); - }); + pipeline.addSimpleTransform([&](const Block & header) + { + return std::make_shared( + header, common_header, ConvertingTransform::MatchColumnsMode::Position); + }); + } if (pipeline.extremes_port) { @@ -703,6 +758,11 @@ void QueryPipeline::initRowsBeforeLimit() Pipe QueryPipeline::getPipe() && { resize(1); + return std::move(std::move(*this).getPipes()[0]); +} + +Pipes QueryPipeline::getPipes() && +{ Pipe pipe(std::move(processors), streams.at(0), totals_having_port, extremes_port); pipe.max_parallel_streams = streams.maxParallelStreams(); @@ -721,15 +781,19 @@ Pipe QueryPipeline::getPipe() && if (extremes_port) pipe.setExtremesPort(extremes_port); - return pipe; + Pipes pipes; + pipes.emplace_back(std::move(pipe)); + + for (size_t i = 1; i < streams.size(); ++i) + pipes.emplace_back(Pipe(streams[i])); + + return pipes; } PipelineExecutorPtr QueryPipeline::execute() { - checkInitialized(); - - if (!output_format) - throw Exception("Cannot execute pipeline because it doesn't have output.", ErrorCodes::LOGICAL_ERROR); + if (!isCompleted()) + throw Exception("Cannot execute pipeline because it is not completed.", ErrorCodes::LOGICAL_ERROR); return std::make_shared(processors, process_list_element); } diff --git a/src/Processors/QueryPipeline.h b/src/Processors/QueryPipeline.h index 45e38ffa715..129b7f5ae3c 100644 --- a/src/Processors/QueryPipeline.h +++ b/src/Processors/QueryPipeline.h @@ -28,6 +28,7 @@ private: { public: auto size() const { return data.size(); } + bool empty() const { return size() == 0; } auto begin() { return data.begin(); } auto end() { return data.end(); } auto & front() { return data.front(); } @@ -81,6 +82,7 @@ public: void init(Pipes pipes); void init(Pipe pipe); /// Simple init for single pipe bool initialized() { return !processors.empty(); } + bool isCompleted() { return initialized() && streams.empty(); } /// Type of logical data stream for simple transform. /// Sometimes it's important to know which part of pipeline we are working for. @@ -95,13 +97,23 @@ public: using ProcessorGetter = std::function; using ProcessorGetterWithStreamKind = std::function; + /// Add transform with simple input and simple output for each port. void addSimpleTransform(const ProcessorGetter & getter); void addSimpleTransform(const ProcessorGetterWithStreamKind & getter); + /// Add several processors. They must have same header for inputs and same for outputs. + /// Total number of inputs must be the same as the number of streams. Output ports will become new streams. void addPipe(Processors pipe); + /// Add TotalsHavingTransform. Resize pipeline to single input. Adds totals port. void addTotalsHavingTransform(ProcessorPtr transform); + /// Add transform which calculates extremes. This transform adds extremes port and doesn't change inputs number. void addExtremesTransform(); + /// Adds transform which creates sets. It will be executed before reading any data from input ports. void addCreatingSetsTransform(ProcessorPtr transform); - void setOutput(ProcessorPtr output); + /// Resize pipeline to single output and add IOutputFormat. Pipeline will be completed after this transformation. + void setOutputFormat(ProcessorPtr output); + /// Sink is a processor with single input port and no output ports. 
Creates sink for each output port. + /// Pipeline will be completed after this transformation. + void setSinks(const ProcessorGetterWithStreamKind & getter); /// Add totals which returns one chunk with single row with defaults. void addDefaultTotals(); @@ -118,6 +130,7 @@ public: /// Check if resize transform was used. (In that case another distinct transform will be added). bool hasMixedStreams() const { return has_resize || hasMoreThanOneStream(); } + /// Changes the number of input ports if needed. Adds ResizeTransform. void resize(size_t num_streams, bool force = false, bool strict = false); void enableQuotaForCurrentStreams(); @@ -155,8 +168,9 @@ public: /// Set upper limit for the recommend number of threads void setMaxThreads(size_t max_threads_) { max_threads = max_threads_; } - /// Convert query pipeline to single pipe. + /// Convert query pipeline to single or several pipes. Pipe getPipe() &&; + Pipes getPipes() &&; private: /// Destruction order: processors, header, locks, temporary storages, local contexts @@ -193,6 +207,7 @@ private: QueryStatus * process_list_element = nullptr; void checkInitialized(); + void checkInitializedAndNotCompleted(); static void checkSource(const ProcessorPtr & source, bool can_have_totals); template diff --git a/src/Processors/ResizeProcessor.cpp b/src/Processors/ResizeProcessor.cpp index 3fcf4281105..d652a342150 100644 --- a/src/Processors/ResizeProcessor.cpp +++ b/src/Processors/ResizeProcessor.cpp @@ -347,11 +347,16 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in auto & waiting_output = output_ports[input_with_data.waiting_output]; - if (waiting_output.status != OutputStatus::NeedData) - throw Exception("Invalid status for associated output.", ErrorCodes::LOGICAL_ERROR); + if (waiting_output.status == OutputStatus::NotActive) + throw Exception("Invalid status NotActive for associated output.", ErrorCodes::LOGICAL_ERROR); - waiting_output.port->pushData(input_with_data.port->pullData(/* set_not_needed = */ true)); - waiting_output.status = OutputStatus::NotActive; + if (waiting_output.status != OutputStatus::Finished) + { + waiting_output.port->pushData(input_with_data.port->pullData(/* set_not_needed = */ true)); + waiting_output.status = OutputStatus::NotActive; + } + else + abandoned_chunks.emplace_back(input_with_data.port->pullData(/* set_not_needed = */ true)); if (input_with_data.port->isFinished()) { @@ -370,6 +375,18 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in return Status::Finished; } + /// Process abandoned chunks if any. + while (!abandoned_chunks.empty() && !waiting_outputs.empty()) + { + auto & waiting_output = output_ports[waiting_outputs.front()]; + waiting_outputs.pop(); + + waiting_output.port->pushData(std::move(abandoned_chunks.back())); + abandoned_chunks.pop_back(); + + waiting_output.status = OutputStatus::NotActive; + } + /// Enable more inputs if needed. while (!disabled_input_ports.empty() && !waiting_outputs.empty()) { @@ -383,6 +400,7 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in waiting_outputs.pop(); } + /// Close all other waiting for data outputs (there is no corresponding input for them). 
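Editorial aside, not part of the patch: the StrictResizeProcessor change above parks a chunk whose paired output finished while the data was being pulled, and hands it to the next output that asks for data. A minimal, self-contained C++ sketch of that "abandoned chunk" idea, using toy types instead of the real IProcessor/Port interfaces:

#include <deque>
#include <iostream>
#include <string>
#include <vector>

// Toy model of the idea above: a chunk pulled for an output that has already
// finished is not dropped but parked, and handed to the next waiting output.
int main()
{
    std::deque<std::string> abandoned_chunks;            // parked data
    std::vector<bool> output_finished = {false, true};   // output #1 finished mid-read

    auto deliver = [&](size_t output, const std::string & chunk)
    {
        if (output_finished[output])
            abandoned_chunks.push_back(chunk);            // park it instead of losing it
        else
            std::cout << "output " << output << " <- " << chunk << '\n';
    };

    deliver(0, "chunk A");
    deliver(1, "chunk B");                                // its output is gone, so it is parked

    // The next output that reports "need data" drains the parked chunks first.
    while (!abandoned_chunks.empty())
    {
        std::cout << "output 0 <- " << abandoned_chunks.front() << " (abandoned)\n";
        abandoned_chunks.pop_front();
    }
    return 0;
}

The point of the design is that data already pulled from an input is never lost just because its designated output went away.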
while (!waiting_outputs.empty()) { auto & output = output_ports[waiting_outputs.front()]; diff --git a/src/Processors/ResizeProcessor.h b/src/Processors/ResizeProcessor.h index 1f364ffcf15..5c9660af113 100644 --- a/src/Processors/ResizeProcessor.h +++ b/src/Processors/ResizeProcessor.h @@ -128,6 +128,9 @@ private: std::vector input_ports; std::vector output_ports; + /// This field contained chunks which were read for output which had became finished while reading was happening. + /// They will be pushed to any next waiting output. + std::vector abandoned_chunks; }; } diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index 018614f0165..f68a415117b 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { @@ -11,6 +12,38 @@ PartialSortingTransform::PartialSortingTransform( { } +static ColumnRawPtrs extractColumns(const Block & block, const SortDescription & description) +{ + size_t size = description.size(); + ColumnRawPtrs res; + res.reserve(size); + + for (size_t i = 0; i < size; ++i) + { + const IColumn * column = !description[i].column_name.empty() + ? block.getByName(description[i].column_name).column.get() + : block.safeGetByPosition(description[i].column_number).column.get(); + res.emplace_back(column); + } + + return res; +} + +bool less(const ColumnRawPtrs & lhs, UInt64 lhs_row_num, + const ColumnRawPtrs & rhs, UInt64 rhs_row_num, const SortDescription & description) +{ + size_t size = description.size(); + for (size_t i = 0; i < size; ++i) + { + int res = description[i].direction * lhs[i]->compareAt(lhs_row_num, rhs_row_num, *rhs[i], 1); + if (res < 0) + return true; + else if (res > 0) + return false; + } + return false; +} + void PartialSortingTransform::transform(Chunk & chunk) { if (read_rows) @@ -19,7 +52,42 @@ void PartialSortingTransform::transform(Chunk & chunk) auto block = getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()); chunk.clear(); + ColumnRawPtrs block_columns; + UInt64 rows_num = block.rows(); + + if (!threshold_block_columns.empty()) + { + IColumn::Filter filter(rows_num, 0); + block_columns = extractColumns(block, description); + size_t filtered_count = 0; + + for (UInt64 i = 0; i < rows_num; ++i) + { + if (less(threshold_block_columns, limit - 1, block_columns, i, description)) + { + ++filtered_count; + filter[i] = 1; + } + } + + if (filtered_count) + { + for (auto & column : block.getColumns()) + { + column = column->filter(filter, filtered_count); + } + } + } + sortBlock(block, description, limit); + + if (limit && limit < block.rows() && + (threshold_block_columns.empty() || less(block_columns, limit - 1, threshold_block_columns, limit - 1, description))) + { + threshold_block = block.cloneWithColumns(block.getColumns()); + threshold_block_columns = extractColumns(threshold_block, description); + } + chunk.setColumns(block.getColumns(), block.rows()); } diff --git a/src/Processors/Transforms/PartialSortingTransform.h b/src/Processors/Transforms/PartialSortingTransform.h index 47ac90c6904..d6749e4dfad 100644 --- a/src/Processors/Transforms/PartialSortingTransform.h +++ b/src/Processors/Transforms/PartialSortingTransform.h @@ -29,6 +29,8 @@ private: SortDescription description; UInt64 limit; RowsBeforeLimitCounterPtr read_rows; + Block threshold_block; + ColumnRawPtrs threshold_block_columns; }; } diff --git a/src/Server/CMakeLists.txt 
b/src/Server/CMakeLists.txt new file mode 100644 index 00000000000..e69de29bb2d diff --git a/programs/server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp similarity index 100% rename from programs/server/HTTPHandler.cpp rename to src/Server/HTTPHandler.cpp diff --git a/programs/server/HTTPHandler.h b/src/Server/HTTPHandler.h similarity index 97% rename from programs/server/HTTPHandler.h rename to src/Server/HTTPHandler.h index 6228523d343..b1a6355d281 100644 --- a/programs/server/HTTPHandler.h +++ b/src/Server/HTTPHandler.h @@ -6,6 +6,7 @@ #include #include +#include #include @@ -21,7 +22,7 @@ namespace DB class WriteBufferFromHTTPServerResponse; -typedef std::shared_ptr CompiledRegexPtr; +using CompiledRegexPtr = std::shared_ptr; class HTTPHandler : public Poco::Net::HTTPRequestHandler { diff --git a/programs/server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp similarity index 99% rename from programs/server/HTTPHandlerFactory.cpp rename to src/Server/HTTPHandlerFactory.cpp index f302216e22b..e916070be22 100644 --- a/programs/server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include "HTTPHandler.h" #include "NotFoundHandler.h" diff --git a/programs/server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h similarity index 100% rename from programs/server/HTTPHandlerFactory.h rename to src/Server/HTTPHandlerFactory.h diff --git a/programs/server/HTTPHandlerRequestFilter.h b/src/Server/HTTPHandlerRequestFilter.h similarity index 99% rename from programs/server/HTTPHandlerRequestFilter.h rename to src/Server/HTTPHandlerRequestFilter.h index b0b748506e5..f952efd7653 100644 --- a/programs/server/HTTPHandlerRequestFilter.h +++ b/src/Server/HTTPHandlerRequestFilter.h @@ -6,6 +6,7 @@ #include #include #include +#include #include diff --git a/programs/server/IServer.h b/src/Server/IServer.h similarity index 82% rename from programs/server/IServer.h rename to src/Server/IServer.h index 29e9bc16a75..131e7443646 100644 --- a/programs/server/IServer.h +++ b/src/Server/IServer.h @@ -1,14 +1,22 @@ #pragma once -#include -#include +namespace Poco +{ -#include +namespace Util +{ +class LayeredConfiguration; +} +class Logger; + +} namespace DB { +class Context; + class IServer { public: diff --git a/programs/server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp similarity index 99% rename from programs/server/InterserverIOHTTPHandler.cpp rename to src/Server/InterserverIOHTTPHandler.cpp index 4b733c7f1fd..062721a01aa 100644 --- a/programs/server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/programs/server/InterserverIOHTTPHandler.h b/src/Server/InterserverIOHTTPHandler.h similarity index 100% rename from programs/server/InterserverIOHTTPHandler.h rename to src/Server/InterserverIOHTTPHandler.h diff --git a/programs/server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp similarity index 99% rename from programs/server/MySQLHandler.cpp rename to src/Server/MySQLHandler.cpp index dabb0c12208..51b3d7eaef5 100644 --- a/programs/server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -253,7 +253,7 @@ void MySQLHandler::comFieldList(ReadBuffer & payload) ComFieldList packet; packet.readPayload(payload); String database = connection_context.getCurrentDatabase(); - StoragePtr table_ptr = DatabaseCatalog::instance().getTable({database, packet.table}); + StoragePtr table_ptr = 
DatabaseCatalog::instance().getTable({database, packet.table}, connection_context); for (const NameAndTypePair & column: table_ptr->getColumns().getAll()) { ColumnDefinition column_definition( diff --git a/programs/server/MySQLHandler.h b/src/Server/MySQLHandler.h similarity index 100% rename from programs/server/MySQLHandler.h rename to src/Server/MySQLHandler.h diff --git a/programs/server/MySQLHandlerFactory.cpp b/src/Server/MySQLHandlerFactory.cpp similarity index 99% rename from programs/server/MySQLHandlerFactory.cpp rename to src/Server/MySQLHandlerFactory.cpp index 022167fe766..5d78ed81068 100644 --- a/programs/server/MySQLHandlerFactory.cpp +++ b/src/Server/MySQLHandlerFactory.cpp @@ -4,8 +4,7 @@ #include #include #include -#include "IServer.h" -#include "MySQLHandler.h" +#include #if USE_SSL # include diff --git a/programs/server/MySQLHandlerFactory.h b/src/Server/MySQLHandlerFactory.h similarity index 95% rename from programs/server/MySQLHandlerFactory.h rename to src/Server/MySQLHandlerFactory.h index 74f0bb35a40..df7bd794b16 100644 --- a/programs/server/MySQLHandlerFactory.h +++ b/src/Server/MySQLHandlerFactory.h @@ -2,7 +2,8 @@ #include #include -#include "IServer.h" +#include +#include #if !defined(ARCADIA_BUILD) # include diff --git a/programs/server/NotFoundHandler.cpp b/src/Server/NotFoundHandler.cpp similarity index 100% rename from programs/server/NotFoundHandler.cpp rename to src/Server/NotFoundHandler.cpp diff --git a/programs/server/NotFoundHandler.h b/src/Server/NotFoundHandler.h similarity index 100% rename from programs/server/NotFoundHandler.h rename to src/Server/NotFoundHandler.h diff --git a/programs/server/PrometheusMetricsWriter.cpp b/src/Server/PrometheusMetricsWriter.cpp similarity index 100% rename from programs/server/PrometheusMetricsWriter.cpp rename to src/Server/PrometheusMetricsWriter.cpp diff --git a/programs/server/PrometheusMetricsWriter.h b/src/Server/PrometheusMetricsWriter.h similarity index 100% rename from programs/server/PrometheusMetricsWriter.h rename to src/Server/PrometheusMetricsWriter.h diff --git a/programs/server/PrometheusRequestHandler.cpp b/src/Server/PrometheusRequestHandler.cpp similarity index 95% rename from programs/server/PrometheusRequestHandler.cpp rename to src/Server/PrometheusRequestHandler.cpp index b5a48d13b64..43f39e36de8 100644 --- a/programs/server/PrometheusRequestHandler.cpp +++ b/src/Server/PrometheusRequestHandler.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include diff --git a/programs/server/PrometheusRequestHandler.h b/src/Server/PrometheusRequestHandler.h similarity index 100% rename from programs/server/PrometheusRequestHandler.h rename to src/Server/PrometheusRequestHandler.h diff --git a/programs/server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp similarity index 96% rename from programs/server/ReplicasStatusHandler.cpp rename to src/Server/ReplicasStatusHandler.cpp index f2d1ffe2ee5..986af59d3a4 100644 --- a/programs/server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -44,7 +44,7 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request if (db.second->getEngineName() == "Lazy") continue; - for (auto iterator = db.second->getTablesIterator(); iterator->isValid(); iterator->next()) + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) { const auto & table = iterator->table(); StorageReplicatedMergeTree * table_replicated = dynamic_cast(table.get()); diff --git 
a/programs/server/ReplicasStatusHandler.h b/src/Server/ReplicasStatusHandler.h similarity index 100% rename from programs/server/ReplicasStatusHandler.h rename to src/Server/ReplicasStatusHandler.h diff --git a/programs/server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp similarity index 100% rename from programs/server/StaticRequestHandler.cpp rename to src/Server/StaticRequestHandler.cpp diff --git a/programs/server/StaticRequestHandler.h b/src/Server/StaticRequestHandler.h similarity index 93% rename from programs/server/StaticRequestHandler.h rename to src/Server/StaticRequestHandler.h index bdbc17f5e00..707087df24d 100644 --- a/programs/server/StaticRequestHandler.h +++ b/src/Server/StaticRequestHandler.h @@ -4,6 +4,8 @@ #include #include +#include +#include namespace DB diff --git a/programs/server/TCPHandler.cpp b/src/Server/TCPHandler.cpp similarity index 99% rename from programs/server/TCPHandler.cpp rename to src/Server/TCPHandler.cpp index 190c549361f..6e9275540e5 100644 --- a/programs/server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -262,8 +262,8 @@ void TCPHandler::runImpl() else if (state.need_receive_data_for_input) { /// It is special case for input(), all works for reading data from client will be done in callbacks. - /// state.io.in is NullAndDoCopyBlockInputStream so read it once. - state.io.in->read(); + auto executor = state.io.pipeline.execute(); + executor->execute(state.io.pipeline.getNumThreads()); state.io.onFinish(); } else if (state.io.pipeline.initialized()) @@ -474,7 +474,7 @@ void TCPHandler::processInsertQuery(const Settings & connection_settings) if (query_context->getSettingsRef().input_format_defaults_for_omitted_fields) { if (!table_id.empty()) - sendTableColumns(DatabaseCatalog::instance().getTable(table_id)->getColumns()); + sendTableColumns(DatabaseCatalog::instance().getTable(table_id, *query_context)->getColumns()); } } @@ -627,7 +627,7 @@ void TCPHandler::processTablesStatusRequest() for (const QualifiedTableName & table_name: request.tables) { auto resolved_id = connection_context.tryResolveStorageID({table_name.database, table_name.table}); - StoragePtr table = DatabaseCatalog::instance().tryGetTable(resolved_id); + StoragePtr table = DatabaseCatalog::instance().tryGetTable(resolved_id, connection_context); if (!table) continue; @@ -944,11 +944,11 @@ bool TCPHandler::receiveData(bool scalar) StoragePtr storage; /// If such a table does not exist, create it. 
if (resolved) - storage = DatabaseCatalog::instance().getTable(resolved); + storage = DatabaseCatalog::instance().getTable(resolved, *query_context); else { NamesAndTypesList columns = block.getNamesAndTypesList(); - auto temporary_table = TemporaryTableHolder(*query_context, ColumnsDescription{columns}); + auto temporary_table = TemporaryTableHolder(*query_context, ColumnsDescription{columns}, {}); storage = temporary_table.getTable(); query_context->addExternalTable(temporary_id.table_name, std::move(temporary_table)); } diff --git a/programs/server/TCPHandler.h b/src/Server/TCPHandler.h similarity index 100% rename from programs/server/TCPHandler.h rename to src/Server/TCPHandler.h diff --git a/programs/server/TCPHandlerFactory.h b/src/Server/TCPHandlerFactory.h similarity index 95% rename from programs/server/TCPHandlerFactory.h rename to src/Server/TCPHandlerFactory.h index 68652540192..a5532a8dc02 100644 --- a/programs/server/TCPHandlerFactory.h +++ b/src/Server/TCPHandlerFactory.h @@ -3,8 +3,8 @@ #include #include #include -#include "IServer.h" -#include "TCPHandler.h" +#include +#include namespace Poco { class Logger; } diff --git a/src/Server/ya.make b/src/Server/ya.make new file mode 100644 index 00000000000..1d689ee73b8 --- /dev/null +++ b/src/Server/ya.make @@ -0,0 +1,22 @@ +LIBRARY() + +PEERDIR( + clickhouse/src/Common + contrib/libs/poco/Util +) + +SRCS( + HTTPHandler.cpp + HTTPHandlerFactory.cpp + InterserverIOHTTPHandler.cpp + MySQLHandler.cpp + MySQLHandlerFactory.cpp + NotFoundHandler.cpp + PrometheusMetricsWriter.cpp + PrometheusRequestHandler.cpp + ReplicasStatusHandler.cpp + StaticRequestHandler.cpp + TCPHandler.cpp +) + +END() diff --git a/src/Storages/MergeTree/PartDestinationType.h b/src/Storages/DataDestinationType.h similarity index 73% rename from src/Storages/MergeTree/PartDestinationType.h rename to src/Storages/DataDestinationType.h index 8f3e44537a6..05d1d89c2b5 100644 --- a/src/Storages/MergeTree/PartDestinationType.h +++ b/src/Storages/DataDestinationType.h @@ -4,7 +4,7 @@ namespace DB { -enum class PartDestinationType +enum class DataDestinationType { DISK, VOLUME, diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index fdcbe8e69f9..4dd62db0965 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -110,8 +110,9 @@ void StorageDistributedDirectoryMonitor::flushAllData() { if (!quit) { + CurrentMetrics::Increment metric_pending_files{CurrentMetrics::DistributedFilesToInsert, 0}; std::unique_lock lock{mutex}; - processFiles(); + processFiles(metric_pending_files); } } @@ -131,6 +132,9 @@ void StorageDistributedDirectoryMonitor::run() { std::unique_lock lock{mutex}; + /// This metric will be updated with the number of pending files later. + CurrentMetrics::Increment metric_pending_files{CurrentMetrics::DistributedFilesToInsert, 0}; + bool do_sleep = false; while (!quit) { @@ -139,7 +143,7 @@ void StorageDistributedDirectoryMonitor::run() { try { - do_sleep = !processFiles(); + do_sleep = !processFiles(metric_pending_files); } catch (...) 
{ @@ -222,7 +226,7 @@ ConnectionPoolPtr StorageDistributedDirectoryMonitor::createPool(const std::stri } -bool StorageDistributedDirectoryMonitor::processFiles() +bool StorageDistributedDirectoryMonitor::processFiles(CurrentMetrics::Increment & metric_pending_files) { std::map files; @@ -236,14 +240,16 @@ bool StorageDistributedDirectoryMonitor::processFiles() files[parse(file_path.getBaseName())] = file_path_str; } + /// Note: the value of this metric will be kept if this function will throw an exception. + /// This is needed, because in case of exception, files still pending. + metric_pending_files.changeTo(files.size()); + if (files.empty()) return false; - CurrentMetrics::Increment metric_increment{CurrentMetrics::DistributedFilesToInsert, CurrentMetrics::Value(files.size())}; - if (should_batch_inserts) { - processFilesWithBatching(files); + processFilesWithBatching(files, metric_pending_files); } else { @@ -252,14 +258,14 @@ bool StorageDistributedDirectoryMonitor::processFiles() if (quit) return true; - processFile(file.second); + processFile(file.second, metric_pending_files); } } return true; } -void StorageDistributedDirectoryMonitor::processFile(const std::string & file_path) +void StorageDistributedDirectoryMonitor::processFile(const std::string & file_path, CurrentMetrics::Increment & metric_pending_files) { LOG_TRACE(log, "Started processing `{}`", file_path); auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(storage.global_context->getSettingsRef()); @@ -289,6 +295,7 @@ void StorageDistributedDirectoryMonitor::processFile(const std::string & file_pa } Poco::File{file_path}.remove(); + metric_pending_files.sub(); LOG_TRACE(log, "Finished processing `{}`", file_path); } @@ -584,7 +591,9 @@ bool StorageDistributedDirectoryMonitor::scheduleAfter(size_t ms) return task_handle->scheduleAfter(ms, false); } -void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map & files) +void StorageDistributedDirectoryMonitor::processFilesWithBatching( + const std::map & files, + CurrentMetrics::Increment & metric_pending_files) { std::unordered_set file_indices_to_skip; @@ -596,6 +605,7 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map batch.readText(in); file_indices_to_skip.insert(batch.file_indices.begin(), batch.file_indices.end()); batch.send(); + metric_pending_files.sub(batch.file_indices.size()); } std::unordered_map header_to_batch; @@ -656,13 +666,17 @@ void StorageDistributedDirectoryMonitor::processFilesWithBatching(const std::map batch.total_bytes += total_bytes; if (batch.isEnoughSize()) + { batch.send(); + metric_pending_files.sub(batch.file_indices.size()); + } } for (auto & kv : header_to_batch) { Batch & batch = kv.second; batch.send(); + metric_pending_files.sub(batch.file_indices.size()); } /// current_batch.txt will not exist if there was no send diff --git a/src/Storages/Distributed/DirectoryMonitor.h b/src/Storages/Distributed/DirectoryMonitor.h index 77abf35630c..e2a913ee1ef 100644 --- a/src/Storages/Distributed/DirectoryMonitor.h +++ b/src/Storages/Distributed/DirectoryMonitor.h @@ -9,6 +9,8 @@ #include +namespace CurrentMetrics { class Increment; } + namespace DB { @@ -37,9 +39,9 @@ public: bool scheduleAfter(size_t ms); private: void run(); - bool processFiles(); - void processFile(const std::string & file_path); - void processFilesWithBatching(const std::map & files); + bool processFiles(CurrentMetrics::Increment & metric_pending_files); + void processFile(const std::string & file_path, 
CurrentMetrics::Increment & metric_pending_files); + void processFilesWithBatching(const std::map & files, CurrentMetrics::Increment & metric_pending_files); static bool isFileBrokenErrorCode(int code); void markAsBroken(const std::string & file_path) const; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index bc17efaafd7..2b3dce22c59 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -433,4 +434,177 @@ NamesAndTypesList IStorage::getVirtuals() const return {}; } +const StorageMetadataKeyField & IStorage::getPartitionKey() const +{ + return partition_key; +} + +void IStorage::setPartitionKey(const StorageMetadataKeyField & partition_key_) +{ + partition_key = partition_key_; +} + +bool IStorage::isPartitionKeyDefined() const +{ + return partition_key.definition_ast != nullptr; +} + +bool IStorage::hasPartitionKey() const +{ + return !partition_key.column_names.empty(); +} + +Names IStorage::getColumnsRequiredForPartitionKey() const +{ + if (hasPartitionKey()) + return partition_key.expression->getRequiredColumns(); + return {}; +} + +const StorageMetadataKeyField & IStorage::getSortingKey() const +{ + return sorting_key; +} + +void IStorage::setSortingKey(const StorageMetadataKeyField & sorting_key_) +{ + sorting_key = sorting_key_; +} + +bool IStorage::isSortingKeyDefined() const +{ + return sorting_key.definition_ast != nullptr; +} + +bool IStorage::hasSortingKey() const +{ + return !sorting_key.column_names.empty(); +} + +Names IStorage::getColumnsRequiredForSortingKey() const +{ + if (hasSortingKey()) + return sorting_key.expression->getRequiredColumns(); + return {}; +} + +Names IStorage::getSortingKeyColumns() const +{ + if (hasSortingKey()) + return sorting_key.column_names; + return {}; +} + +const StorageMetadataKeyField & IStorage::getPrimaryKey() const +{ + return primary_key; +} + +void IStorage::setPrimaryKey(const StorageMetadataKeyField & primary_key_) +{ + primary_key = primary_key_; +} + +bool IStorage::isPrimaryKeyDefined() const +{ + return primary_key.definition_ast != nullptr; +} + +bool IStorage::hasPrimaryKey() const +{ + return !primary_key.column_names.empty(); +} + +Names IStorage::getColumnsRequiredForPrimaryKey() const +{ + if (hasPrimaryKey()) + return primary_key.expression->getRequiredColumns(); + return {}; +} + +Names IStorage::getPrimaryKeyColumns() const +{ + if (hasSortingKey()) + return primary_key.column_names; + return {}; +} + +const StorageMetadataKeyField & IStorage::getSamplingKey() const +{ + return sampling_key; +} + +void IStorage::setSamplingKey(const StorageMetadataKeyField & sampling_key_) +{ + sampling_key = sampling_key_; +} + + +bool IStorage::isSamplingKeyDefined() const +{ + return sampling_key.definition_ast != nullptr; +} + +bool IStorage::hasSamplingKey() const +{ + return !sampling_key.column_names.empty(); +} + +Names IStorage::getColumnsRequiredForSampling() const +{ + if (hasSamplingKey()) + return sampling_key.expression->getRequiredColumns(); + return {}; +} + +const TTLTableDescription & IStorage::getTableTTLs() const +{ + return table_ttl; +} + +void IStorage::setTableTTLs(const TTLTableDescription & table_ttl_) +{ + table_ttl = table_ttl_; +} + +bool IStorage::hasAnyTableTTL() const +{ + return hasAnyMoveTTL() || hasRowsTTL(); +} + +const TTLColumnsDescription & IStorage::getColumnTTLs() const +{ + return column_ttls_by_name; +} + +void IStorage::setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_) +{ + 
column_ttls_by_name = column_ttls_by_name_; +} + +bool IStorage::hasAnyColumnTTL() const +{ + return !column_ttls_by_name.empty(); +} + +const TTLDescription & IStorage::getRowsTTL() const +{ + return table_ttl.rows_ttl; +} + +bool IStorage::hasRowsTTL() const +{ + return table_ttl.rows_ttl.expression != nullptr; +} + +const TTLDescriptions & IStorage::getMoveTTLs() const +{ + return table_ttl.move_ttl; +} + +bool IStorage::hasAnyMoveTTL() const +{ + return !table_ttl.move_ttl.empty(); +} + } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 8c36fb64f72..5e59d9f1420 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -82,7 +83,9 @@ class IStorage : public std::enable_shared_from_this, public TypePromo { public: IStorage() = delete; - explicit IStorage(StorageID storage_id_) : storage_id(std::move(storage_id_)) {} + /// Storage fields should be initialized in separate methods like setColumns + /// or setTableTTLs. + explicit IStorage(StorageID storage_id_) : storage_id(std::move(storage_id_)) {} //-V730 virtual ~IStorage() = default; IStorage(const IStorage &) = delete; @@ -101,7 +104,7 @@ public: virtual bool isView() const { return false; } /// Returns true if the storage supports queries with the SAMPLE section. - virtual bool supportsSampling() const { return false; } + virtual bool supportsSampling() const { return hasSamplingKey(); } /// Returns true if the storage supports queries with the FINAL section. virtual bool supportsFinal() const { return false; } @@ -130,10 +133,7 @@ public: virtual bool hasEvenlyDistributedRead() const { return false; } /// Returns true if there is set table TTL, any column TTL or any move TTL. - virtual bool hasAnyTTL() const { return false; } - - /// Returns true if there is set TTL for rows. - virtual bool hasRowsTTL() const { return false; } + virtual bool hasAnyTTL() const { return hasAnyColumnTTL() || hasAnyTableTTL(); } /// Optional size information of each physical column. /// Currently it's only used by the MergeTree family for query optimizations. @@ -195,10 +195,19 @@ protected: /// still thread-unsafe part. private: StorageID storage_id; mutable std::mutex id_mutex; + ColumnsDescription columns; IndicesDescription indices; ConstraintsDescription constraints; + StorageMetadataKeyField partition_key; + StorageMetadataKeyField primary_key; + StorageMetadataKeyField sorting_key; + StorageMetadataKeyField sampling_key; + + TTLColumnsDescription column_ttls_by_name; + TTLTableDescription table_ttl; + private: RWLockImpl::LockHolder tryLockTimed( const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const SettingSeconds & acquire_timeout) const; @@ -440,44 +449,100 @@ public: /// Returns data paths if storage supports it, empty vector otherwise. virtual Strings getDataPaths() const { return {}; } + /// Returns structure with partition key. + const StorageMetadataKeyField & getPartitionKey() const; + /// Set partition key for storage (methods below are just wrappers for this + /// struct). + void setPartitionKey(const StorageMetadataKeyField & partition_key_); /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none. - virtual ASTPtr getPartitionKeyAST() const { return nullptr; } - - /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none. 
- virtual ASTPtr getSortingKeyAST() const { return nullptr; } - - /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none. - virtual ASTPtr getPrimaryKeyAST() const { return nullptr; } - - /// Returns sampling expression AST for storage or nullptr if there is none. - virtual ASTPtr getSamplingKeyAST() const { return nullptr; } - + ASTPtr getPartitionKeyAST() const { return partition_key.definition_ast; } + /// Storage has user-defined (in CREATE query) partition key. + bool isPartitionKeyDefined() const; + /// Storage has partition key. + bool hasPartitionKey() const; /// Returns column names that need to be read to calculate partition key. - virtual Names getColumnsRequiredForPartitionKey() const { return {}; } + Names getColumnsRequiredForPartitionKey() const; + + /// Returns structure with sorting key. + const StorageMetadataKeyField & getSortingKey() const; + /// Set sorting key for storage (methods below are just wrappers for this + /// struct). + void setSortingKey(const StorageMetadataKeyField & sorting_key_); + /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none. + ASTPtr getSortingKeyAST() const { return sorting_key.definition_ast; } + /// Storage has user-defined (in CREATE query) sorting key. + bool isSortingKeyDefined() const; + /// Storage has sorting key. It means that it contains at least one column. + bool hasSortingKey() const; /// Returns column names that need to be read to calculate sorting key. - virtual Names getColumnsRequiredForSortingKey() const { return {}; } - - /// Returns column names that need to be read to calculate primary key. - virtual Names getColumnsRequiredForPrimaryKey() const { return {}; } - - /// Returns column names that need to be read to calculate sampling key. - virtual Names getColumnsRequiredForSampling() const { return {}; } - - /// Returns column names that need to be read for FINAL to work. - virtual Names getColumnsRequiredForFinal() const { return {}; } - + Names getColumnsRequiredForSortingKey() const; /// Returns columns names in sorting key specified by user in ORDER BY /// expression. For example: 'a', 'x * y', 'toStartOfMonth(date)', etc. - virtual Names getSortingKeyColumns() const { return {}; } + Names getSortingKeyColumns() const; - /// Returns columns, which will be needed to calculate dependencies - /// (skip indices, TTL expressions) if we update @updated_columns set of columns. + /// Returns structure with primary key. + const StorageMetadataKeyField & getPrimaryKey() const; + /// Set primary key for storage (methods below are just wrappers for this + /// struct). + void setPrimaryKey(const StorageMetadataKeyField & primary_key_); + /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none. + ASTPtr getPrimaryKeyAST() const { return primary_key.definition_ast; } + /// Storage has user-defined (in CREATE query) primary key. + bool isPrimaryKeyDefined() const; + /// Storage has primary key (maybe part of some other key). It means that + /// it contains at least one column. + bool hasPrimaryKey() const; + /// Returns column names that need to be read to calculate primary key. + Names getColumnsRequiredForPrimaryKey() const; + /// Returns column names in the primary key specified by the user. For example: 'a', 'x + /// * y', 'toStartOfMonth(date)', etc. + Names getPrimaryKeyColumns() const; + + /// Returns structure with sampling key. 
+ const StorageMetadataKeyField & getSamplingKey() const; + /// Set sampling key for storage (methods below are just wrappers for this + /// struct). + void setSamplingKey(const StorageMetadataKeyField & sampling_key_); + /// Returns sampling expression AST for storage or nullptr if there is none. + ASTPtr getSamplingKeyAST() const { return sampling_key.definition_ast; } + /// Storage has user-defined (in CREATE query) sampling key. + bool isSamplingKeyDefined() const; + /// Storage has sampling key. + bool hasSamplingKey() const; + /// Returns column names that need to be read to calculate sampling key. + Names getColumnsRequiredForSampling() const; + + /// Returns column names that need to be read for FINAL to work. + Names getColumnsRequiredForFinal() const { return getColumnsRequiredForSortingKey(); } + + + /// Returns columns which will be needed to calculate dependencies (skip + /// indices, TTL expressions) if we update @updated_columns set of columns. virtual ColumnDependencies getColumnDependencies(const NameSet & /* updated_columns */) const { return {}; } - /// Returns storage policy if storage supports it + /// Returns storage policy if storage supports it. virtual StoragePolicyPtr getStoragePolicy() const { return {}; } + /// Common table TTLs (for rows and moves). + const TTLTableDescription & getTableTTLs() const; + void setTableTTLs(const TTLTableDescription & table_ttl_); + bool hasAnyTableTTL() const; + + /// Separate TTLs for columns. + const TTLColumnsDescription & getColumnTTLs() const; + void setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_); + bool hasAnyColumnTTL() const; + + /// Just a wrapper for table TTLs; returns the rows part of the table TTLs. + const TTLDescription & getRowsTTL() const; + bool hasRowsTTL() const; + + /// Just a wrapper for table TTLs; returns the moves (to disks or volumes) part of + /// the table TTL. + const TTLDescriptions & getMoveTTLs() const; + bool hasAnyMoveTTL() const; + /// If it is possible to quickly determine exact number of rows in the table at this moment of time, then return it. /// Used for: /// - Simple count() opimization diff --git a/src/Storages/Kafka/KafkaBlockInputStream.h b/src/Storages/Kafka/KafkaBlockInputStream.h index 1f94ee332d3..e3052122894 100644 --- a/src/Storages/Kafka/KafkaBlockInputStream.h +++ b/src/Storages/Kafka/KafkaBlockInputStream.h @@ -25,6 +25,7 @@ public: void readSuffixImpl() override; void commit(); + bool isStalled() const { return buffer->isStalled(); } private: StorageKafka & storage; diff --git a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp index 70340a93159..31a9f55350d 100644 --- a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp +++ b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.cpp @@ -15,6 +15,7 @@ namespace ErrorCodes using namespace std::chrono_literals; const auto MAX_TIME_TO_WAIT_FOR_ASSIGNMENT_MS = 15000; +const auto DRAIN_TIMEOUT_MS = 5000ms; ReadBufferFromKafkaConsumer::ReadBufferFromKafkaConsumer( @@ -80,9 +81,72 @@ ReadBufferFromKafkaConsumer::ReadBufferFromKafkaConsumer( }); } -// NOTE on removed desctuctor: There is no need to unsubscribe prior to calling rd_kafka_consumer_close(). -// check: https://github.com/edenhill/librdkafka/blob/master/INTRODUCTION.md#termination -// manual destruction was source of weird errors (hangs during droping kafka table, etc.) 
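Editorial sketch, not part of the patch: the IStorage changes above replace per-storage virtual getters with key structures (StorageMetadataKeyField) stored in the base class plus thin non-virtual wrappers such as hasSortingKey() and getSortingKeyColumns(). A stripped-down illustration of that pattern; KeyDescription and Storage are invented stand-ins for the real types:

#include <string>
#include <utility>
#include <vector>

// KeyDescription and Storage are invented stand-ins for StorageMetadataKeyField
// and IStorage; only the shape of the pattern is shown here.
struct KeyDescription
{
    std::string definition_ast;             // stands in for the parsed AST of the key
    std::vector<std::string> column_names;  // columns the key is calculated from
};

class Storage
{
public:
    void setSortingKey(KeyDescription key) { sorting_key = std::move(key); }

    // The accessors below just look into the stored struct; no virtual dispatch is needed.
    bool isSortingKeyDefined() const { return !sorting_key.definition_ast.empty(); }
    bool hasSortingKey() const { return !sorting_key.column_names.empty(); }
    std::vector<std::string> getSortingKeyColumns() const
    {
        return hasSortingKey() ? sorting_key.column_names : std::vector<std::string>{};
    }

private:
    KeyDescription sorting_key;
};

int main()
{
    Storage storage;
    storage.setSortingKey({"(date, id)", {"date", "id"}});
    return storage.hasSortingKey() && storage.getSortingKeyColumns().size() == 2 ? 0 : 1;
}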
+ReadBufferFromKafkaConsumer::~ReadBufferFromKafkaConsumer() +{ + try + { + if (!consumer->get_subscription().empty()) + { + try + { + consumer->unsubscribe(); + } + catch (const cppkafka::HandleException & e) + { + LOG_ERROR(log, "Error during unsubscribe: {}", e.what()); + } + drain(); + } + } + catch (const cppkafka::HandleException & e) + { + LOG_ERROR(log, "Error while destructing consumer: {}", e.what()); + } + +} + +// Needed to drain the rest of the messages / queued callback calls from the consumer +// after unsubscribe; otherwise the consumer will hang on destruction +// see https://github.com/edenhill/librdkafka/issues/2077 +// https://github.com/confluentinc/confluent-kafka-go/issues/189 etc. +void ReadBufferFromKafkaConsumer::drain() +{ + auto start_time = std::chrono::steady_clock::now(); + cppkafka::Error last_error(RD_KAFKA_RESP_ERR_NO_ERROR); + + while (true) + { + auto msg = consumer->poll(100ms); + if (!msg) + break; + + auto error = msg.get_error(); + + if (error) + { + if (msg.is_eof() || error == last_error) + { + break; + } + else + { + LOG_ERROR(log, "Error during draining: {}", error); + } + } + + // Don't stop draining on the first error; + // stop only if the same error repeats twice in a row. + last_error = error; + + auto ts = std::chrono::steady_clock::now(); + if (std::chrono::duration_cast(ts-start_time) > DRAIN_TIMEOUT_MS) + { + LOG_ERROR(log, "Timeout during draining."); + break; + } + } +} + void ReadBufferFromKafkaConsumer::commit() { diff --git a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h index 46dace827d0..e90e3b48881 100644 --- a/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h +++ b/src/Storages/Kafka/ReadBufferFromKafkaConsumer.h @@ -28,7 +28,7 @@ public: const std::atomic & stopped_, const Names & _topics ); - + ~ReadBufferFromKafkaConsumer() override; void allowNext() { allowed = true; } // Allow to read next message. void commit(); // Commit all processed messages. void subscribe(); // Subscribe internal consumer to topics. @@ -38,6 +38,7 @@ public: bool hasMorePolledMessages() const; bool polledDataUnusable() const { return (was_stopped || rebalance_happened); } + bool isStalled() const { return stalled; } void storeLastReadMessageOffset(); void resetToLastCommitted(const char * msg); @@ -75,6 +76,8 @@ private: cppkafka::TopicPartitionList assignment; const Names topics; + void drain(); + bool nextImpl() override; }; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 78a28361f32..259d3d68eb0 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -51,6 +51,7 @@ namespace { const auto RESCHEDULE_MS = 500; const auto CLEANUP_TIMEOUT_MS = 3000; + const auto MAX_THREAD_WORK_DURATION_MS = 60000; // leave the loop and reschedule once per minute (we can't lock threads in the pool forever) /// Configuration prefix const String CONFIG_PREFIX = "kafka"; @@ -293,6 +294,7 @@ ConsumerBufferPtr StorageKafka::createReadBuffer() // Create a consumer and subscribe to topics auto consumer = std::make_shared(conf); + consumer->set_destroy_flags(RD_KAFKA_DESTROY_F_NO_CONSUMER_CLOSE); // Limit the number of batched messages to allow early cancellations const Settings & settings = global_context.getSettingsRef(); @@ -359,7 +361,7 @@ bool StorageKafka::checkDependencies(const StorageID & table_id) // Check the dependencies are ready? 
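Editorial sketch, not part of the patch: the drain() added above keeps polling after unsubscribe until the queue is empty, the same error is seen twice in a row, or a timeout expires. The following self-contained sketch models that loop shape with a plain std::function standing in for cppkafka's consumer->poll():

#include <chrono>
#include <functional>
#include <iostream>
#include <optional>

using namespace std::chrono;

// poll() is a stand-in for consumer->poll(100ms): an empty optional means "no more
// messages", 0 means "a message without an error", any other value is an error code.
bool drain(const std::function<std::optional<int>()> & poll, milliseconds timeout)
{
    const auto start = steady_clock::now();
    int last_error = 0;  // mirrors starting from RD_KAFKA_RESP_ERR_NO_ERROR

    while (true)
    {
        auto result = poll();
        if (!result)
            return true;                    // nothing left, drained cleanly

        if (*result != 0)
        {
            if (*result == last_error)
                return false;               // the same error twice in a row: give up
            std::cerr << "error during draining: " << *result << '\n';
        }
        last_error = *result;

        if (steady_clock::now() - start > timeout)
        {
            std::cerr << "timeout during draining\n";
            return false;
        }
    }
}

int main()
{
    int remaining = 3;
    auto fake_poll = [&]() -> std::optional<int>
    {
        if (remaining-- > 0)
            return 0;            // three messages without errors
        return std::nullopt;     // then the queue is empty
    };
    std::cout << (drain(fake_poll, milliseconds(5000)) ? "drained\n" : "gave up\n");
    return 0;
}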
for (const auto & db_tab : dependencies) { - auto table = DatabaseCatalog::instance().tryGetTable(db_tab); + auto table = DatabaseCatalog::instance().tryGetTable(db_tab, global_context); if (!table) return false; @@ -385,6 +387,8 @@ void StorageKafka::threadFunc() size_t dependencies_count = DatabaseCatalog::instance().getDependencies(table_id).size(); if (dependencies_count) { + auto start_time = std::chrono::steady_clock::now(); + // Keep streaming as long as there are attached views and streaming is not cancelled while (!stream_cancelled && num_created_consumers > 0) { @@ -393,9 +397,21 @@ void StorageKafka::threadFunc() LOG_DEBUG(log, "Started streaming to {} attached views", dependencies_count); - // Reschedule if not limited - if (!streamToViews()) + // Exit the loop & reschedule if some stream stalled + auto some_stream_is_stalled = streamToViews(); + if (some_stream_is_stalled) + { + LOG_TRACE(log, "Stream(s) stalled. Reschedule."); break; + } + + auto ts = std::chrono::steady_clock::now(); + auto duration = std::chrono::duration_cast(ts-start_time); + if (duration.count() > MAX_THREAD_WORK_DURATION_MS) + { + LOG_TRACE(log, "Thread work duration limit exceeded. Reschedule."); + break; + } } } } @@ -413,7 +429,7 @@ void StorageKafka::threadFunc() bool StorageKafka::streamToViews() { auto table_id = getStorageID(); - auto table = DatabaseCatalog::instance().getTable(table_id); + auto table = DatabaseCatalog::instance().getTable(table_id, global_context); if (!table) throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::LOGICAL_ERROR); @@ -458,15 +474,15 @@ bool StorageKafka::streamToViews() // It will be cancelled on underlying layer (kafka buffer) std::atomic stub = {false}; copyData(*in, *block_io.out, &stub); + + bool some_stream_is_stalled = false; for (auto & stream : streams) + { + some_stream_is_stalled = some_stream_is_stalled || stream->as()->isStalled(); stream->as()->commit(); + } - // Check whether the limits were applied during query execution - bool limits_applied = false; - const BlockStreamProfileInfo & info = in->getProfileInfo(); - limits_applied = info.hasAppliedLimit(); - - return limits_applied; + return some_stream_is_stalled; } void registerStorageKafka(StorageFactory & factory) diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index cd660407c89..633669cbf1b 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -401,7 +401,7 @@ void StorageLiveView::noUsersThread(std::shared_ptr storage, co if (drop_table) { - if (DatabaseCatalog::instance().tryGetTable(table_id)) + if (DatabaseCatalog::instance().tryGetTable(table_id, storage->global_context)) { try { diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 801b0b42ec4..fe62de224da 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -53,7 +53,7 @@ public: { return getStorageID().table_name + "_blocks"; } - StoragePtr getParentStorage() const { return DatabaseCatalog::instance().getTable(select_table_id); } + StoragePtr getParentStorage() const { return DatabaseCatalog::instance().getTable(select_table_id, global_context); } ASTPtr getInnerQuery() const { return inner_query->clone(); } ASTPtr getInnerSubQuery() const diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index de8a9bb317f..21768644940 100644 --- 
a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -418,7 +418,7 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks loadIndexGranularity(); calculateColumnsSizesOnDisk(); loadIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity` - loadRowsCount(); /// Must be called after loadIndex() as it uses the value of `index_granularity`. + loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`. loadPartitionAndMinMaxIndex(); loadTTLInfos(); @@ -437,7 +437,8 @@ void IMergeTreeDataPart::loadIndex() if (!index_granularity.isInitialized()) throw Exception("Index granularity is not loaded before index loading", ErrorCodes::LOGICAL_ERROR); - size_t key_size = storage.primary_key_columns.size(); + const auto & primary_key = storage.getPrimaryKey(); + size_t key_size = primary_key.column_names.size(); if (key_size) { @@ -446,23 +447,25 @@ void IMergeTreeDataPart::loadIndex() for (size_t i = 0; i < key_size; ++i) { - loaded_index[i] = storage.primary_key_data_types[i]->createColumn(); + loaded_index[i] = primary_key.data_types[i]->createColumn(); loaded_index[i]->reserve(index_granularity.getMarksCount()); } String index_path = getFullRelativePath() + "primary.idx"; auto index_file = openForReading(volume->getDisk(), index_path); - for (size_t i = 0; i < index_granularity.getMarksCount(); ++i) //-V756 + size_t marks_count = index_granularity.getMarksCount(); + + for (size_t i = 0; i < marks_count; ++i) //-V756 for (size_t j = 0; j < key_size; ++j) - storage.primary_key_data_types[j]->deserializeBinary(*loaded_index[j], *index_file); + primary_key.data_types[j]->deserializeBinary(*loaded_index[j], *index_file); for (size_t i = 0; i < key_size; ++i) { loaded_index[i]->protect(); - if (loaded_index[i]->size() != index_granularity.getMarksCount()) + if (loaded_index[i]->size() != marks_count) throw Exception("Cannot read all data from index file " + index_path - + "(expected size: " + toString(index_granularity.getMarksCount()) + ", read: " + toString(loaded_index[i]->size()) + ")", + + "(expected size: " + toString(marks_count) + ", read: " + toString(loaded_index[i]->size()) + ")", ErrorCodes::CANNOT_READ_ALL_DATA); } @@ -493,7 +496,7 @@ void IMergeTreeDataPart::loadPartitionAndMinMaxIndex() minmax_idx.load(storage, volume->getDisk(), path); } - String calculated_partition_id = partition.getID(storage.partition_key_sample); + String calculated_partition_id = partition.getID(storage.getPartitionKey().sample_block); if (calculated_partition_id != info.partition_id) throw Exception( "While loading part " + getFullPath() + ": calculated partition ID: " + calculated_partition_id @@ -836,9 +839,10 @@ void IMergeTreeDataPart::checkConsistencyBase() const { String path = getFullRelativePath(); + const auto & pk = storage.getPrimaryKey(); if (!checksums.empty()) { - if (!storage.primary_key_columns.empty() && !checksums.files.count("primary.idx")) + if (!pk.column_names.empty() && !checksums.files.count("primary.idx")) throw Exception("No checksum for primary.idx", ErrorCodes::NO_FILE_IN_DATA_PART); if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) @@ -846,7 +850,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const if (!checksums.files.count("count.txt")) throw Exception("No checksum for count.txt", ErrorCodes::NO_FILE_IN_DATA_PART); - if (storage.partition_key_expr && 
!checksums.files.count("partition.dat")) + if (storage.hasPartitionKey() && !checksums.files.count("partition.dat")) throw Exception("No checksum for partition.dat", ErrorCodes::NO_FILE_IN_DATA_PART); if (!isEmpty()) @@ -872,14 +876,14 @@ void IMergeTreeDataPart::checkConsistencyBase() const }; /// Check that the primary key index is not empty. - if (!storage.primary_key_columns.empty()) + if (!pk.column_names.empty()) check_file_not_empty(volume->getDisk(), path + "primary.idx"); if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { check_file_not_empty(volume->getDisk(), path + "count.txt"); - if (storage.partition_key_expr) + if (storage.hasPartitionKey()) check_file_not_empty(volume->getDisk(), path + "partition.dat"); for (const String & col_name : storage.minmax_idx_columns) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d97001bc42e..175cf53ca93 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -131,8 +131,6 @@ MergeTreeData::MergeTreeData( : IStorage(table_id_) , global_context(context_) , merging_params(merging_params_) - , partition_by_ast(metadata.partition_by_ast) - , sample_by_ast(metadata.sample_by_ast) , settings_ast(metadata.settings_ast) , require_part_metadata(require_part_metadata_) , relative_data_path(relative_data_path_) @@ -153,16 +151,16 @@ MergeTreeData::MergeTreeData( /// NOTE: using the same columns list as is read when performing actual merges. merging_params.check(getColumns().getAllPhysical()); - if (sample_by_ast) + if (metadata.sample_by_ast != nullptr) { - sampling_expr_column_name = sample_by_ast->getColumnName(); + StorageMetadataKeyField candidate_sampling_key = StorageMetadataKeyField::getKeyFromAST(metadata.sample_by_ast, getColumns(), global_context); - if (!primary_key_sample.has(sampling_expr_column_name) - && !attach && !settings->compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility. + const auto & pk_sample_block = getPrimaryKey().sample_block; + if (!pk_sample_block.has(candidate_sampling_key.column_names[0]) && !attach + && !settings->compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility. 
throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); - auto syntax = SyntaxAnalyzer(global_context).analyze(sample_by_ast, getColumns().getAllPhysical()); - columns_required_for_sampling = syntax->requiredSourceColumns(); + setSamplingKey(candidate_sampling_key); } MergeTreeDataFormatVersion min_format_version(0); @@ -170,8 +168,8 @@ MergeTreeData::MergeTreeData( { try { - partition_by_ast = makeASTFunction("toYYYYMM", std::make_shared(date_column_name)); - initPartitionKey(); + auto partition_by_ast = makeASTFunction("toYYYYMM", std::make_shared(date_column_name)); + initPartitionKey(partition_by_ast); if (minmax_idx_date_column_pos == -1) throw Exception("Could not find Date column", ErrorCodes::BAD_TYPE_OF_FIELD); @@ -186,7 +184,7 @@ MergeTreeData::MergeTreeData( else { is_custom_partitioned = true; - initPartitionKey(); + initPartitionKey(metadata.partition_by_ast); min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; } @@ -252,20 +250,20 @@ StorageInMemoryMetadata MergeTreeData::getInMemoryMetadata() const { StorageInMemoryMetadata metadata(getColumns(), getIndices(), getConstraints()); - if (partition_by_ast) - metadata.partition_by_ast = partition_by_ast->clone(); + if (isPartitionKeyDefined()) + metadata.partition_by_ast = getPartitionKeyAST()->clone(); - if (order_by_ast) - metadata.order_by_ast = order_by_ast->clone(); + if (isSortingKeyDefined()) + metadata.order_by_ast = getSortingKeyAST()->clone(); - if (primary_key_ast) - metadata.primary_key_ast = primary_key_ast->clone(); + if (isPrimaryKeyDefined()) + metadata.primary_key_ast = getPrimaryKeyAST()->clone(); - if (ttl_table_ast) - metadata.ttl_for_table_ast = ttl_table_ast->clone(); + if (hasAnyTableTTL()) + metadata.ttl_for_table_ast = getTableTTLs().definition_ast->clone(); - if (sample_by_ast) - metadata.sample_by_ast = sample_by_ast->clone(); + if (isSamplingKeyDefined()) + metadata.sample_by_ast = getSamplingKeyAST()->clone(); if (settings_ast) metadata.settings_ast = settings_ast->clone(); @@ -352,17 +350,18 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool auto all_columns = metadata.columns.getAllPhysical(); /// Order by check AST - if (order_by_ast && only_check) + if (hasSortingKey() && only_check) { /// This is ALTER, not CREATE/ATTACH TABLE. Let us check that all new columns used in the sorting key /// expression have just been added (so that the sorting order is guaranteed to be valid with the new key). 
ASTPtr added_key_column_expr_list = std::make_shared(); + const auto & old_sorting_key_columns = getSortingKeyColumns(); for (size_t new_i = 0, old_i = 0; new_i < sorting_key_size; ++new_i) { - if (old_i < sorting_key_columns.size()) + if (old_i < old_sorting_key_columns.size()) { - if (new_sorting_key_columns[new_i] != sorting_key_columns[old_i]) + if (new_sorting_key_columns[new_i] != old_sorting_key_columns[old_i]) added_key_column_expr_list->children.push_back(new_sorting_key_expr_list->children[new_i]); else ++old_i; @@ -417,6 +416,12 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool new_primary_key_data_types.push_back(elem.type); } + DataTypes new_sorting_key_data_types; + for (size_t i = 0; i < sorting_key_size; ++i) + { + new_sorting_key_data_types.push_back(new_sorting_key_sample.getByPosition(i).type); + } + ASTPtr skip_indices_with_primary_key_expr_list = new_primary_key_expr_list->clone(); ASTPtr skip_indices_with_sorting_key_expr_list = new_sorting_key_expr_list->clone(); @@ -466,17 +471,23 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & metadata, bool { setColumns(std::move(metadata.columns)); - order_by_ast = metadata.order_by_ast; - sorting_key_columns = std::move(new_sorting_key_columns); - sorting_key_expr_ast = std::move(new_sorting_key_expr_list); - sorting_key_expr = std::move(new_sorting_key_expr); + StorageMetadataKeyField new_sorting_key; + new_sorting_key.definition_ast = metadata.order_by_ast; + new_sorting_key.column_names = std::move(new_sorting_key_columns); + new_sorting_key.expression_list_ast = std::move(new_sorting_key_expr_list); + new_sorting_key.expression = std::move(new_sorting_key_expr); + new_sorting_key.sample_block = std::move(new_sorting_key_sample); + new_sorting_key.data_types = std::move(new_sorting_key_data_types); + setSortingKey(new_sorting_key); - primary_key_ast = metadata.primary_key_ast; - primary_key_columns = std::move(new_primary_key_columns); - primary_key_expr_ast = std::move(new_primary_key_expr_list); - primary_key_expr = std::move(new_primary_key_expr); - primary_key_sample = std::move(new_primary_key_sample); - primary_key_data_types = std::move(new_primary_key_data_types); + StorageMetadataKeyField new_primary_key; + new_primary_key.definition_ast = metadata.primary_key_ast; + new_primary_key.column_names = std::move(new_primary_key_columns); + new_primary_key.expression_list_ast = std::move(new_primary_key_expr_list); + new_primary_key.expression = std::move(new_primary_key_expr); + new_primary_key.sample_block = std::move(new_primary_key_sample); + new_primary_key.data_types = std::move(new_primary_key_data_types); + setPrimaryKey(new_primary_key); setIndices(metadata.indices); skip_indices = std::move(new_indices); @@ -511,28 +522,17 @@ ASTPtr MergeTreeData::extractKeyExpressionList(const ASTPtr & node) } -void MergeTreeData::initPartitionKey() +void MergeTreeData::initPartitionKey(ASTPtr partition_by_ast) { - ASTPtr partition_key_expr_list = extractKeyExpressionList(partition_by_ast); + StorageMetadataKeyField new_partition_key = StorageMetadataKeyField::getKeyFromAST(partition_by_ast, getColumns(), global_context); - if (partition_key_expr_list->children.empty()) + if (new_partition_key.expression_list_ast->children.empty()) return; - { - auto syntax_result = SyntaxAnalyzer(global_context).analyze(partition_key_expr_list, getColumns().getAllPhysical()); - partition_key_expr = ExpressionAnalyzer(partition_key_expr_list, syntax_result, global_context).getActions(false); 
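In the ALTER path of `setProperties` above, the new sorting key is accepted only if every column that does not line up with the old key was added by the same ALTER. A standalone sketch of that two-pointer comparison follows; names are illustrative, not the real ClickHouse signatures.

```cpp
#include <cstddef>
#include <string>
#include <vector>

// Columns of the new sorting key that do not match the old key (walked in
// order) are treated as "added" and must correspond to freshly added columns.
std::vector<std::string> addedSortingKeyColumns(
    const std::vector<std::string> & old_key,
    const std::vector<std::string> & new_key)
{
    std::vector<std::string> added;
    size_t old_i = 0;
    for (size_t new_i = 0; new_i < new_key.size(); ++new_i)
    {
        if (old_i < old_key.size())
        {
            if (new_key[new_i] != old_key[old_i])
                added.push_back(new_key[new_i]);   // inserted before an old key column
            else
                ++old_i;                           // same column, keep walking the old key
        }
        else
            added.push_back(new_key[new_i]);       // appended after the old key
    }
    return added;
}
```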
- } - - for (const ASTPtr & ast : partition_key_expr_list->children) - { - String col_name = ast->getColumnName(); - partition_key_sample.insert(partition_key_expr->getSampleBlock().getByName(col_name)); - } - - checkKeyExpression(*partition_key_expr, partition_key_sample, "Partition"); + checkKeyExpression(*new_partition_key.expression, new_partition_key.sample_block, "Partition"); /// Add all columns used in the partition key to the min-max index. - const NamesAndTypesList & minmax_idx_columns_with_types = partition_key_expr->getRequiredColumnsWithTypes(); + const NamesAndTypesList & minmax_idx_columns_with_types = new_partition_key.expression->getRequiredColumnsWithTypes(); minmax_idx_expr = std::make_shared(minmax_idx_columns_with_types, global_context); for (const NameAndTypePair & column : minmax_idx_columns_with_types) { @@ -577,34 +577,7 @@ void MergeTreeData::initPartitionKey() } } } -} - -namespace -{ - -void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const String & result_column_name) -{ - for (const auto & action : ttl_expression->getActions()) - { - if (action.type == ExpressionAction::APPLY_FUNCTION) - { - IFunctionBase & func = *action.function_base; - if (!func.isDeterministic()) - throw Exception("TTL expression cannot contain non-deterministic functions, " - "but contains function " + func.getName(), ErrorCodes::BAD_ARGUMENTS); - } - } - - const auto & result_column = ttl_expression->getSampleBlock().getByName(result_column_name); - - if (!typeid_cast(result_column.type.get()) - && !typeid_cast(result_column.type.get())) - { - throw Exception("TTL expression result column should have DateTime or Date type, but has " - + result_column.type->getName(), ErrorCodes::BAD_TTL_EXPRESSION); - } -} - + setPartitionKey(new_partition_key); } @@ -612,50 +585,40 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription & new_columns, const ASTPtr & new_ttl_table_ast, bool only_check) { - auto new_column_ttls = new_columns.getColumnTTLs(); + auto new_column_ttls_asts = new_columns.getColumnTTLs(); - auto create_ttl_entry = [this, &new_columns](ASTPtr ttl_ast) - { - TTLEntry result; + TTLColumnsDescription new_column_ttl_by_name = getColumnTTLs(); - auto syntax_result = SyntaxAnalyzer(global_context).analyze(ttl_ast, new_columns.getAllPhysical()); - result.expression = ExpressionAnalyzer(ttl_ast, syntax_result, global_context).getActions(false); - result.destination_type = PartDestinationType::DELETE; - result.result_column = ttl_ast->getColumnName(); - - checkTTLExpression(result.expression, result.result_column); - return result; - }; - - if (!new_column_ttls.empty()) + if (!new_column_ttls_asts.empty()) { NameSet columns_ttl_forbidden; - if (partition_key_expr) - for (const auto & col : partition_key_expr->getRequiredColumns()) + if (hasPartitionKey()) + for (const auto & col : getColumnsRequiredForPartitionKey()) columns_ttl_forbidden.insert(col); - if (sorting_key_expr) - for (const auto & col : sorting_key_expr->getRequiredColumns()) + if (hasSortingKey()) + for (const auto & col : getColumnsRequiredForSortingKey()) columns_ttl_forbidden.insert(col); - for (const auto & [name, ast] : new_column_ttls) + for (const auto & [name, ast] : new_column_ttls_asts) { if (columns_ttl_forbidden.count(name)) throw Exception("Trying to set TTL for key column " + name, ErrorCodes::ILLEGAL_COLUMN); else { - auto new_ttl_entry = create_ttl_entry(ast); - if (!only_check) - column_ttl_entries_by_name[name] = new_ttl_entry; + auto new_ttl_entry = 
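The hunk above removes the local `checkTTLExpression` helper from `MergeTreeData.cpp`; the same two rules now live alongside `TTLDescription`. As a reminder of what is enforced, here is a standalone sketch of those rules; `Action` and `ColumnType` are simplified stand-ins for the real `ExpressionActions` / `IDataType` interfaces.

```cpp
#include <stdexcept>
#include <string>
#include <vector>

enum class ColumnType { Date, DateTime, Other };

struct Action
{
    bool is_function = false;
    bool is_deterministic = true;
    std::string function_name;
};

// A TTL expression may not use non-deterministic functions, and its result
// column must have Date or DateTime type.
void checkTTLExpression(const std::vector<Action> & actions, ColumnType result_type)
{
    for (const auto & action : actions)
        if (action.is_function && !action.is_deterministic)
            throw std::runtime_error(
                "TTL expression cannot contain non-deterministic functions, "
                "but contains function " + action.function_name);

    if (result_type != ColumnType::Date && result_type != ColumnType::DateTime)
        throw std::runtime_error(
            "TTL expression result column should have DateTime or Date type");
}
```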
TTLDescription::getTTLFromAST(ast, new_columns, global_context, getPrimaryKey()); + new_column_ttl_by_name[name] = new_ttl_entry; } } + if (!only_check) + setColumnTTLs(new_column_ttl_by_name); } if (new_ttl_table_ast) { - std::vector update_move_ttl_entries; - TTLEntry update_rows_ttl_entry; + TTLDescriptions update_move_ttl_entries; + TTLDescription update_rows_ttl_entry; bool seen_delete_ttl = false; for (const auto & ttl_element_ptr : new_ttl_table_ast->children) @@ -664,48 +627,46 @@ void MergeTreeData::setTTLExpressions(const ColumnsDescription & new_columns, if (!ttl_element) throw Exception("Unexpected AST element in TTL expression", ErrorCodes::UNEXPECTED_AST_STRUCTURE); - if (ttl_element->destination_type == PartDestinationType::DELETE) + if (ttl_element->destination_type == DataDestinationType::DELETE) { if (seen_delete_ttl) { throw Exception("More than one DELETE TTL expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION); } - auto new_rows_ttl_entry = create_ttl_entry(ttl_element->children[0]); - if (!only_check) - update_rows_ttl_entry = new_rows_ttl_entry; + update_rows_ttl_entry = TTLDescription::getTTLFromAST(ttl_element_ptr, new_columns, global_context, getPrimaryKey()); seen_delete_ttl = true; } else { - auto new_ttl_entry = create_ttl_entry(ttl_element->children[0]); + auto new_ttl_entry = TTLDescription::getTTLFromAST(ttl_element_ptr, new_columns, global_context, getPrimaryKey()); - new_ttl_entry.entry_ast = ttl_element_ptr; - new_ttl_entry.destination_type = ttl_element->destination_type; - new_ttl_entry.destination_name = ttl_element->destination_name; - if (!new_ttl_entry.getDestination(getStoragePolicy())) + if (!getDestinationForTTL(new_ttl_entry)) { String message; - if (new_ttl_entry.destination_type == PartDestinationType::DISK) + if (new_ttl_entry.destination_type == DataDestinationType::DISK) message = "No such disk " + backQuote(new_ttl_entry.destination_name) + " for given storage policy."; else message = "No such volume " + backQuote(new_ttl_entry.destination_name) + " for given storage policy."; throw Exception(message, ErrorCodes::BAD_TTL_EXPRESSION); } - if (!only_check) - update_move_ttl_entries.emplace_back(std::move(new_ttl_entry)); + update_move_ttl_entries.emplace_back(std::move(new_ttl_entry)); } } if (!only_check) { - rows_ttl_entry = update_rows_ttl_entry; - ttl_table_ast = new_ttl_table_ast; + TTLTableDescription new_table_ttl + { + .definition_ast = new_ttl_table_ast, + .rows_ttl = update_rows_ttl_entry, + .move_ttl = update_move_ttl_entries, + }; auto move_ttl_entries_lock = std::lock_guard(move_ttl_entries_mutex); - move_ttl_entries = update_move_ttl_entries; + setTableTTLs(new_table_ttl); } } } @@ -1418,12 +1379,12 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S /// (and not as a part of some expression) and if the ALTER only affects column metadata. NameSet columns_alter_type_metadata_only; - if (partition_key_expr) + if (hasPartitionKey()) { /// Forbid altering partition key columns because it can change partition ID format. /// TODO: in some cases (e.g. adding an Enum value) a partition key column can still be ALTERed. /// We should allow it. 
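`setTTLExpressions` above still forbids column-level TTLs on columns that the partition key or sorting key depends on; only the way those column sets are obtained changes (`getColumnsRequiredForPartitionKey()` / `getColumnsRequiredForSortingKey()`). A standalone sketch of that guard, using plain std containers in place of the ClickHouse name sets:

```cpp
#include <set>
#include <stdexcept>
#include <string>
#include <vector>

void checkColumnTTLs(const std::vector<std::string> & partition_key_columns,
                     const std::vector<std::string> & sorting_key_columns,
                     const std::vector<std::string> & columns_with_ttl)
{
    std::set<std::string> forbidden(partition_key_columns.begin(), partition_key_columns.end());
    forbidden.insert(sorting_key_columns.begin(), sorting_key_columns.end());

    for (const auto & name : columns_with_ttl)
        if (forbidden.count(name))
            throw std::runtime_error("Trying to set TTL for key column " + name);
}
```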
- for (const String & col : partition_key_expr->getRequiredColumns()) + for (const String & col : getColumnsRequiredForPartitionKey()) columns_alter_type_forbidden.insert(col); } @@ -1433,8 +1394,9 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S columns_alter_type_forbidden.insert(col); } - if (sorting_key_expr) + if (hasSortingKey()) { + auto sorting_key_expr = getSortingKey().expression; for (const ExpressionAction & action : sorting_key_expr->getActions()) { auto action_columns = action.getNeededColumns(); @@ -2518,9 +2480,6 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & return part_ptr->volume->getDisk()->getName() == disk->getName(); }), parts.end()); - if (parts.empty()) - throw Exception("Nothing to move", ErrorCodes::NO_SUCH_DATA_PART); - if (parts.empty()) { String no_parts_to_move_message; @@ -2616,7 +2575,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context /// Re-parse partition key fields using the information about expected field types. - size_t fields_count = partition_key_sample.columns(); + size_t fields_count = getPartitionKey().sample_block.columns(); if (partition_ast.fields_count != fields_count) throw Exception( "Wrong number of fields in the partition expression: " + toString(partition_ast.fields_count) + @@ -2633,7 +2592,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context ReadBufferFromMemory right_paren_buf(")", 1); ConcatReadBuffer buf({&left_paren_buf, &fields_buf, &right_paren_buf}); - auto input_stream = FormatFactory::instance().getInput("Values", buf, partition_key_sample, context, context.getSettingsRef().max_block_size); + auto input_stream = FormatFactory::instance().getInput("Values", buf, getPartitionKey().sample_block, context, context.getSettingsRef().max_block_size); auto block = input_stream->read(); if (!block || !block.rows()) @@ -2911,12 +2870,12 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules(UInt64 expected_ auto ttl_entry = selectTTLEntryForTTLInfos(ttl_infos, time_of_move); if (ttl_entry) { - SpacePtr destination_ptr = ttl_entry->getDestination(getStoragePolicy()); + SpacePtr destination_ptr = getDestinationForTTL(*ttl_entry); if (!destination_ptr) { - if (ttl_entry->destination_type == PartDestinationType::VOLUME) + if (ttl_entry->destination_type == DataDestinationType::VOLUME) LOG_WARNING(log, "Would like to reserve space on volume '{}' by TTL rule of table '{}' but volume was not found", ttl_entry->destination_name, log_name); - else if (ttl_entry->destination_type == PartDestinationType::DISK) + else if (ttl_entry->destination_type == DataDestinationType::DISK) LOG_WARNING(log, "Would like to reserve space on disk '{}' by TTL rule of table '{}' but disk was not found", ttl_entry->destination_name, log_name); } else @@ -2925,9 +2884,9 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules(UInt64 expected_ if (reservation) return reservation; else - if (ttl_entry->destination_type == PartDestinationType::VOLUME) + if (ttl_entry->destination_type == DataDestinationType::VOLUME) LOG_WARNING(log, "Would like to reserve space on volume '{}' by TTL rule of table '{}' but there is not enough space", ttl_entry->destination_name, log_name); - else if (ttl_entry->destination_type == PartDestinationType::DISK) + else if (ttl_entry->destination_type == DataDestinationType::DISK) LOG_WARNING(log, "Would like to reserve space on disk '{}' by TTL rule of table '{}' but there is not enough 
space", ttl_entry->destination_name, log_name); } } @@ -2937,37 +2896,39 @@ ReservationPtr MergeTreeData::tryReserveSpacePreferringTTLRules(UInt64 expected_ return reservation; } -SpacePtr MergeTreeData::TTLEntry::getDestination(StoragePolicyPtr policy) const +SpacePtr MergeTreeData::getDestinationForTTL(const TTLDescription & ttl) const { - if (destination_type == PartDestinationType::VOLUME) - return policy->getVolumeByName(destination_name); - else if (destination_type == PartDestinationType::DISK) - return policy->getDiskByName(destination_name); + auto policy = getStoragePolicy(); + if (ttl.destination_type == DataDestinationType::VOLUME) + return policy->getVolumeByName(ttl.destination_name); + else if (ttl.destination_type == DataDestinationType::DISK) + return policy->getDiskByName(ttl.destination_name); else return {}; } -bool MergeTreeData::TTLEntry::isPartInDestination(StoragePolicyPtr policy, const IMergeTreeDataPart & part) const +bool MergeTreeData::isPartInTTLDestination(const TTLDescription & ttl, const IMergeTreeDataPart & part) const { - if (destination_type == PartDestinationType::VOLUME) + auto policy = getStoragePolicy(); + if (ttl.destination_type == DataDestinationType::VOLUME) { - for (const auto & disk : policy->getVolumeByName(destination_name)->getDisks()) + for (const auto & disk : policy->getVolumeByName(ttl.destination_name)->getDisks()) if (disk->getName() == part.volume->getDisk()->getName()) return true; } - else if (destination_type == PartDestinationType::DISK) - return policy->getDiskByName(destination_name)->getName() == part.volume->getDisk()->getName(); + else if (ttl.destination_type == DataDestinationType::DISK) + return policy->getDiskByName(ttl.destination_name)->getName() == part.volume->getDisk()->getName(); return false; } -std::optional MergeTreeData::selectTTLEntryForTTLInfos( - const IMergeTreeDataPart::TTLInfos & ttl_infos, - time_t time_of_move) const +std::optional +MergeTreeData::selectTTLEntryForTTLInfos(const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const { time_t max_max_ttl = 0; - std::vector::const_iterator best_entry_it; + TTLDescriptions::const_iterator best_entry_it; auto lock = std::lock_guard(move_ttl_entries_mutex); + const auto & move_ttl_entries = getMoveTTLs(); for (auto ttl_entry_it = move_ttl_entries.begin(); ttl_entry_it != move_ttl_entries.end(); ++ttl_entry_it) { auto ttl_info_it = ttl_infos.moves_ttl.find(ttl_entry_it->result_column); @@ -2981,7 +2942,7 @@ std::optional MergeTreeData::selectTTLEntryForTTLInfos( } } - return max_max_ttl ? *best_entry_it : std::optional(); + return max_max_ttl ? *best_entry_it : std::optional(); } MergeTreeData::DataParts MergeTreeData::getDataParts(const DataPartStates & affordable_states) const @@ -3084,7 +3045,7 @@ bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const A { const String column_name = node->getColumnName(); - for (const auto & name : primary_key_columns) + for (const auto & name : getPrimaryKeyColumns()) if (column_name == name) return true; @@ -3144,10 +3105,10 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour return ast ? 
queryToString(ast) : ""; }; - if (query_to_string(order_by_ast) != query_to_string(src_data->order_by_ast)) + if (query_to_string(getSortingKeyAST()) != query_to_string(src_data->getSortingKeyAST())) throw Exception("Tables have different ordering", ErrorCodes::BAD_ARGUMENTS); - if (query_to_string(partition_by_ast) != query_to_string(src_data->partition_by_ast)) + if (query_to_string(getPartitionKeyAST()) != query_to_string(src_data->getPartitionKeyAST())) throw Exception("Tables have different partition key", ErrorCodes::BAD_ARGUMENTS); if (format_version != src_data->format_version) @@ -3400,7 +3361,7 @@ bool MergeTreeData::areBackgroundMovesNeeded() const if (policy->getVolumes().size() > 1) return true; - return policy->getVolumes().size() == 1 && policy->getVolumes()[0]->getDisks().size() > 1 && !move_ttl_entries.empty(); + return policy->getVolumes().size() == 1 && policy->getVolumes()[0]->getDisks().size() > 1 && hasAnyMoveTTL(); } bool MergeTreeData::movePartsToSpace(const DataPartsVector & parts, SpacePtr space) @@ -3539,7 +3500,7 @@ ColumnDependencies MergeTreeData::getColumnDependencies(const NameSet & updated_ if (hasRowsTTL()) { - if (add_dependent_columns(rows_ttl_entry.expression, required_ttl_columns)) + if (add_dependent_columns(getRowsTTL().expression, required_ttl_columns)) { /// Filter all columns, if rows TTL expression have to be recalculated. for (const auto & column : getColumns().getAllPhysical()) @@ -3547,13 +3508,13 @@ ColumnDependencies MergeTreeData::getColumnDependencies(const NameSet & updated_ } } - for (const auto & [name, entry] : column_ttl_entries_by_name) + for (const auto & [name, entry] : getColumnTTLs()) { if (add_dependent_columns(entry.expression, required_ttl_columns)) updated_ttl_columns.insert(name); } - for (const auto & entry : move_ttl_entries) + for (const auto & entry : getMoveTTLs()) add_dependent_columns(entry.expression, required_ttl_columns); for (const auto & column : indices_columns) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 9cf72cbe8bb..bf9bfea88b3 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include #include @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -335,24 +336,11 @@ public: /// See comments about methods below in IStorage interface StorageInMemoryMetadata getInMemoryMetadata() const override; - ASTPtr getPartitionKeyAST() const override { return partition_by_ast; } - ASTPtr getSortingKeyAST() const override { return sorting_key_expr_ast; } - ASTPtr getPrimaryKeyAST() const override { return primary_key_expr_ast; } - ASTPtr getSamplingKeyAST() const override { return sample_by_ast; } - - Names getColumnsRequiredForPartitionKey() const override { return (partition_key_expr ? 
partition_key_expr->getRequiredColumns() : Names{}); } - Names getColumnsRequiredForSortingKey() const override { return sorting_key_expr->getRequiredColumns(); } - Names getColumnsRequiredForPrimaryKey() const override { return primary_key_expr->getRequiredColumns(); } - Names getColumnsRequiredForSampling() const override { return columns_required_for_sampling; } - Names getColumnsRequiredForFinal() const override { return sorting_key_expr->getRequiredColumns(); } - Names getSortingKeyColumns() const override { return sorting_key_columns; } - ColumnDependencies getColumnDependencies(const NameSet & updated_columns) const override; StoragePolicyPtr getStoragePolicy() const override; bool supportsPrewhere() const override { return true; } - bool supportsSampling() const override { return sample_by_ast != nullptr; } bool supportsFinal() const override { @@ -530,15 +518,8 @@ public: */ static ASTPtr extractKeyExpressionList(const ASTPtr & node); - bool hasSortingKey() const { return !sorting_key_columns.empty(); } - bool hasPrimaryKey() const { return !primary_key_columns.empty(); } bool hasSkipIndices() const { return !skip_indices.empty(); } - bool hasAnyColumnTTL() const { return !column_ttl_entries_by_name.empty(); } - bool hasAnyMoveTTL() const { return !move_ttl_entries.empty(); } - bool hasRowsTTL() const override { return !rows_ttl_entry.isEmpty(); } - bool hasAnyTTL() const override { return hasRowsTTL() || hasAnyMoveTTL() || hasAnyColumnTTL(); } - /// Check that the part is not broken and calculate the checksums for it if they are not present. MutableDataPartPtr loadPartAndFixMetadata(const VolumePtr & volume, const String & relative_path) const; @@ -640,6 +621,13 @@ public: /// Return alter conversions for part which must be applied on fly. AlterConversions getAlterConversionsForPart(const MergeTreeDataPartPtr part) const; + /// Returns destination disk or volume for the TTL rule according to current + /// storage policy + SpacePtr getDestinationForTTL(const TTLDescription & ttl) const; + + /// Checks if given part already belongs destination disk or volume for the + /// TTL rule. + bool isPartInTTLDestination(const TTLDescription & ttl, const IMergeTreeDataPart & part) const; MergeTreeDataFormatVersion format_version; @@ -649,8 +637,6 @@ public: const MergingParams merging_params; bool is_custom_partitioned = false; - ExpressionActionsPtr partition_key_expr; - Block partition_key_sample; ExpressionActionsPtr minmax_idx_expr; Names minmax_idx_columns; @@ -664,55 +650,13 @@ public: ExpressionActionsPtr primary_key_and_skip_indices_expr; ExpressionActionsPtr sorting_key_and_skip_indices_expr; - /// Names of sorting key columns in ORDER BY expression. For example: 'a', - /// 'x * y', 'toStartOfMonth(date)', etc. - Names sorting_key_columns; - ASTPtr sorting_key_expr_ast; - ExpressionActionsPtr sorting_key_expr; + std::optional selectTTLEntryForTTLInfos(const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const; - /// Names of columns for primary key. - Names primary_key_columns; - ASTPtr primary_key_expr_ast; - ExpressionActionsPtr primary_key_expr; - Block primary_key_sample; - DataTypes primary_key_data_types; - - struct TTLEntry - { - ExpressionActionsPtr expression; - String result_column; - - /// Name and type of a destination are only valid in table-level context. - PartDestinationType destination_type; - String destination_name; - - ASTPtr entry_ast; - - /// Returns destination disk or volume for this rule. 
- SpacePtr getDestination(StoragePolicyPtr policy) const; - - /// Checks if given part already belongs destination disk or volume for this rule. - bool isPartInDestination(StoragePolicyPtr policy, const IMergeTreeDataPart & part) const; - - bool isEmpty() const { return expression == nullptr; } - }; - - std::optional selectTTLEntryForTTLInfos(const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const; - - using TTLEntriesByName = std::unordered_map; - TTLEntriesByName column_ttl_entries_by_name; - - TTLEntry rows_ttl_entry; - - /// This mutex is required for background move operations which do not obtain global locks. + /// This mutex is required for background move operations which do not + /// obtain global locks. + /// TODO (alesap) It will be removed after metadata became atomic mutable std::mutex move_ttl_entries_mutex; - /// Vector rw operations have to be done under "move_ttl_entries_mutex". - std::vector move_ttl_entries; - - String sampling_expr_column_name; - Names columns_required_for_sampling; - /// Limiting parallel sends per one table, used in DataPartsExchange std::atomic_uint current_table_sends {0}; @@ -739,11 +683,6 @@ protected: friend struct ReplicatedMergeTreeTableMetadata; friend class StorageReplicatedMergeTree; - ASTPtr partition_by_ast; - ASTPtr order_by_ast; - ASTPtr primary_key_ast; - ASTPtr sample_by_ast; - ASTPtr ttl_table_ast; ASTPtr settings_ast; bool require_part_metadata; @@ -854,7 +793,7 @@ protected: void setProperties(const StorageInMemoryMetadata & metadata, bool only_check = false, bool attach = false); - void initPartitionKey(); + void initPartitionKey(ASTPtr partition_by_ast); void setTTLExpressions(const ColumnsDescription & columns, const ASTPtr & new_ttl_table_ast, bool only_check = false); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 12f577eef25..e36391f87ec 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -608,7 +608,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor NamesAndTypesList merging_columns; Names gathering_column_names, merging_column_names; extractMergingAndGatheringColumns( - storage_columns, data.sorting_key_expr, data.skip_indices, + storage_columns, data.getSortingKey().expression, data.skip_indices, data.merging_params, gathering_columns, gathering_column_names, merging_columns, merging_column_names); auto single_disk_volume = std::make_shared("volume_" + future_part.name, disk); @@ -727,7 +727,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor pipes.emplace_back(std::move(pipe)); } - Names sort_columns = data.sorting_key_columns; + Names sort_columns = data.getSortingKeyColumns(); SortDescription sort_description; size_t sort_columns_size = sort_columns.size(); sort_description.reserve(sort_columns_size); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index cc698797366..f90add64732 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -223,9 +223,10 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( data.check(real_column_names); const Settings & settings = context.getSettingsRef(); - Names primary_key_columns = data.primary_key_columns; + const auto & primary_key = data.getPrimaryKey(); + Names primary_key_columns = 
primary_key.column_names; - KeyCondition key_condition(query_info, context, primary_key_columns, data.primary_key_expr); + KeyCondition key_condition(query_info, context, primary_key_columns, primary_key.expression); if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue()) { @@ -388,7 +389,8 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( used_sample_factor = 1.0 / boost::rational_cast(relative_sample_size); RelativeSize size_of_universum = 0; - DataTypePtr sampling_column_type = data.primary_key_sample.getByName(data.sampling_expr_column_name).type; + const auto & sampling_key = data.getSamplingKey(); + DataTypePtr sampling_column_type = sampling_key.data_types[0]; if (typeid_cast(sampling_column_type.get())) size_of_universum = RelativeSize(std::numeric_limits::max()) + RelativeSize(1); @@ -457,17 +459,17 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( /// The first time it was calculated for final, because sample key is a part of the PK. /// So, assume that we already have calculated column. ASTPtr sampling_key_ast = data.getSamplingKeyAST(); + if (select.final()) { - sampling_key_ast = std::make_shared(data.sampling_expr_column_name); - + sampling_key_ast = std::make_shared(sampling_key.column_names[0]); /// We do spoil available_real_columns here, but it is not used later. - available_real_columns.emplace_back(data.sampling_expr_column_name, std::move(sampling_column_type)); + available_real_columns.emplace_back(sampling_key.column_names[0], std::move(sampling_column_type)); } if (has_lower_limit) { - if (!key_condition.addCondition(data.sampling_expr_column_name, Range::createLeftBounded(lower, true))) + if (!key_condition.addCondition(sampling_key.column_names[0], Range::createLeftBounded(lower, true))) throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); ASTPtr args = std::make_shared(); @@ -484,7 +486,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( if (has_upper_limit) { - if (!key_condition.addCondition(data.sampling_expr_column_name, Range::createRightBounded(upper, false))) + if (!key_condition.addCondition(sampling_key.column_names[0], Range::createRightBounded(upper, false))) throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); ASTPtr args = std::make_shared(); @@ -612,7 +614,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( if (select.final()) { /// Add columns needed to calculate the sorting expression and the sign. 
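The sampling hunk above derives the "universe" size from the sampling column's type (the flattened diff has dropped the `std::numeric_limits` template parameters). The idea, sketched standalone below, is that the universe is 2^N for an N-bit unsigned sampling column, so a relative `SAMPLE` fraction can be turned into a range over that column.

```cpp
#include <cstdint>
#include <limits>
#include <stdexcept>

enum class SamplingType { UInt8, UInt16, UInt32, UInt64 };

// Returned as long double for simplicity; 2^64 does not fit into UInt64,
// which is why the real code computes max() + 1 in a rational type.
long double samplingUniverse(SamplingType type)
{
    switch (type)
    {
        case SamplingType::UInt8:  return static_cast<long double>(std::numeric_limits<uint8_t>::max()) + 1;
        case SamplingType::UInt16: return static_cast<long double>(std::numeric_limits<uint16_t>::max()) + 1;
        case SamplingType::UInt32: return static_cast<long double>(std::numeric_limits<uint32_t>::max()) + 1;
        case SamplingType::UInt64: return static_cast<long double>(std::numeric_limits<uint64_t>::max()) + 1;
    }
    throw std::invalid_argument("Invalid sampling column type in SAMPLE BY.");
}
```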
- std::vector add_columns = data.sorting_key_expr->getRequiredColumns(); + std::vector add_columns = data.getColumnsRequiredForSortingKey(); column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end()); if (!data.merging_params.sign_column.empty()) @@ -638,7 +640,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( else if (settings.optimize_read_in_order && query_info.input_sorting_info) { size_t prefix_size = query_info.input_sorting_info->order_key_prefix_descr.size(); - auto order_key_prefix_ast = data.sorting_key_expr_ast->clone(); + auto order_key_prefix_ast = data.getSortingKey().expression_list_ast->clone(); order_key_prefix_ast->children.resize(prefix_size); auto syntax_result = SyntaxAnalyzer(context).analyze(order_key_prefix_ast, data.getColumns().getAllPhysical()); @@ -1023,7 +1025,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( { SortDescription sort_description; for (size_t j = 0; j < input_sorting_info->order_key_prefix_descr.size(); ++j) - sort_description.emplace_back(data.sorting_key_columns[j], + sort_description.emplace_back(data.getSortingKey().column_names[j], input_sorting_info->direction, 1); /// Drop temporary columns, added by 'sorting_key_prefix_expr' @@ -1096,11 +1098,11 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( if (!out_projection) out_projection = createProjection(pipe, data); - pipe.addSimpleTransform(std::make_shared(pipe.getHeader(), data.sorting_key_expr)); + pipe.addSimpleTransform(std::make_shared(pipe.getHeader(), data.getSortingKey().expression)); pipes.emplace_back(std::move(pipe)); } - Names sort_columns = data.sorting_key_columns; + Names sort_columns = data.getSortingKeyColumns(); SortDescription sort_description; size_t sort_columns_size = sort_columns.size(); sort_description.reserve(sort_columns_size); @@ -1293,11 +1295,12 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( std::function create_field_ref; /// If there are no monotonic functions, there is no need to save block reference. /// Passing explicit field to FieldRef allows to optimize ranges and shows better performance. 
+ const auto & primary_key = data.getPrimaryKey(); if (key_condition.hasMonotonicFunctionsChain()) { auto index_block = std::make_shared(); for (size_t i = 0; i < used_key_size; ++i) - index_block->insert({index[i], data.primary_key_data_types[i], data.primary_key_columns[i]}); + index_block->insert({index[i], primary_key.data_types[i], primary_key.column_names[i]}); create_field_ref = [index_block](size_t row, size_t column, FieldRef & field) { @@ -1328,7 +1331,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( create_field_ref(range.begin, i, index_left[i]); may_be_true = key_condition.mayBeTrueAfter( - used_key_size, index_left.data(), data.primary_key_data_types); + used_key_size, index_left.data(), primary_key.data_types); } else { @@ -1342,7 +1345,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( } may_be_true = key_condition.mayBeTrueInRange( - used_key_size, index_left.data(), index_right.data(), data.primary_key_data_types); + used_key_size, index_left.data(), index_right.data(), primary_key.data_types); } if (!may_be_true) diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index d873790d91c..4eedf99c837 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -78,10 +78,12 @@ void buildScatterSelector( } /// Computes ttls and updates ttl infos -void updateTTL(const MergeTreeData::TTLEntry & ttl_entry, +void updateTTL( + const TTLDescription & ttl_entry, IMergeTreeDataPart::TTLInfos & ttl_infos, DB::MergeTreeDataPartTTLInfo & ttl_info, - Block & block, bool update_part_min_max_ttls) + Block & block, + bool update_part_min_max_ttls) { bool remove_column = false; if (!block.has(ttl_entry.result_column)) @@ -139,18 +141,19 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block data.check(block, true); block.checkNumberOfRows(); - if (!data.partition_key_expr) /// Table is not partitioned. + if (!data.hasPartitionKey()) /// Table is not partitioned. 
{ result.emplace_back(Block(block), Row()); return result; } Block block_copy = block; - data.partition_key_expr->execute(block_copy); + const auto & partition_key = data.getPartitionKey(); + partition_key.expression->execute(block_copy); ColumnRawPtrs partition_columns; - partition_columns.reserve(data.partition_key_sample.columns()); - for (const ColumnWithTypeAndName & element : data.partition_key_sample) + partition_columns.reserve(partition_key.sample_block.columns()); + for (const ColumnWithTypeAndName & element : partition_key.sample_block) partition_columns.emplace_back(block_copy.getByName(element.name).column.get()); PODArray partition_num_to_first_row; @@ -204,7 +207,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa MergeTreePartition partition(std::move(block_with_partition.partition)); - MergeTreePartInfo new_part_info(partition.getID(data.partition_key_sample), temp_index, temp_index, 0); + MergeTreePartInfo new_part_info(partition.getID(data.getPartitionKey().sample_block), temp_index, temp_index, 0); String part_name; if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { @@ -228,7 +231,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa size_t expected_size = block.bytes(); DB::IMergeTreeDataPart::TTLInfos move_ttl_infos; - for (const auto & ttl_entry : data.move_ttl_entries) + const auto & move_ttl_entries = data.getMoveTTLs(); + for (const auto & ttl_entry : move_ttl_entries) updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false); NamesAndTypesList columns = data.getColumns().getAllPhysical().filter(block.getNames()); @@ -262,7 +266,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa if (data.hasSortingKey() || data.hasSkipIndices()) data.sorting_key_and_skip_indices_expr->execute(block); - Names sort_columns = data.sorting_key_columns; + Names sort_columns = data.getSortingKeyColumns(); SortDescription sort_description; size_t sort_columns_size = sort_columns.size(); sort_description.reserve(sort_columns_size); @@ -287,9 +291,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa } if (data.hasRowsTTL()) - updateTTL(data.rows_ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true); + updateTTL(data.getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true); - for (const auto & [name, ttl_entry] : data.column_ttl_entries_by_name) + for (const auto & [name, ttl_entry] : data.getColumnTTLs()) updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true); new_data_part->ttl_infos.update(move_ttl_infos); diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 000d0abad43..54e213fafac 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -26,7 +26,7 @@ static std::unique_ptr openForReading(const DiskPtr & di String MergeTreePartition::getID(const MergeTreeData & storage) const { - return getID(storage.partition_key_sample); + return getID(storage.getPartitionKey().sample_block); } /// NOTE: This ID is used to create part names which are then persisted in ZK and as directory names on the file system. 
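`splitBlockIntoParts` above evaluates the partition key expression on a copy of the inserted block and scatters rows into one future part per distinct partition value. A standalone sketch of that grouping step; the real code builds a selector and uses `IColumn::scatter` rather than a map, and a row of evaluated partition columns is represented here by `std::vector<std::string>`.

```cpp
#include <cstddef>
#include <map>
#include <string>
#include <vector>

using PartitionValue = std::vector<std::string>;

// Group row indexes by the value the partition key expression produced for
// each row; every group becomes a separate part.
std::map<PartitionValue, std::vector<size_t>> splitRowsByPartition(
    const std::vector<PartitionValue> & partition_value_per_row)
{
    std::map<PartitionValue, std::vector<size_t>> rows_per_partition;
    for (size_t row = 0; row < partition_value_per_row.size(); ++row)
        rows_per_partition[partition_value_per_row[row]].push_back(row);
    return rows_per_partition;
}
```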
@@ -89,7 +89,8 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const void MergeTreePartition::serializeText(const MergeTreeData & storage, WriteBuffer & out, const FormatSettings & format_settings) const { - size_t key_size = storage.partition_key_sample.columns(); + const auto & partition_key_sample = storage.getPartitionKey().sample_block; + size_t key_size = partition_key_sample.columns(); if (key_size == 0) { @@ -97,7 +98,7 @@ void MergeTreePartition::serializeText(const MergeTreeData & storage, WriteBuffe } else if (key_size == 1) { - const DataTypePtr & type = storage.partition_key_sample.getByPosition(0).type; + const DataTypePtr & type = partition_key_sample.getByPosition(0).type; auto column = type->createColumn(); column->insert(value[0]); type->serializeAsText(*column, 0, out, format_settings); @@ -108,7 +109,7 @@ void MergeTreePartition::serializeText(const MergeTreeData & storage, WriteBuffe Columns columns; for (size_t i = 0; i < key_size; ++i) { - const auto & type = storage.partition_key_sample.getByPosition(i).type; + const auto & type = partition_key_sample.getByPosition(i).type; types.push_back(type); auto column = type->createColumn(); column->insert(value[i]); @@ -123,19 +124,20 @@ void MergeTreePartition::serializeText(const MergeTreeData & storage, WriteBuffe void MergeTreePartition::load(const MergeTreeData & storage, const DiskPtr & disk, const String & part_path) { - if (!storage.partition_key_expr) + if (!storage.hasPartitionKey()) return; + const auto & partition_key_sample = storage.getPartitionKey().sample_block; auto partition_file_path = part_path + "partition.dat"; auto file = openForReading(disk, partition_file_path); - value.resize(storage.partition_key_sample.columns()); - for (size_t i = 0; i < storage.partition_key_sample.columns(); ++i) - storage.partition_key_sample.getByPosition(i).type->deserializeBinary(value[i], *file); + value.resize(partition_key_sample.columns()); + for (size_t i = 0; i < partition_key_sample.columns(); ++i) + partition_key_sample.getByPosition(i).type->deserializeBinary(value[i], *file); } void MergeTreePartition::store(const MergeTreeData & storage, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const { - store(storage.partition_key_sample, disk, part_path, checksums); + store(storage.getPartitionKey().sample_block, disk, part_path, checksums); } void MergeTreePartition::store(const Block & partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index ae66e40e042..4e564b512d6 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -128,14 +128,14 @@ bool MergeTreePartsMover::selectPartsForMove( if (!can_move(part, &reason)) continue; - auto ttl_entry = part->storage.selectTTLEntryForTTLInfos(part->ttl_infos, time_of_move); + auto ttl_entry = data->selectTTLEntryForTTLInfos(part->ttl_infos, time_of_move); auto to_insert = need_to_move.find(part->volume->getDisk()); ReservationPtr reservation; if (ttl_entry) { - auto destination = ttl_entry->getDestination(policy); - if (destination && !ttl_entry->isPartInDestination(policy, *part)) - reservation = part->storage.tryReserveSpace(part->getBytesOnDisk(), ttl_entry->getDestination(policy)); + auto destination = data->getDestinationForTTL(*ttl_entry); + if (destination && 
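`MergeTreePartition::serializeText` above renders the stored partition values against the partition key sample block: an empty key prints as `tuple()`, a single value prints bare, and several values print as a tuple. A standalone sketch of that shape, with values pre-rendered as strings (the real code asks each key column's `IDataType` to serialize the stored `Field`):

```cpp
#include <cstddef>
#include <string>
#include <vector>

std::string partitionToText(const std::vector<std::string> & values)
{
    if (values.empty())
        return "tuple()";
    if (values.size() == 1)
        return values[0];

    std::string out = "(";
    for (size_t i = 0; i < values.size(); ++i)
    {
        if (i > 0)
            out += ", ";
        out += values[i];
    }
    out += ")";
    return out;
}
```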
!data->isPartInTTLDestination(*ttl_entry, *part)) + reservation = data->tryReserveSpace(part->getBytesOnDisk(), data->getDestinationForTTL(*ttl_entry)); } if (reservation) /// Found reservation by TTL rule. diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 69450f0ac4c..a8da0e8615c 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -39,8 +39,9 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( block_with_constants{KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context)}, log{log_} { - if (!data.primary_key_columns.empty()) - first_primary_key_column = data.primary_key_columns[0]; + const auto & primary_key = data.getPrimaryKey(); + if (!primary_key.column_names.empty()) + first_primary_key_column = primary_key.column_names[0]; calculateColumnSizes(data, queried_columns); determineArrayJoinedNames(query_info.query->as()); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index bd1312f0c59..e064a4c734a 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -162,7 +162,7 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm std::inserter(skip_indexes_column_names_set, skip_indexes_column_names_set.end())); Names skip_indexes_column_names(skip_indexes_column_names_set.begin(), skip_indexes_column_names_set.end()); - Block primary_key_block = getBlockAndPermute(block, storage.primary_key_columns, permutation); + Block primary_key_block = getBlockAndPermute(block, storage.getPrimaryKeyColumns(), permutation); Block skip_indexes_block = getBlockAndPermute(block, skip_indexes_column_names, permutation); writer->write(block, permutation, primary_key_block, skip_indexes_block); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index ba9bbd1da61..3f9d039ffa1 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -29,7 +29,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr date_column = data.minmax_idx_columns[data.minmax_idx_date_column_pos]; const auto data_settings = data.getSettings(); - sampling_expression = formattedAST(data.sample_by_ast); + sampling_expression = formattedAST(data.getSamplingKeyAST()); index_granularity = data_settings->index_granularity; merging_params_mode = static_cast(data.merging_params.mode); sign_column = data.merging_params.sign_column; @@ -40,20 +40,20 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr /// So rules in zookeeper metadata is following: /// - When we have only ORDER BY, than store it in "primary key:" row of /metadata /// - When we have both, than store PRIMARY KEY in "primary key:" row and ORDER BY in "sorting key:" row of /metadata - if (!data.primary_key_ast) - primary_key = formattedAST(MergeTreeData::extractKeyExpressionList(data.order_by_ast)); + if (!data.isPrimaryKeyDefined()) + primary_key = formattedAST(data.getSortingKey().expression_list_ast); else { - primary_key = formattedAST(MergeTreeData::extractKeyExpressionList(data.primary_key_ast)); - sorting_key = formattedAST(MergeTreeData::extractKeyExpressionList(data.order_by_ast)); + primary_key = 
formattedAST(data.getPrimaryKey().expression_list_ast); + sorting_key = formattedAST(data.getSortingKey().expression_list_ast); } data_format_version = data.format_version; if (data.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) - partition_key = formattedAST(MergeTreeData::extractKeyExpressionList(data.partition_by_ast)); + partition_key = formattedAST(data.getPartitionKey().expression_list_ast); - ttl_table = formattedAST(data.ttl_table_ast); + ttl_table = formattedAST(data.getTableTTLs().definition_ast); skip_indices = data.getIndices().toString(); if (data.canUseAdaptiveGranularity()) diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 20a3bb5132b..37acf0f0160 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -39,9 +39,6 @@ public: return part->storage.mayBenefitFromIndexForIn(left_in_operand, query_context); } - bool hasAnyTTL() const override { return part->storage.hasAnyTTL(); } - bool hasRowsTTL() const override { return part->storage.hasRowsTTL(); } - ColumnDependencies getColumnDependencies(const NameSet & updated_columns) const override { return part->storage.getColumnDependencies(updated_columns); @@ -52,10 +49,6 @@ public: return part->storage.getInMemoryMetadata(); } - bool hasSortingKey() const { return part->storage.hasSortingKey(); } - - Names getSortingKeyColumns() const override { return part->storage.getSortingKeyColumns(); } - NamesAndTypesList getVirtuals() const override { return part->storage.getVirtuals(); @@ -68,6 +61,9 @@ protected: { setColumns(part_->storage.getColumns()); setIndices(part_->storage.getIndices()); + setSortingKey(part_->storage.getSortingKey()); + setColumnTTLs(part->storage.getColumnTTLs()); + setTableTTLs(part->storage.getTableTTLs()); } private: diff --git a/src/Storages/PartitionCommands.cpp b/src/Storages/PartitionCommands.cpp index 9c8fc2cb598..c3bf00187af 100644 --- a/src/Storages/PartitionCommands.cpp +++ b/src/Storages/PartitionCommands.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include @@ -42,13 +42,13 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * res.part = command_ast->part; switch (command_ast->move_destination_type) { - case PartDestinationType::DISK: + case DataDestinationType::DISK: res.move_destination_type = PartitionCommand::MoveDestinationType::DISK; break; - case PartDestinationType::VOLUME: + case DataDestinationType::VOLUME: res.move_destination_type = PartitionCommand::MoveDestinationType::VOLUME; break; - case PartDestinationType::TABLE: + case DataDestinationType::TABLE: res.move_destination_type = PartitionCommand::MoveDestinationType::TABLE; res.to_database = command_ast->to_database; res.to_table = command_ast->to_table; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 95be1275d3d..47beeec196f 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -129,7 +129,7 @@ QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage(const Context { if (destination_id) { - auto destination = DatabaseCatalog::instance().getTable(destination_id); + auto destination = DatabaseCatalog::instance().getTable(destination_id, context); if (destination.get() == this) throw Exception("Destination table is myself. 
Read will cause infinite loop.", ErrorCodes::INFINITE_LOOP); @@ -153,7 +153,7 @@ Pipes StorageBuffer::read( if (destination_id) { - auto destination = DatabaseCatalog::instance().getTable(destination_id); + auto destination = DatabaseCatalog::instance().getTable(destination_id, context); if (destination.get() == this) throw Exception("Destination table is myself. Read will cause infinite loop.", ErrorCodes::INFINITE_LOOP); @@ -334,7 +334,7 @@ public: StoragePtr destination; if (storage.destination_id) { - destination = DatabaseCatalog::instance().tryGetTable(storage.destination_id); + destination = DatabaseCatalog::instance().tryGetTable(storage.destination_id, storage.global_context); if (destination.get() == &storage) throw Exception("Destination table is myself. Write will cause infinite loop.", ErrorCodes::INFINITE_LOOP); } @@ -434,7 +434,7 @@ bool StorageBuffer::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, con if (!destination_id) return false; - auto destination = DatabaseCatalog::instance().getTable(destination_id); + auto destination = DatabaseCatalog::instance().getTable(destination_id, query_context); if (destination.get() == this) throw Exception("Destination table is myself. Read will cause infinite loop.", ErrorCodes::INFINITE_LOOP); @@ -602,7 +602,7 @@ void StorageBuffer::flushBuffer(Buffer & buffer, bool check_thresholds, bool loc */ try { - writeBlockToDestination(block_to_write, DatabaseCatalog::instance().tryGetTable(destination_id)); + writeBlockToDestination(block_to_write, DatabaseCatalog::instance().tryGetTable(destination_id, global_context)); } catch (...) { @@ -739,7 +739,7 @@ void StorageBuffer::checkAlterIsPossible(const AlterCommands & commands, const S std::optional StorageBuffer::totalRows() const { std::optional underlying_rows; - auto underlying = DatabaseCatalog::instance().tryGetTable(destination_id); + auto underlying = DatabaseCatalog::instance().tryGetTable(destination_id, global_context); if (underlying) underlying_rows = underlying->totalRows(); diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 6d6c1f66569..10a4482c801 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -75,7 +75,7 @@ public: { if (!destination_id) return false; - auto dest = DatabaseCatalog::instance().tryGetTable(destination_id); + auto dest = DatabaseCatalog::instance().tryGetTable(destination_id, global_context); if (dest && dest.get() != this) return dest->supportsPrewhere(); return false; diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index a05872234de..6713519151f 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -1,7 +1,15 @@ #include +#include +#include +#include +#include +#include + namespace DB { + + StorageInMemoryMetadata::StorageInMemoryMetadata( const ColumnsDescription & columns_, const IndicesDescription & indices_, @@ -79,4 +87,56 @@ StorageInMemoryMetadata & StorageInMemoryMetadata::operator=(const StorageInMemo return *this; } + +namespace +{ + ASTPtr extractKeyExpressionList(const ASTPtr & node) + { + if (!node) + return std::make_shared(); + + const auto * expr_func = node->as(); + + if (expr_func && expr_func->name == "tuple") + { + /// Primary key is specified in tuple, extract its arguments. + return expr_func->arguments->clone(); + } + else + { + /// Primary key consists of one column. 
+ auto res = std::make_shared(); + res->children.push_back(node); + return res; + } + } +} + +StorageMetadataKeyField StorageMetadataKeyField::getKeyFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context) +{ + StorageMetadataKeyField result; + result.definition_ast = definition_ast; + result.expression_list_ast = extractKeyExpressionList(definition_ast); + + if (result.expression_list_ast->children.empty()) + return result; + + const auto & children = result.expression_list_ast->children; + for (const auto & child : children) + result.column_names.emplace_back(child->getColumnName()); + + { + auto expr = result.expression_list_ast->clone(); + auto syntax_result = SyntaxAnalyzer(context).analyze(expr, columns.getAllPhysical()); + result.expression = ExpressionAnalyzer(expr, syntax_result, context).getActions(true); + result.sample_block = result.expression->getSampleBlock(); + } + + for (size_t i = 0; i < result.sample_block.columns(); ++i) + result.data_types.emplace_back(result.sample_block.getByPosition(i).type); + + return result; +} + + } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 361b6be009b..1733cb4a308 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -43,4 +43,33 @@ struct StorageInMemoryMetadata StorageInMemoryMetadata & operator=(const StorageInMemoryMetadata & other); }; +/// Common structure for primary, partition and other storage keys +struct StorageMetadataKeyField +{ + /// User defined AST in CREATE/ALTER query. This field may be empty, but key + /// can exists because some of them maybe set implicitly (for example, + /// primary key in merge tree can be part of sorting key) + ASTPtr definition_ast; + + /// ASTExpressionList with key fields, example: (x, toStartOfMonth(date))). + ASTPtr expression_list_ast; + + /// Expression from expression_list_ast created by ExpressionAnalyzer. Useful, + /// when you need to get required columns for key, example: a, date, b. + ExpressionActionsPtr expression; + + /// Sample block with key columns (names, types, empty column) + Block sample_block; + + /// Column names in key definition, example: x, toStartOfMonth(date), a * b. + Names column_names; + + /// Types from sample block ordered in columns order. + DataTypes data_types; + + /// Parse key structure from key definition. Requires all columns, available + /// in storage. 
+ static StorageMetadataKeyField getKeyFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context); +}; + } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index a565c8c6260..2afa7f7d713 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -149,7 +149,7 @@ StorageMaterializedView::StorageMaterializedView( create_interpreter.setInternal(true); create_interpreter.execute(); - target_table_id = DatabaseCatalog::instance().getTable({manual_create_query->database, manual_create_query->table})->getStorageID(); + target_table_id = DatabaseCatalog::instance().getTable({manual_create_query->database, manual_create_query->table}, global_context)->getStorageID(); } if (!select_table_id.empty()) @@ -204,7 +204,7 @@ BlockOutputStreamPtr StorageMaterializedView::write(const ASTPtr & query, const static void executeDropQuery(ASTDropQuery::Kind kind, Context & global_context, const StorageID & target_table_id) { - if (DatabaseCatalog::instance().tryGetTable(target_table_id)) + if (DatabaseCatalog::instance().tryGetTable(target_table_id, global_context)) { /// We create and execute `drop` query for internal table. auto drop_query = std::make_shared(); @@ -362,12 +362,12 @@ void StorageMaterializedView::shutdown() StoragePtr StorageMaterializedView::getTargetTable() const { - return DatabaseCatalog::instance().getTable(target_table_id); + return DatabaseCatalog::instance().getTable(target_table_id, global_context); } StoragePtr StorageMaterializedView::tryGetTargetTable() const { - return DatabaseCatalog::instance().tryGetTable(target_table_id); + return DatabaseCatalog::instance().tryGetTable(target_table_id, global_context); } Strings StorageMaterializedView::getDataPaths() const diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index fb6d88c8d33..6f76ae4b7b3 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -62,7 +62,7 @@ StorageMerge::StorageMerge( template StoragePtr StorageMerge::getFirstTable(F && predicate) const { - auto iterator = getDatabaseIterator(); + auto iterator = getDatabaseIterator(global_context); while (iterator->isValid()) { @@ -110,7 +110,7 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & { auto stage_in_source_tables = QueryProcessingStage::FetchColumns; - DatabaseTablesIteratorPtr iterator = getDatabaseIterator(); + DatabaseTablesIteratorPtr iterator = getDatabaseIterator(context); size_t selected_table_size = 0; @@ -329,7 +329,7 @@ Pipes StorageMerge::createSources(const SelectQueryInfo & query_info, const Quer StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const String & query_id, const Settings & settings) const { StorageListWithLocks selected_tables; - auto iterator = getDatabaseIterator(); + auto iterator = getDatabaseIterator(global_context); while (iterator->isValid()) { @@ -349,7 +349,7 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( const ASTPtr & query, bool has_virtual_column, const String & query_id, const Settings & settings) const { StorageListWithLocks selected_tables; - DatabaseTablesIteratorPtr iterator = getDatabaseIterator(); + DatabaseTablesIteratorPtr iterator = getDatabaseIterator(global_context); auto virtual_column = ColumnString::create(); @@ -384,12 +384,12 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( } -DatabaseTablesIteratorPtr 
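A usage sketch for the new `StorageMetadataKeyField`, assuming only the interfaces shown in this diff; it is not compilable on its own, and `partition_by_ast`, `block`, `log`, `getColumns()` and `global_context` are placeholders for the usual storage members. The point is that the key is analyzed once and the derived fields are reused, instead of re-running SyntaxAnalyzer/ExpressionAnalyzer at every call site.

```cpp
// Build the key once from its definition AST.
StorageMetadataKeyField partition_key =
    StorageMetadataKeyField::getKeyFromAST(partition_by_ast, getColumns(), global_context);

/// Column names such as "toYYYYMM(date)" for logging and checks.
for (const String & name : partition_key.column_names)
    LOG_DEBUG(log, "Partition key column: {}", name);

/// Evaluate the key for a block of inserted data.
Block block_copy = block;
partition_key.expression->execute(block_copy);

/// Header describing the key columns (names, types, empty columns).
const Block & header = partition_key.sample_block;
```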
StorageMerge::getDatabaseIterator() const +DatabaseTablesIteratorPtr StorageMerge::getDatabaseIterator(const Context & context) const { checkStackSize(); auto database = DatabaseCatalog::instance().getDatabase(source_database); auto table_name_match = [this](const String & table_name_) { return table_name_regexp.match(table_name_); }; - return database->getTablesIterator(table_name_match); + return database->getTablesIterator(context, table_name_match); } diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index ebe40109d84..401c5facf0c 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -61,7 +61,7 @@ private: template StoragePtr getFirstTable(F && predicate) const; - DatabaseTablesIteratorPtr getDatabaseIterator() const; + DatabaseTablesIteratorPtr getDatabaseIterator(const Context & context) const; NamesAndTypesList getVirtuals() const override; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index f6114d709b6..c6af41cc163 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -990,7 +990,7 @@ void StorageMergeTree::alterPartition(const ASTPtr & query, const PartitionComma case PartitionCommand::MoveDestinationType::TABLE: checkPartitionCanBeDropped(command.partition); String dest_database = context.resolveDatabase(command.to_database); - auto dest_storage = DatabaseCatalog::instance().getTable({dest_database, command.to_table}); + auto dest_storage = DatabaseCatalog::instance().getTable({dest_database, command.to_table}, context); movePartitionToTable(dest_storage, command.partition, context); break; } @@ -1002,7 +1002,7 @@ void StorageMergeTree::alterPartition(const ASTPtr & query, const PartitionComma { checkPartitionCanBeDropped(command.partition); String from_database = context.resolveDatabase(command.from_database); - auto from_storage = DatabaseCatalog::instance().getTable({from_database, command.from_table}); + auto from_storage = DatabaseCatalog::instance().getTable({from_database, command.from_table}, context); replacePartitionFrom(from_storage, command.partition, command.replace, context); } break; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 618f8bb75a8..00565e777ae 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -492,11 +492,11 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column metadata.order_by_ast = tuple; } - if (!primary_key_ast) + if (!isPrimaryKeyDefined()) { /// Primary and sorting key become independent after this ALTER so we have to /// save the old ORDER BY expression as the new primary key. - metadata.primary_key_ast = order_by_ast->clone(); + metadata.primary_key_ast = getSortingKeyAST()->clone(); } } @@ -1570,7 +1570,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) auto clone_data_parts_from_source_table = [&] () -> size_t { - source_table = DatabaseCatalog::instance().tryGetTable(source_table_id); + source_table = DatabaseCatalog::instance().tryGetTable(source_table_id, global_context); if (!source_table) { LOG_DEBUG(log, "Can't use {} as source table for REPLACE PARTITION command. 
It does not exist.", source_table_id.getNameForLogs()); @@ -3485,7 +3485,7 @@ void StorageReplicatedMergeTree::alterPartition(const ASTPtr & query, const Part case PartitionCommand::MoveDestinationType::TABLE: checkPartitionCanBeDropped(command.partition); String dest_database = query_context.resolveDatabase(command.to_database); - auto dest_storage = DatabaseCatalog::instance().getTable({dest_database, command.to_table}); + auto dest_storage = DatabaseCatalog::instance().getTable({dest_database, command.to_table}, query_context); movePartitionToTable(dest_storage, command.partition, query_context); break; } @@ -3496,7 +3496,7 @@ void StorageReplicatedMergeTree::alterPartition(const ASTPtr & query, const Part { checkPartitionCanBeDropped(command.partition); String from_database = query_context.resolveDatabase(command.from_database); - auto from_storage = DatabaseCatalog::instance().getTable({from_database, command.from_table}); + auto from_storage = DatabaseCatalog::instance().getTable({from_database, command.from_table}, query_context); replacePartitionFrom(from_storage, command.partition, command.replace, query_context); } break; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index b8710de507b..22fe4656f17 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include @@ -228,18 +228,18 @@ Strings listFilesWithRegexpMatching(Aws::S3::S3Client & client, const S3::URI & return {globbed_uri.key}; } - Aws::S3::Model::ListObjectsRequest request; + Aws::S3::Model::ListObjectsV2Request request; request.SetBucket(globbed_uri.bucket); request.SetPrefix(key_prefix); re2::RE2 matcher(makeRegexpPatternFromGlobs(globbed_uri.key)); Strings result; - Aws::S3::Model::ListObjectsOutcome outcome; + Aws::S3::Model::ListObjectsV2Outcome outcome; int page = 0; do { ++page; - outcome = client.ListObjects(request); + outcome = client.ListObjectsV2(request); if (!outcome.IsSuccess()) { throw Exception("Could not list objects in bucket " + quoteString(request.GetBucket()) @@ -256,7 +256,7 @@ Strings listFilesWithRegexpMatching(Aws::S3::S3Client & client, const S3::URI & result.emplace_back(std::move(key)); } - request.SetMarker(outcome.GetResult().GetNextMarker()); + request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); } while (outcome.GetResult().GetIsTruncated()); diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 636c7f9d64d..01c85c5238a 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -19,6 +19,8 @@ #include #include #include +#include +#include namespace DB @@ -62,29 +64,42 @@ Pipes StorageView::read( if (context.getSettings().enable_optimize_predicate_expression) current_inner_query = getRuntimeViewQuery(*query_info.query->as(), context); - QueryPipeline pipeline; InterpreterSelectWithUnionQuery interpreter(current_inner_query, context, {}, column_names); /// FIXME res may implicitly use some objects owned be pipeline, but them will be destructed after return if (query_info.force_tree_shaped_pipeline) { + QueryPipeline pipeline; BlockInputStreams streams = interpreter.executeWithMultipleStreams(pipeline); + + for (auto & stream : streams) + { + stream = std::make_shared(stream); + stream = std::make_shared(stream, getSampleBlockForColumns(column_names), + ConvertingBlockInputStream::MatchColumnsMode::Name); + } + for (auto & stream : streams) pipes.emplace_back(std::make_shared(std::move(stream))); } else - /// TODO: 
support multiple streams here. Need more general interface than pipes. - pipes.emplace_back(interpreter.executeWithProcessors().getPipe()); - - /// It's expected that the columns read from storage are not constant. - /// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery. - for (auto & pipe : pipes) { - pipe.addSimpleTransform(std::make_shared(pipe.getHeader())); + auto pipeline = interpreter.executeWithProcessors(); + + /// It's expected that the columns read from storage are not constant. + /// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery. + pipeline.addSimpleTransform([](const Block & header) + { + return std::make_shared(header); + }); /// And also convert to expected structure. - pipe.addSimpleTransform(std::make_shared( - pipe.getHeader(), getSampleBlockForColumns(column_names), - ConvertingTransform::MatchColumnsMode::Name)); + pipeline.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, getSampleBlockForColumns(column_names), + ConvertingTransform::MatchColumnsMode::Name); + }); + + pipes = std::move(pipeline).getPipes(); } return pipes; diff --git a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 3691ffb508d..9f73c00d22b 100644 --- a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -53,6 +53,7 @@ const char * auto_config_build[] "USE_SSL", "@USE_SSL@", "USE_HYPERSCAN", "@USE_HYPERSCAN@", "USE_SIMDJSON", "@USE_SIMDJSON@", + "USE_GRPC", "@USE_GRPC@", nullptr, nullptr }; diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index ab824fc8bdc..8eb8856512e 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -301,7 +301,7 @@ Pipes StorageSystemColumns::read( const DatabasePtr database = databases.at(database_name); offsets[i] = i ? 
offsets[i - 1] : 0; - for (auto iterator = database->getTablesIterator(); iterator->isValid(); iterator->next()) + for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next()) { const String & table_name = iterator->name(); storages.emplace(std::piecewise_construct, diff --git a/src/Storages/System/StorageSystemGraphite.cpp b/src/Storages/System/StorageSystemGraphite.cpp index cde75aa5550..bfa25a99838 100644 --- a/src/Storages/System/StorageSystemGraphite.cpp +++ b/src/Storages/System/StorageSystemGraphite.cpp @@ -25,7 +25,7 @@ NamesAndTypesList StorageSystemGraphite::getNamesAndTypes() /* * Looking for (Replicated)*GraphiteMergeTree and get all configuration parameters for them */ -static StorageSystemGraphite::Configs getConfigs() +static StorageSystemGraphite::Configs getConfigs(const Context & context) { const Databases databases = DatabaseCatalog::instance().getDatabases(); StorageSystemGraphite::Configs graphite_configs; @@ -36,7 +36,7 @@ static StorageSystemGraphite::Configs getConfigs() if (db.second->getEngineName() == "Lazy") continue; - for (auto iterator = db.second->getTablesIterator(); iterator->isValid(); iterator->next()) + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) { const auto & table = iterator->table(); @@ -71,9 +71,9 @@ static StorageSystemGraphite::Configs getConfigs() return graphite_configs; } -void StorageSystemGraphite::fillData(MutableColumns & res_columns, const Context &, const SelectQueryInfo &) const +void StorageSystemGraphite::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const { - Configs graphite_configs = getConfigs(); + Configs graphite_configs = getConfigs(context); for (const auto & config : graphite_configs) { diff --git a/src/Storages/System/StorageSystemMutations.cpp b/src/Storages/System/StorageSystemMutations.cpp index d4a262860dc..685565d82e1 100644 --- a/src/Storages/System/StorageSystemMutations.cpp +++ b/src/Storages/System/StorageSystemMutations.cpp @@ -51,7 +51,7 @@ void StorageSystemMutations::fillData(MutableColumns & res_columns, const Contex const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, db.first); - for (auto iterator = db.second->getTablesIterator(); iterator->isValid(); iterator->next()) + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) { if (!dynamic_cast(iterator->table().get())) continue; diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 6356e6d699e..3b97cbb2d9b 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -111,7 +111,7 @@ StoragesInfoStream::StoragesInfoStream(const SelectQueryInfo & query_info, const const DatabasePtr database = databases.at(database_name); offsets[i] = i ? 
offsets[i - 1] : 0; - for (auto iterator = database->getTablesIterator(); iterator->isValid(); iterator->next()) + for (auto iterator = database->getTablesIterator(context); iterator->isValid(); iterator->next()) { String table_name = iterator->name(); StoragePtr storage = iterator->table(); diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 734d0098d8d..251b45e44b6 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -76,7 +76,7 @@ Pipes StorageSystemReplicas::read( if (db.second->getEngineName() == "Lazy") continue; const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, db.first); - for (auto iterator = db.second->getTablesIterator(); iterator->isValid(); iterator->next()) + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) { if (!dynamic_cast(iterator->table().get())) continue; diff --git a/src/Storages/System/StorageSystemReplicationQueue.cpp b/src/Storages/System/StorageSystemReplicationQueue.cpp index a6f0af8cc6c..2c188cf3734 100644 --- a/src/Storages/System/StorageSystemReplicationQueue.cpp +++ b/src/Storages/System/StorageSystemReplicationQueue.cpp @@ -60,7 +60,7 @@ void StorageSystemReplicationQueue::fillData(MutableColumns & res_columns, const const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, db.first); - for (auto iterator = db.second->getTablesIterator(); iterator->isValid(); iterator->next()) + for (auto iterator = db.second->getTablesIterator(context); iterator->isValid(); iterator->next()) { if (!dynamic_cast(iterator->table().get())) continue; diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index d6951d8467f..2bf6595bf53 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -232,7 +232,7 @@ protected: const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name); if (!tables_it || !tables_it->isValid()) - tables_it = database->getTablesIterator(); + tables_it = database->getTablesIterator(context); const bool need_lock_structure = needLockStructure(database, getPort().getHeader()); @@ -331,7 +331,7 @@ protected: if (columns_mask[src_index] || columns_mask[src_index + 1]) { - ASTPtr ast = database->tryGetCreateTableQuery(table_name); + ASTPtr ast = database->tryGetCreateTableQuery(table_name, context); if (columns_mask[src_index++]) res_columns[res_index++]->insert(ast ? 
queryToString(ast) : ""); @@ -372,7 +372,7 @@ protected: if (columns_mask[src_index++]) { assert(table != nullptr); - if ((expression_ptr = table->getSortingKeyAST())) + if ((expression_ptr = table->getSortingKey().expression_list_ast)) res_columns[res_index++]->insert(queryToString(expression_ptr)); else res_columns[res_index++]->insertDefault(); @@ -381,7 +381,7 @@ protected: if (columns_mask[src_index++]) { assert(table != nullptr); - if ((expression_ptr = table->getPrimaryKeyAST())) + if ((expression_ptr = table->getPrimaryKey().expression_list_ast)) res_columns[res_index++]->insert(queryToString(expression_ptr)); else res_columns[res_index++]->insertDefault(); diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index d0e042d054f..e0755fe59ab 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -15,12 +16,26 @@ namespace DB { +namespace +{ + DataTypeEnum8::Values getAuthenticationTypeEnumValues() + { + DataTypeEnum8::Values enum_values; + for (auto type : ext::range(Authentication::MAX_TYPE)) + enum_values.emplace_back(Authentication::TypeInfo::get(type).name, static_cast(type)); + return enum_values; + } +} + + NamesAndTypesList StorageSystemUsers::getNamesAndTypes() { NamesAndTypesList names_and_types{ {"name", std::make_shared()}, {"id", std::make_shared()}, {"storage", std::make_shared()}, + {"auth_type", std::make_shared(getAuthenticationTypeEnumValues())}, + {"auth_params", std::make_shared(std::make_shared())}, {"host_ip", std::make_shared(std::make_shared())}, {"host_names", std::make_shared(std::make_shared())}, {"host_names_regexp", std::make_shared(std::make_shared())}, @@ -43,6 +58,9 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, const Context & auto & column_name = assert_cast(*res_columns[column_index++]); auto & column_id = assert_cast(*res_columns[column_index++]).getData(); auto & column_storage = assert_cast(*res_columns[column_index++]); + auto & column_auth_type = assert_cast(*res_columns[column_index++]).getData(); + auto & column_auth_params = assert_cast(assert_cast(*res_columns[column_index]).getData()); + auto & column_auth_params_offsets = assert_cast(*res_columns[column_index++]).getOffsets(); auto & column_host_ip = assert_cast(assert_cast(*res_columns[column_index]).getData()); auto & column_host_ip_offsets = assert_cast(*res_columns[column_index++]).getOffsets(); auto & column_host_names = assert_cast(assert_cast(*res_columns[column_index]).getData()); @@ -60,12 +78,15 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, const Context & auto add_row = [&](const String & name, const UUID & id, const String & storage_name, + const Authentication & authentication, const AllowedClientHosts & allowed_hosts, const ExtendedRoleSet & default_roles) { column_name.insertData(name.data(), name.length()); column_id.push_back(id); column_storage.insertData(storage_name.data(), storage_name.length()); + column_auth_type.push_back(static_cast(authentication.getType())); + column_auth_params_offsets.push_back(column_auth_params.size()); if (allowed_hosts.containsAnyHost()) { @@ -128,7 +149,7 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, const Context & if (!storage) continue; - add_row(user->getName(), id, storage->getStorageName(), user->allowed_client_hosts, user->default_roles); + add_row(user->getName(), id, 
storage->getStorageName(), user->authentication, user->allowed_client_hosts, user->default_roles); } } diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp new file mode 100644 index 00000000000..da9691aab4a --- /dev/null +++ b/src/Storages/TTLDescription.cpp @@ -0,0 +1,198 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +extern const int BAD_TTL_EXPRESSION; +} + +namespace +{ + +void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const String & result_column_name) +{ + for (const auto & action : ttl_expression->getActions()) + { + if (action.type == ExpressionAction::APPLY_FUNCTION) + { + IFunctionBase & func = *action.function_base; + if (!func.isDeterministic()) + throw Exception( + "TTL expression cannot contain non-deterministic functions, " + "but contains function " + + func.getName(), + ErrorCodes::BAD_ARGUMENTS); + } + } + + const auto & result_column = ttl_expression->getSampleBlock().getByName(result_column_name); + + if (!typeid_cast(result_column.type.get()) + && !typeid_cast(result_column.type.get())) + { + throw Exception( + "TTL expression result column should have DateTime or Date type, but has " + result_column.type->getName(), + ErrorCodes::BAD_TTL_EXPRESSION); + } +} + +} + +TTLDescription TTLDescription::getTTLFromAST( + const ASTPtr & definition_ast, + const ColumnsDescription & columns, + const Context & context, + const StorageMetadataKeyField & primary_key) +{ + TTLDescription result; + const auto * ttl_element = definition_ast->as(); + + /// First child is expression: `TTL expr TO DISK` + if (ttl_element != nullptr) + result.expression_ast = ttl_element->children.front()->clone(); + else /// It's columns TTL without any additions, just copy it + result.expression_ast = definition_ast->clone(); + + auto ttl_ast = result.expression_ast->clone(); + auto syntax_analyzer_result = SyntaxAnalyzer(context).analyze(ttl_ast, columns.getAllPhysical()); + result.expression = ExpressionAnalyzer(ttl_ast, syntax_analyzer_result, context).getActions(false); + result.result_column = ttl_ast->getColumnName(); + + if (ttl_element == nullptr) /// columns TTL + { + result.destination_type = DataDestinationType::DELETE; + result.mode = TTLMode::DELETE; + } + else /// rows TTL + { + result.destination_type = ttl_element->destination_type; + result.destination_name = ttl_element->destination_name; + result.mode = ttl_element->mode; + + if (ttl_element->mode == TTLMode::DELETE) + { + if (ASTPtr where_expr_ast = ttl_element->where()) + { + auto where_syntax_result = SyntaxAnalyzer(context).analyze(where_expr_ast, columns.getAllPhysical()); + result.where_expression = ExpressionAnalyzer(where_expr_ast, where_syntax_result, context).getActions(false); + result.where_result_column = where_expr_ast->getColumnName(); + } + } + else if (ttl_element->mode == TTLMode::GROUP_BY) + { + const auto & pk_columns = primary_key.column_names; + + if (ttl_element->group_by_key.size() > pk_columns.size()) + throw Exception("TTL Expression GROUP BY key should be a prefix of primary key", ErrorCodes::BAD_TTL_EXPRESSION); + + NameSet primary_key_columns_set(pk_columns.begin(), pk_columns.end()); + NameSet aggregation_columns_set; + + for (const auto & column : primary_key.expression->getRequiredColumns()) + primary_key_columns_set.insert(column); + + for (size_t i = 0; i < ttl_element->group_by_key.size(); ++i) + { + 
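/// (Editorial note, not part of the original patch: the check below compares each GROUP BY key
/// element with the corresponding primary key column by its serialized name, so together with the
/// size check above the TTL GROUP BY key is constrained to be a prefix of the primary key. For
/// example, with a primary key of (toDate(d), x), `TTL ... GROUP BY toDate(d)` would pass this
/// check, while `TTL ... GROUP BY x` would not.)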
if (ttl_element->group_by_key[i]->getColumnName() != pk_columns[i]) + throw Exception( + "TTL Expression GROUP BY key should be a prefix of primary key", + ErrorCodes::BAD_TTL_EXPRESSION); + } + + for (const auto & [name, value] : ttl_element->group_by_aggregations) + { + if (primary_key_columns_set.count(name)) + throw Exception( + "Can not set custom aggregation for column in primary key in TTL Expression", + ErrorCodes::BAD_TTL_EXPRESSION); + + aggregation_columns_set.insert(name); + } + + if (aggregation_columns_set.size() != ttl_element->group_by_aggregations.size()) + throw Exception( + "Multiple aggregations set for one column in TTL Expression", + ErrorCodes::BAD_TTL_EXPRESSION); + + + result.group_by_keys = Names(pk_columns.begin(), pk_columns.begin() + ttl_element->group_by_key.size()); + + auto aggregations = ttl_element->group_by_aggregations; + + for (size_t i = 0; i < pk_columns.size(); ++i) + { + ASTPtr value = primary_key.expression_list_ast->children[i]->clone(); + + if (i >= ttl_element->group_by_key.size()) + { + ASTPtr value_max = makeASTFunction("max", value->clone()); + aggregations.emplace_back(value->getColumnName(), std::move(value_max)); + } + + if (value->as()) + { + auto syntax_result = SyntaxAnalyzer(context).analyze(value, columns.getAllPhysical(), {}, true); + auto expr_actions = ExpressionAnalyzer(value, syntax_result, context).getActions(false); + for (const auto & column : expr_actions->getRequiredColumns()) + { + if (i < ttl_element->group_by_key.size()) + { + ASTPtr expr = makeASTFunction("any", std::make_shared(column)); + aggregations.emplace_back(column, std::move(expr)); + } + else + { + ASTPtr expr = makeASTFunction("argMax", std::make_shared(column), value->clone()); + aggregations.emplace_back(column, std::move(expr)); + } + } + } + } + + for (const auto & column : columns.getAllPhysical()) + { + if (!primary_key_columns_set.count(column.name) && !aggregation_columns_set.count(column.name)) + { + ASTPtr expr = makeASTFunction("any", std::make_shared(column.name)); + aggregations.emplace_back(column.name, std::move(expr)); + } + } + + for (auto [name, value] : aggregations) + { + auto syntax_result = SyntaxAnalyzer(context).analyze(value, columns.getAllPhysical(), {}, true); + auto expr_analyzer = ExpressionAnalyzer(value, syntax_result, context); + + result.set_parts.emplace_back(TTLAggregateDescription{ + name, value->getColumnName(), expr_analyzer.getActions(false)}); + + for (const auto & descr : expr_analyzer.getAnalyzedData().aggregate_descriptions) + result.aggregate_descriptions.push_back(descr); + } + } + } + + checkTTLExpression(result.expression, result.result_column); + + + return result; +} + +} diff --git a/src/Storages/TTLDescription.h b/src/Storages/TTLDescription.h new file mode 100644 index 00000000000..99a145b8acc --- /dev/null +++ b/src/Storages/TTLDescription.h @@ -0,0 +1,98 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +/// Assignment expression in TTL with GROUP BY +struct TTLAggregateDescription +{ + /// Name of column in assignment + /// x = sum(y) + /// ^ + String column_name; + + /// Name of column on the right hand of the assignment + /// x = sum(y) + /// ^~~~~~^ + String expression_result_column_name; + + /// Expressions to calculate the value of assignment expression + ExpressionActionsPtr expression; +}; + +using TTLAggregateDescriptions = std::vector; + +/// Common struct for TTL record in storage +struct TTLDescription +{ + TTLMode mode; + + /// 
Expression part of TTL AST: + /// TTL d + INTERVAL 1 DAY + /// ^~~~~~~~~~~~~~~~~~~^ + ASTPtr expression_ast; + + /// Expression actions evaluated from AST + ExpressionActionsPtr expression; + + /// Result column of this TTL expression + String result_column; + + /// WHERE part in TTL expression + /// TTL ... WHERE x % 10 == 0 and y > 5 + /// ^~~~~~~~~~~~~~~~~~~~~~^ + ExpressionActionsPtr where_expression; + + /// Name of result column from WHERE expression + String where_result_column; + + /// Names of key columns in GROUP BY expression + /// TTL ... GROUP BY toDate(d), x SET ... + /// ^~~~~~~~~~~~^ + Names group_by_keys; + + /// SET parts of TTL expression + TTLAggregateDescriptions set_parts; + + /// Aggregate descriptions for GROUP BY in TTL + AggregateDescriptions aggregate_descriptions; + + /// Destination type, only valid for table TTLs. + /// For example DISK or VOLUME + DataDestinationType destination_type; + + /// Name of destination disk or volume + String destination_name; + + /// Parse TTL structure from definition. Able to parse both column and table + /// TTLs. + static TTLDescription getTTLFromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns, const Context & context, const StorageMetadataKeyField & primary_key); +}; + +/// Mapping from column name to column TTL +using TTLColumnsDescription = std::unordered_map; +using TTLDescriptions = std::vector; + +/// Common TTL for the whole table. Specified after defining the table columns. +struct TTLTableDescription +{ + /// Definition. Includes all parts of TTL: + /// TTL d + INTERVAL 1 day TO VOLUME 'disk1' + /// ^~~~~~~~~~~~~~~definition~~~~~~~~~~~~~~~^ + ASTPtr definition_ast; + + /// Row-removing TTL + TTLDescription rows_ttl; + + /// Moving data TTL (to other disks or volumes) + TTLDescriptions move_ttl; +}; + +} diff --git a/src/Storages/TTLMode.h b/src/Storages/TTLMode.h new file mode 100644 index 00000000000..0681f10fc17 --- /dev/null +++ b/src/Storages/TTLMode.h @@ -0,0 +1,14 @@ +#pragma once + + +namespace DB +{ + +enum class TTLMode +{ + DELETE, + MOVE, + GROUP_BY +}; + +} diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 711612cf9e5..19d1172f1ff 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -84,7 +84,7 @@ ColumnsDescription getStructureOfRemoteTableInShard( else { if (shard_info.isLocal()) - return DatabaseCatalog::instance().getTable(table_id)->getColumns(); + return DatabaseCatalog::instance().getTable(table_id, context)->getColumns(); /// Request for a table description query = "DESC TABLE " + table_id.getFullTableName(); diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index 203104801aa..fff352210e7 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -68,7 +68,7 @@ using DiskImplementations = testing::Types; TYPED_TEST_SUITE(StorageLogTest, DiskImplementations); // Returns data written to table in Values format. -std::string writeData(int rows, DB::StoragePtr & table, DB::Context & context) +std::string writeData(int rows, DB::StoragePtr & table, const DB::Context & context) { using namespace DB; @@ -104,7 +104,7 @@ std::string writeData(int rows, DB::StoragePtr & table, DB::Context & context) } // Returns all table data in Values format.
-std::string readData(DB::StoragePtr & table, DB::Context & context) +std::string readData(DB::StoragePtr & table, const DB::Context & context) { using namespace DB; @@ -136,7 +136,7 @@ std::string readData(DB::StoragePtr & table, DB::Context & context) TYPED_TEST(StorageLogTest, testReadWrite) { using namespace DB; - auto context_holder = getContext(); + const auto & context_holder = getContext(); std::string data; diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp index df6ee2bd0a5..bf86322a676 100644 --- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp +++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp @@ -18,7 +18,7 @@ using namespace DB; /// NOTE How to do better? struct State { - Context & context; + Context context; NamesAndTypesList columns{ {"column", std::make_shared()}, {"apply_id", std::make_shared()}, @@ -27,10 +27,11 @@ struct State {"create_time", std::make_shared()}, }; - explicit State(Context & context_) : context(context_) + explicit State() + : context(getContext().context) { registerFunctions(); - DatabasePtr database = std::make_shared("test"); + DatabasePtr database = std::make_shared("test", context); database->attachTable("table", StorageMemory::create(StorageID("test", "table"), ColumnsDescription{columns}, ConstraintsDescription{})); context.makeGlobalContext(); DatabaseCatalog::instance().attachDatabase("test", database); @@ -38,6 +39,11 @@ struct State } }; +State getState() +{ + static State state; + return state; +} static void check(const std::string & query, const std::string & expected, const Context & context, const NamesAndTypesList & columns) { @@ -54,8 +60,7 @@ static void check(const std::string & query, const std::string & expected, const TEST(TransformQueryForExternalDatabase, InWithSingleElement) { - auto context_holder = getContext(); - State state(context_holder.context); + const State & state = getState(); check("SELECT column FROM test.table WHERE 1 IN (1)", R"(SELECT "column" FROM "test"."table" WHERE 1)", @@ -70,8 +75,7 @@ TEST(TransformQueryForExternalDatabase, InWithSingleElement) TEST(TransformQueryForExternalDatabase, Like) { - auto context_holder = getContext(); - State state(context_holder.context); + const State & state = getState(); check("SELECT column FROM test.table WHERE column LIKE '%hello%'", R"(SELECT "column" FROM "test"."table" WHERE "column" LIKE '%hello%')", @@ -83,8 +87,7 @@ TEST(TransformQueryForExternalDatabase, Like) TEST(TransformQueryForExternalDatabase, Substring) { - auto context_holder = getContext(); - State state(context_holder.context); + const State & state = getState(); check("SELECT column FROM test.table WHERE left(column, 10) = RIGHT(column, 10) AND SUBSTRING(column FROM 1 FOR 2) = 'Hello'", R"(SELECT "column" FROM "test"."table")", @@ -93,8 +96,7 @@ TEST(TransformQueryForExternalDatabase, Substring) TEST(TransformQueryForExternalDatabase, MultipleAndSubqueries) { - auto context_holder = getContext(); - State state(context_holder.context); + const State & state = getState(); check("SELECT column FROM test.table WHERE 1 = 1 AND toString(column) = '42' AND column = 42 AND left(column, 10) = RIGHT(column, 10) AND column IN (1, 42) AND SUBSTRING(column FROM 1 FOR 2) = 'Hello' AND column != 4", R"(SELECT "column" FROM "test"."table" WHERE 1 AND ("column" = 42) AND ("column" IN (1, 42)) AND ("column" != 4))", @@ -106,8 +108,7 @@ TEST(TransformQueryForExternalDatabase, 
MultipleAndSubqueries) TEST(TransformQueryForExternalDatabase, Issue7245) { - auto context_holder = getContext(); - State state(context_holder.context); + const State & state = getState(); check("select apply_id from test.table where apply_type = 2 and create_time > addDays(toDateTime('2019-01-01 01:02:03'),-7) and apply_status in (3,4)", R"(SELECT "apply_id", "apply_type", "apply_status", "create_time" FROM "test"."table" WHERE ("apply_type" = 2) AND ("create_time" > '2018-12-25 01:02:03') AND ("apply_status" IN (3, 4)))", diff --git a/src/Storages/ya.make b/src/Storages/ya.make index ffa3924d11a..8a36fad696f 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -173,6 +173,7 @@ SRCS( StorageXDBC.cpp transformQueryForExternalDatabase.cpp VirtualColumnUtils.cpp + TTLDescription.cpp ) END() diff --git a/src/TableFunctions/TableFunctionMerge.cpp b/src/TableFunctions/TableFunctionMerge.cpp index 7a161c13bdc..cd924270f7c 100644 --- a/src/TableFunctions/TableFunctionMerge.cpp +++ b/src/TableFunctions/TableFunctionMerge.cpp @@ -22,7 +22,7 @@ namespace ErrorCodes } -static NamesAndTypesList chooseColumns(const String & source_database, const String & table_name_regexp_) +static NamesAndTypesList chooseColumns(const String & source_database, const String & table_name_regexp_, const Context & context) { OptimizedRegularExpression table_name_regexp(table_name_regexp_); auto table_name_match = [&](const String & table_name) { return table_name_regexp.match(table_name); }; @@ -31,7 +31,7 @@ static NamesAndTypesList chooseColumns(const String & source_database, const Str { auto database = DatabaseCatalog::instance().getDatabase(source_database); - auto iterator = database->getTablesIterator(table_name_match); + auto iterator = database->getTablesIterator(context, table_name_match); if (iterator->isValid()) any_table = iterator->table(); @@ -69,7 +69,7 @@ StoragePtr TableFunctionMerge::executeImpl(const ASTPtr & ast_function, const Co auto res = StorageMerge::create( StorageID(getDatabaseName(), table_name), - ColumnsDescription{chooseColumns(source_database, table_name_regexp)}, + ColumnsDescription{chooseColumns(source_database, table_name_regexp, context)}, source_database, table_name_regexp, context); diff --git a/src/ya.make b/src/ya.make index 95e5914bbe7..eedd98cb178 100644 --- a/src/ya.make +++ b/src/ya.make @@ -19,6 +19,7 @@ PEERDIR( clickhouse/src/IO clickhouse/src/Parsers clickhouse/src/Processors + clickhouse/src/Server clickhouse/src/Storages clickhouse/src/TableFunctions ) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 324fd13aac2..2d155313b06 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -30,6 +30,8 @@ from contextlib import closing MESSAGES_TO_RETRY = [ "DB::Exception: ZooKeeper session has been expired", "Coordination::Exception: Connection loss", + "Operation timed out", + "ConnectionPoolWithFailover: Connection failed at try", ] @@ -234,14 +236,6 @@ def run_tests_array(all_tests_with_params): clickhouse_proc = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE) clickhouse_proc.communicate("SELECT 'Running test {suite}/{case} from pid={pid}';".format(pid = os.getpid(), case = case, suite = suite)) - if not args.no_system_log_cleanup: - clickhouse_proc = Popen(shlex.split(args.client), stdin=PIPE, stdout=PIPE, stderr=PIPE) - clickhouse_proc.communicate("SYSTEM FLUSH LOGS") - - for table in ['query_log', 'query_thread_log', 'trace_log', 'metric_log']: - clickhouse_proc = Popen(shlex.split(args.client), stdin=PIPE, 
stdout=PIPE, stderr=PIPE) - clickhouse_proc.communicate("TRUNCATE TABLE IF EXISTS system.{}".format(table)) - reference_file = os.path.join(suite_dir, name) + '.reference' stdout_file = os.path.join(suite_tmp_dir, name) + '.stdout' stderr_file = os.path.join(suite_tmp_dir, name) + '.stderr' @@ -572,7 +566,6 @@ if __name__ == '__main__': parser.add_argument('--stop', action='store_true', default=None, dest='stop', help='Stop on network errors') parser.add_argument('--order', default='desc', choices=['asc', 'desc', 'random'], help='Run order') parser.add_argument('--testname', action='store_true', default=None, dest='testname', help='Make query with test name before test run') - parser.add_argument('--no-system-log-cleanup', action='store_true', default=None, help='Do not cleanup system.*_log tables') parser.add_argument('--hung-check', action='store_true', default=False) parser.add_argument('--force-color', action='store_true', default=False) parser.add_argument('--database', help='Database for tests (random name test_XXXXXX by default)') diff --git a/tests/integration/test_authentication/test.py b/tests/integration/test_authentication/test.py index 483b59813e5..dedd5410188 100644 --- a/tests/integration/test_authentication/test.py +++ b/tests/integration/test_authentication/test.py @@ -23,6 +23,10 @@ def test_authentication_pass(): assert instance.query("SELECT currentUser()", user='sasha') == 'sasha\n' assert instance.query("SELECT currentUser()", user='masha', password='qwerty') == 'masha\n' + # 'no_password' authentication type allows to login with any password. + assert instance.query("SELECT currentUser()", user='sasha', password='something') == 'sasha\n' + assert instance.query("SELECT currentUser()", user='sasha', password='something2') == 'sasha\n' + def test_authentication_fail(): # User doesn't exist. 
diff --git a/tests/integration/test_default_role/__init__.py b/tests/integration/test_default_role/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_default_role/test.py b/tests/integration/test_default_role/test.py new file mode 100644 index 00000000000..2b6b4698b20 --- /dev/null +++ b/tests/integration/test_default_role/test.py @@ -0,0 +1,77 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +import re + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance('instance') + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + + instance.query("CREATE USER john") + instance.query("CREATE ROLE rx") + instance.query("CREATE ROLE ry") + + yield cluster + + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def reset_users_and_roles(): + instance.query("CREATE USER OR REPLACE john") + yield + + +def test_set_default_roles(): + assert instance.query("SHOW CURRENT ROLES", user="john") == "" + + instance.query("GRANT rx, ry TO john") + assert instance.query("SHOW CURRENT ROLES", user="john") == TSV( [['rx', 0, 1], ['ry', 0, 1]] ) + + instance.query("SET DEFAULT ROLE NONE TO john") + assert instance.query("SHOW CURRENT ROLES", user="john") == "" + + instance.query("SET DEFAULT ROLE rx TO john") + assert instance.query("SHOW CURRENT ROLES", user="john") == TSV( [['rx', 0, 1]] ) + + instance.query("SET DEFAULT ROLE ry TO john") + assert instance.query("SHOW CURRENT ROLES", user="john") == TSV( [['ry', 0, 1]] ) + + instance.query("SET DEFAULT ROLE ALL TO john") + assert instance.query("SHOW CURRENT ROLES", user="john") == TSV( [['rx', 0, 1], ['ry', 0, 1]] ) + + instance.query("SET DEFAULT ROLE ALL EXCEPT rx TO john") + assert instance.query("SHOW CURRENT ROLES", user="john") == TSV( [['ry', 0, 1]] ) + + +def test_alter_user(): + assert instance.query("SHOW CURRENT ROLES", user="john") == "" + + instance.query("GRANT rx, ry TO john") + assert instance.query("SHOW CURRENT ROLES", user="john") == TSV( [['rx', 0, 1], ['ry', 0, 1]] ) + + instance.query("ALTER USER john DEFAULT ROLE NONE") + assert instance.query("SHOW CURRENT ROLES", user="john") == "" + + instance.query("ALTER USER john DEFAULT ROLE rx") + assert instance.query("SHOW CURRENT ROLES", user="john") == TSV( [['rx', 0, 1]] ) + + instance.query("ALTER USER john DEFAULT ROLE ALL") + assert instance.query("SHOW CURRENT ROLES", user="john") == TSV( [['rx', 0, 1], ['ry', 0, 1]] ) + + instance.query("ALTER USER john DEFAULT ROLE ALL EXCEPT rx") + assert instance.query("SHOW CURRENT ROLES", user="john") == TSV( [['ry', 0, 1]] ) + + +def test_wrong_set_default_role(): + assert "There is no user `rx`" in instance.query_and_get_error("SET DEFAULT ROLE NONE TO rx") + assert "There is no user `ry`" in instance.query_and_get_error("SET DEFAULT ROLE rx TO ry") + assert "There is no role `john`" in instance.query_and_get_error("SET DEFAULT ROLE john TO john") + assert "There is no role `john`" in instance.query_and_get_error("ALTER USER john DEFAULT ROLE john") + assert "There is no role `john`" in instance.query_and_get_error("ALTER USER john DEFAULT ROLE ALL EXCEPT john") diff --git a/tests/integration/test_dictionary_allow_read_expired_keys/configs/dictionaries/cache_strings_default_settings.xml b/tests/integration/test_dictionary_allow_read_expired_keys/configs/dictionaries/cache_strings_default_settings.xml new file mode 100644 index 00000000000..11807bc1ad6 --- /dev/null +++ 
b/tests/integration/test_dictionary_allow_read_expired_keys/configs/dictionaries/cache_strings_default_settings.xml @@ -0,0 +1,35 @@ + + + default_string + + + dictionary_node + 9000 + default + + test + strings
+
+ + + 2 + 1 + + + + 1000 + 10000 + + + + + key + + + value + String + > + + +
+
\ No newline at end of file diff --git a/tests/integration/test_dictionary_allow_read_expired_keys/test_default_string.py b/tests/integration/test_dictionary_allow_read_expired_keys/test_default_string.py new file mode 100644 index 00000000000..7d762db2a6d --- /dev/null +++ b/tests/integration/test_dictionary_allow_read_expired_keys/test_default_string.py @@ -0,0 +1,61 @@ +from __future__ import print_function +import pytest +import os +import random +import string +import time + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs')) + +dictionary_node = cluster.add_instance('dictionary_node', stay_alive=True) +main_node = cluster.add_instance('main_node', main_configs=['configs/dictionaries/cache_strings_default_settings.xml']) + + +def get_random_string(string_length=8): + alphabet = string.ascii_letters + string.digits + return ''.join((random.choice(alphabet) for _ in range(string_length))) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + dictionary_node.query("CREATE DATABASE IF NOT EXISTS test;") + dictionary_node.query("DROP TABLE IF EXISTS test.strings;") + dictionary_node.query(""" + CREATE TABLE test.strings + (key UInt64, value String) + ENGINE = Memory; + """) + + values_to_insert = ", ".join(["({}, '{}')".format(1000000 + number, get_random_string()) for number in range(100)]) + dictionary_node.query("INSERT INTO test.strings VALUES {}".format(values_to_insert)) + + yield cluster + finally: + cluster.shutdown() + +# @pytest.mark.skip(reason="debugging") +def test_return_real_values(started_cluster): + assert None != dictionary_node.get_process_pid("clickhouse"), "ClickHouse must be alive" + + first_batch = """ + SELECT count(*) + FROM + ( + SELECT + arrayJoin(arrayMap(x -> (x + 1000000), range(100))) AS id, + dictGetString('default_string', 'value', toUInt64(id)) AS value + ) + WHERE value = ''; + """ + + assert TSV("0") == TSV(main_node.query(first_batch)) + + # Waiting for cache to become expired + time.sleep(5) + + assert TSV("0") == TSV(main_node.query(first_batch)) diff --git a/tests/integration/test_disk_access_storage/test.py b/tests/integration/test_disk_access_storage/test.py index 315440b4358..a47af5ad5b8 100644 --- a/tests/integration/test_disk_access_storage/test.py +++ b/tests/integration/test_disk_access_storage/test.py @@ -39,7 +39,7 @@ def test_create(): def check(): assert instance.query("SHOW CREATE USER u1") == "CREATE USER u1 SETTINGS PROFILE s1\n" - assert instance.query("SHOW CREATE USER u2") == "CREATE USER u2 HOST LOCAL DEFAULT ROLE rx\n" + assert instance.query("SHOW CREATE USER u2") == "CREATE USER u2 IDENTIFIED WITH sha256_password HOST LOCAL DEFAULT ROLE rx\n" assert instance.query("SHOW CREATE ROW POLICY p ON mydb.mytable") == "CREATE ROW POLICY p ON mydb.mytable FOR SELECT USING a < 1000 TO u1, u2\n" assert instance.query("SHOW CREATE QUOTA q") == "CREATE QUOTA q KEYED BY \\'none\\' FOR INTERVAL 1 HOUR MAX QUERIES 100 TO ALL EXCEPT rx\n" assert instance.query("SHOW GRANTS FOR u1") == "" @@ -69,7 +69,7 @@ def test_alter(): def check(): assert instance.query("SHOW CREATE USER u1") == "CREATE USER u1 SETTINGS PROFILE s1\n" - assert instance.query("SHOW CREATE USER u2") == "CREATE USER u2 HOST LOCAL DEFAULT ROLE ry\n" + assert instance.query("SHOW CREATE USER u2") == "CREATE USER u2 IDENTIFIED WITH sha256_password HOST LOCAL DEFAULT ROLE 
ry\n" assert instance.query("SHOW GRANTS FOR u1") == "GRANT SELECT ON mydb.mytable TO u1\n" assert instance.query("SHOW GRANTS FOR u2") == "GRANT rx, ry TO u2\n" assert instance.query("SHOW CREATE ROLE rx") == "CREATE ROLE rx SETTINGS PROFILE s2\n" diff --git a/tests/integration/test_grant_and_revoke/test.py b/tests/integration/test_grant_and_revoke/test.py index 8df68547f38..7054ce28e59 100644 --- a/tests/integration/test_grant_and_revoke/test.py +++ b/tests/integration/test_grant_and_revoke/test.py @@ -155,9 +155,9 @@ def test_introspection(): assert instance.query("SHOW ENABLED ROLES", user='A') == TSV([[ "R1", 0, 1, 1 ]]) assert instance.query("SHOW ENABLED ROLES", user='B') == TSV([[ "R2", 1, 1, 1 ]]) - assert instance.query("SELECT name, storage, host_ip, host_names, host_names_regexp, host_names_like, default_roles_all, default_roles_list, default_roles_except from system.users WHERE name IN ('A', 'B') ORDER BY name") ==\ - TSV([[ "A", "disk", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ], - [ "B", "disk", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ]]) + assert instance.query("SELECT name, storage, auth_type, auth_params, host_ip, host_names, host_names_regexp, host_names_like, default_roles_all, default_roles_list, default_roles_except from system.users WHERE name IN ('A', 'B') ORDER BY name") ==\ + TSV([[ "A", "disk", "no_password", "[]", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ], + [ "B", "disk", "no_password", "[]", "['::/0']", "[]", "[]", "[]", 1, "[]", "[]" ]]) assert instance.query("SELECT name, storage from system.roles WHERE name IN ('R1', 'R2') ORDER BY name") ==\ TSV([[ "R1", "disk" ], diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 87f0b4ec3be..cbfeb9dac3a 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -360,7 +360,6 @@ def test_max_data_part_size(start_cluster, name, engine): finally: node1.query("DROP TABLE IF EXISTS {}".format(name)) -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("mt_with_overflow","MergeTree()"), ("replicated_mt_with_overflow","ReplicatedMergeTree('/clickhouse/replicated_mt_with_overflow', '1')",), @@ -455,7 +454,6 @@ def test_background_move(start_cluster, name, engine): finally: node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("stopped_moving_mt","MergeTree()"), ("stopped_moving_replicated_mt","ReplicatedMergeTree('/clickhouse/stopped_moving_replicated_mt', '1')",), @@ -722,7 +720,6 @@ def produce_alter_move(node, name): pass -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("concurrently_altering_mt","MergeTree()"), ("concurrently_altering_replicated_mt","ReplicatedMergeTree('/clickhouse/concurrently_altering_replicated_mt', '1')",), @@ -776,7 +773,6 @@ def test_concurrent_alter_move(start_cluster, name, engine): finally: node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("concurrently_dropping_mt","MergeTree()"), ("concurrently_dropping_replicated_mt","ReplicatedMergeTree('/clickhouse/concurrently_dropping_replicated_mt', '1')",), @@ -905,8 +901,6 @@ def test_mutate_to_another_disk(start_cluster, name, engine): finally: node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) - -@pytest.mark.skip(reason="Flappy test") 
@pytest.mark.parametrize("name,engine", [ ("alter_modifying_mt","MergeTree()"), ("replicated_alter_modifying_mt","ReplicatedMergeTree('/clickhouse/replicated_alter_modifying_mt', '1')",), @@ -939,7 +933,11 @@ def test_concurrent_alter_modify(start_cluster, name, engine): def alter_modify(num): for i in range(num): column_type = random.choice(["UInt64", "String"]) - node1.query("ALTER TABLE {} MODIFY COLUMN number {}".format(name, column_type)) + try: + node1.query("ALTER TABLE {} MODIFY COLUMN number {}".format(name, column_type)) + except: + if "Replicated" not in engine: + raise insert(100) diff --git a/tests/integration/test_row_policy/test.py b/tests/integration/test_row_policy/test.py index a1884d059c7..71496c6dbf2 100644 --- a/tests/integration/test_row_policy/test.py +++ b/tests/integration/test_row_policy/test.py @@ -42,8 +42,13 @@ def started_cluster(): CREATE TABLE mydb.`.filtered_table4` (a UInt8, b UInt8, c UInt16 ALIAS a + b) ENGINE MergeTree ORDER BY a; INSERT INTO mydb.`.filtered_table4` values (0, 0), (0, 1), (1, 0), (1, 1); + + CREATE TABLE mydb.local (a UInt8, b UInt8) ENGINE MergeTree ORDER BY a; ''') + node.query("INSERT INTO mydb.local values (2, 0), (2, 1), (1, 0), (1, 1)") + node2.query("INSERT INTO mydb.local values (3, 0), (3, 1), (1, 0), (1, 1)") + yield cluster finally: @@ -122,6 +127,17 @@ def test_single_table_name(): assert node.query("SELECT a + b = 1 FROM mydb.filtered_table3") == TSV([[1], [1]]) +def test_policy_from_users_xml_affects_only_user_assigned(): + assert node.query("SELECT * FROM mydb.filtered_table1") == TSV([[1,0], [1, 1]]) + assert node.query("SELECT * FROM mydb.filtered_table1", user="another") == TSV([[0, 0], [0, 1], [1, 0], [1, 1]]) + + assert node.query("SELECT * FROM mydb.filtered_table2") == TSV([[0, 0, 0, 0], [0, 0, 6, 0]]) + assert node.query("SELECT * FROM mydb.filtered_table2", user="another") == TSV([[0, 0, 0, 0], [0, 0, 6, 0], [1, 2, 3, 4], [4, 3, 2, 1]]) + + assert node.query("SELECT * FROM mydb.local") == TSV([[1,0], [1, 1], [2, 0], [2, 1]]) + assert node.query("SELECT * FROM mydb.local", user="another") == TSV([[1, 0], [1, 1]]) + + def test_custom_table_name(): copy_policy_xml('multiple_tags_with_table_names.xml') assert node.query("SELECT * FROM mydb.table") == TSV([[1, 0], [1, 1]]) @@ -286,9 +302,5 @@ def test_miscellaneous_engines(): # DistributedMergeTree node.query("DROP TABLE IF EXISTS mydb.not_filtered_table") node.query("CREATE TABLE mydb.not_filtered_table (a UInt8, b UInt8) ENGINE Distributed('test_local_cluster', mydb, local)") - node.query("CREATE TABLE mydb.local (a UInt8, b UInt8) ENGINE MergeTree ORDER BY a") - node2.query("CREATE TABLE mydb.local (a UInt8, b UInt8) ENGINE MergeTree ORDER BY a") - node.query("INSERT INTO mydb.local values (2, 0), (2, 1), (1, 0), (1, 1)") - node2.query("INSERT INTO mydb.local values (3, 0), (3, 1), (1, 0), (1, 1)") assert node.query("SELECT * FROM mydb.not_filtered_table", user="another") == TSV([[1, 0], [1, 1], [1, 0], [1, 1]]) assert node.query("SELECT sum(a), b FROM mydb.not_filtered_table GROUP BY b ORDER BY b", user="another") == TSV([[2, 0], [2, 1]]) diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 9154ad67c05..13577864870 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -246,6 +246,50 @@ def test_kafka_consumer_hang(kafka_cluster): # 'dr'||'op' to avoid self matching assert int(instance.query("select count() from system.processes where 
position(lower(query),'dr'||'op')>0")) == 0 +@pytest.mark.timeout(180) +def test_kafka_consumer_hang2(kafka_cluster): + + instance.query(''' + DROP TABLE IF EXISTS test.kafka; + + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'consumer_hang2', + kafka_group_name = 'consumer_hang2', + kafka_format = 'JSONEachRow'; + + CREATE TABLE test.kafka2 (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'consumer_hang2', + kafka_group_name = 'consumer_hang2', + kafka_format = 'JSONEachRow'; + ''') + + # first consumer subscribe the topic, try to poll some data, and go to rest + instance.query('SELECT * FROM test.kafka') + + # second consumer do the same leading to rebalance in the first + # consumer, try to poll some data + instance.query('SELECT * FROM test.kafka2') + +#echo 'SELECT * FROM test.kafka; SELECT * FROM test.kafka2; DROP TABLE test.kafka;' | clickhouse client -mn & +# kafka_cluster.open_bash_shell('instance') + + # first consumer has pending rebalance callback unprocessed (no poll after select) + # one of those queries was failing because of + # https://github.com/edenhill/librdkafka/issues/2077 + # https://github.com/edenhill/librdkafka/issues/2898 + instance.query('DROP TABLE test.kafka') + instance.query('DROP TABLE test.kafka2') + + + # from a user perspective: we expect no hanging 'drop' queries + # 'dr'||'op' to avoid self matching + assert int(instance.query("select count() from system.processes where position(lower(query),'dr'||'op')>0")) == 0 + + @pytest.mark.timeout(180) def test_kafka_csv_with_delimiter(kafka_cluster): instance.query(''' @@ -973,7 +1017,10 @@ def test_kafka_flush_by_block_size(kafka_cluster): time.sleep(1) - result = instance.query('SELECT count() FROM test.view') + # TODO: due to https://github.com/ClickHouse/ClickHouse/issues/11216 + # second flush happens earlier than expected, so we have 2 parts here instead of one + # flush by block size works correctly, so the feature checked by the test is working correctly + result = instance.query("SELECT count() FROM test.view WHERE _part='all_1_1_0'") # print(result) # kafka_cluster.open_bash_shell('instance') @@ -1130,6 +1177,7 @@ def test_kafka_rebalance(kafka_cluster): print(instance.query('SELECT count(), uniqExact(key), max(key) + 1 FROM test.destination')) + # Some queries to debug... 
# SELECT * FROM test.destination where key in (SELECT key FROM test.destination group by key having count() <> 1) # select number + 1 as key from numbers(4141) left join test.destination using (key) where test.destination.key = 0; # SELECT * FROM test.destination WHERE key between 2360 and 2370 order by key; @@ -1137,6 +1185,18 @@ def test_kafka_rebalance(kafka_cluster): # select toUInt64(0) as _partition, number + 1 as _offset from numbers(400) left join test.destination using (_partition,_offset) where test.destination.key = 0 order by _offset; # SELECT * FROM test.destination WHERE _partition = 0 and _offset between 220 and 240 order by _offset; + # CREATE TABLE test.reference (key UInt64, value UInt64) ENGINE = Kafka SETTINGS kafka_broker_list = 'kafka1:19092', + # kafka_topic_list = 'topic_with_multiple_partitions', + # kafka_group_name = 'rebalance_test_group_reference', + # kafka_format = 'JSONEachRow', + # kafka_max_block_size = 100000; + # + # CREATE MATERIALIZED VIEW test.reference_mv Engine=Log AS + # SELECT key, value, _topic,_key,_offset, _partition, _timestamp, 'reference' as _consumed_by + # FROM test.reference; + # + # select * from test.reference_mv left join test.destination using (key,_topic,_offset,_partition) where test.destination._consumed_by = ''; + result = int(instance.query('SELECT count() == uniqExact(key) FROM test.destination')) for consumer_index in range(NUMBER_OF_CONSURRENT_CONSUMERS): @@ -1333,6 +1393,41 @@ def test_commits_of_unprocessed_messages_on_drop(kafka_cluster): assert TSV(result) == TSV('{0}\t{0}\t{0}'.format(i[0]-1)), 'Missing data!' + +@pytest.mark.timeout(120) +def test_bad_reschedule(kafka_cluster): + messages = [json.dumps({'key': j+1, 'value': j+1}) for j in range(20000)] + kafka_produce('test_bad_reschedule', messages) + + instance.query(''' + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS kafka_broker_list = 'kafka1:19092', + kafka_topic_list = 'test_bad_reschedule', + kafka_group_name = 'test_bad_reschedule', + kafka_format = 'JSONEachRow', + kafka_max_block_size = 1000; + + CREATE MATERIALIZED VIEW test.destination Engine=Log AS + SELECT + key, + now() as consume_ts, + value, + _topic, + _key, + _offset, + _partition, + _timestamp + FROM test.kafka; + ''') + + while int(instance.query("SELECT count() FROM test.destination")) < 20000: + print("Waiting for consume") + time.sleep(1) + + assert int(instance.query("SELECT max(consume_ts) - min(consume_ts) FROM test.destination")) < 8 + + @pytest.mark.timeout(1200) def test_kafka_duplicates_when_commit_failed(kafka_cluster): messages = [json.dumps({'key': j+1, 'value': 'x' * 300}) for j in range(22)] diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 720701dd616..9f124507e14 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1,6 +1,7 @@ import json import logging import random +import threading import pytest @@ -278,3 +279,31 @@ def test_wrong_s3_syntax(cluster, s3_storage_args): query = "create table test_table_s3_syntax (id UInt32) ENGINE = S3({})".format(s3_storage_args) assert expected_err_msg in instance.query_and_get_error(query) + + +# https://en.wikipedia.org/wiki/One_Thousand_and_One_Nights +def test_s3_glob_scheherazade(cluster): + bucket = cluster.minio_bucket + instance = cluster.instances["dummy"] # type: ClickHouseInstance + table_format = "column1 UInt32, column2 UInt32, column3 UInt32" + max_path = "" + values = "(1, 1, 1)" + 
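# (Editorial note, not part of the original patch: the loop below splits the 1001 inserts into
# batches of nights_per_job = 1001 // 30 = 33 and starts one uploader thread per batch, so the
# final glob query over night_*/tale.csv aggregates rows written concurrently by roughly 31 threads.)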
nights_per_job = 1001 // 30 + jobs = [] + for night in range(0, 1001, nights_per_job): + def add_tales(start, end): + for i in range(start, end): + path = "night_{}/tale.csv".format(i) + query = "insert into table function s3('http://{}:{}/{}/{}', 'CSV', '{}') values {}".format( + cluster.minio_host, cluster.minio_port, bucket, path, table_format, values) + run_query(instance, query) + + jobs.append(threading.Thread(target=add_tales, args=(night, min(night+nights_per_job, 1001)))) + jobs[-1].start() + + for job in jobs: + job.join() + + query = "select count(), sum(column1), sum(column2), sum(column3) from s3('http://{}:{}/{}/night_*/tale.csv', 'CSV', '{}')".format( + cluster.minio_redirect_host, cluster.minio_redirect_port, bucket, table_format) + assert run_query(instance, query).splitlines() == ["1001\t1001\t1001\t1001"] diff --git a/tests/integration/test_system_queries/test.py b/tests/integration/test_system_queries/test.py index 1761017362a..6f36a13b184 100644 --- a/tests/integration/test_system_queries/test.py +++ b/tests/integration/test_system_queries/test.py @@ -95,17 +95,25 @@ def test_RELOAD_CONFIG_AND_MACROS(started_cluster): def test_SYSTEM_FLUSH_LOGS(started_cluster): instance = cluster.instances['ch1'] + instance.query(''' + SET log_queries = 0; + SYSTEM FLUSH LOGS; + TRUNCATE TABLE system.query_log; + ''') for i in range(4): # Sleep to execute flushing from background thread at first query # by expiration of flush_interval_millisecond and test probable race condition. time.sleep(0.5) result = instance.query(''' - SET log_queries = 1; SELECT 1 FORMAT Null; SET log_queries = 0; SYSTEM FLUSH LOGS; SELECT count() FROM system.query_log;''') - instance.query('TRUNCATE TABLE system.query_log') + instance.query(''' + SET log_queries = 0; + SYSTEM FLUSH LOGS; + TRUNCATE TABLE system.query_log; + ''') assert TSV(result) == TSV('4') diff --git a/tests/integration/test_ttl_move/test.py b/tests/integration/test_ttl_move/test.py index 243268260d0..c6453beea6a 100644 --- a/tests/integration/test_ttl_move/test.py +++ b/tests/integration/test_ttl_move/test.py @@ -59,7 +59,6 @@ def get_used_disks_for_table(node, table_name, partition=None): """.format(name=table_name, suffix=suffix)).strip().split('\n') -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine,alter", [ ("mt_test_rule_with_invalid_destination","MergeTree()",0), ("replicated_mt_test_rule_with_invalid_destination","ReplicatedMergeTree('/clickhouse/replicated_test_rule_with_invalid_destination', '1')",0), @@ -119,7 +118,6 @@ def test_rule_with_invalid_destination(started_cluster, name, engine, alter): node1.query("DROP TABLE IF EXISTS {}".format(name)) -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine,positive", [ ("mt_test_inserts_to_disk_do_not_work","MergeTree()",0), ("replicated_mt_test_inserts_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_inserts_to_disk_do_not_work', '1')",0), @@ -149,10 +147,12 @@ def test_inserts_to_disk_work(started_cluster, name, engine, positive): assert node1.query("SELECT count() FROM {name}".format(name=name)).strip() == "10" finally: - node1.query("DROP TABLE IF EXISTS {}".format(name)) + try: + node1.query("DROP TABLE IF EXISTS {}".format(name)) + except: + pass -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine,positive", [ ("mt_test_moves_to_disk_do_not_work","MergeTree()",0), 
("replicated_mt_test_moves_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_disk_do_not_work', '1')",0), @@ -171,7 +171,7 @@ def test_moves_to_disk_work(started_cluster, name, engine, positive): SETTINGS storage_policy='small_jbod_with_external' """.format(name=name, engine=engine)) - wait_expire_1 = 6 + wait_expire_1 = 12 wait_expire_2 = 4 time_1 = time.time() + wait_expire_1 time_2 = time.time() + wait_expire_1 + wait_expire_2 @@ -199,7 +199,6 @@ def test_moves_to_disk_work(started_cluster, name, engine, positive): node1.query("DROP TABLE IF EXISTS {}".format(name)) -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("mt_test_moves_to_volume_work","MergeTree()"), ("replicated_mt_test_moves_to_volume_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_volume_work', '1')"), @@ -246,7 +245,6 @@ def test_moves_to_volume_work(started_cluster, name, engine): node1.query("DROP TABLE IF EXISTS {}".format(name)) -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine,positive", [ ("mt_test_inserts_to_volume_do_not_work","MergeTree()",0), ("replicated_mt_test_inserts_to_volume_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_inserts_to_volume_do_not_work', '1')",0), @@ -285,7 +283,6 @@ def test_inserts_to_volume_work(started_cluster, name, engine, positive): node1.query("DROP TABLE IF EXISTS {}".format(name)) -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("mt_test_moves_to_disk_eventually_work","MergeTree()"), ("replicated_mt_test_moves_to_disk_eventually_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_to_disk_eventually_work', '1')"), @@ -374,7 +371,6 @@ def test_replicated_download_ttl_info(started_cluster): continue -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine,positive", [ ("mt_test_merges_to_disk_do_not_work","MergeTree()",0), ("replicated_mt_test_merges_to_disk_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_merges_to_disk_do_not_work', '1')",0), @@ -396,7 +392,7 @@ def test_merges_to_disk_work(started_cluster, name, engine, positive): node1.query("SYSTEM STOP MERGES {}".format(name)) node1.query("SYSTEM STOP MOVES {}".format(name)) - wait_expire_1 = 10 + wait_expire_1 = 16 wait_expire_2 = 4 time_1 = time.time() + wait_expire_1 time_2 = time.time() + wait_expire_1 + wait_expire_2 @@ -432,7 +428,6 @@ def test_merges_to_disk_work(started_cluster, name, engine, positive): node1.query("DROP TABLE IF EXISTS {}".format(name)) -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("mt_test_merges_with_full_disk_work","MergeTree()"), ("replicated_mt_test_merges_with_full_disk_work","ReplicatedMergeTree('/clickhouse/replicated_test_merges_with_full_disk_work', '1')"), @@ -499,7 +494,6 @@ def test_merges_with_full_disk_work(started_cluster, name, engine): node1.query("DROP TABLE IF EXISTS {}".format(name)) -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine,positive", [ ("mt_test_moves_after_merges_do_not_work","MergeTree()",0), ("replicated_mt_test_moves_after_merges_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_after_merges_do_not_work', '1')",0), @@ -518,7 +512,7 @@ def test_moves_after_merges_work(started_cluster, name, engine, positive): SETTINGS storage_policy='small_jbod_with_external' """.format(name=name, engine=engine)) - wait_expire_1 = 10 + wait_expire_1 = 16 wait_expire_2 = 4 time_1 = time.time() + 
wait_expire_1 time_2 = time.time() + wait_expire_1 + wait_expire_2 @@ -552,7 +546,6 @@ def test_moves_after_merges_work(started_cluster, name, engine, positive): node1.query("DROP TABLE IF EXISTS {}".format(name)) -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine,positive,bar", [ ("mt_test_moves_after_alter_do_not_work","MergeTree()",0,"DELETE"), ("replicated_mt_test_moves_after_alter_do_not_work","ReplicatedMergeTree('/clickhouse/replicated_test_moves_after_alter_do_not_work', '1')",0,"DELETE"), @@ -658,7 +651,12 @@ def test_materialize_ttl_in_partition(started_cluster, name, engine): node1.query("DROP TABLE IF EXISTS {}".format(name)) -@pytest.mark.skip(reason="Flappy test") +def start_thread(*args, **kwargs): + thread = threading.Thread(*args, **kwargs) + thread.start() + return thread + + @pytest.mark.parametrize("name,engine,positive", [ ("mt_test_alter_multiple_ttls_positive", "MergeTree()", True), ("mt_replicated_test_alter_multiple_ttls_positive", "ReplicatedMergeTree('/clickhouse/replicated_test_alter_multiple_ttls_positive', '1')", True), @@ -689,6 +687,8 @@ limitations under the License.""" """ now = time.time() try: + sleeps = { delay : start_thread(target=time.sleep, args=(delay,)) for delay in [16, 26] } + node1.query(""" CREATE TABLE {name} ( p1 Int64, @@ -697,16 +697,16 @@ limitations under the License.""" ) ENGINE = {engine} ORDER BY tuple() PARTITION BY p1 - TTL d1 + INTERVAL 30 SECOND TO DISK 'jbod2', - d1 + INTERVAL 60 SECOND TO VOLUME 'external' + TTL d1 + INTERVAL 34 SECOND TO DISK 'jbod2', + d1 + INTERVAL 64 SECOND TO VOLUME 'external' SETTINGS storage_policy='jbods_with_external', merge_with_ttl_timeout=0 """.format(name=name, engine=engine)) node1.query(""" ALTER TABLE {name} MODIFY TTL d1 + INTERVAL 0 SECOND TO DISK 'jbod2', - d1 + INTERVAL 5 SECOND TO VOLUME 'external', - d1 + INTERVAL 10 SECOND DELETE + d1 + INTERVAL 14 SECOND TO VOLUME 'external', + d1 + INTERVAL 24 SECOND DELETE """.format(name=name)) for p in range(3): @@ -724,14 +724,14 @@ limitations under the License.""" assert node1.query("SELECT count() FROM {name}".format(name=name)).splitlines() == ["6"] - time.sleep(5) + sleeps[16].join() used_disks = get_used_disks_for_table(node1, name) assert set(used_disks) == {"external"} if positive else {"jbod1", "jbod2"} assert node1.query("SELECT count() FROM {name}".format(name=name)).splitlines() == ["6"] - time.sleep(5) + sleeps[26].join() node1.query("OPTIMIZE TABLE {name} FINAL".format(name=name)) @@ -741,7 +741,6 @@ limitations under the License.""" node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,engine", [ ("concurrently_altering_ttl_mt","MergeTree()"), ("concurrently_altering_ttl_replicated_mt","ReplicatedMergeTree('/clickhouse/concurrently_altering_ttl_replicated_mt', '1')",), @@ -792,7 +791,7 @@ def test_concurrent_alter_with_ttl_move(started_cluster, name, engine): try: node1.query("ALTER TABLE {} MOVE {mt} {mp} TO {md} {mv}".format( name, mt=move_type, mp=move_part, md=move_disk, mv=move_volume)) - except QueryRuntimeException as ex: + except QueryRuntimeException: pass for i in range(num): @@ -809,7 +808,10 @@ def test_concurrent_alter_with_ttl_move(started_cluster, name, engine): what = random.choice(["TO VOLUME 'main'", "TO VOLUME 'external'", "TO DISK 'jbod1'", "TO DISK 'jbod2'", "TO DISK 'external'"]) when = "now()+{}".format(random.randint(-1, 5)) ttls.append("{} {}".format(when, what)) - node1.query("ALTER TABLE {} MODIFY TTL 
{}".format(name, ", ".join(ttls))) + try: + node1.query("ALTER TABLE {} MODIFY TTL {}".format(name, ", ".join(ttls))) + except QueryRuntimeException: + pass def optimize_table(num): for i in range(num): @@ -832,7 +834,6 @@ def test_concurrent_alter_with_ttl_move(started_cluster, name, engine): finally: node1.query("DROP TABLE IF EXISTS {name}".format(name=name)) -@pytest.mark.skip(reason="Flappy test") @pytest.mark.parametrize("name,positive", [ ("test_double_move_while_select_negative", 0), ("test_double_move_while_select_positive", 1), @@ -870,6 +871,8 @@ def test_double_move_while_select(started_cluster, name, positive): node1.query("INSERT INTO {name} VALUES (3, '{string}')".format(name=name, string=get_random_string(9 * 1024 * 1024))) node1.query("INSERT INTO {name} VALUES (4, '{string}')".format(name=name, string=get_random_string(9 * 1024 * 1024))) + time.sleep(1) + # If SELECT locked old part on external, move shall fail. assert node1.query("SELECT disk_name FROM system.parts WHERE table = '{name}' AND active = 1 AND name = '{part}'" .format(name=name, part=parts[0])).splitlines() == ["jbod1" if positive else "external"] diff --git a/tests/integration/test_version_update_after_mutation/test.py b/tests/integration/test_version_update_after_mutation/test.py index 3f2063bdddb..9e3471175fa 100644 --- a/tests/integration/test_version_update_after_mutation/test.py +++ b/tests/integration/test_version_update_after_mutation/test.py @@ -6,9 +6,9 @@ from helpers.test_tools import assert_eq_with_retry cluster = ClickHouseCluster(__file__) -node1 = cluster.add_instance('node1', with_zookeeper=True, image='yandex/clickhouse-server:20.1.6.3', with_installed_binary=True, stay_alive=True) -node2 = cluster.add_instance('node2', with_zookeeper=True, image='yandex/clickhouse-server:20.1.6.3', with_installed_binary=True, stay_alive=True) -node3 = cluster.add_instance('node3', with_zookeeper=True, image='yandex/clickhouse-server:20.1.6.3', with_installed_binary=True, stay_alive=True) +node1 = cluster.add_instance('node1', with_zookeeper=True, image='yandex/clickhouse-server:20.1.10.70', with_installed_binary=True, stay_alive=True) +node2 = cluster.add_instance('node2', with_zookeeper=True, image='yandex/clickhouse-server:20.1.10.70', with_installed_binary=True, stay_alive=True) +node3 = cluster.add_instance('node3', with_zookeeper=True, image='yandex/clickhouse-server:20.1.10.70', with_installed_binary=True, stay_alive=True) @pytest.fixture(scope="module") def start_cluster(): diff --git a/tests/performance/agg_functions_min_max_any.xml b/tests/performance/agg_functions_min_max_any.xml index 4591182cdfc..9c16cb88970 100644 --- a/tests/performance/agg_functions_min_max_any.xml +++ b/tests/performance/agg_functions_min_max_any.xml @@ -35,10 +35,6 @@ select max(PageCharset) from test.hits where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null select any(PageCharset) from test.hits where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null select anyHeavy(PageCharset) from test.hits where PageCharset != '' group by intHash32(UserID) % 1000000 FORMAT Null -select min(OriginalURL) from test.hits where OriginalURL != '' group by intHash32(UserID) % 1000000 FORMAT Null -select max(OriginalURL) from test.hits where OriginalURL != '' group by intHash32(UserID) % 1000000 FORMAT Null -select any(OriginalURL) from test.hits where OriginalURL != '' group by intHash32(UserID) % 1000000 FORMAT Null -select anyHeavy(OriginalURL) from test.hits where OriginalURL != '' group by 
intHash32(UserID) % 1000000 FORMAT Null select min(SocialNetwork) from test.hits where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null select max(SocialNetwork) from test.hits where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null select any(SocialNetwork) from test.hits where SocialNetwork != '' group by intHash32(UserID) % 1000000 FORMAT Null diff --git a/tests/performance/functions_with_hash_tables.xml b/tests/performance/functions_with_hash_tables.xml new file mode 100644 index 00000000000..2e9a88107bd --- /dev/null +++ b/tests/performance/functions_with_hash_tables.xml @@ -0,0 +1,8 @@ + + select arrayUniq(range(1 + (number % 100) * 10)) from numbers(100000) format Null + select arrayDistinct(range(1 + (number % 100) * 10)) from numbers(100000) format Null + select arrayEnumerateUniq(range(1 + (number % 100) * 10)) from numbers(100000) format Null + select arrayIntersect(range((1 + number % 100)), range(1, (1 + number % 100) + 1)) from numbers(100000) format Null + select groupUniqArray(rand() % 100) from numbers(1000 * 1000) group by number / 1000 format Null + select entropy(number / 10) from numbers(1000 * 1000) group by number / 1000 format Null + diff --git a/tests/performance/insert_select_default_small_block.xml b/tests/performance/insert_select_default_small_block.xml new file mode 100644 index 00000000000..12e67b09d2f --- /dev/null +++ b/tests/performance/insert_select_default_small_block.xml @@ -0,0 +1,38 @@ + + 1 + + +CREATE TABLE insert_small_block_performance +( + `x` String, + `a` DEFAULT SHA256(x), + `b` DEFAULT SHA256(toString(a)), + `c` DEFAULT SHA256(toString(b)), + `d` DEFAULT SHA256(toString(c)), + `e` DEFAULT SHA256(toString(d)), + `f` DEFAULT SHA256(toString(e)), + `g` DEFAULT SHA256(toString(f)), + `h` DEFAULT SHA256(toString(g)), + `i` DEFAULT SHA256(toString(h)), + `j` DEFAULT SHA256(toString(i)), + `k` DEFAULT SHA256(toString(j)), + `l` DEFAULT SHA256(toString(k)), + `m` DEFAULT SHA256(toString(l)), + `n` DEFAULT SHA256(toString(m)), + `o` DEFAULT SHA256(toString(n)), + `p` DEFAULT SHA256(toString(o)), + `q` DEFAULT SHA256(toString(p)), + `r` DEFAULT SHA256(toString(q)), + `s` DEFAULT SHA256(toString(r)), + `t` DEFAULT SHA256(toString(s)), + `u` DEFAULT SHA256(toString(t)), + `v` DEFAULT SHA256(toString(u)), + `w` DEFAULT SHA256(toString(v)) +) +ENGINE = Null; + + + INSERT INTO insert_small_block_performance (x) SELECT toString(number) FROM numbers(10000); + + DROP TABLE IF EXISTS insert_small_block_performance + diff --git a/tests/performance/leftpad.xml b/tests/performance/leftpad.xml index 4349c6ac0e5..199f9224b89 100644 --- a/tests/performance/leftpad.xml +++ b/tests/performance/leftpad.xml @@ -6,7 +6,6 @@ - diff --git a/tests/performance/sort_radix_trivial.xml b/tests/performance/sort_radix_trivial.xml index fb3dc838a9c..096c3dd0854 100644 --- a/tests/performance/sort_radix_trivial.xml +++ b/tests/performance/sort_radix_trivial.xml @@ -1,5 +1,5 @@ - SELECT rand32() AS x FROM numbers(1000000) ORDER BY x - SELECT rand64() AS x FROM numbers(1000000) ORDER BY x - SELECT 1 / rand64() AS x FROM numbers(1000000) ORDER BY x + SELECT rand32() AS x FROM numbers(1000000) ORDER BY x FORMAT Null + SELECT rand64() AS x FROM numbers(1000000) ORDER BY x FORMAT Null + SELECT 1 / rand64() AS x FROM numbers(1000000) ORDER BY x FORMAT Null diff --git a/tests/queries/0_stateless/001283_strict_resize_bug.reference b/tests/queries/0_stateless/001283_strict_resize_bug.reference new file mode 100644 index 00000000000..e1fe7af425d --- 
/dev/null +++ b/tests/queries/0_stateless/001283_strict_resize_bug.reference @@ -0,0 +1 @@ +49999995000000 diff --git a/tests/queries/0_stateless/001283_strict_resize_bug.sql b/tests/queries/0_stateless/001283_strict_resize_bug.sql new file mode 100644 index 00000000000..f462f50c61f --- /dev/null +++ b/tests/queries/0_stateless/001283_strict_resize_bug.sql @@ -0,0 +1,7 @@ +drop table if exists num_10m; +create table num_10m (number UInt64) engine = MergeTree order by tuple(); +insert into num_10m select * from numbers(10000000); + +select * from (select sum(number) from num_10m union all select sum(number) from num_10m) limit 1 settings max_block_size = 1024; + +drop table if exists num_1m; diff --git a/tests/queries/0_stateless/00342_escape_sequences.reference b/tests/queries/0_stateless/00342_escape_sequences.reference index b7ecd06be78..50e0756d16f 100644 --- a/tests/queries/0_stateless/00342_escape_sequences.reference +++ b/tests/queries/0_stateless/00342_escape_sequences.reference @@ -1 +1 @@ -07080C0A0D090B5C27223FAA +07080C0A0D090B5C27225C3FAA diff --git a/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh b/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh index efb4b3569fb..5173b5f5772 100755 --- a/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh +++ b/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh @@ -47,7 +47,7 @@ SELECT threads_realtime >= threads_time_user_system_io, any(length(thread_ids)) >= 1 FROM - (SELECT * FROM system.query_log PREWHERE query='$heavy_cpu_query' WHERE type='QueryFinish' ORDER BY event_time DESC LIMIT 1) + (SELECT * FROM system.query_log PREWHERE query='$heavy_cpu_query' WHERE event_date >= today()-1 AND type=2 ORDER BY event_time DESC LIMIT 1) ARRAY JOIN ProfileEvents.Names AS PN, ProfileEvents.Values AS PV" # Check per-thread and per-query ProfileEvents consistency @@ -58,7 +58,7 @@ SELECT PN, PVq, PVt FROM SELECT PN, sum(PV) AS PVt FROM system.query_thread_log ARRAY JOIN ProfileEvents.Names AS PN, ProfileEvents.Values AS PV - WHERE query_id='$query_id' + WHERE event_date >= today()-1 AND query_id='$query_id' GROUP BY PN ) js1 ANY INNER JOIN @@ -66,7 +66,7 @@ ANY INNER JOIN SELECT PN, PV AS PVq FROM system.query_log ARRAY JOIN ProfileEvents.Names AS PN, ProfileEvents.Values AS PV - WHERE query_id='$query_id' + WHERE event_date >= today()-1 AND query_id='$query_id' ) js2 USING PN WHERE diff --git a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh index 5177cb6087c..5ae9dcf67ec 100755 --- a/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh +++ b/tests/queries/0_stateless/00731_long_merge_tree_select_opened_files.sh @@ -14,9 +14,11 @@ $CLICKHOUSE_CLIENT $settings -n -q " DROP TABLE IF EXISTS merge_tree_table; CREATE TABLE merge_tree_table (id UInt64, date Date, uid UInt32) ENGINE = MergeTree(date, id, 8192);" + $CLICKHOUSE_CLIENT $settings -q "INSERT INTO merge_tree_table SELECT (intHash64(number)) % 10000, toDate('2018-08-01'), rand() FROM system.numbers LIMIT 10000000;" -$CLICKHOUSE_CLIENT $settings -q "OPTIMIZE TABLE merge_tree_table FINAL;" +# If merge is already happening, OPTIMIZE will be noop. But we have to ensure that the data is merged. 
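+# A note on the retry loop below: with --optimize_throw_if_noop=1 the OPTIMIZE query fails instead of silently doing nothing, so retrying it (up to 100 times, once per second) effectively waits until the merge has actually been performed.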
+for i in {1..100}; do $CLICKHOUSE_CLIENT $settings --optimize_throw_if_noop=1 -q "OPTIMIZE TABLE merge_tree_table FINAL;" && break; sleep 1; done # The query may open more files if query log will be flushed during the query. # To lower this chance, we also flush logs before the query. diff --git a/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.reference b/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.reference index 20076c05d5d..b2e236400b0 100644 --- a/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.reference +++ b/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.reference @@ -8,3 +8,4 @@ [0,2,4,6] [0,2,4,6] [0,2,4,6,8] +[['a']] diff --git a/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.sql b/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.sql index 9e2ef3f2ff6..76c3d485ccb 100644 --- a/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.sql +++ b/tests/queries/0_stateless/00752_low_cardinality_lambda_argument.sql @@ -4,3 +4,8 @@ insert into lc_lambda select range(number) from system.numbers limit 10; select arrayFilter(x -> x % 2 == 0, arr) from lc_lambda; drop table if exists lc_lambda; +drop table if exists test_array; +CREATE TABLE test_array(resources_host Array(LowCardinality(String))) ENGINE = MergeTree() ORDER BY (resources_host); +insert into test_array values (['a']); +SELECT arrayMap(i -> [resources_host[i]], arrayEnumerate(resources_host)) FROM test_array; +drop table if exists test_array; diff --git a/tests/queries/0_stateless/00825_protobuf_format_input.reference b/tests/queries/0_stateless/00825_protobuf_format_input.reference index 884cc74c4e5..0c56bc4ebf0 100644 --- a/tests/queries/0_stateless/00825_protobuf_format_input.reference +++ b/tests/queries/0_stateless/00825_protobuf_format_input.reference @@ -8,3 +8,4 @@ a7522158-3d41-4b77-ad69-6c598ee55c49 Ivan Petrov male 1980-12-29 png +7495123456 0 0 2 4 3 9 +ok diff --git a/tests/queries/0_stateless/00825_protobuf_format_input.sh b/tests/queries/0_stateless/00825_protobuf_format_input.sh index d28b70bb002..1c915bc3f24 100755 --- a/tests/queries/0_stateless/00825_protobuf_format_input.sh +++ b/tests/queries/0_stateless/00825_protobuf_format_input.sh @@ -3,7 +3,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh -set -e -o pipefail +set -eo pipefail # Run the client. $CLICKHOUSE_CLIENT --multiquery <<'EOF' @@ -48,5 +48,12 @@ source $CURDIR/00825_protobuf_format_input.insh $CLICKHOUSE_CLIENT --query "SELECT * FROM in_persons_00825 ORDER BY uuid;" $CLICKHOUSE_CLIENT --query "SELECT * FROM in_squares_00825 ORDER BY number;" +# Try to input malformed data. 
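+# The bytes piped in below are not a valid Protobuf message, so the INSERT is expected to fail; error/pipefail handling is relaxed around it and only the expected error text is checked.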
+set +eo pipefail +echo -ne '\xe0\x80\x3f\x0b' \ + | $CLICKHOUSE_CLIENT --query="INSERT INTO in_persons_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format:Person'" 2>&1 \ + | grep -qF "Protobuf messages are corrupted" && echo "ok" || echo "fail" +set -eo pipefail + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_persons_00825;" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS in_squares_00825;" diff --git a/tests/queries/0_stateless/00926_multimatch.sql b/tests/queries/0_stateless/00926_multimatch.sql index d54e4fd2280..bf67533bb6c 100644 --- a/tests/queries/0_stateless/00926_multimatch.sql +++ b/tests/queries/0_stateless/00926_multimatch.sql @@ -89,4 +89,4 @@ SELECT [1, 2, 3, 11] = arraySort(multiMatchAllIndices('фабрикант', ['', SELECT [1] = multiMatchAllIndices(materialize('/odezhda-dlya-bega/'), ['/odezhda-dlya-bega/', 'kurtki-i-vetrovki-dlya-bega', 'futbolki-i-mayki-dlya-bega']); SELECT [] = multiMatchAllIndices(materialize('aaaa'), ['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}']); SELECT 'All tests above must return 1, all tests below return something.'; -SELECT arraySort(multiMatchAllIndices(arrayJoin(['aaaa', 'aaaaaa', 'bbbb', 'aaaaaaaaaaaaaa']), ['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}'])); +SELECT arraySort(multiMatchAllIndices(arrayJoin(['aaaa', 'aaaaaa', 'bbbb', 'aaaaaaaaaaaaaa']), ['.*aa.*aaa.*', 'aaaaaa{2}', '(aa){3}'])); diff --git a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh index 9e32c30ce20..1f7571a2404 100755 --- a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh +++ b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.sh @@ -19,7 +19,7 @@ $CLICKHOUSE_CLIENT --use_uncompressed_cache=1 --query_id="test-query-uncompresse sleep 1 $CLICKHOUSE_CLIENT --query="SYSTEM FLUSH LOGS" -$CLICKHOUSE_CLIENT --query="SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'Seek')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'ReadCompressedBytes')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'UncompressedCacheHits')] AS hit FROM system.query_log WHERE (query_id = 'test-query-uncompressed-cache') AND (type = 'QueryFinish') ORDER BY event_time DESC LIMIT 1" +$CLICKHOUSE_CLIENT --query="SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'Seek')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'ReadCompressedBytes')], ProfileEvents.Values[indexOf(ProfileEvents.Names, 'UncompressedCacheHits')] AS hit FROM system.query_log WHERE (query_id = 'test-query-uncompressed-cache') AND (type = 2) AND event_date >= yesterday() ORDER BY event_time DESC LIMIT 1" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS small_table" diff --git a/tests/queries/0_stateless/00956_sensitive_data_masking.sh b/tests/queries/0_stateless/00956_sensitive_data_masking.sh index c492fd35b89..0f76c34eaff 100755 --- a/tests/queries/0_stateless/00956_sensitive_data_masking.sh +++ b/tests/queries/0_stateless/00956_sensitive_data_masking.sh @@ -95,7 +95,7 @@ echo 7 # and finally querylog $CLICKHOUSE_CLIENT \ --server_logs_file=/dev/null \ - --query="select * from system.query_log where query like '%TOPSECRET%';" + --query="select * from system.query_log where event_time>now() - 10 and query like '%TOPSECRET%';" rm -f $tmp_file >/dev/null 2>&1 @@ -117,8 +117,8 @@ sleep 0.1; echo 9 $CLICKHOUSE_CLIENT \ --server_logs_file=/dev/null \ - --query="SELECT if( count() > 0, 'text_log non empty', 'text_log empty') FROM system.text_log WHERE 
message like '%find_me%'; - select * from system.text_log where message like '%TOPSECRET=TOPSECRET%';" --ignore-error --multiquery + --query="SELECT if( count() > 0, 'text_log non empty', 'text_log empty') FROM system.text_log WHERE event_time>now() - 60 and message like '%find_me%'; + select * from system.text_log where event_time>now() - 60 and message like '%TOPSECRET=TOPSECRET%';" --ignore-error --multiquery echo 'finish' rm -f $tmp_file >/dev/null 2>&1 diff --git a/tests/queries/0_stateless/00974_text_log_table_not_empty.sh b/tests/queries/0_stateless/00974_text_log_table_not_empty.sh index 149f0668bd1..c3cde4c08bb 100755 --- a/tests/queries/0_stateless/00974_text_log_table_not_empty.sh +++ b/tests/queries/0_stateless/00974_text_log_table_not_empty.sh @@ -10,7 +10,7 @@ do ${CLICKHOUSE_CLIENT} --query="SYSTEM FLUSH LOGS" sleep 0.1; -if [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() > 0 FROM system.text_log WHERE position(system.text_log.message, 'SELECT 6103') > 0") == 1 ]]; then echo 1; exit; fi; +if [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT count() > 0 FROM system.text_log WHERE position(system.text_log.message, 'SELECT 6103') > 0 AND event_date >= yesterday()") == 1 ]]; then echo 1; exit; fi; done; diff --git a/tests/queries/0_stateless/01070_exception_code_in_query_log_table.sql b/tests/queries/0_stateless/01070_exception_code_in_query_log_table.sql index e1e81614ab7..2c99ba54112 100644 --- a/tests/queries/0_stateless/01070_exception_code_in_query_log_table.sql +++ b/tests/queries/0_stateless/01070_exception_code_in_query_log_table.sql @@ -3,5 +3,5 @@ SELECT * FROM test_table_for_01070_exception_code_in_query_log_table; -- { serve CREATE TABLE test_table_for_01070_exception_code_in_query_log_table (value UInt64) ENGINE=Memory(); SELECT * FROM test_table_for_01070_exception_code_in_query_log_table; SYSTEM FLUSH LOGS; -SELECT exception_code FROM system.query_log WHERE query = 'SELECT * FROM test_table_for_01070_exception_code_in_query_log_table' ORDER BY exception_code; +SELECT exception_code FROM system.query_log WHERE query = 'SELECT * FROM test_table_for_01070_exception_code_in_query_log_table' AND event_date >= yesterday() AND event_time > now() - INTERVAL 5 MINUTE ORDER BY exception_code; DROP TABLE IF EXISTS test_table_for_01070_exception_code_in_query_log_table; diff --git a/tests/queries/0_stateless/01075_allowed_client_hosts.reference b/tests/queries/0_stateless/01075_allowed_client_hosts.reference index 73f54c6027a..3fdea9d1cda 100644 --- a/tests/queries/0_stateless/01075_allowed_client_hosts.reference +++ b/tests/queries/0_stateless/01075_allowed_client_hosts.reference @@ -8,10 +8,10 @@ CREATE USER test_user_01075 HOST LOCAL, IP \'2001:db8:11a3:9d7:1f34:8a2e:7a0:765 CREATE USER test_user_01075 HOST LOCAL CREATE USER test_user_01075 HOST NONE CREATE USER test_user_01075 HOST LIKE \'@.somesite.com\' -CREATE USER test_user_01075 HOST REGEXP \'.*.anothersite.com\' -CREATE USER test_user_01075 HOST REGEXP \'.*.anothersite.com\', \'.*.anothersite.org\' -CREATE USER test_user_01075 HOST REGEXP \'.*.anothersite2.com\', \'.*.anothersite2.org\' -CREATE USER test_user_01075 HOST REGEXP \'.*.anothersite3.com\', \'.*.anothersite3.org\' +CREATE USER test_user_01075 HOST REGEXP \'.*\\\\.anothersite\\\\.com\' +CREATE USER test_user_01075 HOST REGEXP \'.*\\\\.anothersite\\\\.com\', \'.*\\\\.anothersite\\\\.org\' +CREATE USER test_user_01075 HOST REGEXP \'.*\\\\.anothersite2\\\\.com\', \'.*\\\\.anothersite2\\\\.org\' +CREATE USER test_user_01075 HOST REGEXP 
\'.*\\\\.anothersite3\\\\.com\', \'.*\\\\.anothersite3\\\\.org\' CREATE USER `test_user_01075_x@localhost` HOST LOCAL CREATE USER test_user_01075_x CREATE USER `test_user_01075_x@192.168.23.15` HOST LIKE \'192.168.23.15\' diff --git a/tests/queries/0_stateless/01091_num_threads.sql b/tests/queries/0_stateless/01091_num_threads.sql index a93568fcee5..876a2d15d1a 100644 --- a/tests/queries/0_stateless/01091_num_threads.sql +++ b/tests/queries/0_stateless/01091_num_threads.sql @@ -8,13 +8,13 @@ WITH ( SELECT query_id FROM system.query_log - WHERE (query = 'SELECT 1') + WHERE (query = 'SELECT 1') AND (event_date >= (today() - 1)) ORDER BY event_time DESC LIMIT 1 ) AS id SELECT uniqExact(thread_id) FROM system.query_thread_log -WHERE (query_id = id) AND (thread_id != master_thread_id); +WHERE (event_date >= (today() - 1)) AND (query_id = id) AND (thread_id != master_thread_id); select sum(number) from numbers(1000000); SYSTEM FLUSH LOGS; @@ -23,13 +23,13 @@ WITH ( SELECT query_id FROM system.query_log - WHERE (query = 'SELECT sum(number) FROM numbers(1000000)') + WHERE (query = 'SELECT sum(number) FROM numbers(1000000)') AND (event_date >= (today() - 1)) ORDER BY event_time DESC LIMIT 1 ) AS id SELECT uniqExact(thread_id) FROM system.query_thread_log -WHERE (query_id = id) AND (thread_id != master_thread_id); +WHERE (event_date >= (today() - 1)) AND (query_id = id) AND (thread_id != master_thread_id); select sum(number) from numbers_mt(1000000); SYSTEM FLUSH LOGS; @@ -38,10 +38,10 @@ WITH ( SELECT query_id FROM system.query_log - WHERE (query = 'SELECT sum(number) FROM numbers_mt(1000000)') + WHERE (query = 'SELECT sum(number) FROM numbers_mt(1000000)') AND (event_date >= (today() - 1)) ORDER BY event_time DESC LIMIT 1 ) AS id SELECT uniqExact(thread_id) > 2 FROM system.query_thread_log -WHERE (query_id = id) AND (thread_id != master_thread_id); +WHERE (event_date >= (today() - 1)) AND (query_id = id) AND (thread_id != master_thread_id); diff --git a/tests/queries/0_stateless/01092_memory_profiler.sql b/tests/queries/0_stateless/01092_memory_profiler.sql index 980f7f73d5d..c20b5c79cdb 100644 --- a/tests/queries/0_stateless/01092_memory_profiler.sql +++ b/tests/queries/0_stateless/01092_memory_profiler.sql @@ -3,4 +3,4 @@ SET allow_introspection_functions = 1; SET memory_profiler_step = 1000000; SELECT ignore(groupArray(number), 'test memory profiler') FROM numbers(10000000); SYSTEM FLUSH LOGS; -WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE trace_type = 'Memory' AND query_id = (SELECT query_id FROM system.query_log WHERE query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1); +WITH addressToSymbol(arrayJoin(trace)) AS symbol SELECT count() > 0 FROM system.trace_log t WHERE event_date >= yesterday() AND trace_type = 'Memory' AND query_id = (SELECT query_id FROM system.query_log WHERE event_date >= yesterday() AND query LIKE '%test memory profiler%' ORDER BY event_time DESC LIMIT 1); diff --git a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference index 59c88f013dd..03ed07cf1a4 100644 --- a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference +++ b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference @@ -1,4 +1,5 @@ Instruction check fail. The CPU does not support SSSE3 instruction set. Instruction check fail. The CPU does not support SSE4.1 instruction set. Instruction check fail. 
The CPU does not support SSE4.2 instruction set. +Instruction check fail. The CPU does not support POPCNT instruction set. 1 diff --git a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh index 15466e1889a..5ae4f8b3dd2 100755 --- a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh +++ b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh @@ -9,8 +9,13 @@ ${CLICKHOUSE_LOCAL} --query "SELECT max(value LIKE '%sanitize%') FROM system.bui command=$(command -v ${CLICKHOUSE_LOCAL}) -qemu-x86_64-static -cpu qemu64 $command --query "SELECT 1" 2>&1 | grep -v -F "warning: TCG doesn't support requested feature" ||: -qemu-x86_64-static -cpu qemu64,+ssse3 $command --query "SELECT 1" 2>&1 | grep -v -F "warning: TCG doesn't support requested feature" ||: -qemu-x86_64-static -cpu qemu64,+ssse3,+sse4.1 $command --query "SELECT 1" 2>&1 | grep -v -F "warning: TCG doesn't support requested feature" ||: -qemu-x86_64-static -cpu qemu64,+ssse3,+sse4.1,+sse4.2 $command --query "SELECT 1" 2>&1 | grep -v -F "warning: TCG doesn't support requested feature" ||: +function run_with_cpu() +{ + qemu-x86_64-static -cpu "$@" $command --query "SELECT 1" 2>&1 | grep -v -F "warning: TCG doesn't support requested feature" ||: +} +run_with_cpu qemu64 +run_with_cpu qemu64,+ssse3 +run_with_cpu qemu64,+ssse3,+sse4.1 +run_with_cpu qemu64,+ssse3,+sse4.1,+sse4.2 +run_with_cpu qemu64,+ssse3,+sse4.1,+sse4.2,+popcnt diff --git a/tests/queries/0_stateless/01138_join_on_distributed_and_tmp.reference b/tests/queries/0_stateless/01138_join_on_distributed_and_tmp.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01138_join_on_distributed_and_tmp.sql b/tests/queries/0_stateless/01138_join_on_distributed_and_tmp.sql new file mode 100644 index 00000000000..67492e7c683 --- /dev/null +++ b/tests/queries/0_stateless/01138_join_on_distributed_and_tmp.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS foo_local; +DROP TABLE IF EXISTS foo_distributed; + +CREATE TABLE foo_local (bar UInt64) +ENGINE = MergeTree() +ORDER BY tuple(); + +CREATE TABLE foo_distributed AS foo_local +ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), foo_local); + +CREATE TEMPORARY TABLE _tmp_baz (qux UInt64); + +SELECT * FROM foo_distributed JOIN _tmp_baz ON foo_distributed.bar = _tmp_baz.qux; + +DROP TABLE foo_local; +DROP TABLE foo_distributed; diff --git a/tests/queries/0_stateless/01198_client_quota_key.sh b/tests/queries/0_stateless/01198_client_quota_key.sh index b3bc845cd06..f4b66aea6ac 100755 --- a/tests/queries/0_stateless/01198_client_quota_key.sh +++ b/tests/queries/0_stateless/01198_client_quota_key.sh @@ -3,4 +3,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
$CURDIR/../shell_config.sh -$CLICKHOUSE_CLIENT --quota_key Hello --query_id test_quota_key --log_queries 1 --multiquery --query "SELECT 1; SYSTEM FLUSH LOGS; SELECT DISTINCT quota_key FROM system.query_log WHERE query_id = 'test_quota_key'" +$CLICKHOUSE_CLIENT --quota_key Hello --query_id test_quota_key --log_queries 1 --multiquery --query "SELECT 1; SYSTEM FLUSH LOGS; SELECT DISTINCT quota_key FROM system.query_log WHERE event_date >= yesterday() AND event_time >= now() - 300 AND query_id = 'test_quota_key'" diff --git a/tests/queries/0_stateless/01231_log_queries_min_type.sql b/tests/queries/0_stateless/01231_log_queries_min_type.sql index 565a5880b84..f2229c94a8a 100644 --- a/tests/queries/0_stateless/01231_log_queries_min_type.sql +++ b/tests/queries/0_stateless/01231_log_queries_min_type.sql @@ -2,14 +2,14 @@ set log_queries=1; select '01231_log_queries_min_type/QUERY_START'; system flush logs; -select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%'; +select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%' and event_date = today() and event_time >= now() - interval 1 minute; set log_queries_min_type='EXCEPTION_BEFORE_START'; select '01231_log_queries_min_type/EXCEPTION_BEFORE_START'; system flush logs; -select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%'; +select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%' and event_date = today() and event_time >= now() - interval 1 minute; set log_queries_min_type='EXCEPTION_WHILE_PROCESSING'; select '01231_log_queries_min_type/', max(number) from system.numbers limit 1e6 settings max_rows_to_read='100K'; -- { serverError 158; } system flush logs; -select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%'; +select count() from system.query_log where query like '%01231_log_queries_min_type/%' and query not like '%system.query_log%' and event_date = today() and event_time >= now() - interval 1 minute; diff --git a/tests/queries/0_stateless/01247_dist_on_dist_group_by_sharding_key_optimization.sql b/tests/queries/0_stateless/01247_dist_on_dist_group_by_sharding_key_optimization.sql index 7cf171c8c73..29e45ffdb80 100644 --- a/tests/queries/0_stateless/01247_dist_on_dist_group_by_sharding_key_optimization.sql +++ b/tests/queries/0_stateless/01247_dist_on_dist_group_by_sharding_key_optimization.sql @@ -1,5 +1,9 @@ -- TODO: correct testing with real unique shards +-- Avoid "Connection failed at try №1" messages. +SET send_logs_level = 'none'; +SET connect_timeout_with_failover_ms = 5000; + set optimize_distributed_group_by_sharding_key=1; drop table if exists dist_01247; diff --git a/tests/queries/0_stateless/01273_arrow.reference b/tests/queries/0_stateless/01273_arrow.reference index 0f4be2c74a0..0dc503f65e4 100644 --- a/tests/queries/0_stateless/01273_arrow.reference +++ b/tests/queries/0_stateless/01273_arrow.reference @@ -31,8 +31,6 @@ 992 991 990 -ContextLock Number of times the lock of Context was acquired or tried to acquire. This is global lock. -Query Number of queries to be interpreted and potentially executed. Does not include queries that failed to parse or were rejected due to AST size limits, quota limits or limits on the number of simultaneously running queries. 
May include internal queries initiated by ClickHouse itself. Does not count subqueries. original: -128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1.032 -1.064 string-1 fixedstring-1\0\0 2003-04-05 2003-02-03 04:05:06 -108 108 -1016 1116 -1032 1132 -1064 1164 -1.032 -1.064 string-0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 04:05:06 diff --git a/tests/queries/0_stateless/01273_arrow.sh b/tests/queries/0_stateless/01273_arrow.sh index f659a81f118..deb0aa20ccc 100755 --- a/tests/queries/0_stateless/01273_arrow.sh +++ b/tests/queries/0_stateless/01273_arrow.sh @@ -34,14 +34,6 @@ ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_numbers ORDER BY number DESC L ${CLICKHOUSE_CLIENT} --query="DROP TABLE arrow_numbers" -${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_events" -${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_events (event String, value UInt64, description String) ENGINE = Memory" -${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.events FORMAT Arrow" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_events FORMAT Arrow" -${CLICKHOUSE_CLIENT} --query="SELECT event, description FROM arrow_events WHERE event IN ('ContextLock', 'Query') ORDER BY event" -${CLICKHOUSE_CLIENT} --query="DROP TABLE arrow_events" - - - ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types1" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types2" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types3" @@ -61,8 +53,6 @@ ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types1 values ( -128, # max ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types1 values ( 127, 255, 32767, 65535, 2147483647, 4294967295, 9223372036854775807, 9223372036854775807, -1.032, -1.064, 'string-2', 'fixedstring-2', '2004-06-07', '2004-02-03 04:05:06')" -# 'SELECT -127,-128,-129,126,127,128,255,256,257,-32767,-32768,-32769,32766,32767,32768,65535,65536,65537, -2147483647,-2147483648,-2147483649,2147483646,2147483647,2147483648,4294967295,4294967296,4294967297, -9223372036854775807,-9223372036854775808,9223372036854775806,9223372036854775807,9223372036854775808,18446744073709551615'; - ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types1 FORMAT Arrow" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types2 FORMAT Arrow" echo original: @@ -98,7 +88,6 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types5 (int8 Nullable(Int ${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types6 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory" ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types5 values ( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT Arrow" > ${CLICKHOUSE_TMP}/arrow_all_types_5.arrow -#${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 FORMAT Arrow" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types6 FORMAT Arrow" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT Arrow" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types6 FORMAT Arrow" ${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types1 ORDER BY int8 FORMAT Arrow" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types6 FORMAT Arrow" echo dest from null: diff 
--git a/tests/queries/0_stateless/01273_arrow_stream.reference b/tests/queries/0_stateless/01273_arrow_stream.reference new file mode 100644 index 00000000000..0dc503f65e4 --- /dev/null +++ b/tests/queries/0_stateless/01273_arrow_stream.reference @@ -0,0 +1,60 @@ +9999 +9998 +9997 +9996 +9995 +9994 +9993 +9992 +9991 +9990 +99999 +99998 +99997 +99996 +99995 +99994 +99993 +99992 +99991 +99990 +2 +1 +0 +999 +998 +997 +996 +995 +994 +993 +992 +991 +990 +original: +-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1.032 -1.064 string-1 fixedstring-1\0\0 2003-04-05 2003-02-03 04:05:06 +-108 108 -1016 1116 -1032 1132 -1064 1164 -1.032 -1.064 string-0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 04:05:06 +127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1.032 -1.064 string-2 fixedstring-2\0\0 2004-06-07 2004-02-03 04:05:06 +converted: +-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1.032 -1.064 string-1 fixedstring-1\0\0 2003-04-05 2003-02-03 04:05:06 +-108 108 -1016 1116 -1032 1132 -1064 1164 -1.032 -1.064 string-0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 04:05:06 +127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1.032 -1.064 string-2 fixedstring-2\0\0 2004-06-07 2004-02-03 04:05:06 +diff: +dest: +79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 1970-01-01 06:29:04 +80 81 82 83 84 85 86 87 88 89 str02 fstr2\0\0\0\0\0\0\0\0\0\0 2005-03-04 2006-08-09 10:11:12 +min: +-128 0 0 0 0 0 0 0 -1 -1 string-1\0\0\0\0\0\0\0 fixedstring-1\0\0 2003-04-05 2003-02-03 +-108 108 8 92 -8 108 -40 -116 -1 -1 string-0\0\0\0\0\0\0\0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 +79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06 +127 -1 -1 -1 -1 -1 -1 -1 -1 -1 string-2\0\0\0\0\0\0\0 fixedstring-2\0\0 2004-06-07 2004-02-03 +max: +-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 1970-01-01 06:22:27 2003-02-03 04:05:06 +-108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 1970-01-01 06:09:16 2002-02-03 04:05:06 +80 81 82 83 84 85 86 87 88 89 str02 fstr2 2005-03-04 05:06:07 2006-08-09 10:11:12 +127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1 -1 string-2 fixedstring-2\0\0 1970-01-01 06:29:36 2004-02-03 04:05:06 +dest from null: +-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1.032 -1.064 string-1 fixedstring-1\0\0 2003-04-05 2003-02-03 04:05:06 +-108 108 -1016 1116 -1032 1132 -1064 1164 -1.032 -1.064 string-0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 04:05:06 +127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1.032 -1.064 string-2 fixedstring-2\0\0 2004-06-07 2004-02-03 04:05:06 +\N \N \N \N \N \N \N \N \N \N \N \N \N \N diff --git a/tests/queries/0_stateless/01273_arrow_stream.sh b/tests/queries/0_stateless/01273_arrow_stream.sh new file mode 100755 index 00000000000..a4bb403cb5a --- /dev/null +++ b/tests/queries/0_stateless/01273_arrow_stream.sh @@ -0,0 +1,104 @@ +#!/usr/bin/env bash + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CUR_DIR/../shell_config.sh + + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS contributors" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE contributors (name String) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.contributors ORDER BY name DESC FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO contributors FORMAT ArrowStream" +# random results +${CLICKHOUSE_CLIENT} --query="SELECT * FROM contributors LIMIT 10" > /dev/null +${CLICKHOUSE_CLIENT} --query="DROP TABLE contributors" + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_numbers" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_numbers (number UInt64) ENGINE = Memory" +# less than default block size (65k) +${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.numbers LIMIT 10000 FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_numbers FORMAT ArrowStream" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_numbers ORDER BY number DESC LIMIT 10" +${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE arrow_numbers" + +# More than default block size +${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.numbers LIMIT 100000 FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_numbers FORMAT ArrowStream" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_numbers ORDER BY number DESC LIMIT 10" +${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE arrow_numbers" + +${CLICKHOUSE_CLIENT} --max_block_size=2 --query="SELECT * FROM system.numbers LIMIT 3 FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_numbers FORMAT ArrowStream" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_numbers ORDER BY number DESC LIMIT 10" + +${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE arrow_numbers" +${CLICKHOUSE_CLIENT} --max_block_size=1 --query="SELECT * FROM system.numbers LIMIT 1000 FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_numbers FORMAT ArrowStream" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_numbers ORDER BY number DESC LIMIT 10" + +${CLICKHOUSE_CLIENT} --query="DROP TABLE arrow_numbers" + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types1" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types2" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types3" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types4" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types1 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types2 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory" +# convert min type +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types3 (int8 Int8, uint8 Int8, int16 Int8, uint16 Int8, int32 Int8, uint32 Int8, int64 Int8, uint64 Int8, float32 Int8, float64 Int8, string FixedString(15), fixedstring FixedString(15), date Date, datetime Date) ENGINE = Memory" +# convert max type +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types4 (int8 Int64, uint8 Int64, int16 Int64, uint16 Int64, int32 Int64, uint32 Int64, int64 Int64, uint64 Int64, float32 Int64, float64 Int64, string String, fixedstring String, date DateTime, datetime DateTime) ENGINE = Memory" + +${CLICKHOUSE_CLIENT} 
--query="INSERT INTO arrow_types1 values ( -108, 108, -1016, 1116, -1032, 1132, -1064, 1164, -1.032, -1.064, 'string-0', 'fixedstring', '2001-02-03', '2002-02-03 04:05:06')" + +# min +${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types1 values ( -128, 0, -32768, 0, -2147483648, 0, -9223372036854775808, 0, -1.032, -1.064, 'string-1', 'fixedstring-1', '2003-04-05', '2003-02-03 04:05:06')" + +# max +${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types1 values ( 127, 255, 32767, 65535, 2147483647, 4294967295, 9223372036854775807, 9223372036854775807, -1.032, -1.064, 'string-2', 'fixedstring-2', '2004-06-07', '2004-02-03 04:05:06')" + +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types1 FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types2 FORMAT ArrowStream" + +echo original: +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types1 ORDER BY int8" | tee ${CLICKHOUSE_TMP}/arrow_all_types_1.dump +echo converted: +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types2 ORDER BY int8" | tee ${CLICKHOUSE_TMP}/arrow_all_types_2.dump +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types1 ORDER BY int8 FORMAT ArrowStream" > ${CLICKHOUSE_TMP}/arrow_all_types_1.arrow +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types2 ORDER BY int8 FORMAT ArrowStream" > ${CLICKHOUSE_TMP}/arrow_all_types_2.arrow +echo diff: +diff ${CLICKHOUSE_TMP}/arrow_all_types_1.dump ${CLICKHOUSE_TMP}/arrow_all_types_2.dump + +${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE arrow_types2" +${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types3 values ( 79, 81, 82, 83, 84, 85, 86, 87, 88, 89, 'str01', 'fstr1', '2003-03-04', '2004-05-06')" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types3 ORDER BY int8 FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types2 FORMAT ArrowStream" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types1 ORDER BY int8 FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types3 FORMAT ArrowStream" + +${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types4 values ( 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 'str02', 'fstr2', '2005-03-04 05:06:07', '2006-08-09 10:11:12')" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types4 ORDER BY int8 FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types2 FORMAT ArrowStream" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types1 ORDER BY int8 FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types4 FORMAT ArrowStream" + +echo dest: +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types2 ORDER BY int8" +echo min: +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types3 ORDER BY int8" +echo max: +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types4 ORDER BY int8" + + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types5" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS arrow_types6" +${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE arrow_types2" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types5 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE arrow_types6 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 
Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory" +${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types5 values ( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT ArrowStream" > ${CLICKHOUSE_TMP}/arrow_all_types_5.arrow +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types5 ORDER BY int8 FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types6 FORMAT ArrowStream" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types1 ORDER BY int8 FORMAT ArrowStream" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO arrow_types6 FORMAT ArrowStream" +echo dest from null: +${CLICKHOUSE_CLIENT} --query="SELECT * FROM arrow_types6 ORDER BY int8" + +${CLICKHOUSE_CLIENT} --query="DROP TABLE arrow_types5" +${CLICKHOUSE_CLIENT} --query="DROP TABLE arrow_types6" + + +${CLICKHOUSE_CLIENT} --query="DROP TABLE arrow_types1" +${CLICKHOUSE_CLIENT} --query="DROP TABLE arrow_types2" +${CLICKHOUSE_CLIENT} --query="DROP TABLE arrow_types3" +${CLICKHOUSE_CLIENT} --query="DROP TABLE arrow_types4" + diff --git a/tests/queries/0_stateless/01277_fromUnixTimestamp64.reference b/tests/queries/0_stateless/01277_fromUnixTimestamp64.reference new file mode 100644 index 00000000000..610041de31e --- /dev/null +++ b/tests/queries/0_stateless/01277_fromUnixTimestamp64.reference @@ -0,0 +1,5 @@ +const column +UTC 1234567891011 2009-02-13 23:31:31.011 1970-01-15 06:56:07.891011 1970-01-01 00:20:34.567891011 DateTime64(9, \'UTC\') +Asia/Makassar 1234567891011 2009-02-14 07:31:31.011 1970-01-15 14:56:07.891011 1970-01-01 08:20:34.567891011 DateTime64(9, \'Asia/Makassar\') +non-const column +1234567891011 2009-02-13 23:31:31.011 1970-01-15 06:56:07.891011 1970-01-01 00:20:34.567891011 diff --git a/tests/queries/0_stateless/01277_fromUnixTimestamp64.sql b/tests/queries/0_stateless/01277_fromUnixTimestamp64.sql new file mode 100644 index 00000000000..4f1497763e1 --- /dev/null +++ b/tests/queries/0_stateless/01277_fromUnixTimestamp64.sql @@ -0,0 +1,45 @@ +-- -- Error cases +SELECT fromUnixTimestamp64Milli(); -- {serverError 42} +SELECT fromUnixTimestamp64Micro(); -- {serverError 42} +SELECT fromUnixTimestamp64Nano(); -- {serverError 42} + +SELECT fromUnixTimestamp64Milli('abc'); -- {serverError 43} +SELECT fromUnixTimestamp64Micro('abc'); -- {serverError 43} +SELECT fromUnixTimestamp64Nano('abc'); -- {serverError 43} + +SELECT fromUnixTimestamp64Milli('abc', 123); -- {serverError 43} +SELECT fromUnixTimestamp64Micro('abc', 123); -- {serverError 43} +SELECT fromUnixTimestamp64Nano('abc', 123); -- {serverError 43} + +SELECT 'const column'; +WITH + CAST(1234567891011 AS Int64) AS i64, + 'UTC' AS tz +SELECT + tz, + i64, + fromUnixTimestamp64Milli(i64, tz), + fromUnixTimestamp64Micro(i64, tz), + fromUnixTimestamp64Nano(i64, tz) as dt64, + toTypeName(dt64); + +WITH + CAST(1234567891011 AS Int64) AS i64, + 'Asia/Makassar' AS tz +SELECT + tz, + i64, + fromUnixTimestamp64Milli(i64, tz), + fromUnixTimestamp64Micro(i64, tz), + fromUnixTimestamp64Nano(i64, tz) as dt64, + toTypeName(dt64); + +SELECT 'non-const column'; +WITH + CAST(1234567891011 AS Int64) AS i64, + 'UTC' AS tz +SELECT + i64, + fromUnixTimestamp64Milli(i64, tz), + fromUnixTimestamp64Micro(i64, tz), + fromUnixTimestamp64Nano(i64, tz) as 
dt64; \ No newline at end of file diff --git a/tests/queries/0_stateless/01277_toUnixTimestamp64.reference b/tests/queries/0_stateless/01277_toUnixTimestamp64.reference new file mode 100644 index 00000000000..7b66586b4d2 --- /dev/null +++ b/tests/queries/0_stateless/01277_toUnixTimestamp64.reference @@ -0,0 +1,8 @@ +const column +2019-09-16 19:20:12.345 1568650812345 1568650812345000 1568650812345000000 +2019-09-16 19:20:12.345678 1568650812345 1568650812345678 1568650812345678000 +2019-09-16 19:20:12.345678910 1568650812345 1568650812345678 1568650812345678910 +non-const column +2019-09-16 19:20:12.345 1568650812345 1568650812345000 1568650812345000000 +2019-09-16 19:20:12.345678 1568650812345 1568650812345678 1568650812345678000 +2019-09-16 19:20:12.345678910 1568650812345 1568650812345678 1568650812345678910 diff --git a/tests/queries/0_stateless/01277_toUnixTimestamp64.sql b/tests/queries/0_stateless/01277_toUnixTimestamp64.sql new file mode 100644 index 00000000000..de2b132a2dc --- /dev/null +++ b/tests/queries/0_stateless/01277_toUnixTimestamp64.sql @@ -0,0 +1,33 @@ +-- Error cases +SELECT toUnixTimestamp64Milli(); -- {serverError 42} +SELECT toUnixTimestamp64Micro(); -- {serverError 42} +SELECT toUnixTimestamp64Nano(); -- {serverError 42} + +SELECT toUnixTimestamp64Milli('abc'); -- {serverError 43} +SELECT toUnixTimestamp64Micro('abc'); -- {serverError 43} +SELECT toUnixTimestamp64Nano('abc'); -- {serverError 43} + +SELECT toUnixTimestamp64Milli('abc', 123); -- {serverError 42} +SELECT toUnixTimestamp64Micro('abc', 123); -- {serverError 42} +SELECT toUnixTimestamp64Nano('abc', 123); -- {serverError 42} + +SELECT 'const column'; +WITH toDateTime64('2019-09-16 19:20:12.345678910', 3) AS dt64 +SELECT dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); + +WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 +SELECT dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); + +WITH toDateTime64('2019-09-16 19:20:12.345678910', 9) AS dt64 +SELECT dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); + +SELECT 'non-const column'; +WITH toDateTime64('2019-09-16 19:20:12.345678910', 3) AS x +SELECT materialize(x) as dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); + +WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS x +SELECT materialize(x) as dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); + +WITH toDateTime64('2019-09-16 19:20:12.345678910', 9) AS x +SELECT materialize(x) as dt64, toUnixTimestamp64Milli(dt64), toUnixTimestamp64Micro(dt64), toUnixTimestamp64Nano(dt64); + diff --git a/tests/queries/0_stateless/01277_unixTimestamp64_compatibility.reference b/tests/queries/0_stateless/01277_unixTimestamp64_compatibility.reference new file mode 100644 index 00000000000..9a70396832c --- /dev/null +++ b/tests/queries/0_stateless/01277_unixTimestamp64_compatibility.reference @@ -0,0 +1,9 @@ +2019-09-16 19:20:12.345 2019-09-16 19:20:12.345 2019-09-16 19:20:12.345000 2019-09-16 19:20:12.345000000 +2019-09-16 19:20:12.345678 2019-09-16 19:20:12.345 2019-09-16 19:20:12.345678 2019-09-16 19:20:12.345678000 +2019-09-16 19:20:12.345678910 2019-09-16 19:20:12.345 2019-09-16 19:20:12.345678 2019-09-16 19:20:12.345678910 +with explicit timezone +2019-09-16 19:20:12.345 2019-09-16 19:20:12.345 2019-09-16 19:20:12.345000 2019-09-16 19:20:12.345000000 DateTime64(9, \'UTC\') +2019-09-16 
19:20:12.345 2019-09-16 19:20:12.345 2019-09-16 19:20:12.345000 2019-09-16 19:20:12.345000000 DateTime64(9, \'Asia/Makassar\') +1234567891011 1234567891011 1234567891011 1234567891011 +with explicit timezone +1234567891011 1234567891011 1234567891011 1234567891011 Int64 diff --git a/tests/queries/0_stateless/01277_unixTimestamp64_compatibility.sql b/tests/queries/0_stateless/01277_unixTimestamp64_compatibility.sql new file mode 100644 index 00000000000..7d8d0b879b6 --- /dev/null +++ b/tests/queries/0_stateless/01277_unixTimestamp64_compatibility.sql @@ -0,0 +1,64 @@ +WITH + toDateTime64('2019-09-16 19:20:12.345678910', 3) AS dt64 +SELECT + dt64, + fromUnixTimestamp64Milli(toUnixTimestamp64Milli(dt64)), + fromUnixTimestamp64Micro(toUnixTimestamp64Micro(dt64)), + fromUnixTimestamp64Nano(toUnixTimestamp64Nano(dt64)); + +WITH + toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 +SELECT + dt64, + fromUnixTimestamp64Milli(toUnixTimestamp64Milli(dt64)), + fromUnixTimestamp64Micro(toUnixTimestamp64Micro(dt64)), + fromUnixTimestamp64Nano(toUnixTimestamp64Nano(dt64)); + +WITH + toDateTime64('2019-09-16 19:20:12.345678910', 9) AS dt64 +SELECT + dt64, + fromUnixTimestamp64Milli(toUnixTimestamp64Milli(dt64)), + fromUnixTimestamp64Micro(toUnixTimestamp64Micro(dt64)), + fromUnixTimestamp64Nano(toUnixTimestamp64Nano(dt64)); + +SELECT 'with explicit timezone'; +WITH + 'UTC' as timezone, + toDateTime64('2019-09-16 19:20:12.345678910', 3, timezone) AS dt64 +SELECT + dt64, + fromUnixTimestamp64Milli(toUnixTimestamp64Milli(dt64), timezone), + fromUnixTimestamp64Micro(toUnixTimestamp64Micro(dt64), timezone), + fromUnixTimestamp64Nano(toUnixTimestamp64Nano(dt64), timezone) AS v, + toTypeName(v); + +WITH + 'Asia/Makassar' as timezone, + toDateTime64('2019-09-16 19:20:12.345678910', 3, timezone) AS dt64 +SELECT + dt64, + fromUnixTimestamp64Milli(toUnixTimestamp64Milli(dt64), timezone), + fromUnixTimestamp64Micro(toUnixTimestamp64Micro(dt64), timezone), + fromUnixTimestamp64Nano(toUnixTimestamp64Nano(dt64), timezone) AS v, + toTypeName(v); + + +WITH + CAST(1234567891011 AS Int64) AS val +SELECT + val, + toUnixTimestamp64Milli(fromUnixTimestamp64Milli(val)), + toUnixTimestamp64Micro(fromUnixTimestamp64Micro(val)), + toUnixTimestamp64Nano(fromUnixTimestamp64Nano(val)); + +SELECT 'with explicit timezone'; +WITH + 'UTC' as timezone, + CAST(1234567891011 AS Int64) AS val +SELECT + val, + toUnixTimestamp64Milli(fromUnixTimestamp64Milli(val, timezone)), + toUnixTimestamp64Micro(fromUnixTimestamp64Micro(val, timezone)), + toUnixTimestamp64Nano(fromUnixTimestamp64Nano(val, timezone)) AS v, + toTypeName(v); \ No newline at end of file diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by.reference b/tests/queries/0_stateless/01280_ttl_where_group_by.reference new file mode 100644 index 00000000000..dead0a5aac3 --- /dev/null +++ b/tests/queries/0_stateless/01280_ttl_where_group_by.reference @@ -0,0 +1,20 @@ +1 1 0 4 +1 2 3 7 +1 3 0 5 +2 1 20 1 +2 1 0 1 +1 1 [0,2,3] 4 +1 1 [5,4,1] 13 +1 3 [1,0,1,0] 17 +2 1 [3,1,0,3] 8 +3 1 [2,4,5] 8 +1 1 0 4 +1 3 10 6 +2 1 0 3 +3 5 8 2 +1 1 0 4 +3 3 13 9 +1 2 7 5 +2 3 6 5 +1 2 3 5 +2 3 3 5 diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by.sql b/tests/queries/0_stateless/01280_ttl_where_group_by.sql new file mode 100644 index 00000000000..e61716cfe81 --- /dev/null +++ b/tests/queries/0_stateless/01280_ttl_where_group_by.sql @@ -0,0 +1,87 @@ +drop table if exists ttl_01280_1; + +create table ttl_01280_1 (a Int, b Int, x Int, y Int, d DateTime) engine = MergeTree 
order by (a, b) ttl d + interval 1 second delete where x % 10 == 0 and y > 5; +insert into ttl_01280_1 values (1, 1, 0, 4, now() + 10); +insert into ttl_01280_1 values (1, 1, 10, 6, now()); +insert into ttl_01280_1 values (1, 2, 3, 7, now()); +insert into ttl_01280_1 values (1, 3, 0, 5, now()); +insert into ttl_01280_1 values (2, 1, 20, 1, now()); +insert into ttl_01280_1 values (2, 1, 0, 1, now()); +insert into ttl_01280_1 values (3, 1, 0, 8, now()); +select sleep(1.1) format Null; +optimize table ttl_01280_1 final; +select a, b, x, y from ttl_01280_1; + +drop table if exists ttl_01280_2; + +create table ttl_01280_2 (a Int, b Int, x Array(Int32), y Double, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a, b set x = minForEach(x), y = sum(y), d = max(d); +insert into ttl_01280_2 values (1, 1, array(0, 2, 3), 4, now() + 10); +insert into ttl_01280_2 values (1, 1, array(5, 4, 3), 6, now()); +insert into ttl_01280_2 values (1, 1, array(5, 5, 1), 7, now()); +insert into ttl_01280_2 values (1, 3, array(3, 0, 4), 5, now()); +insert into ttl_01280_2 values (1, 3, array(1, 1, 2, 1), 9, now()); +insert into ttl_01280_2 values (1, 3, array(3, 2, 1, 0), 3, now()); +insert into ttl_01280_2 values (2, 1, array(3, 3, 3), 7, now()); +insert into ttl_01280_2 values (2, 1, array(11, 1, 0, 3), 1, now()); +insert into ttl_01280_2 values (3, 1, array(2, 4, 5), 8, now()); +select sleep(1.1) format Null; +optimize table ttl_01280_2 final; +select a, b, x, y from ttl_01280_2; + +drop table if exists ttl_01280_3; + +create table ttl_01280_3 (a Int, b Int, x Int64, y Int, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a set x = argMax(x, d), y = argMax(y, d), d = max(d); +insert into ttl_01280_3 values (1, 1, 0, 4, now() + 10); +insert into ttl_01280_3 values (1, 1, 10, 6, now() + 1); +insert into ttl_01280_3 values (1, 2, 3, 7, now()); +insert into ttl_01280_3 values (1, 3, 0, 5, now()); +insert into ttl_01280_3 values (2, 1, 20, 1, now()); +insert into ttl_01280_3 values (2, 1, 0, 3, now() + 1); +insert into ttl_01280_3 values (3, 1, 0, 3, now()); +insert into ttl_01280_3 values (3, 2, 8, 2, now() + 1); +insert into ttl_01280_3 values (3, 5, 5, 8, now()); +select sleep(2.1) format Null; +optimize table ttl_01280_3 final; +select a, b, x, y from ttl_01280_3; + +drop table if exists ttl_01280_4; + +create table ttl_01280_4 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), -(a + b)) ttl d + interval 1 second group by toDate(d) set x = sum(x), y = max(y); +insert into ttl_01280_4 values (1, 1, 0, 4, now() + 10); +insert into ttl_01280_4 values (10, 2, 3, 3, now()); +insert into ttl_01280_4 values (2, 10, 1, 7, now()); +insert into ttl_01280_4 values (3, 3, 5, 2, now()); +insert into ttl_01280_4 values (1, 5, 4, 9, now()); +select sleep(1.1) format Null; +optimize table ttl_01280_4 final; +select a, b, x, y from ttl_01280_4; + +drop table if exists ttl_01280_5; + +create table ttl_01280_5 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a, -b) ttl d + interval 1 second group by toDate(d), a set x = sum(x); +insert into ttl_01280_5 values (1, 2, 3, 5, now()); +insert into ttl_01280_5 values (2, 10, 1, 5, now()); +insert into ttl_01280_5 values (2, 3, 5, 5, now()); +insert into ttl_01280_5 values (1, 5, 4, 5, now()); +select sleep(1.1) format Null; +optimize table ttl_01280_5 final; +select a, b, x, y from ttl_01280_5; + +drop table if exists ttl_01280_6; + +create table 
ttl_01280_6 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a, -b) ttl d + interval 1 second group by toDate(d), a; +insert into ttl_01280_6 values (1, 2, 3, 5, now()); +insert into ttl_01280_6 values (2, 10, 3, 5, now()); +insert into ttl_01280_6 values (2, 3, 3, 5, now()); +insert into ttl_01280_6 values (1, 5, 3, 5, now()); +select sleep(1.1) format Null; +optimize table ttl_01280_6 final; +select a, b, x, y from ttl_01280_6; + +create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by x set y = max(y); -- { serverError 450} +create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by b set y = max(y); -- { serverError 450} +create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a, b, x set y = max(y); -- { serverError 450} +create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a set b = min(b), y = max(y); -- { serverError 450} +create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a, b set y = max(y), y = max(y); -- { serverError 450} +create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a) ttl d + interval 1 second group by toDate(d), a set d = min(d), b = max(b); -- { serverError 450} +create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (d, -(a + b)) ttl d + interval 1 second group by d, -(a + b) set a = sum(a), b = min(b); -- { serverError 450} diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.reference b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh new file mode 100755 index 00000000000..5922b8d74d2 --- /dev/null +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +# Regression test for MemoryTracker being incorrectly accounted +# (it was reset before deallocation) +# +# For this, the test uses: +# - two-level group by +# - max_memory_usage_for_user +# - one user query running in the background (to avoid resetting max_memory_usage_for_user) + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +set -o pipefail + +function execute_null() +{ + ${CLICKHOUSE_CLIENT} --format Null -n "$@" +} + +function execute_group_by() +{ + local opts=( + --max_memory_usage_for_user=$((150<<20)) + --max_threads=2 + ) + execute_null "${opts[@]}" <<<'SELECT uniq(number) FROM numbers_mt(toUInt64(1e6)) GROUP BY number % 5e5' +} + +# This is needed to keep at least one query running for the user for the duration of the test. 
+execute_null <<<'SELECT sleep(3)' & +execute_group_by +# if memory accounting is incorrect, the second query will fail with MEMORY_LIMIT_EXCEEDED +execute_group_by +wait diff --git a/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.reference b/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.sql b/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.sql new file mode 100644 index 00000000000..8de0f40229c --- /dev/null +++ b/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS data_01283; + +CREATE TABLE data_01283 engine=MergeTree() +ORDER BY key +PARTITION BY key +AS SELECT number key FROM numbers(10); + +SET log_queries=1; +SELECT * FROM data_01283 LIMIT 1 FORMAT Null; +SET log_queries=0; +SYSTEM FLUSH LOGS; + +-- 1 for PullingAsyncPipelineExecutor::pull +-- 1 for AsynchronousBlockInputStream +SELECT + throwIf(count() != 1, 'no query was logged'), + throwIf(length(thread_ids) != 2, 'too many threads used') +FROM system.query_log +WHERE type = 'QueryFinish' AND query LIKE '%data_01283 LIMIT 1%' +GROUP BY thread_ids +FORMAT Null; diff --git a/tests/queries/0_stateless/01284_escape_sequences_php_mysql_style.reference b/tests/queries/0_stateless/01284_escape_sequences_php_mysql_style.reference new file mode 100644 index 00000000000..19a5fc680ff --- /dev/null +++ b/tests/queries/0_stateless/01284_escape_sequences_php_mysql_style.reference @@ -0,0 +1,3 @@ +a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e +1 0 1 1 +1 1 0 0 0 diff --git a/tests/queries/0_stateless/01284_escape_sequences_php_mysql_style.sql b/tests/queries/0_stateless/01284_escape_sequences_php_mysql_style.sql new file mode 100644 index 00000000000..36ad7b4506c --- /dev/null +++ b/tests/queries/0_stateless/01284_escape_sequences_php_mysql_style.sql @@ -0,0 +1,6 @@ +SELECT 'a\_\c\l\i\c\k\h\o\u\s\e', 'a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e'; +select 'aXb' like 'a_b', 'aXb' like 'a\_b', 'a_b' like 'a\_b', 'a_b' like 'a\\_b'; +SELECT match('Hello', '\w+'), match('Hello', '\\w+'), match('Hello', '\\\w+'), match('Hello', '\w\+'), match('Hello', 'w+'); + +SELECT match('Hello', '\He\l\l\o'); -- { serverError 427 } +SELECT match('Hello', '\H\e\l\l\o'); -- { serverError 427 } diff --git a/tests/queries/0_stateless/01284_port.reference b/tests/queries/0_stateless/01284_port.reference new file mode 100644 index 00000000000..7e776595065 --- /dev/null +++ b/tests/queries/0_stateless/01284_port.reference @@ -0,0 +1,24 @@ +ipv4 +0 +80 +80 +80 +80 +hostname +0 +80 +80 +80 +80 +default-port +80 +80 +ipv6 +0 +0 +0 +0 +0 +0 +host-no-dot +0 diff --git a/tests/queries/0_stateless/01284_port.sql b/tests/queries/0_stateless/01284_port.sql new file mode 100644 index 00000000000..9c31a5d42ad --- /dev/null +++ b/tests/queries/0_stateless/01284_port.sql @@ -0,0 +1,34 @@ +select 'ipv4'; +select port('http://127.0.0.1/'); +select port('http://127.0.0.1:80'); +select port('http://127.0.0.1:80/'); +select port('//127.0.0.1:80/'); +select port('127.0.0.1:80'); +select 'hostname'; +select port('http://foobar.com/'); +select port('http://foobar.com:80'); +select port('http://foobar.com:80/'); +select port('//foobar.com:80/'); +select port('foobar.com:80'); + +select 'default-port'; +select port('http://127.0.0.1/', toUInt16(80)); +select port('http://foobar.com/', toUInt16(80)); + 
+-- unsupported +/* ILLEGAL_TYPE_OF_ARGUMENT */ select port(toFixedString('', 1)); -- { serverError 43; } +/* ILLEGAL_TYPE_OF_ARGUMENT */ select port('', 1); -- { serverError 43; } +/* NUMBER_OF_ARGUMENTS_DOESNT_MATCH */ select port('', 1, 1); -- { serverError 42; } + +-- +-- Known limitations of domain() (getURLHost()) +-- +select 'ipv6'; +select port('http://[2001:db8::8a2e:370:7334]/'); +select port('http://[2001:db8::8a2e:370:7334]:80'); +select port('http://[2001:db8::8a2e:370:7334]:80/'); +select port('//[2001:db8::8a2e:370:7334]:80/'); +select port('[2001:db8::8a2e:370:7334]:80'); +select port('2001:db8::8a2e:370:7334:80'); +select 'host-no-dot'; +select port('//foobar:80/'); diff --git a/tests/queries/0_stateless/01284_view_and_extremes_bug.reference b/tests/queries/0_stateless/01284_view_and_extremes_bug.reference new file mode 100644 index 00000000000..216e97ce082 --- /dev/null +++ b/tests/queries/0_stateless/01284_view_and_extremes_bug.reference @@ -0,0 +1 @@ +World diff --git a/tests/queries/0_stateless/01284_view_and_extremes_bug.sql b/tests/queries/0_stateless/01284_view_and_extremes_bug.sql new file mode 100644 index 00000000000..c444441a258 --- /dev/null +++ b/tests/queries/0_stateless/01284_view_and_extremes_bug.sql @@ -0,0 +1,4 @@ +drop table if exists view_bug_const; +CREATE VIEW view_bug_const AS SELECT 'World' AS hello FROM (SELECT number FROM system.numbers LIMIT 1) AS n1 JOIN (SELECT number FROM system.numbers LIMIT 1) AS n2 USING (number); +select * from view_bug_const; +drop table if exists view_bug_const; diff --git a/tests/queries/0_stateless/01285_engine_join_donmikel.reference b/tests/queries/0_stateless/01285_engine_join_donmikel.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/01285_engine_join_donmikel.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/01285_engine_join_donmikel.sh b/tests/queries/0_stateless/01285_engine_join_donmikel.sh new file mode 100755 index 00000000000..8a867921dbe --- /dev/null +++ b/tests/queries/0_stateless/01285_engine_join_donmikel.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +$CLICKHOUSE_CLIENT --multiquery --query " +DROP TABLE IF EXISTS NmSubj; +DROP TABLE IF EXISTS events; + +create table NmSubj +( + NmId UInt32, + SubjectId UInt32 +) + engine = Join(All, inner, NmId); + +create table events +( + EventDate Date, + EventDateTime DateTime, + EventId String, + SessionId FixedString(36), + PageViewId FixedString(36), + UserId UInt64, + UniqUserId FixedString(36), + UrlReferrer String, + Param1 String, + Param2 String, + Param3 String, + Param4 String, + Param5 String, + Param6 String, + Param7 String, + Param8 String, + Param9 String, + Param10 String, + ApplicationType UInt8, + Locale String, + Lang String, + Version String, + Path String, + QueryString String, + UserHostAddress UInt32 +) + engine = MergeTree() + PARTITION BY (toYYYYMM(EventDate), EventId) + ORDER BY (EventId, EventDate, Locale, ApplicationType, intHash64(UserId)) + SAMPLE BY intHash64(UserId) + SETTINGS index_granularity = 8192; + +insert into NmSubj values (1, 1), (2, 2), (3, 3); +" + +$CLICKHOUSE_CLIENT --query "INSERT INTO events FORMAT TSV" < ${CURDIR}/01285_engine_join_donmikel.tsv + +$CLICKHOUSE_CLIENT --query " +SELECT toInt32(count() / 24) as Count +FROM events as e INNER JOIN NmSubj as ns +ON ns.NmId = toUInt32(e.Param1) +WHERE e.EventDate = today() - 7 AND e.EventId = 'GCO' AND ns.SubjectId = 2073" + +$CLICKHOUSE_CLIENT --multiquery --query " +DROP TABLE NmSubj; +DROP TABLE events; +" diff --git a/tests/queries/0_stateless/01285_engine_join_donmikel.tsv b/tests/queries/0_stateless/01285_engine_join_donmikel.tsv new file mode 100644 index 00000000000..a6f47144ff2 --- /dev/null +++ b/tests/queries/0_stateless/01285_engine_join_donmikel.tsv @@ -0,0 +1,100 @@ +2020-02-17 2020-02-17 19:40:09 EMC wFxiX8iZ3Vns8pO6bx0WvYftGEOM9WMff Ku5ofnKwBaQ0CtcdZa6sAnSQbyufu0zfd 0 X16qWFb_h8JONLGxs7oiXFZTdBFfTniRMG mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=33&userId=274574634&email=Z21haWwucnU%3D&source=mn 657583482 +2020-02-17 2020-02-17 20:23:53 EMC foco1fj5Li6ey>MSfn?jKHBBdiwKXiBmy7Ni ICZBjN7en5snnszHKbTVUU*xyOhuZEgI1EK 0 I7XAKnCMsD9tdFHmmcCrI~KqiKQdo7Gxd5 mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=E17727463464LnJ1&source=mn 2992671877 +2020-02-17 2020-02-17 19:49:04 EMC q3AxEdMQhsr1NewVSKjHzYvh63XdxtGcBZX GVGMPZhenJd83JarI7Qicq5BKRfijT0ls 18536997 D_myvD3U4bmy5anwdNRFaxm KNoDfcCDL9U6 http&o=mail.ru/redir?u=http&o=mail.ru/redir?u=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6userId mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=AD0581918974621haWwuY29t&source=mn 1656424571 +2020-02-17 2020-02-17 21:55:32 EMC ef7tm7k7Yvpb4XIfvf7ON-s4Xv3ztD7SkB KIDkUzEgcAMu%ObAQAkI3jmm3638bSnX0 22341123 6NcB26yJrLhWgldC_HvV2fUOHNLRFvkLbA mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=E943634627468871D581940274695&userId=2746887723D&source=mn 1429586160 +2020-02-17 2020-02-17 21:37:47 EMC 3Y_KZLEJgn4bw?omv4EizIs1lUMz0Li0Ws0 I3QaVG9UvXWvWReLSHuDCs3DdgTbX8av6 28213800 KnoofunrqFKdIta8pMuR!FNX2yu1I6gNo https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https://click.mail&v=2&s=3745772 mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=5YWxpbC5jb20%3D&source=mn 872136829 +2020-02-17 2020-02-17 19:15:34 EMC JcHEtSzMb70K}04FW#ShpZw1ot_fb7wxj8 KG9bmRo6yweN2EX48CUBQcsOqzbHn6g3u 30047107 UKb5rJ5D\bFncBHxCioYswtJKAlv6Ua5MVSrj mn 48 Ru ru 7.0.32 /confirm/vipstatus ?key=75ydQ%3D%3D&source=mn 626057004 +2020-02-17 2020-02-17 18:15:04 EMC _sjptcVU5CF4sTnXh0C6DCYcOo0rNowf 0a0AcabOllf7\rN9X9aa7rVmJ0J1oZGJY2 30648090 Z2K_4kffe21iW1hl2jby1RXAAh5XtPLP3 mn 48 Ru ru 
7.0.32 /confirm/vipstatus ?key=5&ts=15819420%3D&source=mn 3682305993 +2020-02-17 2020-02-17 16:10:20 EMC WM44OUvEZse_9hQ0NJsn0k47pX6_UnaS7 LgCWwTMZ+GwiQEbNT95zUnkMCXYqPs1Xt1 0 QUuSVpqq#SgMdveJDW0bQ6s8jn8MLXzL1w mn 187 Ru ru 7.0.32 /confirm/vipstatus ?key=CCD&source=mn 726391852 +2020-02-17 2020-02-17 17:30:44 EMC PGfQVZHuAY0A_kUA\04ceYRjWqgPJtfhCn 4TR9oRKCNrouy:zJBLBi8J4=u7Mkt9jgzgmB 0 xbCoBX9pTDaWiRxW@F3fOgKD9lX58oWsc mn 187 Ru ru 7.0.32 /confirm/vipstatus ?key=DB3D&source=pcap 2498835913 +2020-02-17 2020-02-17 19:10:51 EMC gnd66xWlKf2H2tiZdbivjoz4ILmJ4lp_ ysgw6WLdbNWSHQrybWjhH9uJqnlZMWAv 0 nDQDqsnm/dlpbFMPJE93b6EETHzFxUSzVv pcap 187 Ru ru 7.0.32 /confirm/vipstatus ?key=4&userId=274634688&userId=274688&userId=27465D85CD4F634627468877266792YXJ1&source=mn 816668334 +2020-02-17 2020-02-17 20:52:00 EMC pBzkcTpn50WpO]BZH7^js6TpPGp7Uid7mFg jQVdKDl4+Sp_j5CZc PtVoMGTIGOfYO6Q2 0 aQGyrF9s3qUMPq_dUnOiM6QVMX5PNQ_eiV pcap 187 Ru ru 7.0.32 /confirm/vipstatus ?key=50&email=YWlsLmNvbQ%3D%3D&source=pcap 1770779506 +2020-02-17 2020-02-17 21:31:14 EMC cyTu3wNRCI1TzkxCybFNz\rCkrdDZQxYtlW gmtJF_hWE8M79bWSaKkn8UT5Xdwfb0uK8T 0 RSJUU0SBDIXjpYORna59cWouc20W5VIP mn 187 Ru ru 7.0.32 /confirm/vipstatus ?key=4CD%3D&source=mn 3448881839 +2020-02-17 2020-02-17 17:15:46 EMC I3bvr8ce2SU_&UuDX4J6KRJl5uipnL2Or myxD8nQXFYnKqBoPGGo4Hy6hBTXRL04K 0 ntHjLMvhnb7b09Tu7jRkM8KgS5ni0w4oH mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=36274695&userId=2560&userId=27463465&userId=27468872EDB908&ts=158193819382 2447621965 +2020-02-17 2020-02-17 18:21:21 EMC _468tOVLPpIkgKvPGX8ypjfTZV09k8w5 0KakGIWk9uaL0Rv3]1T5wPeHPklVWzhytg 0 QAJxAuQADoEQqY0HUT56P0O4K5y55K_L https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https://mail&v=2&s=92Ymail&v=2&s=3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D66email&v=2&s=93D158192YWlsLnJ1%26source%3Dmn&c=swm&r=https%3A%2F%2Fwww.wildberries.ru mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=5819392158193634&userId=274688&ts=15819408BD1haWwucnU%3D&source=mn 1547300784 +2020-02-17 2020-02-17 19:07:33 EMC Y8yBoZ7Cp2GUTQ8srLMZJFIZ85WgiBeau wmIgwW88aSGvvjKwV,8EooQonI_90taLrh 0 KXVqerLQyaMaMtuT1FzUjrLev8Fw11mkfO pcap 1 Ru ru 7.0.32 /confirm/vipstatus ?key=72846239707&userId=274688&ts=1581921haWwucnU%3D&source=pcap 2777789095 +2020-02-17 2020-02-17 11:27:36 EMC n66D1ZN9C6y7LASQucHNdkw_QseVRnt32 yeqFeZFa,7cQ_JyTO3yJJKS QaCKEWERPx4Z 24388614 6h91HeiPyIKw8yV0p\tcaK99w_zmnmXU77RI https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https://click.mail.ru/redir?u=https://click.mail&v=2&s=3D158192YWlsLnJ1%26source%3Dmn&c=swm&r=http&o=mail.ru/redir?u=https://click.mail.ru/redir?u=https://mail%3DYWlsLnJ1%26source mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=6167462E4CD570A0312D6BF1BtYWlsLnJ1&source=mn 662531688 +2020-02-17 2020-02-17 14:11:06 EMC jamANU2zei_lBkfEk_Im5ow0gWB5G0gL1S So7Jc6gheYgclfoq8l0hqsPExrMNSXPLV 20685668 nwRXATVEY2PaYr34qwtZX0Q6ah_sceXjbL https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=http&o=mail&v=2&s=9444%26email.ru/redir?u=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69103Dmn&c=swm&r=http&o=mail&v=2&s=92YWlsLnJ1%26source%3Dpc&c=swm&r=https mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=420%3D&source=mn 3233174651 +2020-02-17 2020-02-17 14:32:04 EMC lsruHZ0Ywg1yDdrio.j5piNMTnqNxeaOQJ G6K62xDbm9aJ_kNlLqEo4AhVf46wDtWi 32046518 wgpf1yqVtVhLL7yT9sp1RbqaXGF81GmSiQ6 mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=1581943D&source=pcap 3183159832 +2020-02-17 2020-02-17 
15:11:37 EMC 5EW8o4vD850H>TQam3NA2@egQm32jpASIF KJKN5QAHdoqw_RGol5hLvSq6753cKLswM 32971244 8Vh3DVlCjfXs[5hqVMGDDP2iBjoiqhoACB https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https://mail&v=2&s=3D27457AD3D274576userId%3D27461haWwucnU%253D%26source%3Dmn&c=swm&r=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6ts%3D158193Fkey%3D669%26email&v=2&s=3Fkey%3D69D01 mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=5634&userId=274695&ts=15819440446274695&userId=27465&email=Z21haWwucnU%3D&source=pcap 574798405 +2020-02-17 2020-02-17 13:59:25 EMC i6LAlVL3U0GUFFtjiRgfPyjyattjzds6y2 IYv_jZ2k+bNO33nbLKRRGcJvTHVIueEDI 21779069 eUgOCUGbEZM15\'Of0Vz6zQmAT0T0zX3XSO http&o=mail.ru/redir?u=http&o=mail.ru/redir?u=https%3A%2F%2Fwww.wildberries mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=D1F9sail=YWxpbC5ydQ%3D%3D&source=mn 3475274476 +2020-02-17 2020-02-17 06:40:47 EMC A_3372QT5xhTmjNrokk3yZCtuOm52FfZT oOD7es9hGUs7jB3_)jVKh3Oo10IQvAu01 27168564 UYEHX0itbLKZjChWxLsP4a7LUXQLX4iB mn 48 Ru ru 7.0.32 /confirm/vipstatus ?key=467468874634&email=dG92&userId=274695&email=email=a2B7AD60&email=bWFpbC5ydQ%3D%3D&source=mn 3084609784 +2020-02-17 2020-02-17 14:10:13 EMC WSlfONgzmGj8kylbItxLW8MX4u60oapxy U8TMovlAcJTcA~u3KoOFG7pchP3CBPbcB4 28904171 nBfRyelBAkOfR70tIzBusuS0M8_KbRsOGI pcap 187 Ru ru 7.0.32 /confirm/vipstatus ?key=4462F5B37274695&email=Ymail=Z21haWwucnU%3D&source=mn 2020344465 +2020-02-17 2020-02-17 01:43:57 EMC khjsNWq7 Iq564D1u\'vWuKZHN4kfq0Kyfv hcAK5RGOjZHYbFJwufN7\tZeXCRWAEKoT2 0 G0Zi9TAk`t0aUcHRiOcb1|RgoT8xMOH4KV mn 187 Ru ru 7.0.32 /confirm/vipstatus ?key=1581940095&ts=158193819382&userId=27465D8B8F1936256C35B4B466&userId=2746887C1A97&userId=274695&userId=274634&userId 902363005 +2020-02-17 2020-02-17 02:17:35 EMC H56Q4qwa\'eYiFCmGR40Pp7,m6fDiE2nJSmB Efk7W3ZrgNsEi.anvkrteuN>lF2UlX0RZFzr 0 QhyRjVGEsD_4YaQiaLorQ6y0fbCxkEVzQ5 pcap 187 Ru ru 7.0.32 /confirm/vipstatus ?key=44695&email=YW5hcnU%3D&source=mn 2161496531 +2020-02-17 2020-02-17 03:20:48 EMC 1wBv9Q7rDQuImuif>0v1t8OXWSIXymKrI sNujW7iatZLPxEOpI1fZdT4p0QRaotsXPZ 0 LSKNxAcrTMsADK9VI7AMqm05blzLWXCJ05 pcap 187 Ru ru 7.0.32 /confirm/vipstatus ?key=AE1FAC2BFA1E75hce=mn 985607445 +2020-02-17 2020-02-17 06:18:16 EMC 6dqrbyyKHWPBEyPZGbJRKjxtmt0Aq8IsON k57VHYX59Ws9u[L3nNEruOGCVgGH_l27J1 0 _lwuZQycqGeyMxkRSM7pR1lHNU6_V0nVWG pcap 187 Ru ru 7.0.32 /confirm/vipstatus ?key=B27465D88&userId=274572BE3274577273AF0&email=a274578&userId=274692&userId=274688&userId=27465D8192 130983489 +2020-02-17 2020-02-17 09:20:13 EMC UCLTP56_YcQ9rWVJa8rif2Aq5NNqVThlwH 7Rx9OsBs7BemVxuArwZ7N5BqcFwK7CSI 0 JOIbkOPdPPbPCtqRV6wQHsdWYNnmarCxm mn 187 Ru ru 7.0.32 /confirm/vipstatus ?key=A6E819420%3D&source=pcap 2567294448 +2020-02-17 2020-02-17 13:05:00 EMC 28AZ4n0zpsVuwzenZ6*gd0VNMjnT8Kmfncv PFiGZRLTjK1CxfLJajtGVFOATfWnTE00 0 QYPUzvwV=w8ev0E77B|RK6Z<66ZX5y1YIK1J mn 187 Ru ru 7.0.32 /confirm/vipstatus ?key=C19213C0060312E9E28A82&email=bmFA2095&userId=2746346&userId=27457455266&ts=15819213D&source=pcap 2881908356 +2020-02-17 2020-02-17 13:40:53 EMC uDOkWKoTLCbB2SFYoepda0 itJJ9zJgyZVY d50Y87RQCO70ci0u{QDZcA8dU31qm0IS1 0 0MuqyXoTtKYpmheID1AiXWz9sZ0zroDj mn 187 Ru ru 7.0.32 /confirm/vipstatus ?key=A3274695&userId=2746346470&userId=274634&userId=256AEA56926&ts=158194407&userId=27463467720%3D&source=pcap 700256219 +2020-02-17 2020-02-17 13:56:10 EMC rOXnSk45QcoEkw03yMYbGUhD_YZ_yCx9OU DyGXuVg8;P6Tq8CDp2_0XI`zPz4wuG8iJJh 0 3IM74UyhDyv2Jamik02pDQJu_L10vfPCKTm mn 187 Ru ru 7.0.32 /confirm/vipstatus 
?key=459&email=bmFA81942095&userId=27468872095&userId=27457F1944672Y29t&source=pcap 607085190 +2020-02-17 2020-02-17 13:57:55 EMC xo09ZqxjuPBo_2aYOgnG\\sH3HDRtPH7xW 1I3ceRxYeGTFgNy30ojqJ]qrOgqu1Ag8yO 0 PwQkOkG2N1_p*ZFjOwmM6FviVATMGY5_rbK mn 187 Ru ru 7.0.32 /confirm/vipstatus ?key=158193979967F628BBA48E274634&email=dGV4LnJ1&source=mn 3851759707 +2020-02-17 2020-02-17 14:09:55 EMC vmlDYHpCg270cyeLJZBJTwBBnnJHH99Av l9FPMTHyyKlHijgeO]2oJK90OO8hWvntXS8 0 0Xu7Ha8eqQdwcG3pm#GNMw6HzEsxoAszdK mn 187 Ru ru 7.0.32 /confirm/vipstatus ?key=BtYWlsLnJ1&source=mn 2783694582 +2020-02-17 2020-02-17 15:31:52 EMC WIzjCT0n1XqAlOuRi2YskEx619ZjBaFe4g z_NGqjbLB0IhQV1SxDRMNG0DlMUxPlqaQ6 0 bv75_7V5B4pBB2aNK\fRwnBDQm0ngrcFPLN mn 187 Ru ru 7.0.32 /confirm/vipstatus ?key=158193628FA603193692&email=bmFA934&userId=265&ts=158192YW5haWwucnU%3D&source=pcap 722494088 +2020-02-17 2020-02-17 06:28:37 EMC 434gE67EZuzQL3ssU31NIt0eoeEpm8YplE7RGkXuG https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https%3A%2F%2Fwwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6source%3Dpc&c=swm&r=http&o=mail.ru/redir?u=https://click.mail.ru/redir?u=http&o=mail&v=2&s=974578Fvipstatus%3Fkey mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=1581938C8193634&email=a29t&source=mn 4267472950 +2020-02-17 2020-02-17 03:43:33 EMC TAl7tSOQaATLUUMZTx90iv0sqRoADhEn2r zvCR7ostHdMiQuVI8uxOyYGf0mm6k3ckHSL 32488481 9kFERwTF"AWKxn0YPOUv_tA8c5X2cdy4bA pcap 1 Ru ru 7.0.32 /confirm/vipstatus ?key=1581936292&email=a256F095&ts=158192&userId=274695&userId=27465D84D65D8B8E7EA692YW5haWwucnU%3D&source 1722142578 +2020-02-17 2020-02-17 04:06:30 EMC kL0p32eE J8hj51lGJQdDpzRytjcc2_AnA eO7MVT5A47viQfb3A01JapoD001J0l_UAt 23078127 KDZSswsocsumoOtSx\\phTvuffYOTHGNoUa mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=7724D&source=mn 2669624134 +2020-02-17 2020-02-17 04:23:38 EMC q3PyeDUQPYw7QN9BwE1eP\nDzWsZDAv3GTN u3GnfRRy>bOqpqFvSDikeB8V0zHacc0ur 26965394 CxxiDtfVM9T6WuleL1P4mMbzF9k0TLWLF pcap 1 Ru ru 7.0.32 /confirm/vipstatus ?key=58193695&ts=1581942095&email=bWFpbC5ydQ%3D%3D&source=mn 2552851257 +2020-02-17 2020-02-17 05:36:35 EMC ipti_lG94H8P5RgFwKzaoxUwWNEV9beK zolMT7vt_ttY|BmlD2PSejtg6O7WzJ2S_ 31107154 ha6rDRxktOKPjl2gzz7R2TpQu2KKta9Q mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=4942095&ts=15819369266&userId=274692158193634&userId=2745523552D695 1459132007 +2020-02-17 2020-02-17 00:45:31 EMC hXw0ETb7piXc>PKMzqxhV_F_UdWZnFB5O VXB5E4w28OrrZT0X1Sn9SrTWuJhjff7I 19656120 v06fJehG\'_m057sFduiQBx(mS5JnuSD6jph mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=B193695&ts=15819346165D85jb20%3D&source=pcap 1300966980 +2020-02-17 2020-02-17 03:44:40 EMC 0Q0EMlWcxIACIHUsZX4mEhJnxPpCJV6Qe ZqVPZ1Yb>rFSB\tzEMNdTEjoWIHLaQ2o5vI_E 17361913 AuRaBJOGvceRXWiMb9\'wolUnAGR0hxXQgOS pcap 1 Ru ru 7.0.32 /confirm/vipstatus ?key=725634&email=bWFpbC5ydQ%3D%3D&source=mn 1552491257 +2020-02-17 2020-02-17 02:35:40 EMC mSk0tnq0soJQslU0KlkSmfnXFu6fqRw0A hjlyHsr0&gybjsAJ0WRqdLJz32l3lmiEgDN 32664890 YAoxYQxl"Ce8HvY2j90VWzKQxlGCTNRAyJ mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=6459&ts=158194464089&userId=27465D82&userId=27455229t&source=mn 3051884197 +2020-02-17 2020-02-17 13:44:36 EMC znOwy_e4iHG8(NdCh2qgz1tjBR0ZjsFus eNSFOPi9oax3Ru7x01JA4TWeX66Ev8T9U 18979240 LosZ3fZjVY8hXjH8AJgKx:O3FNAMVDRGlZ mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=71581944027465D819384019215819397327457AbWFpbC5ydQ%3D%3D&source=mn 1828619156 +2020-02-17 2020-02-17 02:07:55 EMC yvDmRiaYkm43iWRNnnW3YDPBqq55XWVXr 7gpmg2PirkSp]TYhcY7i6]DawnwV9tqC0b 22858229 yDybPZi3Oc7m,JVxfltuRtWm8TmiHQJOjY 
https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https://click.mail%3DZG1haWwucnU%253D%26source%3Dmn&c=swm&r=http&o=mail%3DZG1haWwucnU%253D%26source%3Dmn&c=swm&r=http&o=mail&v=2&s=9158192YWlsLnJ1%26source%3Dmn&c=swm&r=https://click.mail&v=2&s=3Dmn&c=swm&r=https://click.mail.ru/redir?u=http&o=mail%3DYm_ce%3Dmn mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=158189721581939265D89&email=dG1haWwuY29t&source=pcap 1735264694 +2020-02-17 2020-02-17 00:39:49 EMC BOA0kaQben9xa\fOjQ8YZt2L9UV_FYiMKYa3n 0BL0DaCp3weXj7aIq4ZBhaKzr0Uwpdpm 21820865 xATcz14a2hYJcGJb2gRsIIQ9K2A5fOGo https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https://mail.ru/redir?u=https://mail&v=2&s=90BtYWlsLnJ1%26source%3Dmn&c=swm&r=https://click.mail.ru/redir?u=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6ts%3D158192YWlsLnJ1%26source%3Dmn&c=swm&r=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6userId%3D27457%26ts%3D158192YWlsLnJ1%26source%3Dmn&c=swm&r=http&o=mail.ru/redir?u=http&o=mail.ru/redir?u=http mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=77256160&userId=274634&email=bWFpbC5ydQ%3D%3D&source=mn 1855925025 +2020-02-17 2020-02-17 01:38:01 EMC uNpwiDVW2_p6HEMl4YIHw0z_vOpEqPyir napvpdbMVeSjy KJz5spT94Yu6wGCm_xgkX 27148457 R8wZY_PRklNkxX0Dn6uA{sGHEPaMT3kc8 mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=3C3274695&userId=2745528194401908193467BD9189&ts=1581926&ts=1581939772B67B6AE4CD2DBD910B392YW5haWwucnU 2249892028 +2020-02-17 2020-02-17 23:32:22 EMC uKrr0IYx5Kt1\'QPtv@toS9nQ4ZcrvAD0l8 4dWFNMPK50HO0z6ZPSoX6g02Ceu35kLzjtB 25390020 CXq72Vsbolj2.WJUoa8UoTZlw2wHUHxWX https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=http&o=mail%3DYWlsLnJ1%26source%3Dmn&c=swm&r=https%3A%2F%2Fwwwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6email&v=2&s=3A%2F%2Fwwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D67972 mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=BE304F9CDBC6&ts=1581934&email=YW5haWwuY29t&source=mn 3489237198 +2020-02-17 2020-02-17 22:36:42 EMC hQkEQSurISp_uy9ORd05 yjXxg7QByQH9 FURnnJ51xl2NiQHcN\r6hpIkfYa17e26Mt8 25551409 NBIMBrwi mVoYYuxJq_8Df0CM8CZkrZezFE mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=158194463465&userId=25616C3D&source=mn 2913713687 +2020-02-17 2020-02-17 20:19:16 EMC Xahxt0V3`JlMo+tyHm8MxJFT9NEywdtAZeD YV0_62kSEM9X 0GW7eHjXxF95KfKoarfR 27982291 iuEEgZrUYV04*QSEQg_uWbYo8cewVWPriGO mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=DFD92Y29t&source=mn 1136043092 +2020-02-17 2020-02-17 20:05:44 EMC gAraBOGNzDEdkz8qxSB68sQXrgyTFpX6 KC9GBYXCc2sL;fDL4#6DMT%2ELzdoqHtjMp 27849879 hmvHfcpQ7YcXyk6byq3rSqEUfDuei6M62h mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=71&source=mn 795793408 +2020-02-17 2020-02-17 01:03:41 EMC Sujwqk4khZF0dd3dRrn0zsOYRdkd9ynAR NaKyF0O3rR3r0wXNLf_3KSANWona7Ftry 30475228 calDaEsa910bkYOkR\bbKjQeUvAOoR7CQJH https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=http&o=mail.ru/redir?u=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6source%3Dmn&c=swm&r=http&o=mail%3DYWlsLnJ1%26source%3Dmn&c=swm&r=https://click.mail&v mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=B1581918970&userId=274634&userId=27457&email=YW5hce=mn 3165615449 +2020-02-17 2020-02-17 18:40:14 EMC zBHwSrKIMfAFFJxuvWtIsZ2DA_nAX98Aa NQiRPz123luXCH0fvH?59tbjHdGAlVgiHhc 26251034 0KbAptV7u8SZ?V4N7WHvx[0vY5D_saLU2G 
https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https://click.mail%3DYWlsLnJ1%26source%3Dmn&c=swm&r=https://click.mail&v=2&s=9158192Ymail&v=2&s=936source%3Dmn&c=swm&r=http&o=mail%3DbW9%26userId%3D2746158192YWlsLnJ1%26source mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=5&userId=274552746887BF4LnJ1&source=mn 115808044 +2020-02-17 2020-02-17 22:03:35 EMC dvrVwEyocDVpJ3YEozYAH1XuC51DQcby59 wcCGF71wVXRj0pz2h131p\\1c7jnQYzejlC 24153343 mHlXjDvel9Ab8kZxtR6ELtFOSq7X1Q8_7 https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=http&o=mail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D65 mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=A63468874692&email=ce=pcap 1125992029 +2020-02-17 2020-02-17 13:11:31 EMC ELifUEthNP3xZTZ5RZd2tIgUWpctZJNKkdp 3qxE4qStGm1jIRPc9,JCHKxNAJqeRRgVaJ5 32523658 opEAQkCl*DGc0VozrkcYlaUmWvfN0IAYa https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=http&o=mail%3DYWlsLnJ1%26source%3Dmn&c=swm&r=https://click.mail&v=2&s=3%26userId%3D274570%26userId%3D274615819383%26userId%3D2745746193%26userId%3D2745745776source%3Dmn&c=swm&r=http&o=mail&v=2&s=34Fww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6source%3Dmn&c=swm&r=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6126source%3Dmn&c=swm&r=https://click.mail%3DbWFpbC5ydQ%253D%253D mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=616&userId=27465&email=YWlsLmNvbQ%3D%3D&source=pcap 660510007 +2020-02-17 2020-02-17 23:08:20 EMC Ww8aun1EXuPT3stlfv12_A4nVx4RfDfGaK Gj6ixLDK{FkfiIQ0ZqHK9IsJMWys80zowLQy 28692974 nB20Riv63uMc]E9s0Bv7x1XcxILAKq2zwc pcap 1 Ru ru 7.0.32 /confirm/vipstatus ?key=3D&source=pcap 458549627 +2020-02-17 2020-02-17 15:45:11 EMC _nkKyF0NV20B2AAxI@oOj8uSGus_lOrqBS 0vXB63kVoAv97D8cUCQSR\bmet_h2UhiQ4X 31753811 zA7KKsesYWPrI1KD"FFmdemMIJ3AcxwZr https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https://click.mail%3DZG1haWwucnU%253D%26source%3Dmn&c=swm&r=http&o=mail&v=2&s=9%26ts%3D1581944577457EC6source%3Dmn&c=swm&r=https://click.mail.ru/redir?u=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D67D492YWlsLnJ1%26source%3Dmn&c=swm&r=https%3A%2F%2Fwwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D685764%26email&v=2&s=9%26email.ru/redir?u=https%3A%2F%2Fwwww.wildberries.ru%2Fconfirm mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=DA8189&email=a2DC9serId=274634&userId=25695&userId=274688&userId=27465 4039013194 +2020-02-17 2020-02-17 14:54:17 EMC wHTfVQKqlnQp!PXzWG80qc0oWU1pvtKeL EaNVlRXuzZ33\fLB0Fmjet29f3xaJplvaiX 30992376 vJnOh3mpOb0h!xPw1slS0e[Ehj1AIx920Jo https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https://click.mail.ru/redir?u=https://click.mail&v=2&s=3c pcap 1 Ru ru 7.0.32 /confirm/vipstatus ?key=7462B60&email=cnU%3D&source=mn 2569826295 +2020-02-17 2020-02-17 22:51:48 EMC C1a6zgET p1v0mSMHJJgMxp8w7uJ7zN14J9Z 5cn50cUip60T6nRihVzsSRarErtllzgH6 26715992 0D4M2m041DlURNuFG9QJtIcjHVkp5_TEZ https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https://click.mail&v=2&s=9446158194419447 mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=DEBE34&email=Z21haWwucnU%3D&source=mn 1504060974 +2020-02-17 2020-02-17 13:53:54 EMC 5a2rniMpduiEUzYFsgZjKHqgoZWbDYts ziqQkEkD3tYgLGhe&QwROPlb3hkAIZW_UI 32696355 0rSWK9UmGDPuRvPwXqs3mK9cF0balOqbS http&o=mail.ru/redir?u=http&o=mail.ru/redir?u=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6email pcap 1 Ru ru 7.0.32 /confirm/vipstatus 
?key=77274552E&ts=158192158192YXNvbQ%3D%3D&source=pcap 1675780127 +2020-02-17 2020-02-17 20:08:49 EMC 0IBozTjy/SMIkY5tY\\_GAd\'0VIzFgFK2VSY V0fFnEjkPzK3.y_ng3Nfs1oJtBLCWPq35z 33102601 NQhlj1k9bLy234WJi\rKtOd2239rJDYJSKh https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=http&o=mail.ru/redir?u=http&o=mail.ru/redir?u=https%3A%2F%2Fwwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D61haWwucnU%253D%253D%26source%3Dmn&c=swm&r=https://click.mail%3DYWlsLnJ1%26source%3Dmn&c=swm&r=http&o=mail.ru/redir?u=https://click.mail.ru/redir?u=https%3A%2F%2Fwwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69%26userId%3D2746193A%2F%2Fwwwwwww.wildberries.ru%2Fconfirm%2Fvipstatus mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=D8BD7B082&ts=15819392YW5haWwuY29t&source=mn 832387252 +2020-02-17 2020-02-17 21:39:56 EMC 3aiRtPZF[Sjelo0e2EVnauTwl0tsW9SRi7f i8OkH4au\rfMiBV0E4frxZu9WJpRG5bhp4 29554136 gzPJTobNMGgG1kUYVe9sOwfpHmKiF1p0 https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=http&o=mail%3DbW97B365 mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=AE&ts=158194466&ts=15819213CD274688&ts=1581934688&email=Ymn 74272104 +2020-02-17 2020-02-17 17:21:43 EMC yqM2AqiUS2Lmo0S5KNYR-RKGxDrME0w5U T1Q2fyBqs8qhiWB_JBE3Ko_Ro0VD79pT0 18406884 mVoGr7MMzfVX0HtUKAYeCVX68_NOaiQY https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https%3A%2F%2Fwwwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6email%3DbWFpbC5ydQ%253D%253D%26source%3Dmn&c=swm&r=http&o=mail&v=2&s=9158192YWlsLnJ1%26source%3Dmn&c=swm&r=https mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=5819392&userId=27465D8B446887AF95&email=bWFpbC5ydQ%3D%3D&source=pcap 1422459332 +2020-02-17 2020-02-17 15:06:14 EMC NXf_EKmdwAkZZIKr0N0w4HuVTpNqZ30Ldk0 PpN_0d7ywY45Y_Wm#L2S01pN5klv0Wbsa 21385681 JH3MQSLtn6gUIe92ilJPT.EFDSucrCuGHZ mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=A4627465D819397D96AE65&email=dG1haWwuY29t&source=pcap 959722117 +2020-02-17 2020-02-17 19:52:47 EMC OY90Zcjc8ebLcK_uULQHDNvpwO1hN3MwIat ASdCffiid6DweXoVjmpL8~5DHOzhFnPRuW 33422761 4T1zxu2F4evId\rETTryZBYsCBkTxsXLeuC9 mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=E97265D8A70F&ts=15819403193634&userId=2746887&email=ce=mn 997133189 +2020-02-17 2020-02-17 17:55:20 EMC ManEZ_qtzd0v\bdxdB07LpVWqoZUfpDzZW MnNRUoUE2ASWag8ClKK5l0yPRafHb0gsw 29428950 clyoRyW5\00F6wDd5P188B6k_OdKCbUFQqT mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=52666799692&email=dGF7470E634&email=ce=mn 3015790084 +2020-02-17 2020-02-17 20:07:34 EMC pr7D2hoMay0bwJjvH6UaPLZpeDXtq0Njs bDlkkbc3OnwzdcNZHDcBkR|V0ApitIVgPOv 22120090 TC2aVmxXNfOhSL0y2oqzb=drQElfeo0_OQ mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=581943D&source=mn 2666495492 +2020-02-17 2020-02-17 18:20:19 EMC a_EpwBGWRXNSTY_FyAhRB25SpX8Gw8JaW 0XDllB5u!HDENidqN/CRrkxwnjTXLeEKIyb 32607759 OKByJYVH3pLb:BWhWwvLnnpw0urPjP04i https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6userId%3D27461203Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6email&v=2&s=3D158192YWlsLnJ1%26source%3Dpc&c=swm&r=https mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=BC66&ts=158193926&userId=256BFC35FD2AEAZ21haWwucnU%3D&source=mn 590291581 +2020-02-17 2020-02-17 10:27:11 EMC bn8x3TAHKlMOsdlmzybH0S*iDc0hYull7Zt 7PY8hsBErbSTnpoU7iPxD8CFjIMxK8eN8F 30559777 lgByHEG2ffmexKgJXv5rC6CW78KV4Kia mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=6667&email=bWFpbC5ydQ%3D%3D&source=mn 1898373016 +2020-02-17 2020-02-17 14:09:12 EMC 
jYFR0DCE2eTkEFCuAIyAHVYLUy9ENh9Cqc dhe0lvmTDchj8XhgtsZ3mCPzv5hTGPM8Q 24813187 Y7MmPfU2YhtaY85K"2xIfqNraWWqBp5QvK https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https%3A%2F%2Fwwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6source%3Dmn&c=swm&r=http&o=mail&v=2&s=96email%3DZG1haWwucnU%253D%26source%3Dmn&c=swm&r=http&o=mail&v=2&s=944576userId%3D27461819443Fkey%3D646192YWlsLnJ1%26source%3Dmn&c=swm&r=https://click.mail.ru/redir?u=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6userId%3D274615819360 mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=AD9397158192&userId=25616AEBD63465D88&email=bmRleC5ydQ%3D%3D&source=mn 1726149098 +2020-02-17 2020-02-17 18:27:00 EMC VnzEdMoo2lhW?Yr6AQyNVw yFnUj_l0pBiG EnQ5CpnJt3LM1hiuC7lsqJChM0i4JMZ0 28169388 zBJ8p4pSlPo_bkk4\t2ObECW2ReYnHoWrQH https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6userId%3D27461203Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6email&v=2&s=3D158192YWlsLnJ1%26source%3Dmn&c=swm&r=https mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=397729t&source=mn 1102313974 +2020-02-17 2020-02-17 19:49:07 EMC fxpHcK20ouDlcjYFRhFT4DMtbMML0qb8ds hBF63Vf_qZrpsfD4EOfd6YGUBvflIJqF 22835697 TkBqOH2Jjp0wH35uDnjAxqgyb2P4HqtznK https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6userId%3D2746158192YWlsLnJ1%26source%3Dmn&c=swm&r=https://click.mail&v=2&s=9158192YWlsLnJ1%26source%3Dmn&c=swm&r=https://click.mail&v=2&s=92YWlsLnJ1%26source%3Dmn&c=swm&r=https://click.mail%3DbWFpbC5ydQ%253D%253D%26source%3Dpc&c=swm&r=http&o=mail%3DZG1haWwucnU%253D%26source%3Dmn&c=swm&r=https://click.mail&v=2&s=9%26source%3Dpc&c pcap 1 Ru ru 7.0.32 /confirm/vipstatus ?key=ADDABAA48E95&userId=2746346887D9420%3D&source=mn 3497823576 +2020-02-17 2020-02-17 13:28:56 EMC hZbZqj2vRV2FpCXlvxIEyqdeQrwceKg35 8fpYiVLE9gfSIzfP_$dzlj1H8tMzVbcg59 20129530 8bY0GHO5KMpHW9cIAke4yQQ94oQfvoHBxY mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=C5ydQ%3D%3D&source=mn 3102735519 +2020-02-17 2020-02-17 21:37:19 EMC TAlSFJzIsXzTWbuEymOTU320iUcNXmLT5j J2Vf06b6CMLFAvwH_wsUNOv4O9nnCy20 17587521 nEczrivzdrNaBYYvdnQRUT2pBBo8hu3kCvI mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=66&email=YWlsLnJ1&source=mn 1613554206 +2020-02-17 2020-02-17 21:58:32 EMC XPXxpWq0wHcK2FB4$JaOa9xIGhEaajLeS l59HKsiwgQuXgAweGZRZrEkN0Pqk_plZ4c 24296347 3s8mV_3W&hisrcwCm6QXCg_0zgLyr6nnqPB https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6userId%3D274616 pcap 1 Ru ru 7.0.32 /confirm/vipstatus ?key=467105695&email=a2YWlsLnJ1&source=pcap 2033387311 +2020-02-17 2020-02-17 16:54:16 EMC 2beVLC_ZYMmZuZbJZ1mjV LQeoXV0BsZKq J4Y22CvJx36nq6KNDMXBg!H5lUx1Z7BoFf 31434491 dabaBlCY351pXUW0HdErDTW_XGkqwmta https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6userId%3D27461203Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6Fconfirm%2Fvipstatus%3Fkey%3D661076userId%3D27457CD15819444613 mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=76031938874634&userId=2564BFA93626&userId=256&email=ce=mn 1613641575 +2020-02-17 2020-02-17 00:27:36 EMC Au3PRMFCW0Ij8iL0ZtsrryAvKJowqXJ8k ntqK0ayP8ltR0twuGraE`Vwgcmux6jFwg 21307924 ruixkHLKjmENcmpbvvbrrLU8FDLIpWOFtU 
https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https://click.mail.ru/redir?u=https%3A%2F%2Fwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6email.ru/redir?u=https%3A%2F%2Fwwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D6CD2745785dQ mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=A41E772FFC5ydQ%3D%3D&source=pcap 2395792653 +2020-02-17 2020-02-17 22:03:46 EMC Tc9EmNpb5Xy0edNdTCLg7X2eLmGuTdEkIt YsCMp8oiUBPyhHBsDmEZs0xvixA2L6atY 23431515 Bh67PWI6jKZ6Q102nTeN(YP6e8kLoqiVA https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https://mail&v=2&s=96userId%3D274616 pcap 1 Ru ru 7.0.32 /confirm/vipstatus ?key=5D819446&userId=27465D8191897C7462F400&userId=27463465D8192&email=Z21haWwuY29t&source=pcap 2211382541 +2020-02-17 2020-02-17 06:34:15 EMC S13SmJGaHVUBXqd3bs\tRpB3G_hXZBJjxOt6 FP_GWMlfw2REi3Up5Cqu0qWcA99YlCduy 27883082 xJ1eD6ixour7M0K_l6Ziv6awoxMpnzsaW https://click.mail%3DYmail%3Dberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D69444%26email.ru/redir?u=https://mail%3Dby%3D6%26ts%3D158193DBDB3A%2F%2Fwwww.wildberries.ru%2Fconfirm%2Fvipstatus%3Fkey%3D63158192YWlsLnJ1%26source%3Dmn&c=swm&r=https://click.mail&v=2&s=92YWlsLnJ1%26source mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=15819362hQG1haWwuY29t&source=pcap 2045856196 +2020-02-17 2020-02-17 00:36:06 EMC 2mjWiSh70SN_1xOBst7vbYq1z5EL26Z0i 7e5aIak4Pca2woYGZIJi!P_gRQdnPpHDP 28784173 b5hw0LlyfMCyYstJ9XbB6Pq7VN5hO9kIy mn 1 Ru ru 7.0.32 /confirm/vipstatus ?key=15819387AE3634&email=cnU%3D&source=mn 1255281910 diff --git a/tests/queries/0_stateless/01286_constraints_on_default.reference b/tests/queries/0_stateless/01286_constraints_on_default.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/01286_constraints_on_default.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/01286_constraints_on_default.sql b/tests/queries/0_stateless/01286_constraints_on_default.sql new file mode 100644 index 00000000000..d150bac15b5 --- /dev/null +++ b/tests/queries/0_stateless/01286_constraints_on_default.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS default_constraints; +CREATE TABLE default_constraints +( + x UInt8, + y UInt8 DEFAULT x + 1, + CONSTRAINT c CHECK y < 5 +) ENGINE = Memory; + +INSERT INTO default_constraints (x) SELECT number FROM system.numbers LIMIT 5; -- { serverError 469 } +INSERT INTO default_constraints (x) VALUES (0),(1),(2),(3),(4); -- { serverError 469 } + +SELECT y, throwIf(NOT y < 5) FROM default_constraints; +SELECT count() FROM default_constraints; + +DROP TABLE default_constraints; + + +CREATE TEMPORARY TABLE default_constraints +( + x UInt8, + y UInt8 DEFAULT x + 1, + CONSTRAINT c CHECK y < 5 +); + +INSERT INTO default_constraints (x) SELECT number FROM system.numbers LIMIT 5; -- { serverError 469 } +INSERT INTO default_constraints (x) VALUES (0),(1),(2),(3),(4); -- { serverError 469 } + +SELECT y, throwIf(NOT y < 5) FROM default_constraints; +SELECT count() FROM default_constraints; diff --git a/utils/ci/docker-multiarch/update.sh b/utils/ci/docker-multiarch/update.sh index 6e3e18047de..6abcf339607 100755 --- a/utils/ci/docker-multiarch/update.sh +++ b/utils/ci/docker-multiarch/update.sh @@ -37,27 +37,27 @@ fi # get the image if \ - wget -q --spider "$baseUrl/current" \ - && wget -q --spider "$baseUrl/current/$thisTar" \ - ; then - baseUrl+='/current' + wget -q --spider "$baseUrl/current" \ + && wget -q --spider "$baseUrl/current/$thisTar" \ + ; then + baseUrl+='/current' fi wget -qN 
"$baseUrl/"{{MD5,SHA{1,256}}SUMS{,.gpg},"$thisTarBase.manifest",'unpacked/build-info.txt'} || true wget -N "$baseUrl/$thisTar" # check checksum if [ -f SHA256SUMS ]; then - sha256sum="$(sha256sum "$thisTar" | cut -d' ' -f1)" - if ! grep -q "$sha256sum" SHA256SUMS; then - echo >&2 "error: '$thisTar' has invalid SHA256" - exit 1 - fi + sha256sum="$(sha256sum "$thisTar" | cut -d' ' -f1)" + if ! grep -q "$sha256sum" SHA256SUMS; then + echo >&2 "error: '$thisTar' has invalid SHA256" + exit 1 + fi fi cat > Dockerfile <<-EOF - FROM scratch - ADD $thisTar / - ENV ARCH=${ARCH} UBUNTU_SUITE=${VERSION} DOCKER_REPO=${DOCKER_REPO} + FROM scratch + ADD $thisTar / + ENV ARCH=${ARCH} UBUNTU_SUITE=${VERSION} DOCKER_REPO=${DOCKER_REPO} EOF # add qemu-user-static binary @@ -70,26 +70,26 @@ EOF fi cat >> Dockerfile <<-EOF - # a few minor docker-specific tweaks - # see https://github.com/docker/docker/blob/master/contrib/mkimage/debootstrap - RUN echo '#!/bin/sh' > /usr/sbin/policy-rc.d \\ - && echo 'exit 101' >> /usr/sbin/policy-rc.d \\ - && chmod +x /usr/sbin/policy-rc.d \\ - && dpkg-divert --local --rename --add /sbin/initctl \\ - && cp -a /usr/sbin/policy-rc.d /sbin/initctl \\ - && sed -i 's/^exit.*/exit 0/' /sbin/initctl \\ - && echo 'force-unsafe-io' > /etc/dpkg/dpkg.cfg.d/docker-apt-speedup \\ - && echo 'DPkg::Post-Invoke { "rm -f /var/cache/apt/archives/*.deb /var/cache/apt/archives/partial/*.deb /var/cache/apt/*.bin || true"; };' > /etc/apt/apt.conf.d/docker-clean \\ - && echo 'APT::Update::Post-Invoke { "rm -f /var/cache/apt/archives/*.deb /var/cache/apt/archives/partial/*.deb /var/cache/apt/*.bin || true"; };' >> /etc/apt/apt.conf.d/docker-clean \\ - && echo 'Dir::Cache::pkgcache ""; Dir::Cache::srcpkgcache "";' >> /etc/apt/apt.conf.d/docker-clean \\ - && echo 'Acquire::Languages "none";' > /etc/apt/apt.conf.d/docker-no-languages \\ - && echo 'Acquire::GzipIndexes "true"; Acquire::CompressionTypes::Order:: "gz";' > /etc/apt/apt.conf.d/docker-gzip-indexes + # a few minor docker-specific tweaks + # see https://github.com/docker/docker/blob/master/contrib/mkimage/debootstrap + RUN echo '#!/bin/sh' > /usr/sbin/policy-rc.d \\ + && echo 'exit 101' >> /usr/sbin/policy-rc.d \\ + && chmod +x /usr/sbin/policy-rc.d \\ + && dpkg-divert --local --rename --add /sbin/initctl \\ + && cp -a /usr/sbin/policy-rc.d /sbin/initctl \\ + && sed -i 's/^exit.*/exit 0/' /sbin/initctl \\ + && echo 'force-unsafe-io' > /etc/dpkg/dpkg.cfg.d/docker-apt-speedup \\ + && echo 'DPkg::Post-Invoke { "rm -f /var/cache/apt/archives/*.deb /var/cache/apt/archives/partial/*.deb /var/cache/apt/*.bin || true"; };' > /etc/apt/apt.conf.d/docker-clean \\ + && echo 'APT::Update::Post-Invoke { "rm -f /var/cache/apt/archives/*.deb /var/cache/apt/archives/partial/*.deb /var/cache/apt/*.bin || true"; };' >> /etc/apt/apt.conf.d/docker-clean \\ + && echo 'Dir::Cache::pkgcache ""; Dir::Cache::srcpkgcache "";' >> /etc/apt/apt.conf.d/docker-clean \\ + && echo 'Acquire::Languages "none";' > /etc/apt/apt.conf.d/docker-no-languages \\ + && echo 'Acquire::GzipIndexes "true"; Acquire::CompressionTypes::Order:: "gz";' > /etc/apt/apt.conf.d/docker-gzip-indexes - # enable the universe - RUN sed -i 's/^#\s*\(deb.*universe\)$/\1/g' /etc/apt/sources.list + # enable the universe + RUN sed -i 's/^#\s*\(deb.*universe\)$/\1/g' /etc/apt/sources.list - # overwrite this with 'CMD []' in a dependent Dockerfile - CMD ["/bin/bash"] + # overwrite this with 'CMD []' in a dependent Dockerfile + CMD ["/bin/bash"] EOF docker build -t "${DOCKER_REPO}:${TAG_ARCH}-${VERSION}" . 
diff --git a/utils/github/__init__.py b/utils/github/__init__.py index e0fb6c8aee5..40a96afc6ff 100644 --- a/utils/github/__init__.py +++ b/utils/github/__init__.py @@ -1,3 +1 @@ # -*- coding: utf-8 -*- - -# REMOVE ME diff --git a/utils/github/__main__.py b/utils/github/__main__.py index 40f04aeb675..1dbc9895bd6 100644 --- a/utils/github/__main__.py +++ b/utils/github/__main__.py @@ -7,15 +7,15 @@ - All pull-requests must be squash-merged or explicitly merged without rebase. - All pull-requests to master must have at least one label prefixed with `pr-`. - Labels that require pull-request to be backported must be red colored (#ff0000). - - Stable branch name must be of form `YY.NUMBER`. - - All stable branches must be forked directly from the master branch and never be merged back, + - Release branch name must be of form `YY.NUMBER`. + - All release branches must be forked directly from the master branch and never be merged back, or merged with any other branches based on the master branch (including master branch itself). Output of this script: - Commits without references from pull-requests. - Pull-requests to master without proper labels. - - Pull-requests that need to be backported, with statuses per stable branch. + - Pull-requests that need to be backported, with statuses per release branch. ''' @@ -29,7 +29,7 @@ import sys try: from termcolor import colored # `pip install termcolor` except ImportError: - sys.exit("Package 'termcolor' not found. Try run: `pip3 install termcolor`") + sys.exit("Package 'termcolor' not found. Try run: `pip3 install [--user] termcolor`") CHECK_MARK = colored('🗸', 'green') @@ -45,8 +45,6 @@ parser.add_argument('--repo', '-r', type=str, default='', metavar='PATH', help='path to the root of the ClickHouse repository') parser.add_argument('--remote', type=str, default='origin', help='remote name of the "ClickHouse/ClickHouse" upstream') -parser.add_argument('-n', type=int, default=3, dest='number', - help='number of last stable branches to consider') parser.add_argument('--token', type=str, required=True, help='token for Github access') parser.add_argument('--login', type=str, @@ -54,31 +52,46 @@ parser.add_argument('--login', type=str, parser.add_argument('--auto-label', action='store_true', dest='autolabel', default=True, help='try to automatically parse PR description and put labels') +# Either select last N release branches, or specify them manually. 
+group = parser.add_mutually_exclusive_group(required=True)
+group.add_argument('-n', type=int, default=3, dest='number',
+                   help='number of last release branches to consider')
+group.add_argument('--branch', type=str, action='append', metavar='BRANCH',
+                   help='specific release branch name to consider')
+
 args = parser.parse_args()
 
 github = query.Query(args.token, 30)
 repo = local.Local(args.repo, args.remote, github.get_default_branch())
 
-stables = repo.get_stables()[-args.number:] # [(branch name, base)]
-if not stables:
+if not args.branch:
+    release_branches = repo.get_release_branches()[-args.number:] # [(branch name, base)]
+else:
+    release_branches = []
+    all_release_branches = repo.get_release_branches()
+    for branch in all_release_branches:
+        if branch[0] in args.branch:
+            release_branches.append(branch)
+
+if not release_branches:
     sys.exit('No release branches found!')
 else:
     print('Found release branches:')
-    for stable in stables:
-        print(f'{CHECK_MARK} {stable[0]} forked from {stable[1]}')
+    for branch in release_branches:
+        print(f'{CHECK_MARK} {branch[0]} forked from {branch[1]}')
 
-first_commit = stables[0][1]
+first_commit = release_branches[0][1]
 pull_requests = github.get_pull_requests(first_commit, args.login)
 good_commits = set(pull_request['mergeCommit']['oid'] for pull_request in pull_requests)
 
 bad_commits = [] # collect and print them in the end
 from_commit = repo.get_head_commit()
-for i in reversed(range(len(stables))):
-    for commit in repo.iterate(from_commit, stables[i][1]):
+for i in reversed(range(len(release_branches))):
+    for commit in repo.iterate(from_commit, release_branches[i][1]):
         if str(commit) not in good_commits and commit.author.name != 'robot-clickhouse':
             bad_commits.append(commit)
-    from_commit = stables[i][1]
+    from_commit = release_branches[i][1]
 
 members = set(github.get_members("ClickHouse", "ClickHouse"))
 def print_responsible(pull_request):
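The hunk above makes `-n` and `--branch` alternatives: the script either takes the last N release branches or only the branches named explicitly, and argparse enforces that exactly one of the two is given. A minimal, self-contained sketch of this pattern (not the full script; the sample arguments are illustrative):

    import argparse

    parser = argparse.ArgumentParser(description='backport status checker (illustrative sketch)')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-n', type=int, dest='number',
                       help='number of last release branches to consider')
    group.add_argument('--branch', type=str, action='append', metavar='BRANCH',
                       help='specific release branch name to consider (repeatable)')

    # action='append' accumulates repeated --branch options into a list;
    # passing both -n and --branch makes argparse exit with a usage error.
    args = parser.parse_args(['--branch', '20.3', '--branch', '20.4'])
    assert args.branch == ['20.3', '20.4'] and args.number is None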
@@ -146,22 +159,22 @@ if need_backporting:
             no_backport_labeled = set()
             wait = set()
 
-            for stable in stables:
-                if repo.comparator(stable[1]) < repo.comparator(pull_request['mergeCommit']['oid']):
-                    targets.append(stable[0])
+            for branch in release_branches:
+                if repo.comparator(branch[1]) < repo.comparator(pull_request['mergeCommit']['oid']):
+                    targets.append(branch[0])
 
                     # FIXME: compatibility logic - check for a manually set label, that indicates status 'backported'.
-                    # FIXME: O(n²) - no need to iterate all labels for every `stable`
+                    # FIXME: O(n²) - no need to iterate all labels for every `branch`
                     for label in github.get_labels(pull_request):
                         if re_vlabel.match(label['name']) or re_vlabel_backported.match(label['name']):
-                            if f'v{stable[0]}' == label['name'] or f'v{stable[0]}-backported' == label['name']:
-                                backport_labeled.add(stable[0])
+                            if f'v{branch[0]}' == label['name'] or f'v{branch[0]}-backported' == label['name']:
+                                backport_labeled.add(branch[0])
                         if re_vlabel_conflicts.match(label['name']):
-                            if f'v{stable[0]}-conflicts' == label['name']:
-                                conflict_labeled.add(stable[0])
+                            if f'v{branch[0]}-conflicts' == label['name']:
+                                conflict_labeled.add(branch[0])
                         if re_vlabel_no_backport.match(label['name']):
-                            if f'v{stable[0]}-no-backport' == label['name']:
-                                no_backport_labeled.add(stable[0])
+                            if f'v{branch[0]}-no-backport' == label['name']:
+                                no_backport_labeled.add(branch[0])
 
             for event in github.get_timeline(pull_request):
                 if(event['isCrossRepository'] or
diff --git a/utils/github/local.py b/utils/github/local.py
index 8dbef6b9b6a..96a1ae765bf 100644
--- a/utils/github/local.py
+++ b/utils/github/local.py
@@ -1,7 +1,9 @@
 # -*- coding: utf-8 -*-
 
-# `pip install …`
-import git # gitpython
+try:
+    import git # `pip3 install gitpython`
+except ImportError:
+    sys.exit("Package 'gitpython' not found. Try run: `pip3 install [--user] gitpython`")
 
 import functools
 import os
@@ -11,7 +13,7 @@ import re
 
 class Local:
     '''Implements some useful methods atop of the local repository
     '''
-    RE_STABLE_REF = re.compile(r'^refs/remotes/.+/\d+\.\d+$')
+    RE_RELEASE_BRANCH_REF = re.compile(r'^refs/remotes/.+/\d+\.\d+$')
 
     def __init__(self, repo_path, remote_name, default_branch_name):
         self._repo = git.Repo(repo_path, search_parent_directories=(not repo_path))
@@ -42,16 +44,16 @@ class Local:
            * head (git.Commit)).
 
         List is sorted by commits in ascending order.
         '''
-    def get_stables(self):
-        stables = []
+    def get_release_branches(self):
+        release_branches = []
 
-        for stable in [r for r in self._remote.refs if Local.RE_STABLE_REF.match(r.path)]:
-            base = self._repo.merge_base(self._default, self._repo.commit(stable))
+        for branch in [r for r in self._remote.refs if Local.RE_RELEASE_BRANCH_REF.match(r.path)]:
+            base = self._repo.merge_base(self._default, self._repo.commit(branch))
             if not base:
-                print(f'Branch {stable.path} is not based on branch {self._default}. Ignoring.')
+                print(f'Branch {branch.path} is not based on branch {self._default}. Ignoring.')
             elif len(base) > 1:
-                print(f'Branch {stable.path} has more than one base commit. Ignoring.')
+                print(f'Branch {branch.path} has more than one base commit. Ignoring.')
             else:
-                stables.append((os.path.basename(stable.name), base[0]))
+                release_branches.append((os.path.basename(branch.name), base[0]))
 
-        return sorted(stables, key=lambda x : self.comparator(x[1]))
+        return sorted(release_branches, key=lambda x : self.comparator(x[1]))
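In local.py, `get_release_branches()` scans the remote refs for names of the form `NN.N` and keeps only branches that have exactly one merge base with the default branch. A rough standalone sketch of the same idea, assuming gitpython is installed and a hypothetical repository path; it is not the patched code itself:

    import os
    import re

    import git  # `pip3 install gitpython`

    RE_RELEASE_BRANCH_REF = re.compile(r'^refs/remotes/.+/\d+\.\d+$')

    def release_branches(repo_path: str, remote_name: str = 'origin', default: str = 'master'):
        """Yield (branch name, fork-point commit) for remote refs that look like release branches."""
        repo = git.Repo(repo_path, search_parent_directories=True)
        remote = repo.remotes[remote_name]
        for ref in remote.refs:
            if not RE_RELEASE_BRANCH_REF.match(ref.path):
                continue
            base = repo.merge_base(default, repo.commit(ref))
            if len(base) == 1:  # skip branches with zero or several merge bases
                yield os.path.basename(ref.name), base[0]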
"$auto_message [$VERSION_STRING] [$VERSION_REVISION]" cmake/autogenerated_versions.txt debian/changelog docker/*/Dockerfile src/Storages/System/StorageSystemContributors.generated.cpp utils/list-versions/version_date.tsv if [ -z $NO_PUSH ]; then git push fi echo "Generated version: ${VERSION_STRING}, revision: ${VERSION_REVISION}." - # Second tag for correct version information in version.cmake inside tag + # Second tag for correct version information in autogenerated_versions.txt inside tag if git tag --force -a "$tag" -m "$tag" then if [ -z $NO_PUSH ]; then diff --git a/utils/test-data-generator/CMakeLists.txt b/utils/test-data-generator/CMakeLists.txt index 3a94358e86d..758c3cdc0ce 100644 --- a/utils/test-data-generator/CMakeLists.txt +++ b/utils/test-data-generator/CMakeLists.txt @@ -1,8 +1,6 @@ # Disable clang-tidy for protobuf generated files set (CMAKE_CXX_CLANG_TIDY "") -add_compile_options(-Wno-zero-as-null-pointer-constant -Wno-array-bounds) # Protobuf generated files - if (USE_PROTOBUF) protobuf_generate_cpp(ProtobufDelimitedMessagesSerializer_Srcs ProtobufDelimitedMessagesSerializer_Hdrs ${CMAKE_CURRENT_SOURCE_DIR}/../../tests/queries/0_stateless/00825_protobuf_format.proto) protobuf_generate_cpp(ProtobufDelimitedMessagesSerializer_Srcs2 ProtobufDelimitedMessagesSerializer_Hdrs2 ${CMAKE_CURRENT_SOURCE_DIR}/../../tests/queries/0_stateless/00825_protobuf_format_syntax2.proto) diff --git a/website/css/docs.css b/website/css/docs.css index 18bf40de13b..2c175b18db6 100644 --- a/website/css/docs.css +++ b/website/css/docs.css @@ -69,7 +69,6 @@ summary { #content code { color: #111; background: #eee; - padding: 2px; } diff --git a/website/templates/common_meta.html b/website/templates/common_meta.html index 09bef26d575..8b65e67d780 100644 --- a/website/templates/common_meta.html +++ b/website/templates/common_meta.html @@ -1,25 +1,25 @@ {% set description = description or _('ClickHouse is a fast open-source column-oriented database management system that allows generating analytical data reports in real-time using SQL queries') %} - + {% if title %}{{ title }}{% else %}{{ _('ClickHouse - fast open-source OLAP DBMS') }}{% endif %} - + - + {% if page and not single_page %} - + {% endif %} {% include "templates/docs/ld_json.html" %} - +