diff --git a/CMakeLists.txt b/CMakeLists.txt index 6262d17f2d2..a2cc5f15ac8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -182,6 +182,11 @@ else () set (CXX_FLAGS_INTERNAL_COMPILER "-std=c++1z") endif () +if (COMPILER_GCC OR COMPILER_CLANG) + # Enable C++14 sized global deallocation functions. In theory -std=c++14 already enables them, but we pass the flag explicitly to be sure. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation") +endif () + option(WITH_COVERAGE "Build with coverage." 0) if(WITH_COVERAGE AND COMPILER_CLANG) set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping") @@ -220,7 +225,6 @@ endif () set(THREADS_PREFER_PTHREAD_FLAG ON) find_package (Threads) -include (cmake/find_cxxabi.cmake) include (cmake/find_cxx.cmake) include (cmake/test_compiler.cmake) @@ -404,6 +408,11 @@ if (UNBUNDLED OR NOT (OS_LINUX OR APPLE) OR ARCH_32) option (NO_WERROR "Disable -Werror compiler option" ON) endif () +if (USE_LIBCXX) + set (HAVE_LIBCXX 1) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") +endif() + if (USE_LIBCXX AND USE_INTERNAL_LIBCXX_LIBRARY) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -nostdinc++ -isystem ${LIBCXX_INCLUDE_DIR} -isystem ${LIBCXXABI_INCLUDE_DIR}") endif () diff --git a/cmake/find_cxx.cmake b/cmake/find_cxx.cmake index a10c3259ad9..2b2952f6efd 100644 --- a/cmake/find_cxx.cmake +++ b/cmake/find_cxx.cmake @@ -1,23 +1,26 @@ if (NOT APPLE) - option (USE_INTERNAL_LIBCXX_LIBRARY "Set to FALSE to use system libcxx library instead of bundled" ${NOT_UNBUNDLED}) + option (USE_INTERNAL_LIBCXX_LIBRARY "Set to FALSE to use system libcxx and libcxxabi libraries instead of bundled" ${NOT_UNBUNDLED}) endif () if (USE_INTERNAL_LIBCXX_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcxx/include/vector") - message (WARNING "submodule contrib/libcxx is missing. to fix try run: \n git submodule update --init --recursive") - set (USE_INTERNAL_LIBCXX_LIBRARY 0) + message (WARNING "submodule contrib/libcxx is missing. to fix try run: \n git submodule update --init --recursive") + set (USE_INTERNAL_LIBCXX_LIBRARY 0) +endif () + +if (USE_INTERNAL_LIBCXX_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcxxabi/src") + message (WARNING "submodule contrib/libcxxabi is missing. 
to fix try run: \n git submodule update --init --recursive") + set (USE_INTERNAL_LIBCXX_LIBRARY 0) endif () if (NOT USE_INTERNAL_LIBCXX_LIBRARY) find_library (LIBCXX_LIBRARY c++) - find_path (LIBCXX_INCLUDE_DIR NAMES vector PATHS ${LIBCXX_INCLUDE_PATHS}) -endif () - -if (LIBCXX_LIBRARY AND LIBCXX_INCLUDE_DIR) + find_library (LIBCXXABI_LIBRARY c++abi) else () set (LIBCXX_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcxx/include) - set (USE_INTERNAL_LIBCXX_LIBRARY 1) + set (LIBCXXABI_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcxxabi/include) set (LIBCXX_LIBRARY cxx_static) - set (HAVE_LIBCXX 1) + set (LIBCXXABI_LIBRARY cxxabi_static) endif () -message (STATUS "Using libcxx: ${LIBCXX_INCLUDE_DIR} : ${LIBCXX_LIBRARY}") +message (STATUS "Using libcxx: ${LIBCXX_LIBRARY}") +message (STATUS "Using libcxxabi: ${LIBCXXABI_LIBRARY}") diff --git a/cmake/find_cxxabi.cmake b/cmake/find_cxxabi.cmake deleted file mode 100644 index 8f240597bc4..00000000000 --- a/cmake/find_cxxabi.cmake +++ /dev/null @@ -1,22 +0,0 @@ -if (NOT APPLE) - option (USE_INTERNAL_LIBCXXABI_LIBRARY "Set to FALSE to use system libcxxabi library instead of bundled" ${NOT_UNBUNDLED}) -endif () - -if (USE_INTERNAL_LIBCXXABI_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcxxabi/src") - message (WARNING "submodule contrib/libcxxabi is missing. to fix try run: \n git submodule update --init --recursive") - set (USE_INTERNAL_LIBCXXABI_LIBRARY 0) -endif () - -if (NOT USE_INTERNAL_LIBCXXABI_LIBRARY) - find_library (LIBCXXABI_LIBRARY cxxabi) - find_path (LIBCXXABI_INCLUDE_DIR NAMES vector PATHS ${LIBCXXABI_INCLUDE_PATHS}) -endif () - -if (LIBCXXABI_LIBRARY AND LIBCXXABI_INCLUDE_DIR) -else () - set (LIBCXXABI_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcxxabi/include) - set (USE_INTERNAL_LIBCXXABI_LIBRARY 1) - set (LIBCXXABI_LIBRARY cxxabi_static) -endif () - -message (STATUS "Using libcxxabi: ${LIBCXXABI_INCLUDE_DIR} : ${LIBCXXABI_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 1280a309b01..2cc8ae37806 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -15,12 +15,9 @@ if (USE_INTERNAL_UNWIND_LIBRARY) add_subdirectory (libunwind-cmake) endif () -if (USE_LIBCXX AND USE_INTERNAL_LIBCXXABI_LIBRARY) - add_subdirectory(libcxxabi-cmake) -endif() - if (USE_LIBCXX AND USE_INTERNAL_LIBCXX_LIBRARY) add_subdirectory(libcxx-cmake) + add_subdirectory(libcxxabi-cmake) endif() diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 4840197c2fd..47f057c0559 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -15,7 +15,6 @@ ${JEMALLOC_SOURCE_DIR}/src/extent_mmap.c ${JEMALLOC_SOURCE_DIR}/src/hash.c ${JEMALLOC_SOURCE_DIR}/src/hook.c ${JEMALLOC_SOURCE_DIR}/src/jemalloc.c -${JEMALLOC_SOURCE_DIR}/src/jemalloc_cpp.cpp ${JEMALLOC_SOURCE_DIR}/src/large.c ${JEMALLOC_SOURCE_DIR}/src/log.c ${JEMALLOC_SOURCE_DIR}/src/malloc_io.c diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index f18608f3256..849784e41bd 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -385,6 +385,7 @@ endif() if (USE_JEMALLOC) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR}) # used in Interpreters/AsynchronousMetrics.cpp + target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR}) # new_delete.cpp endif () target_include_directories (dbms PUBLIC ${DBMS_INCLUDE_DIR} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src/Formats/include) diff --git 
a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index e0618b934af..837812f84af 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include @@ -172,12 +173,13 @@ void TCPHandler::runImpl() send_exception_with_stack_trace = query_context->getSettingsRef().calculate_text_stack_trace; /// Should we send internal logs to client? + const auto client_logs_level = query_context->getSettingsRef().send_logs_level; if (client_revision >= DBMS_MIN_REVISION_WITH_SERVER_LOGS - && query_context->getSettingsRef().send_logs_level.value != LogsLevel::none) + && client_logs_level.value != LogsLevel::none) { state.logs_queue = std::make_shared<InternalTextLogsQueue>(); - state.logs_queue->max_priority = Poco::Logger::parseLevel(query_context->getSettingsRef().send_logs_level.toString()); - CurrentThread::attachInternalTextLogsQueue(state.logs_queue); + state.logs_queue->max_priority = Poco::Logger::parseLevel(client_logs_level.toString()); + CurrentThread::attachInternalTextLogsQueue(state.logs_queue, client_logs_level.value); } query_context->setExternalTablesInitializer([&global_settings, this] (Context & context) diff --git a/dbms/src/AggregateFunctions/QuantileTiming.h b/dbms/src/AggregateFunctions/QuantileTiming.h index 131ca91dbbf..fbf4da725c0 100644 --- a/dbms/src/AggregateFunctions/QuantileTiming.h +++ b/dbms/src/AggregateFunctions/QuantileTiming.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include @@ -513,8 +512,6 @@ private: void mediumToLarge() { - CurrentMemoryTracker::alloc(sizeof(detail::QuantileTimingLarge)); - /// While the data is copied from medium, it is not possible to set `large` value (otherwise it will overwrite some data). detail::QuantileTimingLarge * tmp_large = new detail::QuantileTimingLarge; @@ -528,8 +525,6 @@ private: void tinyToLarge() { - CurrentMemoryTracker::alloc(sizeof(detail::QuantileTimingLarge)); - /// While the data is copied from `medium` it is not possible to set `large` value (otherwise it will overwrite some data). detail::QuantileTimingLarge * tmp_large = new detail::QuantileTimingLarge; @@ -562,8 +557,6 @@ public: else if (kind == Kind::Large) { delete large; - - CurrentMemoryTracker::free(sizeof(detail::QuantileTimingLarge)); } } diff --git a/dbms/src/Common/Allocator.h b/dbms/src/Common/Allocator.h index abaa5927e3d..e9569673678 100644 --- a/dbms/src/Common/Allocator.h +++ b/dbms/src/Common/Allocator.h @@ -108,13 +108,92 @@ class AllocatorWithHint : Hint { protected: static constexpr bool clear_memory = clear_memory_; + static constexpr size_t small_memory_threshold = mmap_threshold; public: /// Allocate memory range. void * alloc(size_t size, size_t alignment = 0) { CurrentMemoryTracker::alloc(size); + return allocNoTrack(size, alignment); + } + /// Free memory range. + void free(void * buf, size_t size) + { + freeNoTrack(buf, size); + CurrentMemoryTracker::free(size); + } + + /** Enlarge memory range. + * Data from old range is moved to the beginning of new range. + * Address of memory range could change. + */ + void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0) + { + if (old_size == new_size) + { + /// nothing to do. + /// BTW, it's not possible to change alignment while doing realloc. + } + else if (old_size < mmap_threshold && new_size < mmap_threshold && alignment <= MALLOC_MIN_ALIGNMENT) + { + /// Resize malloc'd memory region with no special alignment requirement. 
+ CurrentMemoryTracker::realloc(old_size, new_size); + + void * new_buf = ::realloc(buf, new_size); + if (nullptr == new_buf) + DB::throwFromErrno("Allocator: Cannot realloc from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + + buf = new_buf; + if constexpr (clear_memory) + if (new_size > old_size) + memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size); + } + else if (old_size >= mmap_threshold && new_size >= mmap_threshold) + { + /// Resize mmap'd memory region. + CurrentMemoryTracker::realloc(old_size, new_size); + + // On Apple and FreeBSD, a self-implemented mremap is used (common/mremap.h) + buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (MAP_FAILED == buf) + DB::throwFromErrno("Allocator: Cannot mremap memory chunk from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_MREMAP); + + /// No need for zero-fill, because mmap guarantees it. + } + else if (new_size < small_memory_threshold) + { + /// Small allocs that require a copy. Assume there's enough memory in the system. Call CurrentMemoryTracker once. + CurrentMemoryTracker::realloc(old_size, new_size); + + void * new_buf = allocNoTrack(new_size, alignment); + memcpy(new_buf, buf, std::min(old_size, new_size)); + freeNoTrack(buf, old_size); + buf = new_buf; + } + else + { + /// Big allocs that require a copy. MemoryTracker is called inside 'alloc', 'free' methods. + + void * new_buf = alloc(new_size, alignment); + memcpy(new_buf, buf, std::min(old_size, new_size)); + free(buf, old_size); + buf = new_buf; + } + + return buf; + } + +protected: + static constexpr size_t getStackThreshold() + { + return 0; + } + +private: + void * allocNoTrack(size_t size, size_t alignment) + { void * buf; if (size >= mmap_threshold) @@ -149,15 +228,14 @@ public: if (0 != res) DB::throwFromErrno("Cannot allocate memory (posix_memalign) " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res); - if (clear_memory) + if constexpr (clear_memory) memset(buf, 0, size); } } return buf; } - /// Free memory range. - void free(void * buf, size_t size) + void freeNoTrack(void * buf, size_t size) { if (size >= mmap_threshold) { @@ -168,63 +246,6 @@ public: { ::free(buf); } - - CurrentMemoryTracker::free(size); - } - - /** Enlarge memory range. - * Data from old range is moved to the beginning of new range. - * Address of memory range could change. - */ - void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0) - { - if (old_size == new_size) - { - /// nothing to do. - /// BTW, it's not possible to change alignment while doing realloc. - } - else if (old_size < mmap_threshold && new_size < mmap_threshold && alignment <= MALLOC_MIN_ALIGNMENT) - { - /// Resize malloc'd memory region with no special alignment requirement. 
- CurrentMemoryTracker::realloc(old_size, new_size); - - void * new_buf = ::realloc(buf, new_size); - if (nullptr == new_buf) - DB::throwFromErrno("Allocator: Cannot realloc from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); - - buf = new_buf; - if (clear_memory && new_size > old_size) - memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size); - } - else if (old_size >= mmap_threshold && new_size >= mmap_threshold) - { - /// Resize mmap'd memory region. - CurrentMemoryTracker::realloc(old_size, new_size); - - // On apple and freebsd self-implemented mremap used (common/mremap.h) - buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (MAP_FAILED == buf) - DB::throwFromErrno("Allocator: Cannot mremap memory chunk from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_MREMAP); - - /// No need for zero-fill, because mmap guarantees it. - } - else - { - /// All other cases that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods. - - void * new_buf = alloc(new_size, alignment); - memcpy(new_buf, buf, std::min(old_size, new_size)); - free(buf, old_size); - buf = new_buf; - } - - return buf; - } - -protected: - static constexpr size_t getStackThreshold() - { - return 0; } }; @@ -267,7 +288,7 @@ public: { if (size <= N) { - if (Base::clear_memory) + if constexpr (Base::clear_memory) memset(stack_memory, 0, N); return stack_memory; } diff --git a/dbms/src/Common/CombinedCardinalityEstimator.h b/dbms/src/Common/CombinedCardinalityEstimator.h index 824f0a8c018..e048e47cab5 100644 --- a/dbms/src/Common/CombinedCardinalityEstimator.h +++ b/dbms/src/Common/CombinedCardinalityEstimator.h @@ -3,7 +3,6 @@ #include #include #include -#include #include @@ -230,7 +229,6 @@ private: if (getContainerType() != details::ContainerType::SMALL) throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); - CurrentMemoryTracker::alloc(sizeof(Medium)); auto tmp_medium = std::make_unique<Medium>(); for (const auto & x : small) @@ -247,7 +245,6 @@ private: if ((container_type != details::ContainerType::SMALL) && (container_type != details::ContainerType::MEDIUM)) throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); - CurrentMemoryTracker::alloc(sizeof(Large)); auto tmp_large = std::make_unique<Large>(); if (container_type == details::ContainerType::SMALL) @@ -277,15 +274,11 @@ private: { delete medium; medium = nullptr; - - CurrentMemoryTracker::free(sizeof(Medium)); } else if (container_type == details::ContainerType::LARGE) { delete large; large = nullptr; - - CurrentMemoryTracker::free(sizeof(Large)); } } diff --git a/dbms/src/Common/CurrentThread.cpp b/dbms/src/Common/CurrentThread.cpp index 5186cec0c41..84d63a04b96 100644 --- a/dbms/src/Common/CurrentThread.cpp +++ b/dbms/src/Common/CurrentThread.cpp @@ -46,6 +46,12 @@ MemoryTracker * CurrentThread::getMemoryTracker() return &current_thread->memory_tracker; } +Int64 & CurrentThread::getUntrackedMemory() +{ + /// It assumes that (current_thread != nullptr) is already checked with getMemoryTracker() + return current_thread->untracked_memory; +} + void CurrentThread::updateProgressIn(const Progress & value) { if (unlikely(!current_thread)) return; @@ -60,11 +66,12 @@
current_thread->progress_out.incrementPiecewiseAtomically(value); } -void CurrentThread::attachInternalTextLogsQueue(const std::shared_ptr<InternalTextLogsQueue> & logs_queue) +void CurrentThread::attachInternalTextLogsQueue(const std::shared_ptr<InternalTextLogsQueue> & logs_queue, + LogsLevel client_logs_level) { if (unlikely(!current_thread)) return; - current_thread->attachInternalTextLogsQueue(logs_queue); + current_thread->attachInternalTextLogsQueue(logs_queue, client_logs_level); } std::shared_ptr<InternalTextLogsQueue> CurrentThread::getInternalTextLogsQueue() diff --git a/dbms/src/Common/CurrentThread.h b/dbms/src/Common/CurrentThread.h index 3c248ad903f..685ac879530 100644 --- a/dbms/src/Common/CurrentThread.h +++ b/dbms/src/Common/CurrentThread.h @@ -39,7 +39,8 @@ public: static ThreadGroupStatusPtr getGroup(); /// A logs queue used by TCPHandler to pass logs to a client - static void attachInternalTextLogsQueue(const std::shared_ptr<InternalTextLogsQueue> & logs_queue); + static void attachInternalTextLogsQueue(const std::shared_ptr<InternalTextLogsQueue> & logs_queue, + LogsLevel client_logs_level); static std::shared_ptr<InternalTextLogsQueue> getInternalTextLogsQueue(); /// Makes system calls to update ProfileEvents that contain info from rusage and taskstats @@ -47,6 +48,7 @@ public: static ProfileEvents::Counters & getProfileEvents(); static MemoryTracker * getMemoryTracker(); + static Int64 & getUntrackedMemory(); /// Update read and write rows (bytes) statistics (used in system.query_thread_log) static void updateProgressIn(const Progress & value); diff --git a/dbms/src/Common/HyperLogLogWithSmallSetOptimization.h b/dbms/src/Common/HyperLogLogWithSmallSetOptimization.h index 836fbda222e..548b745cb6e 100644 --- a/dbms/src/Common/HyperLogLogWithSmallSetOptimization.h +++ b/dbms/src/Common/HyperLogLogWithSmallSetOptimization.h @@ -4,7 +4,6 @@ #include #include -#include namespace DB @@ -39,8 +38,6 @@ private: void toLarge() { - CurrentMemoryTracker::alloc(sizeof(Large)); - /// At the time of copying data from `tiny`, setting the value of `large` is still not possible (otherwise it will overwrite some data). Large * tmp_large = new Large; @@ -56,11 +53,7 @@ public: ~HyperLogLogWithSmallSetOptimization() { if (isLarge()) - { delete large; - - CurrentMemoryTracker::free(sizeof(Large)); - } } void insert(Key value) diff --git a/dbms/src/Common/MemoryTracker.cpp b/dbms/src/Common/MemoryTracker.cpp index bc324be4904..b3d661d95ee 100644 --- a/dbms/src/Common/MemoryTracker.cpp +++ b/dbms/src/Common/MemoryTracker.cpp @@ -1,3 +1,5 @@ +#include + #include "MemoryTracker.h" #include #include @@ -17,6 +19,8 @@ namespace DB static constexpr size_t log_peak_memory_usage_every = 1ULL << 30; +/// Each thread can new/delete memory in range of (-untracked_memory_limit, untracked_memory_limit) without access to common counters. +static constexpr Int64 untracked_memory_limit = 4 * 1024 * 1024; MemoryTracker::~MemoryTracker() @@ -85,6 +89,9 @@ void MemoryTracker::alloc(Int64 size) { free(size); + /// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc + auto untrack_lock = blocker.cancel(); + std::stringstream message; message << "Memory tracker"; if (description) @@ -100,6 +107,9 @@ void MemoryTracker::alloc(Int64 size) { free(size); + /// Prevent recursion. 
Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc + auto untrack_lock = blocker.cancel(); + std::stringstream message; message << "Memory limit"; if (description) @@ -191,19 +201,41 @@ namespace CurrentMemoryTracker { void alloc(Int64 size) { if (auto memory_tracker = DB::CurrentThread::getMemoryTracker()) - memory_tracker->alloc(size); + { + Int64 & untracked = DB::CurrentThread::getUntrackedMemory(); + untracked += size; + if (untracked > untracked_memory_limit) + { + /// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes + /// more. It can be useful for enlarging the Exception message in rethrow logic. + Int64 tmp = untracked; + untracked = 0; + memory_tracker->alloc(tmp); + } + } } void realloc(Int64 old_size, Int64 new_size) { - if (auto memory_tracker = DB::CurrentThread::getMemoryTracker()) - memory_tracker->alloc(new_size - old_size); + Int64 addition = new_size - old_size; + if (addition > 0) + alloc(addition); + else + free(-addition); } void free(Int64 size) { if (auto memory_tracker = DB::CurrentThread::getMemoryTracker()) - memory_tracker->free(size); + { + Int64 & untracked = DB::CurrentThread::getUntrackedMemory(); + untracked -= size; + if (untracked < -untracked_memory_limit) + { + memory_tracker->free(-untracked); + untracked = 0; + } + } } } diff --git a/dbms/src/Common/MemoryTracker.h b/dbms/src/Common/MemoryTracker.h index 9f439c7550c..4ce0ac262fa 100644 --- a/dbms/src/Common/MemoryTracker.h +++ b/dbms/src/Common/MemoryTracker.h @@ -45,7 +45,11 @@ public: void realloc(Int64 old_size, Int64 new_size) { - alloc(new_size - old_size); + Int64 addition = new_size - old_size; + if (addition > 0) + alloc(addition); + else + free(-addition); } /** This function should be called after memory deallocation. diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index 35a68c5a90c..b361161483a 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -34,6 +34,11 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; } +// Replace NLMSG_OK with explicit casts since that system macro contains signedness bugs which are not going to be fixed. +static inline bool is_nlmsg_ok(const struct nlmsghdr * const nlh, const ssize_t len) +{ + return len >= static_cast<ssize_t>(sizeof(*nlh)) && nlh->nlmsg_len >= sizeof(*nlh) && static_cast<size_t>(len) >= nlh->nlmsg_len; +} namespace { @@ -128,7 +133,7 @@ struct NetlinkMessage if (header.nlmsg_type == NLMSG_ERROR) throw Exception("Can't receive Netlink response: error " + std::to_string(error.error), ErrorCodes::NETLINK_ERROR); - if (!NLMSG_OK((&header), bytes_received)) + if (!is_nlmsg_ok(&header, bytes_received)) throw Exception("Can't receive Netlink response: wrong number of bytes received", ErrorCodes::NETLINK_ERROR); } }; diff --git a/dbms/src/Common/ThreadStatus.cpp b/dbms/src/Common/ThreadStatus.cpp index c2e415ab363..e5fe5d6f23b 100644 --- a/dbms/src/Common/ThreadStatus.cpp +++ b/dbms/src/Common/ThreadStatus.cpp @@ -50,6 +50,19 @@ ThreadStatus::ThreadStatus() ThreadStatus::~ThreadStatus() { + try + { + if (untracked_memory > 0) + memory_tracker.alloc(untracked_memory); + else + memory_tracker.free(-untracked_memory); + } + catch (const DB::Exception &) + { + /// It's a minor tracked memory leak here (not the memory itself but its counter). + /// We've already allocated a little bit more than the limit and cannot track it in the thread memory tracker or its parent. 
+ } + if (deleter) deleter(); current_thread = nullptr; @@ -117,7 +130,8 @@ void ThreadStatus::assertState(const std::initializer_list & permitted_stat throw Exception(ss.str(), ErrorCodes::LOGICAL_ERROR); } -void ThreadStatus::attachInternalTextLogsQueue(const InternalTextLogsQueuePtr & logs_queue) +void ThreadStatus::attachInternalTextLogsQueue(const InternalTextLogsQueuePtr & logs_queue, + LogsLevel client_logs_level) { logs_queue_ptr = logs_queue; @@ -126,6 +140,7 @@ void ThreadStatus::attachInternalTextLogsQueue(const InternalTextLogsQueuePtr & std::lock_guard lock(thread_group->mutex); thread_group->logs_queue_ptr = logs_queue; + thread_group->client_logs_level = client_logs_level; } } diff --git a/dbms/src/Common/ThreadStatus.h b/dbms/src/Common/ThreadStatus.h index 0d36024b15d..fcfb3e0e19f 100644 --- a/dbms/src/Common/ThreadStatus.h +++ b/dbms/src/Common/ThreadStatus.h @@ -4,6 +4,8 @@ #include #include +#include + #include #include @@ -62,6 +64,8 @@ public: UInt32 master_thread_number = 0; Int32 master_thread_os_id = -1; + LogsLevel client_logs_level = LogsLevel::none; + String query; }; @@ -92,6 +96,8 @@ public: /// TODO: merge them into common entity ProfileEvents::Counters performance_counters{VariableContext::Thread}; MemoryTracker memory_tracker{VariableContext::Thread}; + /// Small amount of untracked memory (per thread atomic-less counter) + Int64 untracked_memory = 0; /// Statistics of read and write rows/bytes Progress progress_in; @@ -130,7 +136,8 @@ public: return thread_state == Died ? nullptr : logs_queue_ptr.lock(); } - void attachInternalTextLogsQueue(const InternalTextLogsQueuePtr & logs_queue); + void attachInternalTextLogsQueue(const InternalTextLogsQueuePtr & logs_queue, + LogsLevel client_logs_level); /// Sets query context for current thread and its thread group /// NOTE: query_context have to be alive until detachQuery() is called diff --git a/dbms/src/Common/new_delete.cpp b/dbms/src/Common/new_delete.cpp new file mode 100644 index 00000000000..aff708135e1 --- /dev/null +++ b/dbms/src/Common/new_delete.cpp @@ -0,0 +1,143 @@ +#include + +#include +#include +#include + +/// Replace default new/delete with memory tracking versions. +/// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new +/// https://en.cppreference.com/w/cpp/memory/new/operator_delete +#if NOT_UNBUNDLED + +namespace Memory +{ + +ALWAYS_INLINE void trackMemory(std::size_t size) +{ +#if USE_JEMALLOC + /// The nallocx() function allocates no memory, but it performs the same size computation as the mallocx() function + /// @note je_mallocx() != je_malloc(). It's expected they don't differ much in allocation logic. + if (likely(size != 0)) + CurrentMemoryTracker::alloc(nallocx(size, 0)); +#else + CurrentMemoryTracker::alloc(size); +#endif +} + +ALWAYS_INLINE bool trackMemoryNoExept(std::size_t size) noexcept +{ + try + { + trackMemory(size); + } + catch (...) + { + return false; + } + + return true; +} + +ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0) noexcept +{ + try + { +#if USE_JEMALLOC + /// @note It's also possible to use je_malloc_usable_size() here. + if (likely(ptr != nullptr)) + CurrentMemoryTracker::free(sallocx(ptr, 0)); +#else + if (size) + CurrentMemoryTracker::free(size); +#endif + } + catch (...) 
+ {} +} + +} + +/// new + +void * operator new(std::size_t size) +{ + Memory::trackMemory(size); + return Memory::newImpl(size); +} + +void * operator new[](std::size_t size) +{ + Memory::trackMemory(size); + return Memory::newImpl(size); +} + +void * operator new(std::size_t size, const std::nothrow_t &) noexcept +{ + if (likely(Memory::trackMemoryNoExept(size))) + return Memory::newNoExept(size); + return nullptr; +} + +void * operator new[](std::size_t size, const std::nothrow_t &) noexcept +{ + if (likely(Memory::trackMemoryNoExept(size))) + return Memory::newNoExept(size); + return nullptr; +} + +/// delete + +/// C++17 std 21.6.2.1 (11) +/// If a function without a size parameter is defined, the program should also define the corresponding function with a size parameter. +/// If a function with a size parameter is defined, the program shall also define the corresponding version without the size parameter. + +/// cppreference: +/// It's unspecified whether size-aware or size-unaware version is called when deleting objects of +/// incomplete type and arrays of non-class and trivially-destructible class types. + +void operator delete(void * ptr) noexcept +{ + Memory::untrackMemory(ptr); + Memory::deleteImpl(ptr); +} + +void operator delete[](void * ptr) noexcept +{ + Memory::untrackMemory(ptr); + Memory::deleteImpl(ptr); +} + +void operator delete(void * ptr, std::size_t size) noexcept +{ + Memory::untrackMemory(ptr, size); + Memory::deleteSized(ptr, size); +} + +void operator delete[](void * ptr, std::size_t size) noexcept +{ + Memory::untrackMemory(ptr, size); + Memory::deleteSized(ptr, size); +} + +#else + +/// new + +void * operator new(std::size_t size) { return Memory::newImpl(size); } +void * operator new[](std::size_t size) { return Memory::newImpl(size); } + +void * operator new(std::size_t size, const std::nothrow_t &) noexcept { return Memory::newNoExept(size); } +void * operator new[](std::size_t size, const std::nothrow_t &) noexcept { return Memory::newNoExept(size); } + +/// delete + +void operator delete(void * ptr) noexcept { Memory::deleteImpl(ptr); } +void operator delete[](void * ptr) noexcept { Memory::deleteImpl(ptr); } + +void operator delete(void * ptr, const std::nothrow_t &) noexcept { Memory::deleteImpl(ptr); } +void operator delete[](void * ptr, const std::nothrow_t &) noexcept { Memory::deleteImpl(ptr); } + +void operator delete(void * ptr, std::size_t size) noexcept { Memory::deleteSized(ptr, size); } +void operator delete[](void * ptr, std::size_t size) noexcept { Memory::deleteSized(ptr, size); } + +#endif diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h index 65b205f806c..2098aa34a8c 100644 --- a/dbms/src/Core/Settings.h +++ b/dbms/src/Core/Settings.h @@ -334,7 +334,11 @@ struct Settings : public SettingsCollection M(SettingBool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.") \ \ M(SettingUInt64, max_partitions_per_insert_block, 100, "Limit maximum number of partitions in single INSERTed block. Zero means unlimited. Throw exception if the block contains too many partitions. 
This setting is a safety threshold, because using large number of partitions is a common misconception.") \ - M(SettingBool, check_query_single_value_result, true, "Return check query result as single 1/0 value") + M(SettingBool, check_query_single_value_result, true, "Return check query result as single 1/0 value") \ + \ + /** Obsolete settings that do nothing but are left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ + \ + M(SettingBool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. Will be removed after 2019-08-13") DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS) diff --git a/dbms/src/Dictionaries/CacheDictionary.h b/dbms/src/Dictionaries/CacheDictionary.h index c6607196af5..cc613d0d96b 100644 --- a/dbms/src/Dictionaries/CacheDictionary.h +++ b/dbms/src/Dictionaries/CacheDictionary.h @@ -30,8 +30,6 @@ public: const DictionaryLifetime dict_lifetime, const size_t size); - std::exception_ptr getCreationException() const override { return {}; } - std::string getName() const override { return name; } std::string getTypeName() const override { return "Cache"; } @@ -62,8 +60,6 @@ public: const DictionaryStructure & getStructure() const override { return dict_struct; } - std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; } - bool isInjective(const std::string & attribute_name) const override { return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; @@ -284,8 +280,6 @@ private: mutable std::atomic<size_t> element_count{0}; mutable std::atomic<size_t> hit_count{0}; mutable std::atomic<size_t> query_count{0}; - - const std::chrono::time_point<std::chrono::system_clock> creation_time = std::chrono::system_clock::now(); }; } diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h index 2c080d68b7b..ffac807c04c 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h @@ -50,8 +50,6 @@ public: std::string getKeyDescription() const { return key_description; } - std::exception_ptr getCreationException() const override { return {}; } - std::string getName() const override { return name; } std::string getTypeName() const override { return "ComplexKeyCache"; } @@ -86,8 +84,6 @@ public: const DictionaryStructure & getStructure() const override { return dict_struct; } - std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; } - bool isInjective(const std::string & attribute_name) const override { return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; diff --git a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp index 606e10f2412..39ef9124061 100644 --- a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp +++ b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp @@ -29,18 +29,8 @@ ComplexKeyHashedDictionary::ComplexKeyHashedDictionary( , saved_block{std::move(saved_block)} { createAttributes(); - - try - { - loadData(); - calculateBytesAllocated(); - } - catch (...) 
- { - creation_exception = std::current_exception(); - } - - creation_time = std::chrono::system_clock::now(); + loadData(); + calculateBytesAllocated(); } #define DECLARE(TYPE) \ diff --git a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h index b9aaa42a829..54ee8627f9b 100644 --- a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h +++ b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.h @@ -32,8 +32,6 @@ public: std::string getKeyDescription() const { return key_description; } - std::exception_ptr getCreationException() const override { return creation_exception; } - std::string getName() const override { return name; } std::string getTypeName() const override { return "ComplexKeyHashed"; } @@ -61,8 +59,6 @@ public: const DictionaryStructure & getStructure() const override { return dict_struct; } - std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; } - bool isInjective(const std::string & attribute_name) const override { return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; @@ -255,10 +251,6 @@ private: size_t bucket_count = 0; mutable std::atomic<size_t> query_count{0}; - std::chrono::time_point<std::chrono::system_clock> creation_time; - - std::exception_ptr creation_exception; - BlockPtr saved_block; }; diff --git a/dbms/src/Dictionaries/FlatDictionary.cpp b/dbms/src/Dictionaries/FlatDictionary.cpp index 628178c8542..b7b70748c01 100644 --- a/dbms/src/Dictionaries/FlatDictionary.cpp +++ b/dbms/src/Dictionaries/FlatDictionary.cpp @@ -36,18 +36,8 @@ FlatDictionary::FlatDictionary( , saved_block{std::move(saved_block)} { createAttributes(); - - try - { - loadData(); - calculateBytesAllocated(); - } - catch (...) - { - creation_exception = std::current_exception(); - } - - creation_time = std::chrono::system_clock::now(); + loadData(); + calculateBytesAllocated(); } diff --git a/dbms/src/Dictionaries/FlatDictionary.h b/dbms/src/Dictionaries/FlatDictionary.h index 2a00de6f754..de14cc3dc1a 100644 --- a/dbms/src/Dictionaries/FlatDictionary.h +++ b/dbms/src/Dictionaries/FlatDictionary.h @@ -29,8 +29,6 @@ public: bool require_nonempty, BlockPtr saved_block = nullptr); - std::exception_ptr getCreationException() const override { return creation_exception; } - std::string getName() const override { return name; } std::string getTypeName() const override { return "Flat"; } @@ -58,8 +56,6 @@ public: const DictionaryStructure & getStructure() const override { return dict_struct; } - std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; } - bool isInjective(const std::string & attribute_name) const override { return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; @@ -244,10 +240,6 @@ private: size_t bucket_count = 0; mutable std::atomic<size_t> query_count{0}; - std::chrono::time_point<std::chrono::system_clock> creation_time; - - std::exception_ptr creation_exception; - BlockPtr saved_block; }; diff --git a/dbms/src/Dictionaries/HashedDictionary.cpp b/dbms/src/Dictionaries/HashedDictionary.cpp index d67d6dcf9a2..413cfadec39 100644 --- a/dbms/src/Dictionaries/HashedDictionary.cpp +++ b/dbms/src/Dictionaries/HashedDictionary.cpp @@ -30,18 +30,8 @@ HashedDictionary::HashedDictionary( , saved_block{std::move(saved_block)} { createAttributes(); - - try - { - loadData(); - calculateBytesAllocated(); - } - catch (...) 
- { - creation_exception = std::current_exception(); - } - - creation_time = std::chrono::system_clock::now(); + loadData(); + calculateBytesAllocated(); } diff --git a/dbms/src/Dictionaries/HashedDictionary.h b/dbms/src/Dictionaries/HashedDictionary.h index b0605f26bad..92875f27cf3 100644 --- a/dbms/src/Dictionaries/HashedDictionary.h +++ b/dbms/src/Dictionaries/HashedDictionary.h @@ -28,8 +28,6 @@ public: bool require_nonempty, BlockPtr saved_block = nullptr); - std::exception_ptr getCreationException() const override { return creation_exception; } - std::string getName() const override { return name; } std::string getTypeName() const override { return "Hashed"; } @@ -57,8 +55,6 @@ public: const DictionaryStructure & getStructure() const override { return dict_struct; } - std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; } - bool isInjective(const std::string & attribute_name) const override { return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; @@ -248,10 +244,6 @@ private: size_t bucket_count = 0; mutable std::atomic<size_t> query_count{0}; - std::chrono::time_point<std::chrono::system_clock> creation_time; - - std::exception_ptr creation_exception; - BlockPtr saved_block; }; diff --git a/dbms/src/Dictionaries/RangeHashedDictionary.cpp b/dbms/src/Dictionaries/RangeHashedDictionary.cpp index ac509b4d1e5..05f29e05c42 100644 --- a/dbms/src/Dictionaries/RangeHashedDictionary.cpp +++ b/dbms/src/Dictionaries/RangeHashedDictionary.cpp @@ -80,18 +80,8 @@ RangeHashedDictionary::RangeHashedDictionary( , require_nonempty(require_nonempty) { createAttributes(); - - try - { - loadData(); - calculateBytesAllocated(); - } - catch (...) - { - creation_exception = std::current_exception(); - } - - creation_time = std::chrono::system_clock::now(); + loadData(); + calculateBytesAllocated(); } diff --git a/dbms/src/Dictionaries/RangeHashedDictionary.h b/dbms/src/Dictionaries/RangeHashedDictionary.h index b54c88de4e8..a02b1377db5 100644 --- a/dbms/src/Dictionaries/RangeHashedDictionary.h +++ b/dbms/src/Dictionaries/RangeHashedDictionary.h @@ -24,8 +24,6 @@ public: const DictionaryLifetime dict_lifetime, bool require_nonempty); - std::exception_ptr getCreationException() const override { return creation_exception; } - std::string getName() const override { return dictionary_name; } std::string getTypeName() const override { return "RangeHashed"; } @@ -53,8 +51,6 @@ public: const DictionaryStructure & getStructure() const override { return dict_struct; } - std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; } - bool isInjective(const std::string & attribute_name) const override { return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; @@ -227,10 +223,6 @@ private: size_t element_count = 0; size_t bucket_count = 0; mutable std::atomic<size_t> query_count{0}; - - std::chrono::time_point<std::chrono::system_clock> creation_time; - - std::exception_ptr creation_exception; }; } diff --git a/dbms/src/Dictionaries/TrieDictionary.h b/dbms/src/Dictionaries/TrieDictionary.h index f434ebbc77d..a873f7bdd16 100644 --- a/dbms/src/Dictionaries/TrieDictionary.h +++ b/dbms/src/Dictionaries/TrieDictionary.h @@ -33,8 +33,6 @@ public: std::string getKeyDescription() const { return key_description; } - std::exception_ptr getCreationException() const override { return creation_exception; } - std::string getName() const override { return name; } std::string getTypeName() const override { return "Trie"; } @@ -62,8 +60,6 @@ public: const DictionaryStructure & 
getStructure() const override { return dict_struct; } - std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; } - bool isInjective(const std::string & attribute_name) const override { return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; diff --git a/dbms/src/Interpreters/AsynchronousMetrics.cpp b/dbms/src/Interpreters/AsynchronousMetrics.cpp index 28a6d5dfb4d..621c27d6be8 100644 --- a/dbms/src/Interpreters/AsynchronousMetrics.cpp +++ b/dbms/src/Interpreters/AsynchronousMetrics.cpp @@ -159,10 +159,14 @@ void AsynchronousMetrics::update() size_t max_part_count_for_partition = 0; + size_t number_of_databases = databases.size(); + size_t total_number_of_tables = 0; + for (const auto & db : databases) { for (auto iterator = db.second->getIterator(context); iterator->isValid(); iterator->next()) { + ++total_number_of_tables; auto & table = iterator->table(); StorageMergeTree * table_merge_tree = dynamic_cast<StorageMergeTree *>(table.get()); StorageReplicatedMergeTree * table_replicated_merge_tree = dynamic_cast<StorageReplicatedMergeTree *>(table.get()); @@ -213,6 +217,9 @@ void AsynchronousMetrics::update() set("ReplicasMaxRelativeDelay", max_relative_delay); set("MaxPartCountForPartition", max_part_count_for_partition); + + set("NumberOfDatabases", number_of_databases); + set("NumberOfTables", total_number_of_tables); } #if USE_TCMALLOC diff --git a/dbms/src/Interpreters/CatBoostModel.cpp b/dbms/src/Interpreters/CatBoostModel.cpp index 7f98e5131ae..3e6e66b5c3f 100644 --- a/dbms/src/Interpreters/CatBoostModel.cpp +++ b/dbms/src/Interpreters/CatBoostModel.cpp @@ -504,20 +504,6 @@ std::shared_ptr getCatBoostWrapperHolder(const std::string & CatBoostModel::CatBoostModel(std::string name_, std::string model_path_, std::string lib_path_, const ExternalLoadableLifetime & lifetime) : name(std::move(name_)), model_path(std::move(model_path_)), lib_path(std::move(lib_path_)), lifetime(lifetime) -{ - try - { - init(); - } - catch (...) - { - creation_exception = std::current_exception(); - } - - creation_time = std::chrono::system_clock::now(); -} - -void CatBoostModel::init() { api_provider = getCatBoostWrapperHolder(lib_path); api = &api_provider->getAPI(); diff --git a/dbms/src/Interpreters/CatBoostModel.h b/dbms/src/Interpreters/CatBoostModel.h index 6f613ad0f24..541dd111c82 100644 --- a/dbms/src/Interpreters/CatBoostModel.h +++ b/dbms/src/Interpreters/CatBoostModel.h @@ -68,9 +68,6 @@ public: std::shared_ptr clone() const override; - std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; } - std::exception_ptr getCreationException() const override { return creation_exception; } - private: std::string name; std::string model_path; @@ -85,9 +82,6 @@ private: size_t cat_features_count; size_t tree_count; - std::chrono::time_point<std::chrono::system_clock> creation_time; - std::exception_ptr creation_exception; - void init(); }; diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index 612bd2362c8..c7b3d202a28 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -219,7 +219,7 @@ class ExternalLoader::LoadingDispatcher : private boost::noncopyable { public: /// Called to load or reload an object. - using CreateObjectFunction = std::function<ObjectWithException(const String &, const ObjectConfig &, bool, const LoadablePtr &)>; + using CreateObjectFunction = std::function<LoadablePtr(const String &, const ObjectConfig &, bool, const LoadablePtr &)>; /// Called after loading/reloading an object to calculate the time of the next update. 
@@ -783,7 +783,7 @@ private: std::exception_ptr new_exception; try { - std::tie(new_object, new_exception) = create_object(name, config, config_changed, previous_version); + new_object = create_object(name, config, config_changed, previous_version); } catch (...) { @@ -792,8 +792,6 @@ if (!new_object && !new_exception) throw Exception("No object created and no exception raised for " + type_name, ErrorCodes::LOGICAL_ERROR); - if (new_object && new_exception) - new_object = nullptr; /// Calculate a new update time. TimePoint next_update_time; @@ -1152,17 +1150,13 @@ void ExternalLoader::reload(bool load_never_loading) loading_dispatcher->reload(load_never_loading); } -ExternalLoader::ObjectWithException ExternalLoader::createObject( +ExternalLoader::LoadablePtr ExternalLoader::createObject( const String & name, const ObjectConfig & config, bool config_changed, const LoadablePtr & previous_version) const { if (previous_version && !config_changed) - { - auto new_object = previous_version->clone(); - return {new_object, new_object->getCreationException()}; - } + return previous_version->clone(); - auto new_object = create(name, *config.config, config.key_in_config); - return {new_object, new_object->getCreationException()}; + return create(name, *config.config, config.key_in_config); } ExternalLoader::TimePoint ExternalLoader::calculateNextUpdateTime(const LoadablePtr & loaded_object, size_t error_count) const diff --git a/dbms/src/Interpreters/ExternalLoader.h b/dbms/src/Interpreters/ExternalLoader.h index da999bfe21a..4c94b8d69cd 100644 --- a/dbms/src/Interpreters/ExternalLoader.h +++ b/dbms/src/Interpreters/ExternalLoader.h @@ -186,10 +186,8 @@ protected: private: struct ObjectConfig; - using ObjectWithException = std::pair<LoadablePtr, std::exception_ptr>; - ObjectWithException - createObject(const String & name, const ObjectConfig & config, bool config_changed, const LoadablePtr & previous_version) const; + LoadablePtr createObject(const String & name, const ObjectConfig & config, bool config_changed, const LoadablePtr & previous_version) const; TimePoint calculateNextUpdateTime(const LoadablePtr & loaded_object, size_t error_count) const; class ConfigFilesReader; diff --git a/dbms/src/Interpreters/IExternalLoadable.h b/dbms/src/Interpreters/IExternalLoadable.h index 7b875f02060..f8725a67989 100644 --- a/dbms/src/Interpreters/IExternalLoadable.h +++ b/dbms/src/Interpreters/IExternalLoadable.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -41,10 +40,6 @@ public: virtual bool isModified() const = 0; /// Returns new object with the same configuration. Is used to update modified object when lifetime exceeded. 
virtual std::shared_ptr clone() const = 0; - - virtual std::chrono::time_point<std::chrono::system_clock> getCreationTime() const = 0; - - virtual std::exception_ptr getCreationException() const = 0; }; } diff --git a/dbms/src/Interpreters/InternalTextLogsQueue.cpp b/dbms/src/Interpreters/InternalTextLogsQueue.cpp index a27838a4203..6028514f11f 100644 --- a/dbms/src/Interpreters/InternalTextLogsQueue.cpp +++ b/dbms/src/Interpreters/InternalTextLogsQueue.cpp @@ -20,13 +20,13 @@ Block InternalTextLogsQueue::getSampleBlock() { return Block { {std::make_shared(), "event_time"}, - {std::make_shared(), "event_time_microseconds"}, - {std::make_shared(), "host_name"}, - {std::make_shared(), "query_id"}, - {std::make_shared(), "thread_number"}, - {std::make_shared(), "priority"}, - {std::make_shared(), "source"}, - {std::make_shared(), "text"} + {std::make_shared(), "event_time_microseconds"}, + {std::make_shared(), "host_name"}, + {std::make_shared(), "query_id"}, + {std::make_shared(), "thread_number"}, + {std::make_shared(), "priority"}, + {std::make_shared(), "source"}, + {std::make_shared(), "text"} }; } diff --git a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp index d037f87a857..e6da9b63d7b 100644 --- a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp @@ -101,6 +101,9 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() for (const auto & column : columns) { + if (column.is_virtual) + continue; + res_columns[0]->insert(column.name); res_columns[1]->insert(column.type->getName()); diff --git a/dbms/tests/integration/helpers/client.py b/dbms/tests/integration/helpers/client.py index 9df53c40e67..fd59166b137 100644 --- a/dbms/tests/integration/helpers/client.py +++ b/dbms/tests/integration/helpers/client.py @@ -44,6 +44,9 @@ class Client: return self.get_query_request(sql, stdin=stdin, timeout=timeout, settings=settings, user=user).get_error() + def query_and_get_answer_with_error(self, sql, stdin=None, timeout=None, settings=None, user=None): + return self.get_query_request(sql, stdin=stdin, timeout=timeout, settings=settings, user=user).get_answer_and_error() + class QueryTimeoutExceedException(Exception): pass @@ -110,3 +113,17 @@ class CommandRequest: raise QueryRuntimeException('Client expected to be failed but succeeded! stdout: {}'.format(stdout)) return stderr + + + def get_answer_and_error(self): + self.process.wait() + self.stdout_file.seek(0) + self.stderr_file.seek(0) + + stdout = self.stdout_file.read() + stderr = self.stderr_file.read() + + if self.timer is not None and not self.process_finished_before_timeout and not self.ignore_error: + raise QueryTimeoutExceedException('Client timed out!') + + return (stdout, stderr) diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index fbc6591eaa5..bd3ecb9ae9c 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -527,6 +527,10 @@ class ClickHouseInstance: def query_and_get_error(self, sql, stdin=None, timeout=None, settings=None, user=None): return self.client.query_and_get_error(sql, stdin, timeout, settings, user) + # The same as query_and_get_error, but also returns the answer for a successful query. 
+ def query_and_get_answer_with_error(self, sql, stdin=None, timeout=None, settings=None, user=None): + return self.client.query_and_get_answer_with_error(sql, stdin, timeout, settings, user) + # Connects to the instance via HTTP interface, sends a query and returns the answer def http_query(self, sql, data=None): return urllib.urlopen("http://"+self.ip_address+":8123/?query="+urllib.quote(sql,safe=''), data).read() diff --git a/dbms/tests/integration/test_logs_level/__init__.py b/dbms/tests/integration/test_logs_level/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_logs_level/configs/config_information.xml b/dbms/tests/integration/test_logs_level/configs/config_information.xml new file mode 100644 index 00000000000..b99f57402fd --- /dev/null +++ b/dbms/tests/integration/test_logs_level/configs/config_information.xml @@ -0,0 +1,26 @@ + + + + information + /var/log/clickhouse-server/clickhouse-server.log + /var/log/clickhouse-server/clickhouse-server.err.log + 1000M + 10 + + + + + true + none + + AcceptCertificateHandler + + + + + 500 + 5368709120 + users.xml + + /etc/clickhouse-server/config.d/*.xml + diff --git a/dbms/tests/integration/test_logs_level/test.py b/dbms/tests/integration/test_logs_level/test.py new file mode 100644 index 00000000000..302686b6fa0 --- /dev/null +++ b/dbms/tests/integration/test_logs_level/test.py @@ -0,0 +1,18 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance('node', main_configs=['configs/config_information.xml']) + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def test_check_client_logs_level(start_cluster): + logs = node.query_and_get_answer_with_error("SELECT 1", settings={"send_logs_level": 'trace'})[1] + assert logs.count('Trace') != 0 diff --git a/dbms/tests/performance/joins_in_memory.xml b/dbms/tests/performance/joins_in_memory.xml index 23b009a6027..1da400c48f4 100644 --- a/dbms/tests/performance/joins_in_memory.xml +++ b/dbms/tests/performance/joins_in_memory.xml @@ -13,11 +13,11 @@ CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory - INSERT INTO ints SELECT number AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000) - INSERT INTO ints SELECT 10000 + number % 1000 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000) - INSERT INTO ints SELECT 20000 + number % 100 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000) - INSERT INTO ints SELECT 30000 + number % 10 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000) - INSERT INTO ints SELECT 40000 + number % 1 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000) + INSERT INTO ints SELECT number AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000) + INSERT INTO ints SELECT 10000 + number % 1000 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000) + INSERT INTO ints SELECT 20000 + number % 100 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000) + INSERT INTO ints SELECT 30000 + number % 10 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000) + INSERT INTO ints SELECT 40000 + number % 1 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000) SELECT COUNT() FROM ints l ANY LEFT JOIN ints r USING i64 WHERE i32 = 200042 SELECT COUNT() FROM ints l ANY LEFT JOIN ints r USING i64,i32,i16,i8 WHERE i32 = 200042 diff --git 
a/dbms/tests/queries/0_stateless/00877_memory_limit_for_new_delete.reference b/dbms/tests/queries/0_stateless/00877_memory_limit_for_new_delete.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00877_memory_limit_for_new_delete.sql b/dbms/tests/queries/0_stateless/00877_memory_limit_for_new_delete.sql new file mode 100644 index 00000000000..111104bb06e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00877_memory_limit_for_new_delete.sql @@ -0,0 +1,7 @@ +SET max_memory_usage = 1000000000; + +SELECT sum(ignore(*)) FROM ( + SELECT number, argMax(number, (number, toFixedString(toString(number), 1024))) + FROM numbers(1000000) + GROUP BY number +) -- { serverError 241 } diff --git a/dbms/tests/queries/0_stateless/00965_pocopatch_logs_level_bugfix.reference b/dbms/tests/queries/0_stateless/00965_pocopatch_logs_level_bugfix.reference new file mode 100644 index 00000000000..233d012cabe --- /dev/null +++ b/dbms/tests/queries/0_stateless/00965_pocopatch_logs_level_bugfix.reference @@ -0,0 +1,24 @@ +1 +1 +1 +1 +1 +1 + + + + + + + + + + + + + + + + + + diff --git a/dbms/tests/queries/0_stateless/00965_pocopatch_logs_level_bugfix.sh b/dbms/tests/queries/0_stateless/00965_pocopatch_logs_level_bugfix.sh new file mode 100755 index 00000000000..99de8cfb2d5 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00965_pocopatch_logs_level_bugfix.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +> 00965_logs_level_bugfix.tmp + +clickhouse-client --send_logs_level="trace" --query="SELECT 1;" 2>> 00965_logs_level_bugfix.tmp +clickhouse-client --send_logs_level="debug" --query="SELECT 1;" 2>> 00965_logs_level_bugfix.tmp +clickhouse-client --send_logs_level="information" --query="SELECT 1;" 2>> 00965_logs_level_bugfix.tmp +clickhouse-client --send_logs_level="warning" --query="SELECT 1;" 2>> 00965_logs_level_bugfix.tmp +clickhouse-client --send_logs_level="error" --query="SELECT 1;" 2>> 00965_logs_level_bugfix.tmp +clickhouse-client --send_logs_level="none" --query="SELECT 1;" 2>> 00965_logs_level_bugfix.tmp + +awk '{ print $8 }' 00965_logs_level_bugfix.tmp + + diff --git a/dbms/tests/queries/0_stateless/00965_pocopatch_send_logs_level_concurrent_queries.reference b/dbms/tests/queries/0_stateless/00965_pocopatch_send_logs_level_concurrent_queries.reference new file mode 100644 index 00000000000..45c133ba43b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00965_pocopatch_send_logs_level_concurrent_queries.reference @@ -0,0 +1,12 @@ + + + + + + + + + + +***** + diff --git a/dbms/tests/queries/0_stateless/00965_pocopatch_send_logs_level_concurrent_queries.sh b/dbms/tests/queries/0_stateless/00965_pocopatch_send_logs_level_concurrent_queries.sh new file mode 100755 index 00000000000..48b6b41282c --- /dev/null +++ b/dbms/tests/queries/0_stateless/00965_pocopatch_send_logs_level_concurrent_queries.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
$CURDIR/../shell_config.sh + +> 00965_send_logs_level_concurrent_queries_first.tmp +> 00965_send_logs_level_concurrent_queries_second.tmp + +clickhouse-client --send_logs_level="trace" --query="SELECT * from numbers(100000);" >> /dev/null 2>> 00965_send_logs_level_concurrent_queries_first.tmp & +clickhouse-client --send_logs_level="information" --query="SELECT * from numbers(100000);" >> /dev/null 2>> 00965_send_logs_level_concurrent_queries_second.tmp + +sleep 2 + +awk '{ print $8 }' 00965_send_logs_level_concurrent_queries_first.tmp +echo "*****" +awk '{ print $8 }' 00965_send_logs_level_concurrent_queries_second.tmp + diff --git a/dbms/tests/queries/0_stateless/00972_desc_table_virtual_columns.reference b/dbms/tests/queries/0_stateless/00972_desc_table_virtual_columns.reference new file mode 100644 index 00000000000..b244da814f9 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00972_desc_table_virtual_columns.reference @@ -0,0 +1 @@ +x UInt64 diff --git a/dbms/tests/queries/0_stateless/00972_desc_table_virtual_columns.sql b/dbms/tests/queries/0_stateless/00972_desc_table_virtual_columns.sql new file mode 100644 index 00000000000..920025a844d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00972_desc_table_virtual_columns.sql @@ -0,0 +1,9 @@ +-- No virtual columns should be output in DESC TABLE query. + +DROP TABLE IF EXISTS upyachka; +CREATE TABLE upyachka (x UInt64) ENGINE = Memory; + +-- Merge table has virtual column `_table` +DESC TABLE merge(currentDatabase(), 'upyachka'); + +DROP TABLE upyachka; diff --git a/debian/clickhouse-server.templates b/debian/clickhouse-server.templates index 3053c18c79f..fdab88cf877 100644 --- a/debian/clickhouse-server.templates +++ b/debian/clickhouse-server.templates @@ -1,3 +1,3 @@ Template: clickhouse-server/default-password Type: password -Description: Password for default user. +Description: Password for default user diff --git a/docs/en/operations/server_settings/settings.md b/docs/en/operations/server_settings/settings.md index 493a558c8e2..2f87233e6a3 100644 --- a/docs/en/operations/server_settings/settings.md +++ b/docs/en/operations/server_settings/settings.md @@ -514,7 +514,7 @@ Use the following parameters to configure logging: ``` -## path +## path {#server_settings-path} The path to the directory containing data. diff --git a/docs/en/operations/table_engines/file.md b/docs/en/operations/table_engines/file.md index 82c536646ac..9e3c5b3400b 100644 --- a/docs/en/operations/table_engines/file.md +++ b/docs/en/operations/table_engines/file.md @@ -71,7 +71,7 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64 ## Details of Implementation -- Multiple SELECT queries can be performed concurrently, but INSERT queries will wait each other. +- Multiple `SELECT` queries can be performed concurrently, but `INSERT` queries will wait for each other. - Not supported: - `ALTER` - `SELECT ... 
SAMPLE` diff --git a/docs/en/query_language/agg_functions/combinators.md b/docs/en/query_language/agg_functions/combinators.md index 852b396332c..133b4d489d7 100644 --- a/docs/en/query_language/agg_functions/combinators.md +++ b/docs/en/query_language/agg_functions/combinators.md @@ -22,13 +22,21 @@ Example 2: `uniqArray(arr)` – Count the number of unique elements in all 'arr' ## -State -If you apply this combinator, the aggregate function doesn't return the resulting value (such as the number of unique values for the `uniq` function), but an intermediate state of the aggregation (for `uniq`, this is the hash table for calculating the number of unique values). This is an AggregateFunction(...) that can be used for further processing or stored in a table to finish aggregating later. To work with these states, use the [AggregatingMergeTree](../../operations/table_engines/aggregatingmergetree.md) table engine, the functions [`finalizeAggregation`](../functions/other_functions.md#finalizeaggregation) and [`runningAccumulate`](../functions/other_functions.md#function-runningaccumulate), and the combinators -Merge and -MergeState described below. +If you apply this combinator, the aggregate function doesn't return the resulting value (such as the number of unique values for the [uniq](reference.md#agg_function-uniq) function), but an intermediate state of the aggregation (for `uniq`, this is the hash table for calculating the number of unique values). This is an `AggregateFunction(...)` that can be used for further processing or stored in a table to finish aggregating later. -## -Merge +To work with these states, use: + +- [AggregatingMergeTree](../../operations/table_engines/aggregatingmergetree.md) table engine. +- [finalizeAggregation](../functions/other_functions.md#function-finalizeaggregation) function. +- [runningAccumulate](../functions/other_functions.md#function-runningaccumulate) function. +- [-Merge](#aggregate_functions_combinators_merge) combinator. +- [-MergeState](#aggregate_functions_combinators_mergestate) combinator. + +## -Merge {#aggregate_functions_combinators_merge} If you apply this combinator, the aggregate function takes the intermediate aggregation state as an argument, combines the states to finish aggregation, and returns the resulting value. -## -MergeState. +## -MergeState {#aggregate_functions_combinators_mergestate} Merges the intermediate aggregation states in the same way as the -Merge combinator. However, it doesn't return the resulting value, but an intermediate aggregation state, similar to the -State combinator. diff --git a/docs/en/query_language/create.md b/docs/en/query_language/create.md index a6ae57bc00f..94d32cc49f5 100644 --- a/docs/en/query_language/create.md +++ b/docs/en/query_language/create.md @@ -107,22 +107,26 @@ Besides default data compression, defined in [server settings](../operations/ser Supported compression algorithms: -- `NONE` - no compression for data applied -- `LZ4` -- `LZ4HC(level)` - (level) - LZ4\_HC compression algorithm with defined level. -Possible `level` range: \[3, 12\]. Default value: 9. Greater values stands for better compression and higher CPU usage. Recommended value range: [4,9]. -- `ZSTD(level)` - ZSTD compression algorithm with defined `level`. Possible `level` value range: \[1, 22\]. Default value: 1. -Greater values stands for better compression and higher CPU usage. -- `Delta(delta_bytes)` - compression approach when raw values are replace with difference of two neighbour values. 
Up to `delta_bytes` are used for storing delta value. -Possible `delta_bytes` values: 1, 2, 4, 8. Default value for delta bytes is `sizeof(type)`, if it is equals to 1, 2, 4, 8 and equals to 1 otherwise. -- `DoubleDelta` - stores delta of deltas in compact binary form, compressing values down to 1 bit (in the best case). Best compression rates are achieved on monotonic sequences with constant stride, e.g. time samples. Can be used against any fixed-width type. Implementation is based on [Gorilla paper](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf), and extended to support 64bit types. The drawback is 1 extra bit for 32-byte wide deltas: 5-bit prefix instead of 4-bit prefix. -- `Gorilla` - stores (parts of) xored values in compact binary form, compressing values down to 1 bit (in the best case). Best compression rate is achieved when neighbouring values are binary equal. Basic use case - floating point data that do not change rapidly. Implementation is based on [Gorilla paper](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf), and extended to support 64bit types. +- `NONE` — No compression. +- `LZ4` — Lossless [data compression algorithm](https://github.com/lz4/lz4) used by default. Applies LZ4 fast compression. +- `LZ4HC[(level)]` — LZ4 HC (high compression) algorithm with configurable level. Default level: 9. If you set `level <= 0`, the default level is applied. Possible levels: [1, 12]. Recommended levels are in the range: [4, 9]. +- `ZSTD[(level)]` — [ZSTD compression algorithm](https://en.wikipedia.org/wiki/Zstandard) with configurable `level`. Possible levels: [1, 22]. Default value: 1. +- `Delta(delta_bytes)` — Compression approach in which raw values are replaced with the difference of two neighbouring values. Up to `delta_bytes` are used for storing the delta value, so `delta_bytes` is the maximum size of raw values. +Possible `delta_bytes` values: 1, 2, 4, 8. The default value for `delta_bytes` is `sizeof(type)` if it is equal to 1, 2, 4, or 8. Otherwise it equals 1. +- `DoubleDelta` — Compresses values down to 1 bit (in the best case) by storing deltas of deltas. Best compression rates are achieved on monotonic sequences with constant stride, for example, time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64 bit types. Uses 1 extra bit for 32 byte deltas: 5 bit prefix instead of 4 bit prefix. For additional information, see the "Compressing time stamps" section of the [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf) document. +- `Gorilla` — Compresses values down to 1 bit (in the best case). The codec is efficient when storing series of floating point values that change slowly, because the best compression rate is achieved when neighbouring values are binary equal. Implements the algorithm used in Gorilla TSDB, extending it to support 64 bit types. For additional information, see the "Compressing values" section of the [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf) document. + +High compression levels are useful for asymmetric scenarios, such as compressing once and decompressing many times. Higher levels mean better compression and higher CPU usage. + +!!!warning + You cannot decompress ClickHouse database files with external utilities, for example, `lz4`. Use the special utility [clickhouse-compressor](https://github.com/yandex/ClickHouse/tree/master/dbms/programs/compressor). 
Syntax example: + ``` CREATE TABLE codec_example ( - dt Date CODEC(ZSTD), /* используется уровень сжатия по-умолчанию */ + dt Date CODEC(ZSTD), ts DateTime CODEC(LZ4HC), float_value Float32 CODEC(NONE), double_value Float64 CODEC(LZ4HC(9)) @@ -134,6 +138,7 @@ ORDER BY dt Codecs can be combined in a pipeline. Default table codec is not included into pipeline (if it should be applied to a column, you have to specify it explicitly in pipeline). Example below shows an optimization approach for storing timeseries metrics. Usually, values for particular metric, stored in `path` does not differ significantly from point to point. Using delta-encoding allows to reduce disk space usage significantly. + ``` CREATE TABLE timeseries_example ( diff --git a/docs/en/query_language/functions/ext_dict_functions.md b/docs/en/query_language/functions/ext_dict_functions.md index 017d941b9f6..6494a2b643e 100644 --- a/docs/en/query_language/functions/ext_dict_functions.md +++ b/docs/en/query_language/functions/ext_dict_functions.md @@ -96,7 +96,7 @@ LIMIT 3 Checks whether the dictionary has the key. ``` -dictHas('dict_name', id) +dictHas('dict_name', id_expr) ``` **Parameters** @@ -116,7 +116,7 @@ Type: `UInt8`. For the hierarchical dictionary, returns an array of dictionary keys starting from passed `id_expr` and continuing along the chain of parent elements. ``` -dictGetHierarchy('dict_name', id) +dictGetHierarchy('dict_name', id_expr) ``` **Parameters** diff --git a/docs/en/query_language/functions/geo.md b/docs/en/query_language/functions/geo.md index 97c4d1b5b4f..2c84a4516ba 100644 --- a/docs/en/query_language/functions/geo.md +++ b/docs/en/query_language/functions/geo.md @@ -151,4 +151,36 @@ SELECT geohashDecode('ezs42') AS res └─────────────────────────────────┘ ``` +## geoToH3 + +Calculates the [H3](https://uber.github.io/h3/#/documentation/overview/introduction) index of the point `(lon, lat)` with the specified resolution. + +``` +geoToH3(lon, lat, resolution) +``` + +**Input values** + +- `lon` — Longitude. Type: [Float64](../../data_types/float.md). +- `lat` — Latitude. Type: [Float64](../../data_types/float.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../data_types/int_uint.md). + +**Returned values** + +- Hexagon index number. +- 0 in case of error. + +Type: [UInt64](../../data_types/int_uint.md). + +**Example** + +``` sql +SELECT geoToH3(37.79506683, 55.71290588, 15) AS h3Index +``` +``` +┌────────────h3Index─┐ +│ 644325524701193974 │ +└────────────────────┘ +``` + [Original article](https://clickhouse.yandex/docs/en/query_language/functions/geo/) diff --git a/docs/en/query_language/functions/other_functions.md b/docs/en/query_language/functions/other_functions.md index d3b7b2082d4..007f1352775 100644 --- a/docs/en/query_language/functions/other_functions.md +++ b/docs/en/query_language/functions/other_functions.md @@ -627,15 +627,36 @@ SELECT replicate(1, ['a', 'b', 'c']) └───────────────────────────────┘ ``` -## filesystemAvailable +## filesystemAvailable {#function-filesystemavailable} -Returns the remaining space information of the disk, in bytes. This information is evaluated using the configured by path. +Returns the amount of remaining space in the filesystem where the files of the databases are located. See the [path](../../operations/server_settings/settings.md#server_settings-path) server setting description. + +``` +filesystemAvailable() +``` + +**Returned values** + +- Amount of remaining space in bytes. + +Type: [UInt64](../../data_types/int_uint.md). 
+ +**Example** + +```sql +SELECT filesystemAvailable() AS "Free space", toTypeName(filesystemAvailable()) AS "Type" +``` +```text +┌──Free space─┬─Type───┐ +│ 18152624128 │ UInt64 │ +└─────────────┴────────┘ +``` ## filesystemCapacity Returns the capacity information of the disk, in bytes. This information is evaluated using the configured by path. -## finalizeAggregation +## finalizeAggregation {#function-finalizeaggregation} Takes state of aggregate function. Returns result of aggregation (finalized state). diff --git a/docs/ru/getting_started/example_datasets/star_schema.md b/docs/ru/getting_started/example_datasets/star_schema.md index 1d8af3b29a5..545eaeea6a6 100644 --- a/docs/ru/getting_started/example_datasets/star_schema.md +++ b/docs/ru/getting_started/example_datasets/star_schema.md @@ -101,11 +101,11 @@ CREATE TABLE lineorder_flat ENGINE = MergeTree PARTITION BY toYear(LO_ORDERDATE) ORDER BY (LO_ORDERDATE, LO_ORDERKEY) AS -SELECT * -FROM lineorder -ANY INNER JOIN customer ON LO_CUSTKEY = C_CUSTKEY -ANY INNER JOIN supplier ON LO_SUPPKEY = S_SUPPKEY -ANY INNER JOIN part ON LO_PARTKEY = P_PARTKEY; +SELECT l.*, c.*, s.*, p.* +FROM lineorder l + ANY INNER JOIN customer c ON (c.C_CUSTKEY = l.LO_CUSTKEY) + ANY INNER JOIN supplier s ON (s.S_SUPPKEY = l.LO_SUPPKEY) + ANY INNER JOIN part p ON (p.P_PARTKEY = l.LO_PARTKEY); ALTER TABLE lineorder_flat DROP COLUMN C_CUSTKEY, DROP COLUMN S_SUPPKEY, DROP COLUMN P_PARTKEY; ``` diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 70a4e608e2a..24cf6bea9ed 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -167,6 +167,8 @@ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMA `NULL` форматируется в виде `\N` или `NULL` или пустой неэкранированной строки (см. настройки [format_csv_unquoted_null_literal_as_null](../operations/settings/settings.md#settings-format_csv_unquoted_null_literal_as_null) и [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#settings-input_format_defaults_for_omitted_fields)). +Если установлена настройка [input_format_defaults_for_omitted_fields = 1](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields) и тип столбца не `Nullable(T)`, то пустые значения без кавычек заменяются значениями по умолчанию для типа данных столбца. + Формат CSV поддерживает вывод totals и extremes аналогично `TabSeparated`. ## CSVWithNames diff --git a/docs/ru/operations/server_settings/settings.md b/docs/ru/operations/server_settings/settings.md index 8f1bbcb7488..c1444668bb2 100644 --- a/docs/ru/operations/server_settings/settings.md +++ b/docs/ru/operations/server_settings/settings.md @@ -514,7 +514,7 @@ ClickHouse проверит условия `min_part_size` и `min_part_size_rat ``` -## path +## path {#server_settings-path} Путь к каталогу с данными. diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 53a4eb181eb..1c0a9c0ac24 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -181,20 +181,15 @@ Ok. ## input_format_defaults_for_omitted_fields {#session_settings-input_format_defaults_for_omitted_fields} -Включает/выключает расширенный обмен данными между клиентом ClickHouse и сервером ClickHouse. Параметр применяется для запросов `INSERT`. +При вставке данных запросом `INSERT`, заменяет пропущенные поля значениями по умолчанию для типа данных столбца. 
-При выполнении запроса`INSERT`, клиент ClickHouse подготавливает данные и отправляет их на сервер для записи. При подготовке данных клиент получает структуру таблицы от сервера. В некоторых случаях клиенту требуется больше информации, чем сервер отправляет по умолчанию. Включите расширенный обмен данными с помощью настройки `input_format_defaults_for_omitted_fields = 1`. +Поддерживаемые форматы вставки: -Если расширенный обмен данными включен, сервер отправляет дополнительные метаданные вместе со структурой таблицы. Состав метаданных зависит от операции. - -Операции, для которых может потребоваться включить расширенный обмен данными: - -- Вставка данных в формате [JSONEachRow](../../interfaces/formats.md#jsoneachrow). - -Для всех остальных операций ClickHouse не применяет этот параметр. +- [JSONEachRow](../../interfaces/formats.md#jsoneachrow) +- [CSV](../../interfaces/formats.md#csv) !!! note "Примечание" - Функциональность расширенного обмена данными потребляет дополнительные вычислительные ресурсы на сервере и может снизить производительность. + Когда опция включена, сервер отправляет клиенту расширенные метаданные. Это требует дополнительных вычислительных ресурсов на сервере и может снизить производительность. Возможные значения: diff --git a/docs/ru/operations/table_engines/file.md b/docs/ru/operations/table_engines/file.md index 731204f928a..b67823b988a 100644 --- a/docs/ru/operations/table_engines/file.md +++ b/docs/ru/operations/table_engines/file.md @@ -68,7 +68,7 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64 ## Детали реализации -- Поддерживается многопоточное чтение и однопоточная запись. +- Поддерживается одновременное выполнение множества запросов `SELECT`, запросы `INSERT` могут выполняться только последовательно. - Не поддерживается: - использование операций `ALTER` и `SELECT...SAMPLE`; - индексы; diff --git a/docs/ru/operations/table_engines/kafka.md b/docs/ru/operations/table_engines/kafka.md index 3fe2e4d5cba..086d4fb4f08 100644 --- a/docs/ru/operations/table_engines/kafka.md +++ b/docs/ru/operations/table_engines/kafka.md @@ -25,7 +25,7 @@ SETTINGS [kafka_row_delimiter = 'delimiter_symbol',] [kafka_schema = '',] [kafka_num_consumers = N,] - [kafka_skip_broken_messages = <0|1>] + [kafka_skip_broken_messages = N] ``` Обязательные параметры: @@ -40,7 +40,7 @@ SETTINGS - `kafka_row_delimiter` – символ-разделитель записей (строк), которым завершается сообщение. - `kafka_schema` – опциональный параметр, необходимый, если используется формат, требующий определения схемы. Например, [Cap'n Proto](https://capnproto.org/) требует путь к файлу со схемой и название корневого объекта `schema.capnp:Message`. - `kafka_num_consumers` – количество потребителей (consumer) на таблицу. По умолчанию: `1`. Укажите больше потребителей, если пропускная способность одного потребителя недостаточна. Общее число потребителей не должно превышать количество партиций в топике, так как на одну партицию может быть назначено не более одного потребителя. -- `kafka_skip_broken_messages` – режим обработки сообщений Kafka. Если `kafka_skip_broken_messages = 1`, то движок отбрасывает сообщения Кафки, которые не получилось обработать. Одно сообщение в точности соответствует одной записи (строке). +- `kafka_skip_broken_messages` – максимальное количество некорректных сообщений в блоке. Если `kafka_skip_broken_messages = N`, то движок отбрасывает `N` сообщений Кафки, которые не получилось обработать. Одно сообщение в точности соответствует одной записи (строке). 
Значение по умолчанию – 0. Примеры diff --git a/docs/ru/query_language/agg_functions/combinators.md b/docs/ru/query_language/agg_functions/combinators.md index dfee76bb79d..1fcdb111e17 100644 --- a/docs/ru/query_language/agg_functions/combinators.md +++ b/docs/ru/query_language/agg_functions/combinators.md @@ -23,13 +23,22 @@ ## -State -В случае применения этого комбинатора, агрегатная функция возвращает не готовое значение (например, в случае функции `uniq` — количество уникальных значений), а промежуточное состояние агрегации (например, в случае функции `uniq` — хэш-таблицу для расчёта количества уникальных значений), которое имеет тип AggregateFunction(...) и может использоваться для дальнейшей обработки или может быть сохранено в таблицу для последующей доагрегации - смотрите разделы «AggregatingMergeTree» и «функции для работы с промежуточными состояниями агрегации». +В случае применения этого комбинатора, агрегатная функция возвращает не готовое значение (например, в случае функции [uniq](reference.md#agg_function-uniq) — количество уникальных значений), а промежуточное состояние агрегации (например, в случае функции `uniq` — хэш-таблицу для расчёта количества уникальных значений), которое имеет тип `AggregateFunction(...)` и может использоваться для дальнейшей обработки или может быть сохранено в таблицу для последующей доагрегации. -## -Merge +Для работы с промежуточными состояниями предназначены: + +- Движок таблиц [AggregatingMergeTree](../../operations/table_engines/aggregatingmergetree.md). +- Функция [finalizeAggregation](../functions/other_functions.md#function-finalizeaggregation). +- Функция [runningAccumulate](../functions/other_functions.md#function-runningaccumulate). +- Комбинатор [-Merge](#aggregate_functions_combinators_merge). +- Комбинатор [-MergeState](#aggregate_functions_combinators_mergestate). + + +## -Merge {#aggregate_functions_combinators_merge} В случае применения этого комбинатора, агрегатная функция будет принимать в качестве аргумента промежуточное состояние агрегации, доагрегировать (объединять вместе) эти состояния, и возвращать готовое значение. -## -MergeState. +## -MergeState {#aggregate_functions_combinators_mergestate} Выполняет слияние промежуточных состояний агрегации, аналогично комбинатору -Merge, но возвращает не готовое значение, а промежуточное состояние агрегации, аналогично комбинатору -State. diff --git a/docs/ru/query_language/functions/ext_dict_functions.md b/docs/ru/query_language/functions/ext_dict_functions.md index 8901292aeb2..3fb4a110e88 100644 --- a/docs/ru/query_language/functions/ext_dict_functions.md +++ b/docs/ru/query_language/functions/ext_dict_functions.md @@ -1,40 +1,192 @@ # Функции для работы с внешними словарями {#ext_dict_functions} -Информация о подключении и настройке внешних словарей смотрите в разделе [Внешние словари](../dicts/external_dicts.md). +Для получения информации о подключении и настройке читайте раздел про [внешние словари](../dicts/external_dicts.md). -## dictGetUInt8, dictGetUInt16, dictGetUInt32, dictGetUInt64 +## dictGet -## dictGetInt8, dictGetInt16, dictGetInt32, dictGetInt64 +Получение значения из внешнего словаря. -## dictGetFloat32, dictGetFloat64 +``` +dictGet('dict_name', 'attr_name', id_expr) +dictGetOrDefault('dict_name', 'attr_name', id_expr, default_value_expr) +``` -## dictGetDate, dictGetDateTime **Параметры** -## dictGetUUID +- `dict_name` — Название словаря. [Строковый литерал](../syntax.md#syntax-string-literal). +- `attr_name` — Название колонки словаря. 
[Строковый литерал](../syntax.md#syntax-string-literal). +- `id_expr` — Значение ключа. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md) или [Tuple](../../data_types/tuple.md) в зависимости от конфигурации словаря. +- `default_value_expr` — Значение, которое возвращается, если словарь не содержит колонку с ключом `id_expr`. [Выражение](../syntax.md#syntax-expressions) возвращает значение такого же типа, что и у атрибута `attr_name`. -## dictGetString -`dictGetT('dict_name', 'attr_name', id)` -- получить из словаря dict_name значение атрибута attr_name по ключу id. -`dict_name` и `attr_name` - константные строки. -`id` должен иметь тип UInt64. -Если ключа `id` нет в словаре - вернуть значение по умолчанию, заданное в описании словаря. +**Возвращаемое значение** -## dictGetTOrDefault +- Если ClickHouse успешно обрабатывает атрибут в соответствии с указанным [типом данных](../dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes), то функция возвращает значение для заданного ключа `id_expr`. +- Если запрашиваемого `id_expr` не оказалось в словаре: -`dictGetT('dict_name', 'attr_name', id, default)` + - `dictGet` возвратит содержимое элемента `<null_value>`, определенного в настройках словаря. + - `dictGetOrDefault` вернет значение переданного `default_value_expr` параметра. -Аналогично функциям `dictGetT`, но значение по умолчанию берётся из последнего аргумента функции. +ClickHouse бросает исключение, если не может обработать значение атрибута или значение несопоставимо с типом атрибута. -## dictIsIn -`dictIsIn('dict_name', child_id, ancestor_id)` -- для иерархического словаря dict_name - узнать, находится ли ключ child_id внутри ancestor_id (или совпадает с ancestor_id). Возвращает UInt8. +**Пример использования** + +Создайте файл `ext-dict-test.csv` со следующим содержимым: + +```text +1,1 +2,2 +``` + +Первая колонка - это `id`, вторая - `c1`. + +Конфигурация внешнего словаря: + +```xml +<yandex> + <dictionary> + <name>ext-dict-test</name> + <source> + <file> + <path>/path-to/ext-dict-test.csv</path> + <format>CSV</format> + </file> + </source> + <layout> + <flat /> + </layout> + <structure> + <id> + <name>id</name> + </id> + <attribute> + <name>c1</name> + <type>UInt32</type> + <null_value></null_value> + </attribute> + </structure> + <lifetime>0</lifetime> + </dictionary> +</yandex> +``` + +Выполните запрос: + +```sql +SELECT + dictGetOrDefault('ext-dict-test', 'c1', number + 1, toUInt32(number * 10)) AS val, + toTypeName(val) AS type +FROM system.numbers +LIMIT 3 +``` +```text +┌─val─┬─type───┐ +│ 1 │ UInt32 │ +│ 2 │ UInt32 │ +│ 20 │ UInt32 │ +└─────┴────────┘ +``` + +**Смотрите также** + +- [Внешние словари](../dicts/external_dicts.md) -## dictGetHierarchy -`dictGetHierarchy('dict_name', id)` -- для иерархического словаря dict_name - вернуть массив ключей словаря, начиная с id и продолжая цепочкой родительских элементов. Возвращает Array(UInt64). ## dictHas + +Проверяет наличие записи с заданным ключом в словаре. + +``` +dictHas('dict_name', id_expr) +``` + +**Параметры** + +- `dict_name` — Название словаря. [Строковый литерал](../syntax.md#syntax-string-literal). +- `id_expr` — Значение ключа. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md). + +**Возвращаемое значение** + +- 0, если ключ не был обнаружен. +- 1, если ключ присутствует в словаре. + +Тип: `UInt8`. + +## dictGetHierarchy + +Для иерархических словарей возвращает массив ключей, содержащий ключ `id_expr` и все ключи родительских элементов по цепочке. + +``` +dictGetHierarchy('dict_name', id_expr) +``` + +**Параметры** + +- `dict_name` — Название словаря. 
[Строковый литерал](../syntax.md#syntax-string-literal). +- `id_expr` — Значение ключа. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md). + +**Возвращаемое значение** + +Иерархию ключей словаря. + +Тип: [Array(UInt64)](../../data_types/array.md). + +## dictIsIn + +Осуществляет проверку, является ли ключ `ancestor_id_expr` родительским для ключа `child_id_expr` во всей иерархической цепочке словаря. + +`dictIsIn('dict_name', child_id_expr, ancestor_id_expr)` + +**Параметры** + +- `dict_name` — Название словаря. [Строковый литерал](../syntax.md#syntax-string-literal). +- `child_id_expr` — Ключ, который должен быть проверен. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md). +- `ancestor_id_expr` — Родительский ключ для ключа `child_id_expr`. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md). + +**Возвращаемое значение** + +- 0, если `child_id_expr` не является потомком для `ancestor_id_expr`. +- 1, если `child_id_expr` является потомком для `ancestor_id_expr` или если `child_id_expr` равен `ancestor_id_expr`. + +Тип: `UInt8`. + +## Другие функции {#ext_dict_functions-other} + +ClickHouse поддерживает специализированные функции для конвертации значений атрибутов словаря к определенному типу, независимо от настроек словаря. + +Функции: + +- `dictGetInt8`, `dictGetInt16`, `dictGetInt32`, `dictGetInt64` +- `dictGetUInt8`, `dictGetUInt16`, `dictGetUInt32`, `dictGetUInt64` +- `dictGetFloat32`, `dictGetFloat64` +- `dictGetDate` +- `dictGetDateTime` +- `dictGetUUID` +- `dictGetString` + +Все эти функции также имеют `OrDefault`-версию. Например, `dictGetDateOrDefault`. + +Синтаксис: + +``` +dictGet[Type]('dict_name', 'attr_name', id_expr) +dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr) +``` + +**Параметры** + +- `dict_name` — Название словаря. [Строковый литерал](../syntax.md#syntax-string-literal). +- `attr_name` — Название колонки словаря. [Строковый литерал](../syntax.md#syntax-string-literal). +- `id_expr` — Значение ключа. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md). +- `default_value_expr` — Значение, которое возвращается, если словарь не содержит строку с ключом `id_expr`. [Выражение](../syntax.md#syntax-expressions) возвращает значение с таким же типом, что и тип атрибута `attr_name`. + +**Возвращаемое значение** + +- Если ClickHouse успешно обрабатывает атрибут в соответствии с указанным [типом данных](../dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes), то функция возвращает значение для заданного ключа `id_expr`. + +- Если запрашиваемого `id_expr` не оказалось в словаре: + + - `dictGet[Type]` возвратит содержимое элемента `<null_value>`, определенного в настройках словаря. + - `dictGet[Type]OrDefault` вернет значение переданного `default_value_expr` параметра. + +ClickHouse бросает исключение, если не может обработать значение атрибута или значение несопоставимо с типом атрибута. [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/ext_dict_functions/) diff --git a/docs/ru/query_language/functions/geo.md b/docs/ru/query_language/functions/geo.md index 33092cf804b..b8e37c15aca 100644 --- a/docs/ru/query_language/functions/geo.md +++ b/docs/ru/query_language/functions/geo.md @@ -132,13 +132,17 @@ SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res Декодирует любую строку, закодированную в geohash, на долготу и широту. 
+``` +geohashDecode(geohash_string) +``` + **Входные значения** -- encoded string — строка, содержащая geohash. +- `geohash_string` — строка, содержащая geohash. **Возвращаемые значения** -- (longitude, latitude) — широта и долгота. Кортеж из двух значений типа `Float64`. +- `(longitude, latitude)` — долгота и широта. Кортеж из двух значений типа `Float64`. **Пример** @@ -154,7 +158,7 @@ SELECT geohashDecode('ezs42') AS res ## geoToH3 -Получает H3 индекс точки (lon, lat) с заданным разрешением +Получает H3 индекс точки `(lon, lat)` с заданным разрешением. ``` geoToH3(lon, lat, resolution) @@ -162,15 +166,16 @@ geoToH3(lon, lat, resolution) **Входные значения** -- `lon` - географическая долгота. Тип данных — [Float64](../../data_types/float.md). -- `lat` - географическая широта. Тип данных — [Float64](../../data_types/float.md). -- `resolution` - требуемое разрешение индекса. Тип данных — [UInt8](../../data_types/int_uint.md). Диапазон возможных значение — `[0, 15]`. +- `lon` — географическая долгота. Тип данных — [Float64](../../data_types/float.md). +- `lat` — географическая широта. Тип данных — [Float64](../../data_types/float.md). +- `resolution` — требуемое разрешение индекса. Тип данных — [UInt8](../../data_types/int_uint.md). Диапазон возможных значений — `[0, 15]`. **Возвращаемые значения** -Возвращает значение с типом [UInt64] (../../data_types/int_uint.md). -`0` в случае ошибки. -Иначе возвращается индексный номер шестиугольника. +- Порядковый номер шестиугольника. +- 0 в случае ошибки. + +Тип — [UInt64](../../data_types/int_uint.md). **Пример** diff --git a/docs/ru/query_language/functions/other_functions.md b/docs/ru/query_language/functions/other_functions.md index 62af103e02d..6e4913638ee 100644 --- a/docs/ru/query_language/functions/other_functions.md +++ b/docs/ru/query_language/functions/other_functions.md @@ -600,6 +600,39 @@ SELECT replicate(1, ['a', 'b', 'c']) └───────────────────────────────┘ ``` +## filesystemAvailable {#function-filesystemavailable} + +Возвращает объем оставшегося места в файловой системе, в которой расположены файлы баз данных. Смотрите описание конфигурационного параметра сервера [path](../../operations/server_settings/settings.md#server_settings-path). + +``` +filesystemAvailable() +``` + +**Возвращаемое значение** + +- Объем свободного места в байтах. + +Тип — [UInt64](../../data_types/int_uint.md). + +**Пример** + +```sql +SELECT filesystemAvailable() AS "Free space", toTypeName(filesystemAvailable()) AS "Type" +``` +```text +┌──Free space─┬─Type───┐ +│ 18152624128 │ UInt64 │ +└─────────────┴────────┘ +``` + +## filesystemCapacity + +Возвращает данные о ёмкости диска. + +## finalizeAggregation {#function-finalizeaggregation} + +Принимает состояние агрегатной функции. Возвращает результат агрегирования. + ## runningAccumulate {#function-runningaccumulate} Принимает на вход состояния агрегатной функции и возвращает столбец со значениями, которые представляют собой результат мёржа этих состояний для выборки строк из блока от первой до текущей строки. Например, принимает состояние агрегатной функции (например, `runningAccumulate(uniqState(UserID))`), и для каждой строки блока возвращает результат агрегатной функции после мёржа состояний функции для всех предыдущих строк и текущей. Таким образом, результат зависит от разбиения данных по блокам и от порядка данных в блоке. 
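The combinator and function hunks above repeatedly cross-link `-State`, `finalizeAggregation`, and `runningAccumulate`. A minimal sketch of how the three compose, using only the `numbers()` table function (illustrative only, not part of the patch; as the `runningAccumulate` paragraph above notes, the running result depends on how rows are split into blocks):

```sql
SELECT
    k,
    finalizeAggregation(state) AS uniq_in_group,  -- turn the intermediate state into a final value
    runningAccumulate(state)   AS uniq_running    -- merge states from the first row up to the current one
FROM
(
    SELECT
        number % 3 AS k,
        uniqState(number) AS state  -- the -State combinator returns AggregateFunction(uniq, UInt64)
    FROM numbers(10)
    GROUP BY k
    ORDER BY k
)
```

For the three groups this should return 4, 3, 3 in `uniq_in_group` and the running merges 4, 7, 10 in `uniq_running`, since `uniq` is exact at such small cardinalities.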
diff --git a/docs/zh/query_language/functions/other_functions.md b/docs/zh/query_language/functions/other_functions.md index 85804c0a75d..84fbdaeb3ca 100644 --- a/docs/zh/query_language/functions/other_functions.md +++ b/docs/zh/query_language/functions/other_functions.md @@ -637,7 +637,7 @@ SELECT replicate(1, ['a', 'b', 'c']) 返回磁盘的容量信息,以字节为单位。使用配置文件中的path配置评估此信息。 -## finalizeAggregation +## finalizeAggregation {#function-finalizeaggregation} 获取聚合函数的状态。返回聚合结果(最终状态)。 diff --git a/libs/libcommon/include/common/config_common.h.in b/libs/libcommon/include/common/config_common.h.in index 0cc0950efba..247afd87aea 100644 --- a/libs/libcommon/include/common/config_common.h.in +++ b/libs/libcommon/include/common/config_common.h.in @@ -7,3 +7,4 @@ #cmakedefine01 USE_READLINE #cmakedefine01 USE_LIBEDIT #cmakedefine01 HAVE_READLINE_HISTORY +#cmakedefine01 NOT_UNBUNDLED diff --git a/libs/libcommon/include/common/logger_useful.h b/libs/libcommon/include/common/logger_useful.h index 245a79c7982..c1c39047540 100644 --- a/libs/libcommon/include/common/logger_useful.h +++ b/libs/libcommon/include/common/logger_useful.h @@ -4,41 +4,39 @@ #include <sstream> #include <Poco/Logger.h> +#include <Poco/Message.h> +#include <Poco/Version.h> +#include <Common/CurrentThread.h> +#include <Core/SettingsCommon.h> #ifndef QUERY_PREVIEW_LENGTH #define QUERY_PREVIEW_LENGTH 160 #endif using Poco::Logger; +using Poco::Message; +using DB::LogsLevel; +using DB::CurrentThread; /// Logs a message to a specified logger with that level. -#define LOG_TRACE(logger, message) do { \ - if ((logger)->trace()) {\ - std::stringstream oss_internal_rare; \ - oss_internal_rare << message; \ - (logger)->trace(oss_internal_rare.str());}} while(false) +#define LOG_SIMPLE(logger, message, priority, PRIORITY) do \ +{ \ + const bool is_clients_log = (CurrentThread::getGroup() != nullptr) && \ + (CurrentThread::getGroup()->client_logs_level >= (priority)); \ + if ((logger)->is((PRIORITY)) || is_clients_log) { \ + std::stringstream oss_internal_rare; \ + oss_internal_rare << message; \ + if (auto channel = (logger)->getChannel()) { \ + channel->log(Message((logger)->name(), oss_internal_rare.str(), (PRIORITY))); \ + } \ + } \ +} while (false) -#define LOG_DEBUG(logger, message) do { \ - if ((logger)->debug()) {\ - std::stringstream oss_internal_rare; \ - oss_internal_rare << message; \ - (logger)->debug(oss_internal_rare.str());}} while(false) -#define LOG_INFO(logger, message) do { \ - if ((logger)->information()) {\ - std::stringstream oss_internal_rare; \ - oss_internal_rare << message; \ - (logger)->information(oss_internal_rare.str());}} while(false) +#define LOG_TRACE(logger, message) LOG_SIMPLE(logger, message, LogsLevel::trace, Message::PRIO_TRACE) +#define LOG_DEBUG(logger, message) LOG_SIMPLE(logger, message, LogsLevel::debug, Message::PRIO_DEBUG) +#define LOG_INFO(logger, message) LOG_SIMPLE(logger, message, LogsLevel::information, Message::PRIO_INFORMATION) +#define LOG_WARNING(logger, message) LOG_SIMPLE(logger, message, LogsLevel::warning, Message::PRIO_WARNING) +#define LOG_ERROR(logger, message) LOG_SIMPLE(logger, message, LogsLevel::error, Message::PRIO_ERROR) -#define LOG_WARNING(logger, message) do { \ - if ((logger)->warning()) {\ - std::stringstream oss_internal_rare; \ - oss_internal_rare << message; \ - (logger)->warning(oss_internal_rare.str());}} while(false) - -#define LOG_ERROR(logger, message) do { \ - if ((logger)->error()) {\ - std::stringstream oss_internal_rare; \ - oss_internal_rare << message; \ - (logger)->error(oss_internal_rare.str());}} while(false) diff --git a/libs/libcommon/include/common/memory.h 
b/libs/libcommon/include/common/memory.h new file mode 100644 index 00000000000..d8dced79cfb --- /dev/null +++ b/libs/libcommon/include/common/memory.h @@ -0,0 +1,65 @@ +#pragma once + +#include <new> +#include <common/likely.h> + +#if __has_include(<common/config_common.h>) +#include <common/config_common.h> +#endif + +#if USE_JEMALLOC +#include <jemalloc/jemalloc.h> + +#if JEMALLOC_VERSION_MAJOR < 4 + #undef USE_JEMALLOC + #define USE_JEMALLOC 0 + #include <cstdlib> +#endif +#endif + +#define ALWAYS_INLINE inline __attribute__((__always_inline__)) +#define NO_INLINE __attribute__((__noinline__)) + +namespace Memory +{ + +ALWAYS_INLINE void * newImpl(std::size_t size) +{ + auto * ptr = malloc(size); + if (likely(ptr != nullptr)) + return ptr; + + /// @note no std::get_new_handler logic implemented + throw std::bad_alloc{}; +} + +ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept +{ + return malloc(size); +} + +ALWAYS_INLINE void deleteImpl(void * ptr) noexcept +{ + free(ptr); +} + +#if USE_JEMALLOC + +ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept +{ + if (unlikely(ptr == nullptr)) + return; + + sdallocx(ptr, size, 0); +} + +#else + +ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]]) noexcept +{ + free(ptr); +} + +#endif + +} diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index 3b523822451..b3df25d13e6 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -24,6 +24,7 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS) add_subdirectory (zookeeper-copy-tree) add_subdirectory (zookeeper-remove-by-list) add_subdirectory (zookeeper-create-entry-to-download-part) + add_subdirectory (zookeeper-adjust-block-numbers-to-parts) add_subdirectory (wikistat-loader) add_subdirectory (fill-factor) add_subdirectory (check-marks) diff --git a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp index dda1677f3a4..3e449043adc 100644 --- a/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp +++ b/utils/zookeeper-adjust-block-numbers-to-parts/main.cpp @@ -1,20 +1,62 @@ #include <Common/ZooKeeper/ZooKeeper.h> #include <Storages/MergeTree/MergeTreePartInfo.h> #include <Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h> +#include <boost/algorithm/string.hpp> #include <boost/program_options.hpp> #include <IO/ReadHelpers.h> #include <unordered_map> #include <iostream> -size_t getMaxBlockSizeForPartition(zkutil::ZooKeeper & zk, + +std::vector<std::string> getAllShards(zkutil::ZooKeeper & zk, const std::string & root) +{ + return zk.getChildren(root); +} + + +std::vector<std::string> removeNotExistingShards(zkutil::ZooKeeper & zk, const std::string & root, const std::vector<std::string> & shards) +{ + auto existing_shards = getAllShards(zk, root); + std::vector<std::string> filtered_shards; + filtered_shards.reserve(shards.size()); + for (const auto & shard : shards) + if (std::find(existing_shards.begin(), existing_shards.end(), shard) == existing_shards.end()) + std::cerr << "Shard " << shard << " not found." << std::endl; + else + filtered_shards.emplace_back(shard); + return filtered_shards; +} + + +std::vector<std::string> getAllTables(zkutil::ZooKeeper & zk, const std::string & root, const std::string & shard) +{ + return zk.getChildren(root + "/" + shard); +} + + +std::vector<std::string> removeNotExistingTables(zkutil::ZooKeeper & zk, const std::string & root, const std::string & shard, const std::vector<std::string> & tables) +{ + auto existing_tables = getAllTables(zk, root, shard); + std::vector<std::string> filtered_tables; + filtered_tables.reserve(tables.size()); + for (const auto & table : tables) + if (std::find(existing_tables.begin(), existing_tables.end(), table) == existing_tables.end()) + std::cerr << "\tTable " << table << " not found on shard " << shard << "." 
<< std::endl; + else + filtered_tables.emplace_back(table); + return filtered_tables; +} + + +Int64 getMaxBlockNumberForPartition(zkutil::ZooKeeper & zk, const std::string & replica_path, const std::string & partition_name, const DB::MergeTreeDataFormatVersion & format_version) { auto replicas_path = replica_path + "/replicas"; auto replica_hosts = zk.getChildren(replicas_path); - size_t max_block_num = 0; + Int64 max_block_num = 0; for (const auto & replica_host : replica_hosts) { auto parts = zk.getChildren(replicas_path + "/" + replica_host + "/parts"); @@ -24,40 +66,78 @@ size_t getMaxBlockSizeForPartition(zkutil::ZooKeeper & zk, { auto info = DB::MergeTreePartInfo::fromPartName(part, format_version); if (info.partition_id == partition_name) - max_block_num = std::max(info.max_block, max_block_num); + max_block_num = std::max(info.max_block, max_block_num); } catch (const DB::Exception & ex) { - std::cerr << "Exception on: " << ex.displayText() << " will skip part: " << part << std::endl; + std::cerr << ex.displayText() << ", part " << part << " skipped." << std::endl; } } } return max_block_num; } -std::unordered_map<std::string, size_t> getAllTablesBlockPaths(zkutil::ZooKeeper & zk, const std::string & root) + +Int64 getCurrentBlockNumberForPartition(zkutil::ZooKeeper & zk, const std::string & part_path) { - std::unordered_map<std::string, size_t> result; - auto shards = zk.getChildren(root); - for (const auto & shard : shards) + Coordination::Stat stat; + zk.get(part_path, &stat); + + /// References: + /// https://stackoverflow.com/a/10347910 + /// https://bowenli86.github.io/2016/07/07/distributed%20system/zookeeper/How-does-ZooKeeper-s-persistent-sequential-id-work/ + return (stat.cversion + stat.numChildren) / 2; +} + + +std::unordered_map<std::string, Int64> getPartitionsNeedAdjustingBlockNumbers( + zkutil::ZooKeeper & zk, const std::string & root, const std::vector<std::string> & shards, const std::vector<std::string> & tables) +{ + std::unordered_map<std::string, Int64> result; + + std::vector<std::string> use_shards = shards.empty() ? getAllShards(zk, root) : removeNotExistingShards(zk, root, shards); + + for (const auto & shard : use_shards) { - std::string shard_path = root + "/" + shard; - auto tables = zk.getChildren(shard_path); - for (auto table : tables) + std::cout << "Shard: " << shard << std::endl; + std::vector<std::string> use_tables = tables.empty() ? getAllTables(zk, root, shard) : removeNotExistingTables(zk, root, shard, tables); + + for (auto table : use_tables) { - std::cerr << "Searching for nodes in: " << table << std::endl; - std::string table_path = shard_path + "/" + table; - auto format_version = DB::ReplicatedMergeTreeTableMetadata::parse(zk.get(table_path + "/metadata")).data_format_version; + std::cout << "\tTable: " << table << std::endl; + std::string table_path = root + "/" + shard + "/" + table; std::string blocks_path = table_path + "/block_numbers"; - auto partitions = zk.getChildren(blocks_path); - if (!partitions.empty()) + + std::vector<std::string> partitions; + DB::MergeTreeDataFormatVersion format_version; + try { - for (auto partition : partitions) + format_version = DB::ReplicatedMergeTreeTableMetadata::parse(zk.get(table_path + "/metadata")).data_format_version; + partitions = zk.getChildren(blocks_path); + } + catch (const DB::Exception & ex) + { + std::cerr << ex.displayText() << ", table " << table << " skipped." 
<< std::endl; + continue; + } + + for (auto partition : partitions) + { + try { std::string part_path = blocks_path + "/" + partition; - size_t partition_max_block = getMaxBlockSizeForPartition(zk, table_path, partition, format_version); - std::cerr << "\tFound max block number: " << partition_max_block << " for part: " << partition << std::endl; - result.emplace(part_path, partition_max_block); + Int64 partition_max_block = getMaxBlockNumberForPartition(zk, table_path, partition, format_version); + Int64 current_block_number = getCurrentBlockNumberForPartition(zk, part_path); + if (current_block_number < partition_max_block + 1) + { + std::cout << "\t\tPartition: " << partition << ": current block_number: " << current_block_number + << ", max block number: " << partition_max_block << ". Adjusting is required." << std::endl; + result.emplace(part_path, partition_max_block); + } + } + catch (const DB::Exception & ex) + { + std::cerr << ex.displayText() << ", partition " << partition << " skipped." << std::endl; } } } @@ -66,67 +146,137 @@ std::unordered_map<std::string, size_t> getAllTablesBlockPaths(zkutil::ZooKeeper } -void rotateNodes(zkutil::ZooKeeper & zk, const std::string & path, size_t max_block_num) +void setCurrentBlockNumber(zkutil::ZooKeeper & zk, const std::string & path, Int64 new_current_block_number) { - Coordination::Requests requests; - std::string block_prefix = path + "/block-"; - std::string current = zk.create(block_prefix, "", zkutil::CreateMode::EphemeralSequential); - size_t current_block_num = DB::parse<size_t>(current.c_str() + block_prefix.size(), current.size() - block_prefix.size()); - if (current_block_num >= max_block_num) - { - std::cerr << "Nothing to rotate, current block num: " << current_block_num << " max_block_num:" << max_block_num << std::endl; - return; - } + Int64 current_block_number = getCurrentBlockNumberForPartition(zk, path); - size_t need_to_rotate = max_block_num - current_block_num; - std::cerr << "Will rotate: " << need_to_rotate << " block numbers from " << current_block_num << " to " << max_block_num << std::endl; - - for (size_t i = 0; i < need_to_rotate; ++i) + auto create_ephemeral_nodes = [&](size_t count) { - if (requests.size() == 50) + std::string block_prefix = path + "/block-"; + Coordination::Requests requests; + requests.reserve(count); + for (size_t i = 0; i != count; ++i) + requests.emplace_back(zkutil::makeCreateRequest(block_prefix, "", zkutil::CreateMode::EphemeralSequential)); + auto responses = zk.multi(requests); + + std::vector<std::string> paths_created; + paths_created.reserve(responses.size()); + for (const auto & response : responses) { - std::cerr << "Rotating: " << i << " block numbers" << std::endl; - zk.multi(requests); - requests.clear(); + const auto * create_response = dynamic_cast<const Coordination::CreateResponse *>(response.get()); + if (!create_response) + { + std::cerr << "\tCould not create ephemeral node " << block_prefix << std::endl; + return false; + } + paths_created.emplace_back(create_response->path_created); } - requests.emplace_back(zkutil::makeCreateRequest(path + "/block-", "", zkutil::CreateMode::EphemeralSequential)); - } - if (!requests.empty()) - { - zk.multi(requests); - } + + std::sort(paths_created.begin(), paths_created.end()); + for (const auto & path_created : paths_created) + { + Int64 number = DB::parse<Int64>(path_created.c_str() + block_prefix.size(), path_created.size() - block_prefix.size()); + if (number != current_block_number) + { + char suffix[11] = ""; + sprintf(suffix, "%010ld", current_block_number); + std::string expected_path = block_prefix + suffix; 
+ std::cerr << "\t" << path_created << ": Ephemeral node has been created with an unexpected path (expected something like " + << expected_path << ")." << std::endl; + return false; + } + std::cout << "\t" << path_created << std::endl; + ++current_block_number; + } + + return true; + }; + + if (current_block_number >= new_current_block_number) + return; + + std::cout << "Creating ephemeral sequential nodes:" << std::endl; + create_ephemeral_nodes(1); /// Firstly try to create just a single node. + + /// Create other nodes in batches of 50 nodes. + while (current_block_number + 50 <= new_current_block_number) + create_ephemeral_nodes(50); + + create_ephemeral_nodes(new_current_block_number - current_block_number); } + int main(int argc, char ** argv) try { - boost::program_options::options_description desc("Allowed options"); + /// Parse the command line. + namespace po = boost::program_options; + po::options_description desc("Allowed options"); desc.add_options() - ("help,h", "produce help message") - ("address,a", boost::program_options::value<std::string>()->required(), "addresses of ZooKeeper instances, comma separated. Example: example01e.yandex.ru:2181") - ("path,p", boost::program_options::value<std::string>()->required(), "path of replica queue to insert node (without trailing slash)"); + ("help,h", "show help") + ("zookeeper,z", po::value<std::string>(), "Addresses of ZooKeeper instances, comma-separated. Example: example01e.yandex.ru:2181") + ("path,p", po::value<std::string>(), "[optional] Path of replica queue to insert node (without trailing slash). By default it's /clickhouse/tables") + ("shard,s", po::value<std::string>(), "[optional] Shards to process, comma-separated. If not specified then the utility will process all the shards.") + ("table,t", po::value<std::string>(), "[optional] Tables to process, comma-separated. If not specified then the utility will process all the tables.") + ("dry-run", "[optional] Specify if you want this utility just to analyze block numbers without any changes."); - boost::program_options::variables_map options; - boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options); + po::variables_map options; + po::store(po::parse_command_line(argc, argv, desc), options); - if (options.count("help")) + auto show_usage = [&] { - std::cout << "Util for /block_numbers node adjust with max block number in partition" << std::endl; - std::cout << "Usage: " << argv[0] << " [options]" << std::endl; + std::cout << "Usage: " << std::endl; + std::cout << " " << argv[0] << " [options]" << std::endl; std::cout << desc << std::endl; + }; + + if (options.count("help") || (argc == 1)) + { + std::cout << "This utility adjusts the /block_numbers zookeeper nodes to the correct block number in partition." << std::endl; + std::cout << "It might be useful when incorrect block numbers stored in zookeeper don't allow you to insert data into a table or drop/detach a partition." << std::endl; + show_usage(); + return 0; + } + + if (!options.count("zookeeper")) + { + std::cerr << "Option --zookeeper should be set." << std::endl; + show_usage(); return 1; } - std::string global_path = options.at("path").as<std::string>(); + std::string root = options.count("path") ? 
options.at("path").as<std::string>() : "/clickhouse/tables"; - zkutil::ZooKeeper zookeeper(options.at("address").as<std::string>()); + std::vector<std::string> shards, tables; + if (options.count("shard")) + boost::split(shards, options.at("shard").as<std::string>(), boost::algorithm::is_any_of(",")); + if (options.count("table")) + boost::split(tables, options.at("table").as<std::string>(), boost::algorithm::is_any_of(",")); - auto all_path = getAllTablesBlockPaths(zookeeper, global_path); - for (const auto & [path, max_block_num] : all_path) + /// Check if the adjusting of the block numbers is required. + std::cout << "Checking if adjusting of the block numbers is required:" << std::endl; + zkutil::ZooKeeper zookeeper(options.at("zookeeper").as<std::string>()); + auto part_paths_with_max_block_numbers = getPartitionsNeedAdjustingBlockNumbers(zookeeper, root, shards, tables); + + if (part_paths_with_max_block_numbers.empty()) { - std::cerr << "Rotating on: " << path << std::endl; - rotateNodes(zookeeper, path, max_block_num); + std::cout << "No adjusting required." << std::endl; + return 0; } + + std::cout << "Adjusting of " << part_paths_with_max_block_numbers.size() << " block numbers is required." << std::endl; + + /// Adjust the block numbers. + if (options.count("dry-run")) + { + std::cout << "This is a dry-run, exiting." << std::endl; + return 0; + } + + std::cout << std::endl << "Adjusting the block numbers:" << std::endl; + for (const auto & [part_path, max_block_number] : part_paths_with_max_block_numbers) + setCurrentBlockNumber(zookeeper, part_path, max_block_number + 1); + return 0; } catch (const Poco::Exception & e)