diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e35c05b442..0ed76689a5c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -182,6 +182,11 @@ else () set (CXX_FLAGS_INTERNAL_COMPILER "-std=c++1z") endif () +if (COMPILER_GCC OR COMPILER_CLANG) + # Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation") +endif () + option(WITH_COVERAGE "Build with coverage." 0) if(WITH_COVERAGE AND COMPILER_CLANG) set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping") diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 4840197c2fd..47f057c0559 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -15,7 +15,6 @@ ${JEMALLOC_SOURCE_DIR}/src/extent_mmap.c ${JEMALLOC_SOURCE_DIR}/src/hash.c ${JEMALLOC_SOURCE_DIR}/src/hook.c ${JEMALLOC_SOURCE_DIR}/src/jemalloc.c -${JEMALLOC_SOURCE_DIR}/src/jemalloc_cpp.cpp ${JEMALLOC_SOURCE_DIR}/src/large.c ${JEMALLOC_SOURCE_DIR}/src/log.c ${JEMALLOC_SOURCE_DIR}/src/malloc_io.c diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index bcb44b468d8..dced3ed7b93 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -384,6 +384,7 @@ endif() if (USE_JEMALLOC) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR}) # used in Interpreters/AsynchronousMetrics.cpp + target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR}) # new_delete.cpp endif () target_include_directories (dbms PUBLIC ${DBMS_INCLUDE_DIR} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src/Formats/include) diff --git a/dbms/src/AggregateFunctions/QuantileTiming.h b/dbms/src/AggregateFunctions/QuantileTiming.h index 131ca91dbbf..fbf4da725c0 100644 --- a/dbms/src/AggregateFunctions/QuantileTiming.h +++ b/dbms/src/AggregateFunctions/QuantileTiming.h @@ -1,7 +1,6 @@ #pragma once #include -#include 
#include #include #include @@ -513,8 +512,6 @@ private: void mediumToLarge() { - CurrentMemoryTracker::alloc(sizeof(detail::QuantileTimingLarge)); - /// While the data is copied from medium, it is not possible to set `large` value (otherwise it will overwrite some data). detail::QuantileTimingLarge * tmp_large = new detail::QuantileTimingLarge; @@ -528,8 +525,6 @@ private: void tinyToLarge() { - CurrentMemoryTracker::alloc(sizeof(detail::QuantileTimingLarge)); - /// While the data is copied from `medium` it is not possible to set `large` value (otherwise it will overwrite some data). detail::QuantileTimingLarge * tmp_large = new detail::QuantileTimingLarge; @@ -562,8 +557,6 @@ public: else if (kind == Kind::Large) { delete large; - - CurrentMemoryTracker::free(sizeof(detail::QuantileTimingLarge)); } } diff --git a/dbms/src/Common/Allocator.h b/dbms/src/Common/Allocator.h index abaa5927e3d..e9569673678 100644 --- a/dbms/src/Common/Allocator.h +++ b/dbms/src/Common/Allocator.h @@ -108,13 +108,92 @@ class AllocatorWithHint : Hint { protected: static constexpr bool clear_memory = clear_memory_; + static constexpr size_t small_memory_threshold = mmap_threshold; public: /// Allocate memory range. void * alloc(size_t size, size_t alignment = 0) { CurrentMemoryTracker::alloc(size); + return allocNoTrack(size, alignment); + } + /// Free memory range. + void free(void * buf, size_t size) + { + freeNoTrack(buf, size); + CurrentMemoryTracker::free(size); + } + + /** Enlarge memory range. + * Data from old range is moved to the beginning of new range. + * Address of memory range could change. + */ + void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0) + { + if (old_size == new_size) + { + /// nothing to do. + /// BTW, it's not possible to change alignment while doing realloc. 
+ } + else if (old_size < mmap_threshold && new_size < mmap_threshold && alignment <= MALLOC_MIN_ALIGNMENT) + { + /// Resize malloc'd memory region with no special alignment requirement. + CurrentMemoryTracker::realloc(old_size, new_size); + + void * new_buf = ::realloc(buf, new_size); + if (nullptr == new_buf) + DB::throwFromErrno("Allocator: Cannot realloc from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + + buf = new_buf; + if constexpr (clear_memory) + if (new_size > old_size) + memset(reinterpret_cast(buf) + old_size, 0, new_size - old_size); + } + else if (old_size >= mmap_threshold && new_size >= mmap_threshold) + { + /// Resize mmap'd memory region. + CurrentMemoryTracker::realloc(old_size, new_size); + + // On apple and freebsd self-implemented mremap used (common/mremap.h) + buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (MAP_FAILED == buf) + DB::throwFromErrno("Allocator: Cannot mremap memory chunk from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_MREMAP); + + /// No need for zero-fill, because mmap guarantees it. + } + else if (new_size < small_memory_threshold) + { + /// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once. + CurrentMemoryTracker::realloc(old_size, new_size); + + void * new_buf = allocNoTrack(new_size, alignment); + memcpy(new_buf, buf, std::min(old_size, new_size)); + freeNoTrack(buf, old_size); + buf = new_buf; + } + else + { + /// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods. 
+ + void * new_buf = alloc(new_size, alignment); + memcpy(new_buf, buf, std::min(old_size, new_size)); + free(buf, old_size); + buf = new_buf; + } + + return buf; + } + +protected: + static constexpr size_t getStackThreshold() + { + return 0; + } + +private: + void * allocNoTrack(size_t size, size_t alignment) + { void * buf; if (size >= mmap_threshold) @@ -149,15 +228,14 @@ public: if (0 != res) DB::throwFromErrno("Cannot allocate memory (posix_memalign) " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res); - if (clear_memory) + if constexpr (clear_memory) memset(buf, 0, size); } } return buf; } - /// Free memory range. - void free(void * buf, size_t size) + void freeNoTrack(void * buf, size_t size) { if (size >= mmap_threshold) { @@ -168,63 +246,6 @@ public: { ::free(buf); } - - CurrentMemoryTracker::free(size); - } - - /** Enlarge memory range. - * Data from old range is moved to the beginning of new range. - * Address of memory range could change. - */ - void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0) - { - if (old_size == new_size) - { - /// nothing to do. - /// BTW, it's not possible to change alignment while doing realloc. - } - else if (old_size < mmap_threshold && new_size < mmap_threshold && alignment <= MALLOC_MIN_ALIGNMENT) - { - /// Resize malloc'd memory region with no special alignment requirement. - CurrentMemoryTracker::realloc(old_size, new_size); - - void * new_buf = ::realloc(buf, new_size); - if (nullptr == new_buf) - DB::throwFromErrno("Allocator: Cannot realloc from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); - - buf = new_buf; - if (clear_memory && new_size > old_size) - memset(reinterpret_cast(buf) + old_size, 0, new_size - old_size); - } - else if (old_size >= mmap_threshold && new_size >= mmap_threshold) - { - /// Resize mmap'd memory region. 
- CurrentMemoryTracker::realloc(old_size, new_size); - - // On apple and freebsd self-implemented mremap used (common/mremap.h) - buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (MAP_FAILED == buf) - DB::throwFromErrno("Allocator: Cannot mremap memory chunk from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_MREMAP); - - /// No need for zero-fill, because mmap guarantees it. - } - else - { - /// All other cases that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods. - - void * new_buf = alloc(new_size, alignment); - memcpy(new_buf, buf, std::min(old_size, new_size)); - free(buf, old_size); - buf = new_buf; - } - - return buf; - } - -protected: - static constexpr size_t getStackThreshold() - { - return 0; } }; @@ -267,7 +288,7 @@ public: { if (size <= N) { - if (Base::clear_memory) + if constexpr (Base::clear_memory) memset(stack_memory, 0, N); return stack_memory; } diff --git a/dbms/src/Common/CombinedCardinalityEstimator.h b/dbms/src/Common/CombinedCardinalityEstimator.h index 824f0a8c018..e048e47cab5 100644 --- a/dbms/src/Common/CombinedCardinalityEstimator.h +++ b/dbms/src/Common/CombinedCardinalityEstimator.h @@ -3,7 +3,6 @@ #include #include #include -#include #include @@ -230,7 +229,6 @@ private: if (getContainerType() != details::ContainerType::SMALL) throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); - CurrentMemoryTracker::alloc(sizeof(Medium)); auto tmp_medium = std::make_unique(); for (const auto & x : small) @@ -247,7 +245,6 @@ private: if ((container_type != details::ContainerType::SMALL) && (container_type != details::ContainerType::MEDIUM)) throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); - CurrentMemoryTracker::alloc(sizeof(Large)); auto tmp_large = std::make_unique(); if (container_type == details::ContainerType::SMALL) @@ 
-277,15 +274,11 @@ private: { delete medium; medium = nullptr; - - CurrentMemoryTracker::free(sizeof(Medium)); } else if (container_type == details::ContainerType::LARGE) { delete large; large = nullptr; - - CurrentMemoryTracker::free(sizeof(Large)); } } diff --git a/dbms/src/Common/CurrentThread.cpp b/dbms/src/Common/CurrentThread.cpp index 78dc57cb26b..84d63a04b96 100644 --- a/dbms/src/Common/CurrentThread.cpp +++ b/dbms/src/Common/CurrentThread.cpp @@ -46,6 +46,12 @@ MemoryTracker * CurrentThread::getMemoryTracker() return ¤t_thread->memory_tracker; } +Int64 & CurrentThread::getUntrackedMemory() +{ + /// It assumes that (current_thread != nullptr) is already checked with getMemoryTracker() + return current_thread->untracked_memory; +} + void CurrentThread::updateProgressIn(const Progress & value) { if (unlikely(!current_thread)) diff --git a/dbms/src/Common/CurrentThread.h b/dbms/src/Common/CurrentThread.h index 645c7f1d561..685ac879530 100644 --- a/dbms/src/Common/CurrentThread.h +++ b/dbms/src/Common/CurrentThread.h @@ -48,6 +48,7 @@ public: static ProfileEvents::Counters & getProfileEvents(); static MemoryTracker * getMemoryTracker(); + static Int64 & getUntrackedMemory(); /// Update read and write rows (bytes) statistics (used in system.query_thread_log) static void updateProgressIn(const Progress & value); diff --git a/dbms/src/Common/HyperLogLogWithSmallSetOptimization.h b/dbms/src/Common/HyperLogLogWithSmallSetOptimization.h index 836fbda222e..548b745cb6e 100644 --- a/dbms/src/Common/HyperLogLogWithSmallSetOptimization.h +++ b/dbms/src/Common/HyperLogLogWithSmallSetOptimization.h @@ -4,7 +4,6 @@ #include #include -#include namespace DB @@ -39,8 +38,6 @@ private: void toLarge() { - CurrentMemoryTracker::alloc(sizeof(Large)); - /// At the time of copying data from `tiny`, setting the value of `large` is still not possible (otherwise it will overwrite some data). 
Large * tmp_large = new Large; @@ -56,11 +53,7 @@ public: ~HyperLogLogWithSmallSetOptimization() { if (isLarge()) - { delete large; - - CurrentMemoryTracker::free(sizeof(Large)); - } } void insert(Key value) diff --git a/dbms/src/Common/MemoryTracker.cpp b/dbms/src/Common/MemoryTracker.cpp index bc324be4904..b3d661d95ee 100644 --- a/dbms/src/Common/MemoryTracker.cpp +++ b/dbms/src/Common/MemoryTracker.cpp @@ -1,3 +1,5 @@ +#include + #include "MemoryTracker.h" #include #include @@ -17,6 +19,8 @@ namespace DB static constexpr size_t log_peak_memory_usage_every = 1ULL << 30; +/// Each thread could new/delete memory in range of (-untracked_memory_limit, untracked_memory_limit) without access to common counters. +static constexpr Int64 untracked_memory_limit = 4 * 1024 * 1024; MemoryTracker::~MemoryTracker() @@ -85,6 +89,9 @@ void MemoryTracker::alloc(Int64 size) { free(size); + /// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc + auto untrack_lock = blocker.cancel(); + std::stringstream message; message << "Memory tracker"; if (description) @@ -100,6 +107,9 @@ void MemoryTracker::alloc(Int64 size) { free(size); + /// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc + auto untrack_lock = blocker.cancel(); + std::stringstream message; message << "Memory limit"; if (description) @@ -191,19 +201,41 @@ namespace CurrentMemoryTracker void alloc(Int64 size) { if (auto memory_tracker = DB::CurrentThread::getMemoryTracker()) - memory_tracker->alloc(size); + { + Int64 & untracked = DB::CurrentThread::getUntrackedMemory(); + untracked += size; + if (untracked > untracked_memory_limit) + { + /// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes + /// more. It could be useful for enlarging the Exception message in rethrow logic. 
+ Int64 tmp = untracked; + untracked = 0; + memory_tracker->alloc(tmp); + } + } } void realloc(Int64 old_size, Int64 new_size) { - if (auto memory_tracker = DB::CurrentThread::getMemoryTracker()) - memory_tracker->alloc(new_size - old_size); + Int64 addition = new_size - old_size; + if (addition > 0) + alloc(addition); + else + free(-addition); } void free(Int64 size) { if (auto memory_tracker = DB::CurrentThread::getMemoryTracker()) - memory_tracker->free(size); + { + Int64 & untracked = DB::CurrentThread::getUntrackedMemory(); + untracked -= size; + if (untracked < -untracked_memory_limit) + { + memory_tracker->free(-untracked); + untracked = 0; + } + } } } diff --git a/dbms/src/Common/MemoryTracker.h b/dbms/src/Common/MemoryTracker.h index 9f439c7550c..4ce0ac262fa 100644 --- a/dbms/src/Common/MemoryTracker.h +++ b/dbms/src/Common/MemoryTracker.h @@ -45,7 +45,11 @@ public: void realloc(Int64 old_size, Int64 new_size) { - alloc(new_size - old_size); + Int64 addition = new_size - old_size; + if (addition > 0) + alloc(addition); + else + free(-addition); } /** This function should be called after memory deallocation. diff --git a/dbms/src/Common/TaskStatsInfoGetter.cpp b/dbms/src/Common/TaskStatsInfoGetter.cpp index 35a68c5a90c..b361161483a 100644 --- a/dbms/src/Common/TaskStatsInfoGetter.cpp +++ b/dbms/src/Common/TaskStatsInfoGetter.cpp @@ -34,6 +34,11 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +// Replace NLMSG_OK with explicit casts since that system macro contains signedness bugs which are not going to be fixed. 
+static inline bool is_nlmsg_ok(const struct nlmsghdr * const nlh, const ssize_t len) +{ + return len >= static_cast(sizeof(*nlh)) && nlh->nlmsg_len >= sizeof(*nlh) && static_cast(len) >= nlh->nlmsg_len; +} namespace { @@ -128,7 +133,7 @@ struct NetlinkMessage if (header.nlmsg_type == NLMSG_ERROR) throw Exception("Can't receive Netlink response: error " + std::to_string(error.error), ErrorCodes::NETLINK_ERROR); - if (!NLMSG_OK((&header), bytes_received)) + if (!is_nlmsg_ok(&header, bytes_received)) throw Exception("Can't receive Netlink response: wrong number of bytes received", ErrorCodes::NETLINK_ERROR); } }; diff --git a/dbms/src/Common/ThreadStatus.cpp b/dbms/src/Common/ThreadStatus.cpp index bfc61868f7d..e5fe5d6f23b 100644 --- a/dbms/src/Common/ThreadStatus.cpp +++ b/dbms/src/Common/ThreadStatus.cpp @@ -50,6 +50,19 @@ ThreadStatus::ThreadStatus() ThreadStatus::~ThreadStatus() { + try + { + if (untracked_memory > 0) + memory_tracker.alloc(untracked_memory); + else + memory_tracker.free(-untracked_memory); + } + catch (const DB::Exception &) + { + /// It's a minor tracked memory leak here (not the memory itself but its counter). + /// We've already allocated a little bit more than the limit and cannot track it in the thread memory tracker or its parent. 
+ } + if (deleter) deleter(); current_thread = nullptr; diff --git a/dbms/src/Common/ThreadStatus.h b/dbms/src/Common/ThreadStatus.h index 77c816b2b2b..fcfb3e0e19f 100644 --- a/dbms/src/Common/ThreadStatus.h +++ b/dbms/src/Common/ThreadStatus.h @@ -96,6 +96,8 @@ public: /// TODO: merge them into common entity ProfileEvents::Counters performance_counters{VariableContext::Thread}; MemoryTracker memory_tracker{VariableContext::Thread}; + /// Small amount of untracked memory (per thread atomic-less counter) + Int64 untracked_memory = 0; /// Statistics of read and write rows/bytes Progress progress_in; diff --git a/dbms/src/Common/new_delete.cpp b/dbms/src/Common/new_delete.cpp new file mode 100644 index 00000000000..d9140f9459d --- /dev/null +++ b/dbms/src/Common/new_delete.cpp @@ -0,0 +1,142 @@ +#include + +#include +#include + +/// Replace default new/delete with memory tracking versions. +/// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new +/// https://en.cppreference.com/w/cpp/memory/new/operator_delete +#if NOT_UNBUNDLED + +namespace Memory +{ + +ALWAYS_INLINE void trackMemory(std::size_t size) +{ +#if USE_JEMALLOC + /// The nallocx() function allocates no memory, but it performs the same size computation as the mallocx() function + /// @note je_mallocx() != je_malloc(). It's expected they don't differ much in allocation logic. + if (likely(size != 0)) + CurrentMemoryTracker::alloc(nallocx(size, 0)); +#else + CurrentMemoryTracker::alloc(size); +#endif +} + +ALWAYS_INLINE bool trackMemoryNoExept(std::size_t size) noexcept +{ + try + { + trackMemory(size); + } + catch (...) + { + return false; + } + + return true; +} + +ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0) noexcept +{ + try + { +#if USE_JEMALLOC + /// @note It's also possible to use je_malloc_usable_size() here. 
+ if (likely(ptr != nullptr)) + CurrentMemoryTracker::free(sallocx(ptr, 0)); +#else + if (size) + CurrentMemoryTracker::free(size); +#endif + } + catch (...) + {} +} + +} + +/// new + +void * operator new(std::size_t size) +{ + Memory::trackMemory(size); + return Memory::newImpl(size); +} + +void * operator new[](std::size_t size) +{ + Memory::trackMemory(size); + return Memory::newImpl(size); +} + +void * operator new(std::size_t size, const std::nothrow_t &) noexcept +{ + if (likely(Memory::trackMemoryNoExept(size))) + return Memory::newNoExept(size); + return nullptr; +} + +void * operator new[](std::size_t size, const std::nothrow_t &) noexcept +{ + if (likely(Memory::trackMemoryNoExept(size))) + return Memory::newNoExept(size); + return nullptr; +} + +/// delete + +/// C++17 std 21.6.2.1 (11) +/// If a function without a size parameter is defined, the program should also define the corresponding function with a size parameter. +/// If a function with a size parameter is defined, the program shall also define the corresponding version without the size parameter. + +/// cppreference: +/// It's unspecified whether size-aware or size-unaware version is called when deleting objects of +/// incomplete type and arrays of non-class and trivially-destructible class types. 
+ +void operator delete(void * ptr) noexcept +{ + Memory::untrackMemory(ptr); + Memory::deleteImpl(ptr); +} + +void operator delete[](void * ptr) noexcept +{ + Memory::untrackMemory(ptr); + Memory::deleteImpl(ptr); +} + +void operator delete(void * ptr, std::size_t size) noexcept +{ + Memory::untrackMemory(ptr, size); + Memory::deleteSized(ptr, size); +} + +void operator delete[](void * ptr, std::size_t size) noexcept +{ + Memory::untrackMemory(ptr, size); + Memory::deleteSized(ptr, size); +} + +#else + +/// new + +void * operator new(std::size_t size) { return Memory::newImpl(size); } +void * operator new[](std::size_t size) { return Memory::newImpl(size); } + +void * operator new(std::size_t size, const std::nothrow_t &) noexcept { return Memory::newNoExept(size); } +void * operator new[](std::size_t size, const std::nothrow_t &) noexcept { return Memory::newNoExept(size); } + +/// delete + +void operator delete(void * ptr) noexcept { Memory::deleteImpl(ptr); } +void operator delete[](void * ptr) noexcept { Memory::deleteImpl(ptr); } + +void operator delete(void * ptr, const std::nothrow_t &) noexcept { Memory::deleteImpl(ptr); } +void operator delete[](void * ptr, const std::nothrow_t &) noexcept { Memory::deleteImpl(ptr); } + +void operator delete(void * ptr, std::size_t size) noexcept { Memory::deleteSized(ptr, size); } +void operator delete[](void * ptr, std::size_t size) noexcept { Memory::deleteSized(ptr, size); } + +#endif diff --git a/dbms/tests/performance/joins_in_memory.xml b/dbms/tests/performance/joins_in_memory.xml index 23b009a6027..1da400c48f4 100644 --- a/dbms/tests/performance/joins_in_memory.xml +++ b/dbms/tests/performance/joins_in_memory.xml @@ -13,11 +13,11 @@ CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory - INSERT INTO ints SELECT number AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000) - INSERT INTO ints SELECT 10000 + number % 1000 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000) - 
INSERT INTO ints SELECT 20000 + number % 100 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000) - INSERT INTO ints SELECT 30000 + number % 10 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000) - INSERT INTO ints SELECT 40000 + number % 1 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000) + INSERT INTO ints SELECT number AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000) + INSERT INTO ints SELECT 10000 + number % 1000 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000) + INSERT INTO ints SELECT 20000 + number % 100 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000) + INSERT INTO ints SELECT 30000 + number % 10 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000) + INSERT INTO ints SELECT 40000 + number % 1 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000) SELECT COUNT() FROM ints l ANY LEFT JOIN ints r USING i64 WHERE i32 = 200042 SELECT COUNT() FROM ints l ANY LEFT JOIN ints r USING i64,i32,i16,i8 WHERE i32 = 200042 diff --git a/dbms/tests/queries/0_stateless/00877_memory_limit_for_new_delete.reference b/dbms/tests/queries/0_stateless/00877_memory_limit_for_new_delete.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00877_memory_limit_for_new_delete.sql b/dbms/tests/queries/0_stateless/00877_memory_limit_for_new_delete.sql new file mode 100644 index 00000000000..111104bb06e --- /dev/null +++ b/dbms/tests/queries/0_stateless/00877_memory_limit_for_new_delete.sql @@ -0,0 +1,7 @@ +SET max_memory_usage = 1000000000; + +SELECT sum(ignore(*)) FROM ( + SELECT number, argMax(number, (number, toFixedString(toString(number), 1024))) + FROM numbers(1000000) + GROUP BY number +) -- { serverError 241 } diff --git a/docs/en/query_language/functions/ext_dict_functions.md b/docs/en/query_language/functions/ext_dict_functions.md index 017d941b9f6..6494a2b643e 100644 --- a/docs/en/query_language/functions/ext_dict_functions.md +++ 
b/docs/en/query_language/functions/ext_dict_functions.md @@ -96,7 +96,7 @@ LIMIT 3 Checks whether the dictionary has the key. ``` -dictHas('dict_name', id) +dictHas('dict_name', id_expr) ``` **Parameters** @@ -116,7 +116,7 @@ Type: `UInt8`. For the hierarchical dictionary, returns an array of dictionary keys starting from passed `id_expr` and continuing along the chain of parent elements. ``` -dictGetHierarchy('dict_name', id) +dictGetHierarchy('dict_name', id_expr) ``` **Parameters** diff --git a/docs/ru/query_language/functions/ext_dict_functions.md b/docs/ru/query_language/functions/ext_dict_functions.md index 8901292aeb2..3fb4a110e88 100644 --- a/docs/ru/query_language/functions/ext_dict_functions.md +++ b/docs/ru/query_language/functions/ext_dict_functions.md @@ -1,40 +1,192 @@ # Функции для работы с внешними словарями {#ext_dict_functions} -Информация о подключении и настройке внешних словарей смотрите в разделе [Внешние словари](../dicts/external_dicts.md). +Для получения информации о подключении и настройке, читайте раздел про [внешние словари](../dicts/external_dicts.md). -## dictGetUInt8, dictGetUInt16, dictGetUInt32, dictGetUInt64 +## dictGet -## dictGetInt8, dictGetInt16, dictGetInt32, dictGetInt64 +Получение значения из внешнего словаря. -## dictGetFloat32, dictGetFloat64 +``` +dictGet('dict_name', 'attr_name', id_expr) +dictGetOrDefault('dict_name', 'attr_name', id_expr, default_value_expr) +``` -## dictGetDate, dictGetDateTime +**Параметры** -## dictGetUUID +- `dict_name` — Название словаря. [Строковый литерал](../syntax.md#syntax-string-literal). +- `attr_name` — Название колонки словаря. [Строковый литерал](../syntax.md#syntax-string-literal). +- `id_expr` — Значение ключа. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md) или [Tuple](../../data_types/tuple.md) в зависимости от конфигурации словаря. 
+- `default_value_expr` — Значение, которое возвращается, если словарь не содержит строку с ключом `id_expr`. [Выражение](../syntax.md#syntax-expressions) возвращает значение такого же типа, что и у атрибута `attr_name`. -## dictGetString -`dictGetT('dict_name', 'attr_name', id)` -- получить из словаря dict_name значение атрибута attr_name по ключу id. -`dict_name` и `attr_name` - константные строки. -`id` должен иметь тип UInt64. -Если ключа `id` нет в словаре - вернуть значение по умолчанию, заданное в описании словаря. +**Возвращаемое значение** -## dictGetTOrDefault +- Если ClickHouse успешно обрабатывает атрибут в соответствии с указанным [типом данных](../dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes), то функция возвращает значение для заданного ключа `id_expr`. +- Если запрашиваемого `id_expr` не оказалось в словаре: -`dictGetT('dict_name', 'attr_name', id, default)` + - `dictGet` возвратит содержимое элемента `<null_value>`, определенного в настройках словаря. + - `dictGetOrDefault` вернет значение переданного `default_value_expr` параметра. -Аналогично функциям `dictGetT`, но значение по умолчанию берётся из последнего аргумента функции. +ClickHouse бросает исключение, если не может обработать значение атрибута или значение несопоставимо с типом атрибута. -## dictIsIn -`dictIsIn('dict_name', child_id, ancestor_id)` -- для иерархического словаря dict_name - узнать, находится ли ключ child_id внутри ancestor_id (или совпадает с ancestor_id). Возвращает UInt8. 
+**Пример использования** + +Создайте файл `ext-dict-test.csv` со следующим содержимым: + +```text +1,1 +2,2 +``` + +Первая колонка - это `id`, вторая - `c1` + +Конфигурация внешнего словаря: + +```xml + + + ext-dict-test + + + /path-to/ext-dict-test.csv + CSV + + + + + + + + id + + + c1 + UInt32 + + + + 0 + +``` + +Выполните запрос: + +```sql +SELECT + dictGetOrDefault('ext-dict-test', 'c1', number + 1, toUInt32(number * 10)) AS val, + toTypeName(val) AS type +FROM system.numbers +LIMIT 3 +``` +```text +┌─val─┬─type───┐ +│ 1 │ UInt32 │ +│ 2 │ UInt32 │ +│ 20 │ UInt32 │ +└─────┴────────┘ +``` + +**Смотрите также** + +- [Внешние словари](../dicts/external_dicts.md) -## dictGetHierarchy -`dictGetHierarchy('dict_name', id)` -- для иерархического словаря dict_name - вернуть массив ключей словаря, начиная с id и продолжая цепочкой родительских элементов. Возвращает Array(UInt64). ## dictHas -`dictHas('dict_name', id)` -- проверить наличие ключа в словаре. Возвращает значение типа UInt8, равное 0, если ключа нет и 1, если ключ есть. + +Проверяет наличие записи с заданным ключом в словаре. + +``` +dictHas('dict_name', id_expr) +``` + +**Параметры** + +- `dict_name` — Название словаря. [Строковый литерал](../syntax.md#syntax-string-literal). +- `id_expr` — Значение ключа. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md). + +**Возвращаемое значение** + +- 0, если ключ не был обнаружен +- 1, если ключ присутствует в словаре + +Тип: `UInt8`. + +## dictGetHierarchy + +Для иерархических словарей возвращает массив ключей, содержащий ключ `id_expr` и все ключи родительских элементов по цепочке. + +``` +dictGetHierarchy('dict_name', id_expr) +``` + +**Параметры** + +- `dict_name` — Название словаря. [Строковый литерал](../syntax.md#syntax-string-literal). +- `id_expr` — Значение ключа. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md). 
+ +**Возвращаемое значение** + +Иерархию ключей словаря. + +Тип: [Array(UInt64)](../../data_types/array.md). + +## dictIsIn + +Осуществляет проверку - является ли ключ родительским во всей иерархической цепочке словаря. + +`dictIsIn ('dict_name', child_id_expr, ancestor_id_expr)` + +**Параметры** + +- `dict_name` — Название словаря. [Строковый литерал](../syntax.md#syntax-string-literal). +- `child_id_expr` — Ключ который должен быть проверен. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md). +- `ancestor_id_expr` — Родительский ключ для ключа `child_id_expr`. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md). + +**Возвращаемое значение** + +- 0, если `child_id_expr` не является потомком для `ancestor_id_expr`. +- 1, если `child_id_expr` является потомком для `ancestor_id_expr` или если `child_id_expr` равен `ancestor_id_expr`. + +Тип: `UInt8`. + +## Другие функции {#ext_dict_functions-other} + +ClickHouse поддерживает специализированные функции для конвертации значений атрибутов словаря к определенному типу, независимо от настроек словаря. + +Функции: + +- `dictGetInt8`, `dictGetInt16`, `dictGetInt32`, `dictGetInt64` +- `dictGetUInt8`, `dictGetUInt16`, `dictGetUInt32`, `dictGetUInt64` +- `dictGetFloat32`, `dictGetFloat64` +- `dictGetDate` +- `dictGetDateTime` +- `dictGetUUID` +- `dictGetString` + +Все эти функции имеют так же `OrDefault` версию. Например, `dictGetDateOrDefault`. + +Синтаксис: + +``` +dictGet[Тип]('dict_name', 'attr_name', id_expr) +dictGet[Тип]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr) +``` + +**Параметры** + +- `dict_name` — Название словаря. [Строковый литерал](../syntax.md#syntax-string-literal). +- `attr_name` — Название колонки словаря. [Строковый литерал](../syntax.md#syntax-string-literal). +- `id_expr` — Значение ключа. 
[Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md). +- `default_value_expr` — Значение, которое возвращается, если словарь не содержит строку с ключом `id_expr`. [Выражение](../syntax.md#syntax-expressions) возвращает значение с таким же типом, что и тип атрибута `attr_name`. + +**Возвращаемое значение** + +- Если ClickHouse успешно обрабатывает атрибут в соответствии с указанным [типом данных](../dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes), то функция возвращает значение для заданного ключа `id_expr`. +- Если запрашиваемого `id_expr` не оказалось в словаре: + + - `dictGet[Тип]` возвратит содержимое элемента `<null_value>`, определенного в настройках словаря. + - `dictGet[Тип]OrDefault` вернет значение переданного `default_value_expr` параметра. + +ClickHouse бросает исключение, если не может обработать значение атрибута или значение несопоставимо с типом атрибута. [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/ext_dict_functions/) diff --git a/libs/libcommon/include/common/config_common.h.in b/libs/libcommon/include/common/config_common.h.in index 0cc0950efba..247afd87aea 100644 --- a/libs/libcommon/include/common/config_common.h.in +++ b/libs/libcommon/include/common/config_common.h.in @@ -7,3 +7,4 @@ #cmakedefine01 USE_READLINE #cmakedefine01 USE_LIBEDIT #cmakedefine01 HAVE_READLINE_HISTORY +#cmakedefine01 NOT_UNBUNDLED diff --git a/libs/libcommon/include/common/memory.h b/libs/libcommon/include/common/memory.h new file mode 100644 index 00000000000..d8dced79cfb --- /dev/null +++ b/libs/libcommon/include/common/memory.h @@ -0,0 +1,65 @@ +#pragma once + +#include +#include + +#if __has_include() +#include +#endif + +#if USE_JEMALLOC +#include + +#if JEMALLOC_VERSION_MAJOR < 4 + #undef USE_JEMALLOC + #define USE_JEMALLOC 0 + #include +#endif +#endif + +#define ALWAYS_INLINE inline __attribute__((__always_inline__)) +#define NO_INLINE __attribute__((__noinline__)) + 
+namespace Memory +{ + +ALWAYS_INLINE void * newImpl(std::size_t size) +{ + auto * ptr = malloc(size); + if (likely(ptr != nullptr)) + return ptr; + + /// @note no std::get_new_handler logic implemented + throw std::bad_alloc{}; +} + +ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept +{ + return malloc(size); +} + +ALWAYS_INLINE void deleteImpl(void * ptr) noexcept +{ + free(ptr); +} + +#if USE_JEMALLOC + +ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept +{ + if (unlikely(ptr == nullptr)) + return; + + sdallocx(ptr, size, 0); +} + +#else + +ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]]) noexcept +{ + free(ptr); +} + +#endif + +}