Merge remote-tracking branch 'upstream/master' into fix25

This commit is contained in:
proller 2019-07-19 13:10:10 +03:00
commit 574fe9d59f
22 changed files with 554 additions and 119 deletions

View File

@ -182,6 +182,11 @@ else ()
set (CXX_FLAGS_INTERNAL_COMPILER "-std=c++1z") set (CXX_FLAGS_INTERNAL_COMPILER "-std=c++1z")
endif () endif ()
if (COMPILER_GCC OR COMPILER_CLANG)
# Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation")
endif ()
option(WITH_COVERAGE "Build with coverage." 0) option(WITH_COVERAGE "Build with coverage." 0)
if(WITH_COVERAGE AND COMPILER_CLANG) if(WITH_COVERAGE AND COMPILER_CLANG)
set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping") set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping")

View File

@ -15,7 +15,6 @@ ${JEMALLOC_SOURCE_DIR}/src/extent_mmap.c
${JEMALLOC_SOURCE_DIR}/src/hash.c ${JEMALLOC_SOURCE_DIR}/src/hash.c
${JEMALLOC_SOURCE_DIR}/src/hook.c ${JEMALLOC_SOURCE_DIR}/src/hook.c
${JEMALLOC_SOURCE_DIR}/src/jemalloc.c ${JEMALLOC_SOURCE_DIR}/src/jemalloc.c
${JEMALLOC_SOURCE_DIR}/src/jemalloc_cpp.cpp
${JEMALLOC_SOURCE_DIR}/src/large.c ${JEMALLOC_SOURCE_DIR}/src/large.c
${JEMALLOC_SOURCE_DIR}/src/log.c ${JEMALLOC_SOURCE_DIR}/src/log.c
${JEMALLOC_SOURCE_DIR}/src/malloc_io.c ${JEMALLOC_SOURCE_DIR}/src/malloc_io.c

View File

@ -384,6 +384,7 @@ endif()
if (USE_JEMALLOC) if (USE_JEMALLOC)
target_include_directories (dbms SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR}) # used in Interpreters/AsynchronousMetrics.cpp target_include_directories (dbms SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR}) # used in Interpreters/AsynchronousMetrics.cpp
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${JEMALLOC_INCLUDE_DIR}) # new_delete.cpp
endif () endif ()
target_include_directories (dbms PUBLIC ${DBMS_INCLUDE_DIR} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src/Formats/include) target_include_directories (dbms PUBLIC ${DBMS_INCLUDE_DIR} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/src/Formats/include)

View File

@ -1,7 +1,6 @@
#pragma once #pragma once
#include <Common/HashTable/Hash.h> #include <Common/HashTable/Hash.h>
#include <Common/MemoryTracker.h>
#include <Common/PODArray.h> #include <Common/PODArray.h>
#include <IO/ReadBuffer.h> #include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h> #include <IO/WriteBuffer.h>
@ -513,8 +512,6 @@ private:
void mediumToLarge() void mediumToLarge()
{ {
CurrentMemoryTracker::alloc(sizeof(detail::QuantileTimingLarge));
/// While the data is copied from medium, it is not possible to set `large` value (otherwise it will overwrite some data). /// While the data is copied from medium, it is not possible to set `large` value (otherwise it will overwrite some data).
detail::QuantileTimingLarge * tmp_large = new detail::QuantileTimingLarge; detail::QuantileTimingLarge * tmp_large = new detail::QuantileTimingLarge;
@ -528,8 +525,6 @@ private:
void tinyToLarge() void tinyToLarge()
{ {
CurrentMemoryTracker::alloc(sizeof(detail::QuantileTimingLarge));
/// While the data is copied from `medium` it is not possible to set `large` value (otherwise it will overwrite some data). /// While the data is copied from `medium` it is not possible to set `large` value (otherwise it will overwrite some data).
detail::QuantileTimingLarge * tmp_large = new detail::QuantileTimingLarge; detail::QuantileTimingLarge * tmp_large = new detail::QuantileTimingLarge;
@ -562,8 +557,6 @@ public:
else if (kind == Kind::Large) else if (kind == Kind::Large)
{ {
delete large; delete large;
CurrentMemoryTracker::free(sizeof(detail::QuantileTimingLarge));
} }
} }

View File

@ -108,13 +108,92 @@ class AllocatorWithHint : Hint
{ {
protected: protected:
static constexpr bool clear_memory = clear_memory_; static constexpr bool clear_memory = clear_memory_;
static constexpr size_t small_memory_threshold = mmap_threshold;
public: public:
/// Allocate memory range. /// Allocate memory range.
void * alloc(size_t size, size_t alignment = 0) void * alloc(size_t size, size_t alignment = 0)
{ {
CurrentMemoryTracker::alloc(size); CurrentMemoryTracker::alloc(size);
return allocNoTrack(size, alignment);
}
/// Free memory range.
void free(void * buf, size_t size)
{
freeNoTrack(buf, size);
CurrentMemoryTracker::free(size);
}
/** Enlarge memory range.
* Data from old range is moved to the beginning of new range.
* Address of memory range could change.
*/
void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0)
{
if (old_size == new_size)
{
/// nothing to do.
/// BTW, it's not possible to change alignment while doing realloc.
}
else if (old_size < mmap_threshold && new_size < mmap_threshold && alignment <= MALLOC_MIN_ALIGNMENT)
{
/// Resize malloc'd memory region with no special alignment requirement.
CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf)
DB::throwFromErrno("Allocator: Cannot realloc from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
buf = new_buf;
if constexpr (clear_memory)
if (new_size > old_size)
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
}
else if (old_size >= mmap_threshold && new_size >= mmap_threshold)
{
/// Resize mmap'd memory region.
CurrentMemoryTracker::realloc(old_size, new_size);
// On apple and freebsd self-implemented mremap used (common/mremap.h)
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno("Allocator: Cannot mremap memory chunk from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_MREMAP);
/// No need for zero-fill, because mmap guarantees it.
}
else if (new_size < small_memory_threshold)
{
/// Small allocs that requires a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once.
CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = allocNoTrack(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
freeNoTrack(buf, old_size);
buf = new_buf;
}
else
{
/// Big allocs that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
void * new_buf = alloc(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
free(buf, old_size);
buf = new_buf;
}
return buf;
}
protected:
static constexpr size_t getStackThreshold()
{
return 0;
}
private:
void * allocNoTrack(size_t size, size_t alignment)
{
void * buf; void * buf;
if (size >= mmap_threshold) if (size >= mmap_threshold)
@ -149,15 +228,14 @@ public:
if (0 != res) if (0 != res)
DB::throwFromErrno("Cannot allocate memory (posix_memalign) " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res); DB::throwFromErrno("Cannot allocate memory (posix_memalign) " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
if (clear_memory) if constexpr (clear_memory)
memset(buf, 0, size); memset(buf, 0, size);
} }
} }
return buf; return buf;
} }
/// Free memory range. void freeNoTrack(void * buf, size_t size)
void free(void * buf, size_t size)
{ {
if (size >= mmap_threshold) if (size >= mmap_threshold)
{ {
@ -168,63 +246,6 @@ public:
{ {
::free(buf); ::free(buf);
} }
CurrentMemoryTracker::free(size);
}
/** Enlarge memory range.
* Data from old range is moved to the beginning of new range.
* Address of memory range could change.
*/
void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0)
{
if (old_size == new_size)
{
/// nothing to do.
/// BTW, it's not possible to change alignment while doing realloc.
}
else if (old_size < mmap_threshold && new_size < mmap_threshold && alignment <= MALLOC_MIN_ALIGNMENT)
{
/// Resize malloc'd memory region with no special alignment requirement.
CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf)
DB::throwFromErrno("Allocator: Cannot realloc from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
buf = new_buf;
if (clear_memory && new_size > old_size)
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
}
else if (old_size >= mmap_threshold && new_size >= mmap_threshold)
{
/// Resize mmap'd memory region.
CurrentMemoryTracker::realloc(old_size, new_size);
// On apple and freebsd self-implemented mremap used (common/mremap.h)
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno("Allocator: Cannot mremap memory chunk from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_MREMAP);
/// No need for zero-fill, because mmap guarantees it.
}
else
{
/// All other cases that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
void * new_buf = alloc(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
free(buf, old_size);
buf = new_buf;
}
return buf;
}
protected:
static constexpr size_t getStackThreshold()
{
return 0;
} }
}; };
@ -267,7 +288,7 @@ public:
{ {
if (size <= N) if (size <= N)
{ {
if (Base::clear_memory) if constexpr (Base::clear_memory)
memset(stack_memory, 0, N); memset(stack_memory, 0, N);
return stack_memory; return stack_memory;
} }

View File

@ -3,7 +3,6 @@
#include <Common/HashTable/SmallTable.h> #include <Common/HashTable/SmallTable.h>
#include <Common/HashTable/HashSet.h> #include <Common/HashTable/HashSet.h>
#include <Common/HyperLogLogCounter.h> #include <Common/HyperLogLogCounter.h>
#include <Common/MemoryTracker.h>
#include <Core/Defines.h> #include <Core/Defines.h>
@ -230,7 +229,6 @@ private:
if (getContainerType() != details::ContainerType::SMALL) if (getContainerType() != details::ContainerType::SMALL)
throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR);
CurrentMemoryTracker::alloc(sizeof(Medium));
auto tmp_medium = std::make_unique<Medium>(); auto tmp_medium = std::make_unique<Medium>();
for (const auto & x : small) for (const auto & x : small)
@ -247,7 +245,6 @@ private:
if ((container_type != details::ContainerType::SMALL) && (container_type != details::ContainerType::MEDIUM)) if ((container_type != details::ContainerType::SMALL) && (container_type != details::ContainerType::MEDIUM))
throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR); throw Poco::Exception("Internal error", ErrorCodes::LOGICAL_ERROR);
CurrentMemoryTracker::alloc(sizeof(Large));
auto tmp_large = std::make_unique<Large>(); auto tmp_large = std::make_unique<Large>();
if (container_type == details::ContainerType::SMALL) if (container_type == details::ContainerType::SMALL)
@ -277,15 +274,11 @@ private:
{ {
delete medium; delete medium;
medium = nullptr; medium = nullptr;
CurrentMemoryTracker::free(sizeof(Medium));
} }
else if (container_type == details::ContainerType::LARGE) else if (container_type == details::ContainerType::LARGE)
{ {
delete large; delete large;
large = nullptr; large = nullptr;
CurrentMemoryTracker::free(sizeof(Large));
} }
} }

View File

@ -46,6 +46,12 @@ MemoryTracker * CurrentThread::getMemoryTracker()
return &current_thread->memory_tracker; return &current_thread->memory_tracker;
} }
Int64 & CurrentThread::getUntrackedMemory()
{
/// It assumes that (current_thread != nullptr) is already checked with getMemoryTracker()
return current_thread->untracked_memory;
}
void CurrentThread::updateProgressIn(const Progress & value) void CurrentThread::updateProgressIn(const Progress & value)
{ {
if (unlikely(!current_thread)) if (unlikely(!current_thread))

View File

@ -48,6 +48,7 @@ public:
static ProfileEvents::Counters & getProfileEvents(); static ProfileEvents::Counters & getProfileEvents();
static MemoryTracker * getMemoryTracker(); static MemoryTracker * getMemoryTracker();
static Int64 & getUntrackedMemory();
/// Update read and write rows (bytes) statistics (used in system.query_thread_log) /// Update read and write rows (bytes) statistics (used in system.query_thread_log)
static void updateProgressIn(const Progress & value); static void updateProgressIn(const Progress & value);

View File

@ -4,7 +4,6 @@
#include <Common/HyperLogLogCounter.h> #include <Common/HyperLogLogCounter.h>
#include <Common/HashTable/SmallTable.h> #include <Common/HashTable/SmallTable.h>
#include <Common/MemoryTracker.h>
namespace DB namespace DB
@ -39,8 +38,6 @@ private:
void toLarge() void toLarge()
{ {
CurrentMemoryTracker::alloc(sizeof(Large));
/// At the time of copying data from `tiny`, setting the value of `large` is still not possible (otherwise it will overwrite some data). /// At the time of copying data from `tiny`, setting the value of `large` is still not possible (otherwise it will overwrite some data).
Large * tmp_large = new Large; Large * tmp_large = new Large;
@ -56,11 +53,7 @@ public:
~HyperLogLogWithSmallSetOptimization() ~HyperLogLogWithSmallSetOptimization()
{ {
if (isLarge()) if (isLarge())
{
delete large; delete large;
CurrentMemoryTracker::free(sizeof(Large));
}
} }
void insert(Key value) void insert(Key value)

View File

@ -1,3 +1,5 @@
#include <cstdlib>
#include "MemoryTracker.h" #include "MemoryTracker.h"
#include <common/likely.h> #include <common/likely.h>
#include <common/logger_useful.h> #include <common/logger_useful.h>
@ -17,6 +19,8 @@ namespace DB
static constexpr size_t log_peak_memory_usage_every = 1ULL << 30; static constexpr size_t log_peak_memory_usage_every = 1ULL << 30;
/// Each thread could new/delete memory in range of (-untracked_memory_limit, untracked_memory_limit) without access to common counters.
static constexpr Int64 untracked_memory_limit = 4 * 1024 * 1024;
MemoryTracker::~MemoryTracker() MemoryTracker::~MemoryTracker()
@ -85,6 +89,9 @@ void MemoryTracker::alloc(Int64 size)
{ {
free(size); free(size);
/// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
auto untrack_lock = blocker.cancel();
std::stringstream message; std::stringstream message;
message << "Memory tracker"; message << "Memory tracker";
if (description) if (description)
@ -100,6 +107,9 @@ void MemoryTracker::alloc(Int64 size)
{ {
free(size); free(size);
/// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
auto untrack_lock = blocker.cancel();
std::stringstream message; std::stringstream message;
message << "Memory limit"; message << "Memory limit";
if (description) if (description)
@ -191,19 +201,41 @@ namespace CurrentMemoryTracker
void alloc(Int64 size) void alloc(Int64 size)
{ {
if (auto memory_tracker = DB::CurrentThread::getMemoryTracker()) if (auto memory_tracker = DB::CurrentThread::getMemoryTracker())
memory_tracker->alloc(size); {
Int64 & untracked = DB::CurrentThread::getUntrackedMemory();
untracked += size;
if (untracked > untracked_memory_limit)
{
/// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes
/// more. It could be usefull for enlarge Exception message in rethrow logic.
Int64 tmp = untracked;
untracked = 0;
memory_tracker->alloc(tmp);
}
}
} }
void realloc(Int64 old_size, Int64 new_size) void realloc(Int64 old_size, Int64 new_size)
{ {
if (auto memory_tracker = DB::CurrentThread::getMemoryTracker()) Int64 addition = new_size - old_size;
memory_tracker->alloc(new_size - old_size); if (addition > 0)
alloc(addition);
else
free(-addition);
} }
void free(Int64 size) void free(Int64 size)
{ {
if (auto memory_tracker = DB::CurrentThread::getMemoryTracker()) if (auto memory_tracker = DB::CurrentThread::getMemoryTracker())
memory_tracker->free(size); {
Int64 & untracked = DB::CurrentThread::getUntrackedMemory();
untracked -= size;
if (untracked < -untracked_memory_limit)
{
memory_tracker->free(-untracked);
untracked = 0;
}
}
} }
} }

View File

@ -45,7 +45,11 @@ public:
void realloc(Int64 old_size, Int64 new_size) void realloc(Int64 old_size, Int64 new_size)
{ {
alloc(new_size - old_size); Int64 addition = new_size - old_size;
if (addition > 0)
alloc(addition);
else
free(-addition);
} }
/** This function should be called after memory deallocation. /** This function should be called after memory deallocation.

View File

@ -34,6 +34,11 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
} }
// Replace NLMSG_OK with explicit casts since that system macro contains signedness bugs which are not going to be fixed.
static inline bool is_nlmsg_ok(const struct nlmsghdr * const nlh, const ssize_t len)
{
return len >= static_cast<ssize_t>(sizeof(*nlh)) && nlh->nlmsg_len >= sizeof(*nlh) && static_cast<size_t>(len) >= nlh->nlmsg_len;
}
namespace namespace
{ {
@ -128,7 +133,7 @@ struct NetlinkMessage
if (header.nlmsg_type == NLMSG_ERROR) if (header.nlmsg_type == NLMSG_ERROR)
throw Exception("Can't receive Netlink response: error " + std::to_string(error.error), ErrorCodes::NETLINK_ERROR); throw Exception("Can't receive Netlink response: error " + std::to_string(error.error), ErrorCodes::NETLINK_ERROR);
if (!NLMSG_OK((&header), bytes_received)) if (!is_nlmsg_ok(&header, bytes_received))
throw Exception("Can't receive Netlink response: wrong number of bytes received", ErrorCodes::NETLINK_ERROR); throw Exception("Can't receive Netlink response: wrong number of bytes received", ErrorCodes::NETLINK_ERROR);
} }
}; };

View File

@ -50,6 +50,19 @@ ThreadStatus::ThreadStatus()
ThreadStatus::~ThreadStatus() ThreadStatus::~ThreadStatus()
{ {
try
{
if (untracked_memory > 0)
memory_tracker.alloc(untracked_memory);
else
memory_tracker.free(-untracked_memory);
}
catch (const DB::Exception &)
{
/// It's a minor tracked memory leak here (not the memory itself but it's counter).
/// We've already allocated a little bit more then the limit and cannot track it in the thread memory tracker or its parent.
}
if (deleter) if (deleter)
deleter(); deleter();
current_thread = nullptr; current_thread = nullptr;

View File

@ -96,6 +96,8 @@ public:
/// TODO: merge them into common entity /// TODO: merge them into common entity
ProfileEvents::Counters performance_counters{VariableContext::Thread}; ProfileEvents::Counters performance_counters{VariableContext::Thread};
MemoryTracker memory_tracker{VariableContext::Thread}; MemoryTracker memory_tracker{VariableContext::Thread};
/// Small amount of untracked memory (per thread atomic-less counter)
Int64 untracked_memory = 0;
/// Statistics of read and write rows/bytes /// Statistics of read and write rows/bytes
Progress progress_in; Progress progress_in;

View File

@ -0,0 +1,142 @@
#include <new>
#include <common/memory.h>
#include <Common/MemoryTracker.h>
/// Replace default new/delete with memory tracking versions.
/// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new
/// https://en.cppreference.com/w/cpp/memory/new/operator_delete
#if NOT_UNBUNDLED
namespace Memory
{
ALWAYS_INLINE void trackMemory(std::size_t size)
{
#if USE_JEMALLOC
/// The nallocx() function allocates no memory, but it performs the same size computation as the mallocx() function
/// @note je_mallocx() != je_malloc(). It's expected they don't differ much in allocation logic.
if (likely(size != 0))
CurrentMemoryTracker::alloc(nallocx(size, 0));
#else
CurrentMemoryTracker::alloc(size);
#endif
}
ALWAYS_INLINE bool trackMemoryNoExept(std::size_t size) noexcept
{
try
{
trackMemory(size);
}
catch (...)
{
return false;
}
return true;
}
ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0) noexcept
{
try
{
#if USE_JEMALLOC
/// @note It's also possible to use je_malloc_usable_size() here.
if (likely(ptr != nullptr))
CurrentMemoryTracker::free(sallocx(ptr, 0));
#else
if (size)
CurrentMemoryTracker::free(size);
#endif
}
catch (...)
{}
}
}
/// new
void * operator new(std::size_t size)
{
Memory::trackMemory(size);
return Memory::newImpl(size);
}
void * operator new[](std::size_t size)
{
Memory::trackMemory(size);
return Memory::newImpl(size);
}
void * operator new(std::size_t size, const std::nothrow_t &) noexcept
{
if (likely(Memory::trackMemoryNoExept(size)))
return Memory::newNoExept(size);
return nullptr;
}
void * operator new[](std::size_t size, const std::nothrow_t &) noexcept
{
if (likely(Memory::trackMemoryNoExept(size)))
return Memory::newNoExept(size);
return nullptr;
}
/// delete
/// C++17 std 21.6.2.1 (11)
/// If a function without a size parameter is defined, the program should also define the corresponding function with a size parameter.
/// If a function with a size parameter is defined, the program shall also define the corresponding version without the size parameter.
/// cppreference:
/// It's unspecified whether size-aware or size-unaware version is called when deleting objects of
/// incomplete type and arrays of non-class and trivially-destructible class types.
void operator delete(void * ptr) noexcept
{
Memory::untrackMemory(ptr);
Memory::deleteImpl(ptr);
}
void operator delete[](void * ptr) noexcept
{
Memory::untrackMemory(ptr);
Memory::deleteImpl(ptr);
}
void operator delete(void * ptr, std::size_t size) noexcept
{
Memory::untrackMemory(ptr, size);
Memory::deleteSized(ptr, size);
}
void operator delete[](void * ptr, std::size_t size) noexcept
{
Memory::untrackMemory(ptr, size);
Memory::deleteSized(ptr, size);
}
#else
/// new
void * operator new(std::size_t size) { return Memory::newImpl(size); }
void * operator new[](std::size_t size) { return Memory::newImpl(size); }
void * operator new(std::size_t size, const std::nothrow_t &) noexcept { return Memory::newNoExept(size); }
void * operator new[](std::size_t size, const std::nothrow_t &) noexcept { return Memory::newNoExept(size); }
/// delete
void operator delete(void * ptr) noexcept { Memory::deleteImpl(ptr); }
void operator delete[](void * ptr) noexcept { Memory::deleteImpl(ptr); }
void operator delete(void * ptr, const std::nothrow_t &) noexcept { Memory::deleteImpl(ptr); }
void operator delete[](void * ptr, const std::nothrow_t &) noexcept { Memory::deleteImpl(ptr); }
void operator delete(void * ptr, std::size_t size) noexcept { Memory::deleteSized(ptr, size); }
void operator delete[](void * ptr, std::size_t size) noexcept { Memory::deleteSized(ptr, size); }
#endif

View File

@ -13,11 +13,11 @@
<create_query>CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory</create_query> <create_query>CREATE TABLE ints (i64 Int64, i32 Int32, i16 Int16, i8 Int8) ENGINE = Memory</create_query>
<fill_query>INSERT INTO ints SELECT number AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000)</fill_query> <fill_query>INSERT INTO ints SELECT number AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000)</fill_query>
<fill_query>INSERT INTO ints SELECT 10000 + number % 1000 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000)</fill_query> <fill_query>INSERT INTO ints SELECT 10000 + number % 1000 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000)</fill_query>
<fill_query>INSERT INTO ints SELECT 20000 + number % 100 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000)</fill_query> <fill_query>INSERT INTO ints SELECT 20000 + number % 100 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000)</fill_query>
<fill_query>INSERT INTO ints SELECT 30000 + number % 10 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000)</fill_query> <fill_query>INSERT INTO ints SELECT 30000 + number % 10 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000)</fill_query>
<fill_query>INSERT INTO ints SELECT 40000 + number % 1 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(10000)</fill_query> <fill_query>INSERT INTO ints SELECT 40000 + number % 1 AS i64, i64 AS i32, i64 AS i16, i64 AS i8 FROM numbers(5000)</fill_query>
<query tag='ANY LEFT'>SELECT COUNT() FROM ints l ANY LEFT JOIN ints r USING i64 WHERE i32 = 200042</query> <query tag='ANY LEFT'>SELECT COUNT() FROM ints l ANY LEFT JOIN ints r USING i64 WHERE i32 = 200042</query>
<query tag='ANY LEFT KEY'>SELECT COUNT() FROM ints l ANY LEFT JOIN ints r USING i64,i32,i16,i8 WHERE i32 = 200042</query> <query tag='ANY LEFT KEY'>SELECT COUNT() FROM ints l ANY LEFT JOIN ints r USING i64,i32,i16,i8 WHERE i32 = 200042</query>

View File

@ -0,0 +1,7 @@
SET max_memory_usage = 1000000000;
SELECT sum(ignore(*)) FROM (
SELECT number, argMax(number, (number, toFixedString(toString(number), 1024)))
FROM numbers(1000000)
GROUP BY number
) -- { serverError 241 }

View File

@ -96,7 +96,7 @@ LIMIT 3
Checks whether the dictionary has the key. Checks whether the dictionary has the key.
``` ```
dictHas('dict_name', id) dictHas('dict_name', id_expr)
``` ```
**Parameters** **Parameters**
@ -116,7 +116,7 @@ Type: `UInt8`.
For the hierarchical dictionary, returns an array of dictionary keys starting from passed `id_expr` and continuing along the chain of parent elements. For the hierarchical dictionary, returns an array of dictionary keys starting from passed `id_expr` and continuing along the chain of parent elements.
``` ```
dictGetHierarchy('dict_name', id) dictGetHierarchy('dict_name', id_expr)
``` ```
**Parameters** **Parameters**

View File

@ -1,40 +1,192 @@
# Функции для работы с внешними словарями {#ext_dict_functions} # Функции для работы с внешними словарями {#ext_dict_functions}
Информация о подключении и настройке внешних словарей смотрите в разделе [Внешние словари](../dicts/external_dicts.md). Для получения информации о подключении и настройке, читайте раздел про [внешние словари](../dicts/external_dicts.md).
## dictGetUInt8, dictGetUInt16, dictGetUInt32, dictGetUInt64 ## dictGet
## dictGetInt8, dictGetInt16, dictGetInt32, dictGetInt64 Получение значения из внешнего словаря.
## dictGetFloat32, dictGetFloat64 ```
dictGet('dict_name', 'attr_name', id_expr)
dictGetOrDefault('dict_name', 'attr_name', id_expr, default_value_expr)
```
## dictGetDate, dictGetDateTime **Параметры**
## dictGetUUID - `dict_name` — Название словаря. [Строковый литерал](../syntax.md#syntax-string-literal).
- `attr_name` — Название колонки словаря. [Строковый литерал](../syntax.md#syntax-string-literal).
- `id_expr` — Значение ключа. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md) или [Tuple](../../data_types/tuple.md) в зависимости от конфигурации словаря.
- `default_value_expr` — Значение которое возвращается, если словарь не содержит колонку с ключом `id_expr`. [Выражение](../syntax.md#syntax-expressions) возвращает значение такого же типа, что и у атрибута `attr_name`.
## dictGetString **Возвращаемое значение**
`dictGetT('dict_name', 'attr_name', id)`
- получить из словаря dict_name значение атрибута attr_name по ключу id.
`dict_name` и `attr_name` - константные строки.
`id` должен иметь тип UInt64.
Если ключа `id` нет в словаре - вернуть значение по умолчанию, заданное в описании словаря.
## dictGetTOrDefault - Если ClickHouse успешно обрабатывает атрибут в соотвествии с указаным [типом данных](../dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes), то функция возвращает значение для заданного ключа `id_expr`.
- Если запрашиваемого `id_expr` не оказалось в словаре:
`dictGetT('dict_name', 'attr_name', id, default)` - `dictGet` возвратит содержимое элемента `<null_value>` определенного в настройках словаря.
- `dictGetOrDefault` вернет значение переданного `default_value_expr` параметра.
Аналогично функциям `dictGetT`, но значение по умолчанию берётся из последнего аргумента функции. ClickHouse бросает исключение, если не может обработать значение атрибута или значение несопоставимо с типом атрибута.
## dictIsIn **Пример использования**
`dictIsIn('dict_name', child_id, ancestor_id)`
- для иерархического словаря dict_name - узнать, находится ли ключ child_id внутри ancestor_id (или совпадает с ancestor_id). Возвращает UInt8. Создайте файл `ext-dict-text.csv` со следующим содержимым:
```text
1,1
2,2
```
Первая колонка - это `id`, вторая - `c1`
Конфигурация внешнего словаря:
```xml
<yandex>
<dictionary>
<name>ext-dict-test</name>
<source>
<file>
<path>/path-to/ext-dict-test.csv</path>
<format>CSV</format>
</file>
</source>
<layout>
<flat />
</layout>
<structure>
<id>
<name>id</name>
</id>
<attribute>
<name>c1</name>
<type>UInt32</type>
<null_value></null_value>
</attribute>
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
```
Выполните запрос:
```sql
SELECT
dictGetOrDefault('ext-dict-test', 'c1', number + 1, toUInt32(number * 10)) AS val,
toТипName(val) AS Type
FROM system.numbers
LIMIT 3
```
```text
┌─val─┬─type───┐
│ 1 │ UInt32 │
│ 2 │ UInt32 │
│ 20 │ UInt32 │
└─────┴────────┘
```
**Смотрите также**
- [Внешние словари](../dicts/external_dicts.md)
## dictGetHierarchy
`dictGetHierarchy('dict_name', id)`
- для иерархического словаря dict_name - вернуть массив ключей словаря, начиная с id и продолжая цепочкой родительских элементов. Возвращает Array(UInt64).
## dictHas ## dictHas
`dictHas('dict_name', id)`
- проверить наличие ключа в словаре. Возвращает значение типа UInt8, равное 0, если ключа нет и 1, если ключ есть. Проверяет наличие записи с заданным ключом в словаре.
```
dictHas('dict_name', id_expr)
```
**Параметры**
- `dict_name` — Название словаря. [Строковый литерал](../syntax.md#syntax-string-literal).
- `id_expr` — Значение ключа. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md).
**Возвращаемое значение**
- 0, если ключ не был обнаружен
- 1, если ключ присутствует в словаре
Тип: `UInt8`.
## dictGetHierarchy
Для иерархических словарей возвращает массив ключей, содержащий ключ `id_expr` и все ключи родительских элементов по цепочке.
```
dictGetHierarchy('dict_name', id_expr)
```
**Параметры**
- `dict_name` — Название словаря. [Строковый литерал](../syntax.md#syntax-string-literal).
- `id_expr` — Значение ключа. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md).
**Возвращаемое значение**
Иерархию ключей словаря.
Тип: [Array(UInt64)](../../data_types/array.md).
## dictIsIn
Осуществляет проверку - является ли ключ родительским во всей иерархической цепочке словаря.
`dictIsIn ('dict_name', child_id_expr, ancestor_id_expr)`
**Параметры**
- `dict_name` — Название словаря. [Строковый литерал](../syntax.md#syntax-string-literal).
- `child_id_expr` — Ключ который должен быть проверен. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md).
- `ancestor_id_expr` — Родительский ключ для ключа `child_id_expr`. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md).
**Возвращаемое значение**
- 0, если `child_id_expr` не является потомком для `ancestor_id_expr`.
- 1, если `child_id_expr` является потомком для `ancestor_id_expr` или если `child_id_expr` равен `ancestor_id_expr`.
Тип: `UInt8`.
## Другие функции {#ext_dict_functions-other}
ClickHouse поддерживает специализированные функции для конвертации значений атрибутов словаря к определенному типу, независимо от настроек словаря.
Функции:
- `dictGetInt8`, `dictGetInt16`, `dictGetInt32`, `dictGetInt64`
- `dictGetUInt8`, `dictGetUInt16`, `dictGetUInt32`, `dictGetUInt64`
- `dictGetFloat32`, `dictGetFloat64`
- `dictGetDate`
- `dictGetDateTime`
- `dictGetUUID`
- `dictGetString`
Все эти функции имеют так же `OrDefault` версию. Например, `dictGetDateOrDefault`.
Синтаксис:
```
dictGet[Тип]('dict_name', 'attr_name', id_expr)
dictGet[Тип]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr)
```
**Параметры**
- `dict_name` — Название словаря. [Строковый литерал](../syntax.md#syntax-string-literal).
- `attr_name` — Название колонки словаря. [Строковый литерал](../syntax.md#syntax-string-literal).
- `id_expr` — Значение ключа. [Выражение](../syntax.md#syntax-expressions) возвращает значение типа [UInt64](../../data_types/int_uint.md).
- `default_value_expr` — Значение которое возвращается, если словарь не содержит строку с ключом `id_expr`. [Выражение](../syntax.md#syntax-expressions) возвращает значение с таким же типом, что и тип атрибута `attr_name`.
**Возвращаемое значение**
- Если ClickHouse успешно обрабатывает атрибут в соотвествии с указаным [типом данных](../dicts/external_dicts_dict_structure.md#ext_dict_structure-attributes),то функция возвращает значение для заданного ключа `id_expr`.
- Если запрашиваемого `id_expr` не оказалось в словаре:
- `dictGet[Тип]` возвратит содержимое элемента `<null_value>` определенного в настройках словаря.
- `dictGet[Тип]OrDefault` вернет значение переданного `default_value_expr` параметра.
ClickHouse бросает исключение, если не может обработать значение атрибута или значение несопоставимо с типом атрибута
[Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/ext_dict_functions/) <!--hide--> [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/ext_dict_functions/) <!--hide-->

View File

@ -7,3 +7,4 @@
#cmakedefine01 USE_READLINE #cmakedefine01 USE_READLINE
#cmakedefine01 USE_LIBEDIT #cmakedefine01 USE_LIBEDIT
#cmakedefine01 HAVE_READLINE_HISTORY #cmakedefine01 HAVE_READLINE_HISTORY
#cmakedefine01 NOT_UNBUNDLED

View File

@ -0,0 +1,65 @@
#pragma once
#include <new>
#include <common/likely.h>
#if __has_include(<common/config_common.h>)
#include <common/config_common.h>
#endif
#if USE_JEMALLOC
#include <jemalloc/jemalloc.h>
#if JEMALLOC_VERSION_MAJOR < 4
#undef USE_JEMALLOC
#define USE_JEMALLOC 0
#include <cstdlib>
#endif
#endif
#define ALWAYS_INLINE inline __attribute__((__always_inline__))
#define NO_INLINE __attribute__((__noinline__))
namespace Memory
{
ALWAYS_INLINE void * newImpl(std::size_t size)
{
auto * ptr = malloc(size);
if (likely(ptr != nullptr))
return ptr;
/// @note no std::get_new_handler logic implemented
throw std::bad_alloc{};
}
ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
{
return malloc(size);
}
ALWAYS_INLINE void deleteImpl(void * ptr) noexcept
{
free(ptr);
}
#if USE_JEMALLOC
ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept
{
if (unlikely(ptr == nullptr))
return;
sdallocx(ptr, size, 0);
}
#else
ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]]) noexcept
{
free(ptr);
}
#endif
}