mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 09:32:06 +00:00
Merge branch 'master' into fix-kafka-again
This commit is contained in:
commit
dffe0eba40
@ -317,6 +317,7 @@ include (cmake/find_hdfs3.cmake) # uses protobuf
|
||||
include (cmake/find_consistent-hashing.cmake)
|
||||
include (cmake/find_base64.cmake)
|
||||
include (cmake/find_hyperscan.cmake)
|
||||
include (cmake/find_lfalloc.cmake)
|
||||
find_contrib_lib(cityhash)
|
||||
find_contrib_lib(farmhash)
|
||||
find_contrib_lib(metrohash)
|
||||
|
@ -10,7 +10,3 @@ ClickHouse is an open-source column-oriented database management system that all
|
||||
* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events.
|
||||
* [Contacts](https://clickhouse.yandex/#contacts) can help to get your questions answered if there are any.
|
||||
* You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person.
|
||||
|
||||
## Upcoming Events
|
||||
|
||||
* [ClickHouse Community Meetup](https://www.eventbrite.com/e/clickhouse-meetup-in-madrid-registration-55376746339) in Madrid on April 2.
|
||||
|
9
cmake/find_lfalloc.cmake
Normal file
9
cmake/find_lfalloc.cmake
Normal file
@ -0,0 +1,9 @@
|
||||
if (NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE)
|
||||
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lfalloc/src/lf_allocX64.h")
|
||||
message (FATAL_ERROR "submodule contrib/lfalloc is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
endif()
|
||||
set (USE_LFALLOC 1)
|
||||
set (USE_LFALLOC_RANDOM_HINT 1)
|
||||
set (LFALLOC_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/lfalloc/src)
|
||||
message (STATUS "Using lfalloc=${USE_LFALLOC}: ${LFALLOC_INCLUDE_DIR}")
|
||||
endif ()
|
@ -36,6 +36,8 @@ elseif (NOT MISSING_INTERNAL_POCO_LIBRARY)
|
||||
set (ENABLE_DATA_SQLITE 0 CACHE BOOL "")
|
||||
set (ENABLE_DATA_MYSQL 0 CACHE BOOL "")
|
||||
set (ENABLE_DATA_POSTGRESQL 0 CACHE BOOL "")
|
||||
set (ENABLE_ENCODINGS 0 CACHE BOOL "")
|
||||
|
||||
# new after 2.0.0:
|
||||
set (POCO_ENABLE_ZIP 0 CACHE BOOL "")
|
||||
set (POCO_ENABLE_PAGECOMPILER 0 CACHE BOOL "")
|
||||
|
1
contrib/CMakeLists.txt
vendored
1
contrib/CMakeLists.txt
vendored
@ -284,6 +284,7 @@ endif ()
|
||||
|
||||
if (USE_INTERNAL_BROTLI_LIBRARY)
|
||||
add_subdirectory(brotli-cmake)
|
||||
target_compile_definitions(brotli PRIVATE BROTLI_BUILD_PORTABLE=1)
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_PROTOBUF_LIBRARY)
|
||||
|
2
contrib/hyperscan
vendored
2
contrib/hyperscan
vendored
@ -1 +1 @@
|
||||
Subproject commit 05dab0efee80be405aad5f74721b692b6889b75e
|
||||
Subproject commit 05b0f9064cca4bd55548dedb0a32ed9461146c1e
|
1813
contrib/lfalloc/src/lf_allocX64.h
Normal file
1813
contrib/lfalloc/src/lf_allocX64.h
Normal file
File diff suppressed because it is too large
Load Diff
23
contrib/lfalloc/src/lfmalloc.h
Normal file
23
contrib/lfalloc/src/lfmalloc.h
Normal file
@ -0,0 +1,23 @@
|
||||
#pragma once
|
||||
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "util/system/compiler.h"
|
||||
|
||||
namespace NMalloc {
|
||||
volatile inline bool IsAllocatorCorrupted = false;
|
||||
|
||||
static inline void AbortFromCorruptedAllocator() {
|
||||
IsAllocatorCorrupted = true;
|
||||
abort();
|
||||
}
|
||||
|
||||
struct TAllocHeader {
|
||||
void* Block;
|
||||
size_t AllocSize;
|
||||
void Y_FORCE_INLINE Encode(void* block, size_t size, size_t signature) {
|
||||
Block = block;
|
||||
AllocSize = size | signature;
|
||||
}
|
||||
};
|
||||
}
|
33
contrib/lfalloc/src/util/README.md
Normal file
33
contrib/lfalloc/src/util/README.md
Normal file
@ -0,0 +1,33 @@
|
||||
Style guide for the util folder is a stricter version of general style guide (mostly in terms of ambiguity resolution).
|
||||
|
||||
* all {} must be in K&R style
|
||||
* &, * tied closer to a type, not to variable
|
||||
* always use `using` not `typedef`
|
||||
* even a single line block must be in braces {}:
|
||||
```
|
||||
if (A) {
|
||||
B();
|
||||
}
|
||||
```
|
||||
* _ at the end of private data member of a class - `First_`, `Second_`
|
||||
* every .h file must be accompanied by a corresponding .cpp to avoid leakage and to check that it is self-contained
|
||||
* prohibited to use `printf`-like functions
|
||||
|
||||
|
||||
Things declared in the general style guide, which sometimes are missed:
|
||||
|
||||
* `template <`, not `template<`
|
||||
* `noexcept`, not `throw ()` nor `throw()`, not required for destructors
|
||||
* indents inside `namespace` same as inside `class`
|
||||
|
||||
|
||||
Requirements for new code (and for corrections to old code that involve a change of behaviour) in util:
|
||||
|
||||
* presence of UNIT-tests
|
||||
* presence of comments in Doxygen style
|
||||
* accessors without Get prefix (`Length()`, but not `GetLength()`)
|
||||
|
||||
This guide is not mandatory, as there is the general style guide.
|
||||
Nevertheless, if it is not followed, the next `ya style .` run in the util folder will undeservedly update the authors of some lines of code.
|
||||
|
||||
Thus before a commit it is recommended to run `ya style .` in the util folder.
|
51
contrib/lfalloc/src/util/system/atomic.h
Normal file
51
contrib/lfalloc/src/util/system/atomic.h
Normal file
@ -0,0 +1,51 @@
|
||||
#pragma once
|
||||
|
||||
#include "defaults.h"
|
||||
|
||||
using TAtomicBase = intptr_t;
|
||||
using TAtomic = volatile TAtomicBase;
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#include "atomic_gcc.h"
|
||||
#elif defined(_MSC_VER)
|
||||
#include "atomic_win.h"
|
||||
#else
|
||||
#error unsupported platform
|
||||
#endif
|
||||
|
||||
#if !defined(ATOMIC_COMPILER_BARRIER)
|
||||
#define ATOMIC_COMPILER_BARRIER()
|
||||
#endif
|
||||
|
||||
// Subtracts `v` from `a` by atomically adding the negated value;
// the result is whatever AtomicAdd reports.
static inline TAtomicBase AtomicSub(TAtomic& a, TAtomicBase v) {
    const TAtomicBase negated = -v;
    return AtomicAdd(a, negated);
}
|
||||
|
||||
// Subtracts `v` from `a` by delegating to AtomicGetAndAdd with the negated
// value; the result is whatever AtomicGetAndAdd reports.
static inline TAtomicBase AtomicGetAndSub(TAtomic& a, TAtomicBase v) {
    const TAtomicBase negated = -v;
    return AtomicGetAndAdd(a, negated);
}
|
||||
|
||||
#if defined(USE_GENERIC_SETGET)
|
||||
static inline TAtomicBase AtomicGet(const TAtomic& a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
static inline void AtomicSet(TAtomic& a, TAtomicBase v) {
|
||||
a = v;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Single lock attempt: compare-and-swap `*a` from 0 to 1 and
// report whether the swap happened.
static inline bool AtomicTryLock(TAtomic* a) {
    const TAtomicBase unlockedValue = 0;
    const TAtomicBase lockedValue = 1;
    return AtomicCas(a, lockedValue, unlockedValue);
}
|
||||
|
||||
// Test-then-lock: first reads `*a` and gives up if it is non-zero,
// only otherwise attempting the actual AtomicTryLock.
static inline bool AtomicTryAndTryLock(TAtomic* a) {
    if (AtomicGet(*a) != 0) {
        return false;
    }
    return AtomicTryLock(a);
}
|
||||
|
||||
static inline void AtomicUnlock(TAtomic* a) {
|
||||
ATOMIC_COMPILER_BARRIER();
|
||||
AtomicSet(*a, 0);
|
||||
}
|
||||
|
||||
#include "atomic_ops.h"
|
90
contrib/lfalloc/src/util/system/atomic_gcc.h
Normal file
90
contrib/lfalloc/src/util/system/atomic_gcc.h
Normal file
@ -0,0 +1,90 @@
|
||||
#pragma once
|
||||
|
||||
#define ATOMIC_COMPILER_BARRIER() __asm__ __volatile__("" \
|
||||
: \
|
||||
: \
|
||||
: "memory")
|
||||
|
||||
static inline TAtomicBase AtomicGet(const TAtomic& a) {
|
||||
TAtomicBase tmp;
|
||||
#if defined(_arm64_)
|
||||
__asm__ __volatile__(
|
||||
"ldar %x[value], %[ptr] \n\t"
|
||||
: [value] "=r"(tmp)
|
||||
: [ptr] "Q"(a)
|
||||
: "memory");
|
||||
#else
|
||||
__atomic_load(&a, &tmp, __ATOMIC_ACQUIRE);
|
||||
#endif
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static inline void AtomicSet(TAtomic& a, TAtomicBase v) {
|
||||
#if defined(_arm64_)
|
||||
__asm__ __volatile__(
|
||||
"stlr %x[value], %[ptr] \n\t"
|
||||
: [ptr] "=Q"(a)
|
||||
: [value] "r"(v)
|
||||
: "memory");
|
||||
#else
|
||||
__atomic_store(&a, &v, __ATOMIC_RELEASE);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicIncrement(TAtomic& p) {
|
||||
return __atomic_add_fetch(&p, 1, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicGetAndIncrement(TAtomic& p) {
|
||||
return __atomic_fetch_add(&p, 1, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicDecrement(TAtomic& p) {
|
||||
return __atomic_sub_fetch(&p, 1, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicGetAndDecrement(TAtomic& p) {
|
||||
return __atomic_fetch_sub(&p, 1, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicAdd(TAtomic& p, intptr_t v) {
|
||||
return __atomic_add_fetch(&p, v, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicGetAndAdd(TAtomic& p, intptr_t v) {
|
||||
return __atomic_fetch_add(&p, v, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicSwap(TAtomic* p, intptr_t v) {
|
||||
(void)p; // disable strange 'parameter set but not used' warning on gcc
|
||||
intptr_t ret;
|
||||
__atomic_exchange(p, &v, &ret, __ATOMIC_SEQ_CST);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline bool AtomicCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
|
||||
(void)a; // disable strange 'parameter set but not used' warning on gcc
|
||||
return __atomic_compare_exchange(a, &compare, &exchange, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicGetAndCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
|
||||
(void)a; // disable strange 'parameter set but not used' warning on gcc
|
||||
__atomic_compare_exchange(a, &compare, &exchange, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
|
||||
return compare;
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicOr(TAtomic& a, intptr_t b) {
|
||||
return __atomic_or_fetch(&a, b, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicXor(TAtomic& a, intptr_t b) {
|
||||
return __atomic_xor_fetch(&a, b, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicAnd(TAtomic& a, intptr_t b) {
|
||||
return __atomic_and_fetch(&a, b, __ATOMIC_SEQ_CST);
|
||||
}
|
||||
|
||||
static inline void AtomicBarrier() {
|
||||
__sync_synchronize();
|
||||
}
|
189
contrib/lfalloc/src/util/system/atomic_ops.h
Normal file
189
contrib/lfalloc/src/util/system/atomic_ops.h
Normal file
@ -0,0 +1,189 @@
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
template <typename T>
|
||||
inline TAtomic* AsAtomicPtr(T volatile* target) {
|
||||
return reinterpret_cast<TAtomic*>(target);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline const TAtomic* AsAtomicPtr(T const volatile* target) {
|
||||
return reinterpret_cast<const TAtomic*>(target);
|
||||
}
|
||||
|
||||
// integral types
|
||||
|
||||
template <typename T>
|
||||
struct TAtomicTraits {
|
||||
enum {
|
||||
Castable = std::is_integral<T>::value && sizeof(T) == sizeof(TAtomicBase) && !std::is_const<T>::value,
|
||||
};
|
||||
};
|
||||
|
||||
template <typename T, typename TT>
|
||||
using TEnableIfCastable = std::enable_if_t<TAtomicTraits<T>::Castable, TT>;
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, T> AtomicGet(T const volatile& target) {
|
||||
return static_cast<T>(AtomicGet(*AsAtomicPtr(&target)));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, void> AtomicSet(T volatile& target, TAtomicBase value) {
|
||||
AtomicSet(*AsAtomicPtr(&target), value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, T> AtomicIncrement(T volatile& target) {
|
||||
return static_cast<T>(AtomicIncrement(*AsAtomicPtr(&target)));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, T> AtomicGetAndIncrement(T volatile& target) {
|
||||
return static_cast<T>(AtomicGetAndIncrement(*AsAtomicPtr(&target)));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, T> AtomicDecrement(T volatile& target) {
|
||||
return static_cast<T>(AtomicDecrement(*AsAtomicPtr(&target)));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, T> AtomicGetAndDecrement(T volatile& target) {
|
||||
return static_cast<T>(AtomicGetAndDecrement(*AsAtomicPtr(&target)));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, T> AtomicAdd(T volatile& target, TAtomicBase value) {
|
||||
return static_cast<T>(AtomicAdd(*AsAtomicPtr(&target), value));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, T> AtomicGetAndAdd(T volatile& target, TAtomicBase value) {
|
||||
return static_cast<T>(AtomicGetAndAdd(*AsAtomicPtr(&target), value));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, T> AtomicSub(T volatile& target, TAtomicBase value) {
|
||||
return static_cast<T>(AtomicSub(*AsAtomicPtr(&target), value));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, T> AtomicGetAndSub(T volatile& target, TAtomicBase value) {
|
||||
return static_cast<T>(AtomicGetAndSub(*AsAtomicPtr(&target), value));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, T> AtomicSwap(T volatile* target, TAtomicBase exchange) {
|
||||
return static_cast<T>(AtomicSwap(AsAtomicPtr(target), exchange));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, bool> AtomicCas(T volatile* target, TAtomicBase exchange, TAtomicBase compare) {
|
||||
return AtomicCas(AsAtomicPtr(target), exchange, compare);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, T> AtomicGetAndCas(T volatile* target, TAtomicBase exchange, TAtomicBase compare) {
|
||||
return static_cast<T>(AtomicGetAndCas(AsAtomicPtr(target), exchange, compare));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, bool> AtomicTryLock(T volatile* target) {
|
||||
return AtomicTryLock(AsAtomicPtr(target));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, bool> AtomicTryAndTryLock(T volatile* target) {
|
||||
return AtomicTryAndTryLock(AsAtomicPtr(target));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, void> AtomicUnlock(T volatile* target) {
|
||||
AtomicUnlock(AsAtomicPtr(target));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, T> AtomicOr(T volatile& target, TAtomicBase value) {
|
||||
return static_cast<T>(AtomicOr(*AsAtomicPtr(&target), value));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, T> AtomicAnd(T volatile& target, TAtomicBase value) {
|
||||
return static_cast<T>(AtomicAnd(*AsAtomicPtr(&target), value));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline TEnableIfCastable<T, T> AtomicXor(T volatile& target, TAtomicBase value) {
|
||||
return static_cast<T>(AtomicXor(*AsAtomicPtr(&target), value));
|
||||
}
|
||||
|
||||
// pointer types
|
||||
|
||||
template <typename T>
|
||||
inline T* AtomicGet(T* const volatile& target) {
|
||||
return reinterpret_cast<T*>(AtomicGet(*AsAtomicPtr(&target)));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline void AtomicSet(T* volatile& target, T* value) {
|
||||
AtomicSet(*AsAtomicPtr(&target), reinterpret_cast<TAtomicBase>(value));
|
||||
}
|
||||
|
||||
using TNullPtr = decltype(nullptr);
|
||||
|
||||
template <typename T>
|
||||
inline void AtomicSet(T* volatile& target, TNullPtr) {
|
||||
AtomicSet(*AsAtomicPtr(&target), 0);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T* AtomicSwap(T* volatile* target, T* exchange) {
|
||||
return reinterpret_cast<T*>(AtomicSwap(AsAtomicPtr(target), reinterpret_cast<TAtomicBase>(exchange)));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T* AtomicSwap(T* volatile* target, TNullPtr) {
|
||||
return reinterpret_cast<T*>(AtomicSwap(AsAtomicPtr(target), 0));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline bool AtomicCas(T* volatile* target, T* exchange, T* compare) {
|
||||
return AtomicCas(AsAtomicPtr(target), reinterpret_cast<TAtomicBase>(exchange), reinterpret_cast<TAtomicBase>(compare));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T* AtomicGetAndCas(T* volatile* target, T* exchange, T* compare) {
|
||||
return reinterpret_cast<T*>(AtomicGetAndCas(AsAtomicPtr(target), reinterpret_cast<TAtomicBase>(exchange), reinterpret_cast<TAtomicBase>(compare)));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline bool AtomicCas(T* volatile* target, T* exchange, TNullPtr) {
|
||||
return AtomicCas(AsAtomicPtr(target), reinterpret_cast<TAtomicBase>(exchange), 0);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T* AtomicGetAndCas(T* volatile* target, T* exchange, TNullPtr) {
|
||||
return reinterpret_cast<T*>(AtomicGetAndCas(AsAtomicPtr(target), reinterpret_cast<TAtomicBase>(exchange), 0));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline bool AtomicCas(T* volatile* target, TNullPtr, T* compare) {
|
||||
return AtomicCas(AsAtomicPtr(target), 0, reinterpret_cast<TAtomicBase>(compare));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T* AtomicGetAndCas(T* volatile* target, TNullPtr, T* compare) {
|
||||
return reinterpret_cast<T*>(AtomicGetAndCas(AsAtomicPtr(target), 0, reinterpret_cast<TAtomicBase>(compare)));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline bool AtomicCas(T* volatile* target, TNullPtr, TNullPtr) {
|
||||
return AtomicCas(AsAtomicPtr(target), 0, 0);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T* AtomicGetAndCas(T* volatile* target, TNullPtr, TNullPtr) {
|
||||
return reinterpret_cast<T*>(AtomicGetAndCas(AsAtomicPtr(target), 0, 0));
|
||||
}
|
114
contrib/lfalloc/src/util/system/atomic_win.h
Normal file
114
contrib/lfalloc/src/util/system/atomic_win.h
Normal file
@ -0,0 +1,114 @@
|
||||
#pragma once
|
||||
|
||||
#include <intrin.h>
|
||||
|
||||
#define USE_GENERIC_SETGET
|
||||
|
||||
#if defined(_i386_)
|
||||
|
||||
#pragma intrinsic(_InterlockedIncrement)
|
||||
#pragma intrinsic(_InterlockedDecrement)
|
||||
#pragma intrinsic(_InterlockedExchangeAdd)
|
||||
#pragma intrinsic(_InterlockedExchange)
|
||||
#pragma intrinsic(_InterlockedCompareExchange)
|
||||
|
||||
static inline intptr_t AtomicIncrement(TAtomic& a) {
|
||||
return _InterlockedIncrement((volatile long*)&a);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicGetAndIncrement(TAtomic& a) {
|
||||
return _InterlockedIncrement((volatile long*)&a) - 1;
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicDecrement(TAtomic& a) {
|
||||
return _InterlockedDecrement((volatile long*)&a);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicGetAndDecrement(TAtomic& a) {
|
||||
return _InterlockedDecrement((volatile long*)&a) + 1;
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicAdd(TAtomic& a, intptr_t b) {
|
||||
return _InterlockedExchangeAdd((volatile long*)&a, b) + b;
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicGetAndAdd(TAtomic& a, intptr_t b) {
|
||||
return _InterlockedExchangeAdd((volatile long*)&a, b);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicSwap(TAtomic* a, intptr_t b) {
|
||||
return _InterlockedExchange((volatile long*)a, b);
|
||||
}
|
||||
|
||||
static inline bool AtomicCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
|
||||
return _InterlockedCompareExchange((volatile long*)a, exchange, compare) == compare;
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicGetAndCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
|
||||
return _InterlockedCompareExchange((volatile long*)a, exchange, compare);
|
||||
}
|
||||
|
||||
#else // _x86_64_
|
||||
|
||||
#pragma intrinsic(_InterlockedIncrement64)
|
||||
#pragma intrinsic(_InterlockedDecrement64)
|
||||
#pragma intrinsic(_InterlockedExchangeAdd64)
|
||||
#pragma intrinsic(_InterlockedExchange64)
|
||||
#pragma intrinsic(_InterlockedCompareExchange64)
|
||||
|
||||
static inline intptr_t AtomicIncrement(TAtomic& a) {
|
||||
return _InterlockedIncrement64((volatile __int64*)&a);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicGetAndIncrement(TAtomic& a) {
|
||||
return _InterlockedIncrement64((volatile __int64*)&a) - 1;
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicDecrement(TAtomic& a) {
|
||||
return _InterlockedDecrement64((volatile __int64*)&a);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicGetAndDecrement(TAtomic& a) {
|
||||
return _InterlockedDecrement64((volatile __int64*)&a) + 1;
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicAdd(TAtomic& a, intptr_t b) {
|
||||
return _InterlockedExchangeAdd64((volatile __int64*)&a, b) + b;
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicGetAndAdd(TAtomic& a, intptr_t b) {
|
||||
return _InterlockedExchangeAdd64((volatile __int64*)&a, b);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicSwap(TAtomic* a, intptr_t b) {
|
||||
return _InterlockedExchange64((volatile __int64*)a, b);
|
||||
}
|
||||
|
||||
static inline bool AtomicCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
|
||||
return _InterlockedCompareExchange64((volatile __int64*)a, exchange, compare) == compare;
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicGetAndCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
|
||||
return _InterlockedCompareExchange64((volatile __int64*)a, exchange, compare);
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicOr(TAtomic& a, intptr_t b) {
|
||||
return _InterlockedOr64(&a, b) | b;
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicAnd(TAtomic& a, intptr_t b) {
|
||||
return _InterlockedAnd64(&a, b) & b;
|
||||
}
|
||||
|
||||
static inline intptr_t AtomicXor(TAtomic& a, intptr_t b) {
|
||||
return _InterlockedXor64(&a, b) ^ b;
|
||||
}
|
||||
|
||||
#endif // _i386_
|
||||
|
||||
//TODO
|
||||
static inline void AtomicBarrier() {
|
||||
TAtomic val = 0;
|
||||
|
||||
AtomicSwap(&val, 0);
|
||||
}
|
617
contrib/lfalloc/src/util/system/compiler.h
Normal file
617
contrib/lfalloc/src/util/system/compiler.h
Normal file
@ -0,0 +1,617 @@
|
||||
#pragma once
|
||||
|
||||
// useful cross-platform definitions for compilers
|
||||
|
||||
/**
|
||||
* @def Y_FUNC_SIGNATURE
|
||||
*
|
||||
* Use this macro to get pretty function name (see example).
|
||||
*
|
||||
* @code
|
||||
* void Hi() {
|
||||
* Cout << Y_FUNC_SIGNATURE << Endl;
|
||||
* }
|
||||
|
||||
* template <typename T>
|
||||
* void Do() {
|
||||
* Cout << Y_FUNC_SIGNATURE << Endl;
|
||||
* }
|
||||
|
||||
* int main() {
|
||||
* Hi(); // void Hi()
|
||||
* Do<int>(); // void Do() [T = int]
|
||||
* Do<TString>(); // void Do() [T = TString]
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
#if defined(__GNUC__)
|
||||
#define Y_FUNC_SIGNATURE __PRETTY_FUNCTION__
|
||||
#elif defined(_MSC_VER)
|
||||
#define Y_FUNC_SIGNATURE __FUNCSIG__
|
||||
#else
|
||||
#define Y_FUNC_SIGNATURE ""
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define Y_PRINTF_FORMAT(n, m) __attribute__((__format__(__printf__, n, m)))
|
||||
#endif
|
||||
|
||||
#ifndef Y_PRINTF_FORMAT
|
||||
#define Y_PRINTF_FORMAT(n, m)
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
#define Y_NO_SANITIZE(...) __attribute__((no_sanitize(__VA_ARGS__)))
|
||||
#endif
|
||||
|
||||
#if !defined(Y_NO_SANITIZE)
|
||||
#define Y_NO_SANITIZE(...)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def Y_DECLARE_UNUSED
|
||||
*
|
||||
* Macro is needed to silence compiler warning about unused entities (e.g. function or argument).
|
||||
*
|
||||
* @code
|
||||
* Y_DECLARE_UNUSED int FunctionUsedSolelyForDebugPurposes();
|
||||
* assert(FunctionUsedSolelyForDebugPurposes() == 42);
|
||||
*
|
||||
* void Foo(const int argumentUsedOnlyForDebugPurposes Y_DECLARE_UNUSED) {
|
||||
* assert(argumentUsedOnlyForDebugPurposes == 42);
|
||||
* // however you may as well omit `Y_DECLARE_UNUSED` and use `Y_UNUSED` macro instead
|
||||
* Y_UNUSED(argumentUsedOnlyForDebugPurposes);
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
#ifdef __GNUC__
|
||||
#define Y_DECLARE_UNUSED __attribute__((unused))
|
||||
#endif
|
||||
|
||||
#ifndef Y_DECLARE_UNUSED
|
||||
#define Y_DECLARE_UNUSED
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define Y_LIKELY(Cond) __builtin_expect(!!(Cond), 1)
|
||||
#define Y_UNLIKELY(Cond) __builtin_expect(!!(Cond), 0)
|
||||
#define Y_PREFETCH_READ(Pointer, Priority) __builtin_prefetch((const void*)(Pointer), 0, Priority)
|
||||
#define Y_PREFETCH_WRITE(Pointer, Priority) __builtin_prefetch((const void*)(Pointer), 1, Priority)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def Y_FORCE_INLINE
|
||||
*
|
||||
* Macro to use in place of 'inline' in function declaration/definition to force
|
||||
* it to be inlined.
|
||||
*/
|
||||
#if !defined(Y_FORCE_INLINE)
|
||||
#if defined(CLANG_COVERAGE)
|
||||
#/* excessive __always_inline__ might significantly slow down compilation of an instrumented unit */
|
||||
#define Y_FORCE_INLINE inline
|
||||
#elif defined(_MSC_VER)
|
||||
#define Y_FORCE_INLINE __forceinline
|
||||
#elif defined(__GNUC__)
|
||||
#/* Clang also defines __GNUC__ (as 4) */
|
||||
#define Y_FORCE_INLINE inline __attribute__((__always_inline__))
|
||||
#else
|
||||
#define Y_FORCE_INLINE inline
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def Y_NO_INLINE
|
||||
*
|
||||
* Macro to use in place of 'inline' in function declaration/definition to
|
||||
* prevent it from being inlined.
|
||||
*/
|
||||
#if !defined(Y_NO_INLINE)
|
||||
#if defined(_MSC_VER)
|
||||
#define Y_NO_INLINE __declspec(noinline)
|
||||
#elif defined(__GNUC__) || defined(__INTEL_COMPILER)
|
||||
#/* Clang also defines __GNUC__ (as 4) */
|
||||
#define Y_NO_INLINE __attribute__((__noinline__))
|
||||
#else
|
||||
#define Y_NO_INLINE
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//to cheat compiler about strict aliasing or similar problems
|
||||
#if defined(__GNUC__)
|
||||
#define Y_FAKE_READ(X) \
|
||||
do { \
|
||||
__asm__ __volatile__("" \
|
||||
: \
|
||||
: "m"(X)); \
|
||||
} while (0)
|
||||
|
||||
#define Y_FAKE_WRITE(X) \
|
||||
do { \
|
||||
__asm__ __volatile__("" \
|
||||
: "=m"(X)); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#if !defined(Y_FAKE_READ)
|
||||
#define Y_FAKE_READ(X)
|
||||
#endif
|
||||
|
||||
#if !defined(Y_FAKE_WRITE)
|
||||
#define Y_FAKE_WRITE(X)
|
||||
#endif
|
||||
|
||||
// Fallbacks for compilers without a prefetch builtin: both arguments are
// evaluated and discarded. The whole expansion and each argument are
// parenthesized so that expression arguments (e.g. `1 + 2`) and use inside
// larger expressions expand correctly.
#ifndef Y_PREFETCH_READ
#define Y_PREFETCH_READ(Pointer, Priority) ((void)(const void*)(Pointer), (void)(Priority))
#endif

#ifndef Y_PREFETCH_WRITE
#define Y_PREFETCH_WRITE(Pointer, Priority) ((void)(const void*)(Pointer), (void)(Priority))
#endif
|
||||
|
||||
#ifndef Y_LIKELY
|
||||
#define Y_LIKELY(Cond) (Cond)
|
||||
#define Y_UNLIKELY(Cond) (Cond)
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define _packed __attribute__((packed))
|
||||
#else
|
||||
#define _packed
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define Y_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
|
||||
#endif
|
||||
|
||||
#ifndef Y_WARN_UNUSED_RESULT
|
||||
#define Y_WARN_UNUSED_RESULT
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define Y_HIDDEN __attribute__((visibility("hidden")))
|
||||
#endif
|
||||
|
||||
#if !defined(Y_HIDDEN)
|
||||
#define Y_HIDDEN
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define Y_PUBLIC __attribute__((visibility("default")))
|
||||
#endif
|
||||
|
||||
#if !defined(Y_PUBLIC)
|
||||
#define Y_PUBLIC
|
||||
#endif
|
||||
|
||||
#if !defined(Y_UNUSED) && !defined(__cplusplus)
|
||||
#define Y_UNUSED(var) (void)(var)
|
||||
#endif
|
||||
#if !defined(Y_UNUSED) && defined(__cplusplus)
|
||||
template <class... Types>
|
||||
constexpr Y_FORCE_INLINE int Y_UNUSED(Types&&...) {
|
||||
return 0;
|
||||
};
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def Y_ASSUME
|
||||
*
|
||||
* Macro that tells the compiler that it can generate optimized code
|
||||
* as if the given expression will always evaluate true.
|
||||
* The behavior is undefined if it ever evaluates false.
|
||||
*
|
||||
* @code
|
||||
* // factored into a function so that it's testable
|
||||
* inline int Avg(int x, int y) {
|
||||
* if (x >= 0 && y >= 0) {
|
||||
* return (static_cast<unsigned>(x) + static_cast<unsigned>(y)) >> 1;
|
||||
* } else {
|
||||
* // a slower implementation
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* // we know that xs and ys are non-negative from domain knowledge,
|
||||
* // but we can't change the types of xs and ys because of API constraints
|
||||
* int Foo(const TVector<int>& xs, const TVector<int>& ys) {
|
||||
* TVector<int> avgs;
|
||||
* avgs.resize(xs.size());
|
||||
* for (size_t i = 0; i < xs.size(); ++i) {
|
||||
* auto x = xs[i];
|
||||
* auto y = ys[i];
|
||||
* Y_ASSUME(x >= 0);
|
||||
* Y_ASSUME(y >= 0);
|
||||
* avgs[i] = Avg(x, y);
|
||||
* }
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
#if defined(__GNUC__)
|
||||
#define Y_ASSUME(condition) ((condition) ? (void)0 : __builtin_unreachable())
|
||||
#elif defined(_MSC_VER)
|
||||
#define Y_ASSUME(condition) __assume(condition)
|
||||
#else
|
||||
#define Y_ASSUME(condition) Y_UNUSED(condition)
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
[[noreturn]]
|
||||
#endif
|
||||
Y_HIDDEN void _YandexAbort();
|
||||
|
||||
/**
|
||||
* @def Y_UNREACHABLE
|
||||
*
|
||||
* Macro that marks the rest of the code branch unreachable.
|
||||
* The behavior is undefined if it's ever reached.
|
||||
*
|
||||
* @code
|
||||
* switch (i % 3) {
|
||||
* case 0:
|
||||
* return foo;
|
||||
* case 1:
|
||||
* return bar;
|
||||
* case 2:
|
||||
* return baz;
|
||||
* default:
|
||||
* Y_UNREACHABLE();
|
||||
* }
|
||||
* @endcode
|
||||
*/
|
||||
#if defined(__GNUC__) || defined(_MSC_VER)
|
||||
#define Y_UNREACHABLE() Y_ASSUME(0)
|
||||
#else
|
||||
#define Y_UNREACHABLE() _YandexAbort()
|
||||
#endif
|
||||
|
||||
#if defined(undefined_sanitizer_enabled)
|
||||
#define _ubsan_enabled_
|
||||
#endif
|
||||
|
||||
#ifdef __clang__
|
||||
|
||||
#if __has_feature(thread_sanitizer)
|
||||
#define _tsan_enabled_
|
||||
#endif
|
||||
#if __has_feature(memory_sanitizer)
|
||||
#define _msan_enabled_
|
||||
#endif
|
||||
#if __has_feature(address_sanitizer)
|
||||
#define _asan_enabled_
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if defined(thread_sanitizer_enabled) || defined(__SANITIZE_THREAD__)
|
||||
#define _tsan_enabled_
|
||||
#endif
|
||||
#if defined(memory_sanitizer_enabled)
|
||||
#define _msan_enabled_
|
||||
#endif
|
||||
#if defined(address_sanitizer_enabled) || defined(__SANITIZE_ADDRESS__)
|
||||
#define _asan_enabled_
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(_asan_enabled_) || defined(_msan_enabled_) || defined(_tsan_enabled_) || defined(_ubsan_enabled_)
|
||||
#define _san_enabled_
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define __PRETTY_FUNCTION__ __FUNCSIG__
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define Y_WEAK __attribute__((weak))
|
||||
#else
|
||||
#define Y_WEAK
|
||||
#endif
|
||||
|
||||
// Y_CUDA_AT_LEAST(x, y): true when compiling with a CUDA compiler whose
// version is at least x.y; always 0 when __CUDACC_VER_MAJOR__ is not defined.
// Arguments are parenthesized so expression arguments compare correctly.
#if defined(__CUDACC_VER_MAJOR__)
#define Y_CUDA_AT_LEAST(x, y) (__CUDACC_VER_MAJOR__ > (x) || (__CUDACC_VER_MAJOR__ == (x) && __CUDACC_VER_MINOR__ >= (y)))
#else
#define Y_CUDA_AT_LEAST(x, y) 0
#endif
|
||||
|
||||
// NVidia CUDA C++ Compiler did not know about noexcept keyword until version 9.0
|
||||
#if !Y_CUDA_AT_LEAST(9, 0)
|
||||
#if defined(__CUDACC__) && !defined(noexcept)
|
||||
#define noexcept throw ()
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define Y_COLD __attribute__((cold))
|
||||
#define Y_LEAF __attribute__((leaf))
|
||||
#define Y_WRAPPER __attribute__((artificial))
|
||||
#else
|
||||
#define Y_COLD
|
||||
#define Y_LEAF
|
||||
#define Y_WRAPPER
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def Y_PRAGMA
|
||||
*
|
||||
* Macro for use in other macros to define compiler pragma
|
||||
* See below for other usage examples
|
||||
*
|
||||
* @code
|
||||
* #if defined(__clang__) || defined(__GNUC__)
|
||||
* #define Y_PRAGMA_NO_WSHADOW \
|
||||
* Y_PRAGMA("GCC diagnostic ignored \"-Wshadow\"")
|
||||
* #elif defined(_MSC_VER)
|
||||
* #define Y_PRAGMA_NO_WSHADOW \
|
||||
* Y_PRAGMA(warning(disable:4456 4457))
|
||||
* #else
|
||||
* #define Y_PRAGMA_NO_WSHADOW
|
||||
* #endif
|
||||
* @endcode
|
||||
*/
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#define Y_PRAGMA(x) _Pragma(x)
|
||||
#elif defined(_MSC_VER)
|
||||
#define Y_PRAGMA(x) __pragma(x)
|
||||
#else
|
||||
#define Y_PRAGMA(x)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def Y_PRAGMA_DIAGNOSTIC_PUSH
|
||||
*
|
||||
* Cross-compiler pragma to save diagnostic settings
|
||||
*
|
||||
* @see
|
||||
* GCC: https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Pragmas.html
|
||||
* MSVC: https://msdn.microsoft.com/en-us/library/2c8f766e.aspx
|
||||
* Clang: https://clang.llvm.org/docs/UsersManual.html#controlling-diagnostics-via-pragmas
|
||||
*
|
||||
* @code
|
||||
* Y_PRAGMA_DIAGNOSTIC_PUSH
|
||||
* @endcode
|
||||
*/
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#define Y_PRAGMA_DIAGNOSTIC_PUSH \
|
||||
Y_PRAGMA("GCC diagnostic push")
|
||||
#elif defined(_MSC_VER)
|
||||
#define Y_PRAGMA_DIAGNOSTIC_PUSH \
|
||||
Y_PRAGMA(warning(push))
|
||||
#else
|
||||
#define Y_PRAGMA_DIAGNOSTIC_PUSH
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def Y_PRAGMA_DIAGNOSTIC_POP
|
||||
*
|
||||
* Cross-compiler pragma to restore diagnostic settings
|
||||
*
|
||||
* @see
|
||||
* GCC: https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Pragmas.html
|
||||
* MSVC: https://msdn.microsoft.com/en-us/library/2c8f766e.aspx
|
||||
* Clang: https://clang.llvm.org/docs/UsersManual.html#controlling-diagnostics-via-pragmas
|
||||
*
|
||||
* @code
|
||||
* Y_PRAGMA_DIAGNOSTIC_POP
|
||||
* @endcode
|
||||
*/
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#define Y_PRAGMA_DIAGNOSTIC_POP \
|
||||
Y_PRAGMA("GCC diagnostic pop")
|
||||
#elif defined(_MSC_VER)
|
||||
#define Y_PRAGMA_DIAGNOSTIC_POP \
|
||||
Y_PRAGMA(warning(pop))
|
||||
#else
|
||||
#define Y_PRAGMA_DIAGNOSTIC_POP
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def Y_PRAGMA_NO_WSHADOW
|
||||
*
|
||||
* Cross-compiler pragma to disable warnings about shadowing variables
|
||||
*
|
||||
* @code
|
||||
* Y_PRAGMA_DIAGNOSTIC_PUSH
|
||||
* Y_PRAGMA_NO_WSHADOW
|
||||
*
|
||||
* // some code which uses variable shadowing, e.g.:
|
||||
*
|
||||
* for (int i = 0; i < 100; ++i) {
|
||||
* Use(i);
|
||||
*
|
||||
* for (int i = 42; i < 100500; ++i) { // this i is shadowing previous i
|
||||
* AnotherUse(i);
|
||||
* }
|
||||
* }
|
||||
*
|
||||
* Y_PRAGMA_DIAGNOSTIC_POP
|
||||
* @endcode
|
||||
*/
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#define Y_PRAGMA_NO_WSHADOW \
|
||||
Y_PRAGMA("GCC diagnostic ignored \"-Wshadow\"")
|
||||
#elif defined(_MSC_VER)
|
||||
#define Y_PRAGMA_NO_WSHADOW \
|
||||
Y_PRAGMA(warning(disable : 4456 4457))
|
||||
#else
|
||||
#define Y_PRAGMA_NO_WSHADOW
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def Y_PRAGMA_NO_UNUSED_FUNCTION
|
||||
*
|
||||
* Cross-compiler pragma to disable warnings about unused functions
|
||||
*
|
||||
* @see
|
||||
* GCC: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html
|
||||
* Clang: https://clang.llvm.org/docs/DiagnosticsReference.html#wunused-function
|
||||
* MSVC: there is no such warning
|
||||
*
|
||||
* @code
|
||||
* Y_PRAGMA_DIAGNOSTIC_PUSH
|
||||
* Y_PRAGMA_NO_UNUSED_FUNCTION
|
||||
*
|
||||
* // some code which introduces a function which later will not be used, e.g.:
|
||||
*
|
||||
* void Foo() {
|
||||
* }
|
||||
*
|
||||
* int main() {
|
||||
* return 0; // Foo() never called
|
||||
* }
|
||||
*
|
||||
* Y_PRAGMA_DIAGNOSTIC_POP
|
||||
* @endcode
|
||||
*/
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#define Y_PRAGMA_NO_UNUSED_FUNCTION \
|
||||
Y_PRAGMA("GCC diagnostic ignored \"-Wunused-function\"")
|
||||
#else
|
||||
#define Y_PRAGMA_NO_UNUSED_FUNCTION
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def Y_PRAGMA_NO_UNUSED_PARAMETER
|
||||
*
|
||||
* Cross-compiler pragma to disable warnings about unused function parameters
|
||||
*
|
||||
* @see
|
||||
* GCC: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html
|
||||
* Clang: https://clang.llvm.org/docs/DiagnosticsReference.html#wunused-parameter
|
||||
* MSVC: https://msdn.microsoft.com/en-us/library/26kb9fy0.aspx
|
||||
*
|
||||
* @code
|
||||
* Y_PRAGMA_DIAGNOSTIC_PUSH
|
||||
* Y_PRAGMA_NO_UNUSED_PARAMETER
|
||||
*
|
||||
* // some code which introduces a function with unused parameter, e.g.:
|
||||
*
|
||||
* void foo(int a) {
|
||||
* // a is not referenced
|
||||
* }
|
||||
*
|
||||
* int main() {
|
||||
* foo(1);
|
||||
* return 0;
|
||||
* }
|
||||
*
|
||||
* Y_PRAGMA_DIAGNOSTIC_POP
|
||||
* @endcode
|
||||
*/
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#define Y_PRAGMA_NO_UNUSED_PARAMETER \
|
||||
Y_PRAGMA("GCC diagnostic ignored \"-Wunused-parameter\"")
|
||||
#elif defined(_MSC_VER)
|
||||
#define Y_PRAGMA_NO_UNUSED_PARAMETER \
|
||||
Y_PRAGMA(warning(disable : 4100))
|
||||
#else
|
||||
#define Y_PRAGMA_NO_UNUSED_PARAMETER
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def Y_PRAGMA_NO_DEPRECATED
|
||||
*
|
||||
* Cross-compiler pragma to disable warnings and errors about the use of deprecated declarations
|
||||
*
|
||||
* @see
|
||||
* GCC: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html
|
||||
* Clang: https://clang.llvm.org/docs/DiagnosticsReference.html#wdeprecated
|
||||
* MSVC: https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-3-c4996?view=vs-2017
|
||||
*
|
||||
* @code
|
||||
* Y_PRAGMA_DIAGNOSTIC_PUSH
|
||||
* Y_PRAGMA_NO_DEPRECATED
|
||||
*
|
||||
* [[deprecated]] void foo() {
|
||||
* // ...
|
||||
* }
|
||||
*
|
||||
* int main() {
|
||||
* foo();
|
||||
* return 0;
|
||||
* }
|
||||
*
|
||||
* Y_PRAGMA_DIAGNOSTIC_POP
|
||||
* @endcode
|
||||
*/
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
#define Y_PRAGMA_NO_DEPRECATED \
|
||||
Y_PRAGMA("GCC diagnostic ignored \"-Wdeprecated\"")
|
||||
#elif defined(_MSC_VER)
|
||||
#define Y_PRAGMA_NO_DEPRECATED \
|
||||
Y_PRAGMA(warning(disable : 4996))
|
||||
#else
|
||||
#define Y_PRAGMA_NO_DEPRECATED
|
||||
#endif
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
/**
|
||||
* @def Y_CONST_FUNCTION
|
||||
methods and functions marked with this attribute are promised to:
|
||||
1. do not have side effects
|
||||
2. not read global memory
|
||||
NOTE: this attribute can't be set for methods that depend on data pointed to by `this`
|
||||
this allows compilers to optimize such functions aggressively
|
||||
NOTE: in the common case this attribute can't be set if the method has pointer arguments
|
||||
NOTE: as a result, there is no reason to discard the result of such a method
|
||||
*/
|
||||
#define Y_CONST_FUNCTION [[gnu::const]]
|
||||
#endif
|
||||
|
||||
#if !defined(Y_CONST_FUNCTION)
|
||||
#define Y_CONST_FUNCTION
|
||||
#endif
|
||||
|
||||
#if defined(__clang__) || defined(__GNUC__)
|
||||
/**
|
||||
* @def Y_PURE_FUNCTION
|
||||
methods and functions marked with this attribute are promised to:
|
||||
1. do not have side effects
|
||||
2. result will be the same if no global memory changed
|
||||
this allows compilers to optimize such functions aggressively
|
||||
NOTE: as a result, there is no reason to discard the result of such a method
|
||||
*/
|
||||
#define Y_PURE_FUNCTION [[gnu::pure]]
|
||||
#endif
|
||||
|
||||
#if !defined(Y_PURE_FUNCTION)
|
||||
#define Y_PURE_FUNCTION
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def Y_HAVE_INT128
|
||||
*
|
||||
* Defined when the compiler supports __int128 extension
|
||||
*
|
||||
* @code
|
||||
*
|
||||
* #if defined(Y_HAVE_INT128)
|
||||
* __int128 myVeryBigInt = 12345678901234567890;
|
||||
* #endif
|
||||
*
|
||||
* @endcode
|
||||
*/
|
||||
#if defined(__SIZEOF_INT128__)
|
||||
#define Y_HAVE_INT128 1
|
||||
#endif
|
||||
|
||||
/**
|
||||
* XRAY macro must be passed to compiler if XRay is enabled.
|
||||
*
|
||||
* Define everything XRay-specific as a macro so that it doesn't cause errors
|
||||
* for compilers that don't support XRay.
|
||||
*/
|
||||
#if defined(XRAY) && defined(__cplusplus)
|
||||
#include <xray/xray_interface.h>
|
||||
#define Y_XRAY_ALWAYS_INSTRUMENT [[clang::xray_always_instrument]]
|
||||
#define Y_XRAY_NEVER_INSTRUMENT [[clang::xray_never_instrument]]
|
||||
#define Y_XRAY_CUSTOM_EVENT(__string, __length) \
|
||||
do { \
|
||||
__xray_customevent(__string, __length); \
|
||||
} while (0)
|
||||
#else
|
||||
#define Y_XRAY_ALWAYS_INSTRUMENT
|
||||
#define Y_XRAY_NEVER_INSTRUMENT
|
||||
#define Y_XRAY_CUSTOM_EVENT(__string, __length) \
|
||||
do { \
|
||||
} while (0)
|
||||
#endif
|
168
contrib/lfalloc/src/util/system/defaults.h
Normal file
168
contrib/lfalloc/src/util/system/defaults.h
Normal file
@ -0,0 +1,168 @@
|
||||
#pragma once
|
||||
|
||||
#include "platform.h"
|
||||
|
||||
#if defined _unix_
|
||||
#define LOCSLASH_C '/'
|
||||
#define LOCSLASH_S "/"
|
||||
#else
|
||||
#define LOCSLASH_C '\\'
|
||||
#define LOCSLASH_S "\\"
|
||||
#endif // _unix_
|
||||
|
||||
#if defined(__INTEL_COMPILER) && defined(__cplusplus)
|
||||
#include <new>
|
||||
#endif
|
||||
|
||||
// low and high parts of integers
|
||||
#if !defined(_win_)
|
||||
#include <sys/param.h>
|
||||
#endif
|
||||
|
||||
#if defined(BSD) || defined(_android_)
|
||||
|
||||
#if defined(BSD)
|
||||
#include <machine/endian.h>
|
||||
#endif
|
||||
|
||||
#if defined(_android_)
|
||||
#include <endian.h>
|
||||
#endif
|
||||
|
||||
#if (BYTE_ORDER == LITTLE_ENDIAN)
|
||||
#define _little_endian_
|
||||
#elif (BYTE_ORDER == BIG_ENDIAN)
|
||||
#define _big_endian_
|
||||
#else
|
||||
#error unknown endian not supported
|
||||
#endif
|
||||
|
||||
#elif (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(WHATEVER_THAT_HAS_BIG_ENDIAN)
|
||||
#define _big_endian_
|
||||
#else
|
||||
#define _little_endian_
|
||||
#endif
|
||||
|
||||
// alignment
|
||||
#if (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(__alpha__) || defined(__ia64__) || defined(WHATEVER_THAT_NEEDS_ALIGNING_QUADS)
|
||||
#define _must_align8_
|
||||
#endif
|
||||
|
||||
#if (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(__alpha__) || defined(__ia64__) || defined(WHATEVER_THAT_NEEDS_ALIGNING_LONGS)
|
||||
#define _must_align4_
|
||||
#endif
|
||||
|
||||
#if (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(__alpha__) || defined(__ia64__) || defined(WHATEVER_THAT_NEEDS_ALIGNING_SHORTS)
|
||||
#define _must_align2_
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define alias_hack __attribute__((__may_alias__))
|
||||
#endif
|
||||
|
||||
#ifndef alias_hack
|
||||
#define alias_hack
|
||||
#endif
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
|
||||
#define PRAGMA(x) _Pragma(#x)
|
||||
#define RCSID(idstr) PRAGMA(comment(exestr, idstr))
|
||||
#else
|
||||
#define RCSID(idstr) static const char rcsid[] = idstr
|
||||
#endif
|
||||
|
||||
#include "compiler.h"
|
||||
|
||||
#ifdef _win_
|
||||
#include <malloc.h>
|
||||
#elif defined(_sun_)
|
||||
#include <alloca.h>
|
||||
#endif
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define Y_IF_DEBUG(X)
|
||||
#else
|
||||
#define Y_IF_DEBUG(X) X
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @def Y_ARRAY_SIZE
|
||||
*
|
||||
* This macro is needed to get number of elements in a statically allocated fixed size array. The
|
||||
* expression is a compile-time constant and therefore can be used in compile time computations.
|
||||
*
|
||||
* @code
|
||||
* enum ENumbers {
|
||||
* EN_ONE,
|
||||
* EN_TWO,
|
||||
* EN_SIZE
|
||||
* }
|
||||
*
|
||||
* const char* NAMES[] = {
|
||||
* "one",
|
||||
* "two"
|
||||
* }
|
||||
*
|
||||
* static_assert(Y_ARRAY_SIZE(NAMES) == EN_SIZE, "you should define `NAME` for each enumeration");
|
||||
* @endcode
|
||||
*
|
||||
* This macro also catches type errors. If you see a compiler error like "warning: division by zero
|
||||
* is undefined" when using `Y_ARRAY_SIZE` then you are probably giving it a pointer.
|
||||
*
|
||||
* Since all of our code is expected to work on a 64 bit platform where pointers are 8 bytes we may
|
||||
* falsely accept pointers to types of sizes that are divisors of 8 (1, 2, 4 and 8).
|
||||
*/
|
||||
#if defined(__cplusplus)
|
||||
namespace NArraySizePrivate {
|
||||
template <class T>
|
||||
struct TArraySize;
|
||||
|
||||
template <class T, size_t N>
|
||||
struct TArraySize<T[N]> {
|
||||
enum {
|
||||
Result = N
|
||||
};
|
||||
};
|
||||
|
||||
template <class T, size_t N>
|
||||
struct TArraySize<T (&)[N]> {
|
||||
enum {
|
||||
Result = N
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
#define Y_ARRAY_SIZE(arr) ((size_t)::NArraySizePrivate::TArraySize<decltype(arr)>::Result)
|
||||
#else
|
||||
#undef Y_ARRAY_SIZE
|
||||
/* C fallback for Y_ARRAY_SIZE: number of elements in a statically sized array.
 * The outer division by !(sizeof(arr) % sizeof((arr)[0])) turns a size mismatch
 * (typically a pointer passed instead of an array) into a division by zero that
 * the compiler diagnoses. This branch is compiled as C only, so a C-style cast
 * is used here; static_cast is C++-only syntax and would not compile in C. */
#define Y_ARRAY_SIZE(arr) \
    ((sizeof(arr) / sizeof((arr)[0])) / (size_t)(!(sizeof(arr) % sizeof((arr)[0]))))
|
||||
#endif
|
||||
|
||||
#undef Y_ARRAY_BEGIN
|
||||
#define Y_ARRAY_BEGIN(arr) (arr)
|
||||
|
||||
#undef Y_ARRAY_END
|
||||
#define Y_ARRAY_END(arr) ((arr) + Y_ARRAY_SIZE(arr))
|
||||
|
||||
/**
|
||||
* Concatenates two symbols, even if one of them is itself a macro.
|
||||
*/
|
||||
#define Y_CAT(X, Y) Y_CAT_I(X, Y)
|
||||
#define Y_CAT_I(X, Y) Y_CAT_II(X, Y)
|
||||
#define Y_CAT_II(X, Y) X##Y
|
||||
|
||||
#define Y_STRINGIZE(X) UTIL_PRIVATE_STRINGIZE_AUX(X)
|
||||
#define UTIL_PRIVATE_STRINGIZE_AUX(X) #X
|
||||
|
||||
#if defined(__COUNTER__)
|
||||
#define Y_GENERATE_UNIQUE_ID(N) Y_CAT(N, __COUNTER__)
|
||||
#endif
|
||||
|
||||
#if !defined(Y_GENERATE_UNIQUE_ID)
|
||||
#define Y_GENERATE_UNIQUE_ID(N) Y_CAT(N, __LINE__)
|
||||
#endif
|
||||
|
||||
#define NPOS ((size_t)-1)
|
242
contrib/lfalloc/src/util/system/platform.h
Normal file
242
contrib/lfalloc/src/util/system/platform.h
Normal file
@ -0,0 +1,242 @@
|
||||
#pragma once
|
||||
|
||||
// What OS ?
|
||||
// our definition has the form _{osname}_
|
||||
|
||||
#if defined(_WIN64)
|
||||
#define _win64_
|
||||
#define _win32_
|
||||
#elif defined(__WIN32__) || defined(_WIN32) // _WIN32 is also defined by the 64-bit compiler for backward compatibility
|
||||
#define _win32_
|
||||
#else
|
||||
#define _unix_
|
||||
#if defined(__sun__) || defined(sun) || defined(sparc) || defined(__sparc)
|
||||
#define _sun_
|
||||
#endif
|
||||
#if defined(__hpux__)
|
||||
#define _hpux_
|
||||
#endif
|
||||
#if defined(__linux__)
|
||||
#define _linux_
|
||||
#endif
|
||||
#if defined(__FreeBSD__)
|
||||
#define _freebsd_
|
||||
#endif
|
||||
#if defined(__CYGWIN__)
|
||||
#define _cygwin_
|
||||
#endif
|
||||
#if defined(__APPLE__)
|
||||
#define _darwin_
|
||||
#endif
|
||||
#if defined(__ANDROID__)
|
||||
#define _android_
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__IOS__)
|
||||
#define _ios_
|
||||
#endif
|
||||
|
||||
#if defined(_linux_)
|
||||
#if defined(_musl_)
|
||||
//nothing to do
|
||||
#elif defined(_android_)
|
||||
#define _bionic_
|
||||
#else
|
||||
#define _glibc_
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(_darwin_)
|
||||
#define unix
|
||||
#define __unix__
|
||||
#endif
|
||||
|
||||
#if defined(_win32_) || defined(_win64_)
|
||||
#define _win_
|
||||
#endif
|
||||
|
||||
#if defined(__arm__) || defined(__ARM__) || defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM)
|
||||
#if defined(__arm64) || defined(__arm64__) || defined(__aarch64__)
|
||||
#define _arm64_
|
||||
#else
|
||||
#define _arm32_
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(_arm64_) || defined(_arm32_)
|
||||
#define _arm_
|
||||
#endif
|
||||
|
||||
/* __ia64__ and __x86_64__ - defined by GNU C.
|
||||
* _M_IA64, _M_X64, _M_AMD64 - defined by Visual Studio.
|
||||
*
|
||||
* Microsoft can define _M_IX86, _M_AMD64 (before Visual Studio 8)
|
||||
* or _M_X64 (starting in Visual Studio 8).
|
||||
*/
|
||||
#if defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64)
|
||||
#define _x86_64_
|
||||
#endif
|
||||
|
||||
#if defined(__i386__) || defined(_M_IX86)
|
||||
#define _i386_
|
||||
#endif
|
||||
|
||||
#if defined(__ia64__) || defined(_M_IA64)
|
||||
#define _ia64_
|
||||
#endif
|
||||
|
||||
#if defined(__powerpc__)
|
||||
#define _ppc_
|
||||
#endif
|
||||
|
||||
#if defined(__powerpc64__)
|
||||
#define _ppc64_
|
||||
#endif
|
||||
|
||||
#if !defined(sparc) && !defined(__sparc) && !defined(__hpux__) && !defined(__alpha__) && !defined(_ia64_) && !defined(_x86_64_) && !defined(_arm_) && !defined(_i386_) && !defined(_ppc_) && !defined(_ppc64_)
|
||||
#error "platform not defined, please, define one"
|
||||
#endif
|
||||
|
||||
#if defined(_x86_64_) || defined(_i386_)
|
||||
#define _x86_
|
||||
#endif
|
||||
|
||||
#if defined(__MIC__)
|
||||
#define _mic_
|
||||
#define _k1om_
|
||||
#endif
|
||||
|
||||
// stdio or MessageBox
|
||||
#if defined(__CONSOLE__) || defined(_CONSOLE)
|
||||
#define _console_
|
||||
#endif
|
||||
#if (defined(_win_) && !defined(_console_))
|
||||
#define _windows_
|
||||
#elif !defined(_console_)
|
||||
#define _console_
|
||||
#endif
|
||||
|
||||
#if defined(__SSE__) || defined(SSE_ENABLED)
|
||||
#define _sse_
|
||||
#endif
|
||||
|
||||
#if defined(__SSE2__) || defined(SSE2_ENABLED)
|
||||
#define _sse2_
|
||||
#endif
|
||||
|
||||
#if defined(__SSE3__) || defined(SSE3_ENABLED)
|
||||
#define _sse3_
|
||||
#endif
|
||||
|
||||
#if defined(__SSSE3__) || defined(SSSE3_ENABLED)
|
||||
#define _ssse3_
|
||||
#endif
|
||||
|
||||
#if defined(POPCNT_ENABLED)
|
||||
#define _popcnt_
|
||||
#endif
|
||||
|
||||
#if defined(__DLL__) || defined(_DLL)
|
||||
#define _dll_
|
||||
#endif
|
||||
|
||||
// 16, 32 or 64
|
||||
#if defined(__sparc_v9__) || defined(_x86_64_) || defined(_ia64_) || defined(_arm64_) || defined(_ppc64_)
|
||||
#define _64_
|
||||
#else
|
||||
#define _32_
|
||||
#endif
|
||||
|
||||
/* All modern 64-bit Unix systems use scheme LP64 (long, pointers are 64-bit).
|
||||
* Microsoft uses a different scheme: LLP64 (long long, pointers are 64-bit).
|
||||
*
|
||||
* Scheme LP64 LLP64
|
||||
* char 8 8
|
||||
* short 16 16
|
||||
* int 32 32
|
||||
* long 64 32
|
||||
* long long 64 64
|
||||
* pointer 64 64
|
||||
*/
|
||||
|
||||
#if defined(_32_)
|
||||
#define SIZEOF_PTR 4
|
||||
#elif defined(_64_)
|
||||
#define SIZEOF_PTR 8
|
||||
#endif
|
||||
|
||||
#define PLATFORM_DATA_ALIGN SIZEOF_PTR
|
||||
|
||||
#if !defined(SIZEOF_PTR)
|
||||
#error todo
|
||||
#endif
|
||||
|
||||
#define SIZEOF_CHAR 1
|
||||
#define SIZEOF_UNSIGNED_CHAR 1
|
||||
#define SIZEOF_SHORT 2
|
||||
#define SIZEOF_UNSIGNED_SHORT 2
|
||||
#define SIZEOF_INT 4
|
||||
#define SIZEOF_UNSIGNED_INT 4
|
||||
|
||||
#if defined(_32_)
|
||||
#define SIZEOF_LONG 4
|
||||
#define SIZEOF_UNSIGNED_LONG 4
|
||||
#elif defined(_64_)
|
||||
#if defined(_win_)
|
||||
#define SIZEOF_LONG 4
|
||||
#define SIZEOF_UNSIGNED_LONG 4
|
||||
#else
|
||||
#define SIZEOF_LONG 8
|
||||
#define SIZEOF_UNSIGNED_LONG 8
|
||||
#endif // _win_
|
||||
#endif // _32_
|
||||
|
||||
#if !defined(SIZEOF_LONG)
|
||||
#error todo
|
||||
#endif
|
||||
|
||||
#define SIZEOF_LONG_LONG 8
|
||||
#define SIZEOF_UNSIGNED_LONG_LONG 8
|
||||
|
||||
#undef SIZEOF_SIZE_T // in case we include <Python.h> which defines it, too
|
||||
#define SIZEOF_SIZE_T SIZEOF_PTR
|
||||
|
||||
#if defined(__INTEL_COMPILER)
|
||||
#pragma warning(disable 1292)
|
||||
#pragma warning(disable 1469)
|
||||
#pragma warning(disable 193)
|
||||
#pragma warning(disable 271)
|
||||
#pragma warning(disable 383)
|
||||
#pragma warning(disable 424)
|
||||
#pragma warning(disable 444)
|
||||
#pragma warning(disable 584)
|
||||
#pragma warning(disable 593)
|
||||
#pragma warning(disable 981)
|
||||
#pragma warning(disable 1418)
|
||||
#pragma warning(disable 304)
|
||||
#pragma warning(disable 810)
|
||||
#pragma warning(disable 1029)
|
||||
#pragma warning(disable 1419)
|
||||
#pragma warning(disable 177)
|
||||
#pragma warning(disable 522)
|
||||
#pragma warning(disable 858)
|
||||
#pragma warning(disable 111)
|
||||
#pragma warning(disable 1599)
|
||||
#pragma warning(disable 411)
|
||||
#pragma warning(disable 304)
|
||||
#pragma warning(disable 858)
|
||||
#pragma warning(disable 444)
|
||||
#pragma warning(disable 913)
|
||||
#pragma warning(disable 310)
|
||||
#pragma warning(disable 167)
|
||||
#pragma warning(disable 180)
|
||||
#pragma warning(disable 1572)
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#undef _WINSOCKAPI_
|
||||
#define _WINSOCKAPI_
|
||||
#undef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
117
contrib/lfalloc/src/util/system/types.h
Normal file
117
contrib/lfalloc/src/util/system/types.h
Normal file
@ -0,0 +1,117 @@
|
||||
#pragma once
|
||||
|
||||
// DO_NOT_STYLE
|
||||
|
||||
#include "platform.h"
|
||||
|
||||
#include <inttypes.h>
|
||||
|
||||
typedef int8_t i8;
|
||||
typedef int16_t i16;
|
||||
typedef uint8_t ui8;
|
||||
typedef uint16_t ui16;
|
||||
|
||||
typedef int yssize_t;
|
||||
#define PRIYSZT "d"
|
||||
|
||||
#if defined(_darwin_) && defined(_32_)
|
||||
typedef unsigned long ui32;
|
||||
typedef long i32;
|
||||
#else
|
||||
typedef uint32_t ui32;
|
||||
typedef int32_t i32;
|
||||
#endif
|
||||
|
||||
#if defined(_darwin_) && defined(_64_)
|
||||
typedef unsigned long ui64;
|
||||
typedef long i64;
|
||||
#else
|
||||
typedef uint64_t ui64;
|
||||
typedef int64_t i64;
|
||||
#endif
|
||||
|
||||
#define LL(number) INT64_C(number)
|
||||
#define ULL(number) UINT64_C(number)
|
||||
|
||||
// Macro for size_t and ptrdiff_t types
|
||||
#if defined(_32_)
|
||||
# if defined(_darwin_)
|
||||
# define PRISZT "lu"
|
||||
# undef PRIi32
|
||||
# define PRIi32 "li"
|
||||
# undef SCNi32
|
||||
# define SCNi32 "li"
|
||||
# undef PRId32
|
||||
# define PRId32 "li"
|
||||
# undef SCNd32
|
||||
# define SCNd32 "li"
|
||||
# undef PRIu32
|
||||
# define PRIu32 "lu"
|
||||
# undef SCNu32
|
||||
# define SCNu32 "lu"
|
||||
# undef PRIx32
|
||||
# define PRIx32 "lx"
|
||||
# undef SCNx32
|
||||
# define SCNx32 "lx"
|
||||
# elif !defined(_cygwin_)
|
||||
# define PRISZT PRIu32
|
||||
# else
|
||||
# define PRISZT "u"
|
||||
# endif
|
||||
# define SCNSZT SCNu32
|
||||
# define PRIPDT PRIi32
|
||||
# define SCNPDT SCNi32
|
||||
# define PRITMT PRIi32
|
||||
# define SCNTMT SCNi32
|
||||
#elif defined(_64_)
|
||||
# if defined(_darwin_)
|
||||
# define PRISZT "lu"
|
||||
# undef PRIu64
|
||||
# define PRIu64 PRISZT
|
||||
# undef PRIx64
|
||||
# define PRIx64 "lx"
|
||||
# undef PRIX64
|
||||
# define PRIX64 "lX"
|
||||
# undef PRId64
|
||||
# define PRId64 "ld"
|
||||
# undef PRIi64
|
||||
# define PRIi64 "li"
|
||||
# undef SCNi64
|
||||
# define SCNi64 "li"
|
||||
# undef SCNu64
|
||||
# define SCNu64 "lu"
|
||||
# undef SCNx64
|
||||
# define SCNx64 "lx"
|
||||
# else
|
||||
# define PRISZT PRIu64
|
||||
# endif
|
||||
# define SCNSZT SCNu64
|
||||
# define PRIPDT PRIi64
|
||||
# define SCNPDT SCNi64
|
||||
# define PRITMT PRIi64
|
||||
# define SCNTMT SCNi64
|
||||
#else
|
||||
# error "Unsupported platform"
|
||||
#endif
|
||||
|
||||
// SUPERLONG
|
||||
#if !defined(DONT_USE_SUPERLONG) && !defined(SUPERLONG_MAX)
|
||||
#define SUPERLONG_MAX ~LL(0)
|
||||
typedef i64 SUPERLONG;
|
||||
#endif
|
||||
|
||||
// UNICODE
|
||||
// UCS-2, native byteorder
|
||||
typedef ui16 wchar16;
|
||||
// internal symbol type: UTF-16LE
|
||||
typedef wchar16 TChar;
|
||||
typedef ui32 wchar32;
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <basetsd.h>
|
||||
typedef SSIZE_T ssize_t;
|
||||
#define HAVE_SSIZE_T 1
|
||||
#include <wchar.h>
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
2
contrib/poco
vendored
2
contrib/poco
vendored
@ -1 +1 @@
|
||||
Subproject commit fe5505e56c27b6ecb0dcbc40c49dc2caf4e9637f
|
||||
Subproject commit 29439cf7fa32c1a2d62d925bb6d6a3f14668a4a2
|
@ -20,7 +20,7 @@ set (CONFIG_VERSION ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config_version.h)
|
||||
set (CONFIG_COMMON ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config.h)
|
||||
|
||||
include (cmake/version.cmake)
|
||||
message (STATUS "Will build ${VERSION_FULL} revision ${VERSION_REVISION}")
|
||||
message (STATUS "Will build ${VERSION_FULL} revision ${VERSION_REVISION} ${VERSION_OFFICIAL}")
|
||||
configure_file (src/Common/config.h.in ${CONFIG_COMMON})
|
||||
configure_file (src/Common/config_version.h.in ${CONFIG_VERSION})
|
||||
|
||||
@ -155,7 +155,6 @@ if (USE_EMBEDDED_COMPILER)
|
||||
target_include_directories (dbms SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS})
|
||||
endif ()
|
||||
|
||||
|
||||
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
|
||||
# Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size.
|
||||
set_source_files_properties(
|
||||
@ -214,6 +213,10 @@ target_link_libraries (clickhouse_common_io
|
||||
|
||||
target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR})
|
||||
|
||||
if (USE_LFALLOC)
|
||||
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${LFALLOC_INCLUDE_DIR})
|
||||
endif ()
|
||||
|
||||
if(CPUID_LIBRARY)
|
||||
target_link_libraries(clickhouse_common_io PRIVATE ${CPUID_LIBRARY})
|
||||
endif()
|
||||
|
@ -1,11 +1,11 @@
|
||||
# This strings autochanged from release_lib.sh:
|
||||
set(VERSION_REVISION 54417)
|
||||
set(VERSION_REVISION 54418)
|
||||
set(VERSION_MAJOR 19)
|
||||
set(VERSION_MINOR 5)
|
||||
set(VERSION_MINOR 6)
|
||||
set(VERSION_PATCH 1)
|
||||
set(VERSION_GITHASH 628ed349c335b79a441a1bd6e4bc791d61dfe62c)
|
||||
set(VERSION_DESCRIBE v19.5.1.1-testing)
|
||||
set(VERSION_STRING 19.5.1.1)
|
||||
set(VERSION_GITHASH 30d3496c36cf3945c9828ac0b7cf7d1774a9f845)
|
||||
set(VERSION_DESCRIBE v19.6.1.1-testing)
|
||||
set(VERSION_STRING 19.6.1.1)
|
||||
# end of autochange
|
||||
|
||||
set(VERSION_EXTRA "" CACHE STRING "")
|
||||
@ -24,3 +24,7 @@ set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}")
|
||||
set (VERSION_SO "${VERSION_STRING}")
|
||||
|
||||
math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000")
|
||||
|
||||
if(YANDEX_OFFICIAL_BUILD)
|
||||
set(VERSION_OFFICIAL " (official build)")
|
||||
endif()
|
||||
|
@ -1523,7 +1523,7 @@ private:
|
||||
|
||||
void showClientVersion()
|
||||
{
|
||||
std::cout << DBMS_NAME << " client version " << VERSION_STRING << "." << std::endl;
|
||||
std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl;
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -1,7 +1,6 @@
|
||||
#include "ClusterCopier.h"
|
||||
|
||||
#include <chrono>
|
||||
|
||||
#include <Poco/Util/XMLConfiguration.h>
|
||||
#include <Poco/Logger.h>
|
||||
#include <Poco/ConsoleChannel.h>
|
||||
@ -13,14 +12,11 @@
|
||||
#include <Poco/FileChannel.h>
|
||||
#include <Poco/SplitterChannel.h>
|
||||
#include <Poco/Util/HelpFormatter.h>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <pcg_random.hpp>
|
||||
|
||||
#include <common/logger_useful.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <daemon/OwnPatternFormatter.h>
|
||||
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/ZooKeeper/ZooKeeper.h>
|
||||
#include <Common/ZooKeeper/KeeperException.h>
|
||||
@ -61,6 +57,7 @@
|
||||
#include <DataStreams/NullBlockOutputStream.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/ReadBufferFromFile.h>
|
||||
#include <Functions/registerFunctions.h>
|
||||
#include <TableFunctions/registerTableFunctions.h>
|
||||
#include <AggregateFunctions/registerAggregateFunctions.h>
|
||||
@ -500,9 +497,6 @@ static ASTPtr extractPartitionKey(const ASTPtr & storage_ast)
|
||||
ErrorCodes::BAD_ARGUMENTS);
|
||||
}
|
||||
|
||||
ASTPtr arguments_ast = engine.arguments->clone();
|
||||
ASTs & arguments = arguments_ast->children;
|
||||
|
||||
if (isExtendedDefinitionStorage(storage_ast))
|
||||
{
|
||||
if (storage.partition_by)
|
||||
@ -516,6 +510,12 @@ static ASTPtr extractPartitionKey(const ASTPtr & storage_ast)
|
||||
bool is_replicated = startsWith(engine.name, "Replicated");
|
||||
size_t min_args = is_replicated ? 3 : 1;
|
||||
|
||||
if (!engine.arguments)
|
||||
throw Exception("Expected arguments in " + storage_str, ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
ASTPtr arguments_ast = engine.arguments->clone();
|
||||
ASTs & arguments = arguments_ast->children;
|
||||
|
||||
if (arguments.size() < min_args)
|
||||
throw Exception("Expected at least " + toString(min_args) + " arguments in " + storage_str, ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
@ -894,6 +894,28 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void uploadTaskDescription(const std::string & task_path, const std::string & task_file, const bool force)
|
||||
{
|
||||
auto local_task_description_path = task_path + "/description";
|
||||
|
||||
String task_config_str;
|
||||
{
|
||||
ReadBufferFromFile in(task_file);
|
||||
readStringUntilEOF(task_config_str, in);
|
||||
}
|
||||
if (task_config_str.empty())
|
||||
return;
|
||||
|
||||
auto zookeeper = context.getZooKeeper();
|
||||
|
||||
zookeeper->createAncestors(local_task_description_path);
|
||||
auto code = zookeeper->tryCreate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent);
|
||||
if (code && force)
|
||||
zookeeper->createOrUpdate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent);
|
||||
|
||||
LOG_DEBUG(log, "Task description " << ((code && !force) ? "not " : "") << "uploaded to " << local_task_description_path << " with result " << code << " ("<< zookeeper->error2string(code) << ")");
|
||||
}
|
||||
|
||||
void reloadTaskDescription()
|
||||
{
|
||||
auto zookeeper = context.getZooKeeper();
|
||||
@ -2104,6 +2126,10 @@ void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options)
|
||||
|
||||
options.addOption(Poco::Util::Option("task-path", "", "path to task in ZooKeeper")
|
||||
.argument("task-path").binding("task-path"));
|
||||
options.addOption(Poco::Util::Option("task-file", "", "path to task file for uploading in ZooKeeper to task-path")
|
||||
.argument("task-file").binding("task-file"));
|
||||
options.addOption(Poco::Util::Option("task-upload-force", "", "Force upload task-file even node already exists")
|
||||
.argument("task-upload-force").binding("task-upload-force"));
|
||||
options.addOption(Poco::Util::Option("safe-mode", "", "disables ALTER DROP PARTITION in case of errors")
|
||||
.binding("safe-mode"));
|
||||
options.addOption(Poco::Util::Option("copy-fault-probability", "", "the copying fails with specified probability (used to test partition state recovering)")
|
||||
@ -2154,6 +2180,11 @@ void ClusterCopierApp::mainImpl()
|
||||
auto copier = std::make_unique<ClusterCopier>(task_path, host_id, default_database, *context);
|
||||
copier->setSafeMode(is_safe_mode);
|
||||
copier->setCopyFaultProbability(copy_fault_probability);
|
||||
|
||||
auto task_file = config().getString("task-file", "");
|
||||
if (!task_file.empty())
|
||||
copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false));
|
||||
|
||||
copier->init();
|
||||
copier->process();
|
||||
}
|
||||
|
@ -369,7 +369,7 @@ void LocalServer::setupUsers()
|
||||
|
||||
static void showClientVersion()
|
||||
{
|
||||
std::cout << DBMS_NAME << " client version " << VERSION_STRING << "." << '\n';
|
||||
std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." << '\n';
|
||||
}
|
||||
|
||||
std::string LocalServer::getHelpHeader() const
|
||||
|
@ -296,7 +296,7 @@ void HTTPHandler::processQuery(
|
||||
/// The client can pass a HTTP header indicating supported compression method (gzip or deflate).
|
||||
String http_response_compression_methods = request.get("Accept-Encoding", "");
|
||||
bool client_supports_http_compression = false;
|
||||
ZlibCompressionMethod http_response_compression_method {};
|
||||
CompressionMethod http_response_compression_method {};
|
||||
|
||||
if (!http_response_compression_methods.empty())
|
||||
{
|
||||
@ -305,12 +305,17 @@ void HTTPHandler::processQuery(
|
||||
if (std::string::npos != http_response_compression_methods.find("gzip"))
|
||||
{
|
||||
client_supports_http_compression = true;
|
||||
http_response_compression_method = ZlibCompressionMethod::Gzip;
|
||||
http_response_compression_method = CompressionMethod::Gzip;
|
||||
}
|
||||
else if (std::string::npos != http_response_compression_methods.find("deflate"))
|
||||
{
|
||||
client_supports_http_compression = true;
|
||||
http_response_compression_method = ZlibCompressionMethod::Zlib;
|
||||
http_response_compression_method = CompressionMethod::Zlib;
|
||||
}
|
||||
else if (http_response_compression_methods == "br")
|
||||
{
|
||||
client_supports_http_compression = true;
|
||||
http_response_compression_method = CompressionMethod::Brotli;
|
||||
}
|
||||
}
|
||||
|
||||
@ -394,11 +399,11 @@ void HTTPHandler::processQuery(
|
||||
{
|
||||
if (http_request_compression_method_str == "gzip")
|
||||
{
|
||||
in_post = std::make_unique<ZlibInflatingReadBuffer>(*in_post_raw, ZlibCompressionMethod::Gzip);
|
||||
in_post = std::make_unique<ZlibInflatingReadBuffer>(*in_post_raw, CompressionMethod::Gzip);
|
||||
}
|
||||
else if (http_request_compression_method_str == "deflate")
|
||||
{
|
||||
in_post = std::make_unique<ZlibInflatingReadBuffer>(*in_post_raw, ZlibCompressionMethod::Zlib);
|
||||
in_post = std::make_unique<ZlibInflatingReadBuffer>(*in_post_raw, CompressionMethod::Zlib);
|
||||
}
|
||||
#if USE_BROTLI
|
||||
else if (http_request_compression_method_str == "br")
|
||||
@ -606,7 +611,7 @@ void HTTPHandler::processQuery(
|
||||
|
||||
executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context,
|
||||
[&response] (const String & content_type) { response.setContentType(content_type); },
|
||||
[&response] (const String & current_query_id) { response.add("Query-Id", current_query_id); });
|
||||
[&response] (const String & current_query_id) { response.add("X-ClickHouse-Query-Id", current_query_id); });
|
||||
|
||||
if (used_output.hasDelayed())
|
||||
{
|
||||
|
@ -132,7 +132,7 @@ int Server::run()
|
||||
}
|
||||
if (config().hasOption("version"))
|
||||
{
|
||||
std::cout << DBMS_NAME << " server version " << VERSION_STRING << "." << std::endl;
|
||||
std::cout << DBMS_NAME << " server version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl;
|
||||
return 0;
|
||||
}
|
||||
return Application::run();
|
||||
|
85
dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp
Normal file
85
dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.cpp
Normal file
@ -0,0 +1,85 @@
|
||||
#include <AggregateFunctions/AggregateFunctionLeastSqr.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
#include <AggregateFunctions/FactoryHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Creator callback for the `leastSqr` aggregate function.
/// Runs the shared parameter / argument checks, then returns the AggregateFunctionLeastSqr
/// instantiation whose template types match the TypeIndex of the first (x) and last (y) argument.
/// Throws ILLEGAL_TYPE_OF_ARGUMENT when no listed pair of types matches.
AggregateFunctionPtr createAggregateFunctionLeastSqr(
    const String & name,
    const DataTypes & arguments,
    const Array & params)
{
    assertNoParameters(name, params);
    assertBinary(name, arguments);

    const IDataType * x_type = arguments.front().get();
    const IDataType * y_type = arguments.back().get();

    WhichDataType x_kind{x_type};
    WhichDataType y_kind{y_type};

/// Applies APPLY(X_TYPE, <type>) for every supported type of the second argument.
#define LEASTSQR_EACH_Y(APPLY, X_TYPE) \
    APPLY(X_TYPE, UInt8) \
    APPLY(X_TYPE, UInt16) \
    APPLY(X_TYPE, UInt32) \
    APPLY(X_TYPE, UInt64) \
    APPLY(X_TYPE, Int8) \
    APPLY(X_TYPE, Int16) \
    APPLY(X_TYPE, Int32) \
    APPLY(X_TYPE, Int64) \
    APPLY(X_TYPE, Float32) \
    APPLY(X_TYPE, Float64)

/// Applies APPLY to every supported (x type, y type) pair.
#define LEASTSQR_EACH_PAIR(APPLY) \
    LEASTSQR_EACH_Y(APPLY, UInt8) \
    LEASTSQR_EACH_Y(APPLY, UInt16) \
    LEASTSQR_EACH_Y(APPLY, UInt32) \
    LEASTSQR_EACH_Y(APPLY, UInt64) \
    LEASTSQR_EACH_Y(APPLY, Int8) \
    LEASTSQR_EACH_Y(APPLY, Int16) \
    LEASTSQR_EACH_Y(APPLY, Int32) \
    LEASTSQR_EACH_Y(APPLY, Int64) \
    LEASTSQR_EACH_Y(APPLY, Float32) \
    LEASTSQR_EACH_Y(APPLY, Float64)

/// Returns the matching instantiation as soon as both type indexes agree.
#define LEASTSQR_TRY_PAIR(X_TYPE, Y_TYPE) \
    if (x_kind.idx == TypeIndex::X_TYPE && y_kind.idx == TypeIndex::Y_TYPE) \
        return std::make_shared<AggregateFunctionLeastSqr<X_TYPE, Y_TYPE>>(arguments, params);

    LEASTSQR_EACH_PAIR(LEASTSQR_TRY_PAIR)

#undef LEASTSQR_EACH_Y
#undef LEASTSQR_EACH_PAIR
#undef LEASTSQR_TRY_PAIR

    /// None of the pairs matched: report both argument types to the caller.
    throw Exception(
        "Illegal types (" + x_type->getName() + ", " + y_type->getName()
            + ") of arguments of aggregate function " + name
            + ", must be Native Ints, Native UInts or Floats",
        ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
|
||||
|
||||
}
|
||||
|
||||
/// Registers the aggregate function in the given factory under the name "leastSqr",
/// with createAggregateFunctionLeastSqr as its creator callback.
void registerAggregateFunctionLeastSqr(AggregateFunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction("leastSqr", createAggregateFunctionLeastSqr);
|
||||
}
|
||||
|
||||
}
|
195
dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h
Normal file
195
dbms/src/AggregateFunctions/AggregateFunctionLeastSqr.h
Normal file
@ -0,0 +1,195 @@
|
||||
#pragma once
|
||||
|
||||
#include <AggregateFunctions/IAggregateFunction.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <limits>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
template <typename X, typename Y, typename Ret>
|
||||
struct AggregateFunctionLeastSqrData final
|
||||
{
|
||||
size_t count = 0;
|
||||
Ret sum_x = 0;
|
||||
Ret sum_y = 0;
|
||||
Ret sum_xx = 0;
|
||||
Ret sum_xy = 0;
|
||||
|
||||
void add(X x, Y y)
|
||||
{
|
||||
count += 1;
|
||||
sum_x += x;
|
||||
sum_y += y;
|
||||
sum_xx += x * x;
|
||||
sum_xy += x * y;
|
||||
}
|
||||
|
||||
void merge(const AggregateFunctionLeastSqrData & other)
|
||||
{
|
||||
count += other.count;
|
||||
sum_x += other.sum_x;
|
||||
sum_y += other.sum_y;
|
||||
sum_xx += other.sum_xx;
|
||||
sum_xy += other.sum_xy;
|
||||
}
|
||||
|
||||
void serialize(WriteBuffer & buf) const
|
||||
{
|
||||
writeBinary(count, buf);
|
||||
writeBinary(sum_x, buf);
|
||||
writeBinary(sum_y, buf);
|
||||
writeBinary(sum_xx, buf);
|
||||
writeBinary(sum_xy, buf);
|
||||
}
|
||||
|
||||
void deserialize(ReadBuffer & buf)
|
||||
{
|
||||
readBinary(count, buf);
|
||||
readBinary(sum_x, buf);
|
||||
readBinary(sum_y, buf);
|
||||
readBinary(sum_xx, buf);
|
||||
readBinary(sum_xy, buf);
|
||||
}
|
||||
|
||||
Ret getK() const
|
||||
{
|
||||
Ret divisor = sum_xx * count - sum_x * sum_x;
|
||||
|
||||
if (divisor == 0)
|
||||
return std::numeric_limits<Ret>::quiet_NaN();
|
||||
|
||||
return (sum_xy * count - sum_x * sum_y) / divisor;
|
||||
}
|
||||
|
||||
Ret getB(Ret k) const
|
||||
{
|
||||
if (count == 0)
|
||||
return std::numeric_limits<Ret>::quiet_NaN();
|
||||
|
||||
return (sum_y - k * sum_x) / count;
|
||||
}
|
||||
};
|
||||
|
||||
/// Calculates simple linear regression parameters.
|
||||
/// Result is a tuple (k, b) for y = k * x + b equation, solved by least squares approximation.
|
||||
template <typename X, typename Y, typename Ret = Float64>
|
||||
class AggregateFunctionLeastSqr final : public IAggregateFunctionDataHelper<
|
||||
AggregateFunctionLeastSqrData<X, Y, Ret>,
|
||||
AggregateFunctionLeastSqr<X, Y, Ret>
|
||||
>
|
||||
{
|
||||
public:
|
||||
AggregateFunctionLeastSqr(
|
||||
const DataTypes & arguments,
|
||||
const Array & params
|
||||
):
|
||||
IAggregateFunctionDataHelper<
|
||||
AggregateFunctionLeastSqrData<X, Y, Ret>,
|
||||
AggregateFunctionLeastSqr<X, Y, Ret>
|
||||
> {arguments, params}
|
||||
{
|
||||
// notice: arguments has been checked before
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return "leastSqr";
|
||||
}
|
||||
|
||||
const char * getHeaderFilePath() const override
|
||||
{
|
||||
return __FILE__;
|
||||
}
|
||||
|
||||
void add(
|
||||
AggregateDataPtr place,
|
||||
const IColumn ** columns,
|
||||
size_t row_num,
|
||||
Arena *
|
||||
) const override
|
||||
{
|
||||
auto col_x {
|
||||
static_cast<const ColumnVector<X> *>(columns[0])
|
||||
};
|
||||
auto col_y {
|
||||
static_cast<const ColumnVector<Y> *>(columns[1])
|
||||
};
|
||||
|
||||
X x = col_x->getData()[row_num];
|
||||
Y y = col_y->getData()[row_num];
|
||||
|
||||
this->data(place).add(x, y);
|
||||
}
|
||||
|
||||
void merge(
|
||||
AggregateDataPtr place,
|
||||
ConstAggregateDataPtr rhs, Arena *
|
||||
) const override
|
||||
{
|
||||
this->data(place).merge(this->data(rhs));
|
||||
}
|
||||
|
||||
void serialize(
|
||||
ConstAggregateDataPtr place,
|
||||
WriteBuffer & buf
|
||||
) const override
|
||||
{
|
||||
this->data(place).serialize(buf);
|
||||
}
|
||||
|
||||
void deserialize(
|
||||
AggregateDataPtr place,
|
||||
ReadBuffer & buf, Arena *
|
||||
) const override
|
||||
{
|
||||
this->data(place).deserialize(buf);
|
||||
}
|
||||
|
||||
DataTypePtr getReturnType() const override
|
||||
{
|
||||
DataTypes types {
|
||||
std::make_shared<DataTypeNumber<Ret>>(),
|
||||
std::make_shared<DataTypeNumber<Ret>>(),
|
||||
};
|
||||
|
||||
Strings names {
|
||||
"k",
|
||||
"b",
|
||||
};
|
||||
|
||||
return std::make_shared<DataTypeTuple>(
|
||||
std::move(types),
|
||||
std::move(names)
|
||||
);
|
||||
}
|
||||
|
||||
void insertResultInto(
|
||||
ConstAggregateDataPtr place,
|
||||
IColumn & to
|
||||
) const override
|
||||
{
|
||||
Ret k = this->data(place).getK();
|
||||
Ret b = this->data(place).getB(k);
|
||||
|
||||
auto & col_tuple = static_cast<ColumnTuple &>(to);
|
||||
auto & col_k = static_cast<ColumnVector<Ret> &>(col_tuple.getColumn(0));
|
||||
auto & col_b = static_cast<ColumnVector<Ret> &>(col_tuple.getColumn(1));
|
||||
|
||||
col_k.getData().push_back(k);
|
||||
col_b.getData().push_back(b);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
@ -24,8 +24,7 @@ struct WithoutOverflowPolicy
|
||||
static DataTypePtr promoteType(const DataTypePtr & data_type)
|
||||
{
|
||||
if (!data_type->canBePromoted())
|
||||
throw new Exception{"Values to be summed are expected to be Numeric, Float or Decimal.",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
throw Exception{"Values to be summed are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
|
||||
|
||||
return data_type->promoteNumericType();
|
||||
}
|
||||
|
@ -16,7 +16,6 @@
|
||||
#include <Common/HashTable/HashSet.h>
|
||||
#include <Common/HyperLogLogWithSmallSetOptimization.h>
|
||||
#include <Common/CombinedCardinalityEstimator.h>
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
#include <AggregateFunctions/UniquesHashSet.h>
|
||||
|
@ -29,6 +29,7 @@ void registerAggregateFunctionsBitwise(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsBitmap(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionsMaxIntersections(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionEntropy(AggregateFunctionFactory &);
|
||||
void registerAggregateFunctionLeastSqr(AggregateFunctionFactory &);
|
||||
|
||||
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
|
||||
void registerAggregateFunctionCombinatorArray(AggregateFunctionCombinatorFactory &);
|
||||
@ -69,6 +70,7 @@ void registerAggregateFunctions()
|
||||
registerAggregateFunctionHistogram(factory);
|
||||
registerAggregateFunctionRetention(factory);
|
||||
registerAggregateFunctionEntropy(factory);
|
||||
registerAggregateFunctionLeastSqr(factory);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -271,7 +271,7 @@ private:
|
||||
void initBlockInput();
|
||||
void initBlockLogsInput();
|
||||
|
||||
void throwUnexpectedPacket(UInt64 packet_type, const char * expected) const;
|
||||
[[noreturn]] void throwUnexpectedPacket(UInt64 packet_type, const char * expected) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -21,6 +21,7 @@ namespace DB
|
||||
class ColumnString final : public COWPtrHelper<IColumn, ColumnString>
|
||||
{
|
||||
public:
|
||||
using Char = UInt8;
|
||||
using Chars = PaddedPODArray<UInt8>;
|
||||
|
||||
private:
|
||||
|
@ -10,7 +10,7 @@ namespace DB
|
||||
|
||||
/** Aligned piece of memory.
|
||||
* It can only be allocated and destroyed.
|
||||
* MemoryTracker is not used. It is intended for small pieces of memory.
|
||||
* MemoryTracker is not used. AlignedBuffer is intended for small pieces of memory.
|
||||
*/
|
||||
class AlignedBuffer : private boost::noncopyable
|
||||
{
|
||||
|
@ -1,190 +0,0 @@
|
||||
#include <Common/Allocator.h>
|
||||
|
||||
#if !defined(__APPLE__) && !defined(__FreeBSD__)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#include <cstdlib>
|
||||
#include <algorithm>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <Core/Defines.h>
|
||||
#ifdef THREAD_SANITIZER
|
||||
/// Thread sanitizer does not intercept mremap. The usage of mremap will lead to false positives.
|
||||
#define DISABLE_MREMAP 1
|
||||
#endif
|
||||
#include <common/mremap.h>
|
||||
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/formatReadable.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int CANNOT_ALLOCATE_MEMORY;
|
||||
extern const int CANNOT_MUNMAP;
|
||||
extern const int CANNOT_MREMAP;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Many modern allocators (for example, tcmalloc) do not do a mremap for realloc,
|
||||
* even in case of large enough chunks of memory.
|
||||
* Although this allows you to increase performance and reduce memory consumption during realloc.
|
||||
* To fix this, we do mremap manually if the chunk of memory is large enough.
|
||||
* The threshold (64 MB) is chosen quite large, since changing the address space is
|
||||
* very slow, especially in the case of a large number of threads.
|
||||
* We expect that the set of operations mmap/something to do/mremap can only be performed about 1000 times per second.
|
||||
*
|
||||
* PS. This is also required, because tcmalloc can not allocate a chunk of memory greater than 16 GB.
|
||||
*/
|
||||
#ifdef NDEBUG
|
||||
static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
|
||||
#else
|
||||
/// In debug build, use small mmap threshold to reproduce more memory stomping bugs.
|
||||
/// Along with ASLR it will hopefully detect more issues than ASan.
|
||||
/// The program may fail due to the limit on number of memory mappings.
|
||||
static constexpr size_t MMAP_THRESHOLD = 4096;
|
||||
#endif
|
||||
|
||||
static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
|
||||
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
|
||||
|
||||
|
||||
template <bool clear_memory_>
|
||||
void * Allocator<clear_memory_>::mmap_hint()
|
||||
{
|
||||
#if ALLOCATOR_ASLR
|
||||
return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(rng));
|
||||
#else
|
||||
return nullptr;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
template <bool clear_memory_>
|
||||
void * Allocator<clear_memory_>::alloc(size_t size, size_t alignment)
|
||||
{
|
||||
CurrentMemoryTracker::alloc(size);
|
||||
|
||||
void * buf;
|
||||
|
||||
if (size >= MMAP_THRESHOLD)
|
||||
{
|
||||
if (alignment > MMAP_MIN_ALIGNMENT)
|
||||
throw DB::Exception("Too large alignment " + formatReadableSizeWithBinarySuffix(alignment) + ": more than page size when allocating "
|
||||
+ formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
buf = mmap(mmap_hint(), size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (MAP_FAILED == buf)
|
||||
DB::throwFromErrno("Allocator: Cannot mmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
|
||||
/// No need for zero-fill, because mmap guarantees it.
|
||||
}
|
||||
else
|
||||
{
|
||||
if (alignment <= MALLOC_MIN_ALIGNMENT)
|
||||
{
|
||||
if (clear_memory)
|
||||
buf = ::calloc(size, 1);
|
||||
else
|
||||
buf = ::malloc(size);
|
||||
|
||||
if (nullptr == buf)
|
||||
DB::throwFromErrno("Allocator: Cannot malloc " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
}
|
||||
else
|
||||
{
|
||||
buf = nullptr;
|
||||
int res = posix_memalign(&buf, alignment, size);
|
||||
|
||||
if (0 != res)
|
||||
DB::throwFromErrno("Cannot allocate memory (posix_memalign) " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
|
||||
|
||||
if (clear_memory)
|
||||
memset(buf, 0, size);
|
||||
}
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
||||
template <bool clear_memory_>
|
||||
void Allocator<clear_memory_>::free(void * buf, size_t size)
|
||||
{
|
||||
if (size >= MMAP_THRESHOLD)
|
||||
{
|
||||
if (0 != munmap(buf, size))
|
||||
DB::throwFromErrno("Allocator: Cannot munmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_MUNMAP);
|
||||
}
|
||||
else
|
||||
{
|
||||
::free(buf);
|
||||
}
|
||||
|
||||
CurrentMemoryTracker::free(size);
|
||||
}
|
||||
|
||||
|
||||
template <bool clear_memory_>
|
||||
void * Allocator<clear_memory_>::realloc(void * buf, size_t old_size, size_t new_size, size_t alignment)
|
||||
{
|
||||
if (old_size == new_size)
|
||||
{
|
||||
/// nothing to do.
|
||||
/// BTW, it's not possible to change alignment while doing realloc.
|
||||
}
|
||||
else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD && alignment <= MALLOC_MIN_ALIGNMENT)
|
||||
{
|
||||
/// Resize malloc'd memory region with no special alignment requirement.
|
||||
CurrentMemoryTracker::realloc(old_size, new_size);
|
||||
|
||||
void * new_buf = ::realloc(buf, new_size);
|
||||
if (nullptr == new_buf)
|
||||
DB::throwFromErrno("Allocator: Cannot realloc from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
|
||||
buf = new_buf;
|
||||
if (clear_memory && new_size > old_size)
|
||||
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
|
||||
}
|
||||
else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
|
||||
{
|
||||
/// Resize mmap'd memory region.
|
||||
CurrentMemoryTracker::realloc(old_size, new_size);
|
||||
|
||||
// On apple and freebsd self-implemented mremap used (common/mremap.h)
|
||||
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (MAP_FAILED == buf)
|
||||
DB::throwFromErrno("Allocator: Cannot mremap memory chunk from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_MREMAP);
|
||||
|
||||
/// No need for zero-fill, because mmap guarantees it.
|
||||
}
|
||||
else
|
||||
{
|
||||
/// All other cases that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
|
||||
|
||||
void * new_buf = alloc(new_size, alignment);
|
||||
memcpy(new_buf, buf, std::min(old_size, new_size));
|
||||
free(buf, old_size);
|
||||
buf = new_buf;
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
|
||||
/// Explicit template instantiations.
|
||||
template class Allocator<true>;
|
||||
template class Allocator<false>;
|
@ -10,11 +10,88 @@
|
||||
#define ALLOCATOR_ASLR 1
|
||||
#endif
|
||||
|
||||
#if ALLOCATOR_ASLR
|
||||
#include <pcg_random.hpp>
|
||||
#include <Common/randomSeed.h>
|
||||
#include <pcg_random.hpp>
|
||||
#include <Common/randomSeed.h>
|
||||
|
||||
#if !defined(__APPLE__) && !defined(__FreeBSD__)
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
#include <cstdlib>
|
||||
#include <algorithm>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <Core/Defines.h>
|
||||
#ifdef THREAD_SANITIZER
|
||||
/// Thread sanitizer does not intercept mremap. The usage of mremap will lead to false positives.
|
||||
#define DISABLE_MREMAP 1
|
||||
#endif
|
||||
#include <common/mremap.h>
|
||||
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/formatReadable.h>
|
||||
|
||||
|
||||
/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
|
||||
/** Many modern allocators (for example, tcmalloc) do not do a mremap for realloc,
|
||||
* even in case of large enough chunks of memory.
|
||||
* Although this allows you to increase performance and reduce memory consumption during realloc.
|
||||
* To fix this, we do mremap manually if the chunk of memory is large enough.
|
||||
* The threshold (64 MB) is chosen quite large, since changing the address space is
|
||||
* very slow, especially in the case of a large number of threads.
|
||||
* We expect that the set of operations mmap/something to do/mremap can only be performed about 1000 times per second.
|
||||
*
|
||||
* PS. This is also required, because tcmalloc can not allocate a chunk of memory greater than 16 GB.
|
||||
*/
|
||||
#ifdef NDEBUG
|
||||
static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
|
||||
#else
|
||||
/// In debug build, use small mmap threshold to reproduce more memory stomping bugs.
|
||||
/// Along with ASLR it will hopefully detect more issues than ASan.
|
||||
/// The program may fail due to the limit on number of memory mappings.
|
||||
static constexpr size_t MMAP_THRESHOLD = 4096;
|
||||
#endif
|
||||
|
||||
static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
|
||||
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int CANNOT_ALLOCATE_MEMORY;
|
||||
extern const int CANNOT_MUNMAP;
|
||||
extern const int CANNOT_MREMAP;
|
||||
}
|
||||
}
|
||||
|
||||
namespace AllocatorHints
|
||||
{
|
||||
struct DefaultHint
|
||||
{
|
||||
void * mmap_hint()
|
||||
{
|
||||
return nullptr;
|
||||
}
|
||||
};
|
||||
|
||||
struct RandomHint
|
||||
{
|
||||
void * mmap_hint()
|
||||
{
|
||||
return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(rng));
|
||||
}
|
||||
private:
|
||||
pcg64 rng{randomSeed()};
|
||||
};
|
||||
}
|
||||
|
||||
/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
|
||||
* Also used in hash tables.
|
||||
@ -23,31 +100,126 @@
|
||||
* - passing the size into the `free` method;
|
||||
* - by the presence of the `alignment` argument;
|
||||
* - the possibility of zeroing memory (used in hash tables);
|
||||
* - hint class for mmap
|
||||
* - mmap_threshold for using mmap less or more
|
||||
*/
|
||||
template <bool clear_memory_>
|
||||
class Allocator
|
||||
template <bool clear_memory_, typename Hint, size_t mmap_threshold>
|
||||
class AllocatorWithHint : Hint
|
||||
{
|
||||
#if ALLOCATOR_ASLR
|
||||
private:
|
||||
pcg64 rng{randomSeed()};
|
||||
#endif
|
||||
void * mmap_hint();
|
||||
|
||||
protected:
|
||||
static constexpr bool clear_memory = clear_memory_;
|
||||
|
||||
public:
|
||||
/// Allocate memory range.
|
||||
void * alloc(size_t size, size_t alignment = 0);
|
||||
void * alloc(size_t size, size_t alignment = 0)
|
||||
{
|
||||
CurrentMemoryTracker::alloc(size);
|
||||
|
||||
void * buf;
|
||||
|
||||
if (size >= mmap_threshold)
|
||||
{
|
||||
if (alignment > MMAP_MIN_ALIGNMENT)
|
||||
throw DB::Exception("Too large alignment " + formatReadableSizeWithBinarySuffix(alignment) + ": more than page size when allocating "
|
||||
+ formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
buf = mmap(Hint::mmap_hint(), size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (MAP_FAILED == buf)
|
||||
DB::throwFromErrno("Allocator: Cannot mmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
|
||||
/// No need for zero-fill, because mmap guarantees it.
|
||||
}
|
||||
else
|
||||
{
|
||||
if (alignment <= MALLOC_MIN_ALIGNMENT)
|
||||
{
|
||||
if constexpr (clear_memory)
|
||||
buf = ::calloc(size, 1);
|
||||
else
|
||||
buf = ::malloc(size);
|
||||
|
||||
if (nullptr == buf)
|
||||
DB::throwFromErrno("Allocator: Cannot malloc " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
}
|
||||
else
|
||||
{
|
||||
buf = nullptr;
|
||||
int res = posix_memalign(&buf, alignment, size);
|
||||
|
||||
if (0 != res)
|
||||
DB::throwFromErrno("Cannot allocate memory (posix_memalign) " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
|
||||
|
||||
if (clear_memory)
|
||||
memset(buf, 0, size);
|
||||
}
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
/// Free memory range.
|
||||
void free(void * buf, size_t size);
|
||||
void free(void * buf, size_t size)
|
||||
{
|
||||
if (size >= mmap_threshold)
|
||||
{
|
||||
if (0 != munmap(buf, size))
|
||||
DB::throwFromErrno("Allocator: Cannot munmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_MUNMAP);
|
||||
}
|
||||
else
|
||||
{
|
||||
::free(buf);
|
||||
}
|
||||
|
||||
CurrentMemoryTracker::free(size);
|
||||
}
|
||||
|
||||
/** Enlarge memory range.
|
||||
* Data from old range is moved to the beginning of new range.
|
||||
* Address of memory range could change.
|
||||
*/
|
||||
void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0);
|
||||
void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0)
|
||||
{
|
||||
if (old_size == new_size)
|
||||
{
|
||||
/// nothing to do.
|
||||
/// BTW, it's not possible to change alignment while doing realloc.
|
||||
}
|
||||
else if (old_size < mmap_threshold && new_size < mmap_threshold && alignment <= MALLOC_MIN_ALIGNMENT)
|
||||
{
|
||||
/// Resize malloc'd memory region with no special alignment requirement.
|
||||
CurrentMemoryTracker::realloc(old_size, new_size);
|
||||
|
||||
void * new_buf = ::realloc(buf, new_size);
|
||||
if (nullptr == new_buf)
|
||||
DB::throwFromErrno("Allocator: Cannot realloc from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
|
||||
buf = new_buf;
|
||||
if (clear_memory && new_size > old_size)
|
||||
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
|
||||
}
|
||||
else if (old_size >= mmap_threshold && new_size >= mmap_threshold)
|
||||
{
|
||||
/// Resize mmap'd memory region.
|
||||
CurrentMemoryTracker::realloc(old_size, new_size);
|
||||
|
||||
// On apple and freebsd self-implemented mremap used (common/mremap.h)
|
||||
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (MAP_FAILED == buf)
|
||||
DB::throwFromErrno("Allocator: Cannot mremap memory chunk from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_MREMAP);
|
||||
|
||||
/// No need for zero-fill, because mmap guarantees it.
|
||||
}
|
||||
else
|
||||
{
|
||||
/// All other cases that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
|
||||
|
||||
void * new_buf = alloc(new_size, alignment);
|
||||
memcpy(new_buf, buf, std::min(old_size, new_size));
|
||||
free(buf, old_size);
|
||||
buf = new_buf;
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
protected:
|
||||
static constexpr size_t getStackThreshold()
|
||||
@ -56,6 +228,13 @@ protected:
|
||||
}
|
||||
};
|
||||
|
||||
#if ALLOCATOR_ASLR
|
||||
template <bool clear_memory>
|
||||
using Allocator = AllocatorWithHint<clear_memory, AllocatorHints::RandomHint, MMAP_THRESHOLD>;
|
||||
#else
|
||||
template <bool clear_memory>
|
||||
using Allocator = AllocatorWithHint<clear_memory, AllocatorHints::DefaultHint, MMAP_THRESHOLD>;
|
||||
#endif
|
||||
|
||||
/** When using AllocatorWithStackMemory, located on the stack,
|
||||
* GCC 4.9 mistakenly assumes that we can call `free` from a pointer to the stack.
|
||||
|
@ -49,7 +49,7 @@ private:
|
||||
ProfileEvents::increment(ProfileEvents::ArenaAllocChunks);
|
||||
ProfileEvents::increment(ProfileEvents::ArenaAllocBytes, size_);
|
||||
|
||||
begin = reinterpret_cast<char *>(Allocator::alloc(size_));
|
||||
begin = reinterpret_cast<char *>(Allocator<false>::alloc(size_));
|
||||
pos = begin;
|
||||
end = begin + size_ - pad_right;
|
||||
prev = prev_;
|
||||
@ -57,7 +57,7 @@ private:
|
||||
|
||||
~Chunk()
|
||||
{
|
||||
Allocator::free(begin, size());
|
||||
Allocator<false>::free(begin, size());
|
||||
|
||||
if (prev)
|
||||
delete prev;
|
||||
|
@ -55,7 +55,7 @@ public:
|
||||
char * alloc(const size_t size)
|
||||
{
|
||||
if (size > max_fixed_block_size)
|
||||
return static_cast<char *>(Allocator::alloc(size));
|
||||
return static_cast<char *>(Allocator<false>::alloc(size));
|
||||
|
||||
/// find list of required size
|
||||
const auto list_idx = findFreeListIndex(size);
|
||||
@ -76,7 +76,7 @@ public:
|
||||
void free(char * ptr, const size_t size)
|
||||
{
|
||||
if (size > max_fixed_block_size)
|
||||
return Allocator::free(ptr, size);
|
||||
return Allocator<false>::free(ptr, size);
|
||||
|
||||
/// find list of required size
|
||||
const auto list_idx = findFreeListIndex(size);
|
||||
|
@ -422,6 +422,10 @@ namespace ErrorCodes
|
||||
extern const int CANNOT_MPROTECT = 445;
|
||||
extern const int FUNCTION_NOT_ALLOWED = 446;
|
||||
extern const int HYPERSCAN_CANNOT_SCAN_TEXT = 447;
|
||||
extern const int BROTLI_READ_FAILED = 448;
|
||||
extern const int BROTLI_WRITE_FAILED = 449;
|
||||
extern const int BAD_TTL_EXPRESSION = 450;
|
||||
extern const int BAD_TTL_FILE = 451;
|
||||
|
||||
extern const int KEEPER_EXCEPTION = 999;
|
||||
extern const int POCO_EXCEPTION = 1000;
|
||||
|
@ -21,11 +21,6 @@ namespace ErrorCodes
|
||||
extern const int CANNOT_TRUNCATE_FILE;
|
||||
}
|
||||
|
||||
const char * getVersion()
|
||||
{
|
||||
return VERSION_STRING;
|
||||
}
|
||||
|
||||
std::string errnoToString(int code, int e)
|
||||
{
|
||||
const size_t buf_size = 128;
|
||||
@ -82,14 +77,15 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
|
||||
}
|
||||
catch (const Exception & e)
|
||||
{
|
||||
stream << "(version " << getVersion() << ") " << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace);
|
||||
stream << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace) << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
|
||||
}
|
||||
catch (const Poco::Exception & e)
|
||||
{
|
||||
try
|
||||
{
|
||||
stream << "(version " << getVersion() << ") " << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
|
||||
<< ", e.displayText() = " << e.displayText();
|
||||
stream << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
|
||||
<< ", e.displayText() = " << e.displayText()
|
||||
<< " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
|
||||
}
|
||||
catch (...) {}
|
||||
}
|
||||
@ -103,7 +99,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
|
||||
if (status)
|
||||
name += " (demangling status: " + toString(status) + ")";
|
||||
|
||||
stream << "(version " << getVersion() << ") " << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what();
|
||||
stream << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what() << ", version = " << VERSION_STRING << VERSION_OFFICIAL;
|
||||
}
|
||||
catch (...) {}
|
||||
}
|
||||
@ -117,7 +113,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
|
||||
if (status)
|
||||
name += " (demangling status: " + toString(status) + ")";
|
||||
|
||||
stream << "(version " << getVersion() << ") " << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name;
|
||||
stream << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
|
||||
}
|
||||
catch (...) {}
|
||||
}
|
||||
|
@ -20,7 +20,7 @@ namespace ErrorCodes
|
||||
* template parameter is available as Creator
|
||||
*/
|
||||
template <typename CreatorFunc>
|
||||
class IFactoryWithAliases
|
||||
class IFactoryWithAliases : public IHints<2, IFactoryWithAliases<CreatorFunc>>
|
||||
{
|
||||
protected:
|
||||
using Creator = CreatorFunc;
|
||||
@ -76,7 +76,7 @@ public:
|
||||
throw Exception(factory_name + ": alias name '" + alias_name + "' is not unique", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
std::vector<String> getAllRegisteredNames() const
|
||||
std::vector<String> getAllRegisteredNames() const override
|
||||
{
|
||||
std::vector<String> result;
|
||||
auto getter = [](const auto & pair) { return pair.first; };
|
||||
@ -106,13 +106,7 @@ public:
|
||||
return aliases.count(name) || case_insensitive_aliases.count(name);
|
||||
}
|
||||
|
||||
std::vector<String> getHints(const String & name) const
|
||||
{
|
||||
static const auto registered_names = getAllRegisteredNames();
|
||||
return prompter.getHints(name, registered_names);
|
||||
}
|
||||
|
||||
virtual ~IFactoryWithAliases() {}
|
||||
virtual ~IFactoryWithAliases() override {}
|
||||
|
||||
private:
|
||||
using InnerMap = std::unordered_map<String, Creator>; // name -> creator
|
||||
@ -127,13 +121,6 @@ private:
|
||||
|
||||
/// Case insensitive aliases
|
||||
AliasMap case_insensitive_aliases;
|
||||
|
||||
/**
|
||||
* prompter for names, if a person makes a typo for some function or type, it
|
||||
* helps to find best possible match (in particular, edit distance is done like in clang
|
||||
* (max edit distance is (typo.size() + 2) / 3)
|
||||
*/
|
||||
NamePrompter</*MaxNumHints=*/2> prompter;
|
||||
};
|
||||
|
||||
}
|
||||
|
53
dbms/src/Common/LFAllocator.cpp
Normal file
53
dbms/src/Common/LFAllocator.cpp
Normal file
@ -0,0 +1,53 @@
|
||||
#include <Common/config.h>
|
||||
|
||||
#if USE_LFALLOC
|
||||
#include "LFAllocator.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <lf_allocX64.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
void * LFAllocator::alloc(size_t size, size_t alignment)
|
||||
{
|
||||
if (alignment == 0)
|
||||
return LFAlloc(size);
|
||||
else
|
||||
{
|
||||
void * ptr;
|
||||
int res = LFPosixMemalign(&ptr, alignment, size);
|
||||
return res ? nullptr : ptr;
|
||||
}
|
||||
}
|
||||
|
||||
void LFAllocator::free(void * buf, size_t)
|
||||
{
|
||||
LFFree(buf);
|
||||
}
|
||||
|
||||
void * LFAllocator::realloc(void * old_ptr, size_t, size_t new_size, size_t alignment)
|
||||
{
|
||||
if (old_ptr == nullptr)
|
||||
{
|
||||
void * result = LFAllocator::alloc(new_size, alignment);
|
||||
return result;
|
||||
}
|
||||
if (new_size == 0)
|
||||
{
|
||||
LFFree(old_ptr);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void * new_ptr = LFAllocator::alloc(new_size, alignment);
|
||||
if (new_ptr == nullptr)
|
||||
return nullptr;
|
||||
size_t old_size = LFGetSize(old_ptr);
|
||||
memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
|
||||
LFFree(old_ptr);
|
||||
return new_ptr;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
22
dbms/src/Common/LFAllocator.h
Normal file
22
dbms/src/Common/LFAllocator.h
Normal file
@ -0,0 +1,22 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/config.h>
|
||||
|
||||
#if !USE_LFALLOC
|
||||
#error "do not include this file until USE_LFALLOC is set to 1"
|
||||
#endif
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// Stateless allocator whose static functions have the alloc / free / realloc shape
/// expected from an allocator template parameter (it is passed as such to PODArray).
struct LFAllocator
{
    /// Allocate `size` bytes; `alignment == 0` means no particular alignment is requested.
    static void * alloc(size_t size, size_t alignment = 0);

    /// Free `buf`. The unnamed size argument exists for interface compatibility.
    static void free(void * buf, size_t);

    /// Resize `buf` to `new_size` bytes. The unnamed argument is the old size.
    static void * realloc(void * buf, size_t, size_t new_size, size_t alignment = 0);
};
|
||||
|
||||
}
|
@ -97,4 +97,23 @@ private:
|
||||
}
|
||||
};
|
||||
|
||||
template <size_t MaxNumHints, class Self>
|
||||
class IHints
|
||||
{
|
||||
public:
|
||||
|
||||
virtual std::vector<String> getAllRegisteredNames() const = 0;
|
||||
|
||||
std::vector<String> getHints(const String & name) const
|
||||
{
|
||||
static const auto registered_names = getAllRegisteredNames();
|
||||
return prompter.getHints(name, registered_names);
|
||||
}
|
||||
|
||||
virtual ~IHints() = default;
|
||||
|
||||
private:
|
||||
NamePrompter<MaxNumHints> prompter;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,48 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/PODArray.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/**
|
||||
* This class is intended to push sortable data into.
|
||||
* When looking up values the container ensures that it is sorted for log(N) lookup
|
||||
*
|
||||
* Note, this is only efficient when the insertions happen in one stage, followed by all retrievals
|
||||
* This way the data only gets sorted once.
|
||||
*/
|
||||
|
||||
template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocator<false>>
|
||||
class SortedLookupPODArray : private PaddedPODArray<T, INITIAL_SIZE, TAllocator>
|
||||
{
|
||||
public:
|
||||
using Base = PaddedPODArray<T, INITIAL_SIZE, TAllocator>;
|
||||
using typename Base::PODArray;
|
||||
using Base::cbegin;
|
||||
using Base::cend;
|
||||
|
||||
template <typename U, typename ... TAllocatorParams>
|
||||
void insert(U && x, TAllocatorParams &&... allocator_params)
|
||||
{
|
||||
Base::push_back(std::forward<U>(x), std::forward<TAllocatorParams>(allocator_params)...);
|
||||
sorted = false;
|
||||
}
|
||||
|
||||
typename Base::const_iterator upper_bound (const T& k)
|
||||
{
|
||||
if (!sorted)
|
||||
this->sort();
|
||||
return std::upper_bound(this->cbegin(), this->cend(), k);
|
||||
}
|
||||
private:
|
||||
void sort()
|
||||
{
|
||||
std::sort(this->begin(), this->end());
|
||||
sorted = true;
|
||||
}
|
||||
|
||||
bool sorted = false;
|
||||
};
|
||||
|
||||
}
|
@ -25,6 +25,8 @@
|
||||
#cmakedefine01 USE_BROTLI
|
||||
#cmakedefine01 USE_SSL
|
||||
#cmakedefine01 USE_HYPERSCAN
|
||||
#cmakedefine01 USE_LFALLOC
|
||||
#cmakedefine01 USE_LFALLOC_RANDOM_HINT
|
||||
|
||||
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
|
||||
#cmakedefine01 LLVM_HAS_RTTI
|
||||
|
@ -20,6 +20,7 @@
|
||||
#cmakedefine VERSION_MINOR @VERSION_MINOR@
|
||||
#cmakedefine VERSION_PATCH @VERSION_PATCH@
|
||||
#cmakedefine VERSION_STRING "@VERSION_STRING@"
|
||||
#cmakedefine VERSION_OFFICIAL "@VERSION_OFFICIAL@"
|
||||
#cmakedefine VERSION_FULL "@VERSION_FULL@"
|
||||
#cmakedefine VERSION_DESCRIBE "@VERSION_DESCRIBE@"
|
||||
#cmakedefine VERSION_GITHASH "@VERSION_GITHASH@"
|
||||
@ -42,3 +43,7 @@
|
||||
#else
|
||||
#define DBMS_VERSION_PATCH 0
|
||||
#endif
|
||||
|
||||
#if !defined(VERSION_OFFICIAL)
|
||||
# define VERSION_OFFICIAL ""
|
||||
#endif
|
||||
|
@ -35,7 +35,7 @@ bool CachedCompressedReadBuffer::nextImpl()
|
||||
UInt128 key = cache->hash(path, file_pos);
|
||||
owned_cell = cache->get(key);
|
||||
|
||||
if (!owned_cell || !codec)
|
||||
if (!owned_cell)
|
||||
{
|
||||
/// If not, read it from the file.
|
||||
initInput();
|
||||
@ -49,21 +49,22 @@ bool CachedCompressedReadBuffer::nextImpl()
|
||||
|
||||
if (owned_cell->compressed_size)
|
||||
{
|
||||
owned_cell->data.resize(size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer());
|
||||
owned_cell->additional_bytes = codec->getAdditionalSizeAtTheEndOfBuffer();
|
||||
owned_cell->data.resize(size_decompressed + owned_cell->additional_bytes);
|
||||
decompress(owned_cell->data.data(), size_decompressed, size_compressed_without_checksum);
|
||||
|
||||
/// Put data into cache.
|
||||
cache->set(key, owned_cell);
|
||||
}
|
||||
|
||||
/// Put data into cache.
|
||||
/// NOTE: Even if we don't read anything (compressed_size == 0)
|
||||
/// because we can reuse this information and don't reopen file in future
|
||||
cache->set(key, owned_cell);
|
||||
}
|
||||
|
||||
if (owned_cell->data.size() == 0)
|
||||
{
|
||||
owned_cell = nullptr;
|
||||
return false;
|
||||
}
|
||||
|
||||
working_buffer = Buffer(owned_cell->data.data(), owned_cell->data.data() + owned_cell->data.size() - codec->getAdditionalSizeAtTheEndOfBuffer());
|
||||
working_buffer = Buffer(owned_cell->data.data(), owned_cell->data.data() + owned_cell->data.size() - owned_cell->additional_bytes);
|
||||
|
||||
file_pos += owned_cell->compressed_size;
|
||||
|
||||
|
@ -125,19 +125,34 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Picks the delta width (in bytes) for a column type.
/// Returns the in-memory value size when the type is known, has a maximum value size and
/// that size is 1, 2, 4 or 8 bytes; otherwise falls back to 1.
UInt8 getDeltaBytesSize(DataTypePtr column_type)
{
    constexpr UInt8 fallback_size = 1;

    if (!column_type || !column_type->haveMaximumSizeOfValue())
        return fallback_size;

    const size_t value_size = column_type->getSizeOfValueInMemory();
    switch (value_size)
    {
        case 1:
        case 2:
        case 4:
        case 8:
            return static_cast<UInt8>(value_size);
        default:
            return fallback_size;
    }
}
|
||||
|
||||
}
|
||||
|
||||
/// Re-derives the delta width from the column type once the type becomes known.
void CompressionCodecDelta::useInfoAboutType(DataTypePtr data_type)
{
    const UInt8 width_for_type = getDeltaBytesSize(data_type);
    delta_bytes_size = width_for_type;
}
|
||||
|
||||
void registerCodecDelta(CompressionCodecFactory & factory)
|
||||
{
|
||||
UInt8 method_code = UInt8(CompressionMethodByte::Delta);
|
||||
factory.registerCompressionCodecWithType("Delta", method_code, [&](const ASTPtr & arguments, DataTypePtr column_type) -> CompressionCodecPtr
|
||||
{
|
||||
UInt8 delta_bytes_size = 1;
|
||||
if (column_type && column_type->haveMaximumSizeOfValue())
|
||||
{
|
||||
size_t max_size = column_type->getSizeOfValueInMemory();
|
||||
if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8)
|
||||
delta_bytes_size = static_cast<UInt8>(max_size);
|
||||
}
|
||||
|
||||
UInt8 delta_bytes_size = getDeltaBytesSize(column_type);
|
||||
if (arguments && !arguments->children.empty())
|
||||
{
|
||||
if (arguments->children.size() > 1)
|
||||
|
@ -14,15 +14,18 @@ public:
|
||||
|
||||
String getCodecDesc() const override;
|
||||
|
||||
void useInfoAboutType(DataTypePtr data_type) override;
|
||||
|
||||
protected:
|
||||
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
|
||||
|
||||
void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
|
||||
|
||||
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override { return uncompressed_size + 2; }
|
||||
|
||||
|
||||
private:
|
||||
const UInt8 delta_bytes_size;
|
||||
UInt8 delta_bytes_size;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
@ -21,16 +21,6 @@ extern const int CORRUPTED_DATA;
|
||||
CompressionCodecMultiple::CompressionCodecMultiple(Codecs codecs)
|
||||
: codecs(codecs)
|
||||
{
|
||||
std::ostringstream ss;
|
||||
for (size_t idx = 0; idx < codecs.size(); idx++)
|
||||
{
|
||||
if (idx != 0)
|
||||
ss << ',' << ' ';
|
||||
|
||||
const auto codec = codecs[idx];
|
||||
ss << codec->getCodecDesc();
|
||||
}
|
||||
codec_desc = ss.str();
|
||||
}
|
||||
|
||||
UInt8 CompressionCodecMultiple::getMethodByte() const
|
||||
@ -40,7 +30,16 @@ UInt8 CompressionCodecMultiple::getMethodByte() const
|
||||
|
||||
String CompressionCodecMultiple::getCodecDesc() const
|
||||
{
|
||||
return codec_desc;
|
||||
std::ostringstream ss;
|
||||
for (size_t idx = 0; idx < codecs.size(); idx++)
|
||||
{
|
||||
if (idx != 0)
|
||||
ss << ',' << ' ';
|
||||
|
||||
const auto codec = codecs[idx];
|
||||
ss << codec->getCodecDesc();
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
UInt32 CompressionCodecMultiple::getMaxCompressedDataSize(UInt32 uncompressed_size) const
|
||||
@ -79,6 +78,14 @@ UInt32 CompressionCodecMultiple::doCompressData(const char * source, UInt32 sour
|
||||
return 1 + codecs.size() + source_size;
|
||||
}
|
||||
|
||||
/// Forwards the column type to every nested codec, in pipeline order.
void CompressionCodecMultiple::useInfoAboutType(DataTypePtr data_type)
{
    for (auto & nested_codec : codecs)
        nested_codec->useInfoAboutType(data_type);
}
|
||||
|
||||
void CompressionCodecMultiple::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 decompressed_size) const
|
||||
{
|
||||
UInt8 compression_methods_size = source[0];
|
||||
|
@ -17,6 +17,8 @@ public:
|
||||
|
||||
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
|
||||
|
||||
void useInfoAboutType(DataTypePtr data_type) override;
|
||||
|
||||
protected:
|
||||
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
|
||||
|
||||
@ -24,7 +26,6 @@ protected:
|
||||
|
||||
private:
|
||||
Codecs codecs;
|
||||
String codec_desc;
|
||||
|
||||
};
|
||||
|
||||
|
@ -58,6 +58,9 @@ public:
|
||||
/// Read method byte from compressed source
|
||||
static UInt8 readMethod(const char * source);
|
||||
|
||||
/// Some codecs may use information about column type which appears after codec creation
|
||||
virtual void useInfoAboutType(DataTypePtr /* data_type */) { }
|
||||
|
||||
protected:
|
||||
|
||||
/// Return size of compressed data without header
|
||||
|
@ -23,20 +23,21 @@ namespace DB
|
||||
class TaskNotification final : public Poco::Notification
|
||||
{
|
||||
public:
|
||||
explicit TaskNotification(const BackgroundSchedulePool::TaskInfoPtr & task) : task(task) {}
|
||||
explicit TaskNotification(const BackgroundSchedulePoolTaskInfoPtr & task) : task(task) {}
|
||||
void execute() { task->execute(); }
|
||||
|
||||
private:
|
||||
BackgroundSchedulePool::TaskInfoPtr task;
|
||||
BackgroundSchedulePoolTaskInfoPtr task;
|
||||
};
|
||||
|
||||
|
||||
BackgroundSchedulePool::TaskInfo::TaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const TaskFunc & function_)
|
||||
: pool(pool_) , log_name(log_name_) , function(function_)
|
||||
BackgroundSchedulePoolTaskInfo::BackgroundSchedulePoolTaskInfo(
|
||||
BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_)
|
||||
: pool(pool_), log_name(log_name_), function(function_)
|
||||
{
|
||||
}
|
||||
|
||||
bool BackgroundSchedulePool::TaskInfo::schedule()
|
||||
bool BackgroundSchedulePoolTaskInfo::schedule()
|
||||
{
|
||||
std::lock_guard lock(schedule_mutex);
|
||||
|
||||
@ -47,7 +48,7 @@ bool BackgroundSchedulePool::TaskInfo::schedule()
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BackgroundSchedulePool::TaskInfo::scheduleAfter(size_t ms)
|
||||
bool BackgroundSchedulePoolTaskInfo::scheduleAfter(size_t ms)
|
||||
{
|
||||
std::lock_guard lock(schedule_mutex);
|
||||
|
||||
@ -58,7 +59,7 @@ bool BackgroundSchedulePool::TaskInfo::scheduleAfter(size_t ms)
|
||||
return true;
|
||||
}
|
||||
|
||||
void BackgroundSchedulePool::TaskInfo::deactivate()
|
||||
void BackgroundSchedulePoolTaskInfo::deactivate()
|
||||
{
|
||||
std::lock_guard lock_exec(exec_mutex);
|
||||
std::lock_guard lock_schedule(schedule_mutex);
|
||||
@ -73,13 +74,13 @@ void BackgroundSchedulePool::TaskInfo::deactivate()
|
||||
pool.cancelDelayedTask(shared_from_this(), lock_schedule);
|
||||
}
|
||||
|
||||
void BackgroundSchedulePool::TaskInfo::activate()
|
||||
void BackgroundSchedulePoolTaskInfo::activate()
|
||||
{
|
||||
std::lock_guard lock(schedule_mutex);
|
||||
deactivated = false;
|
||||
}
|
||||
|
||||
bool BackgroundSchedulePool::TaskInfo::activateAndSchedule()
|
||||
bool BackgroundSchedulePoolTaskInfo::activateAndSchedule()
|
||||
{
|
||||
std::lock_guard lock(schedule_mutex);
|
||||
|
||||
@ -91,7 +92,7 @@ bool BackgroundSchedulePool::TaskInfo::activateAndSchedule()
|
||||
return true;
|
||||
}
|
||||
|
||||
void BackgroundSchedulePool::TaskInfo::execute()
|
||||
void BackgroundSchedulePoolTaskInfo::execute()
|
||||
{
|
||||
Stopwatch watch;
|
||||
CurrentMetrics::Increment metric_increment{CurrentMetrics::BackgroundSchedulePoolTask};
|
||||
@ -131,7 +132,7 @@ void BackgroundSchedulePool::TaskInfo::execute()
|
||||
}
|
||||
}
|
||||
|
||||
void BackgroundSchedulePool::TaskInfo::scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock)
|
||||
void BackgroundSchedulePoolTaskInfo::scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock)
|
||||
{
|
||||
scheduled = true;
|
||||
|
||||
@ -145,7 +146,7 @@ void BackgroundSchedulePool::TaskInfo::scheduleImpl(std::lock_guard<std::mutex>
|
||||
pool.queue.enqueueNotification(new TaskNotification(shared_from_this()));
|
||||
}
|
||||
|
||||
Coordination::WatchCallback BackgroundSchedulePool::TaskInfo::getWatchCallback()
|
||||
Coordination::WatchCallback BackgroundSchedulePoolTaskInfo::getWatchCallback()
|
||||
{
|
||||
return [t = shared_from_this()](const Coordination::WatchResponse &)
|
||||
{
|
||||
|
@ -20,6 +20,8 @@ namespace DB
|
||||
{
|
||||
|
||||
class TaskNotification;
|
||||
class BackgroundSchedulePoolTaskInfo;
|
||||
class BackgroundSchedulePoolTaskHolder;
|
||||
|
||||
|
||||
/** Executes functions scheduled at a specific point in time.
|
||||
@ -35,84 +37,14 @@ class TaskNotification;
|
||||
class BackgroundSchedulePool
|
||||
{
|
||||
public:
|
||||
class TaskInfo;
|
||||
friend class BackgroundSchedulePoolTaskInfo;
|
||||
|
||||
using TaskInfo = BackgroundSchedulePoolTaskInfo;
|
||||
using TaskInfoPtr = std::shared_ptr<TaskInfo>;
|
||||
using TaskFunc = std::function<void()>;
|
||||
using TaskHolder = BackgroundSchedulePoolTaskHolder;
|
||||
using DelayedTasks = std::multimap<Poco::Timestamp, TaskInfoPtr>;
|
||||
|
||||
class TaskInfo : public std::enable_shared_from_this<TaskInfo>, private boost::noncopyable
|
||||
{
|
||||
public:
|
||||
TaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const TaskFunc & function_);
|
||||
|
||||
/// Schedule for execution as soon as possible (if not already scheduled).
|
||||
/// If the task was already scheduled with delay, the delay will be ignored.
|
||||
bool schedule();
|
||||
|
||||
/// Schedule for execution after specified delay.
|
||||
bool scheduleAfter(size_t ms);
|
||||
|
||||
/// Further attempts to schedule become no-op. Will wait till the end of the current execution of the task.
|
||||
void deactivate();
|
||||
|
||||
void activate();
|
||||
|
||||
/// Atomically activate task and schedule it for execution.
|
||||
bool activateAndSchedule();
|
||||
|
||||
/// get Coordination::WatchCallback needed for notifications from ZooKeeper watches.
|
||||
Coordination::WatchCallback getWatchCallback();
|
||||
|
||||
private:
|
||||
friend class TaskNotification;
|
||||
friend class BackgroundSchedulePool;
|
||||
|
||||
void execute();
|
||||
|
||||
void scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock);
|
||||
|
||||
BackgroundSchedulePool & pool;
|
||||
std::string log_name;
|
||||
TaskFunc function;
|
||||
|
||||
std::mutex exec_mutex;
|
||||
std::mutex schedule_mutex;
|
||||
|
||||
/// Invariants:
|
||||
/// * If deactivated is true then scheduled, delayed and executing are all false.
|
||||
/// * scheduled and delayed cannot be true at the same time.
|
||||
bool deactivated = false;
|
||||
bool scheduled = false;
|
||||
bool delayed = false;
|
||||
bool executing = false;
|
||||
|
||||
/// If the task is scheduled with delay, points to element of delayed_tasks.
|
||||
DelayedTasks::iterator iterator;
|
||||
};
|
||||
|
||||
class TaskHolder
|
||||
{
|
||||
public:
|
||||
TaskHolder() = default;
|
||||
explicit TaskHolder(const TaskInfoPtr & task_info_) : task_info(task_info_) {}
|
||||
TaskHolder(const TaskHolder & other) = delete;
|
||||
TaskHolder(TaskHolder && other) noexcept = default;
|
||||
TaskHolder & operator=(const TaskHolder & other) noexcept = delete;
|
||||
TaskHolder & operator=(TaskHolder && other) noexcept = default;
|
||||
|
||||
~TaskHolder()
|
||||
{
|
||||
if (task_info)
|
||||
task_info->deactivate();
|
||||
}
|
||||
|
||||
TaskInfo * operator->() { return task_info.get(); }
|
||||
const TaskInfo * operator->() const { return task_info.get(); }
|
||||
|
||||
private:
|
||||
TaskInfoPtr task_info;
|
||||
};
|
||||
|
||||
TaskHolder createTask(const std::string & log_name, const TaskFunc & function);
|
||||
|
||||
size_t getNumberOfThreads() const { return size; }
|
||||
@ -153,4 +85,81 @@ private:
|
||||
void attachToThreadGroup();
|
||||
};
|
||||
|
||||
|
||||
class BackgroundSchedulePoolTaskInfo : public std::enable_shared_from_this<BackgroundSchedulePoolTaskInfo>, private boost::noncopyable
|
||||
{
|
||||
public:
|
||||
BackgroundSchedulePoolTaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_);
|
||||
|
||||
/// Schedule for execution as soon as possible (if not already scheduled).
|
||||
/// If the task was already scheduled with delay, the delay will be ignored.
|
||||
bool schedule();
|
||||
|
||||
/// Schedule for execution after specified delay.
|
||||
bool scheduleAfter(size_t ms);
|
||||
|
||||
/// Further attempts to schedule become no-op. Will wait till the end of the current execution of the task.
|
||||
void deactivate();
|
||||
|
||||
void activate();
|
||||
|
||||
/// Atomically activate task and schedule it for execution.
|
||||
bool activateAndSchedule();
|
||||
|
||||
/// get Coordination::WatchCallback needed for notifications from ZooKeeper watches.
|
||||
Coordination::WatchCallback getWatchCallback();
|
||||
|
||||
private:
|
||||
friend class TaskNotification;
|
||||
friend class BackgroundSchedulePool;
|
||||
|
||||
void execute();
|
||||
|
||||
void scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock);
|
||||
|
||||
BackgroundSchedulePool & pool;
|
||||
std::string log_name;
|
||||
BackgroundSchedulePool::TaskFunc function;
|
||||
|
||||
std::mutex exec_mutex;
|
||||
std::mutex schedule_mutex;
|
||||
|
||||
/// Invariants:
|
||||
/// * If deactivated is true then scheduled, delayed and executing are all false.
|
||||
/// * scheduled and delayed cannot be true at the same time.
|
||||
bool deactivated = false;
|
||||
bool scheduled = false;
|
||||
bool delayed = false;
|
||||
bool executing = false;
|
||||
|
||||
/// If the task is scheduled with delay, points to element of delayed_tasks.
|
||||
BackgroundSchedulePool::DelayedTasks::iterator iterator;
|
||||
};
|
||||
|
||||
using BackgroundSchedulePoolTaskInfoPtr = std::shared_ptr<BackgroundSchedulePoolTaskInfo>;
|
||||
|
||||
|
||||
class BackgroundSchedulePoolTaskHolder
|
||||
{
|
||||
public:
|
||||
BackgroundSchedulePoolTaskHolder() = default;
|
||||
explicit BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskInfoPtr & task_info_) : task_info(task_info_) {}
|
||||
BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskHolder & other) = delete;
|
||||
BackgroundSchedulePoolTaskHolder(BackgroundSchedulePoolTaskHolder && other) noexcept = default;
|
||||
BackgroundSchedulePoolTaskHolder & operator=(const BackgroundSchedulePoolTaskHolder & other) noexcept = delete;
|
||||
BackgroundSchedulePoolTaskHolder & operator=(BackgroundSchedulePoolTaskHolder && other) noexcept = default;
|
||||
|
||||
~BackgroundSchedulePoolTaskHolder()
|
||||
{
|
||||
if (task_info)
|
||||
task_info->deactivate();
|
||||
}
|
||||
|
||||
BackgroundSchedulePoolTaskInfo * operator->() { return task_info.get(); }
|
||||
const BackgroundSchedulePoolTaskInfo * operator->() const { return task_info.get(); }
|
||||
|
||||
private:
|
||||
BackgroundSchedulePoolTaskInfoPtr task_info;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -5,8 +5,6 @@
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <Poco/Ext/ThreadNumber.h>
|
||||
|
||||
|
||||
namespace CurrentMetrics
|
||||
|
@ -43,6 +43,9 @@ struct BlockIO
|
||||
|
||||
BlockIO & operator= (const BlockIO & rhs)
|
||||
{
|
||||
if (this == &rhs)
|
||||
return *this;
|
||||
|
||||
out.reset();
|
||||
in.reset();
|
||||
process_list_entry.reset();
|
||||
|
@ -6,6 +6,10 @@
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/PODArray.h>
|
||||
|
||||
#include <Common/config.h>
|
||||
#if USE_LFALLOC
|
||||
#include <Common/LFAllocator.h>
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -33,7 +37,9 @@ struct MarkInCompressedFile
|
||||
return "(" + DB::toString(offset_in_compressed_file) + "," + DB::toString(offset_in_decompressed_block) + ")";
|
||||
}
|
||||
};
|
||||
|
||||
#if USE_LFALLOC
|
||||
using MarksInCompressedFile = PODArray<MarkInCompressedFile, 4096, LFAllocator>;
|
||||
#else
|
||||
using MarksInCompressedFile = PODArray<MarkInCompressedFile>;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
@ -1,7 +1,6 @@
|
||||
#include <future>
|
||||
#include <Common/setThreadName.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
|
||||
|
@ -8,8 +8,6 @@
|
||||
#include <condition_variable>
|
||||
|
||||
|
||||
class MemoryTracker;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <Common/setThreadName.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/MemoryTracker.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/ThreadPool.h>
|
||||
|
||||
|
208
dbms/src/DataStreams/TTLBlockInputStream.cpp
Normal file
208
dbms/src/DataStreams/TTLBlockInputStream.cpp
Normal file
@ -0,0 +1,208 @@
|
||||
#include <DataStreams/TTLBlockInputStream.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <Interpreters/evaluateMissingDefaults.h>
|
||||
#include <Interpreters/SyntaxAnalyzer.h>
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
/** Prepares TTL processing of one data part.
  * Seeds new_ttl_infos from the part's current TTL infos: a column whose minimal TTL has already
  * passed gets a fresh (empty) info and is provisionally put into empty_columns; other columns keep
  * their info. For provisionally empty columns that have a default expression, the expression is
  * collected and compiled into defaults_expression.
  */
TTLBlockInputStream::TTLBlockInputStream(
    const BlockInputStreamPtr & input_,
    const MergeTreeData & storage_,
    const MergeTreeData::MutableDataPartPtr & data_part_,
    time_t current_time_)
    : storage(storage_)
    , data_part(data_part_)
    , current_time(current_time_)
    , old_ttl_infos(data_part->ttl_infos)
    , log(&Logger::get(storage.getLogName() + " (TTLBlockInputStream)"))
    , date_lut(DateLUT::instance())
{
    children.push_back(input_);

    const auto & column_defaults = storage.getColumns().getDefaults();
    ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();

    for (const auto & [name, ttl_info] : old_ttl_infos.columns_ttl)
    {
        /// Nothing in this column has expired yet: carry its TTL info over unchanged.
        if (ttl_info.min > current_time)
        {
            new_ttl_infos.columns_ttl.emplace(name, ttl_info);
            continue;
        }

        /// Some values have expired: the info is rebuilt while reading, and the column counts
        /// as empty until a non-expired value is met.
        new_ttl_infos.columns_ttl.emplace(name, MergeTreeDataPart::TTLInfo{});
        empty_columns.emplace(name);

        const auto default_it = column_defaults.find(name);
        if (default_it == column_defaults.end())
            continue;

        default_expr_list->children.emplace_back(
            setAlias(default_it->second.expression, default_it->first));
    }

    /// The table-level info is kept only if no row of the part has expired.
    if (old_ttl_infos.table_ttl.min > current_time)
        new_ttl_infos.table_ttl = old_ttl_infos.table_ttl;

    const bool has_default_expressions = !default_expr_list->children.empty();
    if (has_default_expressions)
    {
        auto syntax_result = SyntaxAnalyzer(storage.global_context).analyze(
            default_expr_list, storage.getColumns().getAllPhysical());
        defaults_expression = ExpressionAnalyzer{default_expr_list, syntax_result, storage.global_context}.getActions(true);
    }
}
|
||||
|
||||
|
||||
/// The stream does not change the block structure: the header is that of the single child stream.
Block TTLBlockInputStream::getHeader() const
{
    const auto & source_stream = children.at(0);
    return source_stream->getHeader();
}
|
||||
|
||||
/** Reads the next block from the child stream and applies TTL rules to it.
  * Returns an empty block when the child is exhausted, or when the table TTL of the whole part
  * has expired (in which case all rows of the part are reported as removed).
  */
Block TTLBlockInputStream::readImpl()
{
    Block block = children.at(0)->read();
    if (!block)
        return block;

    if (storage.hasTableTTL())
    {
        const auto & table_ttl = old_ttl_infos.table_ttl;

        /// Even the latest TTL of the part has passed: skip all data.
        if (table_ttl.max <= current_time)
        {
            rows_removed = data_part->rows_count;
            return {};
        }

        /// At least one row may have expired: filter row by row.
        if (table_ttl.min <= current_time)
            removeRowsWithExpiredTableTTL(block);
    }

    removeValuesWithExpiredColumnTTL(block);
    return block;
}
|
||||
|
||||
void TTLBlockInputStream::readSuffixImpl()
|
||||
{
|
||||
for (const auto & elem : new_ttl_infos.columns_ttl)
|
||||
new_ttl_infos.updatePartMinTTL(elem.second.min);
|
||||
|
||||
new_ttl_infos.updatePartMinTTL(new_ttl_infos.table_ttl.min);
|
||||
|
||||
data_part->ttl_infos = std::move(new_ttl_infos);
|
||||
data_part->empty_columns = std::move(empty_columns);
|
||||
|
||||
if (rows_removed)
|
||||
LOG_INFO(log, "Removed " << rows_removed << " rows with expired ttl from part " << data_part->name);
|
||||
}
|
||||
|
||||
/** Removes from `block` every row whose table-level TTL is not later than current_time.
  *
  * The table TTL expression is executed on the block to obtain the per-row expiration moment,
  * then each physical column is rebuilt from the surviving rows. Surviving rows also feed the
  * new table TTL info. The resulting block has the structure of getHeader(), so the temporary
  * TTL result column is not part of it.
  *
  * rows_removed is increased by the number of dropped rows. The same per-row decision is
  * repeated for every column, so a dropped row is counted only while the first column is
  * processed; counting it in each pass would multiply the total by the number of columns.
  */
void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block)
{
    storage.ttl_table_entry.expression->execute(block);

    const auto & current = block.getByName(storage.ttl_table_entry.result_column);
    const IColumn * ttl_column = current.column.get();

    /// The block is not modified until the final assignment, so the row count is loop-invariant.
    const size_t rows = block.rows();

    MutableColumns result_columns;
    result_columns.reserve(getHeader().columns());

    bool is_first_column = true;
    for (const auto & name : storage.getColumns().getNamesOfPhysical())
    {
        auto & column_with_type = block.getByName(name);
        const IColumn * values_column = column_with_type.column.get();
        MutableColumnPtr result_column = values_column->cloneEmpty();
        result_column->reserve(rows);

        for (size_t i = 0; i < rows; ++i)
        {
            const UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
            if (cur_ttl > current_time)
            {
                new_ttl_infos.table_ttl.update(cur_ttl);
                result_column->insertFrom(*values_column, i);
            }
            else if (is_first_column)
            {
                /// Count each expired row exactly once, not once per column.
                ++rows_removed;
            }
        }

        result_columns.emplace_back(std::move(result_column));
        is_first_column = false;
    }

    block = getHeader().cloneWithColumns(std::move(result_columns));
}
|
||||
|
||||
/** Replaces values whose column-level TTL has expired.
  *
  * For every column that has a TTL entry and whose old TTL range straddles current_time
  * (min <= current_time < max), each row is checked: an expired value is replaced with the value
  * of the column's default expression if there is one, otherwise with the type default;
  * a live value is kept, feeds the new TTL info and removes the column from empty_columns.
  * Columns that are entirely live or entirely expired are left as they are.
  * All TTL result columns added along the way are erased from the block at the end.
  */
void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
{
    /// Default expressions are evaluated on a copy so that the source block keeps the original values.
    Block block_with_defaults;
    if (defaults_expression)
    {
        block_with_defaults = block;
        defaults_expression->execute(block_with_defaults);
    }

    for (const auto & [name, ttl_entry] : storage.ttl_entries_by_name)
    {
        /// Both lookups use operator[]: a missing entry is created with default values.
        const auto & old_ttl_info = old_ttl_infos.columns_ttl[name];
        auto & new_ttl_info = new_ttl_infos.columns_ttl[name];

        const bool nothing_expired = old_ttl_info.min > current_time;
        const bool everything_expired = old_ttl_info.max <= current_time;
        if (nothing_expired || everything_expired)
            continue;

        if (!block.has(ttl_entry.result_column))
            ttl_entry.expression->execute(block);

        ColumnPtr default_column = nullptr;
        if (block_with_defaults.has(name))
            default_column = block_with_defaults.getByName(name).column->convertToFullColumnIfConst();

        auto & column_with_type = block.getByName(name);
        const IColumn * values_column = column_with_type.column.get();
        MutableColumnPtr result_column = values_column->cloneEmpty();
        const size_t rows = block.rows();
        result_column->reserve(rows);

        const auto & current = block.getByName(ttl_entry.result_column);
        const IColumn * ttl_column = current.column.get();

        for (size_t i = 0; i < rows; ++i)
        {
            const UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);

            if (cur_ttl > current_time)
            {
                /// Live value: keep it; the column is therefore not empty.
                new_ttl_info.update(cur_ttl);
                empty_columns.erase(name);
                result_column->insertFrom(*values_column, i);
                continue;
            }

            /// Expired value: substitute the default.
            if (default_column)
                result_column->insertFrom(*default_column, i);
            else
                result_column->insertDefault();
        }

        column_with_type.column = std::move(result_column);
    }

    /// Drop the auxiliary TTL result columns.
    for (const auto & entry : storage.ttl_entries_by_name)
    {
        const auto & ttl_result_name = entry.second.result_column;
        if (block.has(ttl_result_name))
            block.erase(ttl_result_name);
    }
}
|
||||
|
||||
/// Returns the TTL timestamp stored at row `ind` of a TTL result column.
/// A UInt16 column is interpreted as day numbers and converted with date_lut.fromDayNum();
/// a UInt32 column is returned as is. Any other column type throws LOGICAL_ERROR.
UInt32 TTLBlockInputStream::getTimestampByIndex(const IColumn * column, size_t ind)
{
    if (const auto * days = typeid_cast<const ColumnUInt16 *>(column))
        return date_lut.fromDayNum(DayNum(days->getData()[ind]));

    if (const auto * timestamps = typeid_cast<const ColumnUInt32 *>(column))
        return timestamps->getData()[ind];

    throw Exception("Unexpected type of result ttl column", ErrorCodes::LOGICAL_ERROR);
}
|
||||
|
||||
}
|
60
dbms/src/DataStreams/TTLBlockInputStream.h
Normal file
60
dbms/src/DataStreams/TTLBlockInputStream.h
Normal file
@ -0,0 +1,60 @@
|
||||
#pragma once
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPart.h>
|
||||
#include <Core/Block.h>
|
||||
|
||||
#include <common/DateLUT.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class TTLBlockInputStream : public IBlockInputStream
|
||||
{
|
||||
public:
|
||||
TTLBlockInputStream(
|
||||
const BlockInputStreamPtr & input_,
|
||||
const MergeTreeData & storage_,
|
||||
const MergeTreeData::MutableDataPartPtr & data_part_,
|
||||
time_t current_time
|
||||
);
|
||||
|
||||
String getName() const override { return "TTLBlockInputStream"; }
|
||||
|
||||
Block getHeader() const override;
|
||||
|
||||
protected:
|
||||
Block readImpl() override;
|
||||
|
||||
/// Finalizes ttl infos and updates data part
|
||||
void readSuffixImpl() override;
|
||||
|
||||
private:
|
||||
const MergeTreeData & storage;
|
||||
|
||||
/// ttl_infos and empty_columns are updating while reading
|
||||
const MergeTreeData::MutableDataPartPtr & data_part;
|
||||
|
||||
time_t current_time;
|
||||
|
||||
MergeTreeDataPart::TTLInfos old_ttl_infos;
|
||||
MergeTreeDataPart::TTLInfos new_ttl_infos;
|
||||
NameSet empty_columns;
|
||||
|
||||
size_t rows_removed = 0;
|
||||
Logger * log;
|
||||
DateLUTImpl date_lut;
|
||||
|
||||
std::unordered_map<String, String> defaults_result_column;
|
||||
ExpressionActionsPtr defaults_expression;
|
||||
private:
|
||||
/// Removes values with expired ttl and computes new min_ttl and empty_columns for part
|
||||
void removeValuesWithExpiredColumnTTL(Block & block);
|
||||
|
||||
/// Remove rows with expired table ttl and computes new min_ttl for part
|
||||
void removeRowsWithExpiredTableTTL(Block & block);
|
||||
|
||||
UInt32 getTimestampByIndex(const IColumn * column, size_t ind);
|
||||
};
|
||||
|
||||
}
|
@ -20,7 +20,7 @@ namespace ErrorCodes
|
||||
namespace
|
||||
{
|
||||
|
||||
class DataTypeDomanIPv4 : public DataTypeDomainWithSimpleSerialization
|
||||
class DataTypeDomainIPv4 : public DataTypeDomainWithSimpleSerialization
|
||||
{
|
||||
public:
|
||||
const char * getName() const override
|
||||
@ -63,7 +63,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
class DataTypeDomanIPv6 : public DataTypeDomainWithSimpleSerialization
|
||||
class DataTypeDomainIPv6 : public DataTypeDomainWithSimpleSerialization
|
||||
{
|
||||
public:
|
||||
const char * getName() const override
|
||||
@ -111,8 +111,8 @@ public:
|
||||
|
||||
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory)
|
||||
{
|
||||
factory.registerDataTypeDomain("UInt32", std::make_unique<DataTypeDomanIPv4>());
|
||||
factory.registerDataTypeDomain("FixedString(16)", std::make_unique<DataTypeDomanIPv6>());
|
||||
factory.registerDataTypeDomain("UInt32", std::make_unique<DataTypeDomainIPv4>());
|
||||
factory.registerDataTypeDomain("FixedString(16)", std::make_unique<DataTypeDomainIPv6>());
|
||||
}
|
||||
|
||||
} // namespace DB
|
||||
|
@ -690,10 +690,9 @@ void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams(
|
||||
};
|
||||
|
||||
if (!settings.continuous_reading)
|
||||
{
|
||||
low_cardinality_state->num_pending_rows = 0;
|
||||
|
||||
if (!settings.continuous_reading)
|
||||
{
|
||||
/// Remember in state that some granules were skipped and we need to update dictionary.
|
||||
low_cardinality_state->need_update_dictionary = true;
|
||||
}
|
||||
|
@ -14,7 +14,7 @@ namespace DB
|
||||
class IDataTypeDummy : public DataTypeWithSimpleSerialization
|
||||
{
|
||||
private:
|
||||
void throwNoSerialization() const
|
||||
[[noreturn]] void throwNoSerialization() const
|
||||
{
|
||||
throw Exception("Serialization is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
@ -77,7 +77,7 @@ namespace ProtobufColumnMatcher
|
||||
|
||||
namespace details
|
||||
{
|
||||
void throwNoCommonColumns();
|
||||
[[noreturn]] void throwNoCommonColumns();
|
||||
|
||||
class ColumnNameMatcher
|
||||
{
|
||||
|
@ -385,73 +385,61 @@ public:
|
||||
bool readStringInto(PaddedPODArray<UInt8> &) override
|
||||
{
|
||||
cannotConvertType("String");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readInt8(Int8 &) override
|
||||
{
|
||||
cannotConvertType("Int8");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readUInt8(UInt8 &) override
|
||||
{
|
||||
cannotConvertType("UInt8");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readInt16(Int16 &) override
|
||||
{
|
||||
cannotConvertType("Int16");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readUInt16(UInt16 &) override
|
||||
{
|
||||
cannotConvertType("UInt16");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readInt32(Int32 &) override
|
||||
{
|
||||
cannotConvertType("Int32");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readUInt32(UInt32 &) override
|
||||
{
|
||||
cannotConvertType("UInt32");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readInt64(Int64 &) override
|
||||
{
|
||||
cannotConvertType("Int64");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readUInt64(UInt64 &) override
|
||||
{
|
||||
cannotConvertType("UInt64");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readUInt128(UInt128 &) override
|
||||
{
|
||||
cannotConvertType("UInt128");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readFloat32(Float32 &) override
|
||||
{
|
||||
cannotConvertType("Float32");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readFloat64(Float64 &) override
|
||||
{
|
||||
cannotConvertType("Float64");
|
||||
return false;
|
||||
}
|
||||
|
||||
void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) override {}
|
||||
@ -460,59 +448,50 @@ public:
|
||||
bool readEnum8(Int8 &) override
|
||||
{
|
||||
cannotConvertType("Enum");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readEnum16(Int16 &) override
|
||||
{
|
||||
cannotConvertType("Enum");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readUUID(UUID &) override
|
||||
{
|
||||
cannotConvertType("UUID");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readDate(DayNum &) override
|
||||
{
|
||||
cannotConvertType("Date");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readDateTime(time_t &) override
|
||||
{
|
||||
cannotConvertType("DateTime");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readDecimal32(Decimal32 &, UInt32, UInt32) override
|
||||
{
|
||||
cannotConvertType("Decimal32");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readDecimal64(Decimal64 &, UInt32, UInt32) override
|
||||
{
|
||||
cannotConvertType("Decimal64");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readDecimal128(Decimal128 &, UInt32, UInt32) override
|
||||
{
|
||||
cannotConvertType("Decimal128");
|
||||
return false;
|
||||
}
|
||||
|
||||
bool readAggregateFunction(const AggregateFunctionPtr &, AggregateDataPtr, Arena &) override
|
||||
{
|
||||
cannotConvertType("AggregateFunction");
|
||||
return false;
|
||||
}
|
||||
|
||||
protected:
|
||||
void cannotConvertType(const String & type_name)
|
||||
[[noreturn]] void cannotConvertType(const String & type_name)
|
||||
{
|
||||
throw Exception(
|
||||
String("Could not convert type '") + field->type_name() + "' from protobuf field '" + field->name() + "' to data type '"
|
||||
@ -520,7 +499,7 @@ protected:
|
||||
ErrorCodes::PROTOBUF_BAD_CAST);
|
||||
}
|
||||
|
||||
void cannotConvertValue(const String & value, const String & type_name)
|
||||
[[noreturn]] void cannotConvertValue(const String & value, const String & type_name)
|
||||
{
|
||||
throw Exception(
|
||||
"Could not convert value '" + value + "' from protobuf field '" + field->name() + "' to data type '" + type_name + "'",
|
||||
@ -557,7 +536,6 @@ protected:
|
||||
catch (...)
|
||||
{
|
||||
cannotConvertValue(StringRef(str.data(), str.size()).toString(), TypeName<To>::get());
|
||||
__builtin_unreachable();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -334,14 +334,14 @@ public:
|
||||
virtual void writeAggregateFunction(const AggregateFunctionPtr &, ConstAggregateDataPtr) override { cannotConvertType("AggregateFunction"); }
|
||||
|
||||
protected:
|
||||
void cannotConvertType(const String & type_name)
|
||||
[[noreturn]] void cannotConvertType(const String & type_name)
|
||||
{
|
||||
throw Exception(
|
||||
"Could not convert data type '" + type_name + "' to protobuf type '" + field->type_name() + "' (field: " + field->name() + ")",
|
||||
ErrorCodes::PROTOBUF_BAD_CAST);
|
||||
}
|
||||
|
||||
void cannotConvertValue(const String & value)
|
||||
[[noreturn]] void cannotConvertValue(const String & value)
|
||||
{
|
||||
throw Exception(
|
||||
"Could not convert value '" + value + "' to protobuf type '" + field->type_name() + "' (field: " + field->name() + ")",
|
||||
|
@ -423,7 +423,7 @@ inline bool tryParseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, Read
|
||||
|
||||
/** Throw exception with verbose message when string value is not parsed completely.
|
||||
*/
|
||||
void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, Block & block, size_t result);
|
||||
[[noreturn]] void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, Block & block, size_t result);
|
||||
|
||||
|
||||
enum class ConvertFromStringExceptionMode
|
||||
|
@ -520,7 +520,7 @@ void resizeDynamicSize(ArraySource && array_source, ValueSource && value_source,
|
||||
while (!sink.isEnd())
|
||||
{
|
||||
size_t row_num = array_source.rowNum();
|
||||
bool has_size = !size_null_map || (size_null_map && (*size_null_map)[row_num]);
|
||||
bool has_size = !size_null_map || (*size_null_map)[row_num];
|
||||
|
||||
if (has_size)
|
||||
{
|
||||
|
@ -8,12 +8,14 @@
|
||||
#include <Columns/ColumnNullable.h>
|
||||
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/UTF8Helpers.h>
|
||||
|
||||
#include <Functions/GatherUtils/IArraySource.h>
|
||||
#include <Functions/GatherUtils/IValueSource.h>
|
||||
#include <Functions/GatherUtils/Slices.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -276,6 +278,92 @@ struct StringSource
|
||||
};
|
||||
|
||||
|
||||
/// Differs from StringSource in that 'offset' and 'length' are measured in code points instead of bytes in the getSlice* methods.
|
||||
/** NOTE: The behaviour of substring and substringUTF8 is inconsistent when negative offset is greater than string size:
|
||||
* substring:
|
||||
* hello
|
||||
* ^-----^ - offset -10, length 7, result: "he"
|
||||
* substringUTF8:
|
||||
* hello
|
||||
* ^-----^ - offset -10, length 7, result: "hello"
|
||||
* This may be subject to change.
|
||||
*/
|
||||
struct UTF8StringSource : public StringSource
|
||||
{
|
||||
using StringSource::StringSource;
|
||||
|
||||
static const ColumnString::Char * skipCodePointsForward(const ColumnString::Char * pos, size_t size, const ColumnString::Char * end)
|
||||
{
|
||||
for (size_t i = 0; i < size && pos < end; ++i)
|
||||
pos += UTF8::seqLength(*pos); /// NOTE pos may become greater than end. It is Ok due to padding in PaddedPODArray.
|
||||
return pos;
|
||||
}
|
||||
|
||||
static const ColumnString::Char * skipCodePointsBackward(const ColumnString::Char * pos, size_t size, const ColumnString::Char * begin)
|
||||
{
|
||||
for (size_t i = 0; i < size && pos > begin; ++i)
|
||||
{
|
||||
--pos;
|
||||
if (pos == begin)
|
||||
break;
|
||||
UTF8::syncBackward(pos, begin);
|
||||
}
|
||||
return pos;
|
||||
}
|
||||
|
||||
Slice getSliceFromLeft(size_t offset) const
|
||||
{
|
||||
auto begin = &elements[prev_offset];
|
||||
auto end = elements.data() + offsets[row_num] - 1;
|
||||
auto res_begin = skipCodePointsForward(begin, offset, end);
|
||||
|
||||
if (res_begin >= end)
|
||||
return {begin, 0};
|
||||
|
||||
return {res_begin, size_t(end - res_begin)};
|
||||
}
|
||||
|
||||
Slice getSliceFromLeft(size_t offset, size_t length) const
|
||||
{
|
||||
auto begin = &elements[prev_offset];
|
||||
auto end = elements.data() + offsets[row_num] - 1;
|
||||
auto res_begin = skipCodePointsForward(begin, offset, end);
|
||||
|
||||
if (res_begin >= end)
|
||||
return {begin, 0};
|
||||
|
||||
auto res_end = skipCodePointsForward(res_begin, length, end);
|
||||
|
||||
if (res_end >= end)
|
||||
return {res_begin, size_t(end - res_begin)};
|
||||
|
||||
return {res_begin, size_t(res_end - res_begin)};
|
||||
}
|
||||
|
||||
Slice getSliceFromRight(size_t offset) const
|
||||
{
|
||||
auto begin = &elements[prev_offset];
|
||||
auto end = elements.data() + offsets[row_num] - 1;
|
||||
auto res_begin = skipCodePointsBackward(end, offset, begin);
|
||||
|
||||
return {res_begin, size_t(end - res_begin)};
|
||||
}
|
||||
|
||||
Slice getSliceFromRight(size_t offset, size_t length) const
|
||||
{
|
||||
auto begin = &elements[prev_offset];
|
||||
auto end = elements.data() + offsets[row_num] - 1;
|
||||
auto res_begin = skipCodePointsBackward(end, offset, begin);
|
||||
auto res_end = skipCodePointsForward(res_begin, length, end);
|
||||
|
||||
if (res_end >= end)
|
||||
return {res_begin, size_t(end - res_begin)};
|
||||
|
||||
return {res_begin, size_t(res_end - res_begin)};
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct FixedStringSource
|
||||
{
|
||||
using Slice = NumericArraySlice<UInt8>;
|
||||
|
@ -432,15 +432,20 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable
|
||||
current_has_nullable = true;
|
||||
else
|
||||
{
|
||||
typename Map::mapped_type * value = nullptr;
|
||||
|
||||
if constexpr (is_numeric_column)
|
||||
++map[columns[arg]->getElement(i)];
|
||||
value = &map[columns[arg]->getElement(i)];
|
||||
else if constexpr (std::is_same<ColumnType, ColumnString>::value || std::is_same<ColumnType, ColumnFixedString>::value)
|
||||
++map[columns[arg]->getDataAt(i)];
|
||||
value = &map[columns[arg]->getDataAt(i)];
|
||||
else
|
||||
{
|
||||
const char * data = nullptr;
|
||||
++map[columns[arg]->serializeValueIntoArena(i, arena, data)];
|
||||
value = &map[columns[arg]->serializeValueIntoArena(i, arena, data)];
|
||||
}
|
||||
|
||||
if (*value == arg)
|
||||
++(*value);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -61,7 +61,7 @@ public:
|
||||
{
|
||||
auto array_size = col_num->getInt(i);
|
||||
|
||||
if (unlikely(array_size) < 0)
|
||||
if (unlikely(array_size < 0))
|
||||
throw Exception("Array size cannot be negative: while executing function " + getName(), ErrorCodes::TOO_LARGE_ARRAY_SIZE);
|
||||
|
||||
offset += array_size;
|
||||
|
@ -153,7 +153,7 @@ template <typename A, typename B>
|
||||
struct NumIfImpl<A, B, NumberTraits::Error>
|
||||
{
|
||||
private:
|
||||
static void throw_error()
|
||||
[[noreturn]] static void throw_error()
|
||||
{
|
||||
throw Exception("Internal logic error: invalid types of arguments 2 and 3 of if", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
330
dbms/src/Functions/isValidUTF8.cpp
Normal file
330
dbms/src/Functions/isValidUTF8.cpp
Normal file
@ -0,0 +1,330 @@
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringOrArrayToT.h>
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
# include <emmintrin.h>
|
||||
# include <smmintrin.h>
|
||||
# include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// inspired by https://github.com/cyb70289/utf8/
|
||||
struct ValidUTF8Impl
|
||||
{
|
||||
/*
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2019 Yibo Cai
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
|
||||
*
|
||||
* Table 3-7. Well-Formed UTF-8 Byte Sequences
|
||||
*
|
||||
* +--------------------+------------+-------------+------------+-------------+
|
||||
* | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte |
|
||||
* +--------------------+------------+-------------+------------+-------------+
|
||||
* | U+0000..U+007F | 00..7F | | | |
|
||||
* +--------------------+------------+-------------+------------+-------------+
|
||||
* | U+0080..U+07FF | C2..DF | 80..BF | | |
|
||||
* +--------------------+------------+-------------+------------+-------------+
|
||||
* | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
|
||||
* +--------------------+------------+-------------+------------+-------------+
|
||||
* | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
|
||||
* +--------------------+------------+-------------+------------+-------------+
|
||||
* | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
|
||||
* +--------------------+------------+-------------+------------+-------------+
|
||||
* | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
|
||||
* +--------------------+------------+-------------+------------+-------------+
|
||||
* | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
|
||||
* +--------------------+------------+-------------+------------+-------------+
|
||||
* | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
|
||||
* +--------------------+------------+-------------+------------+-------------+
|
||||
* | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
|
||||
* +--------------------+------------+-------------+------------+-------------+
|
||||
*/
|
||||
|
||||
/// Scalar validator: returns 1 if the `len` bytes at `data` consist only of well-formed UTF-8
/// sequences as listed in the table above (no overlong forms, no surrogates, nothing above
/// U+10FFFF, no truncated sequence at the end), otherwise 0. An empty input is valid.
static inline UInt8 isValidUTF8Naive(const UInt8 * data, UInt64 len)
{
    /// Continuation bytes are 80..BF; reinterpreted as signed 8-bit values this is exactly "<= 0xBF".
    const auto is_continuation = [](UInt8 byte) { return static_cast<Int8>(byte) <= static_cast<Int8>(0xBF); };

    while (len != 0)
    {
        const UInt8 lead = data[0];
        UInt64 seq_len = 0;

        if (lead <= 0x7F)
        {
            /* 00..7F */
            seq_len = 1;
        }
        else if (len >= 2 && lead >= 0xC2 && lead <= 0xDF && is_continuation(data[1]))
        {
            /* C2..DF, 80..BF */
            seq_len = 2;
        }
        else
        {
            /// Anything else needs at least three bytes to be valid.
            if (len < 3)
                return false;

            const UInt8 second = data[1];
            const bool second_ok = is_continuation(second);
            const bool third_ok = is_continuation(data[2]);

            const bool three_byte_lead =
                /* E0, A0..BF, 80..BF */
                (lead == 0xE0 && second >= 0xA0)
                /* E1..EC, 80..BF, 80..BF */
                || (lead >= 0xE1 && lead <= 0xEC)
                /* ED, 80..9F, 80..BF */
                || (lead == 0xED && second <= 0x9F)
                /* EE..EF, 80..BF, 80..BF */
                || (lead >= 0xEE && lead <= 0xEF);

            if (second_ok && third_ok && three_byte_lead)
            {
                seq_len = 3;
            }
            else
            {
                if (len < 4)
                    return false;

                const bool fourth_ok = is_continuation(data[3]);

                const bool four_byte_lead =
                    /* F0, 90..BF, 80..BF, 80..BF */
                    (lead == 0xF0 && second >= 0x90)
                    /* F1..F3, 80..BF, 80..BF, 80..BF */
                    || (lead >= 0xF1 && lead <= 0xF3)
                    /* F4, 80..8F, 80..BF, 80..BF */
                    || (lead == 0xF4 && second <= 0x8F);

                if (!(second_ok && third_ok && fourth_ok && four_byte_lead))
                    return false;

                seq_len = 4;
            }
        }

        data += seq_len;
        len -= seq_len;
    }

    return true;
}
|
||||
|
||||
#ifndef __SSE4_1__
|
||||
static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { return isValidUTF8Naive(data, len); }
|
||||
#else
|
||||
static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len)
|
||||
{
|
||||
/*
|
||||
* Map high nibble of "First Byte" to legal character length minus 1
|
||||
* 0x00 ~ 0xBF --> 0
|
||||
* 0xC0 ~ 0xDF --> 1
|
||||
* 0xE0 ~ 0xEF --> 2
|
||||
* 0xF0 ~ 0xFF --> 3
|
||||
*/
|
||||
const __m128i first_len_tbl = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3);
|
||||
|
||||
/* Map "First Byte" to 8-th item of range table (0xC2 ~ 0xF4) */
|
||||
const __m128i first_range_tbl = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8);
|
||||
|
||||
/*
|
||||
* Range table, map range index to min and max values
|
||||
*/
|
||||
const __m128i range_min_tbl
|
||||
= _mm_setr_epi8(0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80, 0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F);
|
||||
|
||||
const __m128i range_max_tbl
|
||||
= _mm_setr_epi8(0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F, 0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
|
||||
|
||||
/*
|
||||
* Tables for fast handling of four special First Bytes(E0,ED,F0,F4), after
|
||||
* which the Second Byte are not 80~BF. It contains "range index adjustment".
|
||||
* +------------+---------------+------------------+----------------+
|
||||
* | First Byte | original range| range adjustment | adjusted range |
|
||||
* +------------+---------------+------------------+----------------+
|
||||
* | E0 | 2 | 2 | 4 |
|
||||
* +------------+---------------+------------------+----------------+
|
||||
* | ED | 2 | 3 | 5 |
|
||||
* +------------+---------------+------------------+----------------+
|
||||
* | F0 | 3 | 3 | 6 |
|
||||
* +------------+---------------+------------------+----------------+
|
||||
* | F4 | 4 | 4 | 8 |
|
||||
* +------------+---------------+------------------+----------------+
|
||||
*/
|
||||
|
||||
/* index1 -> E0, index14 -> ED */
|
||||
const __m128i df_ee_tbl = _mm_setr_epi8(0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0);
|
||||
|
||||
/* index1 -> F0, index5 -> F4 */
|
||||
const __m128i ef_fe_tbl = _mm_setr_epi8(0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
|
||||
__m128i prev_input = _mm_set1_epi8(0);
|
||||
__m128i prev_first_len = _mm_set1_epi8(0);
|
||||
__m128i error = _mm_set1_epi8(0);
|
||||
|
||||
auto check_packed = [&](__m128i input) noexcept
|
||||
{
|
||||
/* high_nibbles = input >> 4 */
|
||||
const __m128i high_nibbles = _mm_and_si128(_mm_srli_epi16(input, 4), _mm_set1_epi8(0x0F));
|
||||
|
||||
/* first_len = legal character length minus 1 */
|
||||
/* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
|
||||
/* first_len = first_len_tbl[high_nibbles] */
|
||||
__m128i first_len = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
|
||||
|
||||
/* First Byte: set range index to 8 for bytes within 0xC0 ~ 0xFF */
|
||||
/* range = first_range_tbl[high_nibbles] */
|
||||
__m128i range = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
|
||||
|
||||
/* Second Byte: set range index to first_len */
|
||||
/* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
|
||||
/* range |= (first_len, prev_first_len) << 1 byte */
|
||||
range = _mm_or_si128(range, _mm_alignr_epi8(first_len, prev_first_len, 15));
|
||||
|
||||
/* Third Byte: set range index to saturate_sub(first_len, 1) */
|
||||
/* 0 for 00~7F, 0 for C0~DF, 1 for E0~EF, 2 for F0~FF */
|
||||
__m128i tmp1;
|
||||
__m128i tmp2;
|
||||
/* tmp1 = saturate_sub(first_len, 1) */
|
||||
tmp1 = _mm_subs_epu8(first_len, _mm_set1_epi8(1));
|
||||
/* tmp2 = saturate_sub(prev_first_len, 1) */
|
||||
tmp2 = _mm_subs_epu8(prev_first_len, _mm_set1_epi8(1));
|
||||
/* range |= (tmp1, tmp2) << 2 bytes */
|
||||
range = _mm_or_si128(range, _mm_alignr_epi8(tmp1, tmp2, 14));
|
||||
|
||||
/* Fourth Byte: set range index to saturate_sub(first_len, 2) */
|
||||
/* 0 for 00~7F, 0 for C0~DF, 0 for E0~EF, 1 for F0~FF */
|
||||
/* tmp1 = saturate_sub(first_len, 2) */
|
||||
tmp1 = _mm_subs_epu8(first_len, _mm_set1_epi8(2));
|
||||
/* tmp2 = saturate_sub(prev_first_len, 2) */
|
||||
tmp2 = _mm_subs_epu8(prev_first_len, _mm_set1_epi8(2));
|
||||
/* range |= (tmp1, tmp2) << 3 bytes */
|
||||
range = _mm_or_si128(range, _mm_alignr_epi8(tmp1, tmp2, 13));
|
||||
|
||||
/*
|
||||
* Now we have below range indices caluclated
|
||||
* Correct cases:
|
||||
* - 8 for C0~FF
|
||||
* - 3 for 1st byte after F0~FF
|
||||
* - 2 for 1st byte after E0~EF or 2nd byte after F0~FF
|
||||
* - 1 for 1st byte after C0~DF or 2nd byte after E0~EF or
|
||||
* 3rd byte after F0~FF
|
||||
* - 0 for others
|
||||
* Error cases:
|
||||
* 9,10,11 if non ascii First Byte overlaps
|
||||
* E.g., F1 80 C2 90 --> 8 3 10 2, where 10 indicates error
|
||||
*/
|
||||
|
||||
/* Adjust Second Byte range for special First Bytes(E0,ED,F0,F4) */
|
||||
/* Overlaps lead to index 9~15, which are illegal in range table */
|
||||
__m128i shift1, pos, range2;
|
||||
/* shift1 = (input, prev_input) << 1 byte */
|
||||
shift1 = _mm_alignr_epi8(input, prev_input, 15);
|
||||
pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
|
||||
/*
|
||||
* shift1: | EF F0 ... FE | FF 00 ... ... DE | DF E0 ... EE |
|
||||
* pos: | 0 1 15 | 16 17 239| 240 241 255|
|
||||
* pos-240: | 0 0 0 | 0 0 0 | 0 1 15 |
|
||||
* pos+112: | 112 113 127| >= 128 | >= 128 |
|
||||
*/
|
||||
tmp1 = _mm_subs_epu8(pos, _mm_set1_epi8(240));
|
||||
range2 = _mm_shuffle_epi8(df_ee_tbl, tmp1);
|
||||
tmp2 = _mm_adds_epu8(pos, _mm_set1_epi8(112));
|
||||
range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp2));
|
||||
|
||||
range = _mm_add_epi8(range, range2);
|
||||
|
||||
/* Load min and max values per calculated range index */
|
||||
__m128i minv = _mm_shuffle_epi8(range_min_tbl, range);
|
||||
__m128i maxv = _mm_shuffle_epi8(range_max_tbl, range);
|
||||
|
||||
/* Check value range */
|
||||
error = _mm_or_si128(error, _mm_cmplt_epi8(input, minv));
|
||||
error = _mm_or_si128(error, _mm_cmpgt_epi8(input, maxv));
|
||||
|
||||
prev_input = input;
|
||||
prev_first_len = first_len;
|
||||
|
||||
data += 16;
|
||||
len -= 16;
|
||||
};
|
||||
|
||||
while (len >= 16)
|
||||
check_packed(_mm_loadu_si128(reinterpret_cast<const __m128i *>(data)));
|
||||
|
||||
/// 0 <= len <= 15 for now. Reading data from data - 1 because of right padding of 15 and left padding
|
||||
/// Then zero some bytes from the unknown memory and check again.
|
||||
alignas(16) char buf[32];
|
||||
_mm_store_si128(reinterpret_cast<__m128i *>(buf), _mm_loadu_si128(reinterpret_cast<const __m128i *>(data - 1)));
|
||||
memset(buf + len + 1, 0, 16);
|
||||
check_packed(_mm_loadu_si128(reinterpret_cast<__m128i *>(buf + 1)));
|
||||
|
||||
/* Reduce error vector, error_reduced = 0xFFFF if error == 0 */
|
||||
return _mm_movemask_epi8(_mm_cmpeq_epi8(error, _mm_set1_epi8(0))) == 0xFFFF;
|
||||
}
|
||||
#endif
|
||||
|
||||
static constexpr bool is_fixed_to_constant = false;
|
||||
|
||||
/// Validates every row of a string column and writes 1/0 into res[row].
/// Row `row` starts where the previous row ended and spans up to, but not including,
/// byte offsets[row] - 1 (presumably the row's terminating zero byte; confirm against ColumnString).
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res)
{
    const auto * chars = data.data();
    const size_t rows = offsets.size();

    size_t row_begin = 0;
    for (size_t row = 0; row < rows; ++row)
    {
        const auto row_end = offsets[row];
        res[row] = isValidUTF8(chars + row_begin, row_end - 1 - row_begin);
        row_begin = row_end;
    }
}
|
||||
|
||||
/// Does nothing.
/// NOTE(review): presumably never called, because is_fixed_to_constant is false for this implementation.
static void vector_fixed_to_constant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt8 & /*res*/)
{
}
|
||||
|
||||
/// Validates every value of a fixed-size string column: `data` is treated as consecutive
/// values of `n` bytes each, and res[row] receives 1/0 for the row-th value.
static void vector_fixed_to_vector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt8> & res)
{
    const size_t rows = data.size() / n;
    const auto * value = data.data();

    for (size_t row = 0; row < rows; ++row, value += n)
        res[row] = isValidUTF8(value, n);
}
|
||||
|
||||
/// Array arguments are not supported: always throws ILLEGAL_TYPE_OF_ARGUMENT.
static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray<UInt8> & /*res*/)
{
    throw Exception("Cannot apply function isValidUTF8 to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
|
||||
};
|
||||
|
||||
/// Carries the function name "isValidUTF8" as a compile-time constant.
struct NameValidUTF8
{
    static constexpr auto name = "isValidUTF8";
};
|
||||
using FunctionValidUTF8 = FunctionStringOrArrayToT<ValidUTF8Impl, NameValidUTF8, UInt8>;
|
||||
|
||||
/// Registers FunctionValidUTF8 in the given function factory.
void registerFunctionValidUTF8(FunctionFactory & factory)
{
    factory.registerFunction<FunctionValidUTF8>();
}
|
||||
|
||||
}
|
@ -17,15 +17,15 @@ struct ExtractQueryStringAndFragment
|
||||
res_data = data;
|
||||
res_size = 0;
|
||||
|
||||
Pos pos = data;
|
||||
Pos end = pos + size;
|
||||
Pos end = data + size;
|
||||
Pos pos;
|
||||
|
||||
if (end != (pos = find_first_symbols<'?'>(pos, end)))
|
||||
if (end != (pos = find_first_symbols<'?'>(data, end)))
|
||||
{
|
||||
res_data = pos + (without_leading_char ? 1 : 0);
|
||||
res_size = end - res_data;
|
||||
}
|
||||
else if (end != (pos = find_first_symbols<'#'>(pos, end)))
|
||||
else if (end != (pos = find_first_symbols<'#'>(data, end)))
|
||||
{
|
||||
res_data = pos;
|
||||
res_size = end - res_data;
|
||||
|
@ -9,6 +9,7 @@ void registerFunctionEmpty(FunctionFactory &);
|
||||
void registerFunctionNotEmpty(FunctionFactory &);
|
||||
void registerFunctionLength(FunctionFactory &);
|
||||
void registerFunctionLengthUTF8(FunctionFactory &);
|
||||
void registerFunctionValidUTF8(FunctionFactory &);
|
||||
void registerFunctionLower(FunctionFactory &);
|
||||
void registerFunctionUpper(FunctionFactory &);
|
||||
void registerFunctionLowerUTF8(FunctionFactory &);
|
||||
@ -17,7 +18,6 @@ void registerFunctionReverse(FunctionFactory &);
|
||||
void registerFunctionReverseUTF8(FunctionFactory &);
|
||||
void registerFunctionsConcat(FunctionFactory &);
|
||||
void registerFunctionSubstring(FunctionFactory &);
|
||||
void registerFunctionSubstringUTF8(FunctionFactory &);
|
||||
void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &);
|
||||
void registerFunctionStartsWith(FunctionFactory &);
|
||||
void registerFunctionEndsWith(FunctionFactory &);
|
||||
@ -36,6 +36,7 @@ void registerFunctionsString(FunctionFactory & factory)
|
||||
registerFunctionNotEmpty(factory);
|
||||
registerFunctionLength(factory);
|
||||
registerFunctionLengthUTF8(factory);
|
||||
registerFunctionValidUTF8(factory);
|
||||
registerFunctionLower(factory);
|
||||
registerFunctionUpper(factory);
|
||||
registerFunctionLowerUTF8(factory);
|
||||
@ -44,7 +45,6 @@ void registerFunctionsString(FunctionFactory & factory)
|
||||
registerFunctionReverseUTF8(factory);
|
||||
registerFunctionsConcat(factory);
|
||||
registerFunctionSubstring(factory);
|
||||
registerFunctionSubstringUTF8(factory);
|
||||
registerFunctionAppendTrailingCharIfAbsent(factory);
|
||||
registerFunctionStartsWith(factory);
|
||||
registerFunctionEndsWith(factory);
|
||||
|
@ -28,10 +28,13 @@ namespace ErrorCodes
|
||||
}
|
||||
|
||||
|
||||
/// If 'is_utf8' - measure offset and length in code points instead of bytes.
|
||||
/// UTF8 variant is not available for FixedString arguments.
|
||||
template <bool is_utf8>
|
||||
class FunctionSubstring : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "substring";
|
||||
static constexpr auto name = is_utf8 ? "substringUTF8" : "substring";
|
||||
static FunctionPtr create(const Context &)
|
||||
{
|
||||
return std::make_shared<FunctionSubstring>();
|
||||
@ -56,7 +59,7 @@ public:
|
||||
+ toString(number_of_arguments) + ", should be 2 or 3",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
if (!isStringOrFixedString(arguments[0]))
|
||||
if ((is_utf8 && !isString(arguments[0])) || !isStringOrFixedString(arguments[0]))
|
||||
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
if (!isNumber(arguments[1]))
|
||||
@ -80,7 +83,7 @@ public:
|
||||
Int64 start_value, Int64 length_value, Block & block, size_t result, Source && source,
|
||||
size_t input_rows_count)
|
||||
{
|
||||
auto col_res = ColumnString::create();
|
||||
auto col_res = ColumnString::create();
|
||||
|
||||
if (!column_length)
|
||||
{
|
||||
@ -145,30 +148,48 @@ public:
|
||||
throw Exception("Third argument provided for function substring could not be negative.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
}
|
||||
|
||||
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
|
||||
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
|
||||
length_value, block, result, StringSource(*col), input_rows_count);
|
||||
else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_string.get()))
|
||||
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
|
||||
length_value, block, result, FixedStringSource(*col_fixed), input_rows_count);
|
||||
else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
|
||||
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
|
||||
length_value, block, result, ConstSource<StringSource>(*col_const), input_rows_count);
|
||||
else if (const ColumnConst * col_const_fixed = checkAndGetColumnConst<ColumnFixedString>(column_string.get()))
|
||||
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
|
||||
length_value, block, result, ConstSource<FixedStringSource>(*col_const_fixed), input_rows_count);
|
||||
if constexpr (is_utf8)
|
||||
{
|
||||
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
|
||||
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
|
||||
length_value, block, result, UTF8StringSource(*col), input_rows_count);
|
||||
else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
|
||||
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
|
||||
length_value, block, result, ConstSource<UTF8StringSource>(*col_const), input_rows_count);
|
||||
else
|
||||
throw Exception(
|
||||
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
else
|
||||
throw Exception(
|
||||
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
{
|
||||
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
|
||||
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
|
||||
length_value, block, result, StringSource(*col), input_rows_count);
|
||||
else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_string.get()))
|
||||
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
|
||||
length_value, block, result, FixedStringSource(*col_fixed), input_rows_count);
|
||||
else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
|
||||
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
|
||||
length_value, block, result, ConstSource<StringSource>(*col_const), input_rows_count);
|
||||
else if (const ColumnConst * col_const_fixed = checkAndGetColumnConst<ColumnFixedString>(column_string.get()))
|
||||
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
|
||||
length_value, block, result, ConstSource<FixedStringSource>(*col_const_fixed), input_rows_count);
|
||||
else
|
||||
throw Exception(
|
||||
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
void registerFunctionSubstring(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionSubstring>(FunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("substr", FunctionSubstring::name, FunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("mid", FunctionSubstring::name, FunctionFactory::CaseInsensitive); /// from MySQL dialect
|
||||
factory.registerFunction<FunctionSubstring<false>>(FunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("substr", "substring", FunctionFactory::CaseInsensitive);
|
||||
factory.registerAlias("mid", "substring", FunctionFactory::CaseInsensitive); /// from MySQL dialect
|
||||
|
||||
factory.registerFunction<FunctionSubstring<true>>(FunctionFactory::CaseSensitive);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,166 +0,0 @@
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Core/ColumnNumbers.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
}
|
||||
|
||||
|
||||
/** If the string is encoded in UTF-8, then it selects a substring of code points in it.
|
||||
* Otherwise, the behavior is undefined.
|
||||
*/
|
||||
struct SubstringUTF8Impl
|
||||
{
|
||||
static void vector(const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
size_t start,
|
||||
size_t length,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
res_data.reserve(data.size());
|
||||
size_t size = offsets.size();
|
||||
res_offsets.resize(size);
|
||||
|
||||
ColumnString::Offset prev_offset = 0;
|
||||
ColumnString::Offset res_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
ColumnString::Offset j = prev_offset;
|
||||
ColumnString::Offset pos = 1;
|
||||
ColumnString::Offset bytes_start = 0;
|
||||
ColumnString::Offset bytes_length = 0;
|
||||
while (j < offsets[i] - 1)
|
||||
{
|
||||
if (pos == start)
|
||||
bytes_start = j - prev_offset + 1;
|
||||
|
||||
if (data[j] < 0xBF)
|
||||
j += 1;
|
||||
else if (data[j] < 0xE0)
|
||||
j += 2;
|
||||
else if (data[j] < 0xF0)
|
||||
j += 3;
|
||||
else
|
||||
j += 1;
|
||||
|
||||
if (pos >= start && pos < start + length)
|
||||
bytes_length = j - prev_offset + 1 - bytes_start;
|
||||
else if (pos >= start + length)
|
||||
break;
|
||||
|
||||
++pos;
|
||||
}
|
||||
|
||||
if (bytes_start == 0)
|
||||
{
|
||||
res_data.resize(res_data.size() + 1);
|
||||
res_data[res_offset] = 0;
|
||||
++res_offset;
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t bytes_to_copy = std::min(offsets[i] - prev_offset - bytes_start, bytes_length);
|
||||
res_data.resize(res_data.size() + bytes_to_copy + 1);
|
||||
memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &data[prev_offset + bytes_start - 1], bytes_to_copy);
|
||||
res_offset += bytes_to_copy + 1;
|
||||
res_data[res_offset - 1] = 0;
|
||||
}
|
||||
res_offsets[i] = res_offset;
|
||||
prev_offset = offsets[i];
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class FunctionSubstringUTF8 : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "substringUTF8";
|
||||
static FunctionPtr create(const Context &)
|
||||
{
|
||||
return std::make_shared<FunctionSubstringUTF8>();
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
size_t getNumberOfArguments() const override
|
||||
{
|
||||
return 3;
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception(
|
||||
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
if (!isNumber(arguments[1]) || !isNumber(arguments[2]))
|
||||
throw Exception("Illegal type " + (isNumber(arguments[1]) ? arguments[2]->getName() : arguments[1]->getName())
|
||||
+ " of argument of function "
|
||||
+ getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
|
||||
{
|
||||
const ColumnPtr column_string = block.getByPosition(arguments[0]).column;
|
||||
const ColumnPtr column_start = block.getByPosition(arguments[1]).column;
|
||||
const ColumnPtr column_length = block.getByPosition(arguments[2]).column;
|
||||
|
||||
if (!column_start->isColumnConst() || !column_length->isColumnConst())
|
||||
throw Exception("2nd and 3rd arguments of function " + getName() + " must be constants.", ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
Field start_field = (*block.getByPosition(arguments[1]).column)[0];
|
||||
Field length_field = (*block.getByPosition(arguments[2]).column)[0];
|
||||
|
||||
if (start_field.getType() != Field::Types::UInt64 || length_field.getType() != Field::Types::UInt64)
|
||||
throw Exception("2nd and 3rd arguments of function " + getName() + " must be non-negative and must have UInt type.", ErrorCodes::ILLEGAL_COLUMN);
|
||||
|
||||
UInt64 start = start_field.get<UInt64>();
|
||||
UInt64 length = length_field.get<UInt64>();
|
||||
|
||||
if (start == 0)
|
||||
throw Exception("Second argument of function substring must be greater than 0.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
|
||||
/// Otherwise may lead to overflow and pass bounds check inside inner loop.
|
||||
if (start >= 0x8000000000000000ULL || length >= 0x8000000000000000ULL)
|
||||
throw Exception("Too large values of 2nd or 3rd argument provided for function substring.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
|
||||
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
|
||||
{
|
||||
auto col_res = ColumnString::create();
|
||||
SubstringUTF8Impl::vector(col->getChars(), col->getOffsets(), start, length, col_res->getChars(), col_res->getOffsets());
|
||||
block.getByPosition(result).column = std::move(col_res);
|
||||
}
|
||||
else
|
||||
throw Exception(
|
||||
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
};
|
||||
|
||||
/// Registers FunctionSubstringUTF8 in the given factory; no case-sensitivity argument is passed.
void registerFunctionSubstringUTF8(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<FunctionSubstringUTF8>();
|
||||
}
|
||||
|
||||
}
|
@ -7,6 +7,12 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BROTLI_READ_FAILED;
|
||||
}
|
||||
|
||||
|
||||
class BrotliReadBuffer::BrotliStateWrapper
|
||||
{
|
||||
public:
|
||||
@ -29,7 +35,7 @@ public:
|
||||
BrotliReadBuffer::BrotliReadBuffer(ReadBuffer &in_, size_t buf_size, char *existing_memory, size_t alignment)
|
||||
: BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment)
|
||||
, in(in_)
|
||||
, brotli(new BrotliStateWrapper())
|
||||
, brotli(std::make_unique<BrotliStateWrapper>())
|
||||
, in_available(0)
|
||||
, in_data(nullptr)
|
||||
, out_capacity(0)
|
||||
@ -56,7 +62,7 @@ bool BrotliReadBuffer::nextImpl()
|
||||
|
||||
if (brotli->result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && (!in_available || in.eof()))
|
||||
{
|
||||
throw Exception(std::string("brotli decode error"), ErrorCodes::CANNOT_READ_ALL_DATA);
|
||||
throw Exception("brotli decode error", ErrorCodes::BROTLI_READ_FAILED);
|
||||
}
|
||||
|
||||
out_capacity = internal_buffer.size();
|
||||
@ -76,13 +82,13 @@ bool BrotliReadBuffer::nextImpl()
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(std::string("brotli decode error"), ErrorCodes::CANNOT_READ_ALL_DATA);
|
||||
throw Exception("brotli decode error", ErrorCodes::BROTLI_READ_FAILED);
|
||||
}
|
||||
}
|
||||
|
||||
if (brotli->result == BROTLI_DECODER_RESULT_ERROR)
|
||||
{
|
||||
throw Exception(std::string("brotli decode error"), ErrorCodes::CANNOT_READ_ALL_DATA);
|
||||
throw Exception("brotli decode error", ErrorCodes::BROTLI_READ_FAILED);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -34,5 +34,6 @@ private:
|
||||
|
||||
bool eof;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
126
dbms/src/IO/BrotliWriteBuffer.cpp
Normal file
126
dbms/src/IO/BrotliWriteBuffer.cpp
Normal file
@ -0,0 +1,126 @@
|
||||
#include <Common/config.h>
|
||||
#if USE_BROTLI
|
||||
|
||||
#include <IO/BrotliWriteBuffer.h>
|
||||
#include <brotli/encode.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int BROTLI_WRITE_FAILED;
|
||||
}
|
||||
|
||||
|
||||
class BrotliWriteBuffer::BrotliStateWrapper
|
||||
{
|
||||
public:
|
||||
BrotliStateWrapper()
|
||||
: state(BrotliEncoderCreateInstance(nullptr, nullptr, nullptr))
|
||||
{
|
||||
}
|
||||
|
||||
~BrotliStateWrapper()
|
||||
{
|
||||
BrotliEncoderDestroyInstance(state);
|
||||
}
|
||||
|
||||
public:
|
||||
BrotliEncoderState * state;
|
||||
};
|
||||
|
||||
/// Creates the encoder state and configures its quality and window size.
/// 'out_' receives the compressed bytes; the remaining arguments are forwarded to BufferWithOwnMemory.
BrotliWriteBuffer::BrotliWriteBuffer(WriteBuffer & out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment)
    : BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment)
    , brotli(std::make_unique<BrotliStateWrapper>())
    , in_available(0)
    , in_data(nullptr)
    , out_capacity(0)
    , out_data(nullptr)
    , out(out_)
{
    BrotliEncoderState * const encoder = brotli->state;

    const auto quality = static_cast<uint32_t>(compression_level);
    BrotliEncoderSetParameter(encoder, BROTLI_PARAM_QUALITY, quality);

    /// LZ77 window size. According to brotli sources default value is 24 (c/tools/brotli.c:81)
    const uint32_t lz77_window_bits = 24;
    BrotliEncoderSetParameter(encoder, BROTLI_PARAM_LGWIN, lz77_window_bits);
}
|
||||
|
||||
/// Finalizes the compressed stream if finish() has not been called yet.
BrotliWriteBuffer::~BrotliWriteBuffer()
{
    /// Destructors are implicitly noexcept: an exception escaping finish() would call std::terminate.
    try
    {
        finish();
    }
    catch (...)
    {
        /// The error cannot be propagated from a destructor. Callers that need to observe
        /// compression or write failures must call finish() explicitly before destruction.
    }
}
|
||||
|
||||
void BrotliWriteBuffer::nextImpl()
|
||||
{
|
||||
if (!offset())
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
in_data = reinterpret_cast<unsigned char *>(working_buffer.begin());
|
||||
in_available = offset();
|
||||
|
||||
do
|
||||
{
|
||||
out.nextIfAtEnd();
|
||||
out_data = reinterpret_cast<unsigned char *>(out.position());
|
||||
out_capacity = out.buffer().end() - out.position();
|
||||
|
||||
int result = BrotliEncoderCompressStream(
|
||||
brotli->state,
|
||||
in_available ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH,
|
||||
&in_available,
|
||||
&in_data,
|
||||
&out_capacity,
|
||||
&out_data,
|
||||
nullptr);
|
||||
|
||||
out.position() = out.buffer().end() - out_capacity;
|
||||
|
||||
if (result == 0)
|
||||
{
|
||||
throw Exception("brotli compress failed", ErrorCodes::BROTLI_WRITE_FAILED);
|
||||
}
|
||||
}
|
||||
while (in_available > 0 || out_capacity == 0);
|
||||
}
|
||||
|
||||
void BrotliWriteBuffer::finish()
|
||||
{
|
||||
if (finished)
|
||||
return;
|
||||
|
||||
next();
|
||||
|
||||
while (true)
|
||||
{
|
||||
out.nextIfAtEnd();
|
||||
out_data = reinterpret_cast<unsigned char *>(out.position());
|
||||
out_capacity = out.buffer().end() - out.position();
|
||||
|
||||
int result = BrotliEncoderCompressStream(
|
||||
brotli->state,
|
||||
BROTLI_OPERATION_FINISH,
|
||||
&in_available,
|
||||
&in_data,
|
||||
&out_capacity,
|
||||
&out_data,
|
||||
nullptr);
|
||||
|
||||
out.position() = out.buffer().end() - out_capacity;
|
||||
|
||||
if (BrotliEncoderIsFinished(brotli->state))
|
||||
{
|
||||
finished = true;
|
||||
return;
|
||||
}
|
||||
|
||||
if (result == 0)
|
||||
{
|
||||
throw Exception("brotli compress failed", ErrorCodes::BROTLI_WRITE_FAILED);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
40
dbms/src/IO/BrotliWriteBuffer.h
Normal file
40
dbms/src/IO/BrotliWriteBuffer.h
Normal file
@ -0,0 +1,40 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// WriteBuffer that compresses the written data with brotli and passes the compressed bytes to another WriteBuffer.
class BrotliWriteBuffer : public BufferWithOwnMemory<WriteBuffer>
|
||||
{
|
||||
public:
|
||||
/// out_ - destination of the compressed data (stored by reference).
/// compression_level - passed to the encoder as BROTLI_PARAM_QUALITY.
/// buf_size, existing_memory, alignment - forwarded to BufferWithOwnMemory.
BrotliWriteBuffer(
|
||||
WriteBuffer & out_,
|
||||
int compression_level,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0);
|
||||
|
||||
/// Calls finish().
~BrotliWriteBuffer() override;
|
||||
|
||||
/// Flushes pending data and finalizes the compressed stream; does nothing if already finished.
void finish();
|
||||
|
||||
private:
|
||||
/// Compresses the contents of the working buffer into 'out'.
void nextImpl() override;
|
||||
|
||||
/// Wrapper around the brotli encoder state; defined in the .cpp file.
class BrotliStateWrapper;
|
||||
std::unique_ptr<BrotliStateWrapper> brotli;
|
||||
|
||||
/// Remaining input for the encoder: number of bytes and pointer to the next byte.
size_t in_available;
|
||||
const uint8_t * in_data;
|
||||
|
||||
/// Free space in the destination buffer: number of bytes and pointer to the next free byte.
size_t out_capacity;
|
||||
uint8_t * out_data;
|
||||
|
||||
/// Destination of the compressed data.
WriteBuffer & out;
|
||||
|
||||
/// Set once the encoder has reported that the stream is finished.
bool finished = false;
|
||||
};
|
||||
|
||||
}
|
@ -24,7 +24,8 @@ namespace DB
|
||||
* Differs in that is doesn't do unneeded memset. (And also tries to do as little as possible.)
|
||||
* Also allows to allocate aligned piece of memory (to use with O_DIRECT, for example).
|
||||
*/
|
||||
struct Memory : boost::noncopyable, Allocator<false>
|
||||
template <typename Allocator = Allocator<false>>
|
||||
struct Memory : boost::noncopyable, Allocator
|
||||
{
|
||||
/// Padding is needed to allow usage of 'memcpySmallAllowReadWriteOverflow15' function with this buffer.
|
||||
static constexpr size_t pad_right = 15;
|
||||
@ -136,7 +137,7 @@ template <typename Base>
|
||||
class BufferWithOwnMemory : public Base
|
||||
{
|
||||
protected:
|
||||
Memory memory;
|
||||
Memory<> memory;
|
||||
public:
|
||||
/// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership.
|
||||
BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)
|
||||
|
@ -3,7 +3,7 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
enum class ZlibCompressionMethod
|
||||
enum class CompressionMethod
|
||||
{
|
||||
/// DEFLATE compression with gzip header and CRC32 checksum.
|
||||
/// This option corresponds to files produced by gzip(1) or HTTP Content-Encoding: gzip.
|
||||
@ -11,6 +11,7 @@ enum class ZlibCompressionMethod
|
||||
/// DEFLATE compression with zlib header and Adler32 checksum.
|
||||
/// This option corresponds to HTTP Content-Encoding: deflate.
|
||||
Zlib,
|
||||
Brotli,
|
||||
};
|
||||
|
||||
}
|
@ -179,7 +179,7 @@ private:
|
||||
*/
|
||||
virtual bool nextImpl() { return false; }
|
||||
|
||||
void throwReadAfterEOF()
|
||||
[[noreturn]] void throwReadAfterEOF()
|
||||
{
|
||||
throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
|
||||
}
|
||||
|
@ -187,6 +187,9 @@ off_t ReadBufferAIO::doSeek(off_t off, int whence)
|
||||
pos = working_buffer.end();
|
||||
first_unread_pos_in_file = new_pos_in_file;
|
||||
|
||||
/// If we go back, than it's not eof
|
||||
is_eof = false;
|
||||
|
||||
/// We can not use the result of the current asynchronous request.
|
||||
skip();
|
||||
}
|
||||
|
@ -43,6 +43,7 @@ protected:
|
||||
ProfileCallback profile_callback;
|
||||
clockid_t clock_type;
|
||||
|
||||
/// Children implementation should be able to seek backwards
|
||||
virtual off_t doSeek(off_t off, int whence) = 0;
|
||||
};
|
||||
|
||||
|
@ -164,7 +164,7 @@ void readVectorBinary(std::vector<T> & v, ReadBuffer & buf, size_t MAX_VECTOR_SI
|
||||
void assertString(const char * s, ReadBuffer & buf);
|
||||
void assertEOF(ReadBuffer & buf);
|
||||
|
||||
void throwAtAssertionFailed(const char * s, ReadBuffer & buf);
|
||||
[[noreturn]] void throwAtAssertionFailed(const char * s, ReadBuffer & buf);
|
||||
|
||||
inline void assertChar(char symbol, ReadBuffer & buf)
|
||||
{
|
||||
|
@ -6,6 +6,11 @@
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
|
||||
#include <Common/config.h>
|
||||
#if USE_LFALLOC
|
||||
#include <Common/LFAllocator.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
@ -20,8 +25,13 @@ namespace DB
|
||||
|
||||
struct UncompressedCacheCell
|
||||
{
|
||||
Memory data;
|
||||
#if USE_LFALLOC
|
||||
Memory<LFAllocator> data;
|
||||
#else
|
||||
Memory<> data;
|
||||
#endif
|
||||
size_t compressed_size;
|
||||
UInt32 additional_bytes;
|
||||
};
|
||||
|
||||
struct UncompressedSizeWeightFunction
|
||||
|
@ -113,7 +113,7 @@ readVarUInt(T & x, ReadBuffer & istr)
|
||||
}
|
||||
|
||||
|
||||
inline void throwReadAfterEOF()
|
||||
[[noreturn]] inline void throwReadAfterEOF()
|
||||
{
|
||||
throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
|
||||
}
|
||||
|
@ -76,34 +76,47 @@ void WriteBufferFromHTTPServerResponse::nextImpl()
|
||||
{
|
||||
if (compress)
|
||||
{
|
||||
if (compression_method == ZlibCompressionMethod::Gzip)
|
||||
if (compression_method == CompressionMethod::Gzip)
|
||||
{
|
||||
#if defined(POCO_CLICKHOUSE_PATCH)
|
||||
*response_header_ostr << "Content-Encoding: gzip\r\n";
|
||||
#else
|
||||
response.set("Content-Encoding", "gzip");
|
||||
response_body_ostr = &(response.send());
|
||||
#endif
|
||||
out_raw.emplace(*response_body_ostr);
|
||||
deflating_buf.emplace(*out_raw, compression_method, compression_level, working_buffer.size(), working_buffer.begin());
|
||||
out = &*deflating_buf;
|
||||
}
|
||||
else if (compression_method == ZlibCompressionMethod::Zlib)
|
||||
else if (compression_method == CompressionMethod::Zlib)
|
||||
{
|
||||
#if defined(POCO_CLICKHOUSE_PATCH)
|
||||
*response_header_ostr << "Content-Encoding: deflate\r\n";
|
||||
#else
|
||||
response.set("Content-Encoding", "deflate");
|
||||
response_body_ostr = &(response.send());
|
||||
#endif
|
||||
out_raw.emplace(*response_body_ostr);
|
||||
deflating_buf.emplace(*out_raw, compression_method, compression_level, working_buffer.size(), working_buffer.begin());
|
||||
out = &*deflating_buf;
|
||||
}
|
||||
else if (compression_method == CompressionMethod::Brotli)
|
||||
{
|
||||
#if defined(POCO_CLICKHOUSE_PATCH)
|
||||
*response_header_ostr << "Content-Encoding: br\r\n";
|
||||
#else
|
||||
response.set("Content-Encoding", "br");
|
||||
response_body_ostr = &(response.send());
|
||||
#endif
|
||||
out_raw.emplace(*response_body_ostr);
|
||||
brotli_buf.emplace(*out_raw, compression_level, working_buffer.size(), working_buffer.begin());
|
||||
out = &*brotli_buf;
|
||||
}
|
||||
|
||||
else
|
||||
throw Exception("Logical error: unknown compression method passed to WriteBufferFromHTTPServerResponse",
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
/// Use memory allocated for the outer buffer in the buffer pointed to by out. This avoids extra allocation and copy.
|
||||
|
||||
#if !defined(POCO_CLICKHOUSE_PATCH)
|
||||
response_body_ostr = &(response.send());
|
||||
#endif
|
||||
|
||||
out_raw.emplace(*response_body_ostr);
|
||||
deflating_buf.emplace(*out_raw, compression_method, compression_level, working_buffer.size(), working_buffer.begin());
|
||||
out = &*deflating_buf;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -133,7 +146,7 @@ WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse(
|
||||
Poco::Net::HTTPServerResponse & response_,
|
||||
unsigned keep_alive_timeout_,
|
||||
bool compress_,
|
||||
ZlibCompressionMethod compression_method_,
|
||||
CompressionMethod compression_method_,
|
||||
size_t size)
|
||||
: BufferWithOwnMemory<WriteBuffer>(size)
|
||||
, request(request_)
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <IO/WriteBufferFromOStream.h>
|
||||
#include <IO/ZlibDeflatingWriteBuffer.h>
|
||||
#include <IO/BrotliWriteBuffer.h>
|
||||
#include <IO/HTTPCommon.h>
|
||||
#include <IO/Progress.h>
|
||||
#include <Common/NetException.h>
|
||||
@ -49,7 +50,7 @@ private:
|
||||
bool add_cors_header = false;
|
||||
unsigned keep_alive_timeout = 0;
|
||||
bool compress = false;
|
||||
ZlibCompressionMethod compression_method;
|
||||
CompressionMethod compression_method;
|
||||
int compression_level = Z_DEFAULT_COMPRESSION;
|
||||
|
||||
std::ostream * response_body_ostr = nullptr;
|
||||
@ -60,6 +61,7 @@ private:
|
||||
|
||||
std::optional<WriteBufferFromOStream> out_raw;
|
||||
std::optional<ZlibDeflatingWriteBuffer> deflating_buf;
|
||||
std::optional<BrotliWriteBuffer> brotli_buf;
|
||||
|
||||
WriteBuffer * out = nullptr; /// Uncompressed HTTP body is written to this buffer. Points to out_raw or possibly to deflating_buf.
|
||||
|
||||
@ -89,7 +91,7 @@ public:
|
||||
Poco::Net::HTTPServerResponse & response_,
|
||||
unsigned keep_alive_timeout_,
|
||||
bool compress_ = false, /// If true - set Content-Encoding header and compress the result.
|
||||
ZlibCompressionMethod compression_method_ = ZlibCompressionMethod::Gzip,
|
||||
CompressionMethod compression_method_ = CompressionMethod::Gzip,
|
||||
size_t size = DBMS_DEFAULT_BUFFER_SIZE);
|
||||
|
||||
/// Writes progess in repeating HTTP headers.
|
||||
|
@ -6,7 +6,7 @@ namespace DB
|
||||
|
||||
ZlibDeflatingWriteBuffer::ZlibDeflatingWriteBuffer(
|
||||
WriteBuffer & out_,
|
||||
ZlibCompressionMethod compression_method,
|
||||
CompressionMethod compression_method,
|
||||
int compression_level,
|
||||
size_t buf_size,
|
||||
char * existing_memory,
|
||||
@ -23,7 +23,7 @@ ZlibDeflatingWriteBuffer::ZlibDeflatingWriteBuffer(
|
||||
zstr.avail_out = 0;
|
||||
|
||||
int window_bits = 15;
|
||||
if (compression_method == ZlibCompressionMethod::Gzip)
|
||||
if (compression_method == CompressionMethod::Gzip)
|
||||
{
|
||||
window_bits += 16;
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <IO/ZlibCompressionMethod.h>
|
||||
#include <IO/CompressionMethod.h>
|
||||
|
||||
#include <zlib.h>
|
||||
|
||||
@ -21,7 +21,7 @@ class ZlibDeflatingWriteBuffer : public BufferWithOwnMemory<WriteBuffer>
|
||||
public:
|
||||
ZlibDeflatingWriteBuffer(
|
||||
WriteBuffer & out_,
|
||||
ZlibCompressionMethod compression_method,
|
||||
CompressionMethod compression_method,
|
||||
int compression_level,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
char * existing_memory = nullptr,
|
||||
|
@ -6,7 +6,7 @@ namespace DB
|
||||
|
||||
ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
|
||||
ReadBuffer & in_,
|
||||
ZlibCompressionMethod compression_method,
|
||||
CompressionMethod compression_method,
|
||||
size_t buf_size,
|
||||
char * existing_memory,
|
||||
size_t alignment)
|
||||
@ -23,7 +23,7 @@ ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
|
||||
zstr.avail_out = 0;
|
||||
|
||||
int window_bits = 15;
|
||||
if (compression_method == ZlibCompressionMethod::Gzip)
|
||||
if (compression_method == CompressionMethod::Gzip)
|
||||
{
|
||||
window_bits += 16;
|
||||
}
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
#include <IO/ZlibCompressionMethod.h>
|
||||
#include <IO/CompressionMethod.h>
|
||||
|
||||
#include <zlib.h>
|
||||
|
||||
@ -22,7 +22,7 @@ class ZlibInflatingReadBuffer : public BufferWithOwnMemory<ReadBuffer>
|
||||
public:
|
||||
ZlibInflatingReadBuffer(
|
||||
ReadBuffer & in_,
|
||||
ZlibCompressionMethod compression_method,
|
||||
CompressionMethod compression_method,
|
||||
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
|
||||
char * existing_memory = nullptr,
|
||||
size_t alignment = 0);
|
||||
|
71
dbms/src/IO/tests/gtest_aio_seek_back_after_eof.cpp
Normal file
71
dbms/src/IO/tests/gtest_aio_seek_back_after_eof.cpp
Normal file
@ -0,0 +1,71 @@
|
||||
#pragma GCC diagnostic ignored "-Wsign-compare"
|
||||
#ifdef __clang__
|
||||
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#pragma clang diagnostic ignored "-Wundef"
|
||||
#endif
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <Core/Defines.h>
|
||||
#include <port/unistd.h>
|
||||
#include <IO/ReadBufferAIO.h>
|
||||
#include <fstream>
|
||||
|
||||
namespace
|
||||
{
|
||||
/// Creates a fresh temporary directory from the pattern /tmp/fileXXXXXX and returns the path
/// "<directory>/foo" inside it. The file itself is not created here.
/// Throws std::ios_base::failure if the directory cannot be created.
std::string createTmpFileForEOFtest()
{
    char pattern[] = "/tmp/fileXXXXXX";

    /// mkdtemp fills in the XXXXXX suffix in place and returns a null pointer on failure;
    /// constructing std::string from a null pointer would be undefined behaviour.
    const char * dir = ::mkdtemp(pattern);
    if (!dir)
        throw std::ios_base::failure("mkdtemp failed for pattern /tmp/fileXXXXXX");

    return std::string(dir) + "/foo";
}
|
||||
|
||||
void prepare_for_eof(std::string & filename, std::string & buf)
|
||||
{
|
||||
static const std::string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
|
||||
|
||||
filename = createTmpFileForEOFtest();
|
||||
|
||||
size_t n = 10 * DEFAULT_AIO_FILE_BLOCK_SIZE;
|
||||
buf.reserve(n);
|
||||
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
buf += symbols[i % symbols.length()];
|
||||
|
||||
std::ofstream out(filename.c_str());
|
||||
out << buf;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
/// Checks that ReadBufferAIO can seek backwards and read again after it has reached the end of the file.
TEST(ReadBufferAIOTest, TestReadAfterAIO)
{
    using namespace DB;

    std::string data;
    std::string file_path;
    prepare_for_eof(file_path, data);
    ReadBufferAIO testbuf(file_path);

    /// Read the whole file: afterwards the buffer must report eof.
    std::string newdata(data.length(), '\0');
    const size_t total_read = testbuf.read(newdata.data(), newdata.length());
    EXPECT_EQ(total_read, data.length());
    EXPECT_TRUE(testbuf.eof());

    /// Seek back by 100 bytes and read the tail again.
    testbuf.seek(data.length() - 100);

    std::string smalldata(100, '\0');
    const size_t read_after_eof = testbuf.read(smalldata.data(), smalldata.size());
    EXPECT_EQ(read_after_eof, 100);
    EXPECT_TRUE(testbuf.eof());

    /// Seek to the very beginning and read everything once more.
    testbuf.seek(0);

    std::string repeatdata(data.length(), '\0');
    const size_t read_after_eof_big = testbuf.read(repeatdata.data(), repeatdata.size());
    EXPECT_EQ(read_after_eof_big, data.length());
    EXPECT_TRUE(testbuf.eof());
}
|
@ -23,7 +23,7 @@ try
|
||||
|
||||
{
|
||||
DB::WriteBufferFromFile buf("test_zlib_buffers.gz", DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_CREAT | O_TRUNC);
|
||||
DB::ZlibDeflatingWriteBuffer deflating_buf(buf, DB::ZlibCompressionMethod::Gzip, /* compression_level = */ 3);
|
||||
DB::ZlibDeflatingWriteBuffer deflating_buf(buf, DB::CompressionMethod::Gzip, /* compression_level = */ 3);
|
||||
|
||||
stopwatch.restart();
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
@ -41,7 +41,7 @@ try
|
||||
|
||||
{
|
||||
DB::ReadBufferFromFile buf("test_zlib_buffers.gz");
|
||||
DB::ZlibInflatingReadBuffer inflating_buf(buf, DB::ZlibCompressionMethod::Gzip);
|
||||
DB::ZlibInflatingReadBuffer inflating_buf(buf, DB::CompressionMethod::Gzip);
|
||||
|
||||
stopwatch.restart();
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
|
@ -71,8 +71,8 @@ private:
|
||||
|
||||
void visit(ASTSelectQuery & select, ASTPtr &) const
|
||||
{
|
||||
if (select.tables)
|
||||
tryVisit<ASTTablesInSelectQuery>(select.tables);
|
||||
if (select.tables())
|
||||
tryVisit<ASTTablesInSelectQuery>(select.refTables());
|
||||
|
||||
visitChildren(select);
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user